feat: secure build phase 1 — cosign cell-side image verification (warn default) + Dockerfile validation
Unit Tests / test (push) Successful in 13m28s
Unit Tests / test (push) Successful in 13m28s
- config/cosign/cosign.pub: public verification key committed to repo (safe); cosign private key lives in /home/roof/.pic-secrets/ and is NEVER committed - api/config_manager.py: image_verification config block (modes: off|warn|enforce, default: warn) so existing deployments are unaffected until images are signed - api/service_composer.py: cosign verify before pull/up; enforce aborts the operation, warn logs and proceeds, off skips entirely; also fixes the prior unsafe proceed-on-pull-failure path - api/service_store_manager.py: store-image digest requirement (warn default, reject under enforce) - api/Dockerfile: cosign binary copied from the official cosign image - docker-compose.yml: config/cosign/ bind-mounted into cell-api container - install.sh: ensure/verify bundled cosign pubkey on new cell installs - api/manifest_validator.py: validate_build_context() — Dockerfile lint - tests: full coverage for config modes, composer verify paths, store digest guard, and validate_build_context Verification defaults to warn so nothing breaks in production until images are signed (phase 2). Private key stored outside git at /home/roof/.pic-secrets/. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
+94
-2
@@ -32,6 +32,16 @@ logger = logging.getLogger('picell')
|
||||
|
||||
_SECRET_RE = re.compile(r'\$\{(PIC_SECRET_\w+)\}')
|
||||
_SAFE_ID_RE = re.compile(r'^[a-z0-9][a-z0-9_-]{0,63}$')
|
||||
_DIGEST_RE = re.compile(r'@sha256:[0-9a-f]{64}$')
|
||||
|
||||
# Bundled cosign public key — shipped in the repo (config/cosign/cosign.pub) so
|
||||
# every cell can verify store-service image signatures offline. install.sh keeps
|
||||
# it at /opt/pic/config/cosign/cosign.pub; in the cell-api container it is
|
||||
# COPYed to /app/config/cosign/cosign.pub.
|
||||
_COSIGN_PUBKEY_PATH = os.environ.get(
|
||||
'PIC_COSIGN_PUBKEY', '/app/config/cosign/cosign.pub'
|
||||
)
|
||||
_COSIGN_BIN = os.environ.get('PIC_COSIGN_BIN', 'cosign')
|
||||
|
||||
|
||||
class ServiceComposer:
|
||||
@@ -265,18 +275,100 @@ class ServiceComposer:
|
||||
self.write_compose(service_id, manifest, template_content)
|
||||
return self.up(service_id)
|
||||
|
||||
# ── Image signature verification ──────────────────────────────────────
|
||||
|
||||
def _verification_mode(self) -> str:
|
||||
"""Resolve the configured image verification mode (off|warn|enforce)."""
|
||||
getter = getattr(self.cm, 'get_image_verification_mode', None)
|
||||
if callable(getter):
|
||||
try:
|
||||
return getter()
|
||||
except Exception as e: # config corruption must not crash install
|
||||
logger.warning('service_composer: could not read verification mode: %s', e)
|
||||
return 'warn'
|
||||
|
||||
def _cosign_verify(self, image_ref: str) -> Dict:
|
||||
"""Run `cosign verify` against the bundled public key for one image ref.
|
||||
|
||||
Factored out so tests can mock it / mock the subprocess call. Returns a
|
||||
_run-style dict ({'ok': bool, 'stdout', 'stderr'/'error'}).
|
||||
"""
|
||||
cmd = [
|
||||
_COSIGN_BIN, 'verify',
|
||||
'--key', _COSIGN_PUBKEY_PATH,
|
||||
'--insecure-ignore-tlog=true',
|
||||
image_ref,
|
||||
]
|
||||
return self._run(cmd, timeout=120)
|
||||
|
||||
def verify_image(self, service_id: str, manifest: Dict) -> Dict:
|
||||
"""Verify a store image's signature subject to the configured mode.
|
||||
|
||||
Returns {'ok': True, 'skipped'|'verified'|'warned': ...} when the install
|
||||
may proceed, or {'ok': False, 'error': ...} when it must abort (enforce
|
||||
mode with a missing digest or a failed/absent signature).
|
||||
"""
|
||||
mode = self._verification_mode()
|
||||
if mode == 'off':
|
||||
return {'ok': True, 'skipped': True}
|
||||
|
||||
image_ref = (manifest or {}).get('image', '')
|
||||
if not image_ref:
|
||||
# No image to verify (e.g. builtin-style manifest); nothing to do.
|
||||
return {'ok': True, 'skipped': True}
|
||||
|
||||
# Store images must be digest-pinned to be verifiable by digest.
|
||||
if not _DIGEST_RE.search(image_ref):
|
||||
msg = (f'image {image_ref!r} for {service_id} is not digest-pinned '
|
||||
'(@sha256:) — cannot verify signature')
|
||||
if mode == 'enforce':
|
||||
logger.error('service_composer: %s; aborting install (enforce)', msg)
|
||||
return {'ok': False, 'error': msg}
|
||||
logger.warning('service_composer: %s; proceeding (warn)', msg)
|
||||
return {'ok': True, 'warned': True}
|
||||
|
||||
result = self._cosign_verify(image_ref)
|
||||
if result.get('ok'):
|
||||
logger.info('service_composer: cosign verified %s', image_ref)
|
||||
return {'ok': True, 'verified': True}
|
||||
|
||||
detail = result.get('stderr') or result.get('error') or 'signature verification failed'
|
||||
msg = f'cosign verification failed for {image_ref}: {str(detail)[:200]}'
|
||||
if mode == 'enforce':
|
||||
logger.error('service_composer: %s; aborting install (enforce)', msg)
|
||||
return {'ok': False, 'error': msg}
|
||||
logger.warning('service_composer: %s; proceeding (warn)', msg)
|
||||
return {'ok': True, 'warned': True}
|
||||
|
||||
def install(self, service_id: str, manifest: Dict,
|
||||
template_content: str) -> Dict:
|
||||
"""Write compose file, pull image, then start containers.
|
||||
"""Write compose file, verify + pull image, then start containers.
|
||||
|
||||
Image signature verification runs before pull/up. Under enforce mode a
|
||||
missing digest, missing signature, or failed verification aborts the
|
||||
install (containers are never started); under warn mode the problem is
|
||||
logged and the install proceeds; under off mode verification is skipped.
|
||||
|
||||
pull is run first so the up step doesn't time out on slow connections.
|
||||
A single retry handles transient registry hiccups on first install.
|
||||
"""
|
||||
self.write_compose(service_id, manifest, template_content)
|
||||
|
||||
verify = self.verify_image(service_id, manifest)
|
||||
if not verify.get('ok'):
|
||||
return {'ok': False, 'error': verify.get('error', 'image verification failed')}
|
||||
|
||||
mode = self._verification_mode()
|
||||
pull = self._store_cmd(service_id, 'pull', timeout=600)
|
||||
if not pull.get('ok'):
|
||||
pull_err = pull.get('stderr') or pull.get('error') or 'unknown error'
|
||||
if mode == 'enforce':
|
||||
logger.error('service_composer: image pull for %s failed under enforce, '
|
||||
'aborting: %s', service_id, str(pull_err)[:200])
|
||||
return {'ok': False,
|
||||
'error': f'image pull failed (enforce): {str(pull_err)[:200]}'}
|
||||
logger.warning('service_composer: image pull for %s failed, proceeding anyway: %s',
|
||||
service_id, pull.get('stderr', '')[:200])
|
||||
service_id, str(pull_err)[:200])
|
||||
result = self.up(service_id)
|
||||
if not result.get('ok'):
|
||||
logger.info('service_composer: retrying up for %s after initial failure', service_id)
|
||||
|
||||
Reference in New Issue
Block a user