feat: secure build phase 1 — cosign cell-side image verification (warn default) + Dockerfile validation
Unit Tests / test (push) Successful in 13m28s
Unit Tests / test (push) Successful in 13m28s
- config/cosign/cosign.pub: public verification key committed to repo (safe); cosign private key lives in /home/roof/.pic-secrets/ and is NEVER committed
- api/config_manager.py: image_verification config block (modes: off|warn|enforce, default: warn) so existing deployments are unaffected until images are signed
- api/service_composer.py: cosign verify before pull/up; enforce aborts the operation, warn logs and proceeds, off skips entirely; also fixes the prior unsafe proceed-on-pull-failure path
- api/service_store_manager.py: store-image digest requirement (warn default, reject under enforce)
- api/Dockerfile: cosign binary copied from the official cosign image
- docker-compose.yml: config/cosign/ bind-mounted into cell-api container
- install.sh: ensure/verify bundled cosign pubkey on new cell installs
- api/manifest_validator.py: validate_build_context() — Dockerfile lint
- tests: full coverage for config modes, composer verify paths, store digest guard, and validate_build_context

Verification defaults to warn so nothing breaks in production until images are signed (phase 2). Private key stored outside git at /home/roof/.pic-secrets/.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -48,6 +48,48 @@ _IMAGE_DIGEST_RE = re.compile(
|
||||
r'^git\.pic\.ngo/roof/[a-zA-Z0-9._/-]+@sha256:[0-9a-f]{64}$'
|
||||
)
|
||||
|
||||
# ── Build-context (Dockerfile) lint ───────────────────────────────────────
|
||||
#
|
||||
# These checks are *defense-in-depth*, not a guarantee. A Dockerfile is
|
||||
# Turing-ish: a determined author can still fetch code at build time via a
|
||||
# permitted base image's package manager, multi-stage tricks, or build args.
|
||||
# The real trust boundary is the isolated builder + cosign signature applied
|
||||
# by the trusted publish stage (P2). This static lint exists to catch the
|
||||
# obvious-and-cheap mistakes (un-pinned bases, remote ADD, secret-named args)
|
||||
# before an image is ever built, and to keep the published corpus uniform.
|
||||
|
||||
# Repositories a community Dockerfile is permitted to name in FROM. A digest
# pin is mandatory for every one of them, which keeps builds reproducible and
# stops the base from being swapped underneath us. The list is intentionally
# short and curated; grow it deliberately as P2/P3 bring in more languages.
#
# Docker Hub official images are accepted both fully qualified
# (docker.io/library/<name>) and in short form (<name>); deriving both spellings
# from one tuple keeps the two in sync.
_HUB_OFFICIAL_REPOS = ('alpine', 'debian', 'python', 'golang', 'node')
BUILD_BASE_IMAGE_ALLOWLIST = (
    frozenset(f'docker.io/library/{repo}' for repo in _HUB_OFFICIAL_REPOS)
    | frozenset(_HUB_OFFICIAL_REPOS)
    | frozenset({'gcr.io/distroless/static', 'gcr.io/distroless/base'})
)

# FROM scratch is rejected unless 'scratch' is listed here. Left empty on
# purpose: community images are expected to start from a pinned, scannable base.
BUILD_SCRATCH_ALLOWLIST = frozenset()

# ARG/ENV names containing any of these words are treated as secret-bearing.
_DOCKERFILE_SECRET_NAME_RE = re.compile(r'(TOKEN|KEY|PASSWORD|SECRET)', re.I)

# Per-instruction matchers, applied to a single stripped logical line.
_FROM_RE = re.compile(r'^FROM\s+(.+?)(?:\s+AS\s+\S+)?$', re.I)
_ADD_RE = re.compile(r'^ADD\s+(.+)$', re.I)
_ARG_RE = re.compile(r'^ARG\s+([A-Za-z_][A-Za-z0-9_]*)', re.I)
_ENV_RE = re.compile(r'^ENV\s+(.+)$', re.I)

# Upper bounds on a build context. A community context is meant to be a
# Dockerfile plus a few config/entrypoint files — never an entire source tree.
BUILD_CONTEXT_MAX_BYTES = 5 * 1024 ** 2  # 5 MiB
BUILD_CONTEXT_MAX_FILES = 200
|
||||
|
||||
|
||||
def validate_manifest(manifest: dict) -> tuple:
|
||||
"""
|
||||
@@ -294,6 +336,149 @@ def validate_rendered_compose(yaml_text: str, allowed_data_dir: str = None,
|
||||
return (len(errors) == 0, errors)
|
||||
|
||||
|
||||
def _stage_aliases(dockerfile_text: str) -> set:
    """Collect the multi-stage build aliases declared in a Dockerfile.

    Every physical line of the form ``FROM [--flag=value ...] <image> AS <alias>``
    contributes ``<alias>`` (lower-cased) to the result, so that a later
    ``FROM <alias>`` can be recognised as a same-file stage rather than an
    external base image.

    Leading ``--`` options such as ``--platform=linux/amd64`` are skipped;
    without that, a flagged FROM line would never register its alias.

    Args:
        dockerfile_text: raw Dockerfile contents.

    Returns:
        A set of lower-cased alias names. Declaration order is not recorded:
        the set covers the whole file, including aliases declared after the
        line that references them.
    """
    alias_re = re.compile(
        r'^FROM\s+(?:--\S+\s+)*\S+\s+AS\s+(\S+)\s*$', re.IGNORECASE
    )
    matches = (alias_re.match(raw.strip()) for raw in dockerfile_text.splitlines())
    return {m.group(1).lower() for m in matches if m}
|
||||
|
||||
|
||||
def _base_is_allowed(base_ref: str) -> tuple:
    """Check one external FROM base image reference against the build policy.

    The reference must
      * carry a digest pin (``<name>[:tag]@sha256:<digest>``),
      * spell that digest out as 64 lower-case hex characters, and
      * name a repository (reference minus tag and digest) that is listed in
        BUILD_BASE_IMAGE_ALLOWLIST.

    The literal-digest requirement matters: a bare substring test for
    ``@sha256:`` would also accept ``alpine@sha256:${DIGEST}`` or a truncated
    digest, neither of which pins the base to fixed content.

    'scratch' and same-file stage aliases are expected to be handled by the
    caller before this function is reached.

    Args:
        base_ref: the image token taken from a FROM instruction.

    Returns:
        ``(True, None)`` when the reference is acceptable, otherwise
        ``(False, <error message>)``.
    """
    name, _, digest = base_ref.partition('@')
    if not digest.startswith('sha256:'):
        return (False, f'FROM base image must be digest-pinned (@sha256:): {base_ref!r}')
    if not re.fullmatch(r'sha256:[0-9a-f]{64}', digest):
        return (
            False,
            'FROM base image must be digest-pinned with a literal 64-hex '
            f'sha256 digest: {base_ref!r}',
        )
    # Drop an optional ':tag' so only the repository is compared.
    repo = name.split(':', 1)[0]
    if repo not in BUILD_BASE_IMAGE_ALLOWLIST:
        return (False, f'FROM base image not in allowlist: {repo!r}')
    return (True, None)
|
||||
|
||||
|
||||
def _logical_dockerfile_lines(dockerfile_text: str) -> list:
    """Return the Dockerfile's instructions with backslash continuations joined.

    Comment lines (first non-blank character is ``#``) are dropped *before*
    continuation handling. A comment that happens to end in a backslash must
    not absorb the following line, otherwise a real instruction placed after
    it would be hidden inside the "comment" and escape every check.

    Each returned entry is stripped and non-empty.
    """
    logical = []
    pending = []
    for raw in dockerfile_text.splitlines():
        if raw.lstrip().startswith('#'):
            continue
        text = raw.rstrip()
        if text.endswith('\\'):
            pending.append(text[:-1])
            continue
        pending.append(text)
        joined = ' '.join(pending).strip()
        pending = []
        if joined:
            logical.append(joined)
    if pending:
        # File ended in the middle of a continuation; keep what was gathered.
        joined = ' '.join(pending).strip()
        if joined:
            logical.append(joined)
    return logical


def _dockerfile_words(args: str) -> list:
    """Split instruction arguments into words, keeping quoted values intact.

    Falls back to plain whitespace splitting when the quoting is unbalanced.
    """
    import shlex

    try:
        return shlex.split(args)
    except ValueError:
        return args.split()


def validate_build_context(dockerfile_text: str, context_files=None) -> tuple:
    """
    Static lint of a community Dockerfile and its build context.

    Returns (True, []) when the Dockerfile passes; (False, [errors]) otherwise.

    Enforced (defense-in-depth — this is NOT a sandbox):
      - every external FROM base must pass _base_is_allowed (allowlisted
        repository, digest-pinned). Leading ``--flag`` options such as
        ``--platform=...`` are skipped when locating the image token.
      - a FROM that names a build stage is exempt only when that stage was
        declared by an *earlier* ``FROM ... AS <name>``. A name that is only
        declared later in the file is treated as an external image.
      - FROM scratch only when 'scratch' is in BUILD_SCRATCH_ALLOWLIST
      - no `ADD http(s)://...` (fetches arbitrary remote content at build time)
      - no ARG/ENV whose name matches _DOCKERFILE_SECRET_NAME_RE (baking a
        secret into a layer / build cache). Every name of a multi-name ARG is
        checked. ENV is read as ``NAME value`` when its first word has no '=',
        and as ``NAME=value [NAME2=value2 ...]`` otherwise.
      - context size and file-count caps when context_files metadata is given

    dockerfile_text: raw Dockerfile contents; a non-string or blank value is
    reported as 'Dockerfile is empty'.

    context_files: optional iterable of (path, size_bytes) tuples describing the
    build context. A bare path string counts as one file of size 0. Pass None to
    skip the size/count checks (e.g. when only the Dockerfile text is available,
    as in CI lint of the manifest repo).

    NOTE(review): a ``# escape=`` parser directive that changes the
    continuation character is not interpreted here — confirm whether such
    Dockerfiles should be rejected outright.
    """
    if not isinstance(dockerfile_text, str) or not dockerfile_text.strip():
        return (False, ['Dockerfile is empty'])

    errors = []
    declared_stages = set()  # aliases declared by FROM lines seen so far
    saw_from = False

    for line in _logical_dockerfile_lines(dockerfile_text):
        parts = line.split(None, 1)
        if len(parts) < 2:
            continue  # bare keyword without arguments: nothing to lint
        keyword, args = parts[0].upper(), parts[1]

        if keyword == 'FROM':
            saw_from = True
            tokens = args.split()
            while tokens and tokens[0].startswith('--'):
                tokens.pop(0)  # e.g. --platform=linux/amd64
            if not tokens:
                errors.append(f'FROM instruction names no base image: {line!r}')
                continue
            base = tokens[0]
            base_l = base.lower()
            if base_l in declared_stages:
                pass  # references an earlier build stage, not an external base
            elif base_l == 'scratch':
                if 'scratch' not in BUILD_SCRATCH_ALLOWLIST:
                    errors.append('FROM scratch is not allowed')
            else:
                ok, err = _base_is_allowed(base)
                if not ok:
                    errors.append(err)
            # Register the alias only after the base was checked, so a stage
            # cannot vouch for itself or for an earlier FROM.
            if len(tokens) == 3 and tokens[1].upper() == 'AS':
                declared_stages.add(tokens[2].lower())

        elif keyword == 'ADD':
            if re.search(r'https?://', args, re.IGNORECASE):
                errors.append(f'ADD from a remote URL is not allowed: {line!r}')

        elif keyword == 'ARG':
            for word in _dockerfile_words(args):
                name = word.split('=', 1)[0]
                if name and _DOCKERFILE_SECRET_NAME_RE.search(name):
                    errors.append(
                        f'ARG name looks secret-bearing (matches TOKEN|KEY|PASSWORD|SECRET): {name!r}'
                    )

        elif keyword == 'ENV':
            words = _dockerfile_words(args)
            if words and '=' in words[0]:
                # ENV NAME=value [NAME2=value2 ...]
                names = [w.split('=', 1)[0] for w in words if '=' in w]
            else:
                # ENV NAME value — everything after the name is the value,
                # even when the value itself contains '='.
                names = words[:1]
            for name in names:
                if name and _DOCKERFILE_SECRET_NAME_RE.search(name):
                    errors.append(
                        f'ENV name looks secret-bearing (matches TOKEN|KEY|PASSWORD|SECRET): {name!r}'
                    )

    if not saw_from:
        errors.append('Dockerfile has no FROM instruction')

    if context_files is not None:
        total_bytes = 0
        count = 0
        for entry in context_files:
            count += 1
            if isinstance(entry, (str, bytes)):
                # A bare path: never unpack it character by character.
                path, size = entry, 0
            else:
                try:
                    path, size = entry
                except (TypeError, ValueError):
                    path, size = entry, 0
            try:
                size_bytes = int(size or 0)
            except (TypeError, ValueError):
                size_bytes = -1
            if size_bytes < 0:
                errors.append(f'build context entry has an invalid size: {path!r}')
                continue
            total_bytes += size_bytes
        if count > BUILD_CONTEXT_MAX_FILES:
            errors.append(
                f'build context has too many files: {count} > {BUILD_CONTEXT_MAX_FILES}'
            )
        if total_bytes > BUILD_CONTEXT_MAX_BYTES:
            errors.append(
                f'build context too large: {total_bytes} bytes > {BUILD_CONTEXT_MAX_BYTES}'
            )

    return (len(errors) == 0, errors)
|
||||
|
||||
|
||||
def validate_provision_hook(hook) -> tuple:
|
||||
"""
|
||||
Validate a provision_hook value from accounts.provision_hook.
|
||||
|
||||
Reference in New Issue
Block a user