Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| fa746a3b30 | |||
| c806a9bb54 | |||
| 6bc1d625bf |
@@ -8,7 +8,11 @@ WORKDIR /app/api
|
||||
|
||||
# The API runs as root by design: it drives iptables, the docker socket, and
|
||||
# docker-execs into sibling containers. Non-root is not feasible here.
|
||||
# The Compose v2 plugin is a separate binary under cli-plugins/ — ServiceComposer
|
||||
# shells out to `docker compose` for every store-service lifecycle op, so it must
|
||||
# be copied alongside the docker CLI, not just the docker binary.
|
||||
COPY --from=dockercli /usr/local/bin/docker /usr/local/bin/docker
|
||||
COPY --from=dockercli /usr/local/libexec/docker/cli-plugins/docker-compose /usr/local/libexec/docker/cli-plugins/docker-compose
|
||||
|
||||
# cosign verifies store-service image signatures against the bundled public key
|
||||
# (config/cosign/cosign.pub) before ServiceComposer starts a container.
|
||||
|
||||
@@ -1419,6 +1419,25 @@ class ConnectivityManager(BaseServiceManager):
|
||||
logger.warning(f"delete_connection: container teardown failed "
|
||||
f"(non-fatal): {e}")
|
||||
|
||||
# Free this connection's host policy-routing rule and kill-switch.
|
||||
# apply_routes only re-adds rules for *existing* connections and only
|
||||
# flushes the PIC_CONNECTIVITY chains — it never removes the deleted
|
||||
# connection's individually-managed `ip rule fwmark→table` or its
|
||||
# FORWARD kill-switch, so they must be torn down here or they leak.
|
||||
mark, table = record.get('mark'), record.get('table')
|
||||
if (record.get('type') != self.CELL_RELAY_TYPE
|
||||
and isinstance(mark, int) and isinstance(table, int)):
|
||||
try:
|
||||
self._remove_ip_rule(mark, table)
|
||||
except Exception as e:
|
||||
logger.warning(f"delete_connection: ip rule cleanup failed "
|
||||
f"(non-fatal): {e}")
|
||||
try:
|
||||
self._remove_killswitch(mark, record.get('iface'))
|
||||
except Exception as e:
|
||||
logger.warning(f"delete_connection: killswitch cleanup failed "
|
||||
f"(non-fatal): {e}")
|
||||
|
||||
for secret_ref in record.get('secret_refs', []):
|
||||
if self.vault_manager is not None:
|
||||
try:
|
||||
@@ -2138,6 +2157,24 @@ class ConnectivityManager(BaseServiceManager):
|
||||
'-m', 'mark', '--mark', hex(mark),
|
||||
'!', '-o', iface, '-j', 'DROP'])
|
||||
|
||||
def _remove_killswitch(self, mark: int, iface: Optional[str]) -> None:
|
||||
"""Remove a connection's kill-switch FORWARD DROP (idempotent).
|
||||
|
||||
Unlike the per-peer MARK/REDIRECT rules (which live in the flushed
|
||||
PIC_CONNECTIVITY chains), the kill-switch is appended directly to
|
||||
FORWARD, so it is not cleared by apply_routes' chain flush — a deleted
|
||||
connection would otherwise leave a stale DROP that blocks a later
|
||||
connection reusing the same mark. Drain duplicates with a bounded loop.
|
||||
"""
|
||||
if not iface:
|
||||
return
|
||||
for _ in range(8):
|
||||
r = self._wg_iptables(['-D', 'FORWARD',
|
||||
'-m', 'mark', '--mark', hex(mark),
|
||||
'!', '-o', iface, '-j', 'DROP'])
|
||||
if r.returncode != 0:
|
||||
break
|
||||
|
||||
def _exit_status(self, exit_type: str) -> Dict[str, Any]:
|
||||
"""Return per-exit status (config presence + interface up/down).
|
||||
|
||||
|
||||
@@ -45,7 +45,7 @@ _HOOK_BINARY_RE = re.compile(r'^[a-z][a-z0-9_-]{0,31}$')
|
||||
_CAP_NAME_RE = re.compile(r'^[A-Z_]+$')
|
||||
_ID_RE = re.compile(r'^[a-z][a-z0-9_-]{0,30}$')
|
||||
_IMAGE_DIGEST_RE = re.compile(
|
||||
r'^git\.pic\.ngo/roof/[a-zA-Z0-9._/-]+@sha256:[0-9a-f]{64}$'
|
||||
r'^git\.pic\.ngo/roof/[a-zA-Z0-9._/-]+(:[a-zA-Z0-9._-]+)?@sha256:[0-9a-f]{64}$'
|
||||
)
|
||||
|
||||
# ── Build-context (Dockerfile) lint ───────────────────────────────────────
|
||||
|
||||
@@ -35,9 +35,10 @@ _SAFE_ID_RE = re.compile(r'^[a-z0-9][a-z0-9_-]{0,63}$')
|
||||
_DIGEST_RE = re.compile(r'@sha256:[0-9a-f]{64}$')
|
||||
|
||||
# Bundled cosign public key — shipped in the repo (config/cosign/cosign.pub) so
|
||||
# every cell can verify store-service image signatures offline. install.sh keeps
|
||||
# it at /opt/pic/config/cosign/cosign.pub; in the cell-api container it is
|
||||
# COPYed to /app/config/cosign/cosign.pub.
|
||||
# every cell can verify store-service image signatures offline. It is bind-mounted
|
||||
# into cell-api at /app/config/cosign/cosign.pub (see docker-compose.yml). Because
|
||||
# `make reinstall`/`uninstall` run `rm -rf config/`, setup_cell.ensure_cosign_pubkey()
|
||||
# restores it from git on every setup so the mount is never empty.
|
||||
_COSIGN_PUBKEY_PATH = os.environ.get(
|
||||
'PIC_COSIGN_PUBKEY', '/app/config/cosign/cosign.pub'
|
||||
)
|
||||
@@ -157,6 +158,11 @@ class ServiceComposer:
|
||||
result = result.replace('${PIC_CELL_NAME}', cell_name)
|
||||
result = result.replace('${PIC_SERVICE_ID}', service_id)
|
||||
result = result.replace('${PIC_DATA_DIR}', str(Path(self.data_dir).resolve()))
|
||||
# ${PIC_IMAGE} resolves to the manifest's image — the digest-pinned,
|
||||
# cosign-verified reference. Templates (especially instanceable ones)
|
||||
# MUST use this rather than hardcoding an image:tag, so the container
|
||||
# that actually runs is the same image the store verified at install.
|
||||
result = result.replace('${PIC_IMAGE}', str(manifest.get('image', '')))
|
||||
|
||||
if instance_vars:
|
||||
for var in ('INSTANCE_ID', 'REDIRECT_PORT'):
|
||||
|
||||
@@ -333,8 +333,24 @@ class ServiceStoreManager(BaseServiceManager):
|
||||
except Exception as e:
|
||||
return {'ok': False, 'error': f'Failed to fetch compose template: {e}'}
|
||||
|
||||
# Write compose file and start containers (validation inside write_compose)
|
||||
# Write compose file and start containers (validation inside write_compose).
|
||||
# Instanceable connectivity services back one container PER connection
|
||||
# instance, rendered later by ConnectivityManager with a concrete
|
||||
# ${INSTANCE_ID}/${REDIRECT_PORT}. Their base template still contains
|
||||
# those placeholders, so there is no base container to bring up at
|
||||
# install time — rendering/pulling/up-ing it here fails on the unset
|
||||
# variables. Verify the image signature now (the enforce gate still
|
||||
# applies), but defer the container to connection creation.
|
||||
if self.service_composer is not None:
|
||||
if manifest.get('instanceable'):
|
||||
try:
|
||||
verify = self.service_composer.verify_image(service_id, manifest)
|
||||
except Exception as e:
|
||||
return {'ok': False, 'error': f'image verification failed: {e}'}
|
||||
if not verify.get('ok'):
|
||||
return {'ok': False,
|
||||
'error': verify.get('error', 'image verification failed')}
|
||||
else:
|
||||
try:
|
||||
result = self.service_composer.install(service_id, manifest, template_content)
|
||||
except ValueError as e:
|
||||
|
||||
@@ -62,6 +62,38 @@ def ensure_file(rel):
|
||||
print(f'[EXISTS] {rel}')
|
||||
|
||||
|
||||
def ensure_cosign_pubkey():
|
||||
"""Restore the tracked cosign public key if a config wipe removed it.
|
||||
|
||||
`config/cosign/cosign.pub` is a git-tracked asset bind-mounted into cell-api
|
||||
and used to verify store-service image signatures. `make reinstall`/
|
||||
`uninstall` run `rm -rf config/`, which deletes it from the working tree, and
|
||||
nothing else recreates it — leaving every store install broken under the
|
||||
default enforce mode. Restore it from HEAD here (setup runs on every
|
||||
install/reinstall). Best-effort: if this is not a git checkout, warn rather
|
||||
than fail — install.sh surfaces the same warning.
|
||||
"""
|
||||
rel = os.path.join('config', 'cosign', 'cosign.pub')
|
||||
path = os.path.join(ROOT, rel)
|
||||
if os.path.exists(path) and os.path.getsize(path) > 0:
|
||||
print(f'[EXISTS] {rel}')
|
||||
return
|
||||
os.makedirs(os.path.dirname(path), exist_ok=True)
|
||||
try:
|
||||
blob = subprocess.run(
|
||||
['git', '-C', ROOT, 'show', 'HEAD:config/cosign/cosign.pub'],
|
||||
capture_output=True, check=True).stdout
|
||||
if blob:
|
||||
with open(path, 'wb') as f:
|
||||
f.write(blob)
|
||||
print(f'[RESTORED] {rel} (from git HEAD)')
|
||||
return
|
||||
except Exception as e:
|
||||
print(f'[WARN] could not restore {rel} from git: {e}')
|
||||
print(f'[WARN] {rel} is missing — store-service image signature '
|
||||
'verification will fail under enforce mode until it is provided')
|
||||
|
||||
|
||||
def ensure_caddy_ca_cert():
|
||||
cert_dir = os.path.join(ROOT, 'config', 'caddy', 'certs')
|
||||
ca_key = os.path.join(cert_dir, 'ca.key')
|
||||
@@ -402,6 +434,7 @@ def main():
|
||||
for f in REQUIRED_FILES:
|
||||
ensure_file(f)
|
||||
|
||||
ensure_cosign_pubkey()
|
||||
ensure_caddy_ca_cert()
|
||||
priv, _pub = generate_wg_keys()
|
||||
write_wg0_conf(priv, vpn_address, wg_port)
|
||||
|
||||
@@ -110,6 +110,13 @@ class _Base(unittest.TestCase):
|
||||
for _svc in ('wireguard-ext', 'openvpn-client', 'tor', 'sshuttle', 'proxy'):
|
||||
self.cm.set_installed_service(_svc, {'id': _svc, 'manifest': {}})
|
||||
self.cm._save_all_configs()
|
||||
# No test in this module should shell into the WireGuard container. Stub
|
||||
# the exec helpers so host-rule cleanup paths (delete_connection) never
|
||||
# touch real docker/iptables; returncode 1 makes the drain loops stop.
|
||||
self.mgr._wg_ip = MagicMock(
|
||||
return_value=MagicMock(returncode=1, stdout='', stderr=''))
|
||||
self.mgr._wg_iptables = MagicMock(
|
||||
return_value=MagicMock(returncode=1, stdout='', stderr=''))
|
||||
|
||||
def tearDown(self):
|
||||
shutil.rmtree(self.tmp, ignore_errors=True)
|
||||
@@ -296,6 +303,42 @@ class TestDeleteConnection(_Base):
|
||||
out = self.mgr.delete_connection('conn_nope')
|
||||
self.assertFalse(out['ok'])
|
||||
|
||||
def test_delete_removes_host_ip_rule(self):
|
||||
"""Deleting a connection must remove its fwmark->table ip rule.
|
||||
|
||||
apply_routes only re-adds rules for surviving connections and only
|
||||
flushes the PIC_CONNECTIVITY chains, so the deleted connection's
|
||||
individually-managed `ip rule` would otherwise leak in cell-wireguard.
|
||||
"""
|
||||
res = self.mgr.create_connection('proxy', 'gone', _proxy_cfg())
|
||||
conn = res['connection']
|
||||
mark, table = conn['mark'], conn['table']
|
||||
self.mgr._wg_ip.reset_mock()
|
||||
out = self.mgr.delete_connection(conn['id'])
|
||||
self.assertTrue(out['ok'], out)
|
||||
ip_calls = [c.args[0] for c in self.mgr._wg_ip.call_args_list]
|
||||
self.assertIn(
|
||||
['rule', 'del', 'fwmark', hex(mark), 'lookup', str(table)],
|
||||
ip_calls,
|
||||
)
|
||||
|
||||
def test_delete_removes_iface_killswitch(self):
|
||||
"""An iface-type connection's FORWARD kill-switch is removed on delete."""
|
||||
res = self.mgr.create_connection(
|
||||
'wireguard_ext', 'wgks', {},
|
||||
secrets={'conf': '[Interface]\nPrivateKey = x\n'})
|
||||
conn = res['connection']
|
||||
mark, iface = conn['mark'], conn['iface']
|
||||
self.mgr._wg_iptables.reset_mock()
|
||||
out = self.mgr.delete_connection(conn['id'])
|
||||
self.assertTrue(out['ok'], out)
|
||||
ipt_calls = [c.args[0] for c in self.mgr._wg_iptables.call_args_list]
|
||||
self.assertTrue(
|
||||
any(c[:2] == ['-D', 'FORWARD'] and '--mark' in c and hex(mark) in c
|
||||
and iface in c for c in ipt_calls),
|
||||
ipt_calls,
|
||||
)
|
||||
|
||||
|
||||
class TestUpdateConnection(_Base):
|
||||
|
||||
|
||||
@@ -175,6 +175,19 @@ class TestValidateManifest(unittest.TestCase):
|
||||
self.assertTrue(ok)
|
||||
self.assertEqual(errs, [])
|
||||
|
||||
def test_image_tag_and_digest_passes(self):
|
||||
# The publish pipeline writes back name:tag@sha256:<digest> (a valid OCI
|
||||
# reference). The validator must accept the tag alongside the digest —
|
||||
# service_store_manager already does, and rejecting it here blocks every
|
||||
# published store image from installing.
|
||||
digest = 'a' * 64
|
||||
ok, errs = validate_manifest(
|
||||
_minimal_manifest(
|
||||
image=f'git.pic.ngo/roof/svc-proxy:latest@sha256:{digest}')
|
||||
)
|
||||
self.assertTrue(ok, errs)
|
||||
self.assertEqual(errs, [])
|
||||
|
||||
def test_image_wrong_registry_rejected(self):
|
||||
digest = 'a' * 64
|
||||
ok, errs = validate_manifest(
|
||||
|
||||
@@ -88,6 +88,23 @@ class TestRenderTemplate(unittest.TestCase):
|
||||
result = self.composer.render_template('myservice', manifest, template)
|
||||
self.assertEqual(result, 'ID=myservice')
|
||||
|
||||
def test_pic_image_substituted_from_manifest(self):
|
||||
# ${PIC_IMAGE} must resolve to the manifest's digest-pinned image so the
|
||||
# per-instance container runs the exact ref the store verified — not an
|
||||
# unpinned :latest hardcoded in the template.
|
||||
digest = 'git.pic.ngo/roof/svc-proxy:latest@sha256:' + 'a' * 64
|
||||
manifest = _make_manifest()
|
||||
manifest['image'] = digest
|
||||
template = 'image: ${PIC_IMAGE}'
|
||||
result = self.composer.render_template('myservice', manifest, template)
|
||||
self.assertEqual(result, f'image: {digest}')
|
||||
|
||||
def test_pic_image_empty_when_manifest_has_no_image(self):
|
||||
manifest = _make_manifest()
|
||||
result = self.composer.render_template(
|
||||
'myservice', manifest, 'image: ${PIC_IMAGE}')
|
||||
self.assertEqual(result, 'image: ')
|
||||
|
||||
def test_pic_secret_generated_and_substituted(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
composer = _composer(data_dir=tmpdir)
|
||||
|
||||
@@ -726,6 +726,43 @@ class TestInstall(unittest.TestCase):
|
||||
self.assertIn('digest', result['error'].lower())
|
||||
composer.install.assert_not_called()
|
||||
|
||||
def test_install_instanceable_verifies_image_but_does_not_up_container(self):
|
||||
"""Instanceable services defer the container to connection creation.
|
||||
|
||||
Their base compose template still contains ${INSTANCE_ID}/${REDIRECT_PORT},
|
||||
so the base container must NOT be rendered/pulled/up'd at install time —
|
||||
only the image signature is verified, and the record (with the raw
|
||||
template) is stored for ConnectivityManager to render per instance.
|
||||
"""
|
||||
manifest = _valid_manifest(
|
||||
id='proxy', container_name='cell-proxy-${INSTANCE_ID}',
|
||||
instanceable=True,
|
||||
)
|
||||
ssm, cm, _, composer = _make_ssm(manifest=manifest)
|
||||
cm.get_image_verification_mode.return_value = 'enforce'
|
||||
composer.verify_image.return_value = {'ok': True}
|
||||
result = ssm.install('proxy')
|
||||
self.assertTrue(result['ok'], result)
|
||||
composer.verify_image.assert_called_once()
|
||||
composer.install.assert_not_called()
|
||||
# The raw template is persisted so per-instance rendering needs no refetch.
|
||||
record = cm.set_installed_service.call_args[0][1]
|
||||
self.assertIn('compose_template', record)
|
||||
|
||||
def test_install_instanceable_aborts_when_image_verification_fails(self):
|
||||
"""An instanceable service whose image fails verification must not install."""
|
||||
manifest = _valid_manifest(
|
||||
id='proxy', container_name='cell-proxy-${INSTANCE_ID}',
|
||||
instanceable=True,
|
||||
)
|
||||
ssm, cm, _, composer = _make_ssm(manifest=manifest)
|
||||
cm.get_image_verification_mode.return_value = 'enforce'
|
||||
composer.verify_image.return_value = {'ok': False, 'error': 'signature verification failed'}
|
||||
result = ssm.install('proxy')
|
||||
self.assertFalse(result['ok'])
|
||||
composer.install.assert_not_called()
|
||||
cm.set_installed_service.assert_not_called()
|
||||
|
||||
def test_install_without_composer_stores_record(self):
|
||||
"""When service_composer=None, skip compose but still store the install record."""
|
||||
manifest = _valid_manifest(id='myapp', container_name='cell-myapp')
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
"""Tests for scripts/setup_cell.py setup helpers."""
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / 'scripts'))
|
||||
import setup_cell # noqa: E402
|
||||
|
||||
|
||||
class TestEnsureCosignPubkey(unittest.TestCase):
|
||||
"""ensure_cosign_pubkey restores the tracked key after a `rm -rf config/`.
|
||||
|
||||
Regression: `make reinstall`/`uninstall` wipe config/, deleting the tracked
|
||||
config/cosign/cosign.pub; without restore, enforce-mode store installs break.
|
||||
"""
|
||||
|
||||
KEY_REL = os.path.join('config', 'cosign', 'cosign.pub')
|
||||
KEY_BODY = '-----BEGIN PUBLIC KEY-----\nTESTKEYDATA\n-----END PUBLIC KEY-----\n'
|
||||
|
||||
def setUp(self):
|
||||
self.tmp = tempfile.mkdtemp()
|
||||
env = {**os.environ, 'GIT_CONFIG_GLOBAL': '/dev/null', 'GIT_CONFIG_SYSTEM': '/dev/null'}
|
||||
subprocess.run(['git', 'init', '-q', self.tmp], check=True, env=env)
|
||||
subprocess.run(['git', '-C', self.tmp, 'config', 'user.email', 't@t'], check=True)
|
||||
subprocess.run(['git', '-C', self.tmp, 'config', 'user.name', 't'], check=True)
|
||||
self.key = os.path.join(self.tmp, self.KEY_REL)
|
||||
os.makedirs(os.path.dirname(self.key))
|
||||
with open(self.key, 'w') as f:
|
||||
f.write(self.KEY_BODY)
|
||||
subprocess.run(['git', '-C', self.tmp, 'add', '-A'], check=True)
|
||||
subprocess.run(['git', '-C', self.tmp, 'commit', '-qm', 'init'], check=True, env=env)
|
||||
self._root = setup_cell.ROOT
|
||||
setup_cell.ROOT = self.tmp
|
||||
|
||||
def tearDown(self):
|
||||
setup_cell.ROOT = self._root
|
||||
shutil.rmtree(self.tmp, ignore_errors=True)
|
||||
|
||||
def test_restores_key_when_wiped(self):
|
||||
os.remove(self.key)
|
||||
shutil.rmtree(os.path.dirname(self.key)) # mimic `rm -rf config/`
|
||||
self.assertFalse(os.path.exists(self.key))
|
||||
setup_cell.ensure_cosign_pubkey()
|
||||
self.assertTrue(os.path.exists(self.key))
|
||||
self.assertEqual(open(self.key).read(), self.KEY_BODY)
|
||||
|
||||
def test_noop_when_key_present(self):
|
||||
setup_cell.ensure_cosign_pubkey()
|
||||
self.assertEqual(open(self.key).read(), self.KEY_BODY)
|
||||
|
||||
def test_warns_not_raises_outside_git(self):
|
||||
# Not a git checkout and key missing → must warn, never raise.
|
||||
non_git = tempfile.mkdtemp()
|
||||
setup_cell.ROOT = non_git
|
||||
try:
|
||||
setup_cell.ensure_cosign_pubkey() # should not raise
|
||||
self.assertFalse(os.path.exists(os.path.join(non_git, self.KEY_REL)))
|
||||
finally:
|
||||
shutil.rmtree(non_git, ignore_errors=True)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user