fix: unblock instanceable connectivity store-service install + clean up on delete
Live verification on pic1 of the connectivity v2 multi-instance feature
surfaced four integration bugs that prevented installing any published
connectivity store service (proxy/wireguard-ext/openvpn-client/sshuttle)
and left stale host routing state behind. All four are fixed here:
1. manifest_validator rejected the CI-published `name:tag@sha256:<digest>`
image form (it required digest-only), while service_store_manager already
accepted it — so every published store image failed validation. Allow an
optional tag before the digest, matching service_store_manager.
2. The cell-api image shipped the docker CLI but not the Compose v2 plugin,
so every `docker compose` command that ServiceComposer runs (pull/up/down
for store services) failed with "'compose' is not a docker command". Copy
the compose plugin binary from the docker-cli stage.
3. service_store_manager.install ran the base compose up for instanceable
services, whose template still contains ${INSTANCE_ID}/${REDIRECT_PORT}
(there is no base container — one runs per connection instance). It now
verifies the image signature but defers the container to connection
creation for instanceable manifests.
4. delete_connection freed the record/secrets/container but never removed the
connection's individually-managed `ip rule fwmark->table` or its FORWARD
kill-switch (apply_routes only flushes the PIC_CONNECTIVITY chains and
re-adds rules for surviving connections), leaking stale host routing state.
It now tears both down; added _remove_killswitch.
Verified end-to-end on pic1: two proxy instances allocate distinct
marks/tables/ports (skipping in-use resources) and render distinct
per-instance containers; two peers route through distinct instances
(per-peer MARK + REDIRECT); deleting a connection is blocked with a 409
while it is still referenced, and a successful delete removes its ip rule.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -110,6 +110,13 @@ class _Base(unittest.TestCase):
|
||||
for _svc in ('wireguard-ext', 'openvpn-client', 'tor', 'sshuttle', 'proxy'):
|
||||
self.cm.set_installed_service(_svc, {'id': _svc, 'manifest': {}})
|
||||
self.cm._save_all_configs()
|
||||
# No test in this module should shell into the WireGuard container. Stub
|
||||
# the exec helpers so host-rule cleanup paths (delete_connection) never
|
||||
# touch real docker/iptables; returncode 1 makes the drain loops stop.
|
||||
self.mgr._wg_ip = MagicMock(
|
||||
return_value=MagicMock(returncode=1, stdout='', stderr=''))
|
||||
self.mgr._wg_iptables = MagicMock(
|
||||
return_value=MagicMock(returncode=1, stdout='', stderr=''))
|
||||
|
||||
def tearDown(self):
|
||||
shutil.rmtree(self.tmp, ignore_errors=True)
|
||||
@@ -296,6 +303,42 @@ class TestDeleteConnection(_Base):
|
||||
out = self.mgr.delete_connection('conn_nope')
|
||||
self.assertFalse(out['ok'])
|
||||
|
||||
def test_delete_removes_host_ip_rule(self):
    """delete_connection must issue `ip rule del` for the connection's mark/table.

    The fwmark->table `ip rule` is managed per connection. apply_routes
    only flushes the PIC_CONNECTIVITY chains and re-adds rules for the
    connections that survive, so without an explicit delete the rule of
    the removed connection would linger in cell-wireguard.
    """
    created = self.mgr.create_connection('proxy', 'gone', _proxy_cfg())
    connection = created['connection']
    mark = connection['mark']
    table = connection['table']
    # Forget the calls recorded while creating the connection so that only
    # delete-time `ip` invocations are inspected below.
    self.mgr._wg_ip.reset_mock()

    outcome = self.mgr.delete_connection(connection['id'])

    self.assertTrue(outcome['ok'], outcome)
    expected_cmd = ['rule', 'del', 'fwmark', hex(mark), 'lookup', str(table)]
    issued = [recorded.args[0] for recorded in self.mgr._wg_ip.call_args_list]
    self.assertIn(expected_cmd, issued)
|
||||
|
||||
def test_delete_removes_iface_killswitch(self):
    """Deleting an iface-type connection issues a `-D FORWARD` kill-switch removal."""
    created = self.mgr.create_connection(
        'wireguard_ext', 'wgks', {},
        secrets={'conf': '[Interface]\nPrivateKey = x\n'})
    connection = created['connection']
    mark = connection['mark']
    iface = connection['iface']
    # Only iptables calls made during the delete are relevant.
    self.mgr._wg_iptables.reset_mock()

    outcome = self.mgr.delete_connection(connection['id'])
    self.assertTrue(outcome['ok'], outcome)

    def _is_killswitch_delete(argv):
        # A matching call deletes from FORWARD and mentions both the
        # connection's mark (hex form) and its interface name.
        return (argv[:2] == ['-D', 'FORWARD']
                and '--mark' in argv
                and hex(mark) in argv
                and iface in argv)

    issued = [recorded.args[0]
              for recorded in self.mgr._wg_iptables.call_args_list]
    self.assertTrue(
        any(_is_killswitch_delete(argv) for argv in issued),
        issued,
    )
|
||||
|
||||
|
||||
class TestUpdateConnection(_Base):
|
||||
|
||||
|
||||
@@ -175,6 +175,19 @@ class TestValidateManifest(unittest.TestCase):
|
||||
self.assertTrue(ok)
|
||||
self.assertEqual(errs, [])
|
||||
|
||||
def test_image_tag_and_digest_passes(self):
    """A `name:tag@sha256:<digest>` image reference validates cleanly.

    That is the form the publish pipeline writes back (a valid OCI
    reference). service_store_manager already accepts it, so a validator
    that insisted on digest-only would block every published store image
    from installing.
    """
    sha = 'a' * 64
    image_ref = f'git.pic.ngo/roof/svc-proxy:latest@sha256:{sha}'
    manifest = _minimal_manifest(image=image_ref)

    ok, errs = validate_manifest(manifest)

    self.assertTrue(ok, errs)
    self.assertEqual(errs, [])
|
||||
|
||||
def test_image_wrong_registry_rejected(self):
|
||||
digest = 'a' * 64
|
||||
ok, errs = validate_manifest(
|
||||
|
||||
@@ -726,6 +726,43 @@ class TestInstall(unittest.TestCase):
|
||||
self.assertIn('digest', result['error'].lower())
|
||||
composer.install.assert_not_called()
|
||||
|
||||
def test_install_instanceable_verifies_image_but_does_not_up_container(self):
    """Installing an instanceable service verifies the image but starts nothing.

    The base compose template of such a service still carries the
    ${INSTANCE_ID}/${REDIRECT_PORT} placeholders, so install must not
    render/pull/up a base container. It only checks the image signature
    and stores the record, including the raw template, which
    ConnectivityManager later renders once per instance.
    """
    instanceable_manifest = _valid_manifest(
        id='proxy',
        container_name='cell-proxy-${INSTANCE_ID}',
        instanceable=True,
    )
    ssm, cm, _, composer = _make_ssm(manifest=instanceable_manifest)
    cm.get_image_verification_mode.return_value = 'enforce'
    composer.verify_image.return_value = {'ok': True}

    outcome = ssm.install('proxy')

    self.assertTrue(outcome['ok'], outcome)
    composer.verify_image.assert_called_once()
    composer.install.assert_not_called()
    # Persisting the raw template means per-instance rendering never has
    # to fetch it again.
    positional_args = cm.set_installed_service.call_args[0]
    stored_record = positional_args[1]
    self.assertIn('compose_template', stored_record)
|
||||
|
||||
def test_install_instanceable_aborts_when_image_verification_fails(self):
    """A failed image verification aborts the install of an instanceable service.

    Neither the composer install nor the install record may be touched.
    """
    instanceable_manifest = _valid_manifest(
        id='proxy',
        container_name='cell-proxy-${INSTANCE_ID}',
        instanceable=True,
    )
    ssm, cm, _, composer = _make_ssm(manifest=instanceable_manifest)
    cm.get_image_verification_mode.return_value = 'enforce'
    verification_failure = {
        'ok': False,
        'error': 'signature verification failed',
    }
    composer.verify_image.return_value = verification_failure

    outcome = ssm.install('proxy')

    self.assertFalse(outcome['ok'])
    composer.install.assert_not_called()
    cm.set_installed_service.assert_not_called()
|
||||
|
||||
def test_install_without_composer_stores_record(self):
|
||||
"""When service_composer=None, skip compose but still store the install record."""
|
||||
manifest = _valid_manifest(id='myapp', container_name='cell-myapp')
|
||||
|
||||
Reference in New Issue
Block a user