fix: complete cross-cell peer-sync push (domain SNI + source-preserving NAT)
Unit Tests / test (push) Successful in 9m45s
Unit Tests / test (push) Successful in 9m45s
Finishes the transport repair (L1+L2 landed in 714fb9b). The push now works
end-to-end between linked cells — verified live: offer/permission state
propagates automatically and the cell_relay derives/reverts without manual steps.
L3 — push by domain, not bare IP (cell_link_manager): the push targeted
https://<vpn-ip>, but in DDNS/ACME mode Caddy only holds a cert for the cell's
domain, so the TLS handshake failed by IP. Target https://<remote-domain> with
`curl --resolve <domain>:443:<dns_ip>` — connect to the VPN IP over the tunnel
but present the domain as SNI/Host. remote_api_url is now domain-based; legacy
http://ip:3000 and https://ip URLs migrate on load.
L4 — preserve the real source for auth (firewall_manager): the blanket
`-o eth0 MASQUERADE` rewrote the push source, so the remote's X-Forwarded-For
source-subnet auth couldn't match. apply_cell_rules adds a tightly-scoped nat
POSTROUTING RETURN (linked-subnet → caddy:443 only) above the masquerade; the
host route returns Caddy's reply through the tunnel. Reviewed by pic-security:
WireGuard per-cell AllowedIPs + Caddy last-XFF (no trusted_proxies) keep this
un-spoofable; the API stays 127.0.0.1-only.
Also:
- validate remote-invite domain/dns_ip/endpoint/subnet at ingest (they reach a
curl --resolve argv — block leading-dash argument-injection).
- remove the host subnet route on cell unlink (remove_cell_subnet_route); the
route was never cleaned, leaving a stale subnet that made is_local_request
treat it as local. Mock firewall side-effects in the affected unit tests.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -14,9 +14,39 @@ import json
|
||||
import shutil
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import cell_link_manager
|
||||
from cell_link_manager import CellLinkManager
|
||||
|
||||
|
||||
_fw_patch = None
|
||||
|
||||
|
||||
def setUpModule():
|
||||
"""Stop cell-link unit tests from running real firewall side-effects.
|
||||
|
||||
add_connection/remove_connection call into firewall_manager, which shells out
|
||||
to `docker exec cell-wireguard iptables` and `docker run` host-route changes.
|
||||
On the dev/CI host those mutate live routes — a stale cell-subnet route once
|
||||
made is_local_request treat a VPN subnet as local and broke the full suite.
|
||||
Tests that assert specific firewall calls use their own local patch, which
|
||||
takes precedence within its context.
|
||||
"""
|
||||
global _fw_patch
|
||||
_fw_patch = patch.multiple(
|
||||
'firewall_manager',
|
||||
apply_cell_rules=MagicMock(return_value=True),
|
||||
clear_cell_rules=MagicMock(),
|
||||
ensure_cell_subnet_routes=MagicMock(),
|
||||
remove_cell_subnet_route=MagicMock(),
|
||||
)
|
||||
_fw_patch.start()
|
||||
|
||||
|
||||
def tearDownModule():
|
||||
if _fw_patch is not None:
|
||||
_fw_patch.stop()
|
||||
|
||||
|
||||
def _make_wg_mock():
|
||||
wg = MagicMock()
|
||||
wg.get_keys.return_value = {'public_key': 'serverpubkey=', 'private_key': 'serverprivkey='}
|
||||
@@ -50,6 +80,37 @@ SAMPLE_INVITE = {
|
||||
}
|
||||
|
||||
|
||||
class TestInviteFieldValidation(unittest.TestCase):
|
||||
"""_validate_invite_fields rejects malformed remote-invite fields.
|
||||
|
||||
The domain/dns_ip flow into a `curl --resolve` argv on peer-sync push, so a
|
||||
leading-dash domain (argument injection) and non-IP dns_ip must be rejected.
|
||||
"""
|
||||
|
||||
def test_valid_invite_passes(self):
|
||||
cell_link_manager._validate_invite_fields(SAMPLE_INVITE) # no raise
|
||||
|
||||
def test_rejects_leading_dash_domain(self):
|
||||
bad = {**SAMPLE_INVITE, 'domain': '-oProxyCommand=evil'}
|
||||
with self.assertRaises(ValueError):
|
||||
cell_link_manager._validate_invite_fields(bad)
|
||||
|
||||
def test_rejects_non_ip_dns_ip(self):
|
||||
bad = {**SAMPLE_INVITE, 'dns_ip': '-x'}
|
||||
with self.assertRaises(ValueError):
|
||||
cell_link_manager._validate_invite_fields(bad)
|
||||
|
||||
def test_rejects_bad_subnet(self):
|
||||
bad = {**SAMPLE_INVITE, 'vpn_subnet': 'not-a-cidr'}
|
||||
with self.assertRaises(ValueError):
|
||||
cell_link_manager._validate_invite_fields(bad)
|
||||
|
||||
def test_rejects_bad_endpoint(self):
|
||||
bad = {**SAMPLE_INVITE, 'endpoint': '-evil:51820'}
|
||||
with self.assertRaises(ValueError):
|
||||
cell_link_manager._validate_invite_fields(bad)
|
||||
|
||||
|
||||
class TestCellLinkManagerInvite(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
@@ -146,6 +207,15 @@ class TestCellLinkManagerConnections(unittest.TestCase):
|
||||
self.mgr.remove_connection('office')
|
||||
self.nm.remove_cell_dns_forward.assert_called_once_with('office.cell')
|
||||
|
||||
def test_remove_connection_removes_host_subnet_route(self):
|
||||
"""Unlinking a cell removes its host route so the subnet isn't left
|
||||
blackholed / treated as locally attached."""
|
||||
import firewall_manager as _fm
|
||||
self.mgr.add_connection(SAMPLE_INVITE)
|
||||
_fm.remove_cell_subnet_route.reset_mock()
|
||||
self.mgr.remove_connection('office')
|
||||
_fm.remove_cell_subnet_route.assert_called_once_with('10.1.0.0/24')
|
||||
|
||||
def test_remove_connection_deletes_from_list(self):
|
||||
self.mgr.add_connection(SAMPLE_INVITE)
|
||||
self.mgr.remove_connection('office')
|
||||
@@ -190,7 +260,9 @@ class TestCellLinkManagerConnections(unittest.TestCase):
|
||||
result = self.mgr.accept_invite(updated_invite)
|
||||
|
||||
self.assertEqual(result['dns_ip'], '10.1.0.2')
|
||||
self.assertEqual(result['remote_api_url'], 'https://10.1.0.2')
|
||||
# remote_api_url is domain-based (the push --resolves it to the VPN IP),
|
||||
# so a dns_ip change does not alter it.
|
||||
self.assertEqual(result['remote_api_url'], 'https://office.cell')
|
||||
self.nm.remove_cell_dns_forward.assert_called()
|
||||
self.nm.add_cell_dns_forward.assert_called_with(
|
||||
domain='office.cell', dns_ip='10.1.0.2')
|
||||
@@ -615,7 +687,7 @@ class TestAcceptInviteNew(unittest.TestCase):
|
||||
with patch('firewall_manager.apply_cell_rules'):
|
||||
result = self.mgr.accept_invite(updated)
|
||||
self.assertEqual(result['dns_ip'], '10.1.0.5')
|
||||
self.assertEqual(result['remote_api_url'], 'https://10.1.0.5')
|
||||
self.assertEqual(result['remote_api_url'], 'https://office.cell')
|
||||
self.nm.remove_cell_dns_forward.assert_called()
|
||||
self.nm.add_cell_dns_forward.assert_called_with(
|
||||
domain='office.cell', dns_ip='10.1.0.5')
|
||||
@@ -1055,10 +1127,11 @@ class TestPermissionSync(unittest.TestCase):
|
||||
self.assertIn('last_push_at', link)
|
||||
self.assertIn('last_remote_update_at', link)
|
||||
|
||||
def test_add_connection_sets_remote_api_url_from_dns_ip(self):
|
||||
def test_add_connection_sets_remote_api_url_from_domain(self):
|
||||
link = self._add_office()
|
||||
# Cross-cell API is reached over the tunnel via Caddy/443, not :3000.
|
||||
self.assertEqual(link['remote_api_url'], 'https://10.1.0.1')
|
||||
# Cross-cell API is reached via the remote's domain over Caddy/443 (the
|
||||
# push --resolves the domain to the VPN IP over the tunnel).
|
||||
self.assertEqual(link['remote_api_url'], 'https://office.cell')
|
||||
|
||||
def test_add_connection_triggers_push(self):
|
||||
push_mock = MagicMock(return_value={'ok': True, 'error': None})
|
||||
@@ -1332,7 +1405,7 @@ class TestPermissionSync(unittest.TestCase):
|
||||
self.assertIn('last_push_status', link)
|
||||
self.assertIn('last_push_at', link)
|
||||
self.assertIn('last_remote_update_at', link)
|
||||
self.assertEqual(link['remote_api_url'], 'https://10.1.0.1')
|
||||
self.assertEqual(link['remote_api_url'], 'https://office.cell')
|
||||
self.assertTrue(link['pending_push']) # pre-existing → marked pending
|
||||
self.assertEqual(link['last_push_status'], 'never')
|
||||
|
||||
@@ -1341,23 +1414,26 @@ class TestPermissionSync(unittest.TestCase):
|
||||
raw = json.load(f)
|
||||
self.assertIn('pending_push', raw[0])
|
||||
|
||||
def test_load_migrates_legacy_http_3000_url_to_https(self):
|
||||
"""An existing link with the old http://<ip>:3000 URL (unreachable across
|
||||
cells) is rewritten to the HTTPS/Caddy form on load."""
|
||||
legacy = [{
|
||||
'cell_name': 'office',
|
||||
'public_key': 'officepubkey=',
|
||||
'vpn_subnet': '10.1.0.0/24',
|
||||
'dns_ip': '10.1.0.9',
|
||||
'domain': 'office.cell',
|
||||
'permissions': {'inbound': {}, 'outbound': {}},
|
||||
'remote_api_url': 'http://10.1.0.9:3000',
|
||||
}]
|
||||
links_file = os.path.join(self.test_dir, 'cell_links.json')
|
||||
with open(links_file, 'w') as f:
|
||||
json.dump(legacy, f)
|
||||
link = self.mgr.list_connections()[0]
|
||||
self.assertEqual(link['remote_api_url'], 'https://10.1.0.9')
|
||||
def test_load_migrates_legacy_url_forms_to_https_domain(self):
|
||||
"""Legacy remote_api_url forms — http://<ip>:3000 (unreachable) and
|
||||
https://<ip> (no matching Caddy cert by bare IP) — are rewritten on load to
|
||||
the domain-based HTTPS form."""
|
||||
for legacy_url in ('http://10.1.0.9:3000', 'https://10.1.0.9'):
|
||||
legacy = [{
|
||||
'cell_name': 'office',
|
||||
'public_key': 'officepubkey=',
|
||||
'vpn_subnet': '10.1.0.0/24',
|
||||
'dns_ip': '10.1.0.9',
|
||||
'domain': 'office.cell',
|
||||
'permissions': {'inbound': {}, 'outbound': {}},
|
||||
'remote_api_url': legacy_url,
|
||||
}]
|
||||
links_file = os.path.join(self.test_dir, 'cell_links.json')
|
||||
with open(links_file, 'w') as f:
|
||||
json.dump(legacy, f)
|
||||
link = self.mgr.list_connections()[0]
|
||||
self.assertEqual(link['remote_api_url'], 'https://office.cell',
|
||||
f'failed to migrate {legacy_url!r}')
|
||||
|
||||
|
||||
class TestExitOffer(unittest.TestCase):
|
||||
|
||||
@@ -652,6 +652,7 @@ class TestCellRules(unittest.TestCase):
|
||||
patch.object(firewall_manager, '_get_cell_api_ip', return_value=self._FAKE_API_IP), \
|
||||
patch.object(firewall_manager, '_get_caddy_container_ip', return_value=self._FAKE_CADDY_IP), \
|
||||
patch.object(firewall_manager, '_get_dns_container_ip', return_value=self._FAKE_DNS_IP), \
|
||||
patch.object(firewall_manager, 'ensure_cell_subnet_routes', return_value=None), \
|
||||
patch.object(firewall_manager, 'ensure_forward_stateful', return_value=True):
|
||||
firewall_manager.apply_cell_rules(cell_name, vpn_subnet, inbound_services)
|
||||
|
||||
@@ -742,6 +743,20 @@ class TestCellRules(unittest.TestCase):
|
||||
]
|
||||
self.assertFalse(api_3000, 'Peer-sync must not target cell-api:3000')
|
||||
|
||||
def test_apply_cell_rules_excludes_peer_sync_from_masquerade(self):
|
||||
"""Peer-sync to Caddy:443 must RETURN in nat POSTROUTING (skip the blanket
|
||||
MASQUERADE) so the remote sees the linked cell's real VPN source for auth."""
|
||||
calls = self._capture_apply('office', '10.0.1.0/24', [])
|
||||
returns = [
|
||||
c for c in calls
|
||||
if '-t' in c and 'nat' in c and 'POSTROUTING' in c
|
||||
and '-s' in c and '10.0.1.0/24' in c
|
||||
and '-d' in c and self._FAKE_CADDY_IP in c
|
||||
and '--dport' in c and '443' in c
|
||||
and '-j' in c and c[c.index('-j') + 1] == 'RETURN'
|
||||
]
|
||||
self.assertTrue(returns, 'Expected nat POSTROUTING RETURN to preserve peer-sync source')
|
||||
|
||||
def test_apply_cell_rules_api_sync_accept_before_catchall_drop(self):
|
||||
"""The API-sync ACCEPT must be inserted after service rules so it ends up above DROP."""
|
||||
insertion_order = []
|
||||
@@ -756,6 +771,7 @@ class TestCellRules(unittest.TestCase):
|
||||
patch.object(firewall_manager, '_get_cell_api_ip', return_value='172.20.0.10'), \
|
||||
patch.object(firewall_manager, '_get_caddy_container_ip', return_value='172.20.0.2'), \
|
||||
patch.object(firewall_manager, '_get_dns_container_ip', return_value='172.20.0.3'), \
|
||||
patch.object(firewall_manager, 'ensure_cell_subnet_routes', return_value=None), \
|
||||
patch.object(firewall_manager, 'ensure_forward_stateful', return_value=True):
|
||||
firewall_manager.apply_cell_rules('office', '10.0.1.0/24', [])
|
||||
|
||||
@@ -852,6 +868,31 @@ class TestCellRules(unittest.TestCase):
|
||||
# peer rule for a different entity must survive
|
||||
self.assertIn('pic-peer-10-0-0-2/32', content)
|
||||
|
||||
# ── remove_cell_subnet_route ──────────────────────────────────────────────
|
||||
|
||||
def test_remove_cell_subnet_route_issues_ip_route_del(self):
|
||||
"""remove_cell_subnet_route deletes the host route for the cell's subnet."""
|
||||
captured = {}
|
||||
|
||||
def fake_run(cmd, **kw):
|
||||
captured['cmd'] = cmd
|
||||
return MagicMock(returncode=0, stdout='', stderr='')
|
||||
|
||||
with patch.object(firewall_manager, '_run', side_effect=fake_run):
|
||||
firewall_manager.remove_cell_subnet_route('10.1.0.0/24')
|
||||
cmd = captured.get('cmd', [])
|
||||
self.assertIn('ip', cmd)
|
||||
self.assertIn('route', cmd)
|
||||
self.assertIn('del', cmd)
|
||||
self.assertIn('10.1.0.0/24', cmd)
|
||||
self.assertIn('172.20.0.9', cmd)
|
||||
|
||||
def test_remove_cell_subnet_route_noop_on_empty(self):
|
||||
"""An empty subnet is a no-op (no docker call)."""
|
||||
with patch.object(firewall_manager, '_run') as run:
|
||||
firewall_manager.remove_cell_subnet_route('')
|
||||
run.assert_not_called()
|
||||
|
||||
# ── apply_all_cell_rules ──────────────────────────────────────────────────
|
||||
|
||||
def test_apply_all_cell_rules_calls_apply_for_each(self):
|
||||
@@ -1130,6 +1171,7 @@ class TestEnsureForwardStateful(unittest.TestCase):
|
||||
patch.object(firewall_manager, '_get_caddy_container_ip', return_value='172.20.0.2'), \
|
||||
patch.object(firewall_manager, '_get_dns_container_ip', return_value='172.20.0.3'), \
|
||||
patch.object(firewall_manager, '_get_cell_api_ip', return_value='172.20.0.10'), \
|
||||
patch.object(firewall_manager, 'ensure_cell_subnet_routes', return_value=None), \
|
||||
patch.object(firewall_manager, 'ensure_forward_stateful') as mock_stateful:
|
||||
firewall_manager.apply_cell_rules('testcell', '10.0.0.0/24', [])
|
||||
mock_stateful.assert_called_once()
|
||||
|
||||
Reference in New Issue
Block a user