fix: complete cross-cell peer-sync push (domain SNI + source-preserving NAT)
Unit Tests / test (push) Successful in 9m45s
Unit Tests / test (push) Successful in 9m45s
Finishes the transport repair (L1+L2 landed in 714fb9b). The push now works
end-to-end between linked cells — verified live: offer/permission state
propagates automatically and the cell_relay derives/reverts without manual steps.
L3 — push by domain, not bare IP (cell_link_manager): the push targeted
https://<vpn-ip>, but in DDNS/ACME mode Caddy only holds a cert for the cell's
domain, so the TLS handshake failed by IP. Target https://<remote-domain> with
`curl --resolve <domain>:443:<dns_ip>` — connect to the VPN IP over the tunnel
but present the domain as SNI/Host. remote_api_url is now domain-based; legacy
http://ip:3000 and https://ip URLs migrate on load.
L4 — preserve the real source for auth (firewall_manager): the blanket
`-o eth0 MASQUERADE` rewrote the push source, so the remote's X-Forwarded-For
source-subnet auth couldn't match. apply_cell_rules adds a tightly-scoped nat
POSTROUTING RETURN (linked-subnet → caddy:443 only) above the masquerade; the
host route returns Caddy's reply through the tunnel. Reviewed by pic-security:
WireGuard per-cell AllowedIPs + Caddy last-XFF (no trusted_proxies) keep this
un-spoofable; the API stays 127.0.0.1-only.
Also:
- validate remote-invite domain/dns_ip/endpoint/subnet at ingest (they reach a
curl --resolve argv — block leading-dash argument-injection).
- remove the host subnet route on cell unlink (remove_cell_subnet_route); the
route was never cleaned, leaving a stale subnet that made is_local_request
treat it as local. Mock firewall side-effects in the affected unit tests.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -14,9 +14,39 @@ import json
|
||||
import shutil
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import cell_link_manager
|
||||
from cell_link_manager import CellLinkManager
|
||||
|
||||
|
||||
_fw_patch = None
|
||||
|
||||
|
||||
def setUpModule():
|
||||
"""Stop cell-link unit tests from running real firewall side-effects.
|
||||
|
||||
add_connection/remove_connection call into firewall_manager, which shells out
|
||||
to `docker exec cell-wireguard iptables` and `docker run` host-route changes.
|
||||
On the dev/CI host those mutate live routes — a stale cell-subnet route once
|
||||
made is_local_request treat a VPN subnet as local and broke the full suite.
|
||||
Tests that assert specific firewall calls use their own local patch, which
|
||||
takes precedence within its context.
|
||||
"""
|
||||
global _fw_patch
|
||||
_fw_patch = patch.multiple(
|
||||
'firewall_manager',
|
||||
apply_cell_rules=MagicMock(return_value=True),
|
||||
clear_cell_rules=MagicMock(),
|
||||
ensure_cell_subnet_routes=MagicMock(),
|
||||
remove_cell_subnet_route=MagicMock(),
|
||||
)
|
||||
_fw_patch.start()
|
||||
|
||||
|
||||
def tearDownModule():
|
||||
if _fw_patch is not None:
|
||||
_fw_patch.stop()
|
||||
|
||||
|
||||
def _make_wg_mock():
|
||||
wg = MagicMock()
|
||||
wg.get_keys.return_value = {'public_key': 'serverpubkey=', 'private_key': 'serverprivkey='}
|
||||
@@ -50,6 +80,37 @@ SAMPLE_INVITE = {
|
||||
}
|
||||
|
||||
|
||||
class TestInviteFieldValidation(unittest.TestCase):
|
||||
"""_validate_invite_fields rejects malformed remote-invite fields.
|
||||
|
||||
The domain/dns_ip flow into a `curl --resolve` argv on peer-sync push, so a
|
||||
leading-dash domain (argument injection) and non-IP dns_ip must be rejected.
|
||||
"""
|
||||
|
||||
def test_valid_invite_passes(self):
|
||||
cell_link_manager._validate_invite_fields(SAMPLE_INVITE) # no raise
|
||||
|
||||
def test_rejects_leading_dash_domain(self):
|
||||
bad = {**SAMPLE_INVITE, 'domain': '-oProxyCommand=evil'}
|
||||
with self.assertRaises(ValueError):
|
||||
cell_link_manager._validate_invite_fields(bad)
|
||||
|
||||
def test_rejects_non_ip_dns_ip(self):
|
||||
bad = {**SAMPLE_INVITE, 'dns_ip': '-x'}
|
||||
with self.assertRaises(ValueError):
|
||||
cell_link_manager._validate_invite_fields(bad)
|
||||
|
||||
def test_rejects_bad_subnet(self):
|
||||
bad = {**SAMPLE_INVITE, 'vpn_subnet': 'not-a-cidr'}
|
||||
with self.assertRaises(ValueError):
|
||||
cell_link_manager._validate_invite_fields(bad)
|
||||
|
||||
def test_rejects_bad_endpoint(self):
|
||||
bad = {**SAMPLE_INVITE, 'endpoint': '-evil:51820'}
|
||||
with self.assertRaises(ValueError):
|
||||
cell_link_manager._validate_invite_fields(bad)
|
||||
|
||||
|
||||
class TestCellLinkManagerInvite(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
@@ -146,6 +207,15 @@ class TestCellLinkManagerConnections(unittest.TestCase):
|
||||
self.mgr.remove_connection('office')
|
||||
self.nm.remove_cell_dns_forward.assert_called_once_with('office.cell')
|
||||
|
||||
def test_remove_connection_removes_host_subnet_route(self):
|
||||
"""Unlinking a cell removes its host route so the subnet isn't left
|
||||
blackholed / treated as locally attached."""
|
||||
import firewall_manager as _fm
|
||||
self.mgr.add_connection(SAMPLE_INVITE)
|
||||
_fm.remove_cell_subnet_route.reset_mock()
|
||||
self.mgr.remove_connection('office')
|
||||
_fm.remove_cell_subnet_route.assert_called_once_with('10.1.0.0/24')
|
||||
|
||||
def test_remove_connection_deletes_from_list(self):
|
||||
self.mgr.add_connection(SAMPLE_INVITE)
|
||||
self.mgr.remove_connection('office')
|
||||
@@ -190,7 +260,9 @@ class TestCellLinkManagerConnections(unittest.TestCase):
|
||||
result = self.mgr.accept_invite(updated_invite)
|
||||
|
||||
self.assertEqual(result['dns_ip'], '10.1.0.2')
|
||||
self.assertEqual(result['remote_api_url'], 'https://10.1.0.2')
|
||||
# remote_api_url is domain-based (the push --resolves it to the VPN IP),
|
||||
# so a dns_ip change does not alter it.
|
||||
self.assertEqual(result['remote_api_url'], 'https://office.cell')
|
||||
self.nm.remove_cell_dns_forward.assert_called()
|
||||
self.nm.add_cell_dns_forward.assert_called_with(
|
||||
domain='office.cell', dns_ip='10.1.0.2')
|
||||
@@ -615,7 +687,7 @@ class TestAcceptInviteNew(unittest.TestCase):
|
||||
with patch('firewall_manager.apply_cell_rules'):
|
||||
result = self.mgr.accept_invite(updated)
|
||||
self.assertEqual(result['dns_ip'], '10.1.0.5')
|
||||
self.assertEqual(result['remote_api_url'], 'https://10.1.0.5')
|
||||
self.assertEqual(result['remote_api_url'], 'https://office.cell')
|
||||
self.nm.remove_cell_dns_forward.assert_called()
|
||||
self.nm.add_cell_dns_forward.assert_called_with(
|
||||
domain='office.cell', dns_ip='10.1.0.5')
|
||||
@@ -1055,10 +1127,11 @@ class TestPermissionSync(unittest.TestCase):
|
||||
self.assertIn('last_push_at', link)
|
||||
self.assertIn('last_remote_update_at', link)
|
||||
|
||||
def test_add_connection_sets_remote_api_url_from_dns_ip(self):
|
||||
def test_add_connection_sets_remote_api_url_from_domain(self):
|
||||
link = self._add_office()
|
||||
# Cross-cell API is reached over the tunnel via Caddy/443, not :3000.
|
||||
self.assertEqual(link['remote_api_url'], 'https://10.1.0.1')
|
||||
# Cross-cell API is reached via the remote's domain over Caddy/443 (the
|
||||
# push --resolves the domain to the VPN IP over the tunnel).
|
||||
self.assertEqual(link['remote_api_url'], 'https://office.cell')
|
||||
|
||||
def test_add_connection_triggers_push(self):
|
||||
push_mock = MagicMock(return_value={'ok': True, 'error': None})
|
||||
@@ -1332,7 +1405,7 @@ class TestPermissionSync(unittest.TestCase):
|
||||
self.assertIn('last_push_status', link)
|
||||
self.assertIn('last_push_at', link)
|
||||
self.assertIn('last_remote_update_at', link)
|
||||
self.assertEqual(link['remote_api_url'], 'https://10.1.0.1')
|
||||
self.assertEqual(link['remote_api_url'], 'https://office.cell')
|
||||
self.assertTrue(link['pending_push']) # pre-existing → marked pending
|
||||
self.assertEqual(link['last_push_status'], 'never')
|
||||
|
||||
@@ -1341,23 +1414,26 @@ class TestPermissionSync(unittest.TestCase):
|
||||
raw = json.load(f)
|
||||
self.assertIn('pending_push', raw[0])
|
||||
|
||||
def test_load_migrates_legacy_http_3000_url_to_https(self):
|
||||
"""An existing link with the old http://<ip>:3000 URL (unreachable across
|
||||
cells) is rewritten to the HTTPS/Caddy form on load."""
|
||||
legacy = [{
|
||||
'cell_name': 'office',
|
||||
'public_key': 'officepubkey=',
|
||||
'vpn_subnet': '10.1.0.0/24',
|
||||
'dns_ip': '10.1.0.9',
|
||||
'domain': 'office.cell',
|
||||
'permissions': {'inbound': {}, 'outbound': {}},
|
||||
'remote_api_url': 'http://10.1.0.9:3000',
|
||||
}]
|
||||
links_file = os.path.join(self.test_dir, 'cell_links.json')
|
||||
with open(links_file, 'w') as f:
|
||||
json.dump(legacy, f)
|
||||
link = self.mgr.list_connections()[0]
|
||||
self.assertEqual(link['remote_api_url'], 'https://10.1.0.9')
|
||||
def test_load_migrates_legacy_url_forms_to_https_domain(self):
|
||||
"""Legacy remote_api_url forms — http://<ip>:3000 (unreachable) and
|
||||
https://<ip> (no matching Caddy cert by bare IP) — are rewritten on load to
|
||||
the domain-based HTTPS form."""
|
||||
for legacy_url in ('http://10.1.0.9:3000', 'https://10.1.0.9'):
|
||||
legacy = [{
|
||||
'cell_name': 'office',
|
||||
'public_key': 'officepubkey=',
|
||||
'vpn_subnet': '10.1.0.0/24',
|
||||
'dns_ip': '10.1.0.9',
|
||||
'domain': 'office.cell',
|
||||
'permissions': {'inbound': {}, 'outbound': {}},
|
||||
'remote_api_url': legacy_url,
|
||||
}]
|
||||
links_file = os.path.join(self.test_dir, 'cell_links.json')
|
||||
with open(links_file, 'w') as f:
|
||||
json.dump(legacy, f)
|
||||
link = self.mgr.list_connections()[0]
|
||||
self.assertEqual(link['remote_api_url'], 'https://office.cell',
|
||||
f'failed to migrate {legacy_url!r}')
|
||||
|
||||
|
||||
class TestExitOffer(unittest.TestCase):
|
||||
|
||||
Reference in New Issue
Block a user