feat: fix cross-cell service access — DNS DNAT, service DNAT, Caddy routing

DNS A records now return the WireGuard server IP (10.0.0.1) instead of
Docker bridge VIPs so cross-cell peers resolve service names correctly
regardless of their bridge subnet. DNAT rules (wg0:53→cell-dns:53 and
wg0:80→cell-caddy:80) are applied at startup. Caddy routes by Host header,
eliminating the Docker bridge subnet conflict. Firewall cell rules allow
DNS and service (Caddy) traffic from linked cell subnets. Split-tunnel
AllowedIPs now dynamically includes connected-cell VPN subnets and drops
the 172.20.0.0/16 range. Peers with route_via set now receive full-tunnel
config (0.0.0.0/0) so all their traffic exits via the remote cell.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-02 03:12:09 -04:00
parent f2f15eb17e
commit 9a800e3b6b
11 changed files with 325 additions and 146 deletions
+61 -45
View File
@@ -205,6 +205,8 @@ class TestGenerateCorefileWithCellLinks(unittest.TestCase):
class TestApplyPeerRules(unittest.TestCase):
"""Verify correct iptables calls for full-internet vs split-tunnel peers."""
_FAKE_CADDY_IP = '172.20.0.2'
def _run_apply(self, peer_ip, settings):
calls_made = []
@@ -215,7 +217,9 @@ class TestApplyPeerRules(unittest.TestCase):
m.stdout = ''
return m
with patch.object(firewall_manager, '_wg_exec', side_effect=fake_wg_exec):
with patch.object(firewall_manager, '_wg_exec', side_effect=fake_wg_exec), \
patch.object(firewall_manager, '_get_caddy_container_ip',
return_value=self._FAKE_CADDY_IP):
firewall_manager.apply_peer_rules(peer_ip, settings)
return calls_made
@@ -239,23 +243,23 @@ class TestApplyPeerRules(unittest.TestCase):
self.assertIn('DROP', targets)
self.assertIn('ACCEPT', targets)
def test_service_access_restriction_generates_drop(self):
def test_service_access_restriction_uses_caddy_rule(self):
"""service_access controls access via a single Caddy ACCEPT/DROP rule, not per-VIP rules."""
calls = self._run_apply('10.0.0.4', {'internet_access': False,
'service_access': ['calendar'],
'peer_access': True})
iptables_calls = [c for c in calls if 'iptables' in c]
# files/mail/webdav should be DROPped, calendar ACCEPTed
targets_with_ips = [
(c[c.index('-d') + 1], c[c.index('-j') + 1])
for c in iptables_calls
if '-d' in c and '-j' in c
]
svc_rules = {ip: t for ip, t in targets_with_ips
if ip in firewall_manager.SERVICE_IPS.values()}
calendar_ip = firewall_manager.SERVICE_IPS['calendar']
files_ip = firewall_manager.SERVICE_IPS['files']
self.assertEqual(svc_rules.get(calendar_ip), 'ACCEPT')
self.assertEqual(svc_rules.get(files_ip), 'DROP')
# Caddy rule should be ACCEPT (any non-empty service_access)
caddy_rules = [c for c in iptables_calls
if '-d' in c and self._FAKE_CADDY_IP in c
and '--dport' in c and '80' in c]
self.assertTrue(caddy_rules, "Expected a Caddy port-80 rule for service access")
target = caddy_rules[-1][caddy_rules[-1].index('-j') + 1]
self.assertEqual(target, 'ACCEPT', "Non-empty service_access should ACCEPT Caddy")
# No per-VIP rules — per-service control is at DNS ACL level
for svc_ip in firewall_manager.SERVICE_IPS.values():
vip_rules = [c for c in iptables_calls if '-d' in c and svc_ip in c]
self.assertFalse(vip_rules, f"No per-VIP FORWARD rules expected for {svc_ip}")
def test_all_rules_tagged_with_peer_comment(self):
calls = self._run_apply('10.0.0.2', {'internet_access': True,
@@ -380,18 +384,21 @@ class TestUpdateServiceIps(unittest.TestCase):
self.assertEqual(set(firewall_manager.SERVICE_IPS.keys()),
{'calendar', 'files', 'mail', 'webdav'})
def test_apply_peer_rules_uses_updated_ips(self):
def test_apply_peer_rules_uses_caddy_not_vips(self):
"""Service access uses Caddy IP for FORWARD rules, not SERVICE_IPS VIPs."""
firewall_manager.update_service_ips('10.0.0.0/24')
called_with = []
_CADDY_IP = '172.20.0.2'
def fake_wg_exec(args):
called_with.append(args)
m = MagicMock()
m.returncode = 1 # simulate rule-doesn't-exist → _ensure_rule inserts
m.returncode = 1
return m
with patch.object(firewall_manager, '_wg_exec', side_effect=fake_wg_exec), \
patch.object(firewall_manager, 'clear_peer_rules'):
patch.object(firewall_manager, 'clear_peer_rules'), \
patch.object(firewall_manager, '_get_caddy_container_ip', return_value=_CADDY_IP):
firewall_manager.apply_peer_rules('10.0.0.5', {
'internet_access': True,
'service_access': ['calendar'],
@@ -400,9 +407,10 @@ class TestUpdateServiceIps(unittest.TestCase):
iptables_calls = [c for c in called_with if c and c[0] == 'iptables']
dest_ips = [c[c.index('-d') + 1] for c in iptables_calls if '-d' in c]
# calendar vIP should now be 10.0.0.21
self.assertIn('10.0.0.21', dest_ips)
# old IP must not appear
# Caddy IP should appear for service access
self.assertIn(_CADDY_IP, dest_ips)
# VIPs (old or updated) must not appear — service access is via Caddy
self.assertNotIn('10.0.0.21', dest_ips)
self.assertNotIn('172.20.0.21', dest_ips)
@@ -416,9 +424,11 @@ class TestCellRules(unittest.TestCase):
# ── helpers ───────────────────────────────────────────────────────────────
_FAKE_API_IP = '172.20.0.10'
_FAKE_CADDY_IP = '172.20.0.2'
_FAKE_DNS_IP = '172.20.0.3'
def _capture_apply(self, cell_name, vpn_subnet, inbound_services):
"""Run apply_cell_rules with _wg_exec and _get_cell_api_ip mocked."""
"""Run apply_cell_rules with _wg_exec and container IP helpers mocked."""
calls_made = []
def fake_wg_exec(args):
@@ -429,7 +439,9 @@ class TestCellRules(unittest.TestCase):
return m
with patch.object(firewall_manager, '_wg_exec', side_effect=fake_wg_exec), \
patch.object(firewall_manager, '_get_cell_api_ip', return_value=self._FAKE_API_IP):
patch.object(firewall_manager, '_get_cell_api_ip', return_value=self._FAKE_API_IP), \
patch.object(firewall_manager, '_get_caddy_container_ip', return_value=self._FAKE_CADDY_IP), \
patch.object(firewall_manager, '_get_dns_container_ip', return_value=self._FAKE_DNS_IP):
firewall_manager.apply_cell_rules(cell_name, vpn_subnet, inbound_services)
return [c for c in calls_made if 'iptables' in c]
@@ -480,18 +492,18 @@ class TestCellRules(unittest.TestCase):
self.assertTrue(subnet_drops, "Expected a catch-all DROP rule for the subnet")
def test_apply_cell_rules_sends_accept_for_allowed_service(self):
"""apply_cell_rules inserts ACCEPT for the calendar VIP when calendar is in inbound."""
"""apply_cell_rules inserts Caddy ACCEPT when inbound_services is non-empty."""
calls = self._capture_apply('office', '10.0.1.0/24', ['calendar'])
calendar_ip = firewall_manager.SERVICE_IPS['calendar']
calendar_targets = self._targets_for_dest(calls, calendar_ip)
self.assertIn('ACCEPT', calendar_targets)
caddy_targets = self._targets_for_dest(calls, self._FAKE_CADDY_IP)
self.assertIn('ACCEPT', caddy_targets,
"Expected ACCEPT to Caddy when inbound_services is non-empty")
def test_apply_cell_rules_sends_drop_for_disallowed_service(self):
"""apply_cell_rules inserts DROP for a service not in inbound_services."""
calls = self._capture_apply('office', '10.0.1.0/24', ['calendar'])
files_ip = firewall_manager.SERVICE_IPS['files']
files_targets = self._targets_for_dest(calls, files_ip)
self.assertIn('DROP', files_targets)
def test_apply_cell_rules_no_caddy_accept_when_no_inbound(self):
"""apply_cell_rules does NOT insert Caddy ACCEPT when inbound_services is empty."""
calls = self._capture_apply('office', '10.0.1.0/24', [])
caddy_targets = self._targets_for_dest(calls, self._FAKE_CADDY_IP)
self.assertNotIn('ACCEPT', caddy_targets,
"No Caddy ACCEPT expected when inbound_services is empty")
def test_apply_cell_rules_accepts_api_sync_traffic(self):
"""apply_cell_rules inserts ACCEPT for cell-api:3000 so permission-sync pushes pass."""
@@ -517,7 +529,9 @@ class TestCellRules(unittest.TestCase):
m = MagicMock(); m.returncode = 0; m.stdout = ''; return m
with patch.object(firewall_manager, '_wg_exec', side_effect=fake_wg_exec), \
patch.object(firewall_manager, '_get_cell_api_ip', return_value='172.20.0.10'):
patch.object(firewall_manager, '_get_cell_api_ip', return_value='172.20.0.10'), \
patch.object(firewall_manager, '_get_caddy_container_ip', return_value='172.20.0.2'), \
patch.object(firewall_manager, '_get_dns_container_ip', return_value='172.20.0.3'):
firewall_manager.apply_cell_rules('office', '10.0.1.0/24', [])
# The API-sync ACCEPT must be the LAST -I FORWARD insertion so it sits at position 1
@@ -527,26 +541,28 @@ class TestCellRules(unittest.TestCase):
# ── apply_cell_rules — empty inbound (all-deny) ───────────────────────────
def test_apply_cell_rules_empty_inbound_all_drop(self):
"""With inbound_services=[], all per-service rules are DROP."""
def test_apply_cell_rules_empty_inbound_no_service_accept(self):
"""With inbound_services=[], no service ACCEPT is added; catch-all DROP blocks traffic."""
calls = self._capture_apply('office', '10.0.1.0/24', [])
# No ACCEPT to Caddy
caddy_targets = self._targets_for_dest(calls, self._FAKE_CADDY_IP)
self.assertNotIn('ACCEPT', caddy_targets,
"No Caddy ACCEPT expected with empty inbound_services")
# No per-VIP rules at all
for service, svc_ip in firewall_manager.SERVICE_IPS.items():
svc_targets = self._targets_for_dest(calls, svc_ip)
self.assertTrue(svc_targets,
f"Expected at least one rule for {service} ({svc_ip})")
self.assertNotIn('ACCEPT', svc_targets,
f"{service} should be DROP when not in inbound_services")
self.assertFalse(svc_targets,
f"No per-VIP rules expected for {service} ({svc_ip})")
# ── apply_cell_rules — all inbound (all-accept) ───────────────────────────
def test_apply_cell_rules_all_inbound_all_accept(self):
"""With all four services in inbound, all per-service rules are ACCEPT."""
def test_apply_cell_rules_all_inbound_caddy_accept(self):
"""With all four services in inbound, an ACCEPT rule is added for Caddy port 80."""
all_services = list(firewall_manager.SERVICE_IPS.keys())
calls = self._capture_apply('office', '10.0.1.0/24', all_services)
for service, svc_ip in firewall_manager.SERVICE_IPS.items():
svc_targets = self._targets_for_dest(calls, svc_ip)
self.assertIn('ACCEPT', svc_targets,
f"{service} should be ACCEPT when in inbound_services")
caddy_targets = self._targets_for_dest(calls, self._FAKE_CADDY_IP)
self.assertIn('ACCEPT', caddy_targets,
"Expected ACCEPT to Caddy when all services are in inbound_services")
# ── apply_cell_rules — all rules tagged ───────────────────────────────────
+15 -11
View File
@@ -284,20 +284,24 @@ class TestBootstrapDnsRecords(unittest.TestCase):
self.assertTrue(os.path.exists(zone_file))
@patch('subprocess.run')
def test_contains_default_caddy_ip(self, _mock):
def test_contains_wg_server_ip(self, _mock):
"""Zone file records now use WG server IP (10.0.0.1) not Docker VIPs."""
self.nm.bootstrap_dns_records('mycell', 'cell')
zone_file = os.path.join(self.nm.dns_zones_dir, 'cell.zone')
content = open(zone_file).read()
self.assertIn('172.20.0.2', content) # caddy
self.assertIn('10.0.0.1', content) # WG server IP for all services
self.assertNotIn('172.20.0.2', content) # Caddy VIP no longer in zone
self.assertNotIn('172.20.0.21', content) # Service VIPs no longer in zone
@patch('subprocess.run')
def test_custom_ip_range_used(self, _mock):
def test_custom_ip_range_does_not_affect_service_ips(self, _mock):
"""ip_range is no longer used for service record IPs; WG server IP is used."""
self.nm.bootstrap_dns_records('mycell', 'cell', ip_range='10.5.0.0/24')
zone_file = os.path.join(self.nm.dns_zones_dir, 'cell.zone')
content = open(zone_file).read()
self.assertIn('10.5.0.2', content) # caddy
self.assertIn('10.5.0.21', content) # vip_calendar
self.assertNotIn('172.20', content)
self.assertIn('10.0.0.1', content) # WG server IP
self.assertNotIn('10.5.0.2', content) # old caddy pattern gone
self.assertNotIn('10.5.0.21', content) # old VIP pattern gone
@patch('subprocess.run')
def test_idempotent_skips_existing_zone(self, _mock):
@@ -324,15 +328,15 @@ class TestApplyIpRange(unittest.TestCase):
shutil.rmtree(self.test_dir)
@patch('subprocess.run')
def test_zone_file_updated_with_new_ips(self, _mock):
# Bootstrap with default range, then change to 10.0.0.0/24
def test_zone_file_updated_with_wg_server_ip(self, _mock):
"""apply_ip_range regenerates zone with WG server IP for all service records."""
self.nm.bootstrap_dns_records('mycell', 'cell', '172.20.0.0/16')
result = self.nm.apply_ip_range('10.0.0.0/24', 'mycell', 'cell')
zone_file = os.path.join(self.nm.dns_zones_dir, 'cell.zone')
content = open(zone_file).read()
self.assertIn('10.0.0.2', content) # caddy
self.assertIn('10.0.0.21', content) # vip_calendar
self.assertNotIn('172.20', content)
self.assertIn('10.0.0.1', content) # WG server IP for all services
self.assertNotIn('172.20.0.2', content) # old Caddy pattern gone
self.assertNotIn('172.20.0.21', content) # old VIP pattern gone
@patch('subprocess.run')
def test_returns_restarted_on_success(self, _mock):
+41 -34
View File
@@ -465,10 +465,15 @@ class TestPeerEndpointAccessControl:
class TestDNSZoneRecords:
"""
Verify that network_manager._build_dns_records() generates the correct IPs.
api and webui must point to Caddy (not their container IPs) so Caddy can
reverse-proxy them — their containers don't listen on port 80.
All service names now resolve to the WG server IP (10.0.0.1) rather than
Docker VIPs. ensure_service_dnat() routes wg0:80 → Caddy; Caddy routes to
the correct backend by Host header. This allows cross-cell peers to reach
services without Docker bridge subnet conflicts.
"""
_WG_SERVER_IP = '10.0.0.1'
def setUp(self):
pass
@@ -477,59 +482,59 @@ class TestDNSZoneRecords:
mgr = nm.NetworkManager.__new__(nm.NetworkManager)
return mgr._build_dns_records(cell_name, ip_range)
def test_api_resolves_to_caddy_not_api_container(self):
def test_api_resolves_to_wg_server_ip(self):
records = self._records()
api_rec = next((r for r in records if r['name'] == 'api'), None)
assert api_rec is not None, "No DNS record for 'api'"
assert api_rec['value'] == '172.20.0.2', (
f"api.dev should resolve to Caddy (172.20.0.2), not the API container "
f"(172.20.0.10); got {api_rec['value']}"
assert api_rec['value'] == self._WG_SERVER_IP, (
f"api.dev should resolve to WG server IP ({self._WG_SERVER_IP}); "
f"got {api_rec['value']}"
)
def test_webui_resolves_to_caddy_not_webui_container(self):
def test_webui_resolves_to_wg_server_ip(self):
records = self._records()
rec = next((r for r in records if r['name'] == 'webui'), None)
assert rec is not None, "No DNS record for 'webui'"
assert rec['value'] == '172.20.0.2', (
f"webui.dev should resolve to Caddy (172.20.0.2), not the WebUI container "
f"(172.20.0.11); got {rec['value']}"
assert rec['value'] == self._WG_SERVER_IP, (
f"webui.dev should resolve to WG server IP ({self._WG_SERVER_IP}); "
f"got {rec['value']}"
)
def test_calendar_uses_vip(self):
def test_calendar_resolves_to_wg_server_ip(self):
records = self._records()
rec = next((r for r in records if r['name'] == 'calendar'), None)
assert rec and rec['value'] == '172.20.0.21', \
f"calendar.dev VIP should be 172.20.0.21; got {rec}"
assert rec and rec['value'] == self._WG_SERVER_IP, \
f"calendar.dev should resolve to WG server IP; got {rec}"
def test_files_uses_vip(self):
def test_files_resolves_to_wg_server_ip(self):
records = self._records()
rec = next((r for r in records if r['name'] == 'files'), None)
assert rec and rec['value'] == '172.20.0.22', \
f"files.dev VIP should be 172.20.0.22; got {rec}"
assert rec and rec['value'] == self._WG_SERVER_IP, \
f"files.dev should resolve to WG server IP; got {rec}"
def test_mail_uses_vip(self):
def test_mail_resolves_to_wg_server_ip(self):
records = self._records()
rec = next((r for r in records if r['name'] == 'mail'), None)
assert rec and rec['value'] == '172.20.0.23', \
f"mail.dev VIP should be 172.20.0.23; got {rec}"
assert rec and rec['value'] == self._WG_SERVER_IP, \
f"mail.dev should resolve to WG server IP; got {rec}"
def test_webmail_uses_mail_vip(self):
def test_webmail_resolves_to_wg_server_ip(self):
records = self._records()
rec = next((r for r in records if r['name'] == 'webmail'), None)
assert rec and rec['value'] == '172.20.0.23', \
f"webmail.dev should share the mail VIP 172.20.0.23; got {rec}"
assert rec and rec['value'] == self._WG_SERVER_IP, \
f"webmail.dev should resolve to WG server IP; got {rec}"
def test_webdav_uses_vip(self):
def test_webdav_resolves_to_wg_server_ip(self):
records = self._records()
rec = next((r for r in records if r['name'] == 'webdav'), None)
assert rec and rec['value'] == '172.20.0.24', \
f"webdav.dev VIP should be 172.20.0.24; got {rec}"
assert rec and rec['value'] == self._WG_SERVER_IP, \
f"webdav.dev should resolve to WG server IP; got {rec}"
def test_cell_name_resolves_to_caddy(self):
def test_cell_name_resolves_to_wg_server_ip(self):
records = self._records(cell_name='mypic')
rec = next((r for r in records if r['name'] == 'mypic'), None)
assert rec and rec['value'] == '172.20.0.2', \
f"mypic.dev should resolve to Caddy (172.20.0.2); got {rec}"
assert rec and rec['value'] == self._WG_SERVER_IP, \
f"mypic.dev should resolve to WG server IP; got {rec}"
def test_all_records_are_type_a(self):
records = self._records()
@@ -540,21 +545,23 @@ class TestDNSZoneRecords:
class TestDNSZoneRecordsWithPytest:
"""Same as above but using pytest-style (no setUp/tearDown)."""
_WG_SERVER_IP = '10.0.0.1'
@pytest.fixture
def records(self):
import network_manager as nm
mgr = nm.NetworkManager.__new__(nm.NetworkManager)
return mgr._build_dns_records('pic0', '172.20.0.0/16')
def test_api_resolves_to_caddy(self, records):
def test_api_resolves_to_wg_server_ip(self, records):
rec = next((r for r in records if r['name'] == 'api'), None)
assert rec and rec['value'] == '172.20.0.2', \
f"api.dev should point to Caddy (172.20.0.2); got {rec}"
assert rec and rec['value'] == self._WG_SERVER_IP, \
f"api.dev should point to WG server IP ({self._WG_SERVER_IP}); got {rec}"
def test_webui_resolves_to_caddy(self, records):
def test_webui_resolves_to_wg_server_ip(self, records):
rec = next((r for r in records if r['name'] == 'webui'), None)
assert rec and rec['value'] == '172.20.0.2', \
f"webui.dev should point to Caddy (172.20.0.2); got {rec}"
assert rec and rec['value'] == self._WG_SERVER_IP, \
f"webui.dev should point to WG server IP ({self._WG_SERVER_IP}); got {rec}"
# ─────────────────── Caddyfile generation ─────────────────────────────────────
+3 -2
View File
@@ -244,7 +244,7 @@ class TestWireGuardManager(unittest.TestCase):
self.assertIn('[Peer]', config)
self.assertIn('PrivateKey', config)
self.assertIn('Address = 10.0.0.2/32', config)
self.assertIn('DNS = 172.20.0.3', config)
self.assertIn('DNS = 10.0.0.1', config)
self.assertIn(keys['public_key'], config)
self.assertIn('AllowedIPs', config)
@@ -418,7 +418,8 @@ class TestWireGuardConfigReads(unittest.TestCase):
self._write_wg_conf(address='10.1.0.1/24')
split = self.wg.get_split_tunnel_ips()
self.assertIn('10.1.0.0/24', split)
self.assertIn('172.20.0.0/16', split)
# 172.20.0.0/16 is intentionally excluded — services now use WG server IP via DNAT
self.assertNotIn('172.20.0.0/16', split)
self.assertNotIn('10.0.0.0/24', split)
def test_get_server_config_uses_configured_port(self):
+17 -13
View File
@@ -98,9 +98,12 @@ class TestInternetForwardingRules(unittest.TestCase):
class TestPeerConfigDns(unittest.TestCase):
"""
Verify that peer client configs include a DNS = <ip> line pointing to the
PIC DNS container. Without DNS, the client tunnel has no internet-accessible
domain resolution even though packets are forwarded correctly.
Verify that peer client configs include a DNS = <wg_server_ip> line.
DNS is set to the WG server IP (e.g. 10.0.0.1) rather than the Docker
cell-dns container IP. ensure_dns_dnat() routes wg0:53 → cell-dns, so
peers reach CoreDNS via the WG server IP. This works for both split-tunnel
(10.0.x.x in AllowedIPs) and cross-cell peers.
"""
def setUp(self):
@@ -123,19 +126,20 @@ class TestPeerConfigDns(unittest.TestCase):
# Must be a parseable IPv4 address
ipaddress.IPv4Address(dns_ip)
def test_peer_config_dns_defaults_to_cell_dns_ip(self):
"""When cell-dns hostname can't be resolved, falls back to 172.20.0.3."""
with patch('wireguard_manager.socket.gethostbyname', side_effect=OSError):
keys = self.wg.generate_peer_keys('p1')
cfg = self.wg.get_peer_config('p1', '10.0.0.5', keys['private_key'])
self.assertIn('DNS = 172.20.0.3', cfg)
def test_peer_config_dns_uses_wg_server_ip(self):
"""DNS in peer config is the WG server IP; ensure_dns_dnat() routes wg0:53 → cell-dns."""
keys = self.wg.generate_peer_keys('p1')
cfg = self.wg.get_peer_config('p1', '10.0.0.5', keys['private_key'])
# Default WG server address is 10.0.0.1/24 when no wg0.conf exists
self.assertIn('DNS = 10.0.0.1', cfg)
def test_peer_config_dns_uses_resolved_hostname(self):
"""When cell-dns resolves, its IP is used as the DNS server."""
with patch('wireguard_manager.socket.gethostbyname', return_value='172.20.0.3'):
def test_peer_config_dns_fallback_to_resolve_on_error(self):
"""If WG address parsing fails, _resolve_peer_dns() is used as fallback."""
with patch.object(self.wg, '_get_configured_address', return_value='invalid'), \
patch('wireguard_manager.socket.gethostbyname', return_value='172.20.0.9'):
keys = self.wg.generate_peer_keys('p2')
cfg = self.wg.get_peer_config('p2', '10.0.0.6', keys['private_key'])
self.assertIn('DNS = 172.20.0.3', cfg)
self.assertIn('DNS = 172.20.0.9', cfg)
def test_resolve_peer_dns_fallback(self):
"""_resolve_peer_dns() always returns a string even when DNS lookup fails."""