Fix cross-cell domain access: scope DNAT rules, add Docker→wg0 routing

- firewall_manager: add _get_wg_server_ip() helper; scope ensure_cell_api_dnat(),
  ensure_dns_dnat(), ensure_service_dnat() DNAT rules with -d server_ip; add
  ensure_wg_masquerade() (Docker→wg0 MASQUERADE+FORWARD) and
  ensure_cell_subnet_routes() (host routes via docker run busybox)
- wireguard_manager: scope PostUp DNAT rules with -d server_ip in generate_config()
  and ensure_postup_dnat(); add Docker→wg0 MASQUERADE+FORWARD rules
- app.py: call ensure_wg_masquerade() and ensure_cell_subnet_routes() in
  _apply_startup_enforcement()
- tests/test_firewall_manager.py: mock _get_wg_server_ip, add
  test_dnat_is_scoped_to_server_ip and test_returns_false_when_wg_server_ip_not_found
- tests/e2e/wg/test_cell_to_cell_routing.py: rewrite to use dynamic config
  (no hardcoded IPs/ports), add latency and domain access tests

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-05 12:37:02 -04:00
parent 1e1bda4679
commit e2c50c381a
5 changed files with 442 additions and 145 deletions
+3
View File
@@ -324,6 +324,9 @@ def _apply_startup_enforcement():
wireguard_manager.ensure_postup_dnat()
firewall_manager.ensure_dns_dnat()
firewall_manager.ensure_service_dnat()
# Allow Docker containers (cell-dns) to reach remote cell subnets via wg0.
firewall_manager.ensure_wg_masquerade()
firewall_manager.ensure_cell_subnet_routes(cell_links)
# Restore any cell link WireGuard peers that were lost from wg0.conf
# (happens if the container was rebuilt, wg0.conf was reset, etc.)
_restore_cell_wg_peers(cell_links)
+126 -24
View File
@@ -325,6 +325,22 @@ def _get_dns_container_ip() -> str:
return '172.20.0.3'
def _get_wg_server_ip(
    wg_conf_path: str = '/app/config/wireguard/wg_confs/wg0.conf',
) -> Optional[str]:
    """Return the WireGuard server's VPN IP from wg0.conf (e.g. '10.0.0.1').

    The file is scanned for ``Address = ...`` lines. wg-quick permits several
    comma-separated addresses on one line and trailing ``# comments``; both are
    handled here. An IPv4 address is preferred because the callers pass the
    result to ``iptables -d``. If no IPv4 address is present, the first valid
    address of any family is returned.

    Args:
        wg_conf_path: Path of the WireGuard config to read. Defaults to the
            in-container location of wg0.conf.

    Returns:
        The address without its prefix length, or None when the file cannot
        be read or contains no parseable Address entry.
    """
    import ipaddress as _ipaddress

    fallback: Optional[str] = None
    try:
        with open(wg_conf_path) as f:
            for raw_line in f:
                # Drop inline comments before looking at the key/value pair.
                line = raw_line.split('#', 1)[0].strip()
                key, sep, value = line.partition('=')
                if not sep or key.strip() != 'Address':
                    continue
                for candidate in value.split(','):
                    candidate = candidate.strip()
                    if not candidate:
                        continue
                    try:
                        iface = _ipaddress.ip_interface(candidate)
                    except ValueError:
                        # Skip a malformed entry; a later one may be usable.
                        continue
                    if iface.version == 4:
                        return str(iface.ip)
                    if fallback is None:
                        fallback = str(iface.ip)
    except (OSError, ValueError):
        # Missing, unreadable or undecodable config. Callers log their own
        # warning when they receive None, so stay quiet here.
        return None
    return fallback
def _get_caddy_container_ip() -> str:
"""Return cell-caddy container's Docker bridge IP. Falls back to 172.20.0.2."""
try:
@@ -455,14 +471,19 @@ def ensure_forward_stateful() -> bool:
def ensure_cell_api_dnat() -> bool:
"""DNAT wg0:3000 → cell-api:3000 inside cell-wireguard.
"""DNAT wg0:3000 (scoped to WG server IP) → cell-api:3000 inside cell-wireguard.
Remote cells push permission updates over the WireGuard tunnel to our
wg0 interface on port 3000. Since cell-api only listens on the Docker
bridge, we need a DNAT rule inside cell-wireguard's namespace to forward
that traffic. Called on every startup so rules survive container restarts.
wg0 interface on port 3000. The DNAT is scoped to -d {server_ip} so that
cross-cell traffic destined for another cell's API (different WG IP) is
not intercepted. Called on every startup so rules survive container restarts.
"""
try:
server_ip = _get_wg_server_ip()
if not server_ip:
logger.warning('ensure_cell_api_dnat: could not determine WG server IP')
return False
r = _run(['docker', 'inspect', '--format',
'{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}',
'cell-api'], check=False)
@@ -471,10 +492,12 @@ def ensure_cell_api_dnat() -> bool:
logger.warning('ensure_cell_api_dnat: cell-api container not found or no IP')
return False
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
'--dport', '3000', '-j', 'DNAT', '--to-destination', f'{api_ip}:3000']
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
'--dport', '3000', '-j', 'DNAT', '--to-destination', f'{api_ip}:3000']
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-d', server_ip,
'-p', 'tcp', '--dport', '3000',
'-j', 'DNAT', '--to-destination', f'{api_ip}:3000']
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-d', server_ip,
'-p', 'tcp', '--dport', '3000',
'-j', 'DNAT', '--to-destination', f'{api_ip}:3000']
if _wg_exec(['iptables'] + dnat_check).returncode != 0:
_wg_exec(['iptables'] + dnat_add)
@@ -500,21 +523,27 @@ def ensure_cell_api_dnat() -> bool:
def ensure_dns_dnat() -> bool:
"""DNAT wg0:53 (UDP+TCP) → cell-dns:53 so VPN peers use the WG server IP for DNS.
"""DNAT wg0:53 (scoped to WG server IP) → cell-dns:53.
Peers are configured with DNS = <wg_server_ip>. Their DNS queries arrive on
wg0:53 and must be forwarded to cell-dns inside the Docker bridge.
Peers send DNS queries to the WG server IP. DNAT is scoped with -d {server_ip}
so cross-cell DNS traffic destined for another cell is forwarded, not hijacked.
"""
try:
server_ip = _get_wg_server_ip()
if not server_ip:
logger.warning('ensure_dns_dnat: could not determine WG server IP')
return False
dns_ip = _get_dns_container_ip()
if not dns_ip:
logger.warning('ensure_dns_dnat: cell-dns not found')
return False
for proto in ('udp', 'tcp'):
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-p', proto,
'--dport', '53', '-j', 'DNAT', '--to-destination', f'{dns_ip}:53']
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-p', proto,
'--dport', '53', '-j', 'DNAT', '--to-destination', f'{dns_ip}:53']
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-d', server_ip,
'-p', proto, '--dport', '53',
'-j', 'DNAT', '--to-destination', f'{dns_ip}:53']
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-d', server_ip,
'-p', proto, '--dport', '53',
'-j', 'DNAT', '--to-destination', f'{dns_ip}:53']
if _wg_exec(['iptables'] + dnat_check).returncode != 0:
_wg_exec(['iptables'] + dnat_add)
for proto in ('udp', 'tcp'):
@@ -524,7 +553,7 @@ def ensure_dns_dnat() -> bool:
'-p', proto, '--dport', '53', '-j', 'ACCEPT']
if _wg_exec(['iptables'] + fwd_check).returncode != 0:
_wg_exec(['iptables'] + fwd_add)
logger.info(f'ensure_dns_dnat: wg0:53 → {dns_ip}:53')
logger.info(f'ensure_dns_dnat: wg0:{server_ip}:53 → {dns_ip}:53')
return True
except Exception as e:
logger.error(f'ensure_dns_dnat: {e}')
@@ -532,20 +561,26 @@ def ensure_dns_dnat() -> bool:
def ensure_service_dnat() -> bool:
"""DNAT wg0:80 → cell-caddy:80 so VPN peers reach services via Host-header routing.
"""DNAT wg0:80 (scoped to WG server IP) → cell-caddy:80.
All service DNS names resolve to the WG server IP. Traffic to wg0:80 is
forwarded to Caddy, which routes to the correct backend by Host header.
Service DNS names resolve to the WG server IP. DNAT is scoped with -d {server_ip}
so that cross-cell HTTP traffic destined for another cell passes through unmodified.
"""
try:
server_ip = _get_wg_server_ip()
if not server_ip:
logger.warning('ensure_service_dnat: could not determine WG server IP')
return False
caddy_ip = _get_caddy_container_ip()
if not caddy_ip:
logger.warning('ensure_service_dnat: cell-caddy not found')
return False
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
'--dport', '80', '-j', 'DNAT', '--to-destination', f'{caddy_ip}:80']
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
'--dport', '80', '-j', 'DNAT', '--to-destination', f'{caddy_ip}:80']
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-d', server_ip,
'-p', 'tcp', '--dport', '80',
'-j', 'DNAT', '--to-destination', f'{caddy_ip}:80']
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-d', server_ip,
'-p', 'tcp', '--dport', '80',
'-j', 'DNAT', '--to-destination', f'{caddy_ip}:80']
if _wg_exec(['iptables'] + dnat_check).returncode != 0:
_wg_exec(['iptables'] + dnat_add)
fwd_check = ['-C', 'FORWARD', '-i', 'wg0', '-o', 'eth0',
@@ -554,13 +589,80 @@ def ensure_service_dnat() -> bool:
'-p', 'tcp', '--dport', '80', '-j', 'ACCEPT']
if _wg_exec(['iptables'] + fwd_check).returncode != 0:
_wg_exec(['iptables'] + fwd_add)
logger.info(f'ensure_service_dnat: wg0:80 → {caddy_ip}:80')
logger.info(f'ensure_service_dnat: wg0:{server_ip}:80 → {caddy_ip}:80')
return True
except Exception as e:
logger.error(f'ensure_service_dnat: {e}')
return False
def ensure_wg_masquerade() -> bool:
    """Ensure Docker-bridge traffic leaving via wg0 is masqueraded and forwarded.

    Two rules are installed through ``_wg_exec`` if they are not already
    present (checked with ``iptables -C``):

    * nat/POSTROUTING: MASQUERADE for 172.20.0.0/16 going out of wg0, so
      packets from containers such as cell-dns carry wg0's address and
      replies can come back over the tunnel.
    * FORWARD: ACCEPT for 172.20.0.0/16 arriving on eth0 and leaving via wg0.

    Returns:
        True once both rules have been checked/added, False if an exception
        was raised along the way (the error is logged).
    """
    docker_net = '172.20.0.0/16'
    # Each entry is (args for the existence check, args that install the rule).
    rule_pairs = (
        (
            ['-t', 'nat', '-C', 'POSTROUTING', '-o', 'wg0',
             '-s', docker_net, '-j', 'MASQUERADE'],
            ['-t', 'nat', '-A', 'POSTROUTING', '-o', 'wg0',
             '-s', docker_net, '-j', 'MASQUERADE'],
        ),
        (
            ['-C', 'FORWARD', '-i', 'eth0', '-o', 'wg0',
             '-s', docker_net, '-j', 'ACCEPT'],
            ['-I', 'FORWARD', '-i', 'eth0', '-o', 'wg0',
             '-s', docker_net, '-j', 'ACCEPT'],
        ),
    )
    try:
        for probe_args, install_args in rule_pairs:
            probe = _wg_exec(['iptables'] + probe_args)
            if probe.returncode != 0:
                # A non-zero exit from -C means the rule is absent.
                _wg_exec(['iptables'] + install_args)
        logger.info('ensure_wg_masquerade: Docker→wg0 MASQUERADE+FORWARD configured')
        return True
    except Exception as e:
        logger.error(f'ensure_wg_masquerade: {e}')
        return False
def ensure_cell_subnet_routes(cell_links: List[Dict[str, Any]]) -> None:
    """Add host-namespace routes for remote cell VPN subnets via cell-wireguard.

    Docker containers (cell-dns, etc.) rely on the host's routing table to
    reach non-bridge destinations. Without a route, packets for a connected
    cell's VPN subnet go to the host's default gateway instead of
    cell-wireguard.

    For every link with a ``vpn_subnet`` a temporary ``--network host --rm``
    busybox container runs ``ip route replace <subnet> via <cell-wireguard>``
    in the host network namespace. Failures are logged and never raised.

    Each ``vpn_subnet`` is validated before use because it ends up as an
    argument to ``ip route`` on the host. Values that are not a valid network
    are skipped, and so is a zero-length prefix, which would replace the
    host's default route. Host bits are cleared (``10.0.2.1/24`` becomes
    ``10.0.2.0/24``) since ``ip route`` rejects prefixes with host bits set.

    Args:
        cell_links: Link dicts; only the ``vpn_subnet`` key is read. An empty
            or None value makes the function a no-op.
    """
    import ipaddress as _ipaddress

    if not cell_links:
        return
    WG_BRIDGE_IP = '172.20.0.9'  # cell-wireguard's fixed Docker IP (docker-compose.yml)
    for link in cell_links:
        subnet = link.get('vpn_subnet', '')
        if not subnet:
            continue
        try:
            network = _ipaddress.ip_network(str(subnet).strip(), strict=False)
            if network.prefixlen == 0:
                raise ValueError('refusing to replace the default route')
        except ValueError as e:
            logger.warning(f'ensure_cell_subnet_routes: invalid vpn_subnet {subnet!r}: {e}')
            continue
        destination = str(network)
        try:
            result = _run(
                ['docker', 'run', '--rm',
                 '--network', 'host',
                 '--cap-add', 'NET_ADMIN',
                 'busybox',
                 'ip', 'route', 'replace', destination, 'via', WG_BRIDGE_IP],
                check=False
            )
            if result.returncode == 0:
                logger.info(f'ensure_cell_subnet_routes: {destination} via {WG_BRIDGE_IP}')
            else:
                logger.warning(
                    f'ensure_cell_subnet_routes: {destination} failed: {result.stderr.strip()}'
                )
        except Exception as e:
            logger.warning(f'ensure_cell_subnet_routes: {destination}: {e}')
# ---------------------------------------------------------------------------
# DNS ACL (CoreDNS Corefile generation)
# ---------------------------------------------------------------------------
+32 -15
View File
@@ -152,20 +152,24 @@ class WireGuardManager(BaseServiceManager):
cfg_port = self._get_configured_port() if os.path.exists(self._config_file()) else port
dns_ip, caddy_ip = self._get_dnat_container_ips()
dnat_up = (
f'iptables -t nat -A PREROUTING -i %i -p udp --dport 53 -j DNAT --to-destination {dns_ip}:53; '
f'iptables -t nat -A PREROUTING -i %i -p tcp --dport 53 -j DNAT --to-destination {dns_ip}:53; '
f'iptables -t nat -A PREROUTING -i %i -p tcp --dport 80 -j DNAT --to-destination {caddy_ip}:80; '
f'iptables -t nat -A PREROUTING -i %i -d {server_ip} -p udp --dport 53 -j DNAT --to-destination {dns_ip}:53; '
f'iptables -t nat -A PREROUTING -i %i -d {server_ip} -p tcp --dport 53 -j DNAT --to-destination {dns_ip}:53; '
f'iptables -t nat -A PREROUTING -i %i -d {server_ip} -p tcp --dport 80 -j DNAT --to-destination {caddy_ip}:80; '
f'iptables -I FORWARD -i %i -o eth0 -p tcp --dport 80 -j ACCEPT; '
f'iptables -I FORWARD -i %i -o eth0 -p udp --dport 53 -j ACCEPT; '
f'iptables -I FORWARD -i %i -o eth0 -p tcp --dport 53 -j ACCEPT'
f'iptables -I FORWARD -i %i -o eth0 -p tcp --dport 53 -j ACCEPT; '
f'iptables -I FORWARD -i eth0 -o %i -s 172.20.0.0/16 -j ACCEPT; '
f'iptables -t nat -A POSTROUTING -o %i -s 172.20.0.0/16 -j MASQUERADE'
)
dnat_down = (
f'iptables -t nat -D PREROUTING -i %i -p udp --dport 53 -j DNAT --to-destination {dns_ip}:53 2>/dev/null || true; '
f'iptables -t nat -D PREROUTING -i %i -p tcp --dport 53 -j DNAT --to-destination {dns_ip}:53 2>/dev/null || true; '
f'iptables -t nat -D PREROUTING -i %i -p tcp --dport 80 -j DNAT --to-destination {caddy_ip}:80 2>/dev/null || true; '
f'iptables -t nat -D PREROUTING -i %i -d {server_ip} -p udp --dport 53 -j DNAT --to-destination {dns_ip}:53 2>/dev/null || true; '
f'iptables -t nat -D PREROUTING -i %i -d {server_ip} -p tcp --dport 53 -j DNAT --to-destination {dns_ip}:53 2>/dev/null || true; '
f'iptables -t nat -D PREROUTING -i %i -d {server_ip} -p tcp --dport 80 -j DNAT --to-destination {caddy_ip}:80 2>/dev/null || true; '
f'iptables -D FORWARD -i %i -o eth0 -p tcp --dport 80 -j ACCEPT 2>/dev/null || true; '
f'iptables -D FORWARD -i %i -o eth0 -p udp --dport 53 -j ACCEPT 2>/dev/null || true; '
f'iptables -D FORWARD -i %i -o eth0 -p tcp --dport 53 -j ACCEPT 2>/dev/null || true'
f'iptables -D FORWARD -i %i -o eth0 -p tcp --dport 53 -j ACCEPT 2>/dev/null || true; '
f'iptables -D FORWARD -i eth0 -o %i -s 172.20.0.0/16 -j ACCEPT 2>/dev/null || true; '
f'iptables -t nat -D POSTROUTING -o %i -s 172.20.0.0/16 -j MASQUERADE 2>/dev/null || true'
)
return (
f'[Interface]\n'
@@ -190,12 +194,18 @@ class WireGuardManager(BaseServiceManager):
t = token.strip()
if not t.startswith('iptables'):
return False
# PREROUTING DNAT on ports 53 or 80
# PREROUTING DNAT on ports 53 or 80 (scoped or unscoped — we replace both)
if 'PREROUTING' in t and 'DNAT' in t and ('--dport 53' in t or '--dport 80' in t):
return True
# FORWARD accept to eth0 for ports 53 or 80 (service traffic forwarding)
if 'FORWARD' in t and '-o eth0' in t and ('--dport 53' in t or '--dport 80' in t):
return True
# Docker-to-WG FORWARD: eth0 → wg0 for 172.20.0.0/16
if 'FORWARD' in t and '-i eth0' in t and '172.20.0.0/16' in t:
return True
# Docker-to-WG MASQUERADE: POSTROUTING wg0 egress for 172.20.0.0/16
if 'POSTROUTING' in t and 'MASQUERADE' in t and '172.20.0.0/16' in t:
return True
return False
def ensure_postup_dnat(self) -> bool:
@@ -213,23 +223,30 @@ class WireGuardManager(BaseServiceManager):
with open(cf) as f:
content = f.read()
import ipaddress as _ipaddress
address = self._get_configured_address()
server_ip = str(_ipaddress.ip_interface(address).ip)
dns_ip, caddy_ip = self._get_dnat_container_ips()
dnat_up = (
f'iptables -t nat -A PREROUTING -i %i -p udp --dport 53 -j DNAT --to-destination {dns_ip}:53'
f'; iptables -t nat -A PREROUTING -i %i -p tcp --dport 53 -j DNAT --to-destination {dns_ip}:53'
f'; iptables -t nat -A PREROUTING -i %i -p tcp --dport 80 -j DNAT --to-destination {caddy_ip}:80'
f'iptables -t nat -A PREROUTING -i %i -d {server_ip} -p udp --dport 53 -j DNAT --to-destination {dns_ip}:53'
f'; iptables -t nat -A PREROUTING -i %i -d {server_ip} -p tcp --dport 53 -j DNAT --to-destination {dns_ip}:53'
f'; iptables -t nat -A PREROUTING -i %i -d {server_ip} -p tcp --dport 80 -j DNAT --to-destination {caddy_ip}:80'
f'; iptables -I FORWARD -i %i -o eth0 -p tcp --dport 80 -j ACCEPT'
f'; iptables -I FORWARD -i %i -o eth0 -p udp --dport 53 -j ACCEPT'
f'; iptables -I FORWARD -i %i -o eth0 -p tcp --dport 53 -j ACCEPT'
f'; iptables -I FORWARD -i eth0 -o %i -s 172.20.0.0/16 -j ACCEPT'
f'; iptables -t nat -A POSTROUTING -o %i -s 172.20.0.0/16 -j MASQUERADE'
)
dnat_down = (
f'iptables -t nat -D PREROUTING -i %i -p udp --dport 53 -j DNAT --to-destination {dns_ip}:53 2>/dev/null || true'
f'; iptables -t nat -D PREROUTING -i %i -p tcp --dport 53 -j DNAT --to-destination {dns_ip}:53 2>/dev/null || true'
f'; iptables -t nat -D PREROUTING -i %i -p tcp --dport 80 -j DNAT --to-destination {caddy_ip}:80 2>/dev/null || true'
f'iptables -t nat -D PREROUTING -i %i -d {server_ip} -p udp --dport 53 -j DNAT --to-destination {dns_ip}:53 2>/dev/null || true'
f'; iptables -t nat -D PREROUTING -i %i -d {server_ip} -p tcp --dport 53 -j DNAT --to-destination {dns_ip}:53 2>/dev/null || true'
f'; iptables -t nat -D PREROUTING -i %i -d {server_ip} -p tcp --dport 80 -j DNAT --to-destination {caddy_ip}:80 2>/dev/null || true'
f'; iptables -D FORWARD -i %i -o eth0 -p tcp --dport 80 -j ACCEPT 2>/dev/null || true'
f'; iptables -D FORWARD -i %i -o eth0 -p udp --dport 53 -j ACCEPT 2>/dev/null || true'
f'; iptables -D FORWARD -i %i -o eth0 -p tcp --dport 53 -j ACCEPT 2>/dev/null || true'
f'; iptables -D FORWARD -i eth0 -o %i -s 172.20.0.0/16 -j ACCEPT 2>/dev/null || true'
f'; iptables -t nat -D POSTROUTING -o %i -s 172.20.0.0/16 -j MASQUERADE 2>/dev/null || true'
)
lines = content.split('\n')
+253 -101
View File
@@ -1,56 +1,115 @@
"""
E2E test: cross-cell routing for a split-tunnel VPN peer.
Creates a temporary WireGuard peer on cell2 (pic1 / test), brings up a real
WireGuard tunnel from the test-runner host, and verifies that cell1 (pic0 / dev)
is reachable end-to-end via the cell-to-cell link.
Creates a temporary WireGuard peer on cell2 (the first connected cell), brings up
a real WireGuard tunnel from the test-runner host, and verifies that cell1 (the
local cell) is reachable end-to-end via the cell-to-cell link.
Why this test is meaningful
---------------------------
10.0.0.1 is cell1's WireGuard server IP, reachable ONLY inside cell1's
cell-wireguard Docker container. It is NOT reachable directly from the
test-runner host (verified: 100% packet loss without VPN).
Cell1's WireGuard server IP is reachable ONLY inside cell1's cell-wireguard Docker
container. It is NOT reachable directly from the test-runner host. If a ping to
that IP succeeds, the full path was taken:
If a ping to 10.0.0.1 succeeds during the test, the full path was taken:
[test-runner wg-e2e] 192.168.31.52:51821 [pic1 cell-wireguard FORWARD]
[cell-to-cell WG tunnel] [pic0 cell-wireguard] 10.0.0.1
[test-runner wg-e2e] → cell2 WireGuard → [cell-to-cell tunnel] → cell1 WG IP
Prerequisites
-------------
* SSH access to 192.168.31.52 (pic1) as 'roof' with no passphrase
* `wg-quick` and `sudo` available on the test runner (pic0)
* Both cells must have an active cell-to-cell WireGuard handshake
* /home/roof/pic/data/api/cell_links.json must have at least one connected cell
* /home/roof/pic/config/wireguard/wg_confs/wg0.conf must exist
* SSH access to cell2's LAN IP as 'roof' with no passphrase
* `wg-quick`, `dig`, and `sudo` available on the test runner
Skip conditions are checked at fixture time; no manual flag needed.
All configuration is read dynamically from config files — no hardcoded IPs or ports.
Skip conditions are checked at module level; no manual flag needed.
"""
import ipaddress
import json
import os
import re
import subprocess
import secrets
import time
import pytest
# -------------------------------------------------------------------------
# Constants
# Dynamic configuration loading
# -------------------------------------------------------------------------
PIC1_LAN = '192.168.31.52' # test cell (cell2)
PIC1_WG_PORT = 51821 # WireGuard ListenPort on pic1
PIC1_WG_PUBKEY = 'ITl3+KfcNjsDq9ztE+1TC10rmeqaLmpGgTXEEk07BiE='
_CELL_LINKS_FILE = '/home/roof/pic/data/api/cell_links.json'
_WG_CONF_FILE = '/home/roof/pic/config/wireguard/wg_confs/wg0.conf'
_CELL_CONFIG_FILE = '/home/roof/pic/config/api/cell_config.json'
PIC1_WG_SERVER_IP = '10.0.2.1' # cell2's WireGuard server IP
PIC0_WG_SERVER_IP = '10.0.0.1' # cell1's WireGuard server IP (cross-cell target)
TEST_PEER_IP = '10.0.2.250' # unused IP in cell2's VPN subnet
TEST_PEER_CIDR = f'{TEST_PEER_IP}/32'
IFACE_NAME = 'pic-e2e-c2c'
def _load_cfg() -> dict:
"""Load all test parameters from local config files. Returns {} on any error."""
cfg = {}
# AllowedIPs for the test peer: cell2's local subnet + cell1's subnet (cross-cell)
SPLIT_TUNNEL_ALLOWED_IPS = '10.0.2.0/24, 10.0.0.0/24'
# --- cell1 (local/our) identity ---
try:
with open(_CELL_CONFIG_FILE) as f:
identity = json.load(f).get('_identity', {})
cfg['cell1_domain'] = identity.get('domain', '')
cfg['cell1_wg_port'] = int(identity.get('wireguard_port', 51820))
except Exception:
pass
# --- cell1 WG server IP from wg0.conf [Interface] Address ---
try:
with open(_WG_CONF_FILE) as f:
in_iface = False
for line in f:
line = line.strip()
if line == '[Interface]':
in_iface = True
elif line.startswith('[') and line.endswith(']'):
in_iface = False
elif in_iface and line.startswith('Address') and '=' in line:
addr = line.split('=', 1)[1].strip()
net = ipaddress.ip_interface(addr)
cfg['cell1_wg_ip'] = str(net.ip)
cfg['cell1_vpn_subnet'] = str(net.network)
break
except Exception:
pass
# --- cell2 (connected peer) from cell_links.json (first entry) ---
try:
with open(_CELL_LINKS_FILE) as f:
links = json.load(f)
if links:
link = links[0]
endpoint = link.get('endpoint', '')
if endpoint:
host, _, port = endpoint.rpartition(':')
cfg['cell2_lan_ip'] = host
cfg['cell2_wg_port'] = int(port)
cfg['cell2_pubkey'] = link.get('public_key', '')
cfg['cell2_wg_ip'] = link.get('dns_ip', '')
cfg['cell2_vpn_subnet'] = link.get('vpn_subnet', '')
cfg['cell2_domain'] = link.get('domain', '')
except Exception:
pass
# --- Derive TEST_PEER_IP: a high-range host in cell2's VPN subnet ---
# Use .250 (e.g., 10.0.2.250 for 10.0.2.0/24)
try:
net = ipaddress.ip_network(cfg['cell2_vpn_subnet'], strict=False)
cfg['test_peer_ip'] = str(net.network_address + 250)
except Exception:
pass
return cfg
_CFG = _load_cfg()
IFACE_NAME = 'pic-e2e-c2c'
IPTABLES_COMMENT = 'pic-e2e-c2c-test'
# Maximum acceptable average RTT for cells on the same LAN
MAX_LATENCY_MS = 10.0
pytestmark = pytest.mark.wg
@@ -63,19 +122,18 @@ def _run(cmd, **kw):
def _ssh(cmd, timeout=15):
"""Run a command on pic1 via SSH and return the CompletedProcess."""
"""Run a command on cell2 via SSH and return the CompletedProcess."""
lan_ip = _CFG.get('cell2_lan_ip', '')
return _run(
['ssh', '-o', 'StrictHostKeyChecking=no', '-o', 'BatchMode=yes',
'-o', f'ConnectTimeout=5', f'roof@{PIC1_LAN}', cmd],
'-o', 'ConnectTimeout=5', f'roof@{lan_ip}', cmd],
timeout=timeout,
)
def _pic1_wg(args, timeout=10):
"""Run a command inside pic1's cell-wireguard container via SSH."""
cmd = 'docker exec cell-wireguard ' + args
r = _ssh(cmd, timeout=timeout)
return r
def _pic2_wg(args, timeout=10):
    """Execute *args* inside cell2's cell-wireguard container, reached over SSH.

    Returns whatever ``_ssh`` returns for the composed ``docker exec`` command.
    """
    remote_command = 'docker exec cell-wireguard ' + args
    return _ssh(remote_command, timeout=timeout)
def _ping(ip, count=3, wait=2):
@@ -87,40 +145,43 @@ def _cleanup_iface():
_run(['sudo', 'ip', 'link', 'delete', IFACE_NAME], timeout=5)
def _cleanup_pic1_peer(pubkey):
_pic1_wg(f'wg set wg0 peer {pubkey} remove')
def _cleanup_pic2_peer(pubkey):
    """Remove the peer identified by *pubkey* from cell2's live wg0 interface."""
    removal_cmd = f'wg set wg0 peer {pubkey} remove'
    _pic2_wg(removal_cmd)
def _cleanup_pic1_iptables():
_pic1_wg(f'iptables -D FORWARD -s {TEST_PEER_IP} -j ACCEPT '
f'-m comment --comment {IPTABLES_COMMENT}')
def _cleanup_pic2_iptables(peer_ip):
    """Delete the temporary FORWARD ACCEPT rule for *peer_ip* on cell2.

    The rule is matched by source address plus the IPTABLES_COMMENT tag.
    """
    rule_spec = (
        f'FORWARD -s {peer_ip} -j ACCEPT '
        f'-m comment --comment {IPTABLES_COMMENT}'
    )
    _pic2_wg('iptables -D ' + rule_spec)
# -------------------------------------------------------------------------
# Session-level skip check
# Skip checks
# -------------------------------------------------------------------------
def _check_prerequisites():
"""Return a skip reason string, or None if all prereqs are met."""
# Check wg-quick
required_keys = ('cell1_wg_ip', 'cell2_lan_ip', 'cell2_pubkey',
'cell2_wg_ip', 'test_peer_ip', 'cell2_vpn_subnet',
'cell1_vpn_subnet')
missing = [k for k in required_keys if not _CFG.get(k)]
if missing:
return f'Config incomplete (missing: {", ".join(missing)}). ' \
f'Ensure cell_links.json and wg0.conf exist and are populated.'
if _run(['which', 'wg-quick']).returncode != 0:
return 'wg-quick not found on test runner'
# Check sudo
if _run(['which', 'dig']).returncode != 0:
return 'dig not found on test runner'
if _run(['sudo', '-n', 'true']).returncode != 0:
return 'passwordless sudo not available on test runner'
# Check SSH to pic1
r = _ssh('echo ok', timeout=6)
if r.returncode != 0 or 'ok' not in r.stdout:
return f'SSH to {PIC1_LAN} failed: {r.stderr.strip() or r.stdout.strip()}'
# Check that 10.0.0.1 is NOT reachable directly (otherwise test is meaningless)
# (a failure here is just a warning, not a skip)
lan = _CFG.get('cell2_lan_ip', '?')
return f'SSH to {lan} failed: {r.stderr.strip() or r.stdout.strip()}'
return None
# -------------------------------------------------------------------------
# Module-level skip
# -------------------------------------------------------------------------
_SKIP_REASON = _check_prerequisites()
@@ -131,20 +192,23 @@ _SKIP_REASON = _check_prerequisites()
@pytest.fixture(scope='module')
def wg_setup(tmp_path_factory):
"""
Module-scoped fixture: adds test peer to pic1, brings up wg interface on
pic0 host, yields, then tears everything down.
Yields a dict:
{
'peer_ip': '10.0.2.250',
'allowed_ips': '10.0.2.0/24, 10.0.0.0/24',
'privkey': '<wg private key>',
'pubkey': '<wg public key>',
}
Module-scoped fixture: adds test peer to cell2, brings up wg interface on
cell1 (test runner), yields config dict, then tears everything down.
"""
if _SKIP_REASON:
pytest.skip(_SKIP_REASON)
cell2_lan_ip = _CFG['cell2_lan_ip']
cell2_wg_port = _CFG['cell2_wg_port']
cell2_pubkey = _CFG['cell2_pubkey']
cell2_vpn_subnet = _CFG['cell2_vpn_subnet']
cell1_vpn_subnet = _CFG['cell1_vpn_subnet']
test_peer_ip = _CFG['test_peer_ip']
test_peer_cidr = f'{test_peer_ip}/32'
# AllowedIPs: cell2's subnet + cell1's subnet (split-tunnel cross-cell)
allowed_ips = f'{cell2_vpn_subnet}, {cell1_vpn_subnet}'
tmp_path = tmp_path_factory.mktemp('wg_e2e_c2c')
# --- Generate a WireGuard key pair ---
@@ -157,28 +221,28 @@ def wg_setup(tmp_path_factory):
assert pub_r.returncode == 0, f'wg pubkey failed: {pub_r.stderr}'
pubkey = pub_r.stdout.strip()
# --- Add peer to pic1's wg0 (live, no restart needed) ---
r = _pic1_wg(f'wg set wg0 peer {pubkey} allowed-ips {TEST_PEER_CIDR} persistent-keepalive 25')
assert r.returncode == 0, f'wg set peer failed on pic1: {r.stderr}'
# --- Add peer to cell2's wg0 (live, no restart needed) ---
r = _pic2_wg(f'wg set wg0 peer {pubkey} allowed-ips {test_peer_cidr} persistent-keepalive 25')
assert r.returncode == 0, f'wg set peer failed on cell2: {r.stderr}'
# --- Add permissive iptables rule so test traffic passes FORWARD ---
r = _pic1_wg(
f'iptables -I FORWARD 1 -s {TEST_PEER_IP} -j ACCEPT '
# --- Add permissive iptables ACCEPT so test traffic passes cell2's FORWARD ---
r = _pic2_wg(
f'iptables -I FORWARD 1 -s {test_peer_ip} -j ACCEPT '
f'-m comment --comment {IPTABLES_COMMENT}'
)
assert r.returncode == 0, f'iptables -I FORWARD failed on pic1: {r.stderr}'
assert r.returncode == 0, f'iptables -I FORWARD failed on cell2: {r.stderr}'
# --- Write wg-quick config on the test runner ---
conf_path = str(tmp_path / f'{IFACE_NAME}.conf')
conf = (
f'[Interface]\n'
f'PrivateKey = {privkey}\n'
f'Address = {TEST_PEER_IP}/32\n'
f'Address = {test_peer_ip}/32\n'
f'\n'
f'[Peer]\n'
f'PublicKey = {PIC1_WG_PUBKEY}\n'
f'Endpoint = {PIC1_LAN}:{PIC1_WG_PORT}\n'
f'AllowedIPs = {SPLIT_TUNNEL_ALLOWED_IPS}\n'
f'PublicKey = {cell2_pubkey}\n'
f'Endpoint = {cell2_lan_ip}:{cell2_wg_port}\n'
f'AllowedIPs = {allowed_ips}\n'
f'PersistentKeepalive = 25\n'
)
with open(conf_path, 'w') as f:
@@ -189,15 +253,17 @@ def wg_setup(tmp_path_factory):
up_r = _run(['sudo', 'wg-quick', 'up', conf_path], timeout=15)
assert up_r.returncode == 0, f'wg-quick up failed: {up_r.stderr}\n{up_r.stdout}'
# Give WireGuard a moment to establish the handshake
time.sleep(3)
yield {
'peer_ip': TEST_PEER_IP,
'allowed_ips': SPLIT_TUNNEL_ALLOWED_IPS,
'privkey': privkey,
'pubkey': pubkey,
'conf_path': conf_path,
'test_peer_ip': test_peer_ip,
'allowed_ips': allowed_ips,
'privkey': privkey,
'pubkey': pubkey,
'conf_path': conf_path,
'cell1_wg_ip': _CFG['cell1_wg_ip'],
'cell2_wg_ip': _CFG['cell2_wg_ip'],
'cell1_domain': _CFG.get('cell1_domain', ''),
}
# --- Teardown ---
@@ -206,8 +272,8 @@ def wg_setup(tmp_path_factory):
os.unlink(conf_path)
except Exception:
pass
_cleanup_pic1_iptables()
_cleanup_pic1_peer(pubkey)
_cleanup_pic2_iptables(test_peer_ip)
_cleanup_pic2_peer(pubkey)
# -------------------------------------------------------------------------
@@ -219,24 +285,25 @@ class TestCellToCellRouting:
Full end-to-end: split-tunnel peer on cell2 reaches cell1 via cell-to-cell tunnel.
"""
def test_prerequisites_10_0_0_1_not_reachable_directly(self):
"""Confirm 10.0.0.1 is NOT reachable from host without VPN (test validity check)."""
assert not _ping(PIC0_WG_SERVER_IP, count=1, wait=1), (
f'{PIC0_WG_SERVER_IP} is reachable WITHOUT the VPN — the test would be '
f'a false positive. The test is only meaningful when this IP is unreachable '
f'without the tunnel.'
def test_prerequisites_cell1_not_reachable_directly(self):
    """Validity check: cell1's WG IP must not answer pings outside the VPN.

    If the address were reachable without the tunnel, the cross-cell
    reachability tests could pass without exercising the tunnel at all.
    """
    cell1_wg_ip = _CFG.get('cell1_wg_ip', '10.0.0.1')
    reachable_without_vpn = _ping(cell1_wg_ip, count=1, wait=1)
    failure_msg = (
        f'{cell1_wg_ip} is reachable WITHOUT the VPN — test would be a false positive. '
        f'The test is only meaningful when this IP is unreachable without the tunnel.'
    )
    assert not reachable_without_vpn, failure_msg
def test_cell2_wg_ip_reachable(self, wg_setup):
"""Cell2's WireGuard server IP is reachable (basic tunnel sanity)."""
assert _ping(PIC1_WG_SERVER_IP), (
f'Cell2 WG server IP {PIC1_WG_SERVER_IP} not reachable. '
cell2_wg_ip = wg_setup['cell2_wg_ip']
assert _ping(cell2_wg_ip), (
f'Cell2 WG server IP {cell2_wg_ip} not reachable. '
f'Handshake may not have established. '
f'Peer allowed-ips: {wg_setup["allowed_ips"]}'
)
def test_handshake_established(self, wg_setup):
"""A WireGuard handshake with pic1 has completed (within 30 s)."""
"""A WireGuard handshake with cell2 has completed (within 30 s)."""
deadline = time.time() + 30
while time.time() < deadline:
r = _run(['sudo', 'wg', 'show', IFACE_NAME], timeout=5)
@@ -244,34 +311,59 @@ class TestCellToCellRouting:
return
time.sleep(2)
pytest.fail(
f'No WireGuard handshake with pic1 after 30 s.\n'
f'No WireGuard handshake with cell2 after 30 s.\n'
f'wg show output:\n{r.stdout}'
)
def test_cross_cell_wg_ip_reachable(self, wg_setup):
"""
Cell1's WireGuard IP (10.0.0.1) is reachable from a peer connected to cell2.
Cell1's WireGuard IP is reachable from a peer connected to cell2.
This is the critical cross-cell routing test. The full path is:
test-runner wg-e2e pic1 cell-wireguard FORWARD cell-to-cell tunnel pic0 10.0.0.1
test-runner wg-e2e → cell2 FORWARD → cell-to-cell tunnel → cell1 WG IP
"""
assert _ping(PIC0_WG_SERVER_IP, count=3, wait=3), (
f'Cell1 WG IP {PIC0_WG_SERVER_IP} NOT reachable from split-tunnel peer on cell2. '
cell1_wg_ip = wg_setup['cell1_wg_ip']
assert _ping(cell1_wg_ip, count=3, wait=3), (
f'Cell1 WG IP {cell1_wg_ip} NOT reachable from split-tunnel peer on cell2. '
f'\nAllowed IPs: {wg_setup["allowed_ips"]}'
f'\nThis means the cell-to-cell routing is broken. Check:'
f'\n 1. pic1 FORWARD chain has ESTABLISHED,RELATED ACCEPT'
f'\n 2. pic1 wg0.conf has AllowedIPs=10.0.0.0/24 for the dev cell peer'
f'\n 3. Cell-to-cell WireGuard handshake is recent (wg show on pic1)'
f'\n 1. cell2 FORWARD chain has ESTABLISHED,RELATED ACCEPT'
f'\n 2. cell2 wg0.conf has AllowedIPs covering cell1 subnet'
f'\n 3. Cell-to-cell WireGuard handshake is recent (wg show on cell2)'
)
def test_cross_cell_ping_latency(self, wg_setup):
    """Average cross-cell ping RTT stays below MAX_LATENCY_MS.

    Sends 10 pings to cell1's WG IP and parses the average from the
    ``rtt min/avg/max/mdev`` summary line. Both cells are expected to be on
    the same LAN, so a high average suggests traffic is going via the
    internet rather than directly over the LAN WireGuard tunnel. Check the
    endpoints in cell_links.json.
    """
    target_ip = wg_setup['cell1_wg_ip']
    ping = _run(['ping', '-c', '10', '-W', '2', target_ip], timeout=30)
    assert ping.returncode == 0, (
        f'Ping to {target_ip} failed completely: {ping.stderr}'
    )

    rtt_pattern = r'rtt min/avg/max/mdev = [\d.]+/([\d.]+)/[\d.]+/[\d.]+ ms'
    rtt_match = re.search(rtt_pattern, ping.stdout)
    assert rtt_match, f'Could not parse ping RTT from output:\n{ping.stdout}'

    # Group 1 is the "avg" field of the summary line.
    avg_ms = float(rtt_match.group(1))
    assert avg_ms < MAX_LATENCY_MS, (
        f'Cross-cell avg RTT {avg_ms:.2f}ms exceeds {MAX_LATENCY_MS}ms. '
        f'Both cells are on the same LAN — high latency means traffic routes '
        f'via the internet. Check cell_links.json uses LAN IPs, not public IPs.'
    )
def test_cross_cell_api_reachable(self, wg_setup):
"""Cell1's API /health is reachable through the cell-to-cell tunnel."""
import urllib.request, urllib.error
url = f'http://{PIC0_WG_SERVER_IP}:3000/health'
cell1_wg_ip = wg_setup['cell1_wg_ip']
url = f'http://{cell1_wg_ip}:3000/health'
try:
with urllib.request.urlopen(url, timeout=8) as resp:
import json
body = json.loads(resp.read())
import json as _json
body = _json.loads(resp.read())
assert body.get('status') == 'healthy', (
f'Cell1 API returned unexpected health: {body}'
)
@@ -285,15 +377,14 @@ class TestCellToCellRouting:
def test_cross_cell_web_reachable(self, wg_setup):
"""Cell1's web service (port 80 via Caddy) is reachable through the tunnel."""
import urllib.request, urllib.error
# Port 80 goes to Caddy → services. We expect any HTTP response (even a redirect).
url = f'http://{PIC0_WG_SERVER_IP}/'
cell1_wg_ip = wg_setup['cell1_wg_ip']
url = f'http://{cell1_wg_ip}/'
try:
with urllib.request.urlopen(url, timeout=8) as resp:
assert resp.status in (200, 301, 302, 307, 308), (
f'Unexpected HTTP status from cell1 Caddy: {resp.status}'
)
except urllib.error.HTTPError as e:
# HTTPError means we got a response — tunnel works even if it's a 4xx/5xx
assert e.code < 500, (
f'Cell1 Caddy returned server error {e.code} — may indicate a Caddy issue'
)
@@ -301,3 +392,64 @@ class TestCellToCellRouting:
pytest.fail(
f'Cell1 web (Caddy) at {url} not reachable via tunnel: {e}'
)
def test_cross_cell_domain_accessible(self, wg_setup):
    """A service domain from cell1 is resolvable via cell2's DNS and HTTP-reachable.

    DNS chain:
        test-peer → cell2_wg_ip:53 (DNAT → cell-dns on cell2)
        → cell2 Corefile forwards cell1_domain → cell1_wg_ip:53
        → cell1 cell-dns returns A record → cell1_wg_ip
    HTTP:
        test-peer → cell1_wg_ip:80 (Host: calendar.<cell1_domain>)
        → cell-to-cell tunnel → cell1 Caddy

    Requires: scoped DNAT (wg0 PREROUTING -d server_ip) on both cells
    and Docker→wg0 routing on cell2 (host route + MASQUERADE).

    The test is skipped when ``wg_setup`` carries no ``cell1_domain``.
    """
    import urllib.error
    import urllib.request

    domain = wg_setup.get('cell1_domain', '')
    resolver_ip = wg_setup['cell2_wg_ip']
    target_ip = wg_setup['cell1_wg_ip']
    if not domain:
        pytest.skip('cell1_domain not configured — cannot test domain access')
    fqdn = f'calendar.{domain}'

    # --- DNS resolution via cell2's DNS ---
    dig_cmd = ['dig', f'@{resolver_ip}', fqdn, '+short', '+time=5', '+tries=2']
    dig = _run(dig_cmd, timeout=15)
    assert dig.returncode == 0, (
        f'dig @{resolver_ip} {fqdn} failed: {dig.stderr.strip()}\n'
        f'DNS chain: test-peer → {resolver_ip}:53 → cell-dns(cell2) '
        f'{target_ip}:53 (cell1). '
        f'If this fails, check: (1) DNAT on cell2 scoped to -d {resolver_ip}, '
        f'(2) Docker→wg0 routing on cell2 (host route + MASQUERADE).'
    )
    answer = dig.stdout.strip()
    assert answer == target_ip, (
        f'DNS resolved {fqdn!r} to {answer!r}, '
        f'expected {target_ip!r}. '
        f'cell1 zone: all {domain} names should point to {target_ip}.'
    )

    # --- HTTP access via domain name (Host header → Caddy routing) ---
    url = f'http://{target_ip}/'
    request = urllib.request.Request(url, headers={'Host': fqdn})
    try:
        with urllib.request.urlopen(request, timeout=8) as response:
            status = response.status
    except urllib.error.HTTPError as err:
        # An HTTP error status still proves the tunnel delivered a response.
        assert err.code < 500, (
            f'cell1 Caddy server error {err.code} for Host:{fqdn}'
        )
    except urllib.error.URLError as err:
        pytest.fail(
            f'HTTP to {url} (Host:{fqdn}) via tunnel failed: {err}'
        )
    else:
        assert status < 500, (
            f'cell1 Caddy returned {status} for Host:{fqdn}'
        )
+28 -5
View File
@@ -793,7 +793,7 @@ class TestCellRules(unittest.TestCase):
class TestEnsureCellApiDnat(unittest.TestCase):
"""Tests for ensure_cell_api_dnat — DNAT wg0:3000 → cell-api:3000."""
"""Tests for ensure_cell_api_dnat — DNAT wg0:3000 (scoped) → cell-api:3000."""
def _wg_exec_no_existing_rules(self, args):
r = MagicMock()
@@ -815,7 +815,8 @@ class TestEnsureCellApiDnat(unittest.TestCase):
return r
def test_dnat_rules_added_when_not_present(self):
with patch.object(firewall_manager, '_run', return_value=self._inspect_ok()), \
with patch.object(firewall_manager, '_get_wg_server_ip', return_value='10.0.0.1'), \
patch.object(firewall_manager, '_run', return_value=self._inspect_ok()), \
patch.object(firewall_manager, '_wg_exec',
side_effect=self._wg_exec_no_existing_rules) as wg_mock:
result = firewall_manager.ensure_cell_api_dnat()
@@ -825,8 +826,23 @@ class TestEnsureCellApiDnat(unittest.TestCase):
dnat_adds = [a for a in calls_args if 'DNAT' in a and '-A' in a]
self.assertTrue(len(dnat_adds) >= 1, 'DNAT -A rule must be added')
def test_dnat_is_scoped_to_server_ip(self):
    """DNAT rule must include -d <server_ip> to avoid intercepting cross-cell traffic.

    ``_get_wg_server_ip`` is patched to return a fixed address, and every
    ``_wg_exec`` call that appends (``-A``) a DNAT rule is checked for both
    the ``-d`` flag and that address. The test first requires at least one
    such call, so it cannot pass vacuously when no DNAT rule is issued.
    """
    server_ip = '10.0.0.1'
    with patch.object(firewall_manager, '_get_wg_server_ip', return_value=server_ip), \
            patch.object(firewall_manager, '_run', return_value=self._inspect_ok()), \
            patch.object(firewall_manager, '_wg_exec',
                         side_effect=self._wg_exec_no_existing_rules) as wg_mock:
        firewall_manager.ensure_cell_api_dnat()
    all_args = [c.args[0] for c in wg_mock.call_args_list]
    dnat_adds = [a for a in all_args if 'DNAT' in a and '-A' in a]
    # Without this guard the loop below would assert nothing on an empty list.
    self.assertTrue(dnat_adds, 'DNAT -A rule must be added')
    for rule in dnat_adds:
        with self.subTest(rule=rule):
            self.assertIn(server_ip, rule, 'DNAT rule must be scoped to server IP')
            self.assertIn('-d', rule, 'DNAT rule must use -d to scope to server IP')
def test_dnat_skipped_if_already_present(self):
with patch.object(firewall_manager, '_run', return_value=self._inspect_ok()), \
with patch.object(firewall_manager, '_get_wg_server_ip', return_value='10.0.0.1'), \
patch.object(firewall_manager, '_run', return_value=self._inspect_ok()), \
patch.object(firewall_manager, '_wg_exec',
side_effect=self._wg_exec_all_rules_exist) as wg_mock:
result = firewall_manager.ensure_cell_api_dnat()
@@ -836,16 +852,23 @@ class TestEnsureCellApiDnat(unittest.TestCase):
add_calls = [a for a in calls_args if '-A' in a or '-I' in a]
self.assertEqual(len(add_calls), 0, 'No rules should be added when they already exist')
def test_returns_false_when_wg_server_ip_not_found(self):
    """ensure_cell_api_dnat() yields a falsy result when _get_wg_server_ip returns None."""
    missing_server_ip = patch.object(
        firewall_manager, '_get_wg_server_ip', return_value=None
    )
    with missing_server_ip:
        outcome = firewall_manager.ensure_cell_api_dnat()
    self.assertFalse(outcome)
def test_returns_false_when_cell_api_not_found(self):
r = MagicMock()
r.returncode = 0
r.stdout = ''
with patch.object(firewall_manager, '_run', return_value=r):
with patch.object(firewall_manager, '_get_wg_server_ip', return_value='10.0.0.1'), \
patch.object(firewall_manager, '_run', return_value=r):
result = firewall_manager.ensure_cell_api_dnat()
self.assertFalse(result)
def test_returns_false_on_exception(self):
with patch.object(firewall_manager, '_run', side_effect=RuntimeError('docker gone')):
with patch.object(firewall_manager, '_get_wg_server_ip', return_value='10.0.0.1'), \
patch.object(firewall_manager, '_run', side_effect=RuntimeError('docker gone')):
result = firewall_manager.ensure_cell_api_dnat()
self.assertFalse(result)