Fix cross-cell domain access: scope DNAT rules, add Docker→wg0 routing

- firewall_manager: add _get_wg_server_ip() helper; scope ensure_cell_api_dnat(),
  ensure_dns_dnat(), ensure_service_dnat() DNAT rules with -d server_ip; add
  ensure_wg_masquerade() (Docker→wg0 MASQUERADE+FORWARD) and
  ensure_cell_subnet_routes() (host routes via docker run busybox)
- wireguard_manager: scope PostUp DNAT rules with -d server_ip in generate_config()
  and ensure_postup_dnat(); add Docker→wg0 MASQUERADE+FORWARD rules
- app.py: call ensure_wg_masquerade() and ensure_cell_subnet_routes() in
  _apply_startup_enforcement()
- tests/test_firewall_manager.py: mock _get_wg_server_ip, add
  test_dnat_is_scoped_to_server_ip and test_returns_false_when_wg_server_ip_not_found
- tests/e2e/wg/test_cell_to_cell_routing.py: rewrite to use dynamic config
  (no hardcoded IPs/ports), add latency and domain access tests

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-05 12:37:02 -04:00
parent 1e1bda4679
commit e2c50c381a
5 changed files with 442 additions and 145 deletions
+126 -24
View File
@@ -325,6 +325,22 @@ def _get_dns_container_ip() -> str:
return '172.20.0.3'
def _get_wg_server_ip(
    wg_conf_path: str = '/app/config/wireguard/wg_confs/wg0.conf',
) -> Optional[str]:
    """Return the WireGuard server's VPN IP from wg0.conf (e.g. '10.0.0.1').

    Scans the config for ``Address = ...`` lines. The value may be a
    comma-separated list (e.g. ``10.0.0.1/24, fd00::1/64``) and may carry a
    trailing ``#`` comment, so each entry is parsed on its own; one extra or
    malformed entry no longer makes the whole lookup fail.

    An IPv4 address is preferred because the callers in this module pass the
    result to ``iptables ... -d <ip>``. If only non-IPv4 addresses are
    present, the first parseable one is returned.

    Args:
        wg_conf_path: Path of the WireGuard config to read. Defaults to the
            path this helper has always read.

    Returns:
        The interface IP without its prefix length, or ``None`` when the
        file cannot be read or holds no parseable ``Address`` entry.
    """
    import ipaddress as _ipaddress

    fallback: Optional[str] = None
    try:
        with open(wg_conf_path) as f:
            for raw_line in f:
                # Drop inline comments before looking at the key/value pair.
                line = raw_line.split('#', 1)[0]
                key, sep, value = line.partition('=')
                # Compare the whole key so e.g. 'AddressFoo' is not matched.
                if not sep or key.strip().lower() != 'address':
                    continue
                for entry in value.split(','):
                    entry = entry.strip()
                    if not entry:
                        continue
                    try:
                        ip = _ipaddress.ip_interface(entry).ip
                    except ValueError:
                        # Skip a malformed entry; a later one may be valid.
                        continue
                    if ip.version == 4:
                        return str(ip)
                    if fallback is None:
                        fallback = str(ip)
    except (OSError, ValueError):
        # Best-effort lookup: a missing/unreadable/undecodable file means
        # "unknown"; callers log their own warning when they get None.
        return None
    return fallback
def _get_caddy_container_ip() -> str:
"""Return cell-caddy container's Docker bridge IP. Falls back to 172.20.0.2."""
try:
@@ -455,14 +471,19 @@ def ensure_forward_stateful() -> bool:
def ensure_cell_api_dnat() -> bool:
"""DNAT wg0:3000 → cell-api:3000 inside cell-wireguard.
"""DNAT wg0:3000 (scoped to WG server IP) → cell-api:3000 inside cell-wireguard.
Remote cells push permission updates over the WireGuard tunnel to our
wg0 interface on port 3000. Since cell-api only listens on the Docker
bridge, we need a DNAT rule inside cell-wireguard's namespace to forward
that traffic. Called on every startup so rules survive container restarts.
wg0 interface on port 3000. The DNAT is scoped to -d {server_ip} so that
cross-cell traffic destined for another cell's API (different WG IP) is
not intercepted. Called on every startup so rules survive container restarts.
"""
try:
server_ip = _get_wg_server_ip()
if not server_ip:
logger.warning('ensure_cell_api_dnat: could not determine WG server IP')
return False
r = _run(['docker', 'inspect', '--format',
'{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}',
'cell-api'], check=False)
@@ -471,10 +492,12 @@ def ensure_cell_api_dnat() -> bool:
logger.warning('ensure_cell_api_dnat: cell-api container not found or no IP')
return False
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
'--dport', '3000', '-j', 'DNAT', '--to-destination', f'{api_ip}:3000']
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
'--dport', '3000', '-j', 'DNAT', '--to-destination', f'{api_ip}:3000']
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-d', server_ip,
'-p', 'tcp', '--dport', '3000',
'-j', 'DNAT', '--to-destination', f'{api_ip}:3000']
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-d', server_ip,
'-p', 'tcp', '--dport', '3000',
'-j', 'DNAT', '--to-destination', f'{api_ip}:3000']
if _wg_exec(['iptables'] + dnat_check).returncode != 0:
_wg_exec(['iptables'] + dnat_add)
@@ -500,21 +523,27 @@ def ensure_cell_api_dnat() -> bool:
def ensure_dns_dnat() -> bool:
"""DNAT wg0:53 (UDP+TCP) → cell-dns:53 so VPN peers use the WG server IP for DNS.
"""DNAT wg0:53 (scoped to WG server IP) → cell-dns:53.
Peers are configured with DNS = <wg_server_ip>. Their DNS queries arrive on
wg0:53 and must be forwarded to cell-dns inside the Docker bridge.
Peers send DNS queries to the WG server IP. DNAT is scoped with -d {server_ip}
so cross-cell DNS traffic destined for another cell is forwarded, not hijacked.
"""
try:
server_ip = _get_wg_server_ip()
if not server_ip:
logger.warning('ensure_dns_dnat: could not determine WG server IP')
return False
dns_ip = _get_dns_container_ip()
if not dns_ip:
logger.warning('ensure_dns_dnat: cell-dns not found')
return False
for proto in ('udp', 'tcp'):
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-p', proto,
'--dport', '53', '-j', 'DNAT', '--to-destination', f'{dns_ip}:53']
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-p', proto,
'--dport', '53', '-j', 'DNAT', '--to-destination', f'{dns_ip}:53']
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-d', server_ip,
'-p', proto, '--dport', '53',
'-j', 'DNAT', '--to-destination', f'{dns_ip}:53']
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-d', server_ip,
'-p', proto, '--dport', '53',
'-j', 'DNAT', '--to-destination', f'{dns_ip}:53']
if _wg_exec(['iptables'] + dnat_check).returncode != 0:
_wg_exec(['iptables'] + dnat_add)
for proto in ('udp', 'tcp'):
@@ -524,7 +553,7 @@ def ensure_dns_dnat() -> bool:
'-p', proto, '--dport', '53', '-j', 'ACCEPT']
if _wg_exec(['iptables'] + fwd_check).returncode != 0:
_wg_exec(['iptables'] + fwd_add)
logger.info(f'ensure_dns_dnat: wg0:53 → {dns_ip}:53')
logger.info(f'ensure_dns_dnat: wg0:{server_ip}:53 → {dns_ip}:53')
return True
except Exception as e:
logger.error(f'ensure_dns_dnat: {e}')
@@ -532,20 +561,26 @@ def ensure_dns_dnat() -> bool:
def ensure_service_dnat() -> bool:
"""DNAT wg0:80 → cell-caddy:80 so VPN peers reach services via Host-header routing.
"""DNAT wg0:80 (scoped to WG server IP) → cell-caddy:80.
All service DNS names resolve to the WG server IP. Traffic to wg0:80 is
forwarded to Caddy, which routes to the correct backend by Host header.
Service DNS names resolve to the WG server IP. DNAT is scoped with -d {server_ip}
so that cross-cell HTTP traffic destined for another cell passes through unmodified.
"""
try:
server_ip = _get_wg_server_ip()
if not server_ip:
logger.warning('ensure_service_dnat: could not determine WG server IP')
return False
caddy_ip = _get_caddy_container_ip()
if not caddy_ip:
logger.warning('ensure_service_dnat: cell-caddy not found')
return False
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
'--dport', '80', '-j', 'DNAT', '--to-destination', f'{caddy_ip}:80']
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
'--dport', '80', '-j', 'DNAT', '--to-destination', f'{caddy_ip}:80']
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-d', server_ip,
'-p', 'tcp', '--dport', '80',
'-j', 'DNAT', '--to-destination', f'{caddy_ip}:80']
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-d', server_ip,
'-p', 'tcp', '--dport', '80',
'-j', 'DNAT', '--to-destination', f'{caddy_ip}:80']
if _wg_exec(['iptables'] + dnat_check).returncode != 0:
_wg_exec(['iptables'] + dnat_add)
fwd_check = ['-C', 'FORWARD', '-i', 'wg0', '-o', 'eth0',
@@ -554,13 +589,80 @@ def ensure_service_dnat() -> bool:
'-p', 'tcp', '--dport', '80', '-j', 'ACCEPT']
if _wg_exec(['iptables'] + fwd_check).returncode != 0:
_wg_exec(['iptables'] + fwd_add)
logger.info(f'ensure_service_dnat: wg0:80 → {caddy_ip}:80')
logger.info(f'ensure_service_dnat: wg0:{server_ip}:80 → {caddy_ip}:80')
return True
except Exception as e:
logger.error(f'ensure_service_dnat: {e}')
return False
def ensure_wg_masquerade() -> bool:
    """Install the Docker-bridge → wg0 MASQUERADE and FORWARD rules.

    Containers on the Docker bridge (cell-dns and others) reach remote cell
    subnets through cell-wireguard's wg0. Their 172.20.x.x source addresses
    cannot be routed back over the tunnel, so outgoing traffic is
    masqueraded to wg0's address and explicitly accepted in FORWARD.

    Each rule is probed with ``iptables -C`` first and only added when the
    probe reports it missing, so repeated calls do not duplicate rules.

    Returns:
        True once both rules have been processed, False if running a
        command raised an exception (the error is logged).
    """
    bridge_cidr = '172.20.0.0/16'
    # (table selector, flag used to add the rule, chain + match + target)
    rule_specs = (
        (['-t', 'nat'], '-A',
         ['POSTROUTING', '-o', 'wg0', '-s', bridge_cidr, '-j', 'MASQUERADE']),
        ([], '-I',
         ['FORWARD', '-i', 'eth0', '-o', 'wg0', '-s', bridge_cidr, '-j', 'ACCEPT']),
    )
    try:
        for table_args, add_flag, spec in rule_specs:
            probe = _wg_exec(['iptables'] + table_args + ['-C'] + spec)
            if probe.returncode == 0:
                # Rule already present; nothing to do.
                continue
            _wg_exec(['iptables'] + table_args + [add_flag] + spec)
    except Exception as e:
        logger.error(f'ensure_wg_masquerade: {e}')
        return False
    logger.info('ensure_wg_masquerade: Docker→wg0 MASQUERADE+FORWARD configured')
    return True
def ensure_cell_subnet_routes(cell_links: List[Dict[str, Any]]) -> None:
    """Add host-namespace routes for remote cell VPN subnets via cell-wireguard.

    Docker containers (cell-dns, etc.) use the host's routing table to reach
    non-bridge destinations. Without a route, packets to the VPN subnets of
    connected cells hit the host's default gateway instead of cell-wireguard.

    For every link with a ``vpn_subnet``, a throwaway
    ``docker run --rm --network host --cap-add NET_ADMIN busybox`` container
    runs ``ip route replace <subnet> via <cell-wireguard bridge IP>`` in the
    host network namespace.

    The subnet is validated before anything is executed: values that are not
    an IP network are skipped, and a zero-length prefix (``0.0.0.0/0``,
    ``::/0``) is refused so link data can never replace the host's default
    route. Host bits are normalised (``10.0.1.5/24`` → ``10.0.1.0/24``).

    Args:
        cell_links: Link records; only the ``vpn_subnet`` key is read.
            Entries without it are ignored.

    Returns:
        None. Failures are logged per subnet and do not stop the loop.
    """
    import ipaddress as _ipaddress

    if not cell_links:
        return
    WG_BRIDGE_IP = '172.20.0.9'  # cell-wireguard's fixed Docker IP (docker-compose.yml)
    for link in cell_links:
        subnet = link.get('vpn_subnet', '')
        if not subnet:
            continue
        try:
            network = _ipaddress.ip_network(str(subnet).strip(), strict=False)
        except ValueError:
            logger.warning(
                f'ensure_cell_subnet_routes: {subnet!r} is not a valid subnet, skipping'
            )
            continue
        if network.prefixlen == 0:
            # A cell subnet must never be the default route of the host.
            logger.warning(
                f'ensure_cell_subnet_routes: refusing default route {network}'
            )
            continue
        route = str(network)
        try:
            result = _run(
                ['docker', 'run', '--rm',
                 '--network', 'host',
                 '--cap-add', 'NET_ADMIN',
                 'busybox',
                 'ip', 'route', 'replace', route, 'via', WG_BRIDGE_IP],
                check=False
            )
            if result.returncode == 0:
                logger.info(f'ensure_cell_subnet_routes: {route} via {WG_BRIDGE_IP}')
            else:
                logger.warning(
                    f'ensure_cell_subnet_routes: {route} failed: {result.stderr.strip()}'
                )
        except Exception as e:
            logger.warning(f'ensure_cell_subnet_routes: {route}: {e}')
# ---------------------------------------------------------------------------
# DNS ACL (CoreDNS Corefile generation)
# ---------------------------------------------------------------------------