Fix cross-cell domain access: scope DNAT rules, add Docker→wg0 routing
- firewall_manager: add _get_wg_server_ip() helper; scope the DNAT rules in ensure_cell_api_dnat(), ensure_dns_dnat() and ensure_service_dnat() with -d server_ip; add ensure_wg_masquerade() (Docker→wg0 MASQUERADE+FORWARD) and ensure_cell_subnet_routes() (host routes via docker run busybox)
- wireguard_manager: scope PostUp DNAT rules with -d server_ip in generate_config() and ensure_postup_dnat(); add Docker→wg0 MASQUERADE+FORWARD rules
- app.py: call ensure_wg_masquerade() and ensure_cell_subnet_routes() in _apply_startup_enforcement()
- tests/test_firewall_manager.py: mock _get_wg_server_ip; add test_dnat_is_scoped_to_server_ip and test_returns_false_when_wg_server_ip_not_found
- tests/e2e/wg/test_cell_to_cell_routing.py: rewrite to use dynamic config (no hardcoded IPs/ports); add latency and domain access tests

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+126
-24
@@ -325,6 +325,22 @@ def _get_dns_container_ip() -> str:
|
||||
return '172.20.0.3'
|
||||
|
||||
|
||||
def _get_wg_server_ip(
    wg_conf_path: str = '/app/config/wireguard/wg_confs/wg0.conf',
) -> Optional[str]:
    """Return the WireGuard server's VPN IP from wg0.conf (e.g. '10.0.0.1').

    Scans the config for ``Address = ...`` lines. Each line may hold a
    comma-separated list of addresses (with or without a prefix length) and
    may carry a trailing ``# comment``; both forms are handled here.

    The first IPv4 address found is returned, because the callers in this
    module pass the value to IPv4 ``iptables -d``. If the config lists no
    IPv4 address, the first parseable address of any family is returned.

    Args:
        wg_conf_path: Path of the WireGuard config to read. Defaults to the
            wg0.conf location used by this module.

    Returns:
        The address as a string without its prefix length, or None when the
        file cannot be read or contains no parseable Address entry.
    """
    # Local import keeps this helper self-contained, as in the original.
    import ipaddress as _ipaddress

    fallback: Optional[str] = None
    try:
        with open(wg_conf_path) as f:
            for raw_line in f:
                # Ignore trailing comments before splitting key from value.
                line = raw_line.split('#', 1)[0]
                key, sep, value = line.partition('=')
                if not sep or key.strip().lower() != 'address':
                    continue
                for candidate in value.split(','):
                    candidate = candidate.strip()
                    if not candidate:
                        continue
                    try:
                        ip = _ipaddress.ip_interface(candidate).ip
                    except ValueError:
                        # Skip a malformed entry; a later one may be valid.
                        continue
                    if ip.version == 4:
                        return str(ip)
                    if fallback is None:
                        fallback = str(ip)
    except (OSError, ValueError):
        # Missing, unreadable or undecodable config: best-effort lookup, the
        # callers log and bail out when they receive None.
        return None
    return fallback
|
||||
|
||||
|
||||
def _get_caddy_container_ip() -> str:
|
||||
"""Return cell-caddy container's Docker bridge IP. Falls back to 172.20.0.2."""
|
||||
try:
|
||||
@@ -455,14 +471,19 @@ def ensure_forward_stateful() -> bool:
|
||||
|
||||
|
||||
def ensure_cell_api_dnat() -> bool:
|
||||
"""DNAT wg0:3000 → cell-api:3000 inside cell-wireguard.
|
||||
"""DNAT wg0:3000 (scoped to WG server IP) → cell-api:3000 inside cell-wireguard.
|
||||
|
||||
Remote cells push permission updates over the WireGuard tunnel to our
|
||||
wg0 interface on port 3000. Since cell-api only listens on the Docker
|
||||
bridge, we need a DNAT rule inside cell-wireguard's namespace to forward
|
||||
that traffic. Called on every startup so rules survive container restarts.
|
||||
wg0 interface on port 3000. The DNAT is scoped to -d {server_ip} so that
|
||||
cross-cell traffic destined for another cell's API (different WG IP) is
|
||||
not intercepted. Called on every startup so rules survive container restarts.
|
||||
"""
|
||||
try:
|
||||
server_ip = _get_wg_server_ip()
|
||||
if not server_ip:
|
||||
logger.warning('ensure_cell_api_dnat: could not determine WG server IP')
|
||||
return False
|
||||
|
||||
r = _run(['docker', 'inspect', '--format',
|
||||
'{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}',
|
||||
'cell-api'], check=False)
|
||||
@@ -471,10 +492,12 @@ def ensure_cell_api_dnat() -> bool:
|
||||
logger.warning('ensure_cell_api_dnat: cell-api container not found or no IP')
|
||||
return False
|
||||
|
||||
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
|
||||
'--dport', '3000', '-j', 'DNAT', '--to-destination', f'{api_ip}:3000']
|
||||
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
|
||||
'--dport', '3000', '-j', 'DNAT', '--to-destination', f'{api_ip}:3000']
|
||||
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-d', server_ip,
|
||||
'-p', 'tcp', '--dport', '3000',
|
||||
'-j', 'DNAT', '--to-destination', f'{api_ip}:3000']
|
||||
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-d', server_ip,
|
||||
'-p', 'tcp', '--dport', '3000',
|
||||
'-j', 'DNAT', '--to-destination', f'{api_ip}:3000']
|
||||
if _wg_exec(['iptables'] + dnat_check).returncode != 0:
|
||||
_wg_exec(['iptables'] + dnat_add)
|
||||
|
||||
@@ -500,21 +523,27 @@ def ensure_cell_api_dnat() -> bool:
|
||||
|
||||
|
||||
def ensure_dns_dnat() -> bool:
|
||||
"""DNAT wg0:53 (UDP+TCP) → cell-dns:53 so VPN peers use the WG server IP for DNS.
|
||||
"""DNAT wg0:53 (scoped to WG server IP) → cell-dns:53.
|
||||
|
||||
Peers are configured with DNS = <wg_server_ip>. Their DNS queries arrive on
|
||||
wg0:53 and must be forwarded to cell-dns inside the Docker bridge.
|
||||
Peers send DNS queries to the WG server IP. DNAT is scoped with -d {server_ip}
|
||||
so cross-cell DNS traffic destined for another cell is forwarded, not hijacked.
|
||||
"""
|
||||
try:
|
||||
server_ip = _get_wg_server_ip()
|
||||
if not server_ip:
|
||||
logger.warning('ensure_dns_dnat: could not determine WG server IP')
|
||||
return False
|
||||
dns_ip = _get_dns_container_ip()
|
||||
if not dns_ip:
|
||||
logger.warning('ensure_dns_dnat: cell-dns not found')
|
||||
return False
|
||||
for proto in ('udp', 'tcp'):
|
||||
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-p', proto,
|
||||
'--dport', '53', '-j', 'DNAT', '--to-destination', f'{dns_ip}:53']
|
||||
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-p', proto,
|
||||
'--dport', '53', '-j', 'DNAT', '--to-destination', f'{dns_ip}:53']
|
||||
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-d', server_ip,
|
||||
'-p', proto, '--dport', '53',
|
||||
'-j', 'DNAT', '--to-destination', f'{dns_ip}:53']
|
||||
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-d', server_ip,
|
||||
'-p', proto, '--dport', '53',
|
||||
'-j', 'DNAT', '--to-destination', f'{dns_ip}:53']
|
||||
if _wg_exec(['iptables'] + dnat_check).returncode != 0:
|
||||
_wg_exec(['iptables'] + dnat_add)
|
||||
for proto in ('udp', 'tcp'):
|
||||
@@ -524,7 +553,7 @@ def ensure_dns_dnat() -> bool:
|
||||
'-p', proto, '--dport', '53', '-j', 'ACCEPT']
|
||||
if _wg_exec(['iptables'] + fwd_check).returncode != 0:
|
||||
_wg_exec(['iptables'] + fwd_add)
|
||||
logger.info(f'ensure_dns_dnat: wg0:53 → {dns_ip}:53')
|
||||
logger.info(f'ensure_dns_dnat: wg0:{server_ip}:53 → {dns_ip}:53')
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f'ensure_dns_dnat: {e}')
|
||||
@@ -532,20 +561,26 @@ def ensure_dns_dnat() -> bool:
|
||||
|
||||
|
||||
def ensure_service_dnat() -> bool:
|
||||
"""DNAT wg0:80 → cell-caddy:80 so VPN peers reach services via Host-header routing.
|
||||
"""DNAT wg0:80 (scoped to WG server IP) → cell-caddy:80.
|
||||
|
||||
All service DNS names resolve to the WG server IP. Traffic to wg0:80 is
|
||||
forwarded to Caddy, which routes to the correct backend by Host header.
|
||||
Service DNS names resolve to the WG server IP. DNAT is scoped with -d {server_ip}
|
||||
so that cross-cell HTTP traffic destined for another cell passes through unmodified.
|
||||
"""
|
||||
try:
|
||||
server_ip = _get_wg_server_ip()
|
||||
if not server_ip:
|
||||
logger.warning('ensure_service_dnat: could not determine WG server IP')
|
||||
return False
|
||||
caddy_ip = _get_caddy_container_ip()
|
||||
if not caddy_ip:
|
||||
logger.warning('ensure_service_dnat: cell-caddy not found')
|
||||
return False
|
||||
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
|
||||
'--dport', '80', '-j', 'DNAT', '--to-destination', f'{caddy_ip}:80']
|
||||
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
|
||||
'--dport', '80', '-j', 'DNAT', '--to-destination', f'{caddy_ip}:80']
|
||||
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-d', server_ip,
|
||||
'-p', 'tcp', '--dport', '80',
|
||||
'-j', 'DNAT', '--to-destination', f'{caddy_ip}:80']
|
||||
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-d', server_ip,
|
||||
'-p', 'tcp', '--dport', '80',
|
||||
'-j', 'DNAT', '--to-destination', f'{caddy_ip}:80']
|
||||
if _wg_exec(['iptables'] + dnat_check).returncode != 0:
|
||||
_wg_exec(['iptables'] + dnat_add)
|
||||
fwd_check = ['-C', 'FORWARD', '-i', 'wg0', '-o', 'eth0',
|
||||
@@ -554,13 +589,80 @@ def ensure_service_dnat() -> bool:
|
||||
'-p', 'tcp', '--dport', '80', '-j', 'ACCEPT']
|
||||
if _wg_exec(['iptables'] + fwd_check).returncode != 0:
|
||||
_wg_exec(['iptables'] + fwd_add)
|
||||
logger.info(f'ensure_service_dnat: wg0:80 → {caddy_ip}:80')
|
||||
logger.info(f'ensure_service_dnat: wg0:{server_ip}:80 → {caddy_ip}:80')
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f'ensure_service_dnat: {e}')
|
||||
return False
|
||||
|
||||
|
||||
def ensure_wg_masquerade() -> bool:
    """Ensure Docker-bridge traffic can leave via wg0 (MASQUERADE + FORWARD).

    Two iptables rules are checked inside cell-wireguard and added only when
    the check reports them missing:

    * nat/POSTROUTING: MASQUERADE packets from 172.20.0.0/16 going out wg0,
      so the source is rewritten to wg0's address and replies can come back
      over the tunnel.
    * filter/FORWARD: ACCEPT packets from 172.20.0.0/16 entering on eth0 and
      leaving on wg0 (inserted at the top of the chain).

    Returns:
        True once both rules have been processed, False if any step raised.
    """
    docker_net = '172.20.0.0/16'
    nat_rule = ['POSTROUTING', '-o', 'wg0', '-s', docker_net, '-j', 'MASQUERADE']
    forward_rule = ['FORWARD', '-i', 'eth0', '-o', 'wg0', '-s', docker_net, '-j', 'ACCEPT']

    # (existence probe, command that installs the rule), in execution order.
    steps = (
        (['-t', 'nat', '-C'] + nat_rule, ['-t', 'nat', '-A'] + nat_rule),
        (['-C'] + forward_rule, ['-I'] + forward_rule),
    )

    try:
        for probe, install in steps:
            # A non-zero exit from `iptables -C` means the rule is absent.
            if _wg_exec(['iptables'] + probe).returncode != 0:
                _wg_exec(['iptables'] + install)

        logger.info('ensure_wg_masquerade: Docker→wg0 MASQUERADE+FORWARD configured')
        return True
    except Exception as e:
        logger.error(f'ensure_wg_masquerade: {e}')
        return False
|
||||
|
||||
|
||||
def ensure_cell_subnet_routes(cell_links: List[Dict[str, Any]]) -> None:
    """Add host-namespace routes for remote cell VPN subnets via cell-wireguard.

    For every link with a ``vpn_subnet`` value, a throwaway
    ``docker run --rm --network host --cap-add NET_ADMIN busybox`` container
    runs ``ip route replace <subnet> via <cell-wireguard bridge IP>``, so the
    route lands in the host network namespace.

    Each subnet is validated and normalised with ``ipaddress.ip_network``
    before it is handed to the privileged container. A malformed entry is
    logged and skipped. Host bits are cleared, so '10.0.1.5/24' becomes
    '10.0.1.0/24'.

    Args:
        cell_links: Link records; only the ``vpn_subnet`` key is read.
            Entries without it are ignored. An empty or None list is a no-op.

    Returns:
        None. Failures are logged as warnings and never raised, so one bad
        link does not prevent routes for the remaining links.
    """
    # Local import so this block does not depend on a module-level import.
    import ipaddress as _ipaddress

    if not cell_links:
        return

    WG_BRIDGE_IP = '172.20.0.9'  # cell-wireguard's fixed Docker IP (docker-compose.yml)

    for link in cell_links:
        raw_subnet = link.get('vpn_subnet', '')
        if not raw_subnet:
            continue

        try:
            subnet = str(_ipaddress.ip_network(str(raw_subnet).strip(), strict=False))
        except ValueError:
            # Never pass an unparseable value to a NET_ADMIN host-network command.
            logger.warning(
                f'ensure_cell_subnet_routes: {raw_subnet} failed: not a valid IP network'
            )
            continue

        try:
            result = _run(
                ['docker', 'run', '--rm',
                 '--network', 'host',
                 '--cap-add', 'NET_ADMIN',
                 'busybox',
                 'ip', 'route', 'replace', subnet, 'via', WG_BRIDGE_IP],
                check=False
            )
            if result.returncode == 0:
                logger.info(f'ensure_cell_subnet_routes: {subnet} via {WG_BRIDGE_IP}')
            else:
                # stderr may be None if the runner did not capture output.
                stderr = (result.stderr or '').strip()
                logger.warning(
                    f'ensure_cell_subnet_routes: {subnet} failed: {stderr}'
                )
        except Exception as e:
            logger.warning(f'ensure_cell_subnet_routes: {subnet}: {e}')
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DNS ACL (CoreDNS Corefile generation)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user