feat: fix cross-cell service access — DNS DNAT, service DNAT, Caddy routing

DNS A records now return the WireGuard server IP (10.0.0.1) instead of
Docker bridge VIPs so cross-cell peers resolve service names correctly
regardless of their bridge subnet. DNAT rules (wg0:53→cell-dns:53 and
wg0:80→cell-caddy:80) are applied at startup. Caddy routes by Host header,
eliminating the Docker bridge subnet conflict. Firewall cell rules allow
DNS and service (Caddy) traffic from linked cell subnets. Split-tunnel
AllowedIPs now dynamically includes connected-cell VPN subnets and drops
the 172.20.0.0/16 range. Peers with route_via set now receive full-tunnel
config (0.0.0.0/0) so all their traffic exits via the remote cell.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-02 03:12:09 -04:00
parent f2f15eb17e
commit 9a800e3b6b
11 changed files with 325 additions and 146 deletions
+119 -17
View File
@@ -193,10 +193,14 @@ def apply_peer_rules(peer_ip: str, settings: Dict[str, Any]) -> bool:
'-m', 'comment', '--comment', comment, '-j', target])
# --- Step 3 (inserted last → ends up at TOP of chain) ---
# Per-service rules — inserted in reverse dict order so first service ends up at top
for service, svc_ip in reversed(list(SERVICE_IPS.items())):
target = 'ACCEPT' if service in service_access else 'DROP'
_iptables(['-I', 'FORWARD', '-s', peer_ip, '-d', svc_ip,
# Service access via Caddy: DNS returns WG server IP for all services;
# ensure_service_dnat() routes wg0:80 to Caddy. One ACCEPT/DROP rule
# controls service access; CoreDNS ACL enforces per-name granularity.
caddy_ip = _get_caddy_container_ip()
if caddy_ip:
target = 'ACCEPT' if service_access else 'DROP'
_iptables(['-I', 'FORWARD', '-s', peer_ip, '-d', caddy_ip,
'-p', 'tcp', '--dport', '80',
'-m', 'comment', '--comment', comment, '-j', target])
logger.info(f"Applied rules for {peer_ip}: internet={internet_access} "
@@ -298,24 +302,50 @@ def _get_cell_api_ip() -> Optional[str]:
return r.stdout.strip()
def _get_dns_container_ip() -> str:
    """Return cell-dns container's Docker bridge IP. Falls back to 172.20.0.3.

    The inspect template emits one address per attached network, each
    followed by a space, and the first non-empty address is used. Without a
    separator, a container attached to more than one network would produce
    its addresses concatenated into a single unusable string.

    Returns:
        The first address ``docker inspect`` reports for ``cell-dns``, or
        ``'172.20.0.3'`` when the command yields no address or raises.
    """
    fallback = '172.20.0.3'
    try:
        r = _run(['docker', 'inspect', '--format',
                  '{{range .NetworkSettings.Networks}}{{.IPAddress}} {{end}}',
                  'cell-dns'], check=False)
        # split() drops the trailing separator and any empty entries
        # (networks that report no address).
        addresses = r.stdout.split()
        return addresses[0] if addresses else fallback
    except Exception:
        # Best effort: any failure to query Docker yields the default address.
        return fallback
def _get_caddy_container_ip() -> str:
    """Return cell-caddy container's Docker bridge IP. Falls back to 172.20.0.2.

    The inspect template emits one address per attached network, each
    followed by a space, and the first non-empty address is used. Without a
    separator, a container attached to more than one network would produce
    its addresses concatenated into a single unusable string.

    Returns:
        The first address ``docker inspect`` reports for ``cell-caddy``, or
        ``'172.20.0.2'`` when the command yields no address or raises.
    """
    fallback = '172.20.0.2'
    try:
        r = _run(['docker', 'inspect', '--format',
                  '{{range .NetworkSettings.Networks}}{{.IPAddress}} {{end}}',
                  'cell-caddy'], check=False)
        # split() drops the trailing separator and any empty entries
        # (networks that report no address).
        addresses = r.stdout.split()
        return addresses[0] if addresses else fallback
    except Exception:
        # Best effort: any failure to query Docker yields the default address.
        return fallback
def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str],
exit_relay: bool = False) -> bool:
"""Apply FORWARD rules for a cell-to-cell peer.
Traffic from vpn_subnet is allowed only to service VIPs listed in
inbound_services; all other cell traffic is DROPped. Cells get no
internet or peer access — only explicit service VIPs, plus the
cell-api port (3000) for permission-sync pushes arriving via DNAT.
internet or peer access — only explicit service access via Caddy on
port 80, plus the cell-api port (3000) for permission-sync pushes.
DNS (port 53) is always allowed so cell peers can resolve service names.
Service names resolve to the WG server IP; ensure_service_dnat() routes
wg0:80 to Caddy, which routes by Host header.
When exit_relay=True, the remote cell's peers can route internet
traffic through this cell (Phase 3). A broad ACCEPT for traffic
going out eth0 is added below per-service rules but above catch-all.
traffic through this cell (Phase 3).
Rule insertion order (first inserted = bottom, last inserted = top):
1. Catch-all DROP for the subnet (inserted first → bottom)
2. Exit relay ACCEPT (-o eth0) (if exit_relay, above catch-all)
3. Per-service ACCEPT/DROP (inserted in reversed() order)
4. API-sync ACCEPT (inserted last → top)
3. Service ACCEPT to Caddy port 80 (if any inbound_services)
4. DNS ACCEPT to cell-dns port 53 (UDP + TCP)
5. API-sync ACCEPT (inserted last → top)
"""
try:
tag = _cell_tag(cell_name)
@@ -326,17 +356,27 @@ def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str
'-m', 'comment', '--comment', tag, '-j', 'DROP'])
# Exit relay ACCEPT — allow internet-bound traffic from this cell's peers.
# Inserted ABOVE catch-all but BELOW per-service rules so service-level
# DROP rules still take effect for specific service VIPs.
if exit_relay:
_iptables(['-I', 'FORWARD', '-s', vpn_subnet, '-o', 'eth0',
'-m', 'comment', '--comment', tag, '-j', 'ACCEPT'])
# Per-service rules — inserted in reverse dict order, highest-priority last
for service, svc_ip in reversed(list(SERVICE_IPS.items())):
target = 'ACCEPT' if service in inbound_services else 'DROP'
_iptables(['-I', 'FORWARD', '-s', vpn_subnet, '-d', svc_ip,
'-m', 'comment', '--comment', tag, '-j', target])
# Service access via Caddy — DNAT wg0:80 → Caddy; Host header routes to service.
# Only add ACCEPT if this cell has any inbound services granted.
if inbound_services:
caddy_ip = _get_caddy_container_ip()
if caddy_ip:
_iptables(['-I', 'FORWARD', '-s', vpn_subnet, '-d', caddy_ip,
'-p', 'tcp', '--dport', '80',
'-m', 'comment', '--comment', tag, '-j', 'ACCEPT'])
# DNS ACCEPT — allow cross-cell peers to query CoreDNS via the WG server IP.
# ensure_dns_dnat() routes wg0:53 to cell-dns; FORWARD must allow it.
dns_ip = _get_dns_container_ip()
if dns_ip:
for proto in ('udp', 'tcp'):
_iptables(['-I', 'FORWARD', '-s', vpn_subnet, '-d', dns_ip,
'-p', proto, '--dport', '53',
'-m', 'comment', '--comment', tag, '-j', 'ACCEPT'])
# API permission-sync ACCEPT — inserted LAST so it goes to position 1 (above
# the catch-all DROP). Remote cells push permissions to our cell-api via the
@@ -415,6 +455,68 @@ def ensure_cell_api_dnat() -> bool:
return False
def ensure_dns_dnat() -> bool:
    """DNAT wg0:53 (UDP+TCP) → cell-dns:53 so VPN peers use the WG server IP for DNS.

    Peers are configured with DNS = <wg_server_ip>. Their DNS queries arrive on
    wg0:53 and must be forwarded to cell-dns inside the Docker bridge.

    Each rule is probed with ``iptables -C`` first and only added when the
    probe reports it missing, so repeated calls do not stack duplicates.
    All rules are attempted even if an earlier one fails.

    Returns:
        True when every required rule is present or was added successfully;
        False when the DNS container IP is unavailable, when any rule could
        not be added, or when an exception was raised.
    """
    try:
        dns_ip = _get_dns_container_ip()
        if not dns_ip:
            logger.warning('ensure_dns_dnat: cell-dns not found')
            return False

        # NOTE(review): a DNAT rule pointing at a previous container IP is not
        # removed here; if the IP changes, the stale rule may still match
        # first — confirm whether cleanup happens elsewhere.
        rules = []
        for proto in ('udp', 'tcp'):
            spec = ['PREROUTING', '-i', 'wg0', '-p', proto, '--dport', '53',
                    '-j', 'DNAT', '--to-destination', f'{dns_ip}:53']
            rules.append((['-t', 'nat', '-C'] + spec,
                          ['-t', 'nat', '-A'] + spec,
                          f'DNAT {proto}/53'))
        for proto in ('udp', 'tcp'):
            spec = ['FORWARD', '-i', 'wg0', '-o', 'eth0',
                    '-p', proto, '--dport', '53', '-j', 'ACCEPT']
            rules.append((['-C'] + spec, ['-I'] + spec, f'FORWARD {proto}/53'))

        ok = True
        for check_args, add_args, label in rules:
            if _wg_exec(['iptables'] + check_args).returncode == 0:
                continue  # rule already present
            if _wg_exec(['iptables'] + add_args).returncode != 0:
                # Previously ignored: report the failure instead of claiming success.
                logger.error(f'ensure_dns_dnat: failed to add {label} rule')
                ok = False

        if ok:
            logger.info(f'ensure_dns_dnat: wg0:53 → {dns_ip}:53')
        return ok
    except Exception as e:
        logger.error(f'ensure_dns_dnat: {e}')
        return False
def ensure_service_dnat() -> bool:
    """DNAT wg0:80 → cell-caddy:80 so VPN peers reach services via Host-header routing.

    All service DNS names resolve to the WG server IP. Traffic to wg0:80 is
    forwarded to Caddy, which routes to the correct backend by Host header.

    Each rule is probed with ``iptables -C`` first and only added when the
    probe reports it missing, so repeated calls do not stack duplicates.
    Both rules are attempted even if the first one fails.

    Returns:
        True when both rules are present or were added successfully; False
        when the Caddy container IP is unavailable, when a rule could not be
        added, or when an exception was raised.
    """
    try:
        caddy_ip = _get_caddy_container_ip()
        if not caddy_ip:
            logger.warning('ensure_service_dnat: cell-caddy not found')
            return False

        # NOTE(review): a DNAT rule pointing at a previous container IP is not
        # removed here; if the IP changes, the stale rule may still match
        # first — confirm whether cleanup happens elsewhere.
        dnat_spec = ['PREROUTING', '-i', 'wg0', '-p', 'tcp', '--dport', '80',
                     '-j', 'DNAT', '--to-destination', f'{caddy_ip}:80']
        fwd_spec = ['FORWARD', '-i', 'wg0', '-o', 'eth0',
                    '-p', 'tcp', '--dport', '80', '-j', 'ACCEPT']
        rules = (
            (['-t', 'nat', '-C'] + dnat_spec, ['-t', 'nat', '-A'] + dnat_spec, 'DNAT tcp/80'),
            (['-C'] + fwd_spec, ['-I'] + fwd_spec, 'FORWARD tcp/80'),
        )

        ok = True
        for check_args, add_args, label in rules:
            if _wg_exec(['iptables'] + check_args).returncode == 0:
                continue  # rule already present
            if _wg_exec(['iptables'] + add_args).returncode != 0:
                # Previously ignored: report the failure instead of claiming success.
                logger.error(f'ensure_service_dnat: failed to add {label} rule')
                ok = False

        if ok:
            logger.info(f'ensure_service_dnat: wg0:80 → {caddy_ip}:80')
        return ok
    except Exception as e:
        logger.error(f'ensure_service_dnat: {e}')
        return False
# ---------------------------------------------------------------------------
# DNS ACL (CoreDNS Corefile generation)
# ---------------------------------------------------------------------------