Files
pic/api/firewall_manager.py
T
roof 8ea834e108 feat: Phase 3 - per-peer internet routing via exit cell
Adds the ability to route a specific peer's internet traffic through a
connected cell acting as an exit relay.

Cell A side:
- PUT /api/peers/<peer>/route-via {"via_cell": "cellB"} sets route_via
- Updates WG AllowedIPs to include 0.0.0.0/0 for the exit cell peer
- Adds ip rule + ip route in policy table inside cell-wireguard so the
  specific peer's traffic egresses via cellB's WG IP
- Sets exit_relay_active on the cell link and pushes use_as_exit_relay=True
  to cellB via peer-sync

Cell B side:
- Receives use_as_exit_relay in the peer-sync payload
- Calls apply_cell_rules(..., exit_relay=True) to add FORWARD -o eth0 ACCEPT
- Stores remote_exit_relay_active flag for startup recovery

Startup recovery:
- apply_all_cell_rules passes exit_relay=remote_exit_relay_active (cellB)
- _apply_startup_enforcement reapplies ip rule for each peer with route_via (cellA)
  since policy routing rules don't survive container restart

peer_registry gets route_via field with lazy migration.
22 new tests across test_cell_link_manager, test_peer_registry, test_peer_route_via.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-01 16:23:31 -04:00

511 lines
21 KiB
Python

#!/usr/bin/env python3
"""
Firewall Manager for Personal Internet Cell
Manages per-peer iptables rules in the WireGuard container and DNS ACLs in CoreDNS.
"""
import os
import subprocess
import logging
import re
from typing import Dict, List, Any, Optional
logger = logging.getLogger(__name__)
# Virtual IPs assigned to Caddy per service — must match Caddyfile listeners.
# Populated at import time from the default subnet; call update_service_ips()
# whenever ip_range changes so all downstream callers see the new values.
# NOTE: other modules import this dict by reference, so it must only ever be
# mutated in place (clear + update), never rebound to a new dict.
SERVICE_IPS: Dict[str, str] = {
    'calendar': '172.20.0.21',
    'files': '172.20.0.22',
    'mail': '172.20.0.23',
    'webdav': '172.20.0.24',
}
def update_service_ips(ip_range: str) -> None:
    """Recompute SERVICE_IPS for *ip_range*, mutating the dict in place.

    In-place mutation (clear + update) keeps every module that imported
    SERVICE_IPS pointing at the refreshed mapping.
    """
    from ip_utils import get_virtual_ips
    refreshed = get_virtual_ips(ip_range)
    SERVICE_IPS.clear()
    SERVICE_IPS.update(refreshed)
# Internal RFC-1918 ranges (peer traffic stays inside these = cell-only access)
PRIVATE_NETS = ['10.0.0.0/8', '172.16.0.0/12', '192.168.0.0/16']
# Container names targeted by the docker-exec helpers below
WIREGUARD_CONTAINER = 'cell-wireguard'
CADDY_CONTAINER = 'cell-caddy'
# API-side path of the generated CoreDNS Corefile (bind-mounted into cell-dns)
COREFILE_PATH = '/app/config/dns/Corefile'
ZONE_DATA_DIR = '/data'  # inside CoreDNS container; mounted from ./data/dns
def _run(cmd: List[str], check: bool = True) -> subprocess.CompletedProcess:
"""Run a shell command and return the result."""
try:
result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
if check and result.returncode != 0:
logger.warning(f"Command {cmd} exited {result.returncode}: {result.stderr.strip()}")
return result
except Exception as e:
logger.error(f"Command {cmd} failed: {e}")
raise
def _wg_exec(args: List[str]) -> subprocess.CompletedProcess:
    """Execute a command inside the WireGuard container via docker exec."""
    full_cmd = ['docker', 'exec', WIREGUARD_CONTAINER, *args]
    return _run(full_cmd, check=False)
def _caddy_exec(args: List[str]) -> subprocess.CompletedProcess:
    """Execute a command inside the Caddy container via docker exec."""
    full_cmd = ['docker', 'exec', CADDY_CONTAINER, *args]
    return _run(full_cmd, check=False)
# ---------------------------------------------------------------------------
# Virtual IP management (Caddy container)
# ---------------------------------------------------------------------------
def ensure_caddy_virtual_ips() -> bool:
    """Ensure each service's virtual IP is present on Caddy's eth0.

    Idempotent: addresses already listed by `ip addr show eth0` are
    skipped. Returns True when the scan completed (even if individual
    adds failed — those are logged), False on an unexpected error.
    """
    try:
        shown = _caddy_exec(['ip', 'addr', 'show', 'eth0']).stdout
        for svc, vip in SERVICE_IPS.items():
            if vip in shown:
                continue
            res = _caddy_exec(['ip', 'addr', 'add', f'{vip}/16', 'dev', 'eth0'])
            if res.returncode != 0:
                logger.warning(f"Failed to add virtual IP {vip}: {res.stderr.strip()}")
            else:
                logger.info(f"Added virtual IP {vip} for {svc} to Caddy eth0")
        return True
    except Exception as exc:
        logger.error(f"ensure_caddy_virtual_ips failed: {exc}")
        return False
# ---------------------------------------------------------------------------
# iptables rule helpers
# ---------------------------------------------------------------------------
def _iptables(args: List[str], check: bool = False) -> subprocess.CompletedProcess:
    """Run iptables inside the WireGuard container.

    FIX: *check* was previously accepted but silently ignored — the call
    was routed through _wg_exec, which hard-codes check=False. It is now
    forwarded to _run so callers can opt into warning logs on non-zero
    exit. The default (False) preserves the old behavior for every
    existing caller (rule-existence probes expect non-zero exits).
    """
    return _run(['docker', 'exec', WIREGUARD_CONTAINER, 'iptables'] + args,
                check=check)
def _rule_exists(chain: str, rule_args: List[str]) -> bool:
    """True when `iptables -C` reports the rule is already in *chain*."""
    probe = _iptables(['-C', chain, *rule_args])
    return probe.returncode == 0
def _ensure_rule(chain: str, rule_args: List[str]) -> None:
    """Idempotently insert the rule at the top of *chain*."""
    if _rule_exists(chain, rule_args):
        return
    _iptables(['-I', chain, *rule_args])
def _delete_rule(chain: str, rule_args: List[str]) -> None:
    """Remove every copy of the rule from *chain*; no-op when absent."""
    while _rule_exists(chain, rule_args):
        _iptables(['-D', chain, *rule_args])
# ---------------------------------------------------------------------------
# Per-peer rule management
# ---------------------------------------------------------------------------
def _peer_comment(peer_ip: str) -> str:
# SECURITY: append a non-numeric, non-dash suffix so peer comments cannot
# be substrings of one another. Without this, the comment for 10.0.0.1
# ('pic-peer-10-0-0-1') is a prefix of 10.0.0.10..19 and a naive
# substring match would delete unrelated peers' rules.
return f'pic-peer-{peer_ip.replace(".", "-")}/32'
def clear_peer_rules(peer_ip: str) -> None:
    """Atomically strip every FORWARD rule tagged for *peer_ip*.

    Works via iptables-save → filter → iptables-restore, removing all
    matching rules in one shot while preserving the order of the rest.

    SECURITY: the regex anchors the comment as a complete --comment token
    (surrounding quotes or trailing whitespace/end-of-line), never a bare
    substring. Even with the unique '/32' suffix from _peer_comment we
    keep exact-token matching so a future change to the tag format cannot
    silently re-introduce the substring-deletion bug.
    """
    tag = _peer_comment(peer_ip)
    token_re = re.compile(rf'--comment\s+["\']?{re.escape(tag)}["\']?(\s|$)')
    try:
        dump = _wg_exec(['iptables-save'])
        if dump.returncode != 0:
            return
        original = dump.stdout.splitlines()
        kept = [line for line in original if not token_re.search(line)]
        if len(kept) == len(original):
            return  # no rules tagged for this peer
        payload = '\n'.join(kept) + '\n'
        restore = subprocess.run(
            ['docker', 'exec', '-i', WIREGUARD_CONTAINER, 'iptables-restore'],
            input=payload, capture_output=True, text=True, timeout=10,
        )
        if restore.returncode != 0:
            logger.warning(f"iptables-restore failed: {restore.stderr.strip()}")
    except Exception as exc:
        logger.error(f"clear_peer_rules({peer_ip}): {exc}")
def apply_peer_rules(peer_ip: str, settings: Dict[str, Any]) -> bool:
    """
    Apply iptables FORWARD rules for a peer based on their access settings.

    Each rule is inserted at position 1 (-I), so the LAST call ends up at the TOP.
    We insert in reverse-priority order: lowest-priority rules first, highest last.

    Desired final chain order (top = highest priority):
      1. Per-service DROP/ACCEPT (most specific — must beat private-net ACCEPT)
      2. Peer-to-peer ACCEPT/DROP (10.0.0.0/24)
      3. Private-net ACCEPTs (for no-internet peers to reach local resources)
      4. Internet DROP or ACCEPT (lowest priority catch-all)

    settings keys (all optional): internet_access (bool, default True),
    service_access (list of service names, default all), peer_access
    (bool, default True). Returns True on success, False if a step raised.
    """
    try:
        comment = _peer_comment(peer_ip)
        # Start from a clean slate so re-applies never stack duplicates.
        clear_peer_rules(peer_ip)
        internet_access = settings.get('internet_access', True)
        service_access = settings.get('service_access', list(SERVICE_IPS.keys()))
        peer_access = settings.get('peer_access', True)
        # --- Step 1 (inserted first → ends up at bottom before default ACCEPT) ---
        # Internet catch-all: allow or block
        if internet_access:
            _iptables(['-I', 'FORWARD', '-s', peer_ip,
                       '-m', 'comment', '--comment', comment, '-j', 'ACCEPT'])
        else:
            # Block non-private, allow private nets
            _iptables(['-I', 'FORWARD', '-s', peer_ip,
                       '-m', 'comment', '--comment', comment, '-j', 'DROP'])
            # reversed() so PRIVATE_NETS keeps its listed order once the
            # top-inserting -I calls stack them above the DROP.
            for net in reversed(PRIVATE_NETS):
                _iptables(['-I', 'FORWARD', '-s', peer_ip, '-d', net,
                           '-m', 'comment', '--comment', comment, '-j', 'ACCEPT'])
        # --- Step 2 --- Peer-to-peer (10.0.0.0/24)
        target = 'ACCEPT' if peer_access else 'DROP'
        _iptables(['-I', 'FORWARD', '-s', peer_ip, '-d', '10.0.0.0/24',
                   '-m', 'comment', '--comment', comment, '-j', target])
        # --- Step 3 (inserted last → ends up at TOP of chain) ---
        # Per-service rules — inserted in reverse dict order so first service ends up at top
        for service, svc_ip in reversed(list(SERVICE_IPS.items())):
            target = 'ACCEPT' if service in service_access else 'DROP'
            _iptables(['-I', 'FORWARD', '-s', peer_ip, '-d', svc_ip,
                       '-m', 'comment', '--comment', comment, '-j', target])
        logger.info(f"Applied rules for {peer_ip}: internet={internet_access} "
                    f"services={service_access} peers={peer_access}")
        return True
    except Exception as e:
        logger.error(f"apply_peer_rules({peer_ip}): {e}")
        return False
def apply_all_peer_rules(peers: List[Dict[str, Any]]) -> None:
    """Re-apply firewall rules for every known peer (startup path).

    Also ensures Caddy's per-service virtual IPs exist first, since the
    per-peer service rules target those VIPs.
    """
    ensure_caddy_virtual_ips()
    for entry in peers:
        peer_ip = entry.get('ip')
        if not peer_ip:
            continue
        settings = {
            'internet_access': entry.get('internet_access', True),
            'service_access': entry.get('service_access', list(SERVICE_IPS.keys())),
            'peer_access': entry.get('peer_access', True),
        }
        apply_peer_rules(peer_ip, settings)
# ---------------------------------------------------------------------------
# Cell-to-cell firewall rules
# ---------------------------------------------------------------------------
def _cell_tag(cell_name: str) -> str:
"""iptables comment tag for cell rules — distinct prefix from pic-peer-* to prevent collision."""
safe = re.sub(r'[^a-z0-9]', '-', cell_name.lower())
return f'pic-cell-{safe}'
def clear_cell_rules(cell_name: str) -> None:
    """Atomically remove every FORWARD rule tagged for this cell.

    Same save → filter → restore approach as clear_peer_rules; the tag is
    matched as a complete --comment token (quotes or trailing whitespace)
    so one cell's cleanup can never clip another cell's rules.
    """
    tag = _cell_tag(cell_name)
    token_re = re.compile(rf'--comment\s+["\']?{re.escape(tag)}["\']?(\s|$)')
    try:
        dump = _wg_exec(['iptables-save'])
        if dump.returncode != 0:
            return
        all_lines = dump.stdout.splitlines()
        survivors = [ln for ln in all_lines if not token_re.search(ln)]
        if len(survivors) == len(all_lines):
            return  # nothing tagged for this cell
        result = subprocess.run(
            ['docker', 'exec', '-i', WIREGUARD_CONTAINER, 'iptables-restore'],
            input='\n'.join(survivors) + '\n',
            capture_output=True, text=True, timeout=10,
        )
        if result.returncode != 0:
            logger.warning(f"clear_cell_rules iptables-restore failed: {result.stderr.strip()}")
    except Exception as exc:
        logger.error(f"clear_cell_rules({cell_name}): {exc}")
def _get_cell_api_ip() -> str:
    """Return cell-api's Docker bridge IP, or '' when unavailable.

    FIX: the annotation said Optional[str] while the docstring promised an
    empty string on failure; callers only truthiness-test the result, so we
    standardize on '' and guard the failed-inspect case explicitly instead
    of relying on docker emitting nothing on stdout.
    """
    r = _run(['docker', 'inspect', '--format',
              '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}',
              'cell-api'], check=False)
    if r.returncode != 0:
        return ''
    return r.stdout.strip()
def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str],
                     exit_relay: bool = False) -> bool:
    """Apply FORWARD rules for a cell-to-cell peer.

    Traffic from vpn_subnet is allowed only to service VIPs listed in
    inbound_services; all other cell traffic is DROPped. Cells get no
    internet or peer access — only explicit service VIPs, plus the
    cell-api port (3000) for permission-sync pushes arriving via DNAT.

    When exit_relay=True, the remote cell's peers can route internet
    traffic through this cell (Phase 3). A broad ACCEPT for traffic
    going out eth0 is added below per-service rules but above catch-all.

    Rule insertion order (first inserted = bottom, last inserted = top):
      1. Catch-all DROP for the subnet (inserted first → bottom)
      2. Exit relay ACCEPT (-o eth0) (if exit_relay, above catch-all)
      3. Per-service ACCEPT/DROP (inserted in reversed() order)
      4. API-sync ACCEPT (inserted last → top)

    Returns True on success, False when any step raised.
    """
    try:
        tag = _cell_tag(cell_name)
        # Clean slate so repeated applies never stack duplicate rules.
        clear_cell_rules(cell_name)
        # Catch-all DROP — inserted first so it ends up at the bottom
        _iptables(['-I', 'FORWARD', '-s', vpn_subnet,
                   '-m', 'comment', '--comment', tag, '-j', 'DROP'])
        # Exit relay ACCEPT — allow internet-bound traffic from this cell's peers.
        # Inserted ABOVE catch-all but BELOW per-service rules so service-level
        # DROP rules still take effect for specific service VIPs.
        if exit_relay:
            _iptables(['-I', 'FORWARD', '-s', vpn_subnet, '-o', 'eth0',
                       '-m', 'comment', '--comment', tag, '-j', 'ACCEPT'])
        # Per-service rules — inserted in reverse dict order, highest-priority last
        for service, svc_ip in reversed(list(SERVICE_IPS.items())):
            target = 'ACCEPT' if service in inbound_services else 'DROP'
            _iptables(['-I', 'FORWARD', '-s', vpn_subnet, '-d', svc_ip,
                       '-m', 'comment', '--comment', tag, '-j', target])
        # API permission-sync ACCEPT — inserted LAST so it goes to position 1 (above
        # the catch-all DROP). Remote cells push permissions to our cell-api via the
        # WG tunnel; iptables sees source=cell_subnet dst=api_ip after DNAT.
        api_ip = _get_cell_api_ip()
        if api_ip:
            _iptables(['-I', 'FORWARD', '-s', vpn_subnet, '-d', api_ip,
                       '-p', 'tcp', '--dport', '3000',
                       '-m', 'comment', '--comment', tag, '-j', 'ACCEPT'])
        logger.info(
            f"Applied cell rules for {cell_name} ({vpn_subnet}): "
            f"inbound={inbound_services} exit_relay={exit_relay}"
        )
        return True
    except Exception as e:
        logger.error(f"apply_cell_rules({cell_name}): {e}")
        return False
def apply_all_cell_rules(cell_links: List[Dict[str, Any]]) -> None:
    """Reinstall FORWARD rules for every cell connection (startup recovery).

    exit_relay is restored from the stored remote_exit_relay_active flag
    so relay permissions survive container restarts.
    """
    for entry in cell_links:
        cell = entry.get('cell_name')
        subnet = entry.get('vpn_subnet')
        if not cell or not subnet:
            continue
        inbound_map = entry.get('permissions', {}).get('inbound', {})
        allowed = [svc for svc, enabled in inbound_map.items() if enabled]
        relay = bool(entry.get('remote_exit_relay_active', False))
        apply_cell_rules(cell, subnet, allowed, exit_relay=relay)
def ensure_cell_api_dnat() -> bool:
    """DNAT wg0:3000 → cell-api:3000 inside cell-wireguard.

    Remote cells push permission updates over the WireGuard tunnel to our
    wg0 interface on port 3000. Since cell-api only listens on the Docker
    bridge, we need a DNAT rule inside cell-wireguard's namespace to forward
    that traffic. Called on every startup so rules survive container restarts.

    CONSISTENCY: resolves the API IP via _get_cell_api_ip() instead of
    duplicating the docker-inspect invocation inline, and factors the
    repeated probe-then-add pattern into a local helper.
    """
    def _ensure(check_args: List[str], add_args: List[str]) -> None:
        # Add the rule only when the `iptables -C` probe says it's missing.
        if _wg_exec(['iptables'] + check_args).returncode != 0:
            _wg_exec(['iptables'] + add_args)
    try:
        api_ip = _get_cell_api_ip()
        if not api_ip:
            logger.warning('ensure_cell_api_dnat: cell-api container not found or no IP')
            return False
        # PREROUTING DNAT: redirect wg0:3000 to the cell-api bridge IP.
        _ensure(
            ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
             '--dport', '3000', '-j', 'DNAT', '--to-destination', f'{api_ip}:3000'],
            ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
             '--dport', '3000', '-j', 'DNAT', '--to-destination', f'{api_ip}:3000'],
        )
        # POSTROUTING MASQUERADE so reply packets route back via cell-wireguard.
        _ensure(
            ['-t', 'nat', '-C', 'POSTROUTING', '-o', 'eth0', '-d', api_ip,
             '-p', 'tcp', '--dport', '3000', '-j', 'MASQUERADE'],
            ['-t', 'nat', '-A', 'POSTROUTING', '-o', 'eth0', '-d', api_ip,
             '-p', 'tcp', '--dport', '3000', '-j', 'MASQUERADE'],
        )
        # FORWARD ACCEPT for the DNATed wg0 → eth0 flow.
        _ensure(
            ['-C', 'FORWARD', '-i', 'wg0', '-o', 'eth0',
             '-p', 'tcp', '--dport', '3000', '-j', 'ACCEPT'],
            ['-I', 'FORWARD', '-i', 'wg0', '-o', 'eth0',
             '-p', 'tcp', '--dport', '3000', '-j', 'ACCEPT'],
        )
        logger.info(f'ensure_cell_api_dnat: wg0:3000 → {api_ip}:3000')
        return True
    except Exception as e:
        logger.error(f'ensure_cell_api_dnat: {e}')
        return False
# ---------------------------------------------------------------------------
# DNS ACL (CoreDNS Corefile generation)
# ---------------------------------------------------------------------------
# Service subdomains that get per-peer ACL rules in the CoreDNS zone block
_ACL_SERVICES = ('calendar', 'files', 'mail', 'webdav')
def _build_acl_block(blocked_peers_by_service: Dict[str, List[str]],
domain: str = 'cell') -> str:
"""
Build CoreDNS ACL plugin stanzas.
blocked_peers_by_service: { 'calendar': ['10.0.0.2', '10.0.0.3'], ... }
Returns a string to embed in the primary zone block.
"""
if not blocked_peers_by_service:
return ''
lines = []
for service in _ACL_SERVICES:
peer_ips = blocked_peers_by_service.get(service, [])
if not peer_ips:
continue
host = f'{service}.{domain}.'
for ip in peer_ips:
lines.append(f' acl {host} {{')
lines.append(f' block net {ip}/32')
lines.append(f' allow net 0.0.0.0/0')
lines.append(f' allow net ::/0')
lines.append(f' }}')
return '\n'.join(lines)
def generate_corefile(peers: List[Dict[str, Any]], corefile_path: str = COREFILE_PATH,
                      domain: str = 'cell',
                      cell_links: Optional[List[Dict[str, Any]]] = None) -> bool:
    """
    Rewrite the CoreDNS Corefile with per-peer ACL rules and reload plugin.

    The file is written to corefile_path (API-side path mapped into CoreDNS container).
    domain: the configured cell domain (e.g. 'cell', 'dev') — must match zone file names.
    cell_links: optional list of cell-to-cell DNS forwarding entries, each a dict with
        'domain' and 'dns_ip' keys (same shape as CellLinkManager.list_connections()).
        When non-empty, a forwarding stanza is appended for each entry.
    Returns True when the file was written, False on any error.
    """
    try:
        # Collect which peers block which services
        blocked: Dict[str, List[str]] = {s: [] for s in SERVICE_IPS}
        for peer in peers:
            ip = peer.get('ip')
            if not ip:
                continue
            # A service missing from a peer's service_access list gets a
            # DNS-level block mirroring the iptables DROP for that VIP.
            allowed_services = peer.get('service_access', list(SERVICE_IPS.keys()))
            for service in SERVICE_IPS:
                if service not in allowed_services:
                    blocked[service].append(ip)
        acl_block = _build_acl_block(blocked, domain)
        primary_zone_block = f'{domain} {{\n    file /data/{domain}.zone\n    log\n'
        if acl_block:
            primary_zone_block += acl_block + '\n'
        primary_zone_block += '}\n'
        corefile = f""". {{
    forward . 8.8.8.8 1.1.1.1
    cache
    log
    health
}}
{primary_zone_block}"""
        # Append cell-to-cell DNS forwarding stanzas if provided
        if cell_links:
            for link in cell_links:
                link_domain = link.get('domain', '')
                link_dns_ip = link.get('dns_ip', '')
                if not link_domain or not link_dns_ip:
                    continue
                corefile += (
                    f'\n{link_domain} {{\n'
                    f'    forward . {link_dns_ip}\n'
                    f'    cache\n'
                    f'    log\n'
                    f'}}\n'
                )
        else:
            corefile += '\n'
        # local.{domain} block intentionally omitted: /data/local.zone does not exist
        # and CoreDNS logs errors on every reload for a missing zone file.
        os.makedirs(os.path.dirname(corefile_path), exist_ok=True)
        # Write-to-temp + fsync + atomic rename so CoreDNS's reload plugin
        # never observes a half-written Corefile.
        tmp_path = corefile_path + '.tmp'
        with open(tmp_path, 'w') as f:
            f.write(corefile)
            f.flush()
            os.fsync(f.fileno())
        os.replace(tmp_path, corefile_path)
        logger.info(f"Wrote Corefile to {corefile_path}")
        return True
    except Exception as e:
        logger.error(f"generate_corefile: {e}")
        return False
def reload_coredns() -> bool:
    """Signal CoreDNS to reload its configuration.

    SIGUSR1 triggers the reload plugin; SIGHUP would kill the process,
    which is why we send the signal explicitly via docker kill.
    Returns True when the signal was delivered.
    """
    try:
        sent = _run(['docker', 'kill', '--signal=SIGUSR1', 'cell-dns'], check=False)
    except Exception as e:
        logger.error(f"reload_coredns: {e}")
        return False
    if sent.returncode != 0:
        logger.warning(f"SIGUSR1 to cell-dns failed: {sent.stderr.strip()}")
        return False
    logger.info("Sent SIGUSR1 to cell-dns (reload)")
    return True
def apply_all_dns_rules(peers: List[Dict[str, Any]], corefile_path: str = COREFILE_PATH,
                        domain: str = 'cell',
                        cell_links: Optional[List[Dict[str, Any]]] = None) -> bool:
    """Regenerate the Corefile (including cell-to-cell forwarding stanzas)
    and, only when the write succeeded, signal CoreDNS to reload."""
    if not generate_corefile(peers, corefile_path, domain, cell_links):
        return False
    reload_coredns()
    return True