feat: Phase 3 - per-peer internet routing via exit cell

Adds the ability to route a specific peer's internet traffic through a
connected cell acting as an exit relay.

Cell A side:
- PUT /api/peers/<peer>/route-via {"via_cell": "cellB"} sets route_via
- Updates WG AllowedIPs to include 0.0.0.0/0 for the exit cell peer
- Adds ip rule + ip route in policy table inside cell-wireguard so the
  specific peer's traffic egresses via cellB's WG IP
- Sets exit_relay_active on the cell link and pushes use_as_exit_relay=True
  to cellB via peer-sync

Cell B side:
- Receives use_as_exit_relay in the peer-sync payload
- Calls apply_cell_rules(..., exit_relay=True) to add FORWARD -o eth0 ACCEPT
- Stores remote_exit_relay_active flag for startup recovery

Startup recovery:
- apply_all_cell_rules passes exit_relay=remote_exit_relay_active (cellB)
- _apply_startup_enforcement reapplies ip rule for each peer with route_via (cellA)
  since policy routing rules don't survive container restart

peer_registry gets route_via field with lazy migration.
22 new tests across test_cell_link_manager, test_peer_registry, test_peer_route_via.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-01 16:23:31 -04:00
parent dcee03dd3f
commit 8ea834e108
11 changed files with 547 additions and 11 deletions
+74
View File
@@ -573,6 +573,80 @@ class WireGuardManager(BaseServiceManager):
except Exception as e:
logger.warning(f'sync_cell_routes failed (non-fatal): {e}')
def update_cell_peer_allowed_ips(self, public_key: str,
                                 vpn_subnet: str,
                                 add_default_route: bool = False) -> bool:
    """Rewrite a cell peer's AllowedIPs, optionally adding the default route.

    The value handed to ``update_peer_ip`` is ``vpn_subnet`` unchanged, or
    ``"<vpn_subnet>, 0.0.0.0/0"`` when ``add_default_route`` is true, so
    that internet-bound traffic is routed to this cell peer (Phase 3
    exit-relay routing).

    v1 constraint: only one WireGuard peer per interface may carry
    0.0.0.0/0 in AllowedIPs. This method does not enforce that; callers
    must strip it from any other cell peer before adding it here.

    Args:
        public_key: Public key identifying the peer to update.
        vpn_subnet: The peer's VPN subnet (its baseline AllowedIPs).
        add_default_route: Append 0.0.0.0/0 to the AllowedIPs when true.

    Returns:
        The result of ``update_peer_ip``; False if anything raises (the
        error is logged rather than propagated).
    """
    try:
        allowed_ips = (
            f'{vpn_subnet}, 0.0.0.0/0' if add_default_route else vpn_subnet
        )
        outcome = self.update_peer_ip(public_key, allowed_ips)
    except Exception as e:
        logger.error(f'update_cell_peer_allowed_ips failed: {e}')
        return False
    return outcome
def apply_peer_route_via(self, peer_ip: str, via_wg_ip: str,
                         table: int = 100) -> bool:
    """Policy-route traffic sourced from peer_ip through via_wg_ip.

    Runs two ``ip`` commands inside the ``cell-wireguard`` container:

    1. ``ip route replace default via <via_wg_ip> dev wg0 table <table>``
       installs (or repoints) the default route of the policy table.
    2. ``ip rule add from <peer_ip>/32 pref <table> lookup <table>``
       makes traffic sourced from the peer consult that table.

    This is the cell-A side of Phase 3 routing: afterwards, traffic from
    the local peer (at peer_ip) is sent through the WG tunnel to the exit
    cell (at via_wg_ip) for internet egress.

    Idempotent: ``route replace`` succeeds whether or not the route exists,
    and an "already exists" (EEXIST) answer for the rule counts as success.
    Note that the table holds a single default route, so a later call with
    a different via_wg_ip repoints every rule that uses the same table.

    Args:
        peer_ip: IPv4 address of the local peer whose traffic is re-routed.
        via_wg_ip: WireGuard IP of the exit cell, used as the gateway.
        table: Routing table number; also used as the rule preference.

    Returns:
        True when both commands succeeded (or the rule already existed),
        or when the config path looks like a temp/pytest location, in
        which case no command is run. False when a command fails or raises.
    """
    real_conf = self._config_file()
    # Config paths under /tmp or containing "pytest" skip the docker calls
    # entirely (presumably test runs without a cell-wireguard container).
    if '/tmp/' in real_conf or 'pytest' in real_conf:
        return True

    def _wg(cmd):
        """Run a command inside the cell-wireguard container."""
        return subprocess.run(
            ['docker', 'exec', 'cell-wireguard'] + cmd,
            capture_output=True, text=True, timeout=5
        )

    try:
        # "replace" rather than "add": it is idempotent and also updates the
        # gateway when the peer is switched to a different exit cell.
        route = _wg(['ip', 'route', 'replace', 'default', 'via', via_wg_ip,
                     'dev', 'wg0', 'table', str(table)])
        if route.returncode != 0:
            logger.error(
                f'apply_peer_route_via: route setup failed '
                f'(rc={route.returncode}): {(route.stderr or "").strip()}'
            )
            return False

        # Traffic FROM peer_ip uses this table. A rule that is already
        # present is reported as EEXIST ("File exists"), which is fine.
        rule = _wg(['ip', 'rule', 'add', 'from', f'{peer_ip}/32',
                    'pref', str(table), 'lookup', str(table)])
        rule_err = (rule.stderr or '').strip()
        if rule.returncode != 0 and 'File exists' not in rule_err:
            logger.error(
                f'apply_peer_route_via: rule setup failed '
                f'(rc={rule.returncode}): {rule_err}'
            )
            return False

        logger.info(
            f'apply_peer_route_via: {peer_ip} -> {via_wg_ip} table {table}'
        )
        return True
    except Exception as e:
        logger.error(f'apply_peer_route_via failed: {e}')
        return False
def remove_peer_route_via(self, peer_ip: str, table: int = 100) -> None:
    """Remove the ip rule for peer_ip added by apply_peer_route_via.

    Runs ``ip rule del from <peer_ip>/32 pref <table> lookup <table>``
    inside the ``cell-wireguard`` container. Only the rule is deleted; the
    default route in the policy table is left in place.

    Best-effort and non-fatal: this never raises for a failed command.
    Failures are logged instead of being silently discarded, so a rule
    that could not be removed leaves a trace.

    Args:
        peer_ip: IPv4 address of the peer whose rule is removed.
        table: Routing table number / rule preference used when the rule
            was added.
    """
    real_conf = self._config_file()
    # Config paths under /tmp or containing "pytest" skip the docker call
    # entirely (presumably test runs without a cell-wireguard container).
    if '/tmp/' in real_conf or 'pytest' in real_conf:
        return
    try:
        result = subprocess.run(
            ['docker', 'exec', 'cell-wireguard',
             'ip', 'rule', 'del', 'from', f'{peer_ip}/32',
             'pref', str(table), 'lookup', str(table)],
            capture_output=True, text=True, timeout=5
        )
    except Exception as e:
        # Timeout, missing docker binary, etc.: report it, do not raise.
        logger.warning(f'remove_peer_route_via failed (non-fatal): {e}')
        return
    if result.returncode != 0:
        # A non-zero exit is expected when the rule was never installed,
        # so it is only worth a debug line.
        logger.debug(
            f'remove_peer_route_via: ip rule del for {peer_ip} returned '
            f'{result.returncode}: {(result.stderr or "").strip()}'
        )
def remove_peer(self, public_key: str) -> bool:
"""Remove the [Peer] block matching public_key from wg0.conf."""
try: