fix: add kernel routes for cell peers after wg set

wg set updates WireGuard peer state but does not add kernel routes —
unlike wg-quick. Without ip route add, traffic to a remote cell's
vpn_subnet is routed via the default gateway (internet) instead of wg0,
causing all cross-cell pushes to time out with HTTP 000.

- add_cell_peer() now calls _ensure_cell_route(vpn_subnet) after
  writing the peer config and running _syncconf
- _ensure_cell_route() runs `docker exec cell-wireguard ip route add`
  (non-fatal: failures, including an already-existing route, are ignored,
  so repeated calls are safe); no-op inside test dirs
- sync_cell_routes() parses wg0.conf at startup to re-add any routes
  lost across container restarts; called from _apply_startup_enforcement
- 5 new unit tests covering both normal and test-dir no-op paths

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-01 14:47:22 -04:00
parent ea6731d62c
commit 4a9c4cc58b
3 changed files with 123 additions and 0 deletions
+1
View File
@@ -265,6 +265,7 @@ def _apply_startup_enforcement():
firewall_manager.apply_all_peer_rules(peers)
firewall_manager.apply_all_cell_rules(cell_links)
firewall_manager.ensure_cell_api_dnat()
wireguard_manager.sync_cell_routes()
firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, _configured_domain(),
cell_links=cell_links)
logger.info(f"Applied enforcement rules for {len(peers)} peers, {len(cell_links)} cells on startup")
+50
View File
@@ -518,11 +518,61 @@ class WireGuardManager(BaseServiceManager):
if endpoint:
peer_block += f'Endpoint = {endpoint}\n'
self._write_config(content + peer_block)
self._ensure_cell_route(vpn_subnet)
return True
except Exception as e:
logger.error(f'add_cell_peer failed: {e}')
return False
def _ensure_cell_route(self, vpn_subnet: str) -> None:
    """Add a kernel route for ``vpn_subnet`` via wg0 inside cell-wireguard.

    'wg set' updates WireGuard peer state but does not add kernel routes.
    wg-quick would do this automatically, but we manage WG live via 'wg set'.

    Best-effort: this method never raises. A route that already exists is
    treated as success; any other failure is logged as a warning.

    Args:
        vpn_subnet: Destination passed verbatim to ``ip route add``
            (presumably a CIDR such as ``10.1.0.0/24`` -- the value is not
            validated here).
    """
    real_conf = self._config_file()
    # No-op when the config path looks like a temp/pytest directory, so
    # tests never shell out to docker.
    if '/tmp/' in real_conf or 'pytest' in real_conf:
        return
    if not vpn_subnet:
        logger.warning('_ensure_cell_route: empty vpn_subnet, skipping')
        return
    try:
        result = subprocess.run(
            ['docker', 'exec', 'cell-wireguard',
             'ip', 'route', 'add', vpn_subnet, 'dev', 'wg0'],
            capture_output=True, timeout=5
        )
    except Exception as e:
        logger.warning(f'_ensure_cell_route failed (non-fatal): {e}')
        return

    if result.returncode == 0:
        logger.info(f'_ensure_cell_route: {vpn_subnet} via wg0')
        return

    # capture_output without text=True yields bytes; normalise for logging.
    stderr = result.stderr
    if isinstance(stderr, bytes):
        stderr = stderr.decode(errors='replace')
    stderr = str(stderr or '').strip()

    if 'File exists' in stderr:
        # 'ip route add' reports EEXIST when the route is already present;
        # that is the desired end state, so count it as success.
        logger.info(f'_ensure_cell_route: {vpn_subnet} via wg0')
    else:
        logger.warning(
            f'_ensure_cell_route failed (non-fatal): '
            f'exit {result.returncode}: {stderr}'
        )
def sync_cell_routes(self) -> None:
    """Ensure kernel routes exist for all cell peers defined in wg0.conf.

    Called on startup so routes survive container restarts (kernel routes
    are ephemeral; only the WG peer config in wg0.conf persists).

    A peer counts as a cell peer when a ``# cell:`` comment line appears
    after its ``[Peer]`` header. Every entry of that peer's ``AllowedIPs``
    (a comma-separated list) is passed to ``_ensure_cell_route`` once.

    Best-effort: any exception is logged as a warning and swallowed.
    """
    real_conf = self._config_file()
    # No-op when the config path looks like a temp/pytest directory.
    if '/tmp/' in real_conf or 'pytest' in real_conf:
        return
    try:
        subnets = []
        in_cell_peer = False
        for line in self._read_config().splitlines():
            stripped = line.strip()
            if stripped == '[Peer]':
                # A new peer starts; it is a cell peer only if a
                # '# cell:' marker follows.
                in_cell_peer = False
            elif stripped.startswith('# cell:'):
                in_cell_peer = True
            elif in_cell_peer and stripped.startswith('AllowedIPs'):
                # partition() tolerates a malformed line without '='
                # (value is then empty) instead of aborting the sync.
                _, _, value = stripped.partition('=')
                # AllowedIPs may list several networks separated by
                # commas; each one needs its own route.
                for entry in value.split(','):
                    entry = entry.strip()
                    if entry and entry not in subnets:
                        subnets.append(entry)
        for subnet in subnets:
            self._ensure_cell_route(subnet)
        if subnets:
            logger.info(f'sync_cell_routes: ensured routes for {subnets}')
    except Exception as e:
        logger.warning(f'sync_cell_routes failed (non-fatal): {e}')
def remove_peer(self, public_key: str) -> bool:
"""Remove the [Peer] block matching public_key from wg0.conf."""
try: