fix: add kernel routes for cell peers after wg set
wg set updates WireGuard peer state but does not add kernel routes, unlike wg-quick. Without ip route add, traffic to a remote cell's vpn_subnet is routed via the default gateway (internet) instead of wg0, causing all cross-cell pushes to time out with HTTP 000.

- add_cell_peer() now calls _ensure_cell_route(vpn_subnet) after writing the peer config and running _syncconf
- _ensure_cell_route() runs docker exec cell-wireguard ip route add (idempotent, non-fatal); it is a no-op inside test dirs
- sync_cell_routes() parses wg0.conf at startup to re-add any routes lost across container restarts; it is called from _apply_startup_enforcement
- 5 new unit tests covering both the normal and the test-dir no-op paths

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -265,6 +265,7 @@ def _apply_startup_enforcement():
|
||||
firewall_manager.apply_all_peer_rules(peers)
|
||||
firewall_manager.apply_all_cell_rules(cell_links)
|
||||
firewall_manager.ensure_cell_api_dnat()
|
||||
wireguard_manager.sync_cell_routes()
|
||||
firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, _configured_domain(),
|
||||
cell_links=cell_links)
|
||||
logger.info(f"Applied enforcement rules for {len(peers)} peers, {len(cell_links)} cells on startup")
|
||||
|
||||
@@ -518,11 +518,61 @@ class WireGuardManager(BaseServiceManager):
|
||||
if endpoint:
|
||||
peer_block += f'Endpoint = {endpoint}\n'
|
||||
self._write_config(content + peer_block)
|
||||
self._ensure_cell_route(vpn_subnet)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f'add_cell_peer failed: {e}')
|
||||
return False
|
||||
|
||||
def _ensure_cell_route(self, vpn_subnet: str) -> None:
|
||||
"""Add kernel route for vpn_subnet via wg0 inside cell-wireguard if missing.
|
||||
|
||||
'wg set' updates WireGuard peer state but does not add kernel routes.
|
||||
wg-quick would do this automatically, but we manage WG live via 'wg set'.
|
||||
"""
|
||||
real_conf = self._config_file()
|
||||
if '/tmp/' in real_conf or 'pytest' in real_conf:
|
||||
return
|
||||
try:
|
||||
subprocess.run(
|
||||
['docker', 'exec', 'cell-wireguard',
|
||||
'ip', 'route', 'add', vpn_subnet, 'dev', 'wg0'],
|
||||
capture_output=True, timeout=5
|
||||
)
|
||||
logger.info(f'_ensure_cell_route: {vpn_subnet} via wg0')
|
||||
except Exception as e:
|
||||
logger.warning(f'_ensure_cell_route failed (non-fatal): {e}')
|
||||
|
||||
def sync_cell_routes(self) -> None:
    """Ensure kernel routes exist for all cell peers defined in wg0.conf.

    Called on startup so routes survive container restarts (kernel routes
    are ephemeral; only the WG peer config in wg0.conf persists).

    A peer is treated as a cell peer when a '# cell:' comment line appears
    after its '[Peer]' header. Every comma-separated entry of that peer's
    AllowedIPs line gets its own _ensure_cell_route() call. Malformed
    AllowedIPs lines (no '=' or no value) are skipped instead of aborting
    the whole sync.

    Best-effort: any error is logged as a warning and swallowed. Does
    nothing when the config path looks like a temp/pytest directory.
    """
    real_conf = self._config_file()
    if '/tmp/' in real_conf or 'pytest' in real_conf:
        return
    try:
        subnets = []
        in_cell_peer = False
        for line in self._read_config().splitlines():
            stripped = line.strip()
            if stripped == '[Peer]':
                # New peer section: forget the previous peer's cell marker.
                in_cell_peer = False
            elif stripped.startswith('# cell:'):
                in_cell_peer = True
            elif in_cell_peer and stripped.startswith('AllowedIPs'):
                # partition() never raises on a line without '='.
                _, _, value = stripped.partition('=')
                # AllowedIPs may list several networks separated by
                # commas; each one needs its own route.
                subnets.extend(
                    entry.strip()
                    for entry in value.split(',')
                    if entry.strip()
                )
        for subnet in subnets:
            self._ensure_cell_route(subnet)
        if subnets:
            logger.info(f'sync_cell_routes: ensured routes for {subnets}')
    except Exception as e:
        logger.warning(f'sync_cell_routes failed (non-fatal): {e}')
|
||||
|
||||
def remove_peer(self, public_key: str) -> bool:
|
||||
"""Remove the [Peer] block matching public_key from wg0.conf."""
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user