fix: add kernel routes for cell peers after wg set
`wg set` updates WireGuard peer state but does not add kernel routes, unlike `wg-quick`. Without `ip route add`, traffic to a remote cell's vpn_subnet is routed via the default gateway (internet) instead of wg0, causing all cross-cell pushes to time out with HTTP 000.

- add_cell_peer() now calls _ensure_cell_route(vpn_subnet) after writing the peer config and running _syncconf
- _ensure_cell_route() runs `docker exec cell-wireguard ip route add` (idempotent, non-fatal); no-op inside test dirs
- sync_cell_routes() parses wg0.conf at startup to re-add any routes lost across container restarts; called from _apply_startup_enforcement
- 5 new unit tests covering both normal and test-dir no-op paths

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -265,6 +265,7 @@ def _apply_startup_enforcement():
|
||||
firewall_manager.apply_all_peer_rules(peers)
|
||||
firewall_manager.apply_all_cell_rules(cell_links)
|
||||
firewall_manager.ensure_cell_api_dnat()
|
||||
wireguard_manager.sync_cell_routes()
|
||||
firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, _configured_domain(),
|
||||
cell_links=cell_links)
|
||||
logger.info(f"Applied enforcement rules for {len(peers)} peers, {len(cell_links)} cells on startup")
|
||||
|
||||
@@ -518,11 +518,61 @@ class WireGuardManager(BaseServiceManager):
|
||||
if endpoint:
|
||||
peer_block += f'Endpoint = {endpoint}\n'
|
||||
self._write_config(content + peer_block)
|
||||
self._ensure_cell_route(vpn_subnet)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f'add_cell_peer failed: {e}')
|
||||
return False
|
||||
|
||||
def _ensure_cell_route(self, vpn_subnet: str) -> None:
    """Add a kernel route for vpn_subnet via wg0 inside cell-wireguard if missing.

    'wg set' updates WireGuard peer state but does not add kernel routes.
    wg-quick would do this automatically, but we manage WG live via 'wg set'.

    The call is best-effort: every failure is logged and nothing is raised.
    An "already exists" answer from ``ip route add`` counts as success, so
    the method can safely be called repeatedly for the same subnet.

    Args:
        vpn_subnet: Destination network in CIDR notation (e.g. '10.1.0.0/24').
    """
    real_conf = self._config_file()
    # Config paths that look like a test location must never touch docker.
    if '/tmp/' in real_conf or 'pytest' in real_conf:
        return

    try:
        result = subprocess.run(
            ['docker', 'exec', 'cell-wireguard',
             'ip', 'route', 'add', vpn_subnet, 'dev', 'wg0'],
            capture_output=True, timeout=5
        )
    except Exception as e:
        # docker missing, timeout, bad argument type, ... -- never fatal.
        logger.warning(f'_ensure_cell_route failed (non-fatal): {e}')
        return

    if result.returncode == 0:
        logger.info(f'_ensure_cell_route: {vpn_subnet} via wg0')
        return

    # subprocess.run does not raise on a non-zero exit status, so the
    # outcome has to be inspected explicitly instead of assuming success.
    detail = result.stderr
    if isinstance(detail, bytes):
        detail = detail.decode(errors='replace')
    detail = str(detail or '').strip()

    if 'File exists' in detail:
        # 'ip route add' reports an existing route as an error; for us
        # that is exactly the desired end state.
        logger.info(f'_ensure_cell_route: {vpn_subnet} already routed via wg0')
    else:
        logger.warning(
            f'_ensure_cell_route failed (non-fatal): ip route add {vpn_subnet} '
            f'exited {result.returncode}: {detail}'
        )
|
||||
|
||||
def sync_cell_routes(self) -> None:
    """Ensure kernel routes exist for all cell peers defined in wg0.conf.

    Called on startup so routes survive container restarts (kernel routes
    are ephemeral; only the WG peer config in wg0.conf persists).

    A peer counts as a cell peer when a '# cell:' comment line appears
    inside its section before the AllowedIPs line. Each network listed in
    AllowedIPs (comma-separated) is handed to _ensure_cell_route once.
    Errors are logged and never raised. Does nothing when the config path
    looks like a test location (contains '/tmp/' or 'pytest').
    """
    real_conf = self._config_file()
    if '/tmp/' in real_conf or 'pytest' in real_conf:
        return

    try:
        content = self._read_config()
        subnets = []
        in_cell_peer = False
        for line in content.splitlines():
            stripped = line.strip()
            if stripped.startswith('['):
                # Any section header ends the previous peer, so the cell
                # marker must not leak into the next section.
                in_cell_peer = False
            elif stripped.startswith('# cell:'):
                in_cell_peer = True
            elif in_cell_peer and stripped.startswith('AllowedIPs'):
                # partition() yields an empty value for a line without '=',
                # so one malformed line cannot abort the whole sync.
                _, _, value = stripped.partition('=')
                # AllowedIPs may hold several comma-separated networks;
                # each one needs its own route.
                for subnet in value.split(','):
                    subnet = subnet.strip()
                    if subnet and subnet not in subnets:
                        subnets.append(subnet)

        for subnet in subnets:
            self._ensure_cell_route(subnet)
        if subnets:
            logger.info(f'sync_cell_routes: ensured routes for {subnets}')
    except Exception as e:
        logger.warning(f'sync_cell_routes failed (non-fatal): {e}')
|
||||
|
||||
def remove_peer(self, public_key: str) -> bool:
|
||||
"""Remove the [Peer] block matching public_key from wg0.conf."""
|
||||
try:
|
||||
|
||||
@@ -704,5 +704,77 @@ class TestAddCellPeerSubnetOverlap(unittest.TestCase):
|
||||
self.assertFalse(ok)
|
||||
|
||||
|
||||
class TestCellRoutes(unittest.TestCase):
    """Tests for _ensure_cell_route and sync_cell_routes."""

    _CELL_PUBKEY = 'cmVtb3RlcHVia2V5X2Zvcl90ZXN0c193Z3Rlc3QxMiE='
    _CELL_SUBNET = '10.1.0.0/24'
    # Contains neither '/tmp/' nor 'pytest', so the test-environment guard
    # in the code under test does not trigger for this path.
    _LIVE_CONF = '/app/config/wireguard/wg0.conf'

    def setUp(self):
        # The 'pytest-' prefix puts 'pytest' into the directory name, so the
        # no-op tests do not depend on the system temp dir living under /tmp/
        # (assumes _config_file() resolves below config_dir -- TODO confirm).
        self.test_dir = tempfile.mkdtemp(prefix='pytest-')
        # Registered right away so the directory is removed even when a
        # later setUp step fails (tearDown would not run in that case).
        self.addCleanup(shutil.rmtree, self.test_dir)
        self.data_dir = os.path.join(self.test_dir, 'data')
        self.config_dir = os.path.join(self.test_dir, 'config')
        os.makedirs(self.data_dir, exist_ok=True)
        os.makedirs(self.config_dir, exist_ok=True)
        patcher = patch.object(WireGuardManager, '_syncconf', return_value=None)
        self.mock_sync = patcher.start()
        self.addCleanup(patcher.stop)
        self.wg = WireGuardManager(self.data_dir, self.config_dir)

    def test_ensure_cell_route_noop_in_test_dir(self):
        """_ensure_cell_route must not call subprocess when config is in a test dir."""
        with patch('subprocess.run') as mock_run:
            self.wg._ensure_cell_route(self._CELL_SUBNET)
        mock_run.assert_not_called()

    def test_sync_cell_routes_noop_in_test_dir(self):
        """sync_cell_routes must not call subprocess when config is in a test dir."""
        with patch('subprocess.run') as mock_run:
            self.wg.sync_cell_routes()
        mock_run.assert_not_called()

    def test_ensure_cell_route_calls_ip_route_add(self):
        """Outside test dirs, _ensure_cell_route calls docker exec ip route add."""
        with patch.object(self.wg, '_config_file', return_value=self._LIVE_CONF), \
                patch('subprocess.run') as mock_run:
            mock_run.return_value = MagicMock(returncode=0)
            self.wg._ensure_cell_route(self._CELL_SUBNET)
        mock_run.assert_called_once()
        cmd = mock_run.call_args[0][0]
        # Compare the whole argv: membership checks alone would also accept
        # the arguments in a wrong order.
        self.assertEqual(
            cmd,
            ['docker', 'exec', 'cell-wireguard',
             'ip', 'route', 'add', self._CELL_SUBNET, 'dev', 'wg0'],
        )

    def test_sync_cell_routes_finds_cell_peers_in_config(self):
        """sync_cell_routes parses wg0.conf and adds routes for cell peers only."""
        conf = (
            '[Interface]\nPrivateKey = dummykey\nAddress = 10.0.0.1/24\nListenPort = 51820\n\n'
            f'[Peer]\n# cell:remote\nPublicKey = {self._CELL_PUBKEY}\n'
            'AllowedIPs = 10.1.0.0/24\nPersistentKeepalive = 25\n\n'
            '[Peer]\n# alice\nPublicKey = YWxpY2VwdWJrZXlfZm9yX3Rlc3RzX3dndGVzdDEyMyE=\n'
            'AllowedIPs = 10.0.0.2/32\nPersistentKeepalive = 25\n'
        )
        with patch.object(self.wg, '_config_file', return_value=self._LIVE_CONF), \
                patch.object(self.wg, '_read_config', return_value=conf), \
                patch('subprocess.run') as mock_run:
            mock_run.return_value = MagicMock(returncode=0)
            self.wg.sync_cell_routes()
        cmds = [call[0][0] for call in mock_run.call_args_list]
        cell_cmds = [cmd for cmd in cmds if '10.1.0.0/24' in cmd]
        peer_cmds = [cmd for cmd in cmds if '10.0.0.2/32' in cmd]
        self.assertEqual(len(cell_cmds), 1, 'expected exactly one route add for cell peer subnet')
        self.assertEqual(peer_cmds, [], 'should not add route for regular peer')

    def test_add_cell_peer_triggers_ensure_cell_route(self):
        """add_cell_peer calls _ensure_cell_route after writing config."""
        with patch.object(self.wg, '_ensure_cell_route') as mock_route:
            ok = self.wg.add_cell_peer(
                'remote', self._CELL_PUBKEY, '5.6.7.8:51821', self._CELL_SUBNET
            )
        # The route call is only meaningful when the peer was really added.
        self.assertTrue(ok)
        mock_route.assert_called_once_with(self._CELL_SUBNET)
|
||||
|
||||
|
||||
# Run the unittest suite when this module is executed directly as a script.
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user