From 4a9c4cc58b24c9f7df05e80f5b7cc107463bfdb9 Mon Sep 17 00:00:00 2001
From: Dmitrii Iurco
Date: Fri, 1 May 2026 14:47:22 -0400
Subject: [PATCH] fix: add kernel routes for cell peers after wg set
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

wg set updates WireGuard peer state but does not add kernel routes —
unlike wg-quick. Without ip route add, traffic to a remote cell's
vpn_subnet is routed via the default gateway (internet) instead of wg0,
causing all cross-cell pushes to time out with HTTP 000.

- add_cell_peer() now calls _ensure_cell_route(vpn_subnet) after writing
  the peer config and running _syncconf
- _ensure_cell_route() runs docker exec cell-wireguard ip route replace
  (idempotent, non-fatal; a non-zero exit is logged as a warning instead
  of being reported as success); no-op inside test dirs
- sync_cell_routes() parses wg0.conf at startup to re-add any routes lost
  across container restarts, one route per network listed in AllowedIPs;
  called from _apply_startup_enforcement
- 6 new unit tests covering both normal and test-dir no-op paths

Co-Authored-By: Claude Sonnet 4.6
---
 api/app.py                      |  1 +
 api/wireguard_manager.py        | 61 ++++++++++++++++++++++++
 tests/test_wireguard_manager.py | 86 +++++++++++++++++++++++++++++++++
 3 files changed, 148 insertions(+)

diff --git a/api/app.py b/api/app.py
index b4c19d8..17106b3 100644
--- a/api/app.py
+++ b/api/app.py
@@ -265,6 +265,7 @@ def _apply_startup_enforcement():
         firewall_manager.apply_all_peer_rules(peers)
         firewall_manager.apply_all_cell_rules(cell_links)
         firewall_manager.ensure_cell_api_dnat()
+        wireguard_manager.sync_cell_routes()
         firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, _configured_domain(),
                                              cell_links=cell_links)
         logger.info(f"Applied enforcement rules for {len(peers)} peers, {len(cell_links)} cells on startup")
diff --git a/api/wireguard_manager.py b/api/wireguard_manager.py
index a5884b0..74704b1 100644
--- a/api/wireguard_manager.py
+++ b/api/wireguard_manager.py
@@ -518,11 +518,72 @@ class WireGuardManager(BaseServiceManager):
             if endpoint:
                 peer_block += f'Endpoint = {endpoint}\n'
             self._write_config(content + peer_block)
+            self._ensure_cell_route(vpn_subnet)
             return True
         except Exception as e:
             logger.error(f'add_cell_peer failed: {e}')
             return False
 
+    def _ensure_cell_route(self, vpn_subnet: str) -> None:
+        """Ensure a kernel route for vpn_subnet via wg0 inside cell-wireguard.
+
+        'wg set' updates WireGuard peer state but does not add kernel routes.
+        wg-quick would do this automatically, but we manage WG live via 'wg set'.
+
+        Uses 'ip route replace' so that re-running against an existing route
+        succeeds. Never raises: failures are logged as warnings.
+        """
+        real_conf = self._config_file()
+        if '/tmp/' in real_conf or 'pytest' in real_conf:
+            return
+        try:
+            result = subprocess.run(
+                ['docker', 'exec', 'cell-wireguard',
+                 'ip', 'route', 'replace', vpn_subnet, 'dev', 'wg0'],
+                capture_output=True, text=True, timeout=5
+            )
+            if result.returncode == 0:
+                logger.info(f'_ensure_cell_route: {vpn_subnet} via wg0')
+            else:
+                # Do not claim success when docker/ip reported an error.
+                logger.warning(
+                    f'_ensure_cell_route failed (non-fatal): '
+                    f'rc={result.returncode} {result.stderr.strip()}')
+        except Exception as e:
+            logger.warning(f'_ensure_cell_route failed (non-fatal): {e}')
+
+    def sync_cell_routes(self) -> None:
+        """Ensure kernel routes exist for all cell peers defined in wg0.conf.
+
+        Called on startup so routes survive container restarts (kernel routes
+        are ephemeral; only the WG peer config in wg0.conf persists).
+        """
+        real_conf = self._config_file()
+        if '/tmp/' in real_conf or 'pytest' in real_conf:
+            return
+        try:
+            content = self._read_config()
+            subnets = []
+            in_cell_peer = False
+            for line in content.splitlines():
+                stripped = line.strip()
+                if stripped == '[Peer]':
+                    in_cell_peer = False
+                elif stripped.startswith('# cell:'):
+                    in_cell_peer = True
+                elif in_cell_peer and stripped.startswith('AllowedIPs'):
+                    # AllowedIPs may list several comma-separated networks;
+                    # each one needs its own route.
+                    _, _, allowed = stripped.partition('=')
+                    subnets.extend(
+                        net.strip() for net in allowed.split(',') if net.strip())
+            for subnet in subnets:
+                self._ensure_cell_route(subnet)
+            if subnets:
+                logger.info(f'sync_cell_routes: ensured routes for {subnets}')
+        except Exception as e:
+            logger.warning(f'sync_cell_routes failed (non-fatal): {e}')
+
     def remove_peer(self, public_key: str) -> bool:
         """Remove the [Peer] block matching public_key from wg0.conf."""
         try:
diff --git a/tests/test_wireguard_manager.py b/tests/test_wireguard_manager.py
index e2152c5..782d7b3 100644
--- a/tests/test_wireguard_manager.py
+++ b/tests/test_wireguard_manager.py
@@ -704,5 +704,91 @@ class TestAddCellPeerSubnetOverlap(unittest.TestCase):
         self.assertFalse(ok)
 
 
+class TestCellRoutes(unittest.TestCase):
+    """Tests for _ensure_cell_route and sync_cell_routes."""
+
+    _CELL_PUBKEY = 'cmVtb3RlcHVia2V5X2Zvcl90ZXN0c193Z3Rlc3QxMiE='
+
+    def setUp(self):
+        self.test_dir = tempfile.mkdtemp()
+        self.data_dir = os.path.join(self.test_dir, 'data')
+        self.config_dir = os.path.join(self.test_dir, 'config')
+        os.makedirs(self.data_dir, exist_ok=True)
+        os.makedirs(self.config_dir, exist_ok=True)
+        patcher = patch.object(WireGuardManager, '_syncconf', return_value=None)
+        self.mock_sync = patcher.start()
+        self.addCleanup(patcher.stop)
+        self.wg = WireGuardManager(self.data_dir, self.config_dir)
+
+    def tearDown(self):
+        shutil.rmtree(self.test_dir)
+
+    def test_ensure_cell_route_noop_in_test_dir(self):
+        """_ensure_cell_route must not call subprocess when config is in /tmp (test env)."""
+        with patch('subprocess.run') as mock_run:
+            self.wg._ensure_cell_route('10.1.0.0/24')
+            mock_run.assert_not_called()
+
+    def test_sync_cell_routes_noop_in_test_dir(self):
+        """sync_cell_routes must not call subprocess when config is in /tmp (test env)."""
+        with patch('subprocess.run') as mock_run:
+            self.wg.sync_cell_routes()
+            mock_run.assert_not_called()
+
+    def test_ensure_cell_route_calls_ip_route_replace(self):
+        """Outside test dirs, _ensure_cell_route calls docker exec ip route replace."""
+        with patch.object(self.wg, '_config_file', return_value='/app/config/wireguard/wg0.conf'):
+            with patch('subprocess.run') as mock_run:
+                mock_run.return_value = MagicMock(returncode=0)
+                self.wg._ensure_cell_route('10.1.0.0/24')
+                mock_run.assert_called_once()
+                cmd = mock_run.call_args[0][0]
+                self.assertIn('ip', cmd)
+                self.assertIn('route', cmd)
+                self.assertIn('replace', cmd)
+                self.assertIn('10.1.0.0/24', cmd)
+                self.assertIn('wg0', cmd)
+
+    def test_sync_cell_routes_finds_cell_peers_in_config(self):
+        """sync_cell_routes parses wg0.conf and adds routes for cell peers only."""
+        conf = (
+            '[Interface]\nPrivateKey = dummykey\nAddress = 10.0.0.1/24\nListenPort = 51820\n\n'
+            '[Peer]\n# cell:remote\nPublicKey = cmVtb3RlcHVia2V5X2Zvcl90ZXN0c193Z3Rlc3QxMiE=\n'
+            'AllowedIPs = 10.1.0.0/24\nPersistentKeepalive = 25\n\n'
+            '[Peer]\n# alice\nPublicKey = YWxpY2VwdWJrZXlfZm9yX3Rlc3RzX3dndGVzdDEyMyE=\n'
+            'AllowedIPs = 10.0.0.2/32\nPersistentKeepalive = 25\n'
+        )
+        with patch.object(self.wg, '_config_file', return_value='/app/config/wireguard/wg0.conf'):
+            with patch.object(self.wg, '_read_config', return_value=conf):
+                with patch('subprocess.run') as mock_run:
+                    mock_run.return_value = MagicMock(returncode=0)
+                    self.wg.sync_cell_routes()
+                    calls = [c[0][0] for c in mock_run.call_args_list]
+                    subnets = [c for c in calls if '10.1.0.0/24' in c]
+                    non_cell = [c for c in calls if '10.0.0.2/32' in c]
+                    self.assertTrue(len(subnets) >= 1, 'expected route add for cell peer subnet')
+                    self.assertEqual(len(non_cell), 0, 'should not add route for regular peer')
+
+    def test_sync_cell_routes_splits_comma_separated_allowed_ips(self):
+        """Each network in a comma-separated AllowedIPs gets its own route."""
+        conf = (
+            '[Interface]\nPrivateKey = dummykey\nAddress = 10.0.0.1/24\n\n'
+            '[Peer]\n# cell:remote\nPublicKey = cmVtb3RlcHVia2V5X2Zvcl90ZXN0c193Z3Rlc3QxMiE=\n'
+            'AllowedIPs = 10.1.0.0/24, 10.2.0.0/24\n'
+        )
+        with patch.object(self.wg, '_config_file', return_value='/app/config/wireguard/wg0.conf'):
+            with patch.object(self.wg, '_read_config', return_value=conf):
+                with patch.object(self.wg, '_ensure_cell_route') as mock_route:
+                    self.wg.sync_cell_routes()
+        routed = [c[0][0] for c in mock_route.call_args_list]
+        self.assertEqual(routed, ['10.1.0.0/24', '10.2.0.0/24'])
+
+    def test_add_cell_peer_triggers_ensure_cell_route(self):
+        """add_cell_peer calls _ensure_cell_route after writing config."""
+        with patch.object(self.wg, '_ensure_cell_route') as mock_route:
+            self.wg.add_cell_peer('remote', self._CELL_PUBKEY, '5.6.7.8:51821', '10.1.0.0/24')
+            mock_route.assert_called_once_with('10.1.0.0/24')
+
+
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file