From bd71466a879933700adc2e2d22d73a56ccfc7c48 Mon Sep 17 00:00:00 2001 From: Dmitrii Iurco Date: Mon, 8 Jun 2026 02:11:01 -0400 Subject: [PATCH] fix: split-horizon DNS zone uses WireGuard IP, not Docker bridge IP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VPN peers can reach Caddy via the host's WireGuard interface (10.0.0.1), not via the Docker bridge IP (172.20.0.2) which is unreachable outside the container network. _bootstrap_dns now calls _get_wg_server_ip() instead of ip_utils.get_service_ips() so the internal zone returns a routable address for service subdomains. Also log config save failures instead of silently swallowing them — the silent PermissionError/OSError was masking write failures and making it impossible to diagnose why installed services disappeared after container restarts. Co-Authored-By: Claude Sonnet 4.6 --- api/app.py | 6 ++-- api/config_manager.py | 4 +-- tests/test_config_manager.py | 33 +++++++++++++++++++++ tests/test_network_manager.py | 55 +++++++++++++++++++++++++++++++++++ 4 files changed, 94 insertions(+), 4 deletions(-) diff --git a/api/app.py b/api/app.py index c57945d..ad26a82 100644 --- a/api/app.py +++ b/api/app.py @@ -436,8 +436,10 @@ def _bootstrap_dns(): # Never call apply_ip_range here — it would pollute the DDNS parent zone. effective_domain = config_manager.get_effective_domain() if effective_domain and effective_domain != domain: - import ip_utils - caddy_ip = ip_utils.get_service_ips(ip_range).get('caddy', '172.20.0.2') + # Use the WireGuard server IP so VPN peers can reach Caddy via the tunnel. + # The Docker bridge IP (172.20.x.x) is only reachable inside the Docker + # network; WireGuard peers need the host's WG interface IP (e.g. 10.0.0.1). + caddy_ip = network_manager._get_wg_server_ip() # update_split_horizon_zone writes both the zone file and the Corefile # (with the split-horizon block included). No separate apply_all_dns_rules # call needed — that would overwrite the Corefile and drop the split-horizon block. diff --git a/api/config_manager.py b/api/config_manager.py index 98b39cd..4535291 100644 --- a/api/config_manager.py +++ b/api/config_manager.py @@ -163,8 +163,8 @@ class ConfigManager: f.flush() os.fsync(f.fileno()) os.replace(tmp, self.config_file) - except (PermissionError, OSError): - pass + except (PermissionError, OSError) as e: + logger.error('_save_all_configs: write failed — config NOT persisted to disk: %s', e) def get_service_config(self, service: str) -> Dict[str, Any]: """Get configuration for a specific service""" diff --git a/tests/test_config_manager.py b/tests/test_config_manager.py index fd7c859..18e45a7 100644 --- a/tests/test_config_manager.py +++ b/tests/test_config_manager.py @@ -260,6 +260,39 @@ class TestConfigManager(unittest.TestCase): "import must not inject zero-filled entries for absent services") +class TestSaveAllConfigs(unittest.TestCase): + """_save_all_configs must log errors instead of silently swallowing them.""" + + def setUp(self): + self.temp_dir = tempfile.mkdtemp() + self.config_file = os.path.join(self.temp_dir, 'cell_config.json') + self.data_dir = os.path.join(self.temp_dir, 'data') + os.makedirs(self.data_dir, exist_ok=True) + self.cm = ConfigManager(self.config_file, self.data_dir) + + def tearDown(self): + shutil.rmtree(self.temp_dir) + + def test_save_failure_is_logged_not_silenced(self): + """When the config file cannot be written, _save_all_configs must log an error.""" + with patch('builtins.open', side_effect=OSError('disk full')): + with self.assertLogs('config_manager', level='ERROR') as log: + self.cm._save_all_configs() + self.assertTrue( + any('write failed' in msg or 'NOT persisted' in msg for msg in log.output), + f'Expected error about write failure in logs, got: {log.output}', + ) + + def test_save_success_does_not_log_error(self): + """A successful save must not produce error logs.""" + import logging + with self.assertLogs('config_manager', level='DEBUG') as cm: + logging.getLogger('config_manager').debug('sentinel') + self.cm._save_all_configs() + errors = [m for m in cm.output if 'ERROR' in m and 'write failed' in m] + self.assertEqual(errors, [], 'Unexpected write-failure error on a successful save') + + class TestGetEffectiveDomain(unittest.TestCase): """Tests for ConfigManager.get_effective_domain and get_internal_domain.""" diff --git a/tests/test_network_manager.py b/tests/test_network_manager.py index 0c3623e..0cf8e94 100644 --- a/tests/test_network_manager.py +++ b/tests/test_network_manager.py @@ -639,5 +639,60 @@ class TestUpdateSplitHorizonZoneStaleCleanup(unittest.TestCase): self.assertTrue(os.path.exists(current_zone)) +class TestGetWgServerIp(unittest.TestCase): + """_get_wg_server_ip must read from wg0.conf and fall back to 10.0.0.1. + + Regression guard: _bootstrap_dns used to pass 172.20.0.2 (Docker bridge IP) + to update_split_horizon_zone. WireGuard peers cannot reach that IP; the zone + must use the WireGuard server IP (e.g. 10.0.0.1) so VPN clients can reach Caddy. + """ + + def setUp(self): + self.test_dir = tempfile.mkdtemp() + self.data_dir = os.path.join(self.test_dir, 'data') + self.config_dir = os.path.join(self.test_dir, 'config') + os.makedirs(os.path.join(self.data_dir, 'dns'), exist_ok=True) + os.makedirs(os.path.join(self.config_dir, 'dns'), exist_ok=True) + self.nm = NetworkManager(self.data_dir, self.config_dir) + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def _write_wg_conf(self, address: str) -> None: + wg_dir = os.path.join(self.config_dir, 'wireguard', 'wg_confs') + os.makedirs(wg_dir, exist_ok=True) + with open(os.path.join(wg_dir, 'wg0.conf'), 'w') as f: + f.write(f'[Interface]\nAddress = {address}\nListenPort = 51820\n') + + def test_reads_address_from_wg0_conf(self): + self._write_wg_conf('10.0.0.1/24') + self.assertEqual(self.nm._get_wg_server_ip(), '10.0.0.1') + + def test_reads_non_default_address(self): + self._write_wg_conf('10.8.0.1/16') + self.assertEqual(self.nm._get_wg_server_ip(), '10.8.0.1') + + def test_falls_back_to_10_0_0_1_when_conf_missing(self): + self.assertEqual(self.nm._get_wg_server_ip(), '10.0.0.1') + + def test_split_horizon_zone_uses_wg_ip_not_docker_bridge(self): + """update_split_horizon_zone called with WG IP writes that IP in zone file. + + This is the correct call pattern from _bootstrap_dns: pass the WireGuard + server IP, not 172.20.0.x (Docker bridge IP unreachable from VPN peers). + """ + self._write_wg_conf('10.0.0.1/24') + wg_ip = self.nm._get_wg_server_ip() + self.assertEqual(wg_ip, '10.0.0.1', + 'WireGuard IP must be read from wg0.conf, not be a Docker bridge address') + with patch('subprocess.run'): + self.nm.update_split_horizon_zone('pic1.pic.ngo', wg_ip) + zone_path = os.path.join(self.data_dir, 'dns', 'pic1.pic.ngo.zone') + content = open(zone_path).read() + self.assertIn('10.0.0.1', content) + self.assertNotIn('172.20.0', content, + 'Zone must not contain Docker bridge IP — VPN peers cannot reach it') + + if __name__ == '__main__': unittest.main() \ No newline at end of file