fix: split-horizon DNS zone uses WireGuard IP, not Docker bridge IP
Unit Tests / test (push) Successful in 7m31s

VPN peers can reach Caddy via the host's WireGuard interface (10.0.0.1),
not via the Docker bridge IP (172.20.0.2) which is unreachable outside
the container network. _bootstrap_dns now calls _get_wg_server_ip()
instead of ip_utils.get_service_ips() so the internal zone returns a
routable address for service subdomains.

Also log config save failures instead of silently swallowing them —
the silent PermissionError/OSError was masking write failures and
making it impossible to diagnose why installed services disappeared
after container restarts.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-08 02:11:01 -04:00
parent e4c80149f4
commit bd71466a87
4 changed files with 94 additions and 4 deletions
+4 -2
View File
@@ -436,8 +436,10 @@ def _bootstrap_dns():
# Never call apply_ip_range here — it would pollute the DDNS parent zone.
effective_domain = config_manager.get_effective_domain()
if effective_domain and effective_domain != domain:
import ip_utils
caddy_ip = ip_utils.get_service_ips(ip_range).get('caddy', '172.20.0.2')
# Use the WireGuard server IP so VPN peers can reach Caddy via the tunnel.
# The Docker bridge IP (172.20.x.x) is only reachable inside the Docker
# network; WireGuard peers need the host's WG interface IP (e.g. 10.0.0.1).
caddy_ip = network_manager._get_wg_server_ip()
# update_split_horizon_zone writes both the zone file and the Corefile
# (with the split-horizon block included). No separate apply_all_dns_rules
# call needed — that would overwrite the Corefile and drop the split-horizon block.
+2 -2
View File
@@ -163,8 +163,8 @@ class ConfigManager:
f.flush()
os.fsync(f.fileno())
os.replace(tmp, self.config_file)
except (PermissionError, OSError):
pass
except (PermissionError, OSError) as e:
logger.error('_save_all_configs: write failed — config NOT persisted to disk: %s', e)
def get_service_config(self, service: str) -> Dict[str, Any]:
"""Get configuration for a specific service"""
+33
View File
@@ -260,6 +260,39 @@ class TestConfigManager(unittest.TestCase):
"import must not inject zero-filled entries for absent services")
class TestSaveAllConfigs(unittest.TestCase):
"""_save_all_configs must log errors instead of silently swallowing them."""
def setUp(self):
self.temp_dir = tempfile.mkdtemp()
self.config_file = os.path.join(self.temp_dir, 'cell_config.json')
self.data_dir = os.path.join(self.temp_dir, 'data')
os.makedirs(self.data_dir, exist_ok=True)
self.cm = ConfigManager(self.config_file, self.data_dir)
def tearDown(self):
shutil.rmtree(self.temp_dir)
def test_save_failure_is_logged_not_silenced(self):
"""When the config file cannot be written, _save_all_configs must log an error."""
with patch('builtins.open', side_effect=OSError('disk full')):
with self.assertLogs('config_manager', level='ERROR') as log:
self.cm._save_all_configs()
self.assertTrue(
any('write failed' in msg or 'NOT persisted' in msg for msg in log.output),
f'Expected error about write failure in logs, got: {log.output}',
)
def test_save_success_does_not_log_error(self):
"""A successful save must not produce error logs."""
import logging
with self.assertLogs('config_manager', level='DEBUG') as cm:
logging.getLogger('config_manager').debug('sentinel')
self.cm._save_all_configs()
errors = [m for m in cm.output if 'ERROR' in m and 'write failed' in m]
self.assertEqual(errors, [], 'Unexpected write-failure error on a successful save')
class TestGetEffectiveDomain(unittest.TestCase):
"""Tests for ConfigManager.get_effective_domain and get_internal_domain."""
+55
View File
@@ -639,5 +639,60 @@ class TestUpdateSplitHorizonZoneStaleCleanup(unittest.TestCase):
self.assertTrue(os.path.exists(current_zone))
class TestGetWgServerIp(unittest.TestCase):
"""_get_wg_server_ip must read from wg0.conf and fall back to 10.0.0.1.
Regression guard: _bootstrap_dns used to pass 172.20.0.2 (Docker bridge IP)
to update_split_horizon_zone. WireGuard peers cannot reach that IP; the zone
must use the WireGuard server IP (e.g. 10.0.0.1) so VPN clients can reach Caddy.
"""
def setUp(self):
self.test_dir = tempfile.mkdtemp()
self.data_dir = os.path.join(self.test_dir, 'data')
self.config_dir = os.path.join(self.test_dir, 'config')
os.makedirs(os.path.join(self.data_dir, 'dns'), exist_ok=True)
os.makedirs(os.path.join(self.config_dir, 'dns'), exist_ok=True)
self.nm = NetworkManager(self.data_dir, self.config_dir)
def tearDown(self):
shutil.rmtree(self.test_dir)
def _write_wg_conf(self, address: str) -> None:
wg_dir = os.path.join(self.config_dir, 'wireguard', 'wg_confs')
os.makedirs(wg_dir, exist_ok=True)
with open(os.path.join(wg_dir, 'wg0.conf'), 'w') as f:
f.write(f'[Interface]\nAddress = {address}\nListenPort = 51820\n')
def test_reads_address_from_wg0_conf(self):
self._write_wg_conf('10.0.0.1/24')
self.assertEqual(self.nm._get_wg_server_ip(), '10.0.0.1')
def test_reads_non_default_address(self):
self._write_wg_conf('10.8.0.1/16')
self.assertEqual(self.nm._get_wg_server_ip(), '10.8.0.1')
def test_falls_back_to_10_0_0_1_when_conf_missing(self):
self.assertEqual(self.nm._get_wg_server_ip(), '10.0.0.1')
def test_split_horizon_zone_uses_wg_ip_not_docker_bridge(self):
"""update_split_horizon_zone called with WG IP writes that IP in zone file.
This is the correct call pattern from _bootstrap_dns: pass the WireGuard
server IP, not 172.20.0.x (Docker bridge IP unreachable from VPN peers).
"""
self._write_wg_conf('10.0.0.1/24')
wg_ip = self.nm._get_wg_server_ip()
self.assertEqual(wg_ip, '10.0.0.1',
'WireGuard IP must be read from wg0.conf, not be a Docker bridge address')
with patch('subprocess.run'):
self.nm.update_split_horizon_zone('pic1.pic.ngo', wg_ip)
zone_path = os.path.join(self.data_dir, 'dns', 'pic1.pic.ngo.zone')
content = open(zone_path).read()
self.assertIn('10.0.0.1', content)
self.assertNotIn('172.20.0', content,
'Zone must not contain Docker bridge IP — VPN peers cannot reach it')
if __name__ == '__main__':
unittest.main()