Fix three DNS corruption bugs in DDNS/non-LAN mode
Unit Tests / test (push) Successful in 11m30s

apply_cell_name() now skips multi-label zone files (split-horizon DDNS
zones like pic2.pic.ngo.zone) and excludes '*' and '@' from hostname
candidate detection, preventing the wildcard record from being renamed
to the old cell name during a cell rename.

update_split_horizon_zone() now deletes stale zone files from previous
cell names sharing the same TLD (e.g. pic3.pic.ngo.zone when renaming
to pic2.pic.ngo), eliminating orphaned DNS entries.

_bootstrap_dns() now detects non-LAN domain modes and calls
update_split_horizon_zone() instead of apply_ip_range(), preventing
service records (api, calendar, files…) from being re-injected into
the DDNS parent zone on every container restart.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-28 05:56:00 -04:00
parent 66500bb128
commit b16189d00f
3 changed files with 197 additions and 33 deletions
+14 -2
View File
@@ -390,8 +390,20 @@ def _bootstrap_dns():
cell_name = identity.get('cell_name', os.environ.get('CELL_NAME', 'mycell'))
domain = identity.get('domain', os.environ.get('CELL_DOMAIN', 'cell'))
ip_range = identity.get('ip_range', os.environ.get('CELL_IP_RANGE', '172.20.0.0/16'))
# Bootstrap on first start; then always regenerate to ensure A records use WG server IP.
network_manager.apply_ip_range(ip_range, cell_name, domain)
domain_mode = identity.get('domain_mode', 'lan')
if domain_mode == 'lan':
# LAN mode: write full service records into the primary local zone.
network_manager.apply_ip_range(ip_range, cell_name, domain)
else:
# Non-LAN mode (DDNS/ACME): ensure the split-horizon zone is present so
# LAN clients resolve service subdomains to the internal Caddy IP.
# Never call apply_ip_range here — it would pollute the DDNS parent zone.
effective_domain = config_manager.get_effective_domain()
if effective_domain and effective_domain != domain:
import ip_utils
caddy_ip = ip_utils.get_service_ips(ip_range).get('caddy', '172.20.0.2')
network_manager.update_split_horizon_zone(
effective_domain, caddy_ip, primary_domain=domain)
except Exception as e:
logger.warning(f"DNS bootstrap failed (non-fatal): {e}")
+56 -30
View File
@@ -182,6 +182,21 @@ class NetworkManager(BaseServiceManager):
if not ok:
logger.warning('update_split_horizon_zone: zone file write failed for %s', effective_domain)
# Delete split-horizon zone files for prior cell names sharing the same TLD.
# E.g. when renaming from pic3.pic.ngo → pic2.pic.ngo, remove pic3.pic.ngo.zone.
eff_parts = effective_domain.split('.')
if len(eff_parts) >= 2:
tld_suffix = '.' + '.'.join(eff_parts[1:])
for fname in os.listdir(self.dns_zones_dir):
if fname.endswith('.zone'):
z = fname[:-5]
if z.endswith(tld_suffix) and z != effective_domain:
try:
os.remove(os.path.join(self.dns_zones_dir, fname))
logger.info('Deleted stale split-horizon zone: %s', fname)
except OSError as _e:
logger.warning('Failed to delete stale zone %s: %s', fname, _e)
# If the internal zone name happens to be a parent of the effective DDNS
# domain (e.g. primary_domain='pic.ngo', effective_domain='pic2.pic.ngo'),
# bootstrap service records like 'api', 'calendar' etc. would pollute the
@@ -579,42 +594,53 @@ class NetworkManager(BaseServiceManager):
warnings = []
if not new_name:
return {'restarted': restarted, 'warnings': warnings}
# Exclude service names, wildcard, and apex from cell-hostname detection.
_service_names = {'api', 'webui', 'calendar', 'files', 'mail', 'webmail', 'webdav'}
_reserved = _service_names | {'@', '*'}
changed = False
try:
dns_data = os.path.join(self.data_dir, 'dns')
if os.path.isdir(dns_data):
for fname in os.listdir(dns_data):
if fname.endswith('.zone') and 'local' not in fname:
zone_file = os.path.join(dns_data, fname)
with open(zone_file) as f:
content = f.read()
# Determine which name to replace: prefer old_name if present,
# otherwise detect from zone (non-service A record not in _service_names)
actual_old = old_name if (
old_name and re.search(
rf'^{re.escape(old_name)}\s', content, re.MULTILINE)
) else None
if actual_old is None:
for m in re.finditer(
r'^(\S+)\s+(?:\d+\s+)?IN\s+A\s+\S+', content, re.MULTILINE
):
candidate = m.group(1)
if candidate not in _service_names and candidate != '@':
actual_old = candidate
break
if actual_old is None or actual_old == new_name:
break
new_content = re.sub(
rf'^{re.escape(actual_old)}(\s+(?:\d+\s+)?IN\s+A\s+)',
f'{new_name}\\1',
content, flags=re.MULTILINE
)
if new_content != content:
with open(zone_file, 'w') as f:
f.write(new_content)
changed = True
break
if not fname.endswith('.zone'):
continue
zone_name = fname[:-5]
# Skip split-horizon DDNS zones (multi-label, e.g. 'pic2.pic.ngo.zone')
# and any zone with 'local' in its name. The cell hostname only lives
# in the primary single-label zone (e.g. 'cell.zone').
if 'local' in zone_name or '.' in zone_name:
continue
zone_file = os.path.join(dns_data, fname)
with open(zone_file) as f:
content = f.read()
# Determine which name to replace: prefer old_name if present,
# otherwise detect from zone (non-service A record not in _reserved)
actual_old = old_name if (
old_name and re.search(
rf'^{re.escape(old_name)}\s', content, re.MULTILINE)
) else None
if actual_old is None:
for m in re.finditer(
r'^(\S+)\s+(?:\d+\s+)?IN\s+A\s+\S+', content, re.MULTILINE
):
candidate = m.group(1)
if candidate not in _reserved:
actual_old = candidate
break
if actual_old is None:
continue # no hostname in this zone; try next
if actual_old == new_name:
break # already correct
new_content = re.sub(
rf'^{re.escape(actual_old)}(\s+(?:\d+\s+)?IN\s+A\s+)',
f'{new_name}\\1',
content, flags=re.MULTILINE
)
if new_content != content:
with open(zone_file, 'w') as f:
f.write(new_content)
changed = True
break
if changed and reload:
self._reload_dns_service()
restarted.append('cell-dns (reloaded)')
+126
View File
@@ -496,5 +496,131 @@ class TestUpdateSplitHorizonZone(unittest.TestCase):
self.assertIn('calendar', content)
class TestApplyCellName(unittest.TestCase):
"""Tests for apply_cell_name — hostname rename in primary DNS zone."""
def setUp(self):
self.test_dir = tempfile.mkdtemp()
self.data_dir = os.path.join(self.test_dir, 'data')
self.config_dir = os.path.join(self.test_dir, 'config')
os.makedirs(os.path.join(self.data_dir, 'dns'), exist_ok=True)
os.makedirs(os.path.join(self.config_dir, 'dns'), exist_ok=True)
self.nm = NetworkManager(self.data_dir, self.config_dir)
def tearDown(self):
shutil.rmtree(self.test_dir)
def _write_zone(self, name: str, content: str):
path = os.path.join(self.data_dir, 'dns', f'{name}.zone')
with open(path, 'w') as f:
f.write(content)
return path
@patch('subprocess.run')
def test_renames_hostname_in_primary_zone(self, _mock):
"""Old cell name is replaced with new name in the primary zone."""
self._write_zone('cell', (
'$ORIGIN cell.\n'
'@ 300 IN SOA ns1 admin 1 3600 900 86400 300\n'
'@ 300 IN NS ns1\n'
'oldname 300 IN A 172.20.0.2\n'
'api 300 IN A 172.20.0.10\n'
))
self.nm.apply_cell_name('oldname', 'newname', reload=False)
content = open(os.path.join(self.data_dir, 'dns', 'cell.zone')).read()
self.assertIn('newname', content)
self.assertNotIn('oldname', content)
@patch('subprocess.run')
def test_does_not_corrupt_split_horizon_zone(self, _mock):
"""A multi-label DDNS zone (e.g. pic2.pic.ngo.zone) must not be touched."""
sh_path = self._write_zone('pic2.pic.ngo', (
'$ORIGIN pic2.pic.ngo.\n'
'@ 300 IN SOA ns1 admin 1 3600 900 86400 300\n'
'@ 300 IN NS ns1\n'
'@ 300 IN A 172.20.0.2\n'
'* 300 IN A 172.20.0.2\n'
))
self._write_zone('cell', (
'$ORIGIN cell.\n'
'@ 300 IN SOA ns1 admin 1 3600 900 86400 300\n'
'oldname 300 IN A 172.20.0.2\n'
))
self.nm.apply_cell_name('oldname', 'newname', reload=False)
# Split-horizon zone must be unchanged (wildcard not renamed)
sh_content = open(sh_path).read()
self.assertNotIn('newname', sh_content)
self.assertIn('* 300 IN A 172.20.0.2', sh_content)
@patch('subprocess.run')
def test_wildcard_not_treated_as_hostname(self, _mock):
"""Wildcard record in a zone must never be detected as the cell hostname."""
zone_path = self._write_zone('cell', (
'$ORIGIN cell.\n'
'@ 300 IN SOA ns1 admin 1 3600 900 86400 300\n'
'@ 300 IN A 172.20.0.2\n'
'* 300 IN A 172.20.0.2\n'
))
self.nm.apply_cell_name('', 'newname', reload=False)
content = open(zone_path).read()
# Wildcard must remain; 'newname' must not appear
self.assertIn('* 300 IN A', content)
self.assertNotIn('newname', content)
@patch('subprocess.run')
def test_skips_zone_with_local_in_name(self, _mock):
"""Zones with 'local' in the filename are ignored."""
local_path = self._write_zone('home.local', (
'$ORIGIN home.local.\n'
'oldname 300 IN A 172.20.0.2\n'
))
self.nm.apply_cell_name('oldname', 'newname', reload=False)
content = open(local_path).read()
self.assertIn('oldname', content)
self.assertNotIn('newname', content)
class TestUpdateSplitHorizonZoneStaleCleanup(unittest.TestCase):
"""Tests for stale split-horizon zone deletion in update_split_horizon_zone."""
def setUp(self):
self.test_dir = tempfile.mkdtemp()
self.data_dir = os.path.join(self.test_dir, 'data')
self.config_dir = os.path.join(self.test_dir, 'config')
os.makedirs(os.path.join(self.data_dir, 'dns'), exist_ok=True)
os.makedirs(os.path.join(self.config_dir, 'dns'), exist_ok=True)
self.nm = NetworkManager(self.data_dir, self.config_dir)
def tearDown(self):
shutil.rmtree(self.test_dir)
@patch('subprocess.run')
def test_deletes_old_cell_zone_same_tld(self, _mock):
"""When renaming pic3.pic.ngo → pic2.pic.ngo the old zone file is removed."""
old_zone = os.path.join(self.data_dir, 'dns', 'pic3.pic.ngo.zone')
with open(old_zone, 'w') as f:
f.write('@ 300 IN A 172.20.0.2\n')
self.nm.update_split_horizon_zone('pic2.pic.ngo', '172.20.0.2')
self.assertFalse(os.path.exists(old_zone), 'stale pic3.pic.ngo.zone should be deleted')
new_zone = os.path.join(self.data_dir, 'dns', 'pic2.pic.ngo.zone')
self.assertTrue(os.path.exists(new_zone))
@patch('subprocess.run')
def test_keeps_zone_for_different_tld(self, _mock):
"""Zone files under a different TLD are not deleted."""
other_zone = os.path.join(self.data_dir, 'dns', 'myhost.example.com.zone')
with open(other_zone, 'w') as f:
f.write('@ 300 IN A 1.2.3.4\n')
self.nm.update_split_horizon_zone('pic2.pic.ngo', '172.20.0.2')
self.assertTrue(os.path.exists(other_zone), 'unrelated zone must not be deleted')
@patch('subprocess.run')
def test_keeps_current_effective_zone(self, _mock):
"""The current effective_domain zone file is never deleted."""
self.nm.update_split_horizon_zone('pic2.pic.ngo', '172.20.0.2')
current_zone = os.path.join(self.data_dir, 'dns', 'pic2.pic.ngo.zone')
self.assertTrue(os.path.exists(current_zone))
if __name__ == '__main__':
unittest.main()