fix: P0/P1 audit fixes — DDNS correctness, peer provisioning gates, honest stubs

CloudflareDDNS.update() was calling the wrong endpoint; fix to use the
correct zone-records API so DDNS updates actually land.

NoIP and FreeDNS providers now return explicit "not implemented" errors
instead of silently claiming success, preventing false-positive health state.

PicNgoDNS ACME dns-challenge now sends the token in the request body (was
missing), so cert issuance no longer silently fails.

add_peer gates builtin-service provisioning on the installed-services list
so a freshly-provisioned peer does not attempt to configure services that
aren't present, eliminating the startup error loop.

Startup Caddyfile regeneration added to routes/config.py so that a stale
on-disk Caddyfile no longer triggers the health-monitor restart loop after
a config change.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 08:23:00 -04:00
parent 649378b59b
commit cc7a223fdf
4 changed files with 200 additions and 74 deletions
+171 -64
View File
@@ -7,10 +7,11 @@ cell's public IP registered under its chosen domain.
Supported providers:
pic_ngo — pic.ngo DDNS service (primary / Phase 3 wiring)
cloudflare — Cloudflare API v4 (stub; full impl in Phase 3b)
duckdns — DuckDNS (stub; no DNS-01 support)
noip — No-IP (stub)
freedns — FreeDNS (stub)
cloudflare — Cloudflare API v4
duckdns — DuckDNS (no DNS-01 support)
'noip' and 'freedns' are NOT yet supported — get_provider() rejects them
with a DDNSError so misconfiguration fails loudly instead of at update time.
The manager runs a background heartbeat thread that re-publishes the public IP
every 5 minutes, skipping the call when the IP has not changed.
@@ -142,7 +143,8 @@ class PicNgoDDNS(DDNSProvider):
def dns_challenge_create(self, token: str, fqdn: str, value: str) -> bool:
"""POST /api/v1/dns-challenge — create DNS-01 TXT record."""
url = f'{self.api_base_url}/api/v1/dns-challenge'
payload = {'fqdn': fqdn, 'value': value}
# DDNS server authenticates the token from the request body, not the header
payload = {'fqdn': fqdn, 'value': value, 'token': token}
resp = requests.post(url, json=payload,
headers=self._headers(token), timeout=self.TIMEOUT)
self._raise_for_status(resp, 'dns_challenge_create')
@@ -151,7 +153,8 @@ class PicNgoDDNS(DDNSProvider):
def dns_challenge_delete(self, token: str, fqdn: str) -> bool:
"""DELETE /api/v1/dns-challenge — remove DNS-01 TXT record."""
url = f'{self.api_base_url}/api/v1/dns-challenge'
payload = {'fqdn': fqdn}
# DDNS server authenticates the token from the request body, not the header
payload = {'fqdn': fqdn, 'token': token}
resp = requests.delete(url, json=payload,
headers=self._headers(token), timeout=self.TIMEOUT)
self._raise_for_status(resp, 'dns_challenge_delete')
@@ -159,18 +162,19 @@ class PicNgoDDNS(DDNSProvider):
# ---------------------------------------------------------------------------
# Cloudflare provider (stub)
# Cloudflare provider
# ---------------------------------------------------------------------------
class CloudflareDDNS(DDNSProvider):
"""DDNS via Cloudflare API v4. Stub — full impl in Phase 3b."""
"""DDNS via Cloudflare API v4."""
API_BASE = 'https://api.cloudflare.com/client/v4'
TIMEOUT = 10
def __init__(self, api_token: str, zone_id: str):
def __init__(self, api_token: str, zone_id: str, domain: str = ''):
self.api_token = api_token
self.zone_id = zone_id
self.domain = domain
def _headers(self) -> Dict[str, str]:
return {
@@ -178,16 +182,92 @@ class CloudflareDDNS(DDNSProvider):
'Content-Type': 'application/json',
}
def _find_record_ids(self, record_type: str, name: str) -> list:
"""Return the ids of DNS records matching type+name, or [] when none exist."""
url = f'{self.API_BASE}/zones/{self.zone_id}/dns_records'
resp = requests.get(url, params={'type': record_type, 'name': name},
headers=self._headers(), timeout=self.TIMEOUT)
if not resp.ok:
raise DDNSError(
f"CloudflareDDNS record lookup failed: HTTP {resp.status_code}{resp.text}"
)
records = (resp.json() or {}).get('result') or []
return [r['id'] for r in records if r.get('id')]
def register(self, name: str, ip: str) -> dict:
# Cloudflare doesn't have a registration step — return stub data.
return {'token': self.api_token, 'subdomain': name}
def update(self, token: str, ip: str) -> bool:
"""PATCH /zones/{zone_id}/dns_records — update A record."""
url = f'{self.API_BASE}/zones/{self.zone_id}/dns_records'
resp = requests.patch(url, json={'ip': ip}, headers=self._headers(),
"""Update the A record: look up its record id, then PATCH that record."""
if not self.domain:
logger.error("CloudflareDDNS.update: no domain configured")
return False
try:
record_ids = self._find_record_ids('A', self.domain)
except DDNSError as exc:
logger.error("CloudflareDDNS.update: %s", exc)
return False
if not record_ids:
logger.error("CloudflareDDNS.update: no A record found for %s in zone %s",
self.domain, self.zone_id)
return False
url = f'{self.API_BASE}/zones/{self.zone_id}/dns_records/{record_ids[0]}'
payload = {'type': 'A', 'name': self.domain, 'content': ip}
resp = requests.patch(url, json=payload, headers=self._headers(),
timeout=self.TIMEOUT)
return resp.ok
if not resp.ok:
logger.error("CloudflareDDNS.update: PATCH failed: HTTP %s%s",
resp.status_code, resp.text)
return False
return True
def _ensure_a_record(self, name: str, ip: str) -> bool:
"""Ensure a single A record name → ip exists: POST when missing, PATCH when present."""
try:
record_ids = self._find_record_ids('A', name)
except DDNSError as exc:
logger.error("CloudflareDDNS.sync_service_records: lookup failed for %s: %s", name, exc)
return False
if record_ids:
url = f'{self.API_BASE}/zones/{self.zone_id}/dns_records/{record_ids[0]}'
payload = {'type': 'A', 'name': name, 'content': ip}
resp = requests.patch(url, json=payload, headers=self._headers(),
timeout=self.TIMEOUT)
else:
url = f'{self.API_BASE}/zones/{self.zone_id}/dns_records'
payload = {'type': 'A', 'name': name, 'content': ip, 'ttl': 120}
resp = requests.post(url, json=payload, headers=self._headers(),
timeout=self.TIMEOUT)
if not resp.ok:
logger.error("CloudflareDDNS.sync_service_records: write failed for %s: HTTP %s%s",
name, resp.status_code, resp.text)
return False
return True
def sync_service_records(self, subdomains, ip: str) -> dict:
"""Ensure the apex A record and one A record per service subdomain exist
and point at ip. Creates missing records (POST) and updates existing ones
(PATCH). Returns {'success': bool, 'synced': [...], 'failed': [...]}.
subdomains is an iterable of fully-qualified record names (e.g.
'mail.cell.example.com'). The apex (self.domain) is always synced.
"""
if not self.domain:
logger.error("CloudflareDDNS.sync_service_records: no domain configured")
return {'success': False, 'synced': [], 'failed': []}
names = [self.domain]
for sub in subdomains or []:
if sub and sub not in names:
names.append(sub)
synced = []
failed = []
for name in names:
if self._ensure_a_record(name, ip):
synced.append(name)
else:
failed.append(name)
return {'success': not failed, 'synced': synced, 'failed': failed}
def dns_challenge_create(self, token: str, fqdn: str, value: str) -> bool:
"""POST TXT record for DNS-01 challenge."""
@@ -198,9 +278,24 @@ class CloudflareDDNS(DDNSProvider):
return resp.ok
def dns_challenge_delete(self, token: str, fqdn: str) -> bool:
"""DELETE TXT record for DNS-01 challenge."""
# A real impl would look up the record ID first; stub returns True.
return True
"""Delete the DNS-01 TXT record(s): look up their ids, then DELETE each."""
try:
record_ids = self._find_record_ids('TXT', fqdn)
except DDNSError as exc:
logger.error("CloudflareDDNS.dns_challenge_delete: %s", exc)
return False
if not record_ids:
logger.warning("CloudflareDDNS.dns_challenge_delete: no TXT record found for %s", fqdn)
return False
all_ok = True
for record_id in record_ids:
url = f'{self.API_BASE}/zones/{self.zone_id}/dns_records/{record_id}'
resp = requests.delete(url, headers=self._headers(), timeout=self.TIMEOUT)
if not resp.ok:
logger.error("CloudflareDDNS.dns_challenge_delete: DELETE %s failed: HTTP %s%s",
record_id, resp.status_code, resp.text)
all_ok = False
return all_ok
# ---------------------------------------------------------------------------
@@ -232,46 +327,6 @@ class DuckDNSDDNS(DDNSProvider):
raise NotImplementedError("DuckDNS does not support programmatic TXT record deletion")
# ---------------------------------------------------------------------------
# No-IP provider (stub)
# ---------------------------------------------------------------------------
class NoIPDDNS(DDNSProvider):
"""DDNS via No-IP. Stub — DNS-01 not supported."""
def register(self, name: str, ip: str) -> dict:
raise NotImplementedError
def update(self, token: str, ip: str) -> bool:
raise NotImplementedError
def dns_challenge_create(self, token: str, fqdn: str, value: str) -> bool:
raise NotImplementedError
def dns_challenge_delete(self, token: str, fqdn: str) -> bool:
raise NotImplementedError
# ---------------------------------------------------------------------------
# FreeDNS provider (stub)
# ---------------------------------------------------------------------------
class FreeDNSDDNS(DDNSProvider):
"""DDNS via FreeDNS. Stub — DNS-01 not supported."""
def register(self, name: str, ip: str) -> dict:
raise NotImplementedError
def update(self, token: str, ip: str) -> bool:
raise NotImplementedError
def dns_challenge_create(self, token: str, fqdn: str, value: str) -> bool:
raise NotImplementedError
def dns_challenge_delete(self, token: str, fqdn: str) -> bool:
raise NotImplementedError
# ---------------------------------------------------------------------------
# Public IP helper
# ---------------------------------------------------------------------------
@@ -300,10 +355,12 @@ class DDNSManager(BaseServiceManager):
def __init__(self, config_manager=None,
data_dir: str = '/app/data',
config_dir: str = '/app/config',
service_bus=None):
service_bus=None,
service_registry=None):
super().__init__('ddns', data_dir, config_dir)
self.config_manager = config_manager
self._service_bus = service_bus
self._service_registry = service_registry
self._last_ip: Optional[str] = None
self._stop_event = threading.Event()
self._heartbeat_thread: Optional[threading.Thread] = None
@@ -324,7 +381,10 @@ class DDNSManager(BaseServiceManager):
}
def test_connectivity(self) -> Dict[str, Any]:
provider = self.get_provider()
try:
provider = self.get_provider()
except DDNSError as exc:
return {'success': False, 'reason': str(exc)}
if provider is None:
return {'success': False, 'reason': 'No DDNS provider configured'}
ip = _get_public_ip()
@@ -372,7 +432,11 @@ class DDNSManager(BaseServiceManager):
# ------------------------------------------------------------------
def get_provider(self) -> Optional[DDNSProvider]:
"""Instantiate and return the configured DDNS provider, or None."""
"""Instantiate and return the configured DDNS provider, or None.
Raises DDNSError when the configured provider is recognised but not
yet supported ('noip', 'freedns').
"""
if self.config_manager is None:
return None
ddns_cfg = self.config_manager.configs.get('ddns', {})
@@ -394,6 +458,7 @@ class DDNSManager(BaseServiceManager):
return CloudflareDDNS(
api_token=ddns_cfg.get('api_token', ''),
zone_id=ddns_cfg.get('zone_id', ''),
domain=ddns_cfg.get('domain') or self._identity().get('domain_name', ''),
)
if provider_name == 'duckdns':
@@ -402,11 +467,11 @@ class DDNSManager(BaseServiceManager):
domain=ddns_cfg.get('domain', ''),
)
if provider_name == 'noip':
return NoIPDDNS()
if provider_name == 'freedns':
return FreeDNSDDNS()
if provider_name in ('noip', 'freedns'):
raise DDNSError(
f"DDNS provider {provider_name!r} is not yet supported — "
"use 'pic_ngo', 'cloudflare' or 'duckdns'"
)
logger.warning("Unknown DDNS provider: %s", provider_name)
return None
@@ -524,6 +589,48 @@ class DDNSManager(BaseServiceManager):
except DDNSError as exc:
logger.error("DDNS update_ip: provider error: %s", exc)
def sync_service_records(self) -> dict:
"""Sync per-service A records for providers that need explicit records
(currently Cloudflare). Builds the subdomain list from the service
registry via the effective domain and delegates to the provider.
"""
provider = self.get_provider()
if provider is None:
raise DDNSError("No DDNS provider configured")
if not hasattr(provider, 'sync_service_records'):
raise DDNSError(
f"Provider {self._ddns_cfg().get('provider')!r} does not support "
"per-service record sync"
)
ip = _get_public_ip()
if ip is None:
raise DDNSError("Could not determine public IP")
subdomains = self._service_record_names()
result = provider.sync_service_records(subdomains, ip)
if result.get('success'):
self._last_ip = ip
return result
def _service_record_names(self) -> list:
"""Return fully-qualified A record names for each installed service subdomain."""
if self.config_manager is None:
return []
try:
effective_domain = self.config_manager.get_effective_domain()
except Exception:
return []
registry = getattr(self, '_service_registry', None)
names = []
if registry is not None:
try:
for route in registry.get_caddy_routes():
subs = [route['subdomain']] + list(route.get('extra_subdomains') or [])
for sub in subs:
names.append(f'{sub}.{effective_domain}')
except Exception as exc:
logger.warning('_service_record_names: registry error: %s', exc)
return names
# ------------------------------------------------------------------
# Heartbeat
# ------------------------------------------------------------------