Fix DDNS security and reliability gaps (#2, #3, #5, #6, #7)
Unit Tests / test (push) Successful in 7m23s

- Fix #2: Move DDNS bearer token from cell_config.json to data/api/ddns_token.
  Token is now in the secrets store (data/) rather than the config store (config/).
  Auto-migrates existing installs on first access. ConfigManager.get/set_ddns_token()
  added. set_ddns_config() now strips 'token' key to prevent it leaking back.

- Fix #3: Set Caddyfile permissions to 0o600 after write so the token embedded
  in the Caddyfile is not world-readable on the host filesystem.

- Fix #5: Heartbeat now fires IDENTITY_CHANGED after re-registration so Caddy
  regenerates its config with the new token automatically — users no longer need
  to click Re-register in Settings after a wizard registration failure.
  Also: heartbeat skips the 401-cycle when no token exists and goes straight to
  registration instead. DDNSManager now accepts service_bus= and is wired up.

- Fix #6: Settings page starts polling GET /api/caddy/cert-status every 15s
  after a successful DDNS re-registration and shows "Acquiring certificate…"
  feedback until Let's Encrypt issues the cert (up to 5 minutes).

- Fix #7: regenerate_with_installed() is debounced (5 s window) so two rapid
  IDENTITY_CHANGED events (e.g. wizard + heartbeat) can't start simultaneous
  ACME orders that interfere with each other.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-09 03:37:48 -04:00
parent 40f9d90fad
commit 3d750ed1e8
8 changed files with 248 additions and 67 deletions
+34 -7
View File
@@ -28,6 +28,7 @@ import logging
import os
import socket as _socket
import ssl as _ssl
import threading
import time as _time
from typing import Any, Dict, List, Optional
@@ -78,6 +79,10 @@ class CaddyManager(BaseServiceManager):
self._health_failures = 0
# Monotonic timestamp of the last successful cert status refresh.
self._cert_refreshed_at: Optional[float] = None
# Debounce: prevent two rapid Caddyfile reloads (e.g. IDENTITY_CHANGED
# fires from wizard AND heartbeat re-registration within seconds of each other).
self._last_regenerate_at: float = 0.0
self._regenerate_lock = threading.Lock()
if service_bus is not None:
from service_bus import EventType
@@ -311,13 +316,17 @@ class CaddyManager(BaseServiceManager):
# Resolve credentials at write time — Caddy runs in its own container
# and does not inherit the API's environment variables, so we embed the
# actual values instead of {$VAR} placeholders.
# Use the registration bearer token (ddns.token), NOT the TOTP secret —
# the pic_ngo plugin authenticates to POST /api/v1/dns-challenge with this token.
ddns_cfg = self.config_manager.configs.get('ddns', {})
ddns_token = (ddns_cfg.get('token') or os.environ.get('DDNS_TOKEN') or '').strip()
_raw_api = (os.environ.get('DDNS_URL') or ddns_cfg.get('url') or 'https://ddns.pic.ngo').strip()
# Token is read from data/api/ddns_token (not cell_config.json).
ddns_cfg = self.config_manager.configs.get('ddns', {})
if hasattr(self.config_manager, 'get_ddns_token'):
ddns_token = self.config_manager.get_ddns_token() or ''
else:
ddns_token = (ddns_cfg.get('token') or '').strip()
if not ddns_token:
ddns_token = os.environ.get('DDNS_TOKEN', '').strip()
_raw_api = (os.environ.get('DDNS_URL') or ddns_cfg.get('url') or 'https://ddns.pic.ngo').strip()
# Strip legacy /api/v1 suffix — the pic_ngo plugin appends /api/v1 itself.
ddns_api = _raw_api.rstrip('/').removesuffix('/api/v1')
ddns_api = _raw_api.rstrip('/').removesuffix('/api/v1')
# No token yet (fresh install, pre-registration) — Caddy would reject a
# bare `token` keyword with no value. Fall back to LAN mode so Caddy
@@ -458,6 +467,10 @@ class CaddyManager(BaseServiceManager):
os.fsync(f.fileno())
except OSError:
pass
try:
os.chmod(self.caddyfile_path, 0o600)
except OSError:
pass
logger.info("Wrote Caddyfile to %s (%d bytes)",
self.caddyfile_path, len(caddyfile_content))
except Exception as e:
@@ -530,8 +543,22 @@ class CaddyManager(BaseServiceManager):
# ── certificate status ────────────────────────────────────────────────
_REGENERATE_DEBOUNCE = 5.0 # seconds
def regenerate_with_installed(self, installed_services: list) -> bool:
"""Regenerate Caddyfile with installed services and reload."""
"""Regenerate Caddyfile with installed services and reload.
Debounced: skips if called again within _REGENERATE_DEBOUNCE seconds.
This prevents two simultaneous ACME orders when IDENTITY_CHANGED fires
from multiple sources (e.g. wizard completion + heartbeat re-registration)
within a short window.
"""
now = _time.monotonic()
with self._regenerate_lock:
if now - self._last_regenerate_at < self._REGENERATE_DEBOUNCE:
logger.debug("caddy regenerate_with_installed: skipped (debounce)")
return True
self._last_regenerate_at = now
identity = self.config_manager.get_identity()
content = self.generate_caddyfile(identity, installed_services)
return self.write_caddyfile(content)
+43 -1
View File
@@ -678,10 +678,52 @@ class ConfigManager:
return dict(cfg)
def set_ddns_config(self, ddns_cfg: Dict[str, Any]) -> None:
"""Replace the top-level ddns section and persist."""
"""Replace the top-level ddns section and persist.
Never writes a 'token' key into cell_config.json — tokens live in data/.
"""
ddns_cfg = {k: v for k, v in ddns_cfg.items() if k != 'token'}
self.configs['ddns'] = ddns_cfg
self._save_all_configs()
@property
def _ddns_token_path(self) -> Path:
return self.data_dir / 'api' / 'ddns_token'
def get_ddns_token(self) -> str:
"""Return the DDNS bearer token from data/api/ddns_token.
Migrates automatically from the old cell_config.json location on first
call so existing installs keep working without manual intervention.
"""
path = self._ddns_token_path
if path.exists():
try:
tok = path.read_text().strip()
if tok:
return tok
except (PermissionError, OSError):
pass
# Migrate legacy token from cell_config.json
old_token = self.configs.get('ddns', {}).get('token', '')
if old_token:
self.set_ddns_token(old_token)
return old_token
def set_ddns_token(self, token: str) -> None:
"""Write the DDNS bearer token to data/api/ddns_token (not cell_config.json)."""
path = self._ddns_token_path
try:
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(token)
except (PermissionError, OSError) as exc:
logger.error('set_ddns_token: failed to write token file: %s', exc)
return
# Remove from cell_config.json if a legacy copy is there
if self.configs.get('ddns', {}).get('token'):
ddns_cfg = {k: v for k, v in self.configs.get('ddns', {}).items() if k != 'token'}
self.configs['ddns'] = ddns_cfg
self._save_all_configs()
def set_connectivity_field(self, field: str, value: Any) -> bool:
"""Set a single field within the connectivity config and persist."""
cfg = self.configs.setdefault('connectivity', {'exits': {}, 'peer_exit_map': {}})
+55 -9
View File
@@ -299,9 +299,11 @@ class DDNSManager(BaseServiceManager):
def __init__(self, config_manager=None,
data_dir: str = '/app/data',
config_dir: str = '/app/config'):
config_dir: str = '/app/config',
service_bus=None):
super().__init__('ddns', data_dir, config_dir)
self.config_manager = config_manager
self._service_bus = service_bus
self._last_ip: Optional[str] = None
self._stop_event = threading.Event()
self._heartbeat_thread: Optional[threading.Thread] = None
@@ -344,6 +346,27 @@ class DDNSManager(BaseServiceManager):
return {}
return self.config_manager.configs.get('ddns', {}) or {}
def _get_token(self) -> str:
"""Return the DDNS bearer token from the secure token store."""
if self.config_manager is None:
return ''
if hasattr(self.config_manager, 'get_ddns_token'):
return self.config_manager.get_ddns_token() or ''
return self.config_manager.configs.get('ddns', {}).get('token', '')
def _fire_identity_changed(self, source: str) -> None:
"""Publish IDENTITY_CHANGED so CaddyManager regenerates its config."""
if self._service_bus is None:
return
try:
from service_bus import EventType
cell_name = self._identity().get('cell_name', '')
self._service_bus.publish_event(EventType.IDENTITY_CHANGED, source, {
'cell_name': cell_name,
})
except Exception as exc:
logger.warning('DDNSManager._fire_identity_changed: %s', exc)
# ------------------------------------------------------------------
# Provider factory
# ------------------------------------------------------------------
@@ -409,7 +432,7 @@ class DDNSManager(BaseServiceManager):
# Release the old subdomain if the name is changing and we hold a token
if self.config_manager is not None and hasattr(provider, 'release'):
old_token = self._ddns_cfg().get('token', '')
old_token = self._get_token()
old_domain = self._identity().get('domain_name', '')
old_name = old_domain.replace('.pic.ngo', '') if old_domain else ''
if old_token and old_name and old_name != name:
@@ -422,11 +445,14 @@ class DDNSManager(BaseServiceManager):
result = provider.register(name, ip)
if self.config_manager is not None:
# Token lives in the top-level ddns config so update_ip() can find it
# Token stored in data/api/ddns_token (not cell_config.json)
if 'token' in result:
ddns_cfg = dict(self.config_manager.configs.get('ddns', {}))
ddns_cfg['token'] = result['token']
self.config_manager.set_ddns_config(ddns_cfg)
if hasattr(self.config_manager, 'set_ddns_token'):
self.config_manager.set_ddns_token(result['token'])
else:
ddns_cfg = dict(self.config_manager.configs.get('ddns', {}))
ddns_cfg['token'] = result['token']
self.config_manager.set_ddns_config(ddns_cfg)
# Keep domain_name in identity up to date
if 'subdomain' in result:
self.config_manager.set_identity_field('domain_name', result['subdomain'])
@@ -454,7 +480,26 @@ class DDNSManager(BaseServiceManager):
logger.debug("DDNS update_ip: IP unchanged (%s), skipping", current_ip)
return
token = self._ddns_cfg().get('token', '')
token = self._get_token()
# No token means we never successfully registered (e.g. wizard failed).
# Attempt registration immediately rather than waiting for the 401 cycle.
if not token:
provider_name = self._ddns_cfg().get('provider', '')
if provider_name == 'pic_ngo':
logger.info("DDNS update_ip: no token — attempting initial registration")
try:
cell_name = self._identity().get('cell_name', '')
if cell_name:
self.register(cell_name, current_ip)
logger.info("DDNS registered (no-token retry): cell_name=%r", cell_name)
self._last_ip = current_ip
self._fire_identity_changed('ddns_heartbeat')
else:
logger.error("DDNS update_ip: cannot register — cell_name not in identity")
except Exception as exc:
logger.error("DDNS update_ip: initial registration failed: %s", exc)
return
try:
success = provider.update(token, current_ip)
@@ -471,6 +516,7 @@ class DDNSManager(BaseServiceManager):
self.register(cell_name, current_ip)
logger.info("DDNS re-registered after token expiry: cell_name=%r", cell_name)
self._last_ip = current_ip
self._fire_identity_changed('ddns_heartbeat')
else:
logger.error("DDNS update_ip: cannot re-register — cell_name not in identity")
except Exception as exc2:
@@ -526,7 +572,7 @@ class DDNSManager(BaseServiceManager):
provider = self.get_provider()
if provider is None:
raise DDNSError("No DDNS provider configured")
token = self._ddns_cfg().get('token', '')
token = self._get_token()
return provider.dns_challenge_create(token, fqdn, value)
def dns_challenge_delete(self, fqdn: str) -> bool:
@@ -534,5 +580,5 @@ class DDNSManager(BaseServiceManager):
provider = self.get_provider()
if provider is None:
raise DDNSError("No DDNS provider configured")
token = self._ddns_cfg().get('token', '')
token = self._get_token()
return provider.dns_challenge_delete(token, fqdn)
+2 -1
View File
@@ -68,7 +68,8 @@ cell_link_manager = CellLinkManager(
auth_manager = AuthManager(data_dir=DATA_DIR, config_dir=CONFIG_DIR)
caddy_manager = CaddyManager(config_manager=config_manager, data_dir=DATA_DIR, config_dir=CONFIG_DIR,
service_bus=service_bus, service_registry=service_registry)
ddns_manager = DDNSManager(config_manager=config_manager, data_dir=DATA_DIR, config_dir=CONFIG_DIR)
ddns_manager = DDNSManager(config_manager=config_manager, data_dir=DATA_DIR, config_dir=CONFIG_DIR,
service_bus=service_bus)
connectivity_manager = ConnectivityManager(
config_manager=config_manager,
peer_registry=peer_registry,
+8 -3
View File
@@ -123,10 +123,15 @@ def get_config():
config['domain_name'] = identity.get('domain_name', '')
config['effective_domain'] = config_manager.get_effective_domain()
ddns_section = config_manager.configs.get('ddns', {})
_provider = ddns_section.get('provider', '')
_has_token = bool(
(config_manager.get_ddns_token() if _provider == 'pic_ngo' else '') or
ddns_section.get('api_token') or ddns_section.get('token')
)
config['ddns'] = {
'provider': ddns_section.get('provider', ''),
'provider': _provider,
'subdomain': ddns_section.get('subdomain', ''),
'has_token': bool(ddns_section.get('token') or ddns_section.get('api_token')),
'has_token': _has_token,
}
return jsonify(config)
except Exception as e:
@@ -613,7 +618,7 @@ def ddns_status():
except Exception:
pass
registered = bool(ddns_cfg.get('token'))
registered = bool(config_manager.get_ddns_token())
return jsonify({
'registered': registered,
'domain_name': identity.get('domain_name', ''),