Fix DDNS security and reliability gaps (#2, #3, #5, #6, #7)
Unit Tests / test (push) Successful in 7m23s

- Fix #2: Move DDNS bearer token from cell_config.json to data/api/ddns_token.
  Token is now in the secrets store (data/) rather than the config store (config/).
  Auto-migrates existing installs on first access. Adds ConfigManager.get_ddns_token()
  and set_ddns_token(). set_ddns_config() now strips the 'token' key so the token
  cannot leak back into cell_config.json.

- Fix #3: Set Caddyfile permissions to 0o600 after write so the token embedded
  in the Caddyfile is not world-readable on the host filesystem.

- Fix #5: Heartbeat now fires IDENTITY_CHANGED after re-registration so Caddy
  regenerates its config with the new token automatically — users no longer need
  to click Re-register in Settings after a wizard registration failure.
  Also: heartbeat skips the 401-cycle when no token exists and goes straight to
  registration instead. DDNSManager now accepts service_bus= and is wired up.

- Fix #6: Settings page starts polling GET /api/caddy/cert-status every 15s
  after a successful DDNS re-registration and shows "Acquiring certificate…"
  feedback until Let's Encrypt issues the cert (up to 5 minutes).

- Fix #7: regenerate_with_installed() is debounced (5 s window) so two rapid
  IDENTITY_CHANGED events (e.g. wizard + heartbeat) can't start simultaneous
  ACME orders that interfere with each other.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-09 03:37:48 -04:00
parent 40f9d90fad
commit 3d750ed1e8
8 changed files with 248 additions and 67 deletions
+34 -7
View File
@@ -28,6 +28,7 @@ import logging
import os
import socket as _socket
import ssl as _ssl
import threading
import time as _time
from typing import Any, Dict, List, Optional
@@ -78,6 +79,10 @@ class CaddyManager(BaseServiceManager):
self._health_failures = 0
# Monotonic timestamp of the last successful cert status refresh.
self._cert_refreshed_at: Optional[float] = None
# Debounce: prevent two rapid Caddyfile reloads (e.g. IDENTITY_CHANGED
# fires from wizard AND heartbeat re-registration within seconds of each other).
self._last_regenerate_at: float = 0.0
self._regenerate_lock = threading.Lock()
if service_bus is not None:
from service_bus import EventType
@@ -311,13 +316,17 @@ class CaddyManager(BaseServiceManager):
# Resolve credentials at write time — Caddy runs in its own container
# and does not inherit the API's environment variables, so we embed the
# actual values instead of {$VAR} placeholders.
# Use the registration bearer token (ddns.token), NOT the TOTP secret —
# the pic_ngo plugin authenticates to POST /api/v1/dns-challenge with this token.
ddns_cfg = self.config_manager.configs.get('ddns', {})
ddns_token = (ddns_cfg.get('token') or os.environ.get('DDNS_TOKEN') or '').strip()
_raw_api = (os.environ.get('DDNS_URL') or ddns_cfg.get('url') or 'https://ddns.pic.ngo').strip()
# Token is read from data/api/ddns_token (not cell_config.json).
ddns_cfg = self.config_manager.configs.get('ddns', {})
if hasattr(self.config_manager, 'get_ddns_token'):
ddns_token = self.config_manager.get_ddns_token() or ''
else:
ddns_token = (ddns_cfg.get('token') or '').strip()
if not ddns_token:
ddns_token = os.environ.get('DDNS_TOKEN', '').strip()
_raw_api = (os.environ.get('DDNS_URL') or ddns_cfg.get('url') or 'https://ddns.pic.ngo').strip()
# Strip legacy /api/v1 suffix — the pic_ngo plugin appends /api/v1 itself.
ddns_api = _raw_api.rstrip('/').removesuffix('/api/v1')
ddns_api = _raw_api.rstrip('/').removesuffix('/api/v1')
# No token yet (fresh install, pre-registration) — Caddy would reject a
# bare `token` keyword with no value. Fall back to LAN mode so Caddy
@@ -458,6 +467,10 @@ class CaddyManager(BaseServiceManager):
os.fsync(f.fileno())
except OSError:
pass
try:
os.chmod(self.caddyfile_path, 0o600)
except OSError:
pass
logger.info("Wrote Caddyfile to %s (%d bytes)",
self.caddyfile_path, len(caddyfile_content))
except Exception as e:
@@ -530,8 +543,22 @@ class CaddyManager(BaseServiceManager):
# ── certificate status ────────────────────────────────────────────────
_REGENERATE_DEBOUNCE = 5.0 # seconds
def regenerate_with_installed(self, installed_services: list) -> bool:
    """Regenerate the Caddyfile for the installed services and write it.

    Debounced: a call made within ``_REGENERATE_DEBOUNCE`` seconds of the
    last accepted call is skipped and reports ``True``. The intent is to
    avoid two simultaneous ACME orders when IDENTITY_CHANGED fires from
    multiple sources (e.g. wizard completion + heartbeat re-registration)
    within a short window.

    A failed attempt does not consume the debounce window: if generation
    raises or ``write_caddyfile`` reports failure, the previous timestamp
    is restored so an immediate retry is not silently skipped.

    Args:
        installed_services: Services passed through to ``generate_caddyfile``.

    Returns:
        ``True`` when the call was debounced; otherwise the result of
        ``write_caddyfile``.
    """
    now = _time.monotonic()
    with self._regenerate_lock:
        previous = self._last_regenerate_at
        if now - previous < self._REGENERATE_DEBOUNCE:
            logger.debug("caddy regenerate_with_installed: skipped (debounce)")
            return True
        # Claim the window before doing the work so a concurrent caller is
        # debounced instead of starting a second regeneration.
        self._last_regenerate_at = now

    succeeded = False
    try:
        identity = self.config_manager.get_identity()
        content = self.generate_caddyfile(identity, installed_services)
        result = self.write_caddyfile(content)
        succeeded = bool(result)
        return result
    finally:
        if not succeeded:
            with self._regenerate_lock:
                # Roll back only our own claim; a later caller may already
                # have advanced the timestamp.
                if self._last_regenerate_at == now:
                    self._last_regenerate_at = previous