feat: HTTPS cert status, IDENTITY_CHANGED wiring, remove stale ip_utils Caddyfile writes
Unit Tests / test (push) Successful in 11m18s

- CaddyManager: add refresh_cert_status() and get_cert_status_fresh() that
  open a live TLS connection to cell-caddy:443 to read cert expiry; avoids
  needing a volume mount into the API container
- CaddyManager: periodic cert refresh in health_monitor_loop (every 60 cycles)
- config.py PUT /api/ddns: publish IDENTITY_CHANGED so CaddyManager regenerates
  the Caddyfile immediately after any domain/cell_name change — previously the
  event was never fired from this route
- config.py: remove all ip_utils.write_caddyfile() calls; CaddyManager is now
  the sole authority for Caddyfile generation
- app.py: add GET /api/caddy/cert-status route
- app.py: add GET /api/egress/status and PUT /api/egress/services/<id>/exit routes
- Settings.jsx: display cert status badge (valid/expired/internal/unknown) with
  expiry date and days-remaining in the domain section
- Tests: TestRefreshCertStatus (8 tests), TestDdnsConfigUpdatesFiresIdentityChanged,
  TestCaddyCertStatusRoute added; fix expired-cert helper to set not_valid_before
  relative to expiry so it's always earlier

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-05 11:39:36 -04:00
parent 41d09c598b
commit 0267dce73d
7 changed files with 398 additions and 26 deletions
+76 -1
View File
@@ -23,8 +23,12 @@ in the main server block (or, for ``http01``, written as their own per-host
blocks).
"""
import datetime as _dt
import logging
import os
import socket as _socket
import ssl as _ssl
import time as _time
from typing import Any, Dict, List, Optional
import requests
@@ -62,6 +66,8 @@ class CaddyManager(BaseServiceManager):
# Consecutive health-check failure counter (reset on success or when
# the caller restarts the container).
self._health_failures = 0
# Monotonic timestamp of the last completed cert status refresh (set even
# when the refresh result is 'unknown'); None until the first refresh.
self._cert_refreshed_at: Optional[float] = None
if service_bus is not None:
from service_bus import EventType
@@ -490,8 +496,10 @@ class CaddyManager(BaseServiceManager):
except Exception as exc:
self.logger.warning('caddy_manager identity_changed handler failed: %s', exc)
# ── Certificate status ────────────────────────────────────────────────
def get_cert_status(self) -> Dict[str, Any]:
"""Return TLS cert status from identity['tls'] if present."""
"""Return TLS cert status from identity['tls'] if present (cached)."""
default = {'status': 'unknown', 'expiry': None, 'days_remaining': None}
if not self.config_manager:
return default
@@ -506,3 +514,70 @@ class CaddyManager(BaseServiceManager):
'expiry': tls.get('expiry'),
'days_remaining': tls.get('days_remaining'),
}
def get_cert_status_fresh(self, max_age_seconds: int = 300) -> Dict[str, Any]:
    """Return the cert status, refreshing it first when the cache is stale.

    The cache counts as stale when no refresh has completed yet, or when
    more than ``max_age_seconds`` have elapsed (monotonic clock) since the
    last one. The returned value always comes from ``get_cert_status()``.
    """
    last_refresh = self._cert_refreshed_at
    if last_refresh is None:
        # Never refreshed: there is nothing cached to trust yet.
        is_stale = True
    else:
        elapsed = _time.monotonic() - last_refresh
        is_stale = elapsed > max_age_seconds
    if is_stale:
        self.refresh_cert_status()
    return self.get_cert_status()
def refresh_cert_status(self) -> Dict[str, Any]:
    """Determine the current TLS cert status and persist it to identity['tls'].

    LAN mode (``domain_mode == 'lan'``, which is also the default when no
    identity is available): returns ``{'status': 'internal', ...}`` without
    any network probe.

    Other modes: probes the host named by ``CADDY_CERT_HOST`` (default
    ``cell-caddy``) on ``CADDY_HTTPS_PORT`` (default 443) through
    ``_check_cert_via_ssl``. When the probe returns None the status is
    ``'unknown'``. A non-numeric ``CADDY_HTTPS_PORT`` is logged and replaced
    by 443 instead of raising.

    Returns:
        Dict with keys ``status``, ``expiry`` and ``days_remaining``.
    """
    identity = self.config_manager.get_identity() if self.config_manager else {}
    domain_mode = (identity or {}).get('domain_mode', 'lan')
    if domain_mode == 'lan':
        status: Dict[str, Any] = {'status': 'internal', 'expiry': None, 'days_remaining': None}
    else:
        caddy_host = os.environ.get('CADDY_CERT_HOST', 'cell-caddy')
        raw_port = os.environ.get('CADDY_HTTPS_PORT', '443')
        try:
            caddy_port = int(raw_port)
        except ValueError:
            # A misconfigured env var must not break the caller; fall back
            # to the standard HTTPS port and keep going.
            self.logger.warning(
                'refresh_cert_status: invalid CADDY_HTTPS_PORT %r, using 443', raw_port
            )
            caddy_port = 443
        result = self._check_cert_via_ssl(caddy_host, caddy_port)
        status = result if result is not None else {
            'status': 'unknown', 'expiry': None, 'days_remaining': None
        }
    if self.config_manager:
        try:
            self.config_manager.set_identity_field('tls', status)
        except Exception as exc:
            # Persisting is best-effort; the computed status is still returned.
            self.logger.warning('refresh_cert_status: failed to persist tls status: %s', exc)
    # Stamped on every completed refresh (including 'unknown' results) so
    # get_cert_status_fresh() does not re-probe on each call.
    self._cert_refreshed_at = _time.monotonic()
    return status
@staticmethod
def _check_cert_via_ssl(hostname: str, port: int = 443) -> Optional[Dict[str, Any]]:
    """Open a TLS connection and return the served certificate's expiry info.

    Certificate verification is disabled on purpose: the goal is only to read
    the certificate that is presented, not to authenticate the peer.

    Args:
        hostname: Host to connect to; also sent as the TLS SNI name.
        port: TCP port to connect to.

    Returns:
        A dict with ``status`` ('valid' or 'expired'), ``expiry`` (ISO-8601,
        UTC) and ``days_remaining`` (whole days, floored; negative once the
        cert has expired), or None when connecting, handshaking or parsing
        fails or no certificate is presented.
    """
    ctx = _ssl.create_default_context()
    # check_hostname has to be cleared before verify_mode may be CERT_NONE.
    ctx.check_hostname = False
    ctx.verify_mode = _ssl.CERT_NONE
    try:
        with _socket.create_connection((hostname, port), timeout=5) as raw:
            with ctx.wrap_socket(raw, server_hostname=hostname) as tls:
                der = tls.getpeercert(binary_form=True)
        if not der:
            return None
        from cryptography import x509
        from cryptography.hazmat.backends import default_backend
        cert = x509.load_der_x509_certificate(der, default_backend())
        # Use not_valid_after_utc (cryptography ≥42) with fallback for older builds.
        try:
            expiry = cert.not_valid_after_utc
        except AttributeError:
            expiry = cert.not_valid_after.replace(tzinfo=_dt.timezone.utc)  # type: ignore[attr-defined]
        now = _dt.datetime.now(_dt.timezone.utc)
        remaining = expiry - now
        return {
            # Compare the full timedelta, not the floored day count: a cert
            # with under 24h left has days == 0 but is still valid.
            'status': 'valid' if remaining > _dt.timedelta(0) else 'expired',
            'expiry': expiry.isoformat(),
            'days_remaining': remaining.days,
        }
    except Exception as exc:
        # Best-effort probe: callers treat None as "unknown", but keep the
        # reason discoverable when debugging.
        logging.getLogger(__name__).debug(
            '_check_cert_via_ssl: probe of %s:%s failed: %s', hostname, port, exc
        )
        return None