feat: HTTPS cert status, IDENTITY_CHANGED wiring, remove stale ip_utils Caddyfile writes
Unit Tests / test (push) Successful in 11m18s

- CaddyManager: add refresh_cert_status() and get_cert_status_fresh() that
  open a live TLS connection to cell-caddy:443 to read cert expiry; avoids
  needing a volume mount into the API container
- CaddyManager: periodic cert refresh in health_monitor_loop (every 60 cycles)
- config.py PUT /api/ddns: publish IDENTITY_CHANGED so CaddyManager regenerates
  the Caddyfile immediately after any domain/cell_name change — previously the
  event was never fired from this route
- config.py: remove all ip_utils.write_caddyfile() calls; CaddyManager is now
  the sole authority for Caddyfile generation
- app.py: add GET /api/caddy/cert-status route
- app.py: add GET /api/egress/status and PUT /api/egress/services/<id>/exit routes
- Settings.jsx: display cert status badge (valid/expired/internal/unknown) with
  expiry date and days-remaining in the domain section
- Tests: TestRefreshCertStatus (8 tests), TestDdnsConfigUpdatesFiresIdentityChanged,
  TestCaddyCertStatusRoute added; fix expired-cert helper to set not_valid_before
  relative to expiry so it's always earlier

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-05 11:39:36 -04:00
parent 41d09c598b
commit 0267dce73d
7 changed files with 398 additions and 26 deletions
+24
View File
@@ -590,6 +590,7 @@ def perform_health_check():
return {'error': str(e), 'timestamp': datetime.utcnow().isoformat()}
def health_monitor_loop():
_cert_check_cycle = 0
while health_monitor_running:
with app.app_context():
health_result = perform_health_check()
@@ -613,6 +614,14 @@ def health_monitor_loop():
caddy_manager.reset_health_failures()
except Exception as _caddy_err:
logger.error("Caddy health monitor error: %s", _caddy_err)
# Refresh cert status every 60 cycles (≈ 1 hour with a 60 s loop).
_cert_check_cycle += 1
if _cert_check_cycle >= 60:
_cert_check_cycle = 0
try:
caddy_manager.refresh_cert_status()
except Exception as _cert_err:
logger.warning("Cert status refresh failed (non-fatal): %s", _cert_err)
time.sleep(60) # Check every 60 seconds
# Start health monitor thread
@@ -854,6 +863,21 @@ def connectivity_get_peer_exits():
return jsonify({'error': str(e)}), 500
@app.route('/api/caddy/cert-status', methods=['GET'])
def caddy_cert_status():
    """Serve the current TLS certificate status as JSON.

    Delegates to ``caddy_manager.get_cert_status_fresh`` with a 300 second
    maximum cache age, so the stored value is refreshed first when it is
    older than five minutes.

    Returns:
        The JSON-encoded status dict on success, or a JSON ``{'error': ...}``
        body with HTTP status 500 when the lookup raises.
    """
    try:
        cert_info = caddy_manager.get_cert_status_fresh(max_age_seconds=300)
        return jsonify(cert_info)
    except Exception as exc:
        # Route boundary: log the failure and report it instead of letting
        # the exception escape the handler.
        logger.error(f"caddy_cert_status: {exc}")
        error_body = {'error': str(exc)}
        return jsonify(error_body), 500
@app.route('/api/egress/status', methods=['GET'])
def egress_status():
"""Return egress status for all installed services that have an egress config."""
+76 -1
View File
@@ -23,8 +23,12 @@ in the main server block (or, for ``http01``, written as their own per-host
blocks).
"""
import datetime as _dt
import logging
import os
import socket as _socket
import ssl as _ssl
import time as _time
from typing import Any, Dict, List, Optional
import requests
@@ -62,6 +66,8 @@ class CaddyManager(BaseServiceManager):
# Consecutive health-check failure counter (reset on success or when
# the caller restarts the container).
self._health_failures = 0
# Monotonic timestamp of the last completed cert status refresh (None until
# the first one); it is recorded even when the check yields 'unknown'.
self._cert_refreshed_at: Optional[float] = None
if service_bus is not None:
from service_bus import EventType
@@ -490,8 +496,10 @@ class CaddyManager(BaseServiceManager):
except Exception as exc:
self.logger.warning('caddy_manager identity_changed handler failed: %s', exc)
# ── Certificate status ────────────────────────────────────────────────
def get_cert_status(self) -> Dict[str, Any]:
"""Return TLS cert status from identity['tls'] if present."""
"""Return TLS cert status from identity['tls'] if present (cached)."""
default = {'status': 'unknown', 'expiry': None, 'days_remaining': None}
if not self.config_manager:
return default
@@ -506,3 +514,70 @@ class CaddyManager(BaseServiceManager):
'expiry': tls.get('expiry'),
'days_remaining': tls.get('days_remaining'),
}
def get_cert_status_fresh(self, max_age_seconds: int = 300) -> Dict[str, Any]:
    """Return the stored cert status, refreshing it first when it is stale.

    The stored value counts as stale when no refresh has been recorded yet
    or when more than ``max_age_seconds`` have elapsed (monotonic clock)
    since the last recorded refresh.

    Args:
        max_age_seconds: Maximum tolerated age of the stored status.

    Returns:
        Whatever ``get_cert_status()`` reports after the optional refresh.
    """
    current = _time.monotonic()
    last_refresh = self._cert_refreshed_at
    never_refreshed = last_refresh is None
    if never_refreshed or current - last_refresh > max_age_seconds:
        self.refresh_cert_status()
    return self.get_cert_status()
def refresh_cert_status(self) -> Dict[str, Any]:
    """Determine the TLS cert status and persist it to identity['tls'].

    For LAN mode (``domain_mode == 'lan'``, also the default when no
    identity is available) the status is ``'internal'`` and no network
    access happens. For every other mode a TLS connection is opened to the
    host/port taken from ``CADDY_CERT_HOST`` (default ``cell-caddy``) and
    ``CADDY_HTTPS_PORT`` (default ``443``) and the expiry is read from the
    presented certificate. When that check yields nothing, the status is
    ``'unknown'``.

    Returns:
        A dict with the keys ``status``, ``expiry`` and ``days_remaining``.
    """
    identity = self.config_manager.get_identity() if self.config_manager else {}
    domain_mode = (identity or {}).get('domain_mode', 'lan')

    if domain_mode == 'lan':
        status: Dict[str, Any] = {'status': 'internal', 'expiry': None, 'days_remaining': None}
    else:
        caddy_host = os.environ.get('CADDY_CERT_HOST', 'cell-caddy')
        raw_port = os.environ.get('CADDY_HTTPS_PORT', '443')
        try:
            caddy_port = int(raw_port)
        except ValueError:
            # A malformed override must not turn every refresh into an
            # exception; fall back to the standard HTTPS port instead.
            self.logger.warning(
                'refresh_cert_status: invalid CADDY_HTTPS_PORT %r, using 443', raw_port
            )
            caddy_port = 443
        result = self._check_cert_via_ssl(caddy_host, caddy_port)
        status = result if result is not None else {
            'status': 'unknown', 'expiry': None, 'days_remaining': None
        }

    if self.config_manager:
        try:
            self.config_manager.set_identity_field('tls', status)
        except Exception as exc:
            # Persisting is best-effort: the freshly computed status is
            # still returned to the caller.
            self.logger.warning('refresh_cert_status: failed to persist tls status: %s', exc)

    # Recorded regardless of the outcome, so an unreachable Caddy is not
    # re-probed on every status request.
    self._cert_refreshed_at = _time.monotonic()
    return status
@staticmethod
def _check_cert_via_ssl(hostname: str, port: int = 443) -> Optional[Dict[str, Any]]:
    """Open a TLS connection and report the peer certificate's expiry.

    Certificate verification is switched off: the goal is only to read
    the certificate the server presents, not to authenticate the peer.

    Args:
        hostname: Host to connect to; also sent as the SNI server name.
        port: TCP port of the TLS endpoint.

    Returns:
        A dict with ``status`` (``'valid'`` while the expiry instant lies
        in the future, otherwise ``'expired'``), ``expiry`` (ISO-8601,
        UTC) and ``days_remaining`` (whole days, floored, so negative once
        expired), or ``None`` when the connection, handshake or
        certificate parsing fails.
    """
    ctx = _ssl.create_default_context()
    # check_hostname has to be cleared before verify_mode may be CERT_NONE.
    ctx.check_hostname = False
    ctx.verify_mode = _ssl.CERT_NONE
    try:
        with _socket.create_connection((hostname, port), timeout=5) as raw:
            with ctx.wrap_socket(raw, server_hostname=hostname) as tls:
                # binary_form=True yields the DER bytes even though the
                # certificate was not validated.
                der = tls.getpeercert(binary_form=True)
        if not der:
            return None

        from cryptography import x509
        from cryptography.hazmat.backends import default_backend

        cert = x509.load_der_x509_certificate(der, default_backend())
        # Use not_valid_after_utc (cryptography >= 42) with fallback for older builds.
        try:
            expiry = cert.not_valid_after_utc
        except AttributeError:
            expiry = cert.not_valid_after.replace(tzinfo=_dt.timezone.utc)  # type: ignore[attr-defined]

        now = _dt.datetime.now(_dt.timezone.utc)
        days = (expiry - now).days
        return {
            # Compare instants rather than whole days: a certificate with
            # less than 24 hours left has days == 0 but is still valid.
            'status': 'valid' if expiry > now else 'expired',
            'expiry': expiry.isoformat(),
            'days_remaining': days,
        }
    except Exception:
        # Deliberate best-effort: any failure (no listener, handshake
        # error, unparsable certificate) is reported as "no result".
        return None
+21 -23
View File
@@ -316,13 +316,6 @@ def update_config():
domain = identity_updates['domain']
net_result = network_manager.apply_domain(domain, reload=False)
all_warnings.extend(net_result.get('warnings', []))
_cur_id = config_manager.configs.get('_identity', {})
if _cur_id.get('domain_mode', 'lan') == 'lan':
ip_utils.write_caddyfile(
_cur_id.get('ip_range', os.environ.get('CELL_IP_RANGE', '172.20.0.0/16')),
_cur_id.get('cell_name', os.environ.get('CELL_NAME', 'mycell')),
domain, '/app/config-caddy/Caddyfile'
)
_set_pending_restart(
[f'domain changed to {domain}'],
['dns', 'caddy'],
@@ -335,14 +328,6 @@ def update_config():
if old_name != new_name:
cn_result = network_manager.apply_cell_name(old_name, new_name, reload=False)
all_warnings.extend(cn_result.get('warnings', []))
_cur_id2 = config_manager.configs.get('_identity', {})
if _cur_id2.get('domain_mode', 'lan') == 'lan':
ip_utils.write_caddyfile(
_cur_id2.get('ip_range', os.environ.get('CELL_IP_RANGE', '172.20.0.0/16')),
new_name,
identity_updates.get('domain') or _cur_id2.get('domain', os.environ.get('CELL_DOMAIN', 'cell')),
'/app/config-caddy/Caddyfile'
)
_set_pending_restart(
[f'cell_name changed to {new_name}'],
['dns'],
@@ -373,8 +358,6 @@ def update_config():
firewall_manager.ensure_caddy_virtual_ips()
env_file = os.environ.get('COMPOSE_ENV_FILE', '/app/.env.compose')
ip_utils.write_env_file(new_range, env_file, _collect_service_ports(config_manager.configs))
if cur_identity.get('domain_mode', 'lan') == 'lan':
ip_utils.write_caddyfile(new_range, cur_cell_name, cur_domain, '/app/config-caddy/Caddyfile')
_set_pending_restart(
[f'ip_range changed to {new_range} — network will be recreated'],
['*'], network_recreate=True,
@@ -581,6 +564,21 @@ def update_ddns_config():
config_manager.set_identity_field('duckdns_token', duck_token)
config_manager.set_identity_field('duckdns_subdomain', duck_sub)
# Fire IDENTITY_CHANGED so CaddyManager regenerates the Caddyfile
# for the new domain mode without requiring a container restart.
try:
from app import service_bus as _sbus, EventType as _ET
_cur = config_manager.configs.get('_identity', {})
_sbus.publish_event(_ET.IDENTITY_CHANGED, 'config', {
'cell_name': _cur.get('cell_name'),
'domain': _cur.get('domain'),
'domain_name': _cur.get('domain_name'),
'domain_mode': _cur.get('domain_mode'),
'effective_domain': config_manager.get_effective_domain(),
})
except Exception as _ev_err:
logger.warning('update_ddns_config: failed to fire IDENTITY_CHANGED: %s', _ev_err)
logger.info('DDNS config updated: domain_mode=%r domain_name=%r', domain_mode, domain_name)
return jsonify({'updated': True})
except Exception as e:
@@ -660,12 +658,12 @@ def cancel_pending_config():
if cur_cell_name and old_cell_name and cur_cell_name != old_cell_name:
network_manager.apply_cell_name(cur_cell_name, old_cell_name, reload=False)
if _id.get('domain_mode', 'lan') == 'lan':
_ip_revert.write_caddyfile(
_id.get('ip_range', os.environ.get('CELL_IP_RANGE', '172.20.0.0/16')),
_id.get('cell_name', os.environ.get('CELL_NAME', 'mycell')),
_dom, '/app/config-caddy/Caddyfile'
)
# Regenerate Caddyfile for the reverted identity (all domain modes)
try:
from app import caddy_manager as _cm
_cm.regenerate_with_installed([])
except Exception as _cm_err:
logger.warning('cancel_pending_config: caddy regenerate failed (non-fatal): %s', _cm_err)
_clear_pending_restart()
return jsonify({'message': 'Pending changes discarded'})