feat: HTTPS cert status, IDENTITY_CHANGED wiring, remove stale ip_utils Caddyfile writes
Unit Tests / test (push) Successful in 11m18s
Unit Tests / test (push) Successful in 11m18s
- CaddyManager: add refresh_cert_status() and get_cert_status_fresh(), which open a live TLS connection to cell-caddy:443 to read the cert expiry; this avoids needing a volume mount into the API container
- CaddyManager: periodic cert refresh in health_monitor_loop (every 60 cycles)
- config.py PUT /api/ddns: publish IDENTITY_CHANGED so CaddyManager regenerates the Caddyfile immediately after any domain/cell_name change — previously the event was never fired from this route
- config.py: remove all ip_utils.write_caddyfile() calls; CaddyManager is now the sole authority for Caddyfile generation
- app.py: add GET /api/caddy/cert-status route
- app.py: add GET /api/egress/status and PUT /api/egress/services/<id>/exit routes
- Settings.jsx: display cert status badge (valid/expired/internal/unknown) with expiry date and days-remaining in the domain section
- Tests: TestRefreshCertStatus (8 tests), TestDdnsConfigUpdatesFiresIdentityChanged, TestCaddyCertStatusRoute added; fix expired-cert helper to set not_valid_before relative to expiry so it's always earlier

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+24
@@ -590,6 +590,7 @@ def perform_health_check():
|
||||
return {'error': str(e), 'timestamp': datetime.utcnow().isoformat()}
|
||||
|
||||
def health_monitor_loop():
|
||||
_cert_check_cycle = 0
|
||||
while health_monitor_running:
|
||||
with app.app_context():
|
||||
health_result = perform_health_check()
|
||||
@@ -613,6 +614,14 @@ def health_monitor_loop():
|
||||
caddy_manager.reset_health_failures()
|
||||
except Exception as _caddy_err:
|
||||
logger.error("Caddy health monitor error: %s", _caddy_err)
|
||||
# Refresh cert status every 60 cycles (≈ 1 hour with a 60 s loop).
|
||||
_cert_check_cycle += 1
|
||||
if _cert_check_cycle >= 60:
|
||||
_cert_check_cycle = 0
|
||||
try:
|
||||
caddy_manager.refresh_cert_status()
|
||||
except Exception as _cert_err:
|
||||
logger.warning("Cert status refresh failed (non-fatal): %s", _cert_err)
|
||||
time.sleep(60) # Check every 60 seconds
|
||||
|
||||
# Start health monitor thread
|
||||
@@ -854,6 +863,21 @@ def connectivity_get_peer_exits():
|
||||
return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
@app.route('/api/caddy/cert-status', methods=['GET'])
def caddy_cert_status():
    """Serve the current TLS certificate status as JSON.

    Delegates to ``caddy_manager.get_cert_status_fresh`` with a maximum
    cache age of 300 seconds, so a cached value older than 5 minutes is
    refreshed before it is returned.

    Any exception raised while fetching or serialising the status is logged
    and reported to the client as a 500 response of the form
    ``{'error': <message>}``.
    """
    try:
        cert_info = caddy_manager.get_cert_status_fresh(max_age_seconds=300)
        response = jsonify(cert_info)
    except Exception as e:
        logger.error(f"caddy_cert_status: {e}")
        return jsonify({'error': str(e)}), 500
    return response
|
||||
|
||||
|
||||
@app.route('/api/egress/status', methods=['GET'])
|
||||
def egress_status():
|
||||
"""Return egress status for all installed services that have an egress config."""
|
||||
|
||||
+76
-1
@@ -23,8 +23,12 @@ in the main server block (or, for ``http01``, written as their own per-host
|
||||
blocks).
|
||||
"""
|
||||
|
||||
import datetime as _dt
|
||||
import logging
|
||||
import os
|
||||
import socket as _socket
|
||||
import ssl as _ssl
|
||||
import time as _time
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import requests
|
||||
@@ -62,6 +66,8 @@ class CaddyManager(BaseServiceManager):
|
||||
# Consecutive health-check failure counter (reset on success or when
|
||||
# the caller restarts the container).
|
||||
self._health_failures = 0
|
||||
# Monotonic timestamp of the last successful cert status refresh.
|
||||
self._cert_refreshed_at: Optional[float] = None
|
||||
|
||||
if service_bus is not None:
|
||||
from service_bus import EventType
|
||||
@@ -490,8 +496,10 @@ class CaddyManager(BaseServiceManager):
|
||||
except Exception as exc:
|
||||
self.logger.warning('caddy_manager identity_changed handler failed: %s', exc)
|
||||
|
||||
# ── Certificate status ────────────────────────────────────────────────
|
||||
|
||||
def get_cert_status(self) -> Dict[str, Any]:
|
||||
"""Return TLS cert status from identity['tls'] if present."""
|
||||
"""Return TLS cert status from identity['tls'] if present (cached)."""
|
||||
default = {'status': 'unknown', 'expiry': None, 'days_remaining': None}
|
||||
if not self.config_manager:
|
||||
return default
|
||||
@@ -506,3 +514,70 @@ class CaddyManager(BaseServiceManager):
|
||||
'expiry': tls.get('expiry'),
|
||||
'days_remaining': tls.get('days_remaining'),
|
||||
}
|
||||
|
||||
def get_cert_status_fresh(self, max_age_seconds: int = 300) -> Dict[str, Any]:
    """Return the cert status, refreshing it first when the cache is stale.

    The cache counts as stale when no refresh has been recorded yet, or when
    more than ``max_age_seconds`` have elapsed on the monotonic clock since
    the last recorded refresh. The returned dict always comes from
    ``get_cert_status()``; ``refresh_cert_status()`` is only called for its
    side effects.
    """
    last_refresh = self._cert_refreshed_at
    never_refreshed = last_refresh is None
    if never_refreshed or (_time.monotonic() - last_refresh) > max_age_seconds:
        self.refresh_cert_status()
    return self.get_cert_status()
|
||||
|
||||
def refresh_cert_status(self) -> Dict[str, Any]:
    """Check TLS cert expiry via SSL and persist the result to identity['tls'].

    For LAN mode (``domain_mode == 'lan'``, also the default when no identity
    or config manager is available) the status is ``'internal'`` and no
    connection is made. For every other mode an SSL connection is opened to
    the host/port given by ``CADDY_CERT_HOST`` / ``CADDY_HTTPS_PORT``
    (defaults ``cell-caddy`` / ``443``) and the expiry is read from the
    handshake. If the probe yields nothing, the status is ``'unknown'``.

    A malformed ``CADDY_HTTPS_PORT`` no longer raises: it is logged and the
    default port 443 is used instead.

    Returns:
        A dict with the keys ``status``, ``expiry`` and ``days_remaining``.
    """
    identity = self.config_manager.get_identity() if self.config_manager else {}
    domain_mode = (identity or {}).get('domain_mode', 'lan')

    if domain_mode == 'lan':
        status: Dict[str, Any] = {'status': 'internal', 'expiry': None, 'days_remaining': None}
    else:
        caddy_host = os.environ.get('CADDY_CERT_HOST', 'cell-caddy')
        raw_port = os.environ.get('CADDY_HTTPS_PORT', '443')
        try:
            caddy_port = int(raw_port)
        except ValueError:
            # A bad env value must not turn a status probe into an exception.
            self.logger.warning(
                'refresh_cert_status: invalid CADDY_HTTPS_PORT %r; using 443', raw_port
            )
            caddy_port = 443
        result = self._check_cert_via_ssl(caddy_host, caddy_port)
        status = result if result is not None else {
            'status': 'unknown', 'expiry': None, 'days_remaining': None
        }

    if self.config_manager:
        try:
            self.config_manager.set_identity_field('tls', status)
        except Exception as exc:
            # Persisting is best-effort; the freshly probed status is still returned.
            self.logger.warning('refresh_cert_status: failed to persist tls status: %s', exc)

    # Recorded after every attempt (including 'unknown' results), which also
    # throttles re-probing from get_cert_status_fresh().
    self._cert_refreshed_at = _time.monotonic()
    return status
|
||||
|
||||
@staticmethod
|
||||
def _check_cert_via_ssl(hostname: str, port: int = 443) -> Optional[Dict[str, Any]]:
|
||||
"""Open an SSL connection and return cert expiry info, or None on failure."""
|
||||
ctx = _ssl.create_default_context()
|
||||
ctx.check_hostname = False
|
||||
ctx.verify_mode = _ssl.CERT_NONE
|
||||
try:
|
||||
with _socket.create_connection((hostname, port), timeout=5) as raw:
|
||||
with ctx.wrap_socket(raw, server_hostname=hostname) as tls:
|
||||
der = tls.getpeercert(binary_form=True)
|
||||
if not der:
|
||||
return None
|
||||
from cryptography import x509
|
||||
from cryptography.hazmat.backends import default_backend
|
||||
cert = x509.load_der_x509_certificate(der, default_backend())
|
||||
# Use not_valid_after_utc (cryptography ≥42) with fallback for older builds.
|
||||
try:
|
||||
expiry = cert.not_valid_after_utc
|
||||
except AttributeError:
|
||||
expiry = cert.not_valid_after.replace(tzinfo=_dt.timezone.utc) # type: ignore[attr-defined]
|
||||
now = _dt.datetime.now(_dt.timezone.utc)
|
||||
days = (expiry - now).days
|
||||
return {
|
||||
'status': 'valid' if days > 0 else 'expired',
|
||||
'expiry': expiry.isoformat(),
|
||||
'days_remaining': days,
|
||||
}
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
+21
-23
@@ -316,13 +316,6 @@ def update_config():
|
||||
domain = identity_updates['domain']
|
||||
net_result = network_manager.apply_domain(domain, reload=False)
|
||||
all_warnings.extend(net_result.get('warnings', []))
|
||||
_cur_id = config_manager.configs.get('_identity', {})
|
||||
if _cur_id.get('domain_mode', 'lan') == 'lan':
|
||||
ip_utils.write_caddyfile(
|
||||
_cur_id.get('ip_range', os.environ.get('CELL_IP_RANGE', '172.20.0.0/16')),
|
||||
_cur_id.get('cell_name', os.environ.get('CELL_NAME', 'mycell')),
|
||||
domain, '/app/config-caddy/Caddyfile'
|
||||
)
|
||||
_set_pending_restart(
|
||||
[f'domain changed to {domain}'],
|
||||
['dns', 'caddy'],
|
||||
@@ -335,14 +328,6 @@ def update_config():
|
||||
if old_name != new_name:
|
||||
cn_result = network_manager.apply_cell_name(old_name, new_name, reload=False)
|
||||
all_warnings.extend(cn_result.get('warnings', []))
|
||||
_cur_id2 = config_manager.configs.get('_identity', {})
|
||||
if _cur_id2.get('domain_mode', 'lan') == 'lan':
|
||||
ip_utils.write_caddyfile(
|
||||
_cur_id2.get('ip_range', os.environ.get('CELL_IP_RANGE', '172.20.0.0/16')),
|
||||
new_name,
|
||||
identity_updates.get('domain') or _cur_id2.get('domain', os.environ.get('CELL_DOMAIN', 'cell')),
|
||||
'/app/config-caddy/Caddyfile'
|
||||
)
|
||||
_set_pending_restart(
|
||||
[f'cell_name changed to {new_name}'],
|
||||
['dns'],
|
||||
@@ -373,8 +358,6 @@ def update_config():
|
||||
firewall_manager.ensure_caddy_virtual_ips()
|
||||
env_file = os.environ.get('COMPOSE_ENV_FILE', '/app/.env.compose')
|
||||
ip_utils.write_env_file(new_range, env_file, _collect_service_ports(config_manager.configs))
|
||||
if cur_identity.get('domain_mode', 'lan') == 'lan':
|
||||
ip_utils.write_caddyfile(new_range, cur_cell_name, cur_domain, '/app/config-caddy/Caddyfile')
|
||||
_set_pending_restart(
|
||||
[f'ip_range changed to {new_range} — network will be recreated'],
|
||||
['*'], network_recreate=True,
|
||||
@@ -581,6 +564,21 @@ def update_ddns_config():
|
||||
config_manager.set_identity_field('duckdns_token', duck_token)
|
||||
config_manager.set_identity_field('duckdns_subdomain', duck_sub)
|
||||
|
||||
# Fire IDENTITY_CHANGED so CaddyManager regenerates the Caddyfile
|
||||
# for the new domain mode without requiring a container restart.
|
||||
try:
|
||||
from app import service_bus as _sbus, EventType as _ET
|
||||
_cur = config_manager.configs.get('_identity', {})
|
||||
_sbus.publish_event(_ET.IDENTITY_CHANGED, 'config', {
|
||||
'cell_name': _cur.get('cell_name'),
|
||||
'domain': _cur.get('domain'),
|
||||
'domain_name': _cur.get('domain_name'),
|
||||
'domain_mode': _cur.get('domain_mode'),
|
||||
'effective_domain': config_manager.get_effective_domain(),
|
||||
})
|
||||
except Exception as _ev_err:
|
||||
logger.warning('update_ddns_config: failed to fire IDENTITY_CHANGED: %s', _ev_err)
|
||||
|
||||
logger.info('DDNS config updated: domain_mode=%r domain_name=%r', domain_mode, domain_name)
|
||||
return jsonify({'updated': True})
|
||||
except Exception as e:
|
||||
@@ -660,12 +658,12 @@ def cancel_pending_config():
|
||||
if cur_cell_name and old_cell_name and cur_cell_name != old_cell_name:
|
||||
network_manager.apply_cell_name(cur_cell_name, old_cell_name, reload=False)
|
||||
|
||||
if _id.get('domain_mode', 'lan') == 'lan':
|
||||
_ip_revert.write_caddyfile(
|
||||
_id.get('ip_range', os.environ.get('CELL_IP_RANGE', '172.20.0.0/16')),
|
||||
_id.get('cell_name', os.environ.get('CELL_NAME', 'mycell')),
|
||||
_dom, '/app/config-caddy/Caddyfile'
|
||||
)
|
||||
# Regenerate Caddyfile for the reverted identity (all domain modes)
|
||||
try:
|
||||
from app import caddy_manager as _cm
|
||||
_cm.regenerate_with_installed([])
|
||||
except Exception as _cm_err:
|
||||
logger.warning('cancel_pending_config: caddy regenerate failed (non-fatal): %s', _cm_err)
|
||||
|
||||
_clear_pending_restart()
|
||||
return jsonify({'message': 'Pending changes discarded'})
|
||||
|
||||
Reference in New Issue
Block a user