feat: connectivity redesign phase 3+4 — per-connection health, per-peer fallback, connection CRUD API
Unit Tests / test (push) Successful in 13m15s
Unit Tests / test (push) Successful in 13m15s
Health probes (probe_health/refresh_health) are type-aware: WireGuard checks the last WG handshake timestamp, OpenVPN checks the tun/tap interface, Tor checks the control-port GETINFO, and sshuttle/proxy types do a TCP reachability probe to the remote endpoint. Results are persisted via set_connection_status and wired into the health_monitor_loop so the UI always has a current health snapshot without polling.

Per-peer fail-open semantics: VPN, SSH, and proxy connections default to fail-closed (kill-switch stays active even when the tunnel is down). Tor defaults to fail-open. The default can be overridden per-peer via set_peer_failopen/effective_failopen. apply_routes skips the fwmark and kill-switch rules for any fail-open peer whose connection health is not "working", letting traffic fall back to direct routing transparently.

New generic admin-only connection CRUD endpoints (GET/POST/PUT/DELETE /api/connectivity/connections, GET /<id>/health, PUT /api/connectivity/peers/<peer>/failopen) are guarded by the existing admin role check. connection.create, connection.update, connection.delete, and peer.failopen are all registered in ROUTE_ACTION_MAP for the audit hook so every change is recorded in the owner-visible change log.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
+120
@@ -409,6 +409,10 @@ ROUTE_ACTION_MAP = {
|
||||
('POST', 'connectivity_configure_sshuttle'): ('connection.exit_sshuttle', 'connection', None),
|
||||
('POST', 'connectivity_configure_proxy'): ('connection.exit_proxy', 'connection', None),
|
||||
('PUT', 'connectivity_set_peer_exit'): ('connection.peer_exit_set', 'peer', 'peer_name'),
|
||||
('POST', 'connectivity_create_connection'): ('connection.create', 'connection', None),
|
||||
('PUT', 'connectivity_update_connection'): ('connection.update', 'connection', 'conn_id'),
|
||||
('DELETE', 'connectivity_delete_connection'): ('connection.delete', 'connection', 'conn_id'),
|
||||
('PUT', 'connectivity_set_peer_failopen'): ('peer.failopen', 'peer', 'peer_name'),
|
||||
# egress
|
||||
('PUT', 'egress_set_service_exit'): ('egress.service_exit_set', 'service', 'service_id'),
|
||||
# cells
|
||||
@@ -867,6 +871,7 @@ def perform_health_check():
|
||||
|
||||
def health_monitor_loop():
|
||||
_cert_check_cycle = 0
|
||||
_conn_health_cycle = 0
|
||||
while health_monitor_running:
|
||||
with app.app_context():
|
||||
health_result = perform_health_check()
|
||||
@@ -898,6 +903,15 @@ def health_monitor_loop():
|
||||
caddy_manager.refresh_cert_status()
|
||||
except Exception as _cert_err:
|
||||
logger.warning("Cert status refresh failed (non-fatal): %s", _cert_err)
|
||||
# Refresh connection health every 2 cycles (≈ every 2 min) so the
|
||||
# connections list and per-peer fallback decisions stay current.
|
||||
_conn_health_cycle += 1
|
||||
if _conn_health_cycle >= 2:
|
||||
_conn_health_cycle = 0
|
||||
try:
|
||||
connectivity_manager.refresh_health()
|
||||
except Exception as _ch_err:
|
||||
logger.warning("Connection health refresh failed (non-fatal): %s", _ch_err)
|
||||
time.sleep(60) # Check every 60 seconds
|
||||
|
||||
# Start health monitor thread
|
||||
@@ -1172,6 +1186,112 @@ def connectivity_get_peer_exits():
|
||||
return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
# Connectivity v2 — generic connection CRUD (going-forward API; admin-only via
|
||||
# enforce_auth which restricts all non-peer /api/* routes to the admin role).
|
||||
|
||||
@app.route('/api/connectivity/connections', methods=['GET'])
def connectivity_list_connections():
    """List all connection instances (with status; never any secret value).

    Returns:
        200 with ``{'connections': [...]}`` as produced by
        ``connectivity_manager.list_connections()``, or 500 with a generic
        error body when the manager raises.
    """
    try:
        return jsonify({'connections': connectivity_manager.list_connections()})
    except Exception as e:
        # Log the full detail server-side, but keep the response generic so
        # internal exception text is never echoed to the client (matches the
        # create/update handlers).
        logger.error("connectivity_list_connections: %s", e, exc_info=True)
        return jsonify({'error': 'internal error'}), 500
|
||||
|
||||
|
||||
@app.route('/api/connectivity/connections', methods=['POST'])
def connectivity_create_connection():
    """Create a connection instance. Secrets are stored in the vault, never echoed.

    Expects a JSON object with ``type`` (non-empty string), ``name``
    (non-blank string) and optional ``config`` / ``secrets`` values.

    Returns:
        201 with the manager's result when ``result['ok']`` is truthy,
        400 for validation failures or a non-ok manager result,
        500 with a generic error body on an unexpected exception.
    """
    try:
        data = request.get_json(silent=True) or {}
        # A syntactically valid but non-object body (e.g. ``[1]`` or ``"x"``)
        # would otherwise blow up on ``.get`` and surface as a 500.
        if not isinstance(data, dict):
            return jsonify({'ok': False, 'error': 'request body must be a JSON object'}), 400

        conn_type = data.get('type')
        name = data.get('name')
        config = data.get('config') or {}
        conn_secrets = data.get('secrets') or {}

        if not isinstance(conn_type, str) or not conn_type:
            return jsonify({'ok': False, 'error': 'type is required'}), 400
        if not isinstance(name, str) or not name.strip():
            return jsonify({'ok': False, 'error': 'name is required'}), 400

        result = connectivity_manager.create_connection(
            conn_type, name, config=config, secrets=conn_secrets)
        if result.get('ok'):
            return jsonify(result), 201
        return jsonify(result), 400
    except Exception as e:
        logger.error("connectivity_create_connection: %s", e, exc_info=True)
        return jsonify({'error': 'internal error'}), 500
|
||||
|
||||
|
||||
@app.route('/api/connectivity/connections/<conn_id>', methods=['PUT'])
def connectivity_update_connection(conn_id: str):
    """Update a connection's name, config and/or secrets. Secrets never echoed.

    Args:
        conn_id: Connection identifier taken from the URL path.

    Returns:
        200 with the manager's result on success; 404 when the manager's
        error text contains ``'not found'``; 400 for any other non-ok result
        or a non-object request body; 500 with a generic error body on an
        unexpected exception.
    """
    try:
        data = request.get_json(silent=True) or {}
        # Reject valid-JSON-but-not-an-object bodies explicitly instead of
        # letting ``.get`` raise and turn a client error into a 500.
        if not isinstance(data, dict):
            return jsonify({'ok': False, 'error': 'request body must be a JSON object'}), 400

        result = connectivity_manager.update_connection(
            conn_id,
            name=data.get('name'),
            config=data.get('config'),
            secrets=data.get('secrets'),
        )
        if result.get('ok'):
            return jsonify(result)

        # ``error`` may be absent or None; normalise before substring matching.
        error = str(result.get('error') or '')
        status = 404 if 'not found' in error else 400
        return jsonify(result), status
    except Exception as e:
        logger.error("connectivity_update_connection(%s): %s", conn_id, e, exc_info=True)
        return jsonify({'error': 'internal error'}), 500
|
||||
|
||||
|
||||
@app.route('/api/connectivity/connections/<conn_id>', methods=['DELETE'])
def connectivity_delete_connection(conn_id: str):
    """Delete a connection. Blocked with 409 when a peer/egress references it.

    Args:
        conn_id: Connection identifier taken from the URL path.

    Returns:
        200 with the manager's result on success; 404 when the manager's
        error text contains ``'not found'``; 409 when it contains
        ``'in use by'``; 400 for any other non-ok result; 500 with a generic
        error body on an unexpected exception.
    """
    try:
        result = connectivity_manager.delete_connection(conn_id)
        if result.get('ok'):
            return jsonify(result)

        # ``error`` may be absent or None; normalise before substring matching.
        error = str(result.get('error') or '')
        if 'not found' in error:
            return jsonify(result), 404
        if 'in use by' in error:
            return jsonify(result), 409
        return jsonify(result), 400
    except Exception as e:
        # Keep exception detail in the server log only; the create/update
        # handlers already return this generic body.
        logger.error("connectivity_delete_connection(%s): %s", conn_id, e, exc_info=True)
        return jsonify({'error': 'internal error'}), 500
|
||||
|
||||
|
||||
@app.route('/api/connectivity/connections/<conn_id>/health', methods=['GET'])
def connectivity_connection_health(conn_id: str):
    """On-demand probe of one connection's health (admin).

    Args:
        conn_id: Connection identifier taken from the URL path.

    Returns:
        200 with ``{'id', 'health', 'detail'}`` from
        ``connectivity_manager.probe_health``; 404 when
        ``connectivity_manager.get_connection`` returns ``None``; 500 with a
        generic error body on an unexpected exception.
    """
    try:
        conn = connectivity_manager.get_connection(conn_id)
        if conn is None:
            return jsonify({'error': f'connection {conn_id!r} not found'}), 404

        health, detail = connectivity_manager.probe_health(conn)
        return jsonify({'id': conn_id, 'health': health, 'detail': detail})
    except Exception as e:
        # Probe failures can carry internal detail in the exception text, so
        # log it server-side and return only a generic message.
        logger.error("connectivity_connection_health(%s): %s", conn_id, e, exc_info=True)
        return jsonify({'error': 'internal error'}), 500
|
||||
|
||||
|
||||
@app.route('/api/connectivity/peers/<peer_name>/failopen', methods=['PUT'])
def connectivity_set_peer_failopen(peer_name: str):
    """Set or clear a peer's fail-open override. Body: {"failopen": bool|null}.

    Args:
        peer_name: Peer identifier taken from the URL path.

    Returns:
        200 with the manager's result on success; 404 when the manager's
        error text contains ``'not found'``; 400 for an invalid ``failopen``
        value, a non-object request body, or any other non-ok result; 500
        with a generic error body on an unexpected exception.
    """
    try:
        data = request.get_json(silent=True) or {}
        # A non-object JSON body (e.g. ``[true]``) must be a client error,
        # not an AttributeError surfacing as a 500.
        if not isinstance(data, dict):
            return jsonify({'ok': False, 'error': 'request body must be a JSON object'}), 400

        failopen = data.get('failopen')
        # Strict bool check: 0/1 or "true" are rejected rather than coerced.
        if failopen is not None and not isinstance(failopen, bool):
            return jsonify({'ok': False, 'error': 'failopen must be a boolean or null'}), 400

        result = connectivity_manager.set_peer_failopen(peer_name, failopen)
        if result.get('ok'):
            return jsonify(result)

        # ``error`` may be absent or None; normalise before substring matching.
        error = str(result.get('error') or '')
        status = 404 if 'not found' in error else 400
        return jsonify(result), status
    except Exception as e:
        logger.error("connectivity_set_peer_failopen(%s): %s", peer_name, e, exc_info=True)
        return jsonify({'error': 'internal error'}), 500
|
||||
|
||||
|
||||
@app.route('/api/caddy/cert-status', methods=['GET'])
|
||||
def caddy_cert_status():
|
||||
"""Return TLS certificate status (expiry, days remaining, domain, mode).
|
||||
|
||||
Reference in New Issue
Block a user