Files
pic/api/connectivity_manager.py
T
roof d39c091cec
Unit Tests / test (push) Successful in 13m15s
feat: connectivity redesign phase 3+4 — per-connection health, per-peer fallback, connection CRUD API
Health probes (probe_health/refresh_health) are type-aware: WireGuard
checks the last WG handshake timestamp, OpenVPN checks the tun/tap
interface, Tor checks the control-port GETINFO, and sshuttle/proxy
types do a TCP reachability probe to the remote endpoint. Results are
persisted via set_connection_status and wired into the health_monitor_loop
so the UI always has a current health snapshot without polling.

Per-peer fail-open semantics: VPN, SSH, and proxy connections default to
fail-closed (kill-switch stays active even when the tunnel is down).
Tor defaults to fail-open. The default can be overridden per-peer via
set_peer_failopen/effective_failopen. apply_routes skips the fwmark and
kill-switch rules for any fail-open peer whose connection health is not
"working", letting traffic fall back to direct routing transparently.

New generic admin-only connection CRUD endpoints (GET/POST/PUT/DELETE
/api/connectivity/connections, GET /<id>/health, PUT
/api/connectivity/peers/<peer>/failopen) are guarded by the existing
admin role check. connection.create, connection.update, connection.delete,
and peer.failopen are all registered in ROUTE_ACTION_MAP for the audit
hook so every change is recorded in the owner-visible change log.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 21:50:45 -04:00

2026 lines
87 KiB
Python

#!/usr/bin/env python3
"""
Connectivity Manager for Personal Internet Cell — Phase 5 Extended Connectivity.
Provides per-peer egress routing through alternate exits (WireGuard external,
OpenVPN, Tor, sshuttle SSH tunnel, upstream proxy) via Linux policy routing
(fwmark + ip rule + dedicated routing tables) and dedicated iptables chains.
Architecture
------------
- A peer's `exit_via` field selects the egress path: "default", "wireguard_ext",
"openvpn", "tor", "sshuttle", or "proxy".
- Each non-default exit type is assigned a unique fwmark and a dedicated routing
table:
wireguard_ext mark 0x10 table 110 iface wg_ext0
openvpn mark 0x20 table 120 iface tun0
tor mark 0x30 table 130 (transparent proxy → 9040)
sshuttle mark 0x40 table 140 (transparent proxy → 12300)
proxy mark 0x50 table 150 (transparent proxy → 12345, redsocks)
- All rules live in dedicated PIC_CONNECTIVITY chains in the `mangle` and `nat`
tables so they can be flushed/rebuilt without touching firewall_manager rules.
- A kill-switch FORWARD DROP rule prevents leaks if the exit interface is down.
Container model
---------------
Each exit type runs in its own separate container; this manager only programs
policy routing rules in the WireGuard server container (cell-wireguard) where
peer traffic flows through.
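Config files
------------
- WireGuard external: {data_dir}/services/wireguard-ext/config/wg_ext0.conf
- OpenVPN: {data_dir}/services/openvpn-client/config/<name>.ovpn
Files found under the legacy {config_dir}/connectivity/ tree are copied to
these locations when the manager is constructed. Both config kinds are
validated to strip / reject hook directives that could execute arbitrary
commands on the host.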
"""
import ipaddress
import logging
import os
import re
import secrets
import socket
import threading
import time
from typing import Any, Dict, List, Optional, Tuple
import subprocess
from base_service_manager import BaseServiceManager
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Container name constant; per the module docstring, policy routing rules are
# programmed in the WireGuard server container (cell-wireguard).
WIREGUARD_CONTAINER = 'cell-wireguard'
# Lines we strip from uploaded WireGuard configs — these can run arbitrary
# host commands when wg-quick brings the interface up/down.
_WG_FORBIDDEN_PREFIXES = ('PostUp', 'PostDown', 'PreUp', 'PreDown')
# Lines we strip from uploaded OpenVPN configs — these execute external
# scripts/binaries on connect/disconnect.
_OVPN_FORBIDDEN_DIRECTIVES = (
'up', 'down', 'script-security', 'plugin',
'route-up', 'route-pre-down',
)
# OpenVPN profile name: 1-32 chars of lowercase letters, digits, '_' or '-'.
# NOTE(review): in Python `$` also matches just before a trailing newline, so
# `.match()` accepts 'name\n' for every `^...$` pattern below; call sites
# that do not strip their input first should use `.fullmatch()`.
_NAME_RE = re.compile(r'^[a-z0-9_-]{1,32}$')
# sshuttle / proxy configuration validation
# Host: starts and ends with an alphanumeric; inner characters may also be
# '.' or '-'; at most 254 characters in total.
_HOST_RE = re.compile(r'^[A-Za-z0-9]([A-Za-z0-9.-]{0,252}[A-Za-z0-9])?$')
# SSH login name: lowercase letter or '_' first, then up to 31 more of
# lowercase letters, digits, '_' or '-'.
_SSH_USER_RE = re.compile(r'^[a-z_][a-z0-9_-]{0,31}$')
# Upstream proxy login: 1-64 chars of letters, digits, '.', '_' or '-'.
_PROXY_USER_RE = re.compile(r'^[A-Za-z0-9._-]{1,64}$')
# Printable ASCII excluding double quote and backslash — safe inside the
# quoted strings of a redsocks.conf without any escaping ambiguity.
_PROXY_PASSWORD_RE = re.compile(r'^[\x20\x21\x23-\x5B\x5D-\x7E]{1,128}$')
# Base64 alphabet with up to two '=' padding characters (shape check only;
# the value is not decoded).
_B64_RE = re.compile(r'^[A-Za-z0-9+/]+={0,2}$')
# First field of a known_hosts line: host names/addresses, optionally
# bracketed, comma-separated, with '*' / '?' wildcards; max 512 chars.
_KNOWN_HOSTS_HOSTS_RE = re.compile(r'^[A-Za-z0-9\[\]:.,*?_-]{1,512}$')
# Host-key algorithms accepted in a pinned known_hosts line.
_SSH_KEYTYPES = (
'ssh-ed25519', 'ssh-rsa',
'ecdsa-sha2-nistp256', 'ecdsa-sha2-nistp384', 'ecdsa-sha2-nistp521',
)
def _contains_strict_hostkey_disable(value: str) -> bool:
"""Detect attempts to disable SSH host-key pinning (any spacing/case)."""
normalized = re.sub(r'\s+', '', value.lower())
return ('stricthostkeychecking=no' in normalized
or 'stricthostkeycheckingno' in normalized)
def _validate_host(host) -> Optional[str]:
    """Normalize and check a hostname/IP string.

    Surrounding whitespace is removed first. The result must be non-empty,
    must not contain two consecutive dots, and must match ``_HOST_RE``.

    Returns:
        The stripped host on success, otherwise None.
    """
    if isinstance(host, str):
        candidate = host.strip()
        if candidate and '..' not in candidate and _HOST_RE.match(candidate):
            return candidate
    return None
def _validate_port(port) -> Optional[int]:
"""Return a validated TCP port (1-65535), or None when invalid."""
try:
port = int(port)
except (TypeError, ValueError):
return None
if not 1 <= port <= 65535:
return None
return port
class ConnectivityManager(BaseServiceManager):
"""Manages alternate egress paths (extended connectivity) for peers."""
EXIT_TYPES = ("default", "wireguard_ext", "openvpn", "tor",
"sshuttle", "proxy")
MARKS = {"wireguard_ext": 0x10, "openvpn": 0x20, "tor": 0x30,
"sshuttle": 0x40, "proxy": 0x50}
TABLES = {"wireguard_ext": 110, "openvpn": 120, "tor": 130,
"sshuttle": 140, "proxy": 150}
IFACES = {"wireguard_ext": "wg_ext0", "openvpn": "tun0"}
TOR_TRANS_PORT = 9040
TOR_DNS_PORT = 5353
SSHUTTLE_PORT = 12300
REDSOCKS_PORT = 12345
# Exits that work as pure iptables REDIRECTs to a local transparent-proxy
# port (no exit interface, no kill-switch interface).
REDIRECT_PORTS = {"tor": TOR_TRANS_PORT, "sshuttle": SSHUTTLE_PORT,
"proxy": REDSOCKS_PORT}
# ── Connectivity v2 — instance resource allocation ────────────────────
# Connection instance types (the legacy "default" pseudo-exit is excluded —
# a peer/service routed via "default" simply has no connection).
CONNECTION_TYPES = ("wireguard_ext", "openvpn", "tor", "sshuttle", "proxy")
# Types whose egress is a real interface (kill-switch capable). They get an
# iface name and no redirect port.
IFACE_TYPES = ("wireguard_ext", "openvpn")
# Types implemented as a local transparent-proxy REDIRECT. They get a
# redirect port and no iface.
REDIRECT_TYPES = ("tor", "sshuttle", "proxy")
# Only a single Tor instance is supported (one Tor container per cell).
SINGLE_INSTANCE_TYPES = ("tor",)
# fwmark block 0x1000–0x1FFF, stride 0x10.
MARK_BASE = 0x1000
MARK_STRIDE = 0x10
MARK_MAX = 0x1FFF
# routing tables 1000+.
TABLE_BASE = 1000
# transparent-proxy redirect port pool for instances.
REDIRECT_PORT_BASE = 9100
REDIRECT_PORT_MAX = 9199
# WireGuard handshake older than this (seconds) means the tunnel is down.
WG_HANDSHAKE_MAX_AGE = 180
# Cached health is reused for this long so on-demand GETs don't re-probe.
HEALTH_TTL = 30
# TCP connect timeout for socket-based probes (sshuttle/proxy).
PROBE_TCP_TIMEOUT = 3
IFACE_PREFIXES = {"wireguard_ext": "wgext_", "openvpn": "ovpn_"}
CONNECTION_NAME_RE = re.compile(r'^[A-Za-z0-9][A-Za-z0-9 _.-]{0,63}$')
DEFAULT_CONNECTION_NAMES = {
"wireguard_ext": "WireGuard External",
"openvpn": "OpenVPN",
"tor": "Tor",
"sshuttle": "SSH Tunnel",
"proxy": "Proxy",
}
# Store-service ids / container names backing each exit type — used to
# report an exit as configured when it was installed via the Service Store
# rather than through a legacy config upload.
STORE_SERVICE_IDS = {
"wireguard_ext": "wireguard-ext",
"openvpn": "openvpn-client",
"tor": "tor",
"sshuttle": "sshuttle",
"proxy": "proxy",
}
EXIT_CONTAINERS = {
"wireguard_ext": "cell-wg-ext",
"openvpn": "cell-openvpn",
"tor": "cell-tor",
"sshuttle": "cell-sshuttle",
"proxy": "cell-redsocks",
}
# RFC1918 ranges excluded from the sshuttle tunnel by default so cell-local
# and LAN traffic is never tunneled.
RFC1918_SUBNETS = ("10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16")
CONNECTIVITY_CHAIN = 'PIC_CONNECTIVITY'
def __init__(self, config_manager=None, peer_registry=None,
             vault_manager=None,
             data_dir: str = '/app/data', config_dir: str = '/app/config'):
    """Wire collaborators, prepare per-service config dirs and hooks.

    Steps, in order: initialise the base manager as 'connectivity', store
    the collaborators, register the v1→v2 migrator with the config manager
    when it offers that hook, create the four per-service config
    directories under ``data_dir/services``, copy any legacy files from
    ``config_dir/connectivity`` and finally subscribe to change events.
    """
    super().__init__('connectivity', data_dir, config_dir)
    self.config_manager = config_manager
    self.peer_registry = peer_registry
    self.vault_manager = vault_manager
    # Serializes connection CRUD + resource allocation across threads.
    self._conn_lock = threading.RLock()

    # The migrator is only registered here; the config manager decides when
    # to invoke it.
    cfg_mgr = self.config_manager
    if cfg_mgr is not None and hasattr(cfg_mgr, 'register_connectivity_migrator'):
        cfg_mgr.register_connectivity_migrator(self._migrate_connectivity_v1_to_v2)

    # Configs live under data_dir (not config_dir) so that store compose
    # templates can bind-mount them: the Docker daemon resolves paths on the
    # HOST.
    services_root = os.path.join(data_dir, 'services')

    def _service_config_dir(service_id: str) -> str:
        return os.path.join(services_root, service_id, 'config')

    self.wireguard_ext_dir = _service_config_dir('wireguard-ext')
    self.openvpn_dir = _service_config_dir('openvpn-client')
    self.sshuttle_dir = _service_config_dir('sshuttle')
    self.proxy_dir = _service_config_dir('proxy')
    for directory in (self.wireguard_ext_dir, self.openvpn_dir,
                      self.sshuttle_dir, self.proxy_dir):
        self.safe_makedirs(directory)

    # One-shot copy from the legacy config_dir/connectivity/ location.
    self._migrate_legacy_configs(os.path.join(config_dir, 'connectivity'))

    # Re-apply routes automatically when the network configuration changes.
    self._subscribe_to_events()
# ── Legacy migration ──────────────────────────────────────────────────
def _migrate_legacy_configs(self, legacy_base: str) -> None:
    """Copy files from the old config_dir/connectivity/ tree to the new data_dir locations.

    The old layout stored WireGuard and OpenVPN configs under the API
    container's config_dir, which Docker cannot bind-mount into
    store-service containers. Files are copied (not moved) so the legacy
    location still works until the operator removes it manually. Existing
    destination files are never overwritten.

    Failures are logged and skipped per file, so one unreadable file does
    not prevent the remaining files from being migrated.

    Args:
        legacy_base: Path of the legacy ``<config_dir>/connectivity`` tree.
    """
    import shutil
    pairs = (
        (os.path.join(legacy_base, 'wireguard_ext'), self.wireguard_ext_dir),
        (os.path.join(legacy_base, 'openvpn'), self.openvpn_dir),
    )
    for src_dir, dst_dir in pairs:
        if not os.path.isdir(src_dir):
            continue
        try:
            names = os.listdir(src_dir)
        except OSError as e:
            logger.warning('connectivity: migration from %s failed: %s', src_dir, e)
            continue
        for fname in names:
            src_file = os.path.join(src_dir, fname)
            dst_file = os.path.join(dst_dir, fname)
            if not os.path.isfile(src_file) or os.path.exists(dst_file):
                continue
            try:
                shutil.copy2(src_file, dst_file)
                # Configs may embed private keys: owner read/write only.
                os.chmod(dst_file, 0o600)
            except OSError as e:
                logger.warning('connectivity: migration of %s failed: %s', src_file, e)
                continue
            logger.info('connectivity: migrated %s -> %s', src_file, dst_file)
# ── Event wiring ──────────────────────────────────────────────────────
def _subscribe_to_events(self) -> None:
    """Register `_on_network_changed` for CONFIG_CHANGED bus events.

    Best-effort: any failure (including the import) is logged at debug
    level and otherwise ignored, because routes can still be applied
    manually.
    """
    try:
        from managers import service_bus, EventType
        topic = EventType.CONFIG_CHANGED
        callback = self._on_network_changed
        service_bus.subscribe_to_event(topic, callback)
    except Exception as exc:
        # Non-fatal: subscription is best-effort, manual apply still works.
        logger.debug(f"connectivity: event subscribe skipped: {exc}")
def _on_network_changed(self, event) -> None:
    """ServiceBus callback: rebuild routes after a relevant config change.

    Only events whose ``source`` attribute is 'network', 'wireguard' or
    'connectivity' trigger `apply_routes`; everything else is ignored.
    Exceptions are logged as warnings and never propagate to the bus.
    """
    try:
        origin = getattr(event, 'source', '')
        if origin in ('network', 'wireguard', 'connectivity'):
            logger.info(f"connectivity: re-applying routes due to {origin} change")
            self.apply_routes()
    except Exception as exc:
        logger.warning(f"connectivity: on_network_changed failed (non-fatal): {exc}")
# ── BaseServiceManager required ───────────────────────────────────────
def get_status(self) -> Dict[str, Any]:
"""Return status summary including configured exits and peer count."""
try:
exits_status: Dict[str, Dict[str, Any]] = {}
for exit_type in self.EXIT_TYPES:
if exit_type == "default":
continue
exits_status[exit_type] = self._exit_status(exit_type)
peers_with_exit = 0
if self.peer_registry is not None:
try:
for peer in self.peer_registry.list_peers():
if peer.get('exit_via', 'default') != 'default':
peers_with_exit += 1
except Exception as e:
logger.warning(f"get_status: peer count failed: {e}")
return {
'service': 'connectivity',
'running': True,
'exits': exits_status,
'peers_with_exit': peers_with_exit,
}
except Exception as e:
return self.handle_error(e, 'get_status')
def test_connectivity(self) -> Dict[str, Any]:
"""Minimal connectivity self-test."""
return {'success': True}
def get_config(self) -> Dict[str, Any]:
"""Return current connectivity config from config_manager."""
try:
if self.config_manager is not None and hasattr(
self.config_manager, 'get_connectivity_config'
):
return self.config_manager.get_connectivity_config()
except Exception as e:
logger.warning(f"get_config: config_manager lookup failed: {e}")
return {'exits': {}, 'peer_exit_map': {}}
# ── Public API ────────────────────────────────────────────────────────
def list_exits(self) -> List[Dict[str, Any]]:
"""List configured exits with current status."""
result: List[Dict[str, Any]] = []
for exit_type in self.EXIT_TYPES:
if exit_type == "default":
continue
entry = {'type': exit_type}
entry.update(self._exit_status(exit_type))
result.append(entry)
return result
def get_peer_exits(self) -> Dict[str, str]:
"""Return {peer_name: exit_type} for all peers."""
out: Dict[str, str] = {}
if self.peer_registry is None:
return out
try:
for peer in self.peer_registry.list_peers():
name = peer.get('peer')
if name:
out[name] = peer.get('exit_via', 'default')
except Exception as e:
logger.warning(f"get_peer_exits: {e}")
return out
def set_peer_exit(self, peer_name: str, exit_via: str) -> Dict[str, Any]:
"""Assign a peer to a connection (by id) or 'default' and apply rules.
`exit_via` is a connection id, 'default', or — as a one-release
back-compat shim — a legacy exit *type* string, which is resolved to
the single connection instance of that type. Validation that the id
exists lives in peer_registry.set_peer_exit_via.
"""
if not isinstance(exit_via, str) or not exit_via:
return {'ok': False, 'error': 'connection_id is required'}
if not isinstance(peer_name, str) or not re.match(r'^[A-Za-z0-9_.-]{1,64}$', peer_name):
return {'ok': False, 'error': f'invalid peer_name {peer_name!r}'}
if self.peer_registry is None:
return {'ok': False, 'error': 'peer_registry not available'}
try:
ok = self.peer_registry.set_peer_exit_via(peer_name, exit_via)
except Exception as e:
logger.error(f"set_peer_exit: registry update failed: {e}")
return {'ok': False, 'error': str(e)}
if not ok:
# Distinguish "no such peer" from "no such connection".
if self._peer_exists(peer_name):
return {'ok': False, 'error':
f'unknown connection {exit_via!r}; '
f"must be a connection id or 'default'"}
return {'ok': False, 'error': f'peer {peer_name!r} not found'}
try:
self.apply_routes()
except Exception as e:
logger.warning(f"set_peer_exit: apply_routes failed (non-fatal): {e}")
resolved = 'default'
try:
peer = self.peer_registry.get_peer(peer_name)
if peer:
resolved = peer.get('exit_via', 'default')
except Exception:
pass
return {'ok': True, 'peer': peer_name, 'exit_via': resolved}
def _peer_exists(self, peer_name: str) -> bool:
"""True when a peer with this name is registered."""
if self.peer_registry is None:
return False
try:
return self.peer_registry.get_peer(peer_name) is not None
except Exception:
return False
def upload_wireguard_ext(self, conf_text: str) -> Dict[str, Any]:
    """Sanitise an external WireGuard config and save it as wg_ext0.conf.

    Returns ``{'ok': True}`` on success, or ``{'ok': False, 'error': ...}``
    when validation (`_validate_wg_conf`) or the write fails.
    """
    try:
        sanitized = self._validate_wg_conf(conf_text)
    except ValueError as exc:
        return {'ok': False, 'error': str(exc)}

    target = os.path.join(self.wireguard_ext_dir, 'wg_ext0.conf')
    try:
        self._write_secure(target, sanitized)
    except Exception as exc:
        logger.error(f"upload_wireguard_ext: write failed: {exc}")
        return {'ok': False, 'error': str(exc)}
    else:
        logger.info(f"connectivity: stored wg_ext0.conf ({len(sanitized)} bytes)")
        return {'ok': True}
def upload_openvpn(self, ovpn_text: str, name: str = 'default') -> Dict[str, Any]:
    """Validate and store an OpenVPN profile.

    Args:
        ovpn_text: Raw profile text; passed through `_validate_ovpn`.
        name: Profile name, used as the ``<name>.ovpn`` file name. Must
            consist of 1-32 characters from ``[a-z0-9_-]``.

    Returns:
        ``{'ok': True}`` on success, otherwise
        ``{'ok': False, 'error': <message>}``.
    """
    # fullmatch, not match: the pattern's `$` also matches before a trailing
    # newline, which would put a newline into the file name.
    if not isinstance(name, str) or not _NAME_RE.fullmatch(name):
        return {
            'ok': False,
            'error': f'invalid name {name!r}; must match [a-z0-9_-]{{1,32}}',
        }
    try:
        cleaned = self._validate_ovpn(ovpn_text)
    except ValueError as e:
        return {'ok': False, 'error': str(e)}
    path = os.path.join(self.openvpn_dir, f'{name}.ovpn')
    try:
        self._write_secure(path, cleaned)
    except Exception as e:
        logger.error(f"upload_openvpn: write failed: {e}")
        return {'ok': False, 'error': str(e)}
    logger.info(f"connectivity: stored {name}.ovpn ({len(cleaned)} bytes)")
    return {'ok': True}
def configure_sshuttle(self, cfg: Dict[str, Any]) -> Dict[str, Any]:
    """Validate and store an sshuttle (SSH tunnel) exit configuration.

    Requires a pinned host key (a single known_hosts line); rejects any
    attempt to disable strict host-key checking. Secrets (private key /
    password) are written via `_write_secure` under the sshuttle config
    directory and mirrored into the vault — they are never placed in the
    persisted exit config.

    Args:
        cfg: Request body with 'host', 'user', 'known_hosts', optional
            'port' (default 22), 'auth' ('key' default, or 'password'),
            'private_key' or 'password', and optional 'exclude_subnets'.

    Returns:
        ``{'ok': True}`` on success, otherwise
        ``{'ok': False, 'error': <message>}``.
    """
    def _fail(message: str) -> Dict[str, Any]:
        return {'ok': False, 'error': message}

    if not isinstance(cfg, dict):
        return _fail('config must be a JSON object')
    for value in cfg.values():
        if isinstance(value, str) and _contains_strict_hostkey_disable(value):
            return _fail('StrictHostKeyChecking=no is not allowed; '
                         'a pinned host key (known_hosts line) is required')
    host = _validate_host(cfg.get('host'))
    if not host:
        return _fail('invalid host: must be a hostname or IP')
    port = _validate_port(cfg.get('port', 22))
    if port is None:
        return _fail('invalid port: must be 1-65535')
    user = cfg.get('user')
    # fullmatch, not match: `$` also matches before a trailing newline, which
    # would let 'root\n' into the line-oriented sshuttle.conf.
    if not isinstance(user, str) or not _SSH_USER_RE.fullmatch(user):
        return _fail('invalid user: must match ^[a-z_][a-z0-9_-]{0,31}$')
    auth = cfg.get('auth', 'key')
    if auth not in ('key', 'password'):
        return _fail("invalid auth: must be 'key' or 'password'")
    known_hosts = cfg.get('known_hosts')
    err = self._validate_known_hosts_line(known_hosts)
    if err:
        return _fail(err)
    known_hosts = known_hosts.strip()

    private_key = ''
    password = ''
    if auth == 'key':
        private_key = cfg.get('private_key', '')
        if not isinstance(private_key, str) or 'PRIVATE KEY' not in private_key:
            return _fail('private_key is required for key auth and must be '
                         'a PEM/OpenSSH private key')
    else:
        password = cfg.get('password', '')
        # The password is stored as a single line; reject CR as well as LF.
        if (not isinstance(password, str) or not password
                or '\n' in password or '\r' in password):
            return _fail('password is required for password auth')

    exclude_subnets = cfg.get('exclude_subnets')
    if exclude_subnets is None:
        exclude_subnets = self._default_exclude_subnets()
    if not isinstance(exclude_subnets, list):
        return _fail('exclude_subnets must be a list of CIDRs')
    validated_excludes = []
    for net in exclude_subnets:
        try:
            # strict=False: host bits are masked off instead of rejected.
            validated_excludes.append(
                str(ipaddress.ip_network(str(net), strict=False)))
        except ValueError:
            return _fail(f'invalid exclude subnet: {net!r}')

    conf_lines = [
        f'HOST={host}',
        f'PORT={port}',
        f'USER={user}',
        f'AUTH={auth}',
        f'LISTEN_PORT={self.SSHUTTLE_PORT}',
        f'EXCLUDE={",".join(validated_excludes)}',
    ]
    try:
        self._write_secure(
            os.path.join(self.sshuttle_dir, 'known_hosts'),
            known_hosts + '\n',
        )
        if auth == 'key':
            # Exactly one trailing newline after the key material.
            key_text = private_key.rstrip('\n') + '\n'
            self._write_secure(os.path.join(self.sshuttle_dir, 'id_pic'), key_text)
        else:
            self._write_secure(
                os.path.join(self.sshuttle_dir, 'password'), password + '\n')
        self._write_secure(
            os.path.join(self.sshuttle_dir, 'sshuttle.conf'),
            '\n'.join(conf_lines) + '\n',
        )
    except Exception as e:
        logger.error(f"configure_sshuttle: write failed: {e}")
        return _fail('failed to write sshuttle configuration')

    # Mirroring into the vault is best-effort; the on-disk files above are
    # what this method has already written.
    if self.vault_manager is not None:
        try:
            if auth == 'key':
                self.vault_manager.store_secret('connectivity_sshuttle_key',
                                                private_key)
            else:
                self.vault_manager.store_secret('connectivity_sshuttle_password',
                                                password)
        except Exception as e:
            logger.warning(f"configure_sshuttle: vault store failed: {e}")

    # Only non-secret fields are persisted in the connectivity config.
    self._persist_exit_config('sshuttle', {
        'host': host,
        'port': port,
        'user': user,
        'auth': auth,
        'exclude_subnets': validated_excludes,
    })
    logger.info(f"connectivity: configured sshuttle exit ({user}@{host}:{port})")
    return {'ok': True}
def configure_proxy(self, cfg: Dict[str, Any]) -> Dict[str, Any]:
    """Validate and store an upstream proxy (redsocks) exit configuration.

    Generates redsocks.conf from strictly validated fields only — no value
    that could break out of the quoted config strings is accepted. The
    password is written only to that conf file (via `_write_secure`); it is
    not included in the persisted exit config.

    Args:
        cfg: Request body with 'scheme' ('http' or 'socks5'), 'host',
            'port', and optional 'user' / 'password'.

    Returns:
        ``{'ok': True}`` on success, otherwise
        ``{'ok': False, 'error': <message>}``.
    """
    if not isinstance(cfg, dict):
        return {'ok': False, 'error': 'config must be a JSON object'}
    scheme = cfg.get('scheme')
    if scheme not in ('http', 'socks5'):
        return {'ok': False, 'error': "invalid scheme: must be 'http' or 'socks5'"}
    host = _validate_host(cfg.get('host'))
    if not host:
        return {'ok': False, 'error': 'invalid host: must be a hostname or IP'}
    port = _validate_port(cfg.get('port'))
    if port is None:
        return {'ok': False, 'error': 'invalid port: must be 1-65535'}
    user = cfg.get('user') or ''
    password = cfg.get('password') or ''
    # fullmatch, not match: the patterns' `$` also matches before a trailing
    # newline, and a newline inside `login = "..."` / `password = "..."`
    # would break the quoted string in redsocks.conf.
    if user and not (isinstance(user, str) and _PROXY_USER_RE.fullmatch(user)):
        return {
            'ok': False,
            'error': 'invalid user: must match ^[A-Za-z0-9._-]{1,64}$',
        }
    if password and not (isinstance(password, str)
                         and _PROXY_PASSWORD_RE.fullmatch(password)):
        return {
            'ok': False,
            'error': 'invalid password: 1-128 printable ASCII characters '
                     'excluding double quotes and backslashes',
        }
    if password and not user:
        return {'ok': False, 'error': 'password requires a user'}
    conf = self._render_redsocks_conf(scheme, host, port, user, password)
    try:
        self._write_secure(os.path.join(self.proxy_dir, 'redsocks.conf'), conf)
    except Exception as e:
        logger.error(f"configure_proxy: write failed: {e}")
        return {'ok': False, 'error': 'failed to write redsocks configuration'}
    # The password is deliberately left out of the persisted fields.
    self._persist_exit_config('proxy', {
        'scheme': scheme,
        'host': host,
        'port': port,
        'user': user,
    })
    logger.info(f"connectivity: configured proxy exit ({scheme}://{host}:{port})")
    return {'ok': True}
def _render_redsocks_conf(self, scheme: str, host: str, port: int,
user: str, password: str) -> str:
"""Build a redsocks.conf from already-validated fields."""
redsocks_type = 'socks5' if scheme == 'socks5' else 'http-connect'
lines = [
'base {',
' log_debug = off;',
' log_info = on;',
' log = stderr;',
' daemon = off;',
' redirector = iptables;',
'}',
'',
'redsocks {',
' local_ip = 0.0.0.0;',
f' local_port = {self.REDSOCKS_PORT};',
f' ip = {host};',
f' port = {port};',
f' type = {redsocks_type};',
]
if user:
lines.append(f' login = "{user}";')
if password:
lines.append(f' password = "{password}";')
lines.append('}')
return '\n'.join(lines) + '\n'
@staticmethod
def _validate_known_hosts_line(line) -> Optional[str]:
    """Check one pinned known_hosts entry; return an error text or None.

    Expected shape: ``host[,ip] keytype base64key [comment]``. Only the
    first three whitespace-separated fields are inspected; anything after
    them is ignored.
    """
    if not isinstance(line, str) or not line.strip():
        return ('known_hosts is required: a pinned host key line '
                '(host[,ip] keytype base64key)')
    entry = line.strip()
    if any(brk in entry for brk in ('\n', '\r')):
        return 'known_hosts must be a single line'
    fields = entry.split()
    if len(fields) < 3:
        return ('invalid known_hosts line: expected '
                'host[,ip] keytype base64key')
    hosts, keytype, key = fields[:3]
    if _KNOWN_HOSTS_HOSTS_RE.match(hosts) is None:
        return f'invalid known_hosts host field: {hosts!r}'
    if keytype not in _SSH_KEYTYPES:
        return (f'invalid known_hosts key type {keytype!r}; '
                f'must be one of {_SSH_KEYTYPES}')
    if _B64_RE.match(key) is None:
        return 'invalid known_hosts key: not valid base64'
    return None
def _default_exclude_subnets(self) -> List[str]:
"""Cell subnet + RFC1918 ranges — internal traffic is never tunneled."""
excludes = list(self.RFC1918_SUBNETS)
try:
if self.config_manager is not None:
identity = self.config_manager.get_identity()
if isinstance(identity, dict):
ip_range = identity.get('ip_range', '')
if isinstance(ip_range, str) and '/' in ip_range \
and ip_range not in excludes:
excludes.insert(0, ip_range)
except Exception as e:
logger.debug(f"_default_exclude_subnets: {e}")
return excludes
def _persist_exit_config(self, exit_type: str, fields: Dict[str, Any]) -> None:
"""Persist non-secret exit fields under connectivity.exits in config."""
if self.config_manager is None:
return
try:
cfg = self.config_manager.get_connectivity_config()
exits = cfg.get('exits') if isinstance(cfg, dict) else None
exits = dict(exits) if isinstance(exits, dict) else {}
exits[exit_type] = fields
self.config_manager.set_connectivity_field('exits', exits)
except Exception as e:
logger.warning(f"_persist_exit_config({exit_type}): {e}")
# ── Connectivity v2 — per-instance config validation ──────────────────
#
# These validators operate on a single connection instance's config dict,
# returning (clean_config, clean_secrets, error). They share the field
# rules used by the legacy single-slot configure_* methods so behaviour is
# identical; the legacy methods remain the v1 write path until phase 2.
def _validate_connection_config(
self, conn_type: str, config: Dict[str, Any],
secrets_in: Optional[Dict[str, Any]],
) -> Tuple[Dict[str, Any], Dict[str, str], Optional[str]]:
"""Validate one instance's config + secrets for `conn_type`.
Returns (clean_config, clean_secrets, error). On error the first two
values are empty. Secrets are returned separately so callers store them
in the vault and never in cell_config.
"""
if not isinstance(config, dict):
return {}, {}, 'config must be a JSON object'
secrets_in = secrets_in or {}
if not isinstance(secrets_in, dict):
return {}, {}, 'secrets must be a JSON object'
if conn_type == 'sshuttle':
return self._validate_sshuttle_instance(config, secrets_in)
if conn_type == 'proxy':
return self._validate_proxy_instance(config, secrets_in)
if conn_type == 'wireguard_ext':
return self._validate_wg_instance(config, secrets_in)
if conn_type == 'openvpn':
return self._validate_ovpn_instance(config, secrets_in)
if conn_type == 'tor':
# Tor has no per-instance config or secret.
return {}, {}, None
return {}, {}, f'unsupported connection type {conn_type!r}'
def _validate_sshuttle_instance(
    self, cfg: Dict[str, Any], secrets_in: Dict[str, Any],
) -> Tuple[Dict[str, Any], Dict[str, str], Optional[str]]:
    """Validate one sshuttle connection instance.

    Args:
        cfg: Non-secret fields ('host', 'port', 'user', 'auth',
            'known_hosts', optional 'exclude_subnets'). 'private_key' /
            'password' are also accepted here as a fallback.
        secrets_in: Secret fields; 'private_key' / 'password' here take
            precedence over the same keys in `cfg`.

    Returns:
        ``(clean_config, clean_secrets, error)``. `clean_secrets` always
        carries 'known_hosts' plus either 'private_key' or 'password'. On
        error the first two values are empty dicts.
    """
    for value in cfg.values():
        if isinstance(value, str) and _contains_strict_hostkey_disable(value):
            return {}, {}, ('StrictHostKeyChecking=no is not allowed; a '
                            'pinned host key (known_hosts line) is required')
    host = _validate_host(cfg.get('host'))
    if not host:
        return {}, {}, 'invalid host: must be a hostname or IP'
    port = _validate_port(cfg.get('port', 22))
    if port is None:
        return {}, {}, 'invalid port: must be 1-65535'
    user = cfg.get('user')
    # fullmatch, not match: `$` also matches before a trailing newline, so
    # `.match()` would accept 'root\n'.
    if not isinstance(user, str) or not _SSH_USER_RE.fullmatch(user):
        return {}, {}, 'invalid user: must match ^[a-z_][a-z0-9_-]{0,31}$'
    auth = cfg.get('auth', 'key')
    if auth not in ('key', 'password'):
        return {}, {}, "invalid auth: must be 'key' or 'password'"
    known_hosts = cfg.get('known_hosts')
    err = self._validate_known_hosts_line(known_hosts)
    if err:
        return {}, {}, err
    known_hosts = known_hosts.strip()
    clean_secrets: Dict[str, str] = {'known_hosts': known_hosts}
    if auth == 'key':
        private_key = secrets_in.get('private_key', cfg.get('private_key', ''))
        if not isinstance(private_key, str) or 'PRIVATE KEY' not in private_key:
            return {}, {}, ('private_key is required for key auth and must be '
                            'a PEM/OpenSSH private key')
        clean_secrets['private_key'] = private_key
    else:
        password = secrets_in.get('password', cfg.get('password', ''))
        # Single-line secret: reject CR as well as LF.
        if (not isinstance(password, str) or not password
                or '\n' in password or '\r' in password):
            return {}, {}, 'password is required for password auth'
        clean_secrets['password'] = password
    exclude_subnets = cfg.get('exclude_subnets')
    if exclude_subnets is None:
        exclude_subnets = self._default_exclude_subnets()
    if not isinstance(exclude_subnets, list):
        return {}, {}, 'exclude_subnets must be a list of CIDRs'
    validated_excludes = []
    for net in exclude_subnets:
        try:
            # strict=False: host bits are masked off instead of rejected.
            validated_excludes.append(
                str(ipaddress.ip_network(str(net), strict=False)))
        except ValueError:
            return {}, {}, f'invalid exclude subnet: {net!r}'
    clean_config = {
        'host': host, 'port': port, 'user': user, 'auth': auth,
        'exclude_subnets': validated_excludes,
    }
    return clean_config, clean_secrets, None
def _validate_proxy_instance(
    self, cfg: Dict[str, Any], secrets_in: Dict[str, Any],
) -> Tuple[Dict[str, Any], Dict[str, str], Optional[str]]:
    """Validate one upstream-proxy connection instance.

    Args:
        cfg: 'scheme' ('http' or 'socks5'), 'host', 'port', optional 'user';
            'password' is accepted here as a fallback.
        secrets_in: Optional 'password'; takes precedence over `cfg`.

    Returns:
        ``(clean_config, clean_secrets, error)``. `clean_secrets` contains
        'password' only when one was supplied. On error the first two
        values are empty dicts.
    """
    scheme = cfg.get('scheme')
    if scheme not in ('http', 'socks5'):
        return {}, {}, "invalid scheme: must be 'http' or 'socks5'"
    host = _validate_host(cfg.get('host'))
    if not host:
        return {}, {}, 'invalid host: must be a hostname or IP'
    port = _validate_port(cfg.get('port'))
    if port is None:
        return {}, {}, 'invalid port: must be 1-65535'
    user = cfg.get('user') or ''
    password = secrets_in.get('password', cfg.get('password') or '')
    # fullmatch, not match: the patterns' `$` also matches before a trailing
    # newline, which must not reach a quoted redsocks.conf string.
    if user and not (isinstance(user, str) and _PROXY_USER_RE.fullmatch(user)):
        return {}, {}, 'invalid user: must match ^[A-Za-z0-9._-]{1,64}$'
    if password and not (isinstance(password, str)
                         and _PROXY_PASSWORD_RE.fullmatch(password)):
        return {}, {}, ('invalid password: 1-128 printable ASCII characters '
                        'excluding double quotes and backslashes')
    if password and not user:
        return {}, {}, 'password requires a user'
    clean_config = {'scheme': scheme, 'host': host, 'port': port, 'user': user}
    clean_secrets: Dict[str, str] = {}
    if password:
        clean_secrets['password'] = password
    return clean_config, clean_secrets, None
def _validate_wg_instance(
self, cfg: Dict[str, Any], secrets_in: Dict[str, Any],
) -> Tuple[Dict[str, Any], Dict[str, str], Optional[str]]:
conf_text = secrets_in.get('conf', cfg.get('conf', ''))
if not isinstance(conf_text, str) or not conf_text.strip():
return {}, {}, 'conf is required: a WireGuard config'
try:
cleaned = self._validate_wg_conf(conf_text)
except ValueError as e:
return {}, {}, str(e)
return {}, {'conf': cleaned}, None
def _validate_ovpn_instance(
self, cfg: Dict[str, Any], secrets_in: Dict[str, Any],
) -> Tuple[Dict[str, Any], Dict[str, str], Optional[str]]:
conf_text = secrets_in.get('conf', cfg.get('conf', ''))
if not isinstance(conf_text, str) or not conf_text.strip():
return {}, {}, 'conf is required: an OpenVPN profile'
try:
cleaned = self._validate_ovpn(conf_text)
except ValueError as e:
return {}, {}, str(e)
return {}, {'conf': cleaned}, None
# ── Connectivity v2 — resource allocator ──────────────────────────────
def _used_resources(self) -> Tuple[set, set, set, set]:
"""Return (marks, tables, ifaces, ports) currently used by connections."""
marks, tables, ifaces, ports = set(), set(), set(), set()
if self.config_manager is None:
return marks, tables, ifaces, ports
try:
conns = self.config_manager.list_connections()
except Exception as e:
logger.warning(f"_used_resources: list_connections failed: {e}")
conns = []
for c in conns:
if isinstance(c.get('mark'), int):
marks.add(c['mark'])
if isinstance(c.get('table'), int):
tables.add(c['table'])
if c.get('iface'):
ifaces.add(c['iface'])
if isinstance(c.get('redirect_port'), int):
ports.add(c['redirect_port'])
return marks, tables, ifaces, ports
def _allocate_resources(
    self, conn_type: str, conn_id: str,
) -> Tuple[int, int, Optional[str], Optional[int]]:
    """Pick ``(mark, table, iface, redirect_port)`` for a new connection.

    Each pool hands out its lowest free value: marks step by ``MARK_STRIDE``
    from ``MARK_BASE``, tables count up from ``TABLE_BASE``, redirect ports
    run from ``REDIRECT_PORT_BASE`` to ``REDIRECT_PORT_MAX``. The iface name
    (``IFACE_TYPES`` only) is the type's prefix plus up to 8 characters of
    the id's last ``_``-separated segment. A redirect port is assigned only
    for ``REDIRECT_TYPES``.

    Raises:
        ValueError: when the mark pool or the redirect-port pool is exhausted.
    """
    taken_marks, taken_tables, _taken_ifaces, taken_ports = self._used_resources()

    fwmark = self.MARK_BASE
    while fwmark in taken_marks:
        fwmark += self.MARK_STRIDE
        if fwmark > self.MARK_MAX:
            raise ValueError('no free fwmark available in 0x1000–0x1FFF')

    route_table = self.TABLE_BASE
    while route_table in taken_tables:
        route_table += 1

    iface: Optional[str] = None
    if conn_type in self.IFACE_TYPES:
        suffix = conn_id.rsplit('_', 1)[-1][:8]
        iface = f"{self.IFACE_PREFIXES[conn_type]}{suffix}"

    redirect_port: Optional[int] = None
    if conn_type in self.REDIRECT_TYPES:
        for candidate in range(self.REDIRECT_PORT_BASE,
                               self.REDIRECT_PORT_MAX + 1):
            if candidate not in taken_ports:
                redirect_port = candidate
                break
        else:
            raise ValueError('no free redirect port available in 9100–9199')
    return fwmark, route_table, iface, redirect_port
@staticmethod
def _new_conn_id() -> str:
    """Return a fresh random connection id: ``conn_`` plus 8 hex characters."""
    suffix = secrets.token_hex(4)
    return "conn_" + suffix
# ── Connectivity v2 — connection CRUD ─────────────────────────────────
def _compute_state(self, conn_type: str, config: Dict[str, Any],
                   secret_refs: List[str]) -> str:
    """Classify a connection as ``'configured'`` or ``'added'``.

    ``'configured'`` requires every field named by ``_required_for_type`` to
    be truthy in ``config`` and every required secret suffix to terminate at
    least one entry of ``secret_refs`` (as ``_<suffix>``). Otherwise the
    state is ``'added'``.
    """
    needs = self._required_for_type(conn_type, config)
    if any(not config.get(name) for name in needs.get('config', ())):
        return 'added'
    for suffix in needs.get('secrets', ()):
        tail = f'_{suffix}'
        if not any(ref.endswith(tail) for ref in secret_refs):
            return 'added'
    return 'configured'
def _required_for_type(self, conn_type: str,
                       config: Dict[str, Any]) -> Dict[str, Tuple[str, ...]]:
    """Return the required config fields and secret suffixes for a type.

    The result has two keys, ``'config'`` and ``'secrets'``, each a tuple of
    names. sshuttle needs ``known_hosts`` plus either ``password`` (when
    ``config['auth'] == 'password'``) or ``private_key``. Tor and any
    unrecognised type require nothing.
    """
    if conn_type == 'sshuttle':
        uses_password = config.get('auth') == 'password'
        auth_secret = 'password' if uses_password else 'private_key'
        return {'config': ('host', 'user', 'auth'),
                'secrets': ('known_hosts', auth_secret)}
    if conn_type == 'proxy':
        return {'config': ('scheme', 'host', 'port'), 'secrets': ()}
    if conn_type in ('wireguard_ext', 'openvpn'):
        return {'config': (), 'secrets': ('conf',)}
    # Tor and unknown types have no mandatory fields or secrets.
    return {'config': (), 'secrets': ()}
def create_connection(self, conn_type: str, name: str,
                      config: Optional[Dict[str, Any]] = None,
                      secrets: Optional[Dict[str, Any]] = None,
                      ) -> Dict[str, Any]:
    """Create a named connection instance of `conn_type`.

    Checks, in order: the type is one of ``CONNECTION_TYPES``; the name
    matches ``CONNECTION_NAME_RE``; the per-type config/secrets validate; a
    config manager is available. Then, under the connection lock: the
    single-instance rule, case-insensitive name uniqueness, and resource
    allocation. Secrets are stored in the vault as ``<conn_id>_<field>`` and
    only those refs are kept in the record.

    Nothing is written to the vault when the config manager is missing, and
    secrets already stored are removed again if a later store or the final
    persist fails, so a failed create leaves no vault entries behind
    (best-effort: a failed removal is logged).

    Returns:
        ``{'ok': True, 'connection': <public record>}`` on success, else
        ``{'ok': False, 'error': <message>}``.
    """
    if conn_type not in self.CONNECTION_TYPES:
        return {'ok': False, 'error':
                f'invalid type {conn_type!r}; must be one of {self.CONNECTION_TYPES}'}
    if not isinstance(name, str) or not self.CONNECTION_NAME_RE.match(name.strip()):
        return {'ok': False, 'error': 'invalid name: 1-64 chars, '
                'letters/digits/space/._- and must start alphanumeric'}
    name = name.strip()
    config = config or {}
    clean_config, clean_secrets, err = self._validate_connection_config(
        conn_type, config, secrets)
    if err:
        return {'ok': False, 'error': err}
    # Without a config manager the record can never be persisted; bail out
    # before any secret reaches the vault (same error as update/delete).
    if self.config_manager is None:
        return {'ok': False, 'error': 'config unavailable'}

    def _discard_secrets(refs: List[str]) -> None:
        """Best-effort removal of vault entries written by this call."""
        for stale in refs:
            try:
                self.vault_manager.delete_secret(stale)
            except Exception as exc:
                logger.warning(
                    f"create_connection: rollback of {stale} failed: {exc}")

    with self._conn_lock:
        try:
            existing = self.config_manager.list_connections()
        except Exception as e:
            logger.error(f"create_connection: list failed: {e}")
            return {'ok': False, 'error': 'failed to read connections'}
        if conn_type in self.SINGLE_INSTANCE_TYPES:
            if any(c.get('type') == conn_type for c in existing):
                return {'ok': False, 'error':
                        f'only a single {conn_type} connection is supported'}
        # `or ''` tolerates a stored record whose name is null.
        wanted = name.lower()
        if any(str(c.get('name') or '').strip().lower() == wanted
               for c in existing):
            return {'ok': False, 'error': f'a connection named {name!r} already exists'}
        conn_id = self._new_conn_id()
        try:
            mark, table, iface, redirect_port = self._allocate_resources(
                conn_type, conn_id)
        except ValueError as e:
            return {'ok': False, 'error': str(e)}
        if clean_secrets and self.vault_manager is None:
            return {'ok': False, 'error': 'vault unavailable; cannot store secrets'}
        secret_refs: List[str] = []
        for field, value in clean_secrets.items():
            ref = f"{conn_id}_{field}"
            try:
                self.vault_manager.store_secret(ref, value)
            except Exception as e:
                logger.error(f"create_connection: vault store {ref}: {e}")
                _discard_secrets(secret_refs)
                return {'ok': False, 'error': 'failed to store secret in vault'}
            secret_refs.append(ref)
        now = self._now_iso()
        state = self._compute_state(conn_type, clean_config, secret_refs)
        record = {
            'id': conn_id,
            'type': conn_type,
            'name': name,
            'enabled': True,
            'mark': mark,
            'table': table,
            'iface': iface,
            'redirect_port': redirect_port,
            'config': clean_config,
            'secret_refs': secret_refs,
            'cell_name': None,
            'status': {
                'state': state,
                'health': 'unknown',
                'last_check': None,
                'detail': None,
            },
            'created_at': now,
            'updated_at': now,
        }
        try:
            self.config_manager.add_connection(record)
        except Exception as e:
            logger.error(f"create_connection: persist failed: {e}")
            _discard_secrets(secret_refs)
            return {'ok': False, 'error': 'failed to persist connection'}
        logger.info(f"connectivity: created connection {conn_id} "
                    f"({conn_type}/{name}) mark={hex(mark)} table={table}")
        return {'ok': True, 'connection': self._public_record(record)}
def update_connection(self, conn_id: str, name: Optional[str] = None,
                      config: Optional[Dict[str, Any]] = None,
                      secrets: Optional[Dict[str, Any]] = None,
                      ) -> Dict[str, Any]:
    """Change the name, config and/or secrets of a stored connection.

    A new name must match ``CONNECTION_NAME_RE`` and be unique
    (case-insensitively) among the other connections. When ``config`` or
    ``secrets`` is supplied, the stored config overlaid with ``config`` is
    re-validated for the connection's type; validated secrets are written to
    the vault as ``<conn_id>_<field>``. Whenever something changed,
    ``updated_at`` and ``status.state`` are refreshed before persisting.

    Returns:
        ``{'ok': True, 'connection': <public record>}`` built from the record
        as re-read after the update, else ``{'ok': False, 'error': ...}``.
    """
    with self._conn_lock:
        if self.config_manager is None:
            return {'ok': False, 'error': 'config unavailable'}
        current = self.config_manager.get_connection(conn_id)
        if current is None:
            return {'ok': False, 'error': f'connection {conn_id!r} not found'}
        conn_type = current.get('type')
        changes: Dict[str, Any] = {}

        if name is not None:
            if not isinstance(name, str) or not self.CONNECTION_NAME_RE.match(name.strip()):
                return {'ok': False, 'error': 'invalid name'}
            name = name.strip()
            clash = any(
                other.get('id') != conn_id
                and other.get('name', '').strip().lower() == name.lower()
                for other in self.config_manager.list_connections()
            )
            if clash:
                return {'ok': False, 'error':
                        f'a connection named {name!r} already exists'}
            changes['name'] = name

        refs = list(current.get('secret_refs', []))
        effective_config = dict(current.get('config', {}))
        if config is not None or secrets is not None:
            # Validate the stored config with the caller's overrides on top.
            candidate = dict(current.get('config', {}))
            if isinstance(config, dict):
                candidate.update(config)
            clean_config, clean_secrets, err = self._validate_connection_config(
                conn_type, candidate, secrets)
            if err:
                return {'ok': False, 'error': err}
            effective_config = clean_config
            changes['config'] = clean_config
            if clean_secrets:
                if self.vault_manager is None:
                    return {'ok': False, 'error': 'vault unavailable'}
                for field, value in clean_secrets.items():
                    ref = f"{conn_id}_{field}"
                    try:
                        self.vault_manager.store_secret(ref, value)
                    except Exception as e:
                        logger.error(f"update_connection: vault store {ref}: {e}")
                        return {'ok': False, 'error': 'failed to store secret'}
                    if ref not in refs:
                        refs.append(ref)
                changes['secret_refs'] = refs

        if changes:
            changes['updated_at'] = self._now_iso()
            status = dict(current.get('status', {}))
            status['state'] = self._compute_state(conn_type, effective_config, refs)
            changes['status'] = status
            self.config_manager.update_connection(conn_id, changes)
        refreshed = self.config_manager.get_connection(conn_id)
        return {'ok': True, 'connection': self._public_record(refreshed)}
def delete_connection(self, conn_id: str) -> Dict[str, Any]:
    """Delete a connection record and its vault secrets.

    Refused when there is no config manager, when the id is unknown, or
    while a peer or service still references the connection (see
    ``_connection_reference``).

    The record is removed *before* its secrets. If removing the record
    raises, the exception propagates with the vault untouched, so the
    surviving record still points at usable secrets. Secret removal
    afterwards is best-effort: a failure is logged and can only leave an
    orphaned vault entry behind.

    Returns:
        ``{'ok': True}`` on success, else ``{'ok': False, 'error': ...}``.
    """
    with self._conn_lock:
        if self.config_manager is None:
            return {'ok': False, 'error': 'config unavailable'}
        record = self.config_manager.get_connection(conn_id)
        if record is None:
            return {'ok': False, 'error': f'connection {conn_id!r} not found'}
        ref = self._connection_reference(conn_id)
        if ref:
            return {'ok': False, 'error':
                    f'connection is in use by {ref}; detach it first'}
        self.config_manager.delete_connection(conn_id)
        if self.vault_manager is not None:
            # `or []` tolerates a record whose secret_refs is stored as null.
            for secret_ref in record.get('secret_refs') or []:
                try:
                    self.vault_manager.delete_secret(secret_ref)
                except Exception as e:
                    logger.warning(f"delete_connection: vault delete {secret_ref}: {e}")
        logger.info(f"connectivity: deleted connection {conn_id}")
        return {'ok': True}
def _connection_reference(self, conn_id: str) -> Optional[str]:
    """Describe the first peer or service that still uses `conn_id`.

    Peers are checked first (``exit_via`` equal to the id), then the
    ``egress_overrides`` mapping of the config manager (service id ->
    connection id). Lookup errors in either source are logged at debug level
    and treated as "no reference found there".

    Returns:
        ``"peer '<name>'"`` or ``"service '<id>'"``, or None when unreferenced.
    """
    registry = self.peer_registry
    if registry is not None:
        try:
            user = next(
                (p for p in registry.list_peers()
                 if p.get('exit_via') == conn_id),
                None)
            if user is not None:
                return f"peer {user.get('peer')!r}"
        except Exception as e:
            logger.debug(f"_connection_reference (peers): {e}")
    if self.config_manager is not None:
        try:
            overrides = self.config_manager.configs.get('egress_overrides')
            if isinstance(overrides, dict):
                for svc_id in overrides:
                    if overrides[svc_id] == conn_id:
                        return f"service {svc_id!r}"
        except Exception as e:
            logger.debug(f"_connection_reference (egress): {e}")
    return None
def list_connections(self) -> List[Dict[str, Any]]:
    """Return every stored connection in public form.

    ``status.state`` is recomputed from each record's type, config and
    secret refs instead of trusting the stored value. Returns an empty list
    when there is no config manager or when listing fails (logged).
    """
    if self.config_manager is None:
        return []
    try:
        stored = self.config_manager.list_connections()
    except Exception as e:
        logger.warning(f"list_connections: {e}")
        return []

    def _with_live_state(record: Dict[str, Any]) -> Dict[str, Any]:
        # Work on copies so the stored record is never mutated.
        state = self._compute_state(
            record.get('type'), record.get('config', {}),
            record.get('secret_refs', []))
        status = dict(record.get('status', {}))
        status['state'] = state
        return self._public_record({**record, 'status': status})

    return [_with_live_state(record) for record in stored]
def get_connection(self, conn_id: str) -> Optional[Dict[str, Any]]:
    """Return one connection in public form, or None when unavailable.

    None is returned when there is no config manager or the id is unknown.
    ``status.state`` is recomputed from the record's type, config and secret
    refs; the stored record itself is left untouched.
    """
    manager = self.config_manager
    if manager is None:
        return None
    stored = manager.get_connection(conn_id)
    if stored is None:
        return None
    live_status = dict(stored.get('status', {}))
    live_status['state'] = self._compute_state(
        stored.get('type'), stored.get('config', {}),
        stored.get('secret_refs', []))
    return self._public_record({**stored, 'status': live_status})
@staticmethod
def _public_record(record: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of `record` with secret-bearing keys removed.

    ``private_key``, ``password`` and ``conf`` are dropped both from the
    top level and from the nested ``config`` mapping. The input is not
    modified; the result always has a ``config`` key.
    """
    hidden = ('private_key', 'password', 'conf')
    public = {key: value for key, value in dict(record).items()
              if key not in hidden}
    nested = dict(public.get('config', {}))
    public['config'] = {key: value for key, value in nested.items()
                        if key not in hidden}
    return public
@staticmethod
def _now_iso() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    utc_now = time.gmtime()
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', utc_now)
# ── Connectivity v2 — v1→v2 migration ─────────────────────────────────
def _migrate_connectivity_v1_to_v2(
    self, legacy: Dict[str, Any],
) -> List[Dict[str, Any]]:
    """Build v2 connection records from the legacy connectivity section.

    For each type in ``CONNECTION_TYPES`` that ``_exit_status`` reports as
    configured, exactly one connection record is created with its own mark,
    table, optional iface and optional redirect port (unique within this
    migration run). The legacy per-type ``exits[<type>]`` mapping is copied
    as the config with any ``private_key``/``password``/``conf`` key removed.

    Legacy sshuttle vault secrets are copied to ``<conn_id>_<field>`` and
    the old name is then deleted. The copy happens first, so a secret is
    never lost: if the copy fails the legacy secret is left alone and no ref
    is recorded; if only the delete fails the new ref is still recorded and
    the leftover legacy entry is logged.

    NOTE(review): ``_exit_status`` always reports ``tor`` as configured, so
    if ``'tor'`` is in ``CONNECTION_TYPES`` a Tor record is always produced.

    Returns:
        The new records (no secret values), or ``[]`` when no type is
        configured.
    """
    records: List[Dict[str, Any]] = []
    exits = legacy.get('exits') if isinstance(legacy, dict) else {}
    exits = exits if isinstance(exits, dict) else {}
    # Legacy per-type vault secret names → instance secret field.
    legacy_secret_names = {
        'sshuttle': [
            ('connectivity_sshuttle_key', 'private_key'),
            ('connectivity_sshuttle_password', 'password'),
        ],
    }
    used_marks, used_tables, used_ports = set(), set(), set()
    for conn_type in self.CONNECTION_TYPES:
        status = self._exit_status(conn_type)
        if not status.get('configured'):
            continue
        conn_id = self._new_conn_id()
        mark = self.MARK_BASE
        while mark in used_marks:
            mark += self.MARK_STRIDE
        used_marks.add(mark)
        table = self.TABLE_BASE
        while table in used_tables:
            table += 1
        used_tables.add(table)
        iface = None
        if conn_type in self.IFACE_TYPES:
            iface = f"{self.IFACE_PREFIXES[conn_type]}{conn_id.split('_')[-1][:8]}"
        redirect_port = None
        if conn_type in self.REDIRECT_TYPES:
            port = self.REDIRECT_PORT_BASE
            while port in used_ports:
                port += 1
            redirect_port = port
            used_ports.add(port)
        legacy_config = exits.get(conn_type)
        config = dict(legacy_config) if isinstance(legacy_config, dict) else {}
        # Never let a stray secret hide in the copied non-secret config.
        for k in ('private_key', 'password', 'conf'):
            config.pop(k, None)
        secret_refs: List[str] = []
        for old_name, field in legacy_secret_names.get(conn_type, []):
            if self.vault_manager is None:
                continue
            try:
                value = self.vault_manager.get_secret(old_name)
            except Exception as e:
                logger.warning(f"migration: read {old_name} failed: {e}")
                value = None
            if not value:
                continue
            new_ref = f"{conn_id}_{field}"
            try:
                self.vault_manager.store_secret(new_ref, value)
            except Exception as e:
                # Copy failed: keep the legacy secret, record no ref.
                logger.error(
                    f"migration: repoint {old_name} → {new_ref} failed: {e}")
                continue
            secret_refs.append(new_ref)
            try:
                self.vault_manager.delete_secret(old_name)
            except Exception as e:
                # The new ref is valid; only the legacy copy lingers.
                logger.warning(
                    f"migration: stored {new_ref} but could not delete "
                    f"legacy {old_name}: {e}")
        now = self._now_iso()
        state = self._compute_state(conn_type, config, secret_refs)
        records.append({
            'id': conn_id,
            'type': conn_type,
            'name': self.DEFAULT_CONNECTION_NAMES.get(conn_type, conn_type),
            'enabled': True,
            'mark': mark,
            'table': table,
            'iface': iface,
            'redirect_port': redirect_port,
            'config': config,
            'secret_refs': secret_refs,
            'cell_name': None,
            'status': {
                'state': state,
                'health': 'unknown',
                'last_check': None,
                'detail': None,
            },
            'created_at': now,
            'updated_at': now,
        })
        logger.info(f"connectivity: migrated legacy {conn_type} exit → {conn_id}")
    return records
# ── Routing application ───────────────────────────────────────────────
def apply_routes(self) -> Dict[str, Any]:
    """Rebuild the connectivity iptables rules and policy routing.

    Steps, in order:
      1. ensure the dedicated chains exist, then flush them in mangle and nat;
      2. for every routing connection with an int mark and table, drop and
         re-add its ``fwmark -> table`` ip rule;
      3. for every peer whose ``exit_via`` resolves to a connection, install
         the per-source rules — unless the peer fails open over a down
         connection, in which case it gets no mark and its traffic follows
         the default route;
      4. install the kill-switch for each iface-based connection that ended
         up with at least one marked peer.

    Failures in individual steps are logged and do not abort the rebuild.

    Returns:
        ``{'ok': True, 'rules_applied': <count>}``.
    """
    applied = 0
    try:
        self._ensure_chains()
    except Exception as e:
        logger.warning(f"apply_routes: _ensure_chains failed: {e}")

    # Flush our dedicated chains (without deleting them).
    chain = self.CONNECTIVITY_CHAIN
    for ipt_table in ('mangle', 'nat'):
        try:
            self._flush_chain(ipt_table, chain)
        except Exception as e:
            logger.warning(f"apply_routes: flush {ipt_table}/{chain} failed: {e}")

    connections = self._routing_connections()

    # One fwmark→table ip rule per instance; delete first so it never doubles.
    for conn in connections:
        mark = conn.get('mark')
        route_table = conn.get('table')
        if not (isinstance(mark, int) and isinstance(route_table, int)):
            continue
        try:
            self._remove_ip_rule(mark, route_table)
            self._add_ip_rule(mark, route_table)
            applied += 1
        except Exception as e:
            logger.warning(
                f"apply_routes: ip rule {conn.get('id')} failed: {e}")

    # Connections that still have at least one marked peer; only these keep
    # a kill-switch. A connection whose peers all failed open gets none.
    marked_conn_ids: set = set()
    if self.peer_registry is not None:
        try:
            peers = self.peer_registry.list_peers()
        except Exception as e:
            logger.warning(f"apply_routes: list_peers failed: {e}")
            peers = []
        by_id = {c.get('id'): c for c in connections}
        for peer in peers:
            conn = self._resolve_peer_connection(peer, by_id)
            if conn is None:
                continue
            if self._peer_fails_open(peer, conn):
                # No mark: traffic falls through to the default route.
                logger.info(
                    f"apply_routes: peer {peer.get('peer')!r} fails open over "
                    f"down connection {conn.get('id')!r}; skipping mark")
                continue
            source = self._peer_source_ip(peer.get('peer', ''))
            if not source:
                continue
            applied += self._apply_connection_for_src(source, conn)
            marked_conn_ids.add(conn.get('id'))

    # Kill-switch: drop marked packets that would leave through any other
    # interface than the connection's own.
    for conn in connections:
        iface = conn.get('iface')
        mark = conn.get('mark')
        if not iface or not isinstance(mark, int) \
                or conn.get('id') not in marked_conn_ids:
            continue
        try:
            self._add_killswitch(mark, iface)
            applied += 1
        except Exception as e:
            logger.warning(
                f"apply_routes: killswitch {conn.get('id')}: {e}")
    return {'ok': True, 'rules_applied': applied}
def _peer_fails_open(self, peer: Dict[str, Any],
                     conn: Dict[str, Any]) -> bool:
    """Decide whether `peer` should fall back to the default route.

    Fallback needs both: the connection's ``status.health`` is exactly
    ``'down'``, and ``effective_failopen`` is true for this peer. Any other
    health value (including ``'unknown'`` or a missing status) keeps the
    peer routed through the connection.
    """
    status = conn.get('status') or {}
    return status.get('health') == 'down' and self.effective_failopen(peer, conn)
def _routing_connections(self) -> List[Dict[str, Any]]:
    """Return the stored connections that take part in routing.

    A connection participates unless its ``enabled`` flag is falsy (a
    missing flag counts as enabled). Returns an empty list when there is no
    config manager or when listing fails (logged).
    """
    manager = self.config_manager
    if manager is None:
        return []
    try:
        stored = manager.list_connections()
    except Exception as e:
        logger.warning(f"apply_routes: list_connections failed: {e}")
        return []
    active: List[Dict[str, Any]] = []
    for conn in stored:
        if conn.get('enabled', True):
            active.append(conn)
    return active
@staticmethod
def _resolve_peer_connection(
    peer: Dict[str, Any], by_id: Dict[str, Dict[str, Any]],
) -> Optional[Dict[str, Any]]:
    """Look up the connection a peer egresses through.

    ``peer['exit_via']`` is a connection id; a missing value or the literal
    ``'default'`` means no connection. Returns the matching record from
    `by_id`, or None when the peer uses the default route or the id is
    unknown.
    """
    target = peer.get('exit_via', 'default')
    return None if target == 'default' else by_id.get(target)
def _apply_connection_for_src(
    self, src_ip: str, conn: Dict[str, Any],
) -> int:
    """Install the per-source rules that steer `src_ip` through `conn`.

    A mark rule is added when the connection has an int mark. A TCP REDIRECT
    to the connection's ``redirect_port`` is added on top when its type is
    in ``REDIRECT_TYPES`` and the port is an int. A failure of either rule
    is logged and does not prevent the other.

    Returns:
        The number of rules added without raising (0, 1 or 2).
    """
    count = 0
    conn_id = conn.get('id')

    fwmark = conn.get('mark')
    if isinstance(fwmark, int):
        try:
            self._add_mark_rule(src_ip, fwmark)
        except Exception as e:
            logger.warning(
                f"apply_routes: mark rule for {src_ip}/{conn_id}: {e}")
        else:
            count += 1

    port = conn.get('redirect_port')
    wants_redirect = conn.get('type') in self.REDIRECT_TYPES \
        and isinstance(port, int)
    if wants_redirect:
        try:
            self._add_redirect(src_ip, port)
        except Exception as e:
            logger.warning(
                f"apply_routes: redirect for {src_ip}/{conn_id}: {e}")
        else:
            count += 1
    return count
# ── iptables / ip rule helpers ────────────────────────────────────────
def _wg_iptables(self, args: List[str], timeout: int = 10) -> subprocess.CompletedProcess:
    """Run ``iptables <args>`` inside the WireGuard container via docker exec.

    Output is captured as text. The exit status is not checked here; callers
    inspect ``returncode`` on the returned CompletedProcess.
    """
    prefix = ['docker', 'exec', WIREGUARD_CONTAINER, 'iptables']
    return subprocess.run(
        prefix + args, capture_output=True, text=True, timeout=timeout)
def _wg_ip(self, args: List[str], timeout: int = 10) -> subprocess.CompletedProcess:
    """Run ``ip <args>`` inside the WireGuard container via docker exec.

    Output is captured as text. The exit status is not checked here; callers
    inspect ``returncode`` on the returned CompletedProcess.
    """
    prefix = ['docker', 'exec', WIREGUARD_CONTAINER, 'ip']
    return subprocess.run(
        prefix + args, capture_output=True, text=True, timeout=timeout)
def _ensure_chains(self) -> None:
    """Make sure the connectivity chain exists and is hooked into PREROUTING.

    Done for both the mangle and the nat table. The chain is created only
    when listing it fails; a creation error whose stderr mentions 'exists'
    is ignored, any other is logged. The jump from PREROUTING is inserted at
    position 1 only when an identical jump is not already present.
    """
    chain = self.CONNECTIVITY_CHAIN
    for ipt_table in ('mangle', 'nat'):
        # Listing the chain fails when it has not been created yet.
        listed = self._wg_iptables(['-t', ipt_table, '-L', chain, '-n'])
        if listed.returncode != 0:
            created = self._wg_iptables(['-t', ipt_table, '-N', chain])
            already_there = 'exists' in (created.stderr or '')
            if created.returncode != 0 and not already_there:
                logger.warning(
                    f"_ensure_chains: cannot create {ipt_table}/{chain}: "
                    f"{created.stderr.strip()}"
                )
        # `-C` succeeds only when the jump rule is already installed.
        jump_check = self._wg_iptables(
            ['-t', ipt_table, '-C', 'PREROUTING', '-j', chain])
        if jump_check.returncode != 0:
            self._wg_iptables(
                ['-t', ipt_table, '-I', 'PREROUTING', '1', '-j', chain])
def _flush_chain(self, table: str, chain: str) -> None:
    """Empty `chain` in iptables table `table` (``-F``); the chain itself stays."""
    flush_args = ['-t', table, '-F', chain]
    self._wg_iptables(flush_args)
def _add_ip_rule(self, mark: int, table: int) -> None:
    """Add the policy rule ``ip rule add fwmark <mark> lookup <table>``.

    The mark is passed in hexadecimal form, the table as a decimal string.
    """
    rule_args = ['rule', 'add', 'fwmark', hex(mark), 'lookup', str(table)]
    self._wg_ip(rule_args)
def _remove_ip_rule(self, mark: int, table: int) -> None:
    """Delete every ``fwmark <mark> lookup <table>`` policy rule.

    ``ip rule del`` removes one rule per call, so it is repeated until it
    reports failure, which drains duplicates. At most 8 deletions are
    attempted.
    """
    delete_args = ['rule', 'del', 'fwmark', hex(mark), 'lookup', str(table)]
    attempts = 0
    while attempts < 8:
        outcome = self._wg_ip(list(delete_args))
        if outcome.returncode != 0:
            break
        attempts += 1
def _add_mark_rule(self, src_ip: str, mark: int) -> None:
    """Append a mangle rule that sets `mark` on packets sourced from `src_ip`.

    The rule goes into the connectivity chain; the mark is written in hex.
    """
    rule = ['-t', 'mangle', '-A', self.CONNECTIVITY_CHAIN]
    rule += ['-s', src_ip]
    rule += ['-j', 'MARK', '--set-mark', hex(mark)]
    self._wg_iptables(rule)
def _add_redirect(self, src_ip: str, port: int) -> None:
    """Append a nat rule redirecting TCP from `src_ip` to local port `port`.

    The rule goes into the connectivity chain of the nat table.
    """
    rule = ['-t', 'nat', '-A', self.CONNECTIVITY_CHAIN]
    rule += ['-s', src_ip, '-p', 'tcp']
    rule += ['-j', 'REDIRECT', '--to-ports', str(port)]
    self._wg_iptables(rule)
def _add_tor_redirect(self, src_ip: str) -> None:
    """Redirect TCP traffic from `src_ip` to the port in ``TOR_TRANS_PORT``."""
    trans_port = self.TOR_TRANS_PORT
    self._add_redirect(src_ip, trans_port)
def _add_killswitch(self, mark: int, iface: Optional[str]) -> None:
    """Install a FORWARD rule dropping `mark`ed packets not leaving via `iface`.

    Does nothing when `iface` is empty or None (connections without an exit
    interface). The rule is appended only when an identical one is not
    already present, so repeated calls do not duplicate it.
    """
    if not iface:
        return
    rule_spec = ['FORWARD',
                 '-m', 'mark', '--mark', hex(mark),
                 '!', '-o', iface, '-j', 'DROP']
    # `-C` tests for the rule; `-A` appends it only when the test fails.
    present = self._wg_iptables(['-C'] + rule_spec)
    if present.returncode != 0:
        self._wg_iptables(['-A'] + rule_spec)
def _exit_status(self, exit_type: str) -> Dict[str, Any]:
    """Report whether a legacy exit type is configured and its iface is up.

    An exit is configured when its legacy file exists (``wg_ext0.conf``, any
    ``*.ovpn``, ``sshuttle.conf``, ``redsocks.conf``), always for ``tor``,
    or — failing that — when its store service is installed or its container
    is running. The interface named in ``IFACES`` (if any) is checked with
    ``ip link show`` in the WireGuard container and counts as up when the
    command succeeds and its output contains ``UP``.

    Returns:
        ``{'configured': bool, 'iface_up': bool, 'status': str}`` where
        status is ``'active'`` (iface up), ``'configured'`` or
        ``'not_configured'``.
    """
    if exit_type == 'tor':
        configured = True  # Tor uses defaults; no per-cell config
    elif exit_type == 'openvpn':
        try:
            configured = any(
                entry.endswith('.ovpn')
                for entry in os.listdir(self.openvpn_dir)
            )
        except OSError:
            configured = False
    elif exit_type == 'wireguard_ext':
        configured = os.path.isfile(
            os.path.join(self.wireguard_ext_dir, 'wg_ext0.conf'))
    elif exit_type == 'sshuttle':
        configured = os.path.isfile(
            os.path.join(self.sshuttle_dir, 'sshuttle.conf'))
    elif exit_type == 'proxy':
        configured = os.path.isfile(
            os.path.join(self.proxy_dir, 'redsocks.conf'))
    else:
        configured = False

    # Store-installed exits never create the legacy upload files.
    if not configured and (
        self._store_service_installed(exit_type)
        or self._exit_container_running(exit_type)
    ):
        configured = True

    iface_up = False
    iface = self.IFACES.get(exit_type)
    if iface:
        try:
            shown = self._wg_ip(['link', 'show', iface], timeout=5)
            iface_up = shown.returncode == 0 and 'UP' in (shown.stdout or '')
        except Exception:
            iface_up = False

    if iface_up:
        label = 'active'
    elif configured:
        label = 'configured'
    else:
        label = 'not_configured'
    return {'configured': configured, 'iface_up': iface_up, 'status': label}
def _store_service_installed(self, exit_type: str) -> bool:
    """Tell whether the store service backing `exit_type` is installed.

    The service id comes from ``STORE_SERVICE_IDS``. False is returned when
    the type has no service id, there is no config manager, the lookup
    fails (logged at debug level), or the installed-services value is not a
    dict containing the id.
    """
    service_id = self.STORE_SERVICE_IDS.get(exit_type)
    if not service_id:
        return False
    if self.config_manager is None:
        return False
    try:
        installed = self.config_manager.get_installed_services()
    except Exception as e:
        logger.debug(f"_store_service_installed({exit_type}): {e}")
        return False
    if not isinstance(installed, dict):
        return False
    return service_id in installed
def _exit_container_running(self, exit_type: str) -> bool:
    """Tell whether the container backing `exit_type` is running.

    The container name comes from ``EXIT_CONTAINERS``. ``docker inspect`` is
    asked for ``.State.Running``; anything but a clean ``true`` — including
    an unknown type or any error while running docker — yields False.
    """
    container = self.EXIT_CONTAINERS.get(exit_type)
    if not container:
        return False
    inspect_cmd = ['docker', 'inspect', '-f', '{{.State.Running}}', container]
    try:
        probe = subprocess.run(
            inspect_cmd, capture_output=True, text=True, timeout=5)
    except Exception:
        return False
    try:
        return probe.returncode == 0 and probe.stdout.strip() == 'true'
    except Exception:
        return False
# ── Health probing ────────────────────────────────────────────────────
#
# probe_health(connection) returns 'working' | 'down' | 'unknown' per type.
# All real subprocess/socket calls are factored behind small helpers
# (_exec_in_container, _tcp_reachable) so tests patch those, never raw
# subprocess/socket. refresh_health() persists results via
# config_manager.set_connection_status and caches them for HEALTH_TTL.
# Per-type fallback default: tor fails open (direct internet on outage),
# all interface/tunnel/proxy types fail closed (traffic blocked on outage).
# Default fail-open policy per connection type: True lets traffic fall back
# to the default route when the connection is down, False keeps it blocked.
FAILOPEN_DEFAULTS: Dict[str, bool] = dict(
    wireguard_ext=False,
    openvpn=False,
    sshuttle=False,
    proxy=False,
    tor=True,
)
def probe_health(self, connection: Dict[str, Any]) -> Tuple[str, Optional[str]]:
    """Probe a connection's liveness with the probe for its type.

    Returns ``(health, detail)``. What the type-specific probe returns is
    passed through unchanged. An exception raised by the probe is logged and
    reported as ``('unknown', <message>)``; a type without a probe yields
    ``('unknown', 'no probe for type ...')``. This method never raises for
    probe errors.
    """
    conn_type = connection.get('type')
    probes = (
        ('wireguard_ext', '_probe_wireguard_ext'),
        ('openvpn', '_probe_openvpn'),
        ('tor', '_probe_tor'),
        ('sshuttle', '_probe_sshuttle'),
        ('proxy', '_probe_proxy'),
    )
    try:
        for type_name, method_name in probes:
            if conn_type == type_name:
                # Resolved at call time so only the matching probe is touched.
                return getattr(self, method_name)(connection)
    except Exception as e:
        logger.warning(f"probe_health({connection.get('id')}): {e}")
        return 'unknown', str(e)
    return 'unknown', f'no probe for type {conn_type!r}'
def _probe_wireguard_ext(
    self, conn: Dict[str, Any],
) -> Tuple[str, Optional[str]]:
    """Judge a WireGuard exit by the age of its most recent handshake.

    Runs ``wg show <iface> latest-handshakes`` in the wireguard_ext
    container and takes the largest integer found in the last column.
    ``'working'`` when that timestamp is at most ``WG_HANDSHAKE_MAX_AGE``
    seconds old; ``'down'`` when the command fails, no handshake is reported
    or the newest one is older; ``'unknown'`` when no iface is assigned.
    """
    iface = conn.get('iface')
    if not iface:
        return 'unknown', 'no interface assigned'
    container = self.EXIT_CONTAINERS.get('wireguard_ext')
    shown = self._exec_in_container(
        container, ['wg', 'show', iface, 'latest-handshakes'])
    if shown is None or shown.returncode != 0:
        return 'down', 'wg show failed or interface absent'

    latest = 0
    for row in (shown.stdout or '').splitlines():
        columns = row.split()
        if len(columns) < 2:
            continue
        try:
            stamp = int(columns[-1])
        except ValueError:
            continue
        if stamp > latest:
            latest = stamp
    if latest == 0:
        return 'down', 'no handshake yet'

    age = int(time.time()) - latest
    if age > self.WG_HANDSHAKE_MAX_AGE:
        return 'down', f'handshake stale ({age}s ago)'
    return 'working', f'handshake {age}s ago'
def _probe_openvpn(self, conn: Dict[str, Any]) -> Tuple[str, Optional[str]]:
    """Judge an OpenVPN exit by the state of its tun interface.

    The connection's iface is looked up with ``ip link show`` inside the
    openvpn container. When there is no iface or that lookup fails, the
    legacy interface from ``IFACES['openvpn']`` (default ``tun0``) is checked
    in the WireGuard container instead. An interface counts as up when the
    output contains ``UP``.
    """
    iface = conn.get('iface')
    container = self.EXIT_CONTAINERS.get('openvpn')
    # The tun device lives in the openvpn container's net namespace.
    primary = None
    if iface:
        primary = self._exec_in_container(
            container, ['ip', 'link', 'show', iface])

    if primary is not None and primary.returncode == 0:
        if 'UP' in (primary.stdout or ''):
            return 'working', 'tun up'
        return 'down', 'tun interface down'

    # Fall back to the legacy fixed tun iface in the WireGuard container.
    legacy = self._wg_ip(['link', 'show', self.IFACES.get('openvpn', 'tun0')])
    if legacy.returncode == 0 and 'UP' in (legacy.stdout or ''):
        return 'working', 'tun up'
    return 'down', 'tun interface absent or down'
def _probe_tor(self, conn: Dict[str, Any]) -> Tuple[str, Optional[str]]:
    """Judge the Tor exit by whether its container is running.

    ``'down'`` when the container is not running. Otherwise always
    ``'working'``: the notices log is searched for ``Bootstrapped 100`` only
    to choose the detail text (``'bootstrapped'`` when found, ``'running'``
    when not). A running container is the cheap liveness signal.
    """
    container = self.EXIT_CONTAINERS.get('tor')
    if not self._container_running(container):
        return 'down', 'tor container not running'
    grep_cmd = ['sh', '-c',
                'grep -m1 "Bootstrapped 100" /var/log/tor/notices.log '
                '2>/dev/null || true']
    found = self._exec_in_container(container, grep_cmd)
    bootstrapped = found is not None \
        and 'Bootstrapped 100' in (found.stdout or '')
    return 'working', ('bootstrapped' if bootstrapped else 'running')
def _probe_sshuttle(self, conn: Dict[str, Any]) -> Tuple[str, Optional[str]]:
    """Judge an sshuttle exit by its SSH host and its local listener.

    ``'unknown'`` when no host is configured. ``'down'`` when the SSH host
    (port from config, default 22) does not accept TCP, or when the
    connection has an int ``redirect_port`` whose listener is not reachable.
    ``'working'`` otherwise.
    """
    settings = conn.get('config', {})
    host = settings.get('host')
    port = settings.get('port', 22)
    if not host:
        return 'unknown', 'no host configured'
    ssh_up = self._tcp_reachable(host, int(port))
    if not ssh_up:
        return 'down', f'ssh host {host}:{port} unreachable'

    listen_port = conn.get('redirect_port')
    container = self.EXIT_CONTAINERS.get('sshuttle')
    if isinstance(listen_port, int):
        if not self._listener_reachable(container, listen_port):
            return 'down', f'sshuttle listener :{listen_port} down'
    return 'working', 'ssh host + listener reachable'
def _probe_proxy(self, conn: Dict[str, Any]) -> Tuple[str, Optional[str]]:
    """Judge a proxy exit by TCP reachability of its upstream host:port.

    ``'unknown'`` when host or port is missing from the config, ``'working'``
    when a TCP connection succeeds, ``'down'`` otherwise.
    """
    settings = conn.get('config', {})
    host = settings.get('host')
    port = settings.get('port')
    if not (host and port):
        return 'unknown', 'no upstream host/port configured'
    if not self._tcp_reachable(host, int(port)):
        return 'down', f'upstream {host}:{port} unreachable'
    return 'working', f'{host}:{port} reachable'
def _listener_reachable(self, container: Optional[str], port: int) -> bool:
    """Check for a TCP listener on 127.0.0.1:`port`.

    First tries ``nc -z`` inside `container`. When that does not report
    success (for instance because the container lacks ``nc``), falls back to
    a direct TCP connection attempt from this process.
    """
    shell_probe = [
        'sh', '-c',
        f'nc -z -w2 127.0.0.1 {port} 2>/dev/null && echo ok || true',
    ]
    inside = self._exec_in_container(container, shell_probe)
    if inside is not None and 'ok' in (inside.stdout or ''):
        return True
    # Fall back to a host-side probe (container may lack nc).
    return self._tcp_reachable('127.0.0.1', port)
def _container_running(self, container: Optional[str]) -> bool:
    """Tell whether the named container is running.

    Asks ``docker inspect`` for ``.State.Running``. Anything but a clean
    ``true`` — including an empty name or any error while running docker —
    yields False.
    """
    if not container:
        return False
    inspect_cmd = ['docker', 'inspect', '-f', '{{.State.Running}}', container]
    try:
        probe = subprocess.run(
            inspect_cmd, capture_output=True, text=True, timeout=5)
        is_up = probe.returncode == 0 and probe.stdout.strip() == 'true'
    except Exception:
        is_up = False
    return is_up
def _exec_in_container(
    self, container: Optional[str], args: List[str], timeout: int = 8,
) -> Optional[subprocess.CompletedProcess]:
    """Run `args` inside `container` with ``docker exec``.

    Returns the CompletedProcess (output captured as text; exit status not
    checked), or None when no container is given or launching the command
    raises (logged at debug level). Kept as a single seam so tests can
    replace it.
    """
    if not container:
        return None
    command = ['docker', 'exec', container] + args
    try:
        completed = subprocess.run(
            command, capture_output=True, text=True, timeout=timeout)
    except Exception as e:
        logger.debug(f"_exec_in_container({container}): {e}")
        return None
    return completed
def _tcp_reachable(self, host: str, port: int) -> bool:
    """Tell whether a TCP connection to ``host:port`` can be opened.

    The attempt is bounded by ``PROBE_TCP_TIMEOUT``; the socket is closed
    again immediately. Any error counts as unreachable. Kept as a single
    seam so tests can replace it.
    """
    try:
        conn = socket.create_connection(
            (host, port), timeout=self.PROBE_TCP_TIMEOUT)
    except Exception:
        return False
    try:
        conn.close()
    except Exception:
        return False
    return True
def refresh_health(
    self, connection_id: Optional[str] = None, force: bool = False,
) -> Dict[str, Any]:
    """Probe connections and store the outcome in each connection's status.

    Args:
        connection_id: Restrict the pass to this connection id; None means
            every connection returned by the config manager.
        force: Probe even when the stored result is still within the TTL
            (as judged by `_health_is_fresh`).

    Returns:
        Mapping of connection id to health string. Connections without an
        id and disabled connections are left out. An empty dict is returned
        when there is no config manager or listing connections fails.
    """
    manager = self.config_manager
    if manager is None:
        return {}
    try:
        candidates = manager.list_connections()
    except Exception as e:
        logger.warning(f"refresh_health: list_connections failed: {e}")
        return {}
    if connection_id is not None:
        candidates = [c for c in candidates if c.get('id') == connection_id]

    now = int(time.time())
    health_by_id: Dict[str, Any] = {}
    for conn in candidates:
        cid = conn.get('id')
        if not (cid and conn.get('enabled', True)):
            continue
        # Work on a copy so the listed record is not mutated in place.
        status = dict(conn.get('status', {}))
        if force or not self._health_is_fresh(status, now):
            health, detail = self.probe_health(conn)
            status.update(
                health=health, detail=detail, last_check=self._now_iso())
            try:
                manager.set_connection_status(cid, status)
            except Exception as e:
                # The probe result is still reported even if it was not saved.
                logger.warning(f"refresh_health: persist {cid} failed: {e}")
        else:
            health = status.get('health', 'unknown')
        health_by_id[cid] = health
    return health_by_id
def _health_is_fresh(self, status: Dict[str, Any], now: int) -> bool:
"""True when status.last_check is within HEALTH_TTL of `now`.
last_check is a UTC ISO string (from _now_iso); calendar.timegm parses
it back as UTC so the comparison is timezone-consistent with `now`
(int(time.time()), also epoch/UTC).
"""
import calendar
last = status.get('last_check')
if not last:
return False
try:
ts = calendar.timegm(time.strptime(last, '%Y-%m-%dT%H:%M:%SZ'))
return (now - int(ts)) < self.HEALTH_TTL
except (ValueError, OverflowError):
return False
def effective_failopen(self, peer: Dict[str, Any],
                       conn: Optional[Dict[str, Any]]) -> bool:
    """Decide whether `peer` fails open on the given connection.

    A boolean `peer['exit_failopen']` wins outright. Anything else (absent,
    None, non-bool) falls through to `FAILOPEN_DEFAULTS` keyed by the
    connection's type; a missing connection or a type with no entry yields
    False, i.e. fail-closed.
    """
    forced = peer.get('exit_failopen')
    if isinstance(forced, bool):
        return forced
    kind = None
    if conn:
        kind = conn.get('type')
    return self.FAILOPEN_DEFAULTS.get(kind, False)
def set_peer_failopen(self, peer_name: str,
                      failopen: Optional[bool]) -> Dict[str, Any]:
    """Store a peer's fail-open override, then re-apply routes.

    Args:
        peer_name: Name of the peer to update.
        failopen: True/False to force the behaviour, or None to clear the
            override so the per-type default applies.

    Returns:
        ``{'ok': True, 'peer': ..., 'exit_failopen': ...}`` on success, or
        ``{'ok': False, 'error': ...}`` when the value is not bool/None, the
        peer registry is unavailable, the peer is unknown, or the registry
        update raises. A failure in `apply_routes` is only logged.
    """
    def _failure(message: str) -> Dict[str, Any]:
        return {'ok': False, 'error': message}

    if not (failopen is None or isinstance(failopen, bool)):
        return _failure('failopen must be a boolean or null')
    if self.peer_registry is None:
        return _failure('peer_registry not available')
    if not self._peer_exists(peer_name):
        return _failure(f'peer {peer_name!r} not found')

    try:
        self.peer_registry.update_peer(peer_name, {'exit_failopen': failopen})
    except Exception as e:
        logger.error(f"set_peer_failopen({peer_name}): {e}")
        return _failure(str(e))

    # The override is already saved; a routing hiccup must not undo that.
    try:
        self.apply_routes()
    except Exception as e:
        logger.warning(f"set_peer_failopen: apply_routes failed (non-fatal): {e}")
    return {'ok': True, 'peer': peer_name, 'exit_failopen': failopen}
def _peer_source_ip(self, peer_name: str) -> Optional[str]:
"""Return a peer's WireGuard IP (no /CIDR suffix)."""
if not peer_name or self.peer_registry is None:
return None
try:
peer = self.peer_registry.get_peer(peer_name)
except Exception as e:
logger.warning(f"_peer_source_ip({peer_name}): {e}")
return None
if not peer:
return None
ip = peer.get('ip', '')
if not ip:
return None
return ip.split('/')[0]
# ── Config validation ─────────────────────────────────────────────────
def _validate_wg_conf(self, text: str) -> str:
    """Sanitise a WireGuard config: drop forbidden keys, reject a wg0 clash.

    Every ``key = value`` line whose key appears in
    ``_WG_FORBIDDEN_PREFIXES`` (defined elsewhere in this file; intended to
    cover the Pre/Post-Up/Down hooks) is removed. Keys are compared
    case-insensitively because wg-quick matches config keys without regard
    to case, so ``postup = ...`` must be dropped just like ``PostUp = ...``.

    Args:
        text: Raw config text.

    Returns:
        The config without the dropped lines, with trailing whitespace
        trimmed and exactly one trailing newline.

    Raises:
        ValueError: if `text` is not a string, or a ``Name``/``Interface``
            key is set to ``wg0`` (which would clash with the existing
            peer-server interface).
    """
    if not isinstance(text, str):
        raise ValueError('wg conf must be a string')
    # Lower-case once so the per-line comparison is case-insensitive.
    forbidden = {str(name).lower() for name in _WG_FORBIDDEN_PREFIXES}
    cleaned: List[str] = []
    for raw_line in text.splitlines():
        stripped = raw_line.strip()
        # Only assignments (`Key = value`) are inspected; section headers
        # such as `[Interface]` pass through untouched.
        if '=' in stripped:
            key, _, value = stripped.partition('=')
            key = key.strip()
            folded = key.lower()
            if folded in forbidden:
                logger.info(f"_validate_wg_conf: dropped {key} hook")
                continue
            # Detect Name = wg0 or Interface = wg0.
            if folded in ('name', 'interface') and \
                    value.strip().lower() == 'wg0':
                raise ValueError(
                    "config defines interface 'wg0' which conflicts "
                    "with the peer-server interface"
                )
        cleaned.append(raw_line)
    return '\n'.join(cleaned).rstrip() + '\n'
def _validate_ovpn(self, text: str) -> str:
    """Strip OpenVPN directives that execute external scripts/binaries.

    A line is dropped when its directive name is listed in
    ``_OVPN_FORBIDDEN_DIRECTIVES`` (defined elsewhere in this file). The
    name is normalised before the lookup so equivalent spellings are caught
    too: an optional leading ``--`` and quotes around the name are removed.
    Lines that are blank or start with ``#`` are kept as-is.

    Args:
        text: Raw .ovpn config text.

    Returns:
        The config without the dropped lines, with trailing whitespace
        trimmed and exactly one trailing newline.

    Raises:
        ValueError: if `text` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError('ovpn conf must be a string')
    cleaned: List[str] = []
    for raw_line in text.splitlines():
        # str.strip() does not remove a byte-order mark, which would
        # otherwise hide the first directive from the check below.
        # NOTE(review): OpenVPN is believed to skip a leading UTF-8 BOM —
        # confirm; stripping it here is harmless either way.
        stripped = raw_line.lstrip('\ufeff').strip()
        if stripped and not stripped.startswith('#'):
            # Raw first whitespace-delimited token (kept for the log line
            # and for the original exact-match check).
            first = stripped.split(None, 1)[0]
            name = first
            opener = stripped[0]
            if opener in ('"', "'"):
                # Quoted directive name: take the text up to the matching
                # quote, e.g. `"up" /bin/sh` -> `up`.
                closing = stripped.find(opener, 1)
                name = stripped[1:closing] if closing != -1 else stripped[1:]
            if name.startswith('--'):
                # The command-line `--` prefix is optional in config files.
                name = name[2:]
            if first in _OVPN_FORBIDDEN_DIRECTIVES \
                    or name in _OVPN_FORBIDDEN_DIRECTIVES:
                logger.info(f"_validate_ovpn: dropped {first} directive")
                continue
        cleaned.append(raw_line)
    return '\n'.join(cleaned).rstrip() + '\n'
# ── Filesystem helpers ────────────────────────────────────────────────
@staticmethod
def _write_secure(path: str, text: str) -> None:
"""Atomic 0o600 write — secrets in these configs must not be world-readable."""
os.makedirs(os.path.dirname(path), exist_ok=True)
tmp = path + '.tmp'
fd = os.open(tmp, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
try:
with os.fdopen(fd, 'w') as f:
f.write(text)
except Exception:
try:
os.unlink(tmp)
except OSError:
pass
raise
os.chmod(tmp, 0o600)
os.replace(tmp, path)
os.chmod(path, 0o600)