Files
pic/api/connectivity_manager.py
T
roof 8d904b1b8f
Unit Tests / test (push) Successful in 13m7s
fix: clean-install bugs — Tor false-installed, WG port-check honesty, encrypted backup upload
Three independent bugs surfaced during pic1 clean-install testing:

1. Tor _exit_status hardcoded configured=True regardless of whether Tor was
   actually installed.  Status now flows through the same store-installed /
   container-running bridge used by every other optional service, so Tor only
   reports installed when the container is present and running.

2. check_port_open compared the port from wg0.conf against the kernel-reported
   listening port, causing false "port closed" results whenever the conf and the
   running container were momentarily out of sync.  The function is now an honest
   liveness check: any wg0 interface that is up and has a "listening port:" line
   in `wg show` is considered open.  The check-port API endpoint now also returns
   the actual kernel listening_port and a port_mismatch flag so the UI can inform
   the user when a container recreate is needed.  (The recreate machinery already
   exists via the port-change pending-restart path; this fix makes the mismatch
   visible rather than silently lying about reachability.)

3. upload_backup only handled .zip archives; encrypted .age blobs were rejected
   with a generic error.  The endpoint now calls backup_crypto.is_encrypted() to
   detect Age-encrypted blobs and stores them verbatim as <id>.tar.gz.age with
   mode 0600 so they can be uploaded and then restored with a passphrase.  The
   plaintext zip path is unchanged.

Tests added/updated: test_connectivity_manager.py (Tor status bridge),
test_wireguard_manager.py + test_wireguard_endpoints.py (port-check liveness
and mismatch flag), test_config_backup_restore_http.py (encrypted upload
round-trip).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 01:52:26 -04:00

2597 lines
114 KiB
Python

#!/usr/bin/env python3
"""
Connectivity Manager for Personal Internet Cell — Phase 5 Extended Connectivity.
Provides per-peer egress routing through alternate exits (WireGuard external,
OpenVPN, Tor, sshuttle SSH tunnel, upstream proxy) via Linux policy routing
(fwmark + ip rule + dedicated routing tables) and dedicated iptables chains.
Architecture
------------
- A peer's `exit_via` field selects the egress path: "default", "wireguard_ext",
"openvpn", "tor", "sshuttle", or "proxy".
- Each non-default exit type is assigned a unique fwmark and a dedicated routing
table:
wireguard_ext mark 0x10 table 110 iface wg_ext0
openvpn mark 0x20 table 120 iface tun0
tor mark 0x30 table 130 (transparent proxy → 9040)
sshuttle mark 0x40 table 140 (transparent proxy → 12300)
proxy mark 0x50 table 150 (transparent proxy → 12345, redsocks)
- All rules live in dedicated PIC_CONNECTIVITY chains in the `mangle` and `nat`
tables so they can be flushed/rebuilt without touching firewall_manager rules.
- A kill-switch FORWARD DROP rule prevents leaks if the exit interface is down.
Container model
---------------
Each exit type runs in its own separate container; this manager only programs
policy routing rules in the WireGuard server container (cell-wireguard) where
peer traffic flows through.
Config files
------------
- WireGuard external: {config_dir}/connectivity/wireguard_ext/wg_ext0.conf
- OpenVPN: {config_dir}/connectivity/openvpn/<name>.ovpn
Both are validated to strip / reject hook directives that could execute
arbitrary commands on the host.
"""
import ipaddress
import logging
import os
import re
import secrets
import socket
import threading
import time
from typing import Any, Dict, List, Optional, Tuple
import subprocess
from base_service_manager import BaseServiceManager
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Name of the WireGuard server container (the module docstring's "Container
# model" section describes it as the place where policy routing rules live).
WIREGUARD_CONTAINER = 'cell-wireguard'
# Lines we strip from uploaded WireGuard configs — these can run arbitrary
# host commands when wg-quick brings the interface up/down.
_WG_FORBIDDEN_PREFIXES = ('PostUp', 'PostDown', 'PreUp', 'PreDown')
# Lines we strip from uploaded OpenVPN configs — these execute external
# scripts/binaries on connect/disconnect.
_OVPN_FORBIDDEN_DIRECTIVES = (
    'up', 'down', 'script-security', 'plugin',
    'route-up', 'route-pre-down',
)
# Short lowercase identifier (1-32 chars of a-z, 0-9, '_' or '-'); used to
# validate the OpenVPN profile name before it becomes part of a file name.
_NAME_RE = re.compile(r'^[a-z0-9_-]{1,32}$')
# sshuttle / proxy configuration validation
# Hostname / dotted address: must start and end with an alphanumeric; inner
# characters may also be '.' or '-'.
_HOST_RE = re.compile(r'^[A-Za-z0-9]([A-Za-z0-9.-]{0,252}[A-Za-z0-9])?$')
# SSH login name: lowercase letter or '_' first, then up to 31 more of
# a-z, 0-9, '_' or '-'.
_SSH_USER_RE = re.compile(r'^[a-z_][a-z0-9_-]{0,31}$')
# Upstream proxy login: 1-64 characters of letters, digits, '.', '_' or '-'.
_PROXY_USER_RE = re.compile(r'^[A-Za-z0-9._-]{1,64}$')
# Printable ASCII excluding double quote and backslash — safe inside the
# quoted strings of a redsocks.conf without any escaping ambiguity.
_PROXY_PASSWORD_RE = re.compile(r'^[\x20\x21\x23-\x5B\x5D-\x7E]{1,128}$')
# Base64 alphabet with at most two trailing '=' pad characters. This checks
# the character set only; it does not verify that the length is a multiple of 4.
_B64_RE = re.compile(r'^[A-Za-z0-9+/]+={0,2}$')
# Allowed characters for the host field of a known_hosts line (covers comma
# lists, bracketed [host]:port forms and '*' / '?' patterns).
_KNOWN_HOSTS_HOSTS_RE = re.compile(r'^[A-Za-z0-9\[\]:.,*?_-]{1,512}$')
# Host-key algorithms accepted in a pinned known_hosts line.
_SSH_KEYTYPES = (
    'ssh-ed25519', 'ssh-rsa',
    'ecdsa-sha2-nistp256', 'ecdsa-sha2-nistp384', 'ecdsa-sha2-nistp521',
)
def _contains_strict_hostkey_disable(value: str) -> bool:
"""Detect attempts to disable SSH host-key pinning (any spacing/case)."""
normalized = re.sub(r'\s+', '', value.lower())
return ('stricthostkeychecking=no' in normalized
or 'stricthostkeycheckingno' in normalized)
def _validate_host(host) -> Optional[str]:
"""Return a validated hostname/IP, or None when invalid."""
if not isinstance(host, str):
return None
host = host.strip()
if not host or '..' in host or not _HOST_RE.match(host):
return None
return host
def _validate_port(port) -> Optional[int]:
"""Return a validated TCP port (1-65535), or None when invalid."""
try:
port = int(port)
except (TypeError, ValueError):
return None
if not 1 <= port <= 65535:
return None
return port
class ConnectivityManager(BaseServiceManager):
    """Manages alternate egress paths (extended connectivity) for peers."""
    # Every value a peer's legacy `exit_via` field may take; "default" means
    # no alternate exit.
    EXIT_TYPES = ("default", "wireguard_ext", "openvpn", "tor",
                  "sshuttle", "proxy")
    # Legacy single-slot fwmark / routing table / interface per exit type
    # (the mapping listed in the module docstring).
    MARKS = {"wireguard_ext": 0x10, "openvpn": 0x20, "tor": 0x30,
             "sshuttle": 0x40, "proxy": 0x50}
    TABLES = {"wireguard_ext": 110, "openvpn": 120, "tor": 130,
              "sshuttle": 140, "proxy": 150}
    IFACES = {"wireguard_ext": "wg_ext0", "openvpn": "tun0"}
    # Local transparent-proxy ports for the redirect-style exits.
    TOR_TRANS_PORT = 9040
    TOR_DNS_PORT = 5353
    SSHUTTLE_PORT = 12300
    REDSOCKS_PORT = 12345
    # Exits that work as pure iptables REDIRECTs to a local transparent-proxy
    # port (no exit interface, no kill-switch interface).
    REDIRECT_PORTS = {"tor": TOR_TRANS_PORT, "sshuttle": SSHUTTLE_PORT,
                      "proxy": REDSOCKS_PORT}
    # ── Connectivity v2 — instance resource allocation ────────────────────
    # Connection instance types (the legacy "default" pseudo-exit is excluded —
    # a peer/service routed via "default" simply has no connection).
    CONNECTION_TYPES = ("wireguard_ext", "openvpn", "tor", "sshuttle", "proxy")
    # Types whose egress is a real interface (kill-switch capable). They get an
    # iface name and no redirect port.
    IFACE_TYPES = ("wireguard_ext", "openvpn")
    # Types implemented as a local transparent-proxy REDIRECT. They get a
    # redirect port and no iface.
    REDIRECT_TYPES = ("tor", "sshuttle", "proxy")
    # Only a single Tor instance is supported (one Tor container per cell).
    SINGLE_INSTANCE_TYPES = ("tor",)
    # A cell_relay connection represents "route this peer's internet through a
    # connected cell that offers its exit". It is NOT manually created — it is
    # auto-derived (reconciled) from cell links that offer an exit. It needs a
    # mark+table for policy routing but no local iface/redirect_port/container:
    # egress happens through the cell WG tunnel, not a local exit container.
    CELL_RELAY_TYPE = "cell_relay"
    CELL_RELAY_NAME_PREFIX = "Cell: "
    # fwmark block 0x1000–0x1FFF, stride 0x10.
    MARK_BASE = 0x1000
    MARK_STRIDE = 0x10
    MARK_MAX = 0x1FFF
    # routing tables 1000+.
    TABLE_BASE = 1000
    # transparent-proxy redirect port pool for instances.
    REDIRECT_PORT_BASE = 9100
    REDIRECT_PORT_MAX = 9199
    # WireGuard handshake older than this (seconds) means the tunnel is down.
    WG_HANDSHAKE_MAX_AGE = 180
    # Cached health is reused for this long so on-demand GETs don't re-probe.
    HEALTH_TTL = 30
    # TCP connect timeout for socket-based probes (sshuttle/proxy).
    PROBE_TCP_TIMEOUT = 3
    # Interface-name prefixes for interface-backed connection instances.
    IFACE_PREFIXES = {"wireguard_ext": "wgext_", "openvpn": "ovpn_"}
    # Human-readable connection name: 1-64 chars, alphanumeric first, then
    # letters, digits, space, '_', '.' or '-'.
    CONNECTION_NAME_RE = re.compile(r'^[A-Za-z0-9][A-Za-z0-9 _.-]{0,63}$')
    # Display names keyed by connection type.
    DEFAULT_CONNECTION_NAMES = {
        "wireguard_ext": "WireGuard External",
        "openvpn": "OpenVPN",
        "tor": "Tor",
        "sshuttle": "SSH Tunnel",
        "proxy": "Proxy",
    }
    # Store-service ids / container names backing each exit type — used to
    # report an exit as configured when it was installed via the Service Store
    # rather than through a legacy config upload.
    STORE_SERVICE_IDS = {
        "wireguard_ext": "wireguard-ext",
        "openvpn": "openvpn-client",
        "tor": "tor",
        "sshuttle": "sshuttle",
        "proxy": "proxy",
    }
    EXIT_CONTAINERS = {
        "wireguard_ext": "cell-wg-ext",
        "openvpn": "cell-openvpn",
        "tor": "cell-tor",
        "sshuttle": "cell-sshuttle",
        "proxy": "cell-redsocks",
    }
    # RFC1918 ranges excluded from the sshuttle tunnel by default so cell-local
    # and LAN traffic is never tunneled.
    RFC1918_SUBNETS = ("10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16")
    # Name of the dedicated iptables chain (the module docstring places it in
    # the `mangle` and `nat` tables).
    CONNECTIVITY_CHAIN = 'PIC_CONNECTIVITY'
def __init__(self, config_manager=None, peer_registry=None,
             vault_manager=None, service_composer=None,
             data_dir: str = '/app/data', config_dir: str = '/app/config'):
    """Wire collaborators, prepare config directories and hook up events.

    Steps, in order: initialise the base manager as 'connectivity', store
    the injected collaborators, register the v1→v2 migrator with the
    config manager (when it supports that), create the per-service config
    directories under ``data_dir``, copy any legacy configs across, and
    finally subscribe to config-change events.
    """
    super().__init__('connectivity', data_dir, config_dir)

    self.config_manager = config_manager
    self.peer_registry = peer_registry
    self.vault_manager = vault_manager
    # The composer is built later in managers.py and assigned afterwards; it
    # brings per-connection containers up/down out-of-process.
    self.service_composer = service_composer
    # Also assigned after construction in managers.py. They feed the
    # cell_relay reconcile, cell exit routing and handshake-based health.
    # With no cell_link_manager the cell_relay reconcile does nothing.
    self.cell_link_manager = None
    self.wireguard_manager = None
    # Guards connection CRUD and resource allocation across threads.
    self._conn_lock = threading.RLock()

    # Register the v1→v2 migration; it runs lazily on the first
    # get_connectivity() call.
    migrator_host = self.config_manager
    if migrator_host is not None and hasattr(
            migrator_host, 'register_connectivity_migrator'):
        migrator_host.register_connectivity_migrator(
            self._migrate_connectivity_v1_to_v2
        )

    # Configs live under the per-service data dir so the
    # ${PIC_DATA_DIR}/services/<id>/config bind mounts in store compose
    # templates can see them. The Docker daemon resolves paths on the HOST,
    # hence data_dir rather than config_dir.
    def _service_config_dir(service_id: str) -> str:
        return os.path.join(data_dir, 'services', service_id, 'config')

    self.wireguard_ext_dir = _service_config_dir('wireguard-ext')
    self.openvpn_dir = _service_config_dir('openvpn-client')
    self.sshuttle_dir = _service_config_dir('sshuttle')
    self.proxy_dir = _service_config_dir('proxy')
    for directory in (self.wireguard_ext_dir, self.openvpn_dir,
                      self.sshuttle_dir, self.proxy_dir):
        self.safe_makedirs(directory)

    # One-shot copy from the legacy config_dir/connectivity/ location.
    self._migrate_legacy_configs(os.path.join(config_dir, 'connectivity'))

    # Subscribe to ServiceBus CONFIG_CHANGED events so routes are reapplied
    # when the underlying network changes.
    self._subscribe_to_events()
# ── Legacy migration ──────────────────────────────────────────────────
def _migrate_legacy_configs(self, legacy_base: str) -> None:
"""Copy files from the old config_dir/connectivity/ tree to the new data_dir locations.
The old layout stored WireGuard and OpenVPN configs under the API container's
config_dir, which Docker cannot bind-mount into store-service containers. Files
are copied (not moved) so the legacy location still works until the operator
removes it manually.
"""
import shutil
pairs = (
(os.path.join(legacy_base, 'wireguard_ext'), self.wireguard_ext_dir),
(os.path.join(legacy_base, 'openvpn'), self.openvpn_dir),
)
for src_dir, dst_dir in pairs:
if not os.path.isdir(src_dir):
continue
try:
for fname in os.listdir(src_dir):
src_file = os.path.join(src_dir, fname)
dst_file = os.path.join(dst_dir, fname)
if os.path.isfile(src_file) and not os.path.exists(dst_file):
shutil.copy2(src_file, dst_file)
os.chmod(dst_file, 0o600)
logger.info('connectivity: migrated %s%s', src_file, dst_file)
except OSError as e:
logger.warning('connectivity: migration from %s failed: %s', src_dir, e)
# ── Event wiring ──────────────────────────────────────────────────────
def _subscribe_to_events(self) -> None:
    """Register `_on_network_changed` for CONFIG_CHANGED events.

    The ServiceBus is imported here rather than at module load. Any failure
    (import or subscribe) is only logged at debug level.
    """
    try:
        from managers import service_bus, EventType
        handler = self._on_network_changed
        service_bus.subscribe_to_event(EventType.CONFIG_CHANGED, handler)
    except Exception as e:
        # Best-effort only: routes can still be applied manually.
        logger.debug(f"connectivity: event subscribe skipped: {e}")
def _on_network_changed(self, event) -> None:
"""ServiceBus handler: re-apply routes when network config changes."""
try:
source = getattr(event, 'source', '')
if source not in ('network', 'wireguard', 'connectivity'):
return
logger.info(f"connectivity: re-applying routes due to {source} change")
self.apply_routes()
except Exception as e:
logger.warning(f"connectivity: on_network_changed failed (non-fatal): {e}")
# ── BaseServiceManager required ───────────────────────────────────────
def get_status(self) -> Dict[str, Any]:
"""Return status summary including configured exits and peer count."""
try:
exits_status: Dict[str, Dict[str, Any]] = {}
for exit_type in self.EXIT_TYPES:
if exit_type == "default":
continue
exits_status[exit_type] = self._exit_status(exit_type)
peers_with_exit = 0
if self.peer_registry is not None:
try:
for peer in self.peer_registry.list_peers():
if peer.get('exit_via', 'default') != 'default':
peers_with_exit += 1
except Exception as e:
logger.warning(f"get_status: peer count failed: {e}")
return {
'service': 'connectivity',
'running': True,
'exits': exits_status,
'peers_with_exit': peers_with_exit,
}
except Exception as e:
return self.handle_error(e, 'get_status')
def test_connectivity(self) -> Dict[str, Any]:
"""Minimal connectivity self-test."""
return {'success': True}
def get_config(self) -> Dict[str, Any]:
"""Return current connectivity config from config_manager."""
try:
if self.config_manager is not None and hasattr(
self.config_manager, 'get_connectivity_config'
):
return self.config_manager.get_connectivity_config()
except Exception as e:
logger.warning(f"get_config: config_manager lookup failed: {e}")
return {'exits': {}, 'peer_exit_map': {}}
# ── Public API ────────────────────────────────────────────────────────
def list_exits(self) -> List[Dict[str, Any]]:
"""List configured exits with current status."""
result: List[Dict[str, Any]] = []
for exit_type in self.EXIT_TYPES:
if exit_type == "default":
continue
entry = {'type': exit_type}
entry.update(self._exit_status(exit_type))
result.append(entry)
return result
def get_peer_exits(self) -> Dict[str, str]:
"""Return {peer_name: exit_type} for all peers."""
out: Dict[str, str] = {}
if self.peer_registry is None:
return out
try:
for peer in self.peer_registry.list_peers():
name = peer.get('peer')
if name:
out[name] = peer.get('exit_via', 'default')
except Exception as e:
logger.warning(f"get_peer_exits: {e}")
return out
def set_peer_exit(self, peer_name: str, exit_via: str) -> Dict[str, Any]:
    """Assign a peer to a connection (by id) or 'default' and apply rules.

    `exit_via` is a connection id, 'default', or — as a one-release
    back-compat shim — a legacy exit *type* string, which is resolved to
    the single connection instance of that type. Validation that the id
    exists lives in peer_registry.set_peer_exit_via.

    Flow: validate arguments → reject cell_relay exit loops → persist the
    assignment in the registry → clear a stale cell `route_via` when the
    target is not a cell_relay → re-apply routes (best-effort) → report
    the `exit_via` value now stored on the peer.

    Returns:
        ``{'ok': True, 'peer': peer_name, 'exit_via': <stored value>}`` on
        success, otherwise ``{'ok': False, 'error': <message>}``.
    """
    if not isinstance(exit_via, str) or not exit_via:
        return {'ok': False, 'error': 'connection_id is required'}
    if not isinstance(peer_name, str) or not re.match(r'^[A-Za-z0-9_.-]{1,64}$', peer_name):
        return {'ok': False, 'error': f'invalid peer_name {peer_name!r}'}
    if self.peer_registry is None:
        return {'ok': False, 'error': 'peer_registry not available'}
    # A cell_relay assignment must not form an exit loop (A→B→A). Reject
    # before persisting so the UI surfaces the error rather than silently
    # creating a cycle that apply_routes would later refuse.
    target = self._connection_by_id(exit_via)
    if target is not None and target.get('type') == self.CELL_RELAY_TYPE:
        peer = self.peer_registry.get_peer(peer_name) if self.peer_registry else None
        if peer is not None and self._cell_relay_loops(peer, target.get('cell_name')):
            return {'ok': False, 'error':
                    f"routing {peer_name!r} via cell {target.get('cell_name')!r} "
                    f'would form an exit loop (that cell already uses this cell '
                    f'as its exit relay)'}
    # Persist the assignment; a registry exception becomes an error result.
    try:
        ok = self.peer_registry.set_peer_exit_via(peer_name, exit_via)
    except Exception as e:
        logger.error(f"set_peer_exit: registry update failed: {e}")
        return {'ok': False, 'error': str(e)}
    if not ok:
        # Distinguish "no such peer" from "no such connection".
        if self._peer_exists(peer_name):
            return {'ok': False, 'error':
                    f'unknown connection {exit_via!r}; '
                    f"must be a connection id or 'default'"}
        return {'ok': False, 'error': f'peer {peer_name!r} not found'}
    # Keep the legacy route_via in sync: a cell_relay assignment drives the
    # cell-routing path (route_via set in apply_routes), but reassigning away
    # from a cell must clear the stale route_via so the cell route is torn
    # down and startup replay no longer reapplies it.
    if target is None or target.get('type') != self.CELL_RELAY_TYPE:
        try:
            cur = self.peer_registry.get_peer(peer_name)
            if cur and cur.get('route_via'):
                via_cell = cur.get('route_via')
                self.peer_registry.set_route_via(peer_name, None)
                if self.wireguard_manager is not None:
                    src_ip = self._peer_source_ip(peer_name)
                    if src_ip:
                        # Route removal is best-effort: a failure is logged
                        # and the assignment still succeeds.
                        try:
                            self.wireguard_manager.remove_peer_route_via(src_ip)
                        except Exception as e:
                            logger.warning(f"set_peer_exit: remove_peer_route_via "
                                           f"{src_ip}: {e}")
                logger.info(f"set_peer_exit: cleared cell route_via "
                            f"{via_cell!r} for {peer_name!r}")
        except Exception as e:
            logger.warning(f"set_peer_exit: route_via cleanup failed: {e}")
    # Rule application is non-fatal: the assignment is already persisted.
    try:
        self.apply_routes()
    except Exception as e:
        logger.warning(f"set_peer_exit: apply_routes failed (non-fatal): {e}")
    # Report what the registry actually stored (a legacy type string may
    # have been resolved to a connection id); fall back to 'default'.
    resolved = 'default'
    try:
        peer = self.peer_registry.get_peer(peer_name)
        if peer:
            resolved = peer.get('exit_via', 'default')
    except Exception:
        pass
    return {'ok': True, 'peer': peer_name, 'exit_via': resolved}
def _peer_exists(self, peer_name: str) -> bool:
"""True when a peer with this name is registered."""
if self.peer_registry is None:
return False
try:
return self.peer_registry.get_peer(peer_name) is not None
except Exception:
return False
def upload_wireguard_ext(self, conf_text: str) -> Dict[str, Any]:
"""Validate and store an external WireGuard config."""
try:
cleaned = self._validate_wg_conf(conf_text)
except ValueError as e:
return {'ok': False, 'error': str(e)}
path = os.path.join(self.wireguard_ext_dir, 'wg_ext0.conf')
try:
self._write_secure(path, cleaned)
except Exception as e:
logger.error(f"upload_wireguard_ext: write failed: {e}")
return {'ok': False, 'error': str(e)}
logger.info(f"connectivity: stored wg_ext0.conf ({len(cleaned)} bytes)")
return {'ok': True}
def upload_openvpn(self, ovpn_text: str, name: str = 'default') -> Dict[str, Any]:
"""Validate and store an OpenVPN profile."""
if not isinstance(name, str) or not _NAME_RE.match(name):
return {
'ok': False,
'error': f'invalid name {name!r}; must match [a-z0-9_-]{{1,32}}',
}
try:
cleaned = self._validate_ovpn(ovpn_text)
except ValueError as e:
return {'ok': False, 'error': str(e)}
path = os.path.join(self.openvpn_dir, f'{name}.ovpn')
try:
self._write_secure(path, cleaned)
except Exception as e:
logger.error(f"upload_openvpn: write failed: {e}")
return {'ok': False, 'error': str(e)}
logger.info(f"connectivity: stored {name}.ovpn ({len(cleaned)} bytes)")
return {'ok': True}
def configure_sshuttle(self, cfg: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and store an sshuttle (SSH tunnel) exit configuration.
Requires a pinned host key (a single known_hosts line); rejects any
attempt to disable strict host-key checking. Secrets (private key /
password) are written 0o600 under data/services/sshuttle/config/ and
mirrored into the vault — they are never placed in cell_config.json.
"""
if not isinstance(cfg, dict):
return {'ok': False, 'error': 'config must be a JSON object'}
for value in cfg.values():
if isinstance(value, str) and _contains_strict_hostkey_disable(value):
return {
'ok': False,
'error': 'StrictHostKeyChecking=no is not allowed; '
'a pinned host key (known_hosts line) is required',
}
host = _validate_host(cfg.get('host'))
if not host:
return {'ok': False, 'error': 'invalid host: must be a hostname or IP'}
port = _validate_port(cfg.get('port', 22))
if port is None:
return {'ok': False, 'error': 'invalid port: must be 1-65535'}
user = cfg.get('user')
if not isinstance(user, str) or not _SSH_USER_RE.match(user):
return {
'ok': False,
'error': 'invalid user: must match ^[a-z_][a-z0-9_-]{0,31}$',
}
auth = cfg.get('auth', 'key')
if auth not in ('key', 'password'):
return {'ok': False, 'error': "invalid auth: must be 'key' or 'password'"}
known_hosts = cfg.get('known_hosts')
err = self._validate_known_hosts_line(known_hosts)
if err:
return {'ok': False, 'error': err}
known_hosts = known_hosts.strip()
private_key = ''
password = ''
if auth == 'key':
private_key = cfg.get('private_key', '')
if not isinstance(private_key, str) or 'PRIVATE KEY' not in private_key:
return {
'ok': False,
'error': 'private_key is required for key auth and must be '
'a PEM/OpenSSH private key',
}
else:
password = cfg.get('password', '')
if not isinstance(password, str) or not password or '\n' in password:
return {'ok': False, 'error': 'password is required for password auth'}
exclude_subnets = cfg.get('exclude_subnets')
if exclude_subnets is None:
exclude_subnets = self._default_exclude_subnets()
if not isinstance(exclude_subnets, list):
return {'ok': False, 'error': 'exclude_subnets must be a list of CIDRs'}
validated_excludes = []
for net in exclude_subnets:
try:
validated_excludes.append(str(ipaddress.ip_network(str(net), strict=False)))
except ValueError:
return {'ok': False, 'error': f'invalid exclude subnet: {net!r}'}
conf_lines = [
f'HOST={host}',
f'PORT={port}',
f'USER={user}',
f'AUTH={auth}',
f'LISTEN_PORT={self.SSHUTTLE_PORT}',
f'EXCLUDE={",".join(validated_excludes)}',
]
try:
self._write_secure(
os.path.join(self.sshuttle_dir, 'known_hosts'),
known_hosts + '\n',
)
if auth == 'key':
key_text = private_key.rstrip('\n') + '\n'
self._write_secure(os.path.join(self.sshuttle_dir, 'id_pic'), key_text)
else:
self._write_secure(
os.path.join(self.sshuttle_dir, 'password'), password + '\n')
self._write_secure(
os.path.join(self.sshuttle_dir, 'sshuttle.conf'),
'\n'.join(conf_lines) + '\n',
)
except Exception as e:
logger.error(f"configure_sshuttle: write failed: {e}")
return {'ok': False, 'error': 'failed to write sshuttle configuration'}
if self.vault_manager is not None:
try:
if auth == 'key':
self.vault_manager.store_secret('connectivity_sshuttle_key',
private_key)
else:
self.vault_manager.store_secret('connectivity_sshuttle_password',
password)
except Exception as e:
logger.warning(f"configure_sshuttle: vault store failed: {e}")
self._persist_exit_config('sshuttle', {
'host': host,
'port': port,
'user': user,
'auth': auth,
'exclude_subnets': validated_excludes,
})
logger.info(f"connectivity: configured sshuttle exit ({user}@{host}:{port})")
return {'ok': True}
def configure_proxy(self, cfg: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and store an upstream proxy (redsocks) exit configuration.
Generates redsocks.conf from strictly validated fields only — no value
that could break out of the quoted config strings is accepted. The
password lives in the 0o600 conf file, never in compose env.
"""
if not isinstance(cfg, dict):
return {'ok': False, 'error': 'config must be a JSON object'}
scheme = cfg.get('scheme')
if scheme not in ('http', 'socks5'):
return {'ok': False, 'error': "invalid scheme: must be 'http' or 'socks5'"}
host = _validate_host(cfg.get('host'))
if not host:
return {'ok': False, 'error': 'invalid host: must be a hostname or IP'}
port = _validate_port(cfg.get('port'))
if port is None:
return {'ok': False, 'error': 'invalid port: must be 1-65535'}
user = cfg.get('user') or ''
password = cfg.get('password') or ''
if user and not (isinstance(user, str) and _PROXY_USER_RE.match(user)):
return {
'ok': False,
'error': 'invalid user: must match ^[A-Za-z0-9._-]{1,64}$',
}
if password and not (isinstance(password, str)
and _PROXY_PASSWORD_RE.match(password)):
return {
'ok': False,
'error': 'invalid password: 1-128 printable ASCII characters '
'excluding double quotes and backslashes',
}
if password and not user:
return {'ok': False, 'error': 'password requires a user'}
conf = self._render_redsocks_conf(scheme, host, port, user, password)
try:
self._write_secure(os.path.join(self.proxy_dir, 'redsocks.conf'), conf)
except Exception as e:
logger.error(f"configure_proxy: write failed: {e}")
return {'ok': False, 'error': 'failed to write redsocks configuration'}
self._persist_exit_config('proxy', {
'scheme': scheme,
'host': host,
'port': port,
'user': user,
})
logger.info(f"connectivity: configured proxy exit ({scheme}://{host}:{port})")
return {'ok': True}
def _render_redsocks_conf(self, scheme: str, host: str, port: int,
user: str, password: str) -> str:
"""Build a redsocks.conf from already-validated fields."""
redsocks_type = 'socks5' if scheme == 'socks5' else 'http-connect'
lines = [
'base {',
' log_debug = off;',
' log_info = on;',
' log = stderr;',
' daemon = off;',
' redirector = iptables;',
'}',
'',
'redsocks {',
' local_ip = 0.0.0.0;',
f' local_port = {self.REDSOCKS_PORT};',
f' ip = {host};',
f' port = {port};',
f' type = {redsocks_type};',
]
if user:
lines.append(f' login = "{user}";')
if password:
lines.append(f' password = "{password}";')
lines.append('}')
return '\n'.join(lines) + '\n'
@staticmethod
def _validate_known_hosts_line(line) -> Optional[str]:
"""Validate a single known_hosts line; return an error string or None.
Expected format: host[,ip] keytype base64key [comment]
"""
if not isinstance(line, str) or not line.strip():
return ('known_hosts is required: a pinned host key line '
'(host[,ip] keytype base64key)')
line = line.strip()
if '\n' in line or '\r' in line:
return 'known_hosts must be a single line'
parts = line.split()
if len(parts) < 3:
return ('invalid known_hosts line: expected '
'host[,ip] keytype base64key')
hosts, keytype, key = parts[0], parts[1], parts[2]
if not _KNOWN_HOSTS_HOSTS_RE.match(hosts):
return f'invalid known_hosts host field: {hosts!r}'
if keytype not in _SSH_KEYTYPES:
return (f'invalid known_hosts key type {keytype!r}; '
f'must be one of {_SSH_KEYTYPES}')
if not _B64_RE.match(key):
return 'invalid known_hosts key: not valid base64'
return None
def _default_exclude_subnets(self) -> List[str]:
"""Cell subnet + RFC1918 ranges — internal traffic is never tunneled."""
excludes = list(self.RFC1918_SUBNETS)
try:
if self.config_manager is not None:
identity = self.config_manager.get_identity()
if isinstance(identity, dict):
ip_range = identity.get('ip_range', '')
if isinstance(ip_range, str) and '/' in ip_range \
and ip_range not in excludes:
excludes.insert(0, ip_range)
except Exception as e:
logger.debug(f"_default_exclude_subnets: {e}")
return excludes
def _persist_exit_config(self, exit_type: str, fields: Dict[str, Any]) -> None:
"""Persist non-secret exit fields under connectivity.exits in config."""
if self.config_manager is None:
return
try:
cfg = self.config_manager.get_connectivity_config()
exits = cfg.get('exits') if isinstance(cfg, dict) else None
exits = dict(exits) if isinstance(exits, dict) else {}
exits[exit_type] = fields
self.config_manager.set_connectivity_field('exits', exits)
except Exception as e:
logger.warning(f"_persist_exit_config({exit_type}): {e}")
# ── Connectivity v2 — per-instance config validation ──────────────────
#
# These validators operate on a single connection instance's config dict,
# returning (clean_config, clean_secrets, error). They share the field
# rules used by the legacy single-slot configure_* methods so behaviour is
# identical; the legacy methods remain the v1 write path until phase 2.
def _validate_connection_config(
self, conn_type: str, config: Dict[str, Any],
secrets_in: Optional[Dict[str, Any]],
) -> Tuple[Dict[str, Any], Dict[str, str], Optional[str]]:
"""Validate one instance's config + secrets for `conn_type`.
Returns (clean_config, clean_secrets, error). On error the first two
values are empty. Secrets are returned separately so callers store them
in the vault and never in cell_config.
"""
if not isinstance(config, dict):
return {}, {}, 'config must be a JSON object'
secrets_in = secrets_in or {}
if not isinstance(secrets_in, dict):
return {}, {}, 'secrets must be a JSON object'
if conn_type == 'sshuttle':
return self._validate_sshuttle_instance(config, secrets_in)
if conn_type == 'proxy':
return self._validate_proxy_instance(config, secrets_in)
if conn_type == 'wireguard_ext':
return self._validate_wg_instance(config, secrets_in)
if conn_type == 'openvpn':
return self._validate_ovpn_instance(config, secrets_in)
if conn_type == 'tor':
# Tor has no per-instance config or secret.
return {}, {}, None
return {}, {}, f'unsupported connection type {conn_type!r}'
def _validate_sshuttle_instance(
    self, cfg: Dict[str, Any], secrets_in: Dict[str, Any],
) -> Tuple[Dict[str, Any], Dict[str, str], Optional[str]]:
    """Validate one sshuttle instance.

    Checks, in order: no attempt to disable strict host-key checking, host,
    port (default 22), user, auth mode, the pinned known_hosts line, the
    credential for the chosen auth mode (taken from `secrets_in`, falling
    back to `cfg`), and the exclude subnets (defaulted when absent).

    Returns ``(clean_config, clean_secrets, error)``; on error both dicts
    are empty.
    """
    def _fail(message: str):
        return {}, {}, message

    if any(isinstance(value, str) and _contains_strict_hostkey_disable(value)
           for value in cfg.values()):
        return _fail('StrictHostKeyChecking=no is not allowed; a '
                     'pinned host key (known_hosts line) is required')

    host = _validate_host(cfg.get('host'))
    if not host:
        return _fail('invalid host: must be a hostname or IP')

    port = _validate_port(cfg.get('port', 22))
    if port is None:
        return _fail('invalid port: must be 1-65535')

    user = cfg.get('user')
    if not (isinstance(user, str) and _SSH_USER_RE.match(user)):
        return _fail('invalid user: must match ^[a-z_][a-z0-9_-]{0,31}$')

    auth = cfg.get('auth', 'key')
    if auth not in ('key', 'password'):
        return _fail("invalid auth: must be 'key' or 'password'")

    known_hosts = cfg.get('known_hosts')
    err = self._validate_known_hosts_line(known_hosts)
    if err:
        return _fail(err)
    clean_secrets: Dict[str, str] = {'known_hosts': known_hosts.strip()}

    if auth == 'key':
        private_key = secrets_in.get('private_key', cfg.get('private_key', ''))
        if not (isinstance(private_key, str) and 'PRIVATE KEY' in private_key):
            return _fail('private_key is required for key auth and must be '
                         'a PEM/OpenSSH private key')
        clean_secrets['private_key'] = private_key
    else:
        password = secrets_in.get('password', cfg.get('password', ''))
        if not (isinstance(password, str) and password and '\n' not in password):
            return _fail('password is required for password auth')
        clean_secrets['password'] = password

    exclude_subnets = cfg.get('exclude_subnets')
    if exclude_subnets is None:
        exclude_subnets = self._default_exclude_subnets()
    if not isinstance(exclude_subnets, list):
        return _fail('exclude_subnets must be a list of CIDRs')

    validated_excludes = []
    for net in exclude_subnets:
        try:
            # strict=False lets host bits through; they are normalised away.
            network = ipaddress.ip_network(str(net), strict=False)
        except ValueError:
            return _fail(f'invalid exclude subnet: {net!r}')
        validated_excludes.append(str(network))

    clean_config = {
        'host': host, 'port': port, 'user': user, 'auth': auth,
        'exclude_subnets': validated_excludes,
    }
    return clean_config, clean_secrets, None
def _validate_proxy_instance(
    self, cfg: Dict[str, Any], secrets_in: Dict[str, Any],
) -> Tuple[Dict[str, Any], Dict[str, str], Optional[str]]:
    """Check and normalise the settings of an upstream-proxy connection.

    Scheme, host and port are mandatory. User and password are optional, but
    a password is only accepted together with a user. The password is read
    from ``secrets_in`` first and from ``cfg`` as a fallback.

    Returns ``(clean_config, clean_secrets, error)``. On failure both dicts
    are empty and ``error`` carries the message; on success ``error`` is
    None and the password, when given, is the only entry of ``clean_secrets``.
    """
    def reject(message):
        # Uniform failure shape: nothing validated, only the reason.
        return {}, {}, message

    scheme = cfg.get('scheme')
    if scheme != 'http' and scheme != 'socks5':
        return reject("invalid scheme: must be 'http' or 'socks5'")

    host = _validate_host(cfg.get('host'))
    if not host:
        return reject('invalid host: must be a hostname or IP')

    port = _validate_port(cfg.get('port'))
    if port is None:
        return reject('invalid port: must be 1-65535')

    user = cfg.get('user') or ''
    fallback_password = cfg.get('password') or ''
    password = secrets_in.get('password', fallback_password)

    if user:
        user_ok = isinstance(user, str) and _PROXY_USER_RE.match(user)
        if not user_ok:
            return reject('invalid user: must match ^[A-Za-z0-9._-]{1,64}$')
    if password:
        password_ok = (isinstance(password, str)
                       and _PROXY_PASSWORD_RE.match(password))
        if not password_ok:
            return reject('invalid password: 1-128 printable ASCII characters '
                          'excluding double quotes and backslashes')
        if not user:
            return reject('password requires a user')

    clean_secrets: Dict[str, str] = {'password': password} if password else {}
    clean_config = {'scheme': scheme, 'host': host, 'port': port, 'user': user}
    return clean_config, clean_secrets, None
def _validate_wg_instance(
    self, cfg: Dict[str, Any], secrets_in: Dict[str, Any],
) -> Tuple[Dict[str, Any], Dict[str, str], Optional[str]]:
    """Validate the WireGuard config text of an external-WireGuard connection.

    The text is read from ``secrets_in['conf']``, falling back to
    ``cfg['conf']``, and handed to ``self._validate_wg_conf``. A ValueError
    from that call becomes the returned error message.

    Returns ``(clean_config, clean_secrets, error)``; the config part is
    always empty because the whole profile is stored as the ``conf`` secret.
    """
    raw = secrets_in.get('conf', cfg.get('conf', ''))
    if not (isinstance(raw, str) and raw.strip()):
        return {}, {}, 'conf is required: a WireGuard config'
    try:
        sanitized = self._validate_wg_conf(raw)
    except ValueError as exc:
        return {}, {}, str(exc)
    return {}, {'conf': sanitized}, None
def _validate_ovpn_instance(
    self, cfg: Dict[str, Any], secrets_in: Dict[str, Any],
) -> Tuple[Dict[str, Any], Dict[str, str], Optional[str]]:
    """Validate the profile text of an OpenVPN connection.

    The text is read from ``secrets_in['conf']``, falling back to
    ``cfg['conf']``, and handed to ``self._validate_ovpn``. A ValueError
    from that call becomes the returned error message.

    Returns ``(clean_config, clean_secrets, error)``; the config part is
    always empty because the whole profile is stored as the ``conf`` secret.
    """
    raw = secrets_in.get('conf', cfg.get('conf', ''))
    if not (isinstance(raw, str) and raw.strip()):
        return {}, {}, 'conf is required: an OpenVPN profile'
    try:
        sanitized = self._validate_ovpn(raw)
    except ValueError as exc:
        return {}, {}, str(exc)
    return {}, {'conf': sanitized}, None
# ── Connectivity v2 — resource allocator ──────────────────────────────
def _used_resources(self) -> Tuple[set, set, set, set]:
    """Collect the routing resources already claimed by stored connections.

    Returns four sets ``(marks, tables, ifaces, ports)``. Marks, tables and
    redirect ports are only counted when stored as ints; an iface is counted
    whenever it is truthy. All sets are empty when no config manager is
    wired or when listing the connections fails (the failure is logged).
    """
    marks: set = set()
    tables: set = set()
    ifaces: set = set()
    ports: set = set()
    claimed = (marks, tables, ifaces, ports)

    manager = self.config_manager
    if manager is None:
        return claimed

    try:
        records = manager.list_connections()
    except Exception as e:
        logger.warning(f"_used_resources: list_connections failed: {e}")
        records = []

    int_fields = (('mark', marks), ('table', tables), ('redirect_port', ports))
    for rec in records:
        for key, bucket in int_fields:
            value = rec.get(key)
            if isinstance(value, int):
                bucket.add(value)
        iface = rec.get('iface')
        if iface:
            ifaces.add(iface)
    return claimed
def _allocate_resources(
    self, conn_type: str, conn_id: str,
) -> Tuple[int, int, Optional[str], Optional[int]]:
    """Pick ``(mark, table, iface, redirect_port)`` for a new connection.

    Each pool hands out its lowest value not reported by
    ``_used_resources()``: marks step by ``MARK_STRIDE`` from ``MARK_BASE``,
    tables step by one from ``TABLE_BASE``, redirect ports are taken from
    ``REDIRECT_PORT_BASE``..``REDIRECT_PORT_MAX``. The iface name is the
    per-type prefix plus the first 8 characters of the id's last
    ``_``-separated part. iface is only set for ``IFACE_TYPES`` and
    redirect_port only for ``REDIRECT_TYPES``; otherwise they are None.

    Raises:
        ValueError: when the mark pool or the redirect-port pool is exhausted.
    """
    used_marks, used_tables, _unused_ifaces, used_ports = self._used_resources()

    mark = self.MARK_BASE
    while mark in used_marks:
        mark += self.MARK_STRIDE
    if mark > self.MARK_MAX:
        raise ValueError('no free fwmark available in 0x1000–0x1FFF')

    table = self.TABLE_BASE
    while table in used_tables:
        table += 1

    iface: Optional[str] = None
    if conn_type in self.IFACE_TYPES:
        suffix = conn_id.rpartition('_')[2][:8]
        iface = f"{self.IFACE_PREFIXES[conn_type]}{suffix}"

    redirect_port: Optional[int] = None
    if conn_type in self.REDIRECT_TYPES:
        for candidate in range(self.REDIRECT_PORT_BASE,
                               self.REDIRECT_PORT_MAX + 1):
            if candidate not in used_ports:
                redirect_port = candidate
                break
        else:
            raise ValueError('no free redirect port available in 9100–9199')

    return mark, table, iface, redirect_port
@staticmethod
def _new_conn_id() -> str:
    """Return a fresh connection id: ``conn_`` followed by 8 random hex digits."""
    suffix = secrets.token_hex(4)
    return "conn_" + suffix
# ── Connectivity v2 — per-instance container lifecycle ────────────────
#
# Each connection instance runs its own container, named
# cell-<short>-<instance_id>, derived from the connection record (NOT from
# the fixed EXIT_CONTAINERS map, which is the legacy single-container view).
# The backing store service must be installed first — its image and raw
# compose template come from the install record. Tor is single-instance.
# short container slug per type — must match the instanceable
# compose-template container_name prefixes in pic-services.
# Connection type -> short slug used by instance_container_name() when it
# builds the per-instance container name cell-<slug>-<instance_id>.
CONTAINER_SLUGS = dict(
    wireguard_ext="wgext",
    openvpn="ovpn",
    sshuttle="sshuttle",
    proxy="proxy",
)
def instance_container_name(self, conn: Dict[str, Any]) -> Optional[str]:
    """Return the container name belonging to a connection record.

    A ``tor`` connection maps to the fixed name stored under ``'tor'`` in
    ``EXIT_CONTAINERS``. Any type listed in ``CONTAINER_SLUGS`` maps to
    ``cell-<slug>-<instance_id>``. Types without a slug yield None.
    """
    kind = conn.get('type')
    if kind == 'tor':
        return self.EXIT_CONTAINERS.get('tor')
    slug = self.CONTAINER_SLUGS.get(kind)
    if slug:
        return f"cell-{slug}-{self._instance_id(conn)}"
    return None
@staticmethod
def _instance_id(conn: Dict[str, Any]) -> str:
    """Return the short instance id of a connection record.

    This is the part of the record's ``id`` after its last underscore,
    truncated to 12 characters; a missing id yields an empty string.
    """
    full_id = str(conn.get('id', ''))
    tail = full_id.rpartition('_')[2]
    return tail[:12]
def _store_record_for(self, conn_type: str) -> Optional[Dict[str, Any]]:
    """Look up the install record of the store service behind ``conn_type``.

    Returns None when the type has no entry in ``STORE_SERVICE_IDS``, no
    config manager is wired, reading the installed services fails (logged),
    the result is not a dict, or the service is simply not in it.
    """
    service_id = self.STORE_SERVICE_IDS.get(conn_type)
    if self.config_manager is None or not service_id:
        return None
    try:
        installed = self.config_manager.get_installed_services()
    except Exception as e:
        logger.warning(f"_store_record_for({conn_type}): {e}")
        return None
    return installed.get(service_id) if isinstance(installed, dict) else None
def _materialize_instance_config(self, conn: Dict[str, Any],
                                 config_dir: str) -> None:
    """Write the config files of one connection instance into ``config_dir``.

    Secret values come from ``_load_instance_secrets(conn)`` and non-secret
    settings from the record's ``config``. The directory is created if
    missing. Per type:

    * ``wireguard_ext`` -> ``wg_ext0.conf`` (only when a ``conf`` secret exists)
    * ``openvpn``       -> ``client.ovpn`` (only when a ``conf`` secret exists)
    * ``sshuttle``      -> delegated to ``_materialize_sshuttle_config``
    * ``proxy``         -> ``redsocks.conf`` rendered by
      ``_render_redsocks_for_instance``

    Any other type only gets the directory created.
    """
    kind = conn.get('type')
    settings = conn.get('config', {}) or {}
    vault_values = self._load_instance_secrets(conn)
    os.makedirs(config_dir, exist_ok=True)

    if kind == 'sshuttle':
        self._materialize_sshuttle_config(conn, settings, vault_values, config_dir)
        return
    if kind == 'proxy':
        target = os.path.join(config_dir, 'redsocks.conf')
        rendered = self._render_redsocks_for_instance(conn, settings, vault_values)
        self._write_secure(target, rendered)
        return

    if kind == 'wireguard_ext':
        filename = 'wg_ext0.conf'
    elif kind == 'openvpn':
        filename = 'client.ovpn'
    else:
        return
    profile = vault_values.get('conf', '')
    if profile:
        self._write_secure(os.path.join(config_dir, filename), profile)
def _load_instance_secrets(self, conn: Dict[str, Any]) -> Dict[str, str]:
    """Read a connection's secrets from the vault as ``{field: value}``.

    Each entry of ``conn['secret_refs']`` is fetched with
    ``vault_manager.get_secret``. The field name is the ref with the
    ``<conn id>_`` prefix removed (refs without that prefix are used as-is).
    Refs that fail to read (logged) or resolve to None are left out. Returns
    an empty dict when no vault manager is wired.
    """
    resolved: Dict[str, str] = {}
    conn_id = str(conn.get('id', ''))
    vault = self.vault_manager
    if vault is None:
        return resolved

    prefix = conn_id + '_'
    for ref in conn.get('secret_refs', []):
        if ref.startswith(prefix):
            field = ref[len(prefix):]
        else:
            field = ref
        try:
            value = vault.get_secret(ref)
        except Exception as e:
            logger.warning(f"_load_instance_secrets: read {ref} failed: {e}")
            value = None
        if value is not None:
            resolved[field] = value
    return resolved
def _materialize_sshuttle_config(self, conn, config, secrets_map,
                                 config_dir) -> None:
    """Write the sshuttle instance files into ``config_dir``.

    Files are written through ``self._write_secure`` in this order:

    * ``known_hosts``   - when that secret is present
    * ``password``      - for password auth, when that secret is present
    * ``id_pic``        - for any other auth, when a private key is present
    * ``sshuttle.conf`` - always; ``KEY=value`` lines for HOST, PORT, USER,
      AUTH, LISTEN_PORT and EXCLUDE

    LISTEN_PORT is the connection's ``redirect_port`` or, when that is
    unset, ``self.SSHUTTLE_PORT``.
    """
    def put(filename, text):
        self._write_secure(os.path.join(config_dir, filename), text)

    listen_port = conn.get('redirect_port') or self.SSHUTTLE_PORT
    excludes = ','.join(config.get('exclude_subnets', []) or [])
    settings = (
        ('HOST', config.get('host', '')),
        ('PORT', config.get('port', 22)),
        ('USER', config.get('user', '')),
        ('AUTH', config.get('auth', 'key')),
        ('LISTEN_PORT', listen_port),
        ('EXCLUDE', excludes),
    )
    conf_body = ''.join(f"{key}={value}\n" for key, value in settings)

    if 'known_hosts' in secrets_map:
        put('known_hosts', secrets_map['known_hosts'].rstrip('\n') + '\n')

    uses_password = config.get('auth') == 'password'
    if uses_password and 'password' in secrets_map:
        put('password', secrets_map['password'] + '\n')
    elif not uses_password and 'private_key' in secrets_map:
        put('id_pic', secrets_map['private_key'].rstrip('\n') + '\n')

    put('sshuttle.conf', conf_body)
def _render_redsocks_for_instance(self, conn, config, secrets_map) -> str:
    """Render the redsocks config text for one proxy connection.

    The listener port is the connection's ``redirect_port`` or, when unset,
    ``self.REDSOCKS_PORT``. The upstream type is ``socks5`` for the socks5
    scheme and ``http-connect`` for anything else. ``login`` and
    ``password`` lines are only emitted when a user / password is present.
    """
    local_port = conn.get('redirect_port') or self.REDSOCKS_PORT
    if config.get('scheme') == 'socks5':
        upstream_type = 'socks5'
    else:
        upstream_type = 'http-connect'

    base_section = [
        'base {',
        ' log_debug = off;',
        ' log_info = on;',
        ' log = stderr;',
        ' daemon = off;',
        ' redirector = iptables;',
        '}',
    ]
    proxy_section = [
        'redsocks {',
        ' local_ip = 0.0.0.0;',
        f' local_port = {local_port};',
        f" ip = {config.get('host', '')};",
        f" port = {config.get('port', '')};",
        f' type = {upstream_type};',
    ]
    login = config.get('user')
    if login:
        proxy_section.append(f' login = "{login}";')
    password = secrets_map.get('password')
    if password:
        proxy_section.append(f' password = "{password}";')
    proxy_section.append('}')

    return '\n'.join(base_section + [''] + proxy_section) + '\n'
def up_connection_instance(self, conn: Dict[str, Any]) -> Dict[str, Any]:
    """Render config + compose for one connection and bring its container up.

    Requires the backing store service to be installed: its install record
    supplies the ``compose_template`` and ``manifest`` handed to the service
    composer. Types in ``SINGLE_INSTANCE_TYPES`` are not handled here and
    short-circuit with ``{'ok': True, 'single_instance': True}``.

    Returns the composer's ``up_instance`` result on the happy path, or
    ``{'ok': False, 'error': ...}`` when a precondition is missing, the
    instance config cannot be written, or the composer call itself raises.
    Never raises for those failures, so callers that have already persisted
    the connection record can report the problem instead of crashing.
    """
    conn_type = conn.get('type')
    if conn_type in self.SINGLE_INSTANCE_TYPES:
        return {'ok': True, 'single_instance': True}
    if self.service_composer is None:
        return {'ok': False, 'error': 'service_composer unavailable'}

    record = self._store_record_for(conn_type)
    if not record:
        svc_id = self.STORE_SERVICE_IDS.get(conn_type, conn_type)
        return {'ok': False,
                'error': f"store service {svc_id!r} is not installed; "
                         f'install it before creating a {conn_type} connection'}

    template = record.get('compose_template')
    manifest = record.get('manifest') or {}
    if not template:
        return {'ok': False,
                'error': f'store service for {conn_type} has no compose '
                         'template (reinstall required for per-instance support)'}

    # A record was found, so the type is guaranteed to have a service id.
    svc_id = self.STORE_SERVICE_IDS[conn_type]
    instance_id = self._instance_id(conn)
    try:
        config_dir = self.service_composer.instance_config_dir(svc_id, instance_id)
        self._materialize_instance_config(conn, config_dir)
    except Exception as e:
        logger.error(f"up_connection_instance: config write failed: {e}")
        return {'ok': False, 'error': 'failed to write instance config'}

    # The composer call used to be unguarded, which broke the "never raises"
    # contract; convert a failure into the same error shape as above.
    try:
        return self.service_composer.up_instance(
            svc_id, instance_id, manifest, template,
            redirect_port=conn.get('redirect_port'))
    except Exception as e:
        logger.error(f"up_connection_instance: up_instance failed: {e}")
        return {'ok': False, 'error': 'failed to start instance container'}
def down_connection_instance(self, conn: Dict[str, Any],
                             purge_data: bool = True) -> Dict[str, Any]:
    """Stop one connection's container through the service composer.

    Types in ``SINGLE_INSTANCE_TYPES`` are not handled here and report
    ``{'ok': True, 'single_instance': True}``. A type without a store
    service id has nothing to tear down and reports ``{'ok': True}``.
    Otherwise the result of ``service_composer.down_instance`` is returned,
    with ``purge_data`` passed through.
    """
    kind = conn.get('type')
    if kind in self.SINGLE_INSTANCE_TYPES:
        return {'ok': True, 'single_instance': True}

    composer = self.service_composer
    if composer is None:
        return {'ok': False, 'error': 'service_composer unavailable'}

    service_id = self.STORE_SERVICE_IDS.get(kind)
    if service_id:
        return composer.down_instance(
            service_id, self._instance_id(conn), purge_data=purge_data)
    return {'ok': True}
# ── Connectivity v2 — connection CRUD ─────────────────────────────────
def _compute_state(self, conn_type: str, config: Dict[str, Any],
                   secret_refs: List[str]) -> str:
    """Classify a connection as ``'configured'`` or ``'added'``.

    ``_required_for_type`` names the config fields and secret suffixes the
    type needs. The connection is ``'configured'`` only when every required
    config field is truthy and, for every required suffix, some ref in
    ``secret_refs`` ends with ``_<suffix>``; otherwise it is ``'added'``.
    """
    spec = self._required_for_type(conn_type, config)

    if any(not config.get(name) for name in spec.get('config', ())):
        return 'added'

    for suffix in spec.get('secrets', ()):
        tail = f'_{suffix}'
        has_secret = any(ref.endswith(tail) for ref in secret_refs)
        if not has_secret:
            return 'added'
    return 'configured'
def _required_for_type(self, conn_type: str,
                       config: Dict[str, Any]) -> Dict[str, Tuple[str, ...]]:
    """Name what a connection type needs before it counts as configured.

    Returns ``{'config': (...), 'secrets': (...)}``: the non-secret config
    fields and the secret-ref suffixes that must be present. For sshuttle
    the auth secret is ``password`` when ``config['auth']`` is
    ``'password'`` and ``private_key`` otherwise. Tor and unknown types need
    nothing.
    """
    if conn_type == 'sshuttle':
        if config.get('auth') == 'password':
            auth_secret = 'password'
        else:
            auth_secret = 'private_key'
        return {'config': ('host', 'user', 'auth'),
                'secrets': ('known_hosts', auth_secret)}

    if conn_type == 'proxy':
        required_config: Tuple[str, ...] = ('scheme', 'host', 'port')
        required_secrets: Tuple[str, ...] = ()
    elif conn_type in ('wireguard_ext', 'openvpn'):
        required_config, required_secrets = (), ('conf',)
    else:
        # Tor and any unrecognised type have no mandatory fields.
        required_config, required_secrets = (), ()
    return {'config': required_config, 'secrets': required_secrets}
def create_connection(self, conn_type: str, name: str,
                      config: Optional[Dict[str, Any]] = None,
                      secrets: Optional[Dict[str, Any]] = None,
                      ) -> Dict[str, Any]:
    """Create a named connection instance of `conn_type`.

    Steps, in order: validate the type, the name (pattern + case-insensitive
    uniqueness) and the per-type fields; require the backing store service
    to be installed; enforce one connection per single-instance type;
    allocate routing resources; store each secret in the vault under
    ``<conn_id>_<field>``; persist the record (which carries only the secret
    refs); finally try to bring the instance container up.

    Returns ``{'ok': True, 'connection': <public record>}`` on success. When
    the record was created but the container could not be started, the
    result additionally carries ``'container': {'ok': False, 'error': ...}``.
    Every other failure returns ``{'ok': False, 'error': ...}``; secrets
    already written by this call are removed again before such a return.
    """
    if conn_type not in self.CONNECTION_TYPES:
        return {'ok': False, 'error':
                f'invalid type {conn_type!r}; must be one of {self.CONNECTION_TYPES}'}
    if not isinstance(name, str) or not self.CONNECTION_NAME_RE.match(name.strip()):
        return {'ok': False, 'error': 'invalid name: 1-64 chars, '
                'letters/digits/space/._- and must start alphanumeric'}
    name = name.strip()
    config = config or {}
    clean_config, clean_secrets, err = self._validate_connection_config(
        conn_type, config, secrets)
    if err:
        return {'ok': False, 'error': err}

    # The backing store service must be installed before a connection of
    # its type can run a container. Tor (single instance) installs its own
    # container via the store path, so the gate applies to it too.
    if not self._store_service_installed(conn_type):
        svc_id = self.STORE_SERVICE_IDS.get(conn_type, conn_type)
        return {'ok': False, 'error':
                f"the {svc_id!r} service must be installed from the Service "
                f'Store before creating a {conn_type} connection'}

    def discard_secrets(refs):
        # Best-effort rollback of vault entries written by this call. A
        # failure is logged rather than swallowed so leftovers are traceable.
        for stale in refs:
            try:
                self.vault_manager.delete_secret(stale)
            except Exception as e:
                logger.warning(
                    f"create_connection: rollback of vault secret {stale} "
                    f"failed: {e}")

    # The lock is held through container bring-up so that no other
    # operation guarded by the same lock (e.g. a delete of this id) can
    # interleave with the start of the container.
    with self._conn_lock:
        existing = []
        if self.config_manager is not None:
            try:
                existing = self.config_manager.list_connections()
            except Exception as e:
                logger.error(f"create_connection: list failed: {e}")
                return {'ok': False, 'error': 'failed to read connections'}

        if conn_type in self.SINGLE_INSTANCE_TYPES and any(
                c.get('type') == conn_type for c in existing):
            return {'ok': False, 'error':
                    f'only a single {conn_type} connection is supported'}

        # ``or ''`` keeps a stored record with a missing/None name from
        # crashing the uniqueness check.
        wanted = name.lower()
        if any(str(c.get('name') or '').strip().lower() == wanted
               for c in existing):
            return {'ok': False, 'error': f'a connection named {name!r} already exists'}

        conn_id = self._new_conn_id()
        try:
            mark, table, iface, redirect_port = self._allocate_resources(
                conn_type, conn_id)
        except ValueError as e:
            return {'ok': False, 'error': str(e)}

        secret_refs: List[str] = []
        if clean_secrets:
            if self.vault_manager is None:
                return {'ok': False, 'error': 'vault unavailable; cannot store secrets'}
            for field, value in clean_secrets.items():
                ref = f"{conn_id}_{field}"
                try:
                    self.vault_manager.store_secret(ref, value)
                except Exception as e:
                    logger.error(f"create_connection: vault store {ref}: {e}")
                    discard_secrets(secret_refs)
                    return {'ok': False, 'error': 'failed to store secret in vault'}
                secret_refs.append(ref)

        now = self._now_iso()
        state = self._compute_state(conn_type, clean_config, secret_refs)
        record = {
            'id': conn_id,
            'type': conn_type,
            'name': name,
            'enabled': True,
            'mark': mark,
            'table': table,
            'iface': iface,
            'redirect_port': redirect_port,
            'config': clean_config,
            'secret_refs': secret_refs,
            'cell_name': None,
            'status': {
                'state': state,
                'health': 'unknown',
                'last_check': None,
                'detail': None,
            },
            'created_at': now,
            'updated_at': now,
        }
        try:
            self.config_manager.add_connection(record)
        except Exception as e:
            logger.error(f"create_connection: persist failed: {e}")
            discard_secrets(secret_refs)
            return {'ok': False, 'error': 'failed to persist connection'}
        logger.info(f"connectivity: created connection {conn_id} "
                    f"({conn_type}/{name}) mark={hex(mark)} table={table}")

        # Bring up this connection's own container (cell-<slug>-<id>). Tor is
        # single-instance and runs via the store path, so up is a no-op there.
        # A failure here is non-fatal to the record — the connection still
        # exists and can be retried via apply/up — but is surfaced to the
        # caller so the UI can show it.
        up = self.up_connection_instance(record)
        result = {'ok': True, 'connection': self._public_record(record)}
        if not up.get('ok') and not up.get('single_instance'):
            result['container'] = {'ok': False,
                                   'error': up.get('error') or up.get('stderr')}
        return result
def update_connection(self, conn_id: str, name: Optional[str] = None,
                      config: Optional[Dict[str, Any]] = None,
                      secrets: Optional[Dict[str, Any]] = None,
                      ) -> Dict[str, Any]:
    """Update an existing connection's name, config and/or secrets.

    Only the arguments that are not None are applied. A new name must match
    ``CONNECTION_NAME_RE`` and be unique (case-insensitive) among the other
    connections. When ``config`` or ``secrets`` is given, ``config`` is
    merged over the stored config and the result is re-validated for the
    connection's type; validated secrets are written to the vault under
    ``<conn_id>_<field>`` and any new refs are appended to the record.
    ``status.state`` is recomputed whenever something changed.

    Returns ``{'ok': True, 'connection': <public record>}`` or
    ``{'ok': False, 'error': ...}``.
    """
    with self._conn_lock:
        if self.config_manager is None:
            return {'ok': False, 'error': 'config unavailable'}
        record = self.config_manager.get_connection(conn_id)
        if record is None:
            return {'ok': False, 'error': f'connection {conn_id!r} not found'}

        conn_type = record.get('type')
        fields: Dict[str, Any] = {}

        if name is not None:
            if not isinstance(name, str) or not self.CONNECTION_NAME_RE.match(name.strip()):
                return {'ok': False, 'error': 'invalid name'}
            name = name.strip()
            wanted = name.lower()
            for other in self.config_manager.list_connections():
                if other.get('id') == conn_id:
                    continue
                # ``or ''`` tolerates stored records with a missing/None name.
                if str(other.get('name') or '').strip().lower() == wanted:
                    return {'ok': False, 'error':
                            f'a connection named {name!r} already exists'}
            fields['name'] = name

        # ``or`` fallbacks tolerate records that store None for these keys.
        stored_config = record.get('config') or {}
        secret_refs = list(record.get('secret_refs') or [])
        new_config = dict(stored_config)

        if config is not None or secrets is not None:
            merged = dict(stored_config)
            if isinstance(config, dict):
                merged.update(config)
            clean_config, clean_secrets, err = self._validate_connection_config(
                conn_type, merged, secrets)
            if err:
                return {'ok': False, 'error': err}
            new_config = clean_config
            fields['config'] = clean_config
            if clean_secrets:
                if self.vault_manager is None:
                    return {'ok': False, 'error': 'vault unavailable'}
                for field, value in clean_secrets.items():
                    ref = f"{conn_id}_{field}"
                    try:
                        self.vault_manager.store_secret(ref, value)
                    except Exception as e:
                        logger.error(f"update_connection: vault store {ref}: {e}")
                        return {'ok': False, 'error': 'failed to store secret'}
                    if ref not in secret_refs:
                        secret_refs.append(ref)
                fields['secret_refs'] = secret_refs

        if fields:
            fields['updated_at'] = self._now_iso()
            fields['status'] = {
                **(record.get('status') or {}),
                'state': self._compute_state(conn_type, new_config, secret_refs),
            }
            # Report a persistence failure in the same error shape the
            # other failure paths use instead of letting it escape.
            try:
                self.config_manager.update_connection(conn_id, fields)
            except Exception as e:
                logger.error(f"update_connection: persist failed: {e}")
                return {'ok': False, 'error': 'failed to persist connection'}

        updated = self.config_manager.get_connection(conn_id)
        return {'ok': True, 'connection': self._public_record(updated)}
def delete_connection(self, conn_id: str) -> Dict[str, Any]:
    """Remove a connection, its container and its vault secrets.

    Refuses (``ok: False``) when the config manager is missing, the id is
    unknown, or ``_connection_reference`` reports something still using the
    connection. Otherwise the container is torn down and the secrets are
    deleted on a best-effort basis (failures are logged, not raised), and
    the record is removed from the config.
    """
    with self._conn_lock:
        store = self.config_manager
        if store is None:
            return {'ok': False, 'error': 'config unavailable'}

        record = store.get_connection(conn_id)
        if record is None:
            return {'ok': False, 'error': f'connection {conn_id!r} not found'}

        ref = self._connection_reference(conn_id)
        if ref:
            return {'ok': False, 'error':
                    f'connection is in use by {ref}; detach it first'}

        # Container, compose and config go first; a stale container must
        # never prevent the config entry from being removed.
        try:
            self.down_connection_instance(record)
        except Exception as e:
            logger.warning(f"delete_connection: container teardown failed "
                           f"(non-fatal): {e}")

        for secret_ref in record.get('secret_refs', []):
            if self.vault_manager is None:
                continue
            try:
                self.vault_manager.delete_secret(secret_ref)
            except Exception as e:
                logger.warning(f"delete_connection: vault delete {secret_ref}: {e}")

        store.delete_connection(conn_id)
        logger.info(f"connectivity: deleted connection {conn_id}")
        return {'ok': True}
def _connection_reference(self, conn_id: str) -> Optional[str]:
    """Describe who still uses a connection, or return None.

    Peers are checked first: a peer whose ``exit_via`` equals ``conn_id``
    yields ``"peer '<name>'"``. Then the ``egress_overrides`` map in the
    config manager's ``configs`` is checked: a service mapped to ``conn_id``
    yields ``"service '<id>'"``. A lookup that raises is logged at debug
    level and treated as "no reference found" for that source.
    """
    registry = self.peer_registry
    if registry is not None:
        try:
            holder = next(
                (peer for peer in registry.list_peers()
                 if peer.get('exit_via') == conn_id),
                None)
            if holder is not None:
                return f"peer {holder.get('peer')!r}"
        except Exception as e:
            logger.debug(f"_connection_reference (peers): {e}")

    if self.config_manager is None:
        return None
    try:
        overrides = self.config_manager.configs.get('egress_overrides')
        if isinstance(overrides, dict):
            for svc_id in overrides:
                if overrides[svc_id] == conn_id:
                    return f"service {svc_id!r}"
    except Exception as e:
        logger.debug(f"_connection_reference (egress): {e}")
    return None
# ── Cell-relay connections (derived from cell links) ──────────────────
@staticmethod
def _offers_exit(link: Dict[str, Any]) -> bool:
    """Tell whether a cell link can be used as an exit relay.

    True when either ``remote_exit_offered`` or ``exit_offered`` is truthy
    on the link record.
    """
    if link.get('remote_exit_offered'):
        return True
    return bool(link.get('exit_offered'))
def _list_cell_links(self) -> List[Dict[str, Any]]:
    """Fetch the cell link records from the cell link manager.

    Returns an empty list when no ``cell_link_manager`` is wired, when its
    ``list_connections()`` raises (logged as a warning), or when it returns
    something that is not a list.
    """
    manager = self.cell_link_manager
    if manager is None:
        return []
    try:
        links = manager.list_connections()
    except Exception as e:
        logger.warning(f"connectivity: list cell links failed: {e}")
        return []
    if isinstance(links, list):
        return links
    return []
def reconcile_cell_relays(self) -> Dict[str, Any]:
    """Ensure a cell_relay connection exists for each exit-offering cell link.

    For every cell link that offers an exit (see ``_offers_exit``) and has
    no cell_relay connection yet, a record built by
    ``_build_cell_relay_record`` is added. cell_relay connections whose cell
    is no longer offering are deleted, unless ``_connection_reference``
    reports that something still uses them; those are kept and logged.

    Failures to add or delete a single record are logged and skipped, so one
    bad entry does not stop the rest of the reconciliation.

    Returns ``{'created': [...], 'removed': [...]}`` with the affected
    connection ids; both lists are empty when no config manager is wired or
    the connection list cannot be read.
    """
    created: List[str] = []
    removed: List[str] = []
    if self.config_manager is None:
        return {'created': created, 'removed': removed}

    with self._conn_lock:
        try:
            conns = self.config_manager.list_connections()
        except Exception as e:
            logger.warning(f"reconcile_cell_relays: list_connections failed: {e}")
            return {'created': created, 'removed': removed}

        existing = {c.get('cell_name'): c for c in conns
                    if c.get('type') == self.CELL_RELAY_TYPE and c.get('cell_name')}
        offering = {link.get('cell_name') for link in self._list_cell_links()
                    if link.get('cell_name') and self._offers_exit(link)}

        # Create cell_relay connections for newly-offered cells.
        for cell_name in offering:
            if cell_name in existing:
                continue
            rec = self._build_cell_relay_record(cell_name)
            if rec is None:
                continue
            try:
                self.config_manager.add_connection(rec)
            except Exception as e:
                logger.warning(f"reconcile_cell_relays: persist {cell_name!r} "
                               f"failed: {e}")
                continue
            created.append(rec['id'])
            logger.info(f"connectivity: derived cell_relay for cell "
                        f"{cell_name!r} → {rec['id']}")

        # Remove cell_relay connections whose offer was withdrawn — unless
        # something still references them, so a removal never strands a peer.
        for cell_name, rec in existing.items():
            if cell_name in offering:
                continue
            rec_id = rec.get('id')
            if self._connection_reference(rec_id):
                logger.info(f"connectivity: cell_relay {rec_id} for "
                            f"{cell_name!r} no longer offered but still "
                            f"referenced; keeping")
                continue
            try:
                self.config_manager.delete_connection(rec_id)
            except Exception as e:
                logger.warning(f"reconcile_cell_relays: delete {cell_name!r} "
                               f"failed: {e}")
                continue
            removed.append(rec_id)
            logger.info(f"connectivity: removed stale cell_relay "
                        f"{rec_id} for cell {cell_name!r}")

    return {'created': created, 'removed': removed}
def _build_cell_relay_record(self, cell_name: str) -> Optional[Dict[str, Any]]:
    """Assemble a new cell_relay connection record for ``cell_name``.

    A fresh id is generated and routing resources are requested from
    ``_allocate_resources`` for ``CELL_RELAY_TYPE``. The record is named
    ``CELL_RELAY_NAME_PREFIX + cell_name``, starts enabled and in state
    ``'configured'``, and has empty config and secret refs. Returns None
    (after logging a warning) when resource allocation raises ValueError.
    The record is only built here, not persisted.
    """
    conn_id = self._new_conn_id()
    try:
        allocation = self._allocate_resources(self.CELL_RELAY_TYPE, conn_id)
    except ValueError as e:
        logger.warning(f"_build_cell_relay_record({cell_name}): {e}")
        return None
    mark, table, iface, redirect_port = allocation

    timestamp = self._now_iso()
    initial_status = {
        'state': 'configured',
        'health': 'unknown',
        'last_check': None,
        'detail': None,
    }
    record: Dict[str, Any] = {
        'id': conn_id,
        'type': self.CELL_RELAY_TYPE,
        'name': f"{self.CELL_RELAY_NAME_PREFIX}{cell_name}",
        'enabled': True,
        'mark': mark,
        'table': table,
        'iface': iface,
        'redirect_port': redirect_port,
        'config': {},
        'secret_refs': [],
        'cell_name': cell_name,
        'status': initial_status,
        'created_at': timestamp,
        'updated_at': timestamp,
    }
    return record
def list_connections(self) -> List[Dict[str, Any]]:
    """List every connection in public form with a freshly computed state.

    ``reconcile_cell_relays()`` runs first (a failure there is logged and
    ignored) so derived cell_relay entries are up to date. Each stored
    record is then copied, its ``status.state`` replaced by the result of
    ``_compute_state``, and passed through ``_public_record``. Returns an
    empty list when no config manager is wired or the read fails.
    """
    try:
        self.reconcile_cell_relays()
    except Exception as e:
        logger.warning(f"list_connections: cell_relay reconcile failed: {e}")

    store = self.config_manager
    if store is None:
        return []
    try:
        stored = store.list_connections()
    except Exception as e:
        logger.warning(f"list_connections: {e}")
        return []

    public: List[Dict[str, Any]] = []
    for entry in stored:
        current_state = self._compute_state(
            entry.get('type'), entry.get('config', {}),
            entry.get('secret_refs', []))
        status = dict(entry.get('status', {}))
        status['state'] = current_state
        view = dict(entry)
        view['status'] = status
        public.append(self._public_record(view))
    return public
def get_connection(self, conn_id: str) -> Optional[Dict[str, Any]]:
    """Fetch one connection in public form with a freshly computed state.

    Returns None when no config manager is wired or the id is unknown. The
    stored record is copied, its ``status.state`` replaced by the result of
    ``_compute_state``, and passed through ``_public_record``.
    """
    store = self.config_manager
    if store is None:
        return None
    stored = store.get_connection(conn_id)
    if stored is None:
        return None

    status = dict(stored.get('status', {}))
    status['state'] = self._compute_state(
        stored.get('type'), stored.get('config', {}),
        stored.get('secret_refs', []))
    view = dict(stored)
    view['status'] = status
    return self._public_record(view)
@staticmethod
def _public_record(record: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of a connection record with secret values removed.

    The keys ``private_key``, ``password`` and ``conf`` are dropped from the
    top level and from the nested ``config``; ``secret_refs`` stay in place.
    The input record and its config are not modified. A record whose
    ``config`` is missing or None gets an empty config in the copy (a None
    config previously raised TypeError).
    """
    secret_keys = ('private_key', 'password', 'conf')
    rec = dict(record)
    # ``or {}`` covers records that carry ``config: None``.
    config = dict(rec.get('config') or {})
    for key in secret_keys:
        rec.pop(key, None)
        config.pop(key, None)
    rec['config'] = config
    return rec
@staticmethod
def _now_iso() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    utc_now = time.gmtime()
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', utc_now)
# ── Connectivity v2 — v1→v2 migration ─────────────────────────────────
def _migrate_connectivity_v1_to_v2(
    self, legacy: Dict[str, Any],
) -> List[Dict[str, Any]]:
    """Build v2 connection records from the legacy connectivity section.

    For each type in ``CONNECTION_TYPES`` that ``_exit_status`` reports as
    configured, exactly one connection record is produced with its own mark,
    table and (where the type needs them) iface and redirect port. The
    legacy per-type ``config`` under ``legacy['exits']`` is copied with any
    ``private_key`` / ``password`` / ``conf`` keys removed.

    Legacy vault secrets are re-stored under ``<conn_id>_<field>`` and only
    then deleted under their old name, so a secret is never lost: if the
    store fails the old entry is untouched, and if only the delete fails the
    new ref is still recorded (both cases are logged).

    Returns the list of new records; empty when nothing was configured. The
    records are returned to the caller and not persisted here.
    """
    records: List[Dict[str, Any]] = []
    exits = legacy.get('exits') if isinstance(legacy, dict) else {}
    exits = exits if isinstance(exits, dict) else {}

    # Legacy per-type vault secret names → instance secret field.
    legacy_secret_names = {
        'sshuttle': [
            ('connectivity_sshuttle_key', 'private_key'),
            ('connectivity_sshuttle_password', 'password'),
        ],
    }

    # Resources are tracked locally: the records built here are not yet
    # visible through the config manager.
    used_marks, used_tables, used_ports = set(), set(), set()

    for conn_type in self.CONNECTION_TYPES:
        status = self._exit_status(conn_type)
        if not status.get('configured'):
            continue
        conn_id = self._new_conn_id()

        mark = self.MARK_BASE
        while mark in used_marks:
            mark += self.MARK_STRIDE
        used_marks.add(mark)

        table = self.TABLE_BASE
        while table in used_tables:
            table += 1
        used_tables.add(table)

        iface = None
        if conn_type in self.IFACE_TYPES:
            iface = f"{self.IFACE_PREFIXES[conn_type]}{conn_id.split('_')[-1][:8]}"

        redirect_port = None
        if conn_type in self.REDIRECT_TYPES:
            port = self.REDIRECT_PORT_BASE
            while port in used_ports:
                port += 1
            redirect_port = port
            used_ports.add(port)

        legacy_config = exits.get(conn_type)
        config = dict(legacy_config) if isinstance(legacy_config, dict) else {}
        # Never let a stray secret hide in the copied non-secret config.
        for k in ('private_key', 'password', 'conf'):
            config.pop(k, None)

        secret_refs: List[str] = []
        for old_name, field in legacy_secret_names.get(conn_type, []):
            if self.vault_manager is None:
                continue
            try:
                value = self.vault_manager.get_secret(old_name)
            except Exception as e:
                logger.warning(f"migration: read {old_name} failed: {e}")
                value = None
            if not value:
                continue
            new_ref = f"{conn_id}_{field}"
            try:
                self.vault_manager.store_secret(new_ref, value)
            except Exception as e:
                logger.error(f"migration: repoint {old_name} → {new_ref} failed: {e}")
                continue
            secret_refs.append(new_ref)
            # The new copy exists; a failed cleanup of the old name must not
            # be reported as a failed repoint.
            try:
                self.vault_manager.delete_secret(old_name)
            except Exception as e:
                logger.warning(f"migration: stored {new_ref} but could not "
                               f"delete legacy secret {old_name}: {e}")

        now = self._now_iso()
        state = self._compute_state(conn_type, config, secret_refs)
        records.append({
            'id': conn_id,
            'type': conn_type,
            'name': self.DEFAULT_CONNECTION_NAMES.get(conn_type, conn_type),
            'enabled': True,
            'mark': mark,
            'table': table,
            'iface': iface,
            'redirect_port': redirect_port,
            'config': config,
            'secret_refs': secret_refs,
            'cell_name': None,
            'status': {
                'state': state,
                'health': 'unknown',
                'last_check': None,
                'detail': None,
            },
            'created_at': now,
            'updated_at': now,
        })
        logger.info(f"connectivity: migrated legacy {conn_type} exit → {conn_id}")
    return records
# ── Routing application ───────────────────────────────────────────────
def apply_routes(self) -> Dict[str, Any]:
    """Rebuild all connectivity rules and policy routing from scratch.

    Routing is driven by connection *instances*: every connection record
    carries its own mark, table, iface and redirect_port, and a peer's
    exit_via names the connection it egresses through. The method is safe
    to call repeatedly: our chains are flushed and ip rules are removed
    before being re-added.

    Individual failures are logged and skipped; the method always returns
    ``{'ok': True, 'rules_applied': <count>}``.
    """
    applied_count = 0

    try:
        self._ensure_chains()
    except Exception as e:
        logger.warning(f"apply_routes: _ensure_chains failed: {e}")

    # Empty our dedicated chain in both tables; the chains themselves stay.
    for tbl in ('mangle', 'nat'):
        chain_name = self.CONNECTIVITY_CHAIN
        try:
            self._flush_chain(tbl, chain_name)
        except Exception as e:
            logger.warning(f"apply_routes: flush {tbl}/{chain_name} failed: {e}")

    active_conns = self._routing_connections()

    # One fwmark→table ip rule per instance. cell_relay connections are
    # left out: their egress is policy-routed inside cell-wireguard
    # (apply_peer_route_via), not through a host mark/table.
    for entry in active_conns:
        if entry.get('type') == self.CELL_RELAY_TYPE:
            continue
        fwmark = entry.get('mark')
        rt_table = entry.get('table')
        if not (isinstance(fwmark, int) and isinstance(rt_table, int)):
            continue
        try:
            self._remove_ip_rule(fwmark, rt_table)
            self._add_ip_rule(fwmark, rt_table)
            applied_count += 1
        except Exception as e:
            logger.warning(
                f"apply_routes: ip rule {entry.get('id')} failed: {e}")

    # Per-peer marking + nat redirect. A peer whose connection is down AND
    # whose effective fail-open is True gets no mark at all, so its traffic
    # follows the default route. Fail-closed peers keep their mark so the
    # kill-switch blocks them while the tunnel is down.
    #
    # Only connections that ended up with at least one marked peer are
    # remembered here; the kill-switch pass below consults this set.
    conns_with_marked_peers: set = set()
    if self.peer_registry is not None:
        try:
            peer_list = self.peer_registry.list_peers()
        except Exception as e:
            logger.warning(f"apply_routes: list_peers failed: {e}")
            peer_list = []
        conn_index = {c.get('id'): c for c in active_conns}
        for peer in peer_list:
            target = self._resolve_peer_connection(peer, conn_index)
            if target is None:
                continue
            if self._peer_fails_open(peer, target):
                logger.info(
                    f"apply_routes: peer {peer.get('peer')!r} fails open over "
                    f"down connection {target.get('id')!r}; skipping mark")
                continue
            source_ip = self._peer_source_ip(peer.get('peer', ''))
            if not source_ip:
                continue
            if target.get('type') == self.CELL_RELAY_TYPE:
                # cell_relay peers leave through the connected cell's WG
                # tunnel, not through a local exit container.
                applied_count += self._apply_cell_relay_for_peer(peer, target)
                continue
            applied_count += self._apply_connection_for_src(source_ip, target)
            conns_with_marked_peers.add(target.get('id'))

    # Kill-switch: drop marked packets that would leave via anything other
    # than the connection's own interface. Installed only for iface-based
    # connections that still have a marked peer.
    for entry in active_conns:
        exit_iface = entry.get('iface')
        fwmark = entry.get('mark')
        if (not exit_iface
                or not isinstance(fwmark, int)
                or entry.get('id') not in conns_with_marked_peers):
            continue
        try:
            self._add_killswitch(fwmark, exit_iface)
            applied_count += 1
        except Exception as e:
            logger.warning(
                f"apply_routes: killswitch {entry.get('id')}: {e}")

    return {'ok': True, 'rules_applied': applied_count}
def _peer_fails_open(self, peer: Dict[str, Any],
                     conn: Dict[str, Any]) -> bool:
    """Tell whether this peer should bypass its connection right now.

    Only a connection whose recorded health is exactly 'down' can trigger
    the fallback, and then only if effective_failopen() says so. Any other
    health value (including a missing status) keeps the peer routed
    through the connection.
    """
    recorded = conn.get('status') or {}
    if recorded.get('health') == 'down':
        return self.effective_failopen(peer, conn)
    return False
def _routing_connections(self) -> List[Dict[str, Any]]:
    """List the connection records that take part in routing.

    A record takes part unless its 'enabled' key is falsy (a missing key
    counts as enabled). Returns an empty list when there is no
    config_manager or when listing the connections fails.
    """
    manager = self.config_manager
    if manager is None:
        return []
    try:
        all_records = manager.list_connections()
    except Exception as e:
        logger.warning(f"apply_routes: list_connections failed: {e}")
        return []
    return list(filter(lambda rec: rec.get('enabled', True), all_records))
@staticmethod
def _resolve_peer_connection(
    peer: Dict[str, Any], by_id: Dict[str, Dict[str, Any]],
) -> Optional[Dict[str, Any]]:
    """Map a peer's exit_via onto a connection record.

    exit_via holds a connection id; the literal 'default' (also assumed
    when the key is missing) means "no connection" and yields None, as
    does an id that is not present in by_id.
    """
    target_id = peer.get('exit_via', 'default')
    return None if target_id == 'default' else by_id.get(target_id)
def _connection_by_id(self, conn_id: str) -> Optional[Dict[str, Any]]:
    """Fetch the raw connection record with this id, or None.

    No enabled/type filtering is applied, so cell_relay and disabled
    records are returned too. An empty id, the id 'default', a missing
    config_manager, or a lookup error all yield None.
    """
    if self.config_manager is None or not conn_id or conn_id == 'default':
        return None
    try:
        matches = (rec for rec in self.config_manager.list_connections()
                   if rec.get('id') == conn_id)
        return next(matches, None)
    except Exception as e:
        logger.warning(f"_connection_by_id({conn_id}): {e}")
    return None
def _apply_connection_for_src(
    self, src_ip: str, conn: Dict[str, Any],
) -> int:
    """Install the per-peer rules that steer src_ip through a connection.

    A fwmark rule is added whenever the connection has an integer mark.
    Connections whose type is in REDIRECT_TYPES and that have an integer
    redirect_port additionally get a TCP REDIRECT rule to that port.

    Returns how many of those rules were added without an exception;
    failures are logged and do not propagate.
    """
    count = 0

    fwmark = conn.get('mark')
    if isinstance(fwmark, int):
        try:
            self._add_mark_rule(src_ip, fwmark)
        except Exception as e:
            logger.warning(
                f"apply_routes: mark rule for {src_ip}/{conn.get('id')}: {e}")
        else:
            count += 1

    listen_port = conn.get('redirect_port')
    wants_redirect = (conn.get('type') in self.REDIRECT_TYPES
                      and isinstance(listen_port, int))
    if wants_redirect:
        try:
            self._add_redirect(src_ip, listen_port)
        except Exception as e:
            logger.warning(
                f"apply_routes: redirect for {src_ip}/{conn.get('id')}: {e}")
        else:
            count += 1

    return count
def _cell_relay_link(self, conn: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Find the cell link that backs a cell_relay connection.

    The link must carry the connection's cell_name and must pass
    _offers_exit(). None is returned when the connection has no cell_name
    or no such link exists, which callers treat as "relay down".
    """
    wanted = conn.get('cell_name')
    if not wanted:
        return None
    candidates = (
        link for link in self._list_cell_links()
        if link.get('cell_name') == wanted and self._offers_exit(link)
    )
    return next(candidates, None)
def _cell_relay_loops(self, peer: Dict[str, Any],
                      cell_name: str) -> bool:
    """Detect an exit loop (A→B→A) before routing a peer via cell_name.

    Any link record for cell_name with a truthy `exit_offered` or
    `exit_relay_active` is taken to mean we already serve as that cell's
    exit, so sending a peer through it would close the loop. In that case
    a warning is logged and True is returned; otherwise False.
    """
    # NOTE(review): _apply_cell_relay_for_peer calls
    # cell_link_manager.set_exit_relay_active(cell_name, True) for the cell
    # it routes through. If that writes the same `exit_relay_active` field
    # read here, a later apply_routes pass would flag our own relay as a
    # loop — confirm the field's direction against cell_link_manager.
    for link in self._list_cell_links():
        is_target = link.get('cell_name') == cell_name
        if is_target and (link.get('exit_offered')
                          or link.get('exit_relay_active')):
            logger.warning(
                f"apply_routes: refusing to route peer "
                f"{peer.get('peer')!r} via cell {cell_name!r} — we already "
                f"act as that cell's exit relay (routing loop)")
            return True
    return False
def _apply_cell_relay_for_peer(self, peer: Dict[str, Any],
                               conn: Dict[str, Any]) -> int:
    """Route a peer's internet through the connected cell behind a cell_relay.

    Bridges the connection model onto the existing cell-exit mechanism:

    1. policy-routes the peer inside cell-wireguard to the cell's WG IP
       (wireguard_manager.apply_peer_route_via),
    2. keeps peer.route_via in sync so startup replay and firewall
       reconciliation operate on the same peer,
    3. marks the link's exit relay active, and
    4. applies the cell exit firewall rules.

    Nothing is applied (and 0 is returned) when the backing link no longer
    offers an exit, when the assignment would form a loop (A→B→A), or when
    the peer IP / cell WG IP is missing.

    Returns the number of routes applied (0 or 1); steps 2-4 are
    best-effort and are not counted.
    """
    cell_name = conn.get('cell_name')
    link = self._cell_relay_link(conn)
    if link is None:
        logger.info(f"apply_routes: cell_relay {conn.get('id')} for cell "
                    f"{cell_name!r} no longer offers an exit; skipping")
        return 0
    if self._cell_relay_loops(peer, cell_name):
        return 0

    peer_ip = (peer.get('ip') or '').split('/')[0]
    via_wg_ip = link.get('dns_ip')
    if not peer_ip or not via_wg_ip:
        logger.warning(f"apply_routes: cell_relay {conn.get('id')} missing "
                       f"peer_ip/via_wg_ip; skipping")
        return 0

    applied = 0
    # Fall back to table 100 when the record carries no integer table.
    table = conn.get('table') if isinstance(conn.get('table'), int) else 100
    if self.wireguard_manager is not None:
        try:
            self.wireguard_manager.apply_peer_route_via(
                peer_ip, via_wg_ip=via_wg_ip, table=table)
            applied += 1
        except Exception as e:
            # Separate the two addresses so the log line stays readable.
            logger.warning(f"apply_routes: cell_relay route_via "
                           f"{peer_ip} → {via_wg_ip}: {e}")

    # Keep the legacy route_via field in sync so the existing startup
    # replay (app.py) and firewall reconciliation operate on the same peer.
    if self.peer_registry is not None:
        try:
            if peer.get('route_via') != cell_name:
                self.peer_registry.set_route_via(peer.get('peer'), cell_name)
        except Exception as e:
            logger.warning(f"apply_routes: set_route_via {cell_name!r}: {e}")

    # Mark the relay active on the link + apply the cell exit firewall rules
    # (idempotent; mirrors apply_remote_permissions' exit_relay path).
    if self.cell_link_manager is not None:
        try:
            self.cell_link_manager.set_exit_relay_active(cell_name, True)
        except Exception as e:
            logger.warning(f"apply_routes: set_exit_relay_active "
                           f"{cell_name!r}: {e}")
    self._apply_cell_exit_firewall(link)
    return applied
def _apply_cell_exit_firewall(self, link: Dict[str, Any]) -> None:
    """Best-effort: push the exit-relay firewall rules for one cell link.

    firewall_manager is imported lazily; if the import fails the call is a
    silent no-op (debug log only). The names of all truthy entries in the
    link's permissions['inbound'] map are handed to apply_cell_rules along
    with exit_relay=True. Errors from apply_cell_rules are logged.
    """
    try:
        import firewall_manager as _fm
    except Exception as e:
        logger.debug(f"_apply_cell_exit_firewall: import failed: {e}")
        return

    permissions = link.get('permissions') or {}
    if isinstance(permissions, dict):
        inbound_map = permissions.get('inbound', {})
    else:
        inbound_map = {}
    allowed_services = [svc for svc, enabled in inbound_map.items() if enabled]

    try:
        _fm.apply_cell_rules(link.get('cell_name'), link.get('vpn_subnet'),
                             allowed_services, exit_relay=True)
    except Exception as e:
        logger.warning(f"_apply_cell_exit_firewall({link.get('cell_name')!r}): "
                       f"{e}")
# ── iptables / ip rule helpers ────────────────────────────────────────
def _wg_iptables(self, args: List[str], timeout: int = 10) -> subprocess.CompletedProcess:
    """Invoke `iptables <args>` via `docker exec` in the WireGuard container.

    Output is captured as text; the CompletedProcess is returned unchecked,
    so callers inspect returncode themselves.
    """
    docker_prefix = ['docker', 'exec', WIREGUARD_CONTAINER, 'iptables']
    return subprocess.run(
        docker_prefix + args,
        timeout=timeout,
        text=True,
        capture_output=True,
    )
def _wg_ip(self, args: List[str], timeout: int = 10) -> subprocess.CompletedProcess:
    """Invoke `ip <args>` via `docker exec` in the WireGuard container.

    Output is captured as text; the CompletedProcess is returned unchecked.
    """
    docker_prefix = ['docker', 'exec', WIREGUARD_CONTAINER, 'ip']
    return subprocess.run(
        docker_prefix + args,
        timeout=timeout,
        text=True,
        capture_output=True,
    )
def _ensure_chains(self) -> None:
    """Make sure our chain exists in mangle and nat and is jumped to.

    For each table: list the chain to see whether it exists, create it if
    not (an "exists" error from the create is tolerated), then check for a
    PREROUTING jump and insert one at position 1 when it is missing.
    """
    chain = self.CONNECTIVITY_CHAIN
    for table in ('mangle', 'nat'):
        listing = self._wg_iptables(['-t', table, '-L', chain, '-n'])
        if listing.returncode != 0:
            created = self._wg_iptables(['-t', table, '-N', chain])
            create_failed = (created.returncode != 0
                             and 'exists' not in (created.stderr or ''))
            if create_failed:
                logger.warning(
                    f"_ensure_chains: cannot create {table}/{chain}: "
                    f"{created.stderr.strip()}"
                )

        # -C only tests for the jump; -I adds it at the top when absent.
        jump_present = self._wg_iptables(
            ['-t', table, '-C', 'PREROUTING', '-j', chain])
        if jump_present.returncode != 0:
            self._wg_iptables(
                ['-t', table, '-I', 'PREROUTING', '1', '-j', chain])
def _flush_chain(self, table: str, chain: str) -> None:
    """Empty `chain` in `table` with `iptables -F`; the chain itself remains."""
    flush_args = ['-t', table, '-F', chain]
    self._wg_iptables(flush_args)
def _add_ip_rule(self, mark: int, table: int) -> None:
    """Run `ip rule add fwmark <mark> lookup <table>` (mark passed as hex)."""
    rule_args = ['rule', 'add', 'fwmark', hex(mark), 'lookup', str(table)]
    self._wg_ip(rule_args)
def _remove_ip_rule(self, mark: int, table: int) -> None:
    """Delete every `ip rule fwmark <mark> lookup <table>` entry.

    `ip rule del` removes one rule per call and fails once nothing matches,
    so the delete is repeated until it fails, capped at 8 attempts.
    """
    attempts_left = 8
    while attempts_left > 0:
        attempts_left -= 1
        outcome = self._wg_ip(
            ['rule', 'del', 'fwmark', hex(mark), 'lookup', str(table)])
        if outcome.returncode != 0:
            return
def _add_mark_rule(self, src_ip: str, mark: int) -> None:
    """Append a mangle rule to our chain that sets `mark` on packets from src_ip."""
    match_source = ['-s', src_ip]
    set_mark = ['-j', 'MARK', '--set-mark', hex(mark)]
    self._wg_iptables(
        ['-t', 'mangle', '-A', self.CONNECTIVITY_CHAIN] + match_source + set_mark)
def _add_redirect(self, src_ip: str, port: int) -> None:
    """Append a nat rule to our chain that REDIRECTs TCP from src_ip to `port`."""
    match_tcp_source = ['-s', src_ip, '-p', 'tcp']
    redirect_target = ['-j', 'REDIRECT', '--to-ports', str(port)]
    self._wg_iptables(
        ['-t', 'nat', '-A', self.CONNECTIVITY_CHAIN]
        + match_tcp_source + redirect_target)
def _add_tor_redirect(self, src_ip: str) -> None:
    """Send the peer's TCP traffic to TOR_TRANS_PORT via _add_redirect."""
    trans_port = self.TOR_TRANS_PORT
    self._add_redirect(src_ip, trans_port)
def _add_killswitch(self, mark: int, iface: Optional[str]) -> None:
    """Install a FORWARD DROP for marked packets leaving by any other interface.

    The rule matches `mark` and `! -o iface`. It is tested with -C first and
    appended with -A only when absent, so repeated calls do not duplicate it.
    With no iface (e.g. Tor, whose traffic is redirected in nat and never
    reaches FORWARD) nothing is done.
    """
    if not iface:
        return
    rule_spec = ['-m', 'mark', '--mark', hex(mark),
                 '!', '-o', iface, '-j', 'DROP']
    already_there = self._wg_iptables(['-C', 'FORWARD'] + rule_spec)
    if already_there.returncode != 0:
        self._wg_iptables(['-A', 'FORWARD'] + rule_spec)
def _exit_status(self, exit_type: str) -> Dict[str, Any]:
    """Summarise one exit type: is it configured, and is its interface up?

    "Configured" is true when any of these holds:
      * a legacy uploaded config file is present (wg_ext0.conf, any *.ovpn,
        sshuttle.conf, redsocks.conf — Tor has no such file),
      * the backing store service is installed,
      * the exit's container is running.

    The interface check runs `ip link show` in the WireGuard container for
    the type's entry in IFACES, if it has one.

    Returns ``{'configured', 'iface_up', 'status'}`` where status is
    'active' (iface up), 'configured', or 'not_configured'.
    """
    # (directory attribute, marker file) for the single-file legacy uploads.
    single_file_markers = {
        'wireguard_ext': ('wireguard_ext_dir', 'wg_ext0.conf'),
        'sshuttle': ('sshuttle_dir', 'sshuttle.conf'),
        'proxy': ('proxy_dir', 'redsocks.conf'),
    }

    configured = False
    if exit_type == 'openvpn':
        try:
            configured = any(
                entry.endswith('.ovpn')
                for entry in os.listdir(self.openvpn_dir)
            )
        except OSError:
            configured = False
    elif exit_type in single_file_markers:
        dir_attr, marker = single_file_markers[exit_type]
        configured = os.path.isfile(os.path.join(getattr(self, dir_attr), marker))
    # 'tor' deliberately has no file check: it only counts as configured
    # through the store-installed / container-running bridge below.

    if not configured:
        if (self._store_service_installed(exit_type)
                or self._exit_container_running(exit_type)):
            configured = True

    iface_up = False
    iface = self.IFACES.get(exit_type)
    if iface:
        try:
            shown = self._wg_ip(['link', 'show', iface], timeout=5)
            iface_up = shown.returncode == 0 and 'UP' in (shown.stdout or '')
        except Exception:
            iface_up = False

    if iface_up:
        label = 'active'
    elif configured:
        label = 'configured'
    else:
        label = 'not_configured'
    return {'configured': configured, 'iface_up': iface_up, 'status': label}
def _store_service_installed(self, exit_type: str) -> bool:
    """Check whether this exit type's store service is listed as installed.

    The service id comes from STORE_SERVICE_IDS; it must appear as a key in
    the dict returned by config_manager.get_installed_services(). Any
    missing piece or lookup error yields False.
    """
    service_id = self.STORE_SERVICE_IDS.get(exit_type)
    if self.config_manager is None or not service_id:
        return False
    try:
        installed = self.config_manager.get_installed_services()
    except Exception as e:
        logger.debug(f"_store_service_installed({exit_type}): {e}")
        return False
    if not isinstance(installed, dict):
        return False
    return service_id in installed
def _exit_container_running(self, exit_type: str) -> bool:
    """Ask docker whether the container mapped to this exit type is running.

    The container name comes from EXIT_CONTAINERS. False is returned when
    the type has no container, docker inspect fails, or it reports anything
    other than 'true'.
    """
    container = self.EXIT_CONTAINERS.get(exit_type)
    if not container:
        return False
    inspect_cmd = ['docker', 'inspect', '-f', '{{.State.Running}}', container]
    try:
        outcome = subprocess.run(
            inspect_cmd, capture_output=True, text=True, timeout=5)
        return outcome.returncode == 0 and outcome.stdout.strip() == 'true'
    except Exception:
        return False
# ── Health probing ────────────────────────────────────────────────────
#
# probe_health(connection) returns 'working' | 'down' | 'unknown' per type.
# All real subprocess/socket calls are factored behind small helpers
# (_exec_in_container, _tcp_reachable) so tests patch those, never raw
# subprocess/socket. refresh_health() persists results via
# config_manager.set_connection_status and caches them for HEALTH_TTL.
# Per-type fallback default: tor fails open (direct internet on outage),
# all interface/tunnel/proxy types fail closed (traffic blocked on outage).
# effective_failopen() consults this map only when a peer carries no boolean
# exit_failopen override; a connection type missing from the map is treated
# as fail-closed (False).
FAILOPEN_DEFAULTS = {
"wireguard_ext": False,
"openvpn": False,
"sshuttle": False,
"proxy": False,
"tor": True,
"cell_relay": False,
}
def probe_health(self, connection: Dict[str, Any]) -> Tuple[str, Optional[str]]:
    """Run the liveness probe that matches the connection's type.

    Returns ``(health, detail)`` with health one of 'working', 'down' or
    'unknown' and detail a short human-readable string (or None). An
    exception inside a probe is logged and reported as
    ``('unknown', str(error))``; a type with no probe is also 'unknown'.
    """
    conn_type = connection.get('type')
    # Probe method per built-in type, checked in this order.
    probe_names = (
        ('wireguard_ext', '_probe_wireguard_ext'),
        ('openvpn', '_probe_openvpn'),
        ('tor', '_probe_tor'),
        ('sshuttle', '_probe_sshuttle'),
        ('proxy', '_probe_proxy'),
    )
    try:
        for kind, method_name in probe_names:
            if conn_type == kind:
                return getattr(self, method_name)(connection)
        if conn_type == self.CELL_RELAY_TYPE:
            return self._probe_cell_relay(connection)
    except Exception as e:
        logger.warning(f"probe_health({connection.get('id')}): {e}")
        return 'unknown', str(e)
    return 'unknown', f'no probe for type {conn_type!r}'
def _probe_wireguard_ext(
    self, conn: Dict[str, Any],
) -> Tuple[str, Optional[str]]:
    """Judge a WireGuard exit by the age of its most recent handshake.

    Runs `wg show <iface> latest-handshakes` in the instance's container and
    takes the largest timestamp in the last column. The exit is 'working'
    when that handshake is at most WG_HANDSHAKE_MAX_AGE seconds old, 'down'
    when the command fails, no handshake was ever recorded, or it is stale,
    and 'unknown' when the connection has no interface.
    """
    iface = conn.get('iface')
    if not iface:
        return 'unknown', 'no interface assigned'

    outcome = self._exec_in_container(
        self.instance_container_name(conn),
        ['wg', 'show', iface, 'latest-handshakes'])
    if outcome is None or outcome.returncode != 0:
        return 'down', 'wg show failed or interface absent'

    # Seeded with 0 so "no positive timestamp" collapses to 0.
    stamps = [0]
    for row in (outcome.stdout or '').splitlines():
        fields = row.split()
        if len(fields) < 2:
            continue
        try:
            stamps.append(int(fields[-1]))
        except ValueError:
            continue
    newest = max(stamps)
    if newest == 0:
        return 'down', 'no handshake yet'

    age = int(time.time()) - newest
    if age > self.WG_HANDSHAKE_MAX_AGE:
        return 'down', f'handshake stale ({age}s ago)'
    return 'working', f'handshake {age}s ago'
def _probe_openvpn(self, conn: Dict[str, Any]) -> Tuple[str, Optional[str]]:
    """Judge an OpenVPN exit by whether its tun interface is present and UP.

    The instance's own iface is looked up inside the instance container
    first. If that lookup cannot be made or fails, the legacy fixed
    interface (IFACES['openvpn'], default 'tun0') is checked in the
    WireGuard container instead.
    """
    iface = conn.get('iface')
    container = self.instance_container_name(conn)

    # The tun device lives in the openvpn container's network namespace.
    primary = None
    if iface:
        primary = self._exec_in_container(
            container, ['ip', 'link', 'show', iface])

    if primary is not None and primary.returncode == 0:
        if 'UP' in (primary.stdout or ''):
            return 'working', 'tun up'
        return 'down', 'tun interface down'

    legacy_iface = self.IFACES.get('openvpn', 'tun0')
    legacy = self._wg_ip(['link', 'show', legacy_iface])
    if legacy.returncode == 0 and 'UP' in (legacy.stdout or ''):
        return 'working', 'tun up'
    return 'down', 'tun interface absent or down'
def _probe_tor(self, conn: Dict[str, Any]) -> Tuple[str, Optional[str]]:
    """Judge Tor by its container state, refined by the bootstrap log.

    'down' when the Tor container is not running. Otherwise always
    'working': the detail is 'bootstrapped' when notices.log contains
    "Bootstrapped 100", and 'running' when that could not be confirmed
    (a running container is the cheap signal, and tor fails open by
    default).
    """
    tor_container = self.EXIT_CONTAINERS.get('tor')
    if not self._container_running(tor_container):
        return 'down', 'tor container not running'

    grep_cmd = ('grep -m1 "Bootstrapped 100" /var/log/tor/notices.log '
                '2>/dev/null || true')
    outcome = self._exec_in_container(tor_container, ['sh', '-c', grep_cmd])
    bootstrapped = (outcome is not None
                    and 'Bootstrapped 100' in (outcome.stdout or ''))
    return 'working', ('bootstrapped' if bootstrapped else 'running')
def _probe_sshuttle(self, conn: Dict[str, Any]) -> Tuple[str, Optional[str]]:
    """Judge sshuttle by reachability of both the SSH host and its listener.

    'unknown' without a configured host; 'down' when the SSH host:port
    (port defaults to 22) does not accept TCP, or when the connection has
    an integer redirect_port whose local listener is not reachable;
    'working' otherwise.
    """
    settings = conn.get('config', {})
    host = settings.get('host')
    port = settings.get('port', 22)
    if not host:
        return 'unknown', 'no host configured'

    ssh_ok = self._tcp_reachable(host, int(port))
    if not ssh_ok:
        return 'down', f'ssh host {host}:{port} unreachable'

    listen_port = conn.get('redirect_port')
    container = self.instance_container_name(conn)
    if isinstance(listen_port, int):
        if not self._listener_reachable(container, listen_port):
            return 'down', f'sshuttle listener :{listen_port} down'
    return 'working', 'ssh host + listener reachable'
def _probe_proxy(self, conn: Dict[str, Any]) -> Tuple[str, Optional[str]]:
    """Judge a proxy exit by a TCP connect to its upstream host:port.

    'unknown' when host or port is not configured, 'working' when the
    upstream accepts the connection, 'down' otherwise.
    """
    settings = conn.get('config', {})
    host, port = settings.get('host'), settings.get('port')
    if not (host and port):
        return 'unknown', 'no upstream host/port configured'
    if not self._tcp_reachable(host, int(port)):
        return 'down', f'upstream {host}:{port} unreachable'
    return 'working', f'{host}:{port} reachable'
def _probe_cell_relay(self, conn: Dict[str, Any]) -> Tuple[str, Optional[str]]:
    """Judge a cell_relay by the cell link's exit offer and online flag.

    The status comes from cell_link_manager.get_connection_status(cell_name).
    'unknown' without a cell_name or cell_link_manager; 'down' when fetching
    the status raises, the status fails _offers_exit(), or its 'online'
    value is falsy; 'working' otherwise.
    """
    cell_name = conn.get('cell_name')
    links = self.cell_link_manager
    if links is None or not cell_name:
        return 'unknown', 'no cell link manager'

    try:
        link_status = links.get_connection_status(cell_name)
    except Exception as e:
        return 'down', f'cell link unavailable: {e}'

    if not self._offers_exit(link_status):
        return 'down', 'cell no longer offers its exit'
    if link_status.get('online'):
        return 'working', 'cell tunnel up + exit offered'
    return 'down', 'cell tunnel handshake stale or absent'
def _listener_reachable(self, container: Optional[str], port: int) -> bool:
    """Check for a TCP listener on 127.0.0.1:<port>.

    First tries `nc -z` inside the container; if that does not print "ok"
    (listener down, nc missing, or exec failed) the answer comes from a
    direct _tcp_reachable('127.0.0.1', port) call instead.
    """
    shell_probe = f'nc -z -w2 127.0.0.1 {port} 2>/dev/null && echo ok || true'
    outcome = self._exec_in_container(container, ['sh', '-c', shell_probe])
    confirmed_inside = outcome is not None and 'ok' in (outcome.stdout or '')
    if confirmed_inside:
        return True
    return self._tcp_reachable('127.0.0.1', port)
def _container_running(self, container: Optional[str]) -> bool:
    """Report whether docker says the named container is running.

    Uses `docker inspect -f {{.State.Running}}`. An empty name, a failing
    inspect, or any exception yields False.
    """
    if not container:
        return False
    inspect_cmd = ['docker', 'inspect', '-f', '{{.State.Running}}', container]
    try:
        outcome = subprocess.run(
            inspect_cmd, capture_output=True, text=True, timeout=5)
    except Exception:
        return False
    return outcome.returncode == 0 and outcome.stdout.strip() == 'true'
def _exec_in_container(
    self, container: Optional[str], args: List[str], timeout: int = 8,
) -> Optional[subprocess.CompletedProcess]:
    """Run `docker exec <container> <args…>` and return the result.

    Output is captured as text. None is returned when no container is
    given or when launching the command raises (including a timeout).
    Probes route their container commands through here so tests have a
    single point to patch.
    """
    if not container:
        return None
    full_cmd = ['docker', 'exec', container] + args
    try:
        outcome = subprocess.run(
            full_cmd, capture_output=True, text=True, timeout=timeout)
    except Exception as e:
        logger.debug(f"_exec_in_container({container}): {e}")
        return None
    return outcome
def _tcp_reachable(self, host: str, port: int) -> bool:
    """Try a TCP connect to host:port within PROBE_TCP_TIMEOUT.

    Returns True when the connection is established (it is closed again
    immediately) and False on any error. Probes route their socket checks
    through here so tests have a single point to patch.
    """
    try:
        sock = socket.create_connection(
            (host, port), timeout=self.PROBE_TCP_TIMEOUT)
        sock.close()
    except Exception:
        return False
    return True
def refresh_health(
    self, connection_id: Optional[str] = None, force: bool = False,
) -> Dict[str, Any]:
    """Probe one or all connections and persist health into their status.

    Args:
        connection_id: restrict the refresh to this connection id; None
            refreshes every connection.
        force: probe even when the stored result is still fresh.

    A connection whose last_check is younger than HEALTH_TTL is not
    re-probed unless force=True; its stored health is reported as-is.
    Disabled connections and records without an id are skipped.

    Returns:
        ``{connection_id: health}`` for every connection considered. Empty
        when there is no config_manager or listing connections fails.
    """
    if self.config_manager is None:
        return {}
    try:
        conns = self.config_manager.list_connections()
    except Exception as e:
        logger.warning(f"refresh_health: list_connections failed: {e}")
        return {}
    if connection_id is not None:
        conns = [c for c in conns if c.get('id') == connection_id]

    results: Dict[str, Any] = {}
    now = int(time.time())
    for conn in conns:
        cid = conn.get('id')
        if not cid or not conn.get('enabled', True):
            continue
        # A record may carry an explicit `status: None`; treat it like a
        # missing status instead of letting dict(None) abort the refresh.
        status = dict(conn.get('status') or {})
        if not force and self._health_is_fresh(status, now):
            results[cid] = status.get('health', 'unknown')
            continue
        health, detail = self.probe_health(conn)
        status['health'] = health
        status['detail'] = detail
        status['last_check'] = self._now_iso()
        try:
            self.config_manager.set_connection_status(cid, status)
        except Exception as e:
            # Persisting is best-effort; the probed health is still returned.
            logger.warning(f"refresh_health: persist {cid} failed: {e}")
        results[cid] = health
    return results
def _health_is_fresh(self, status: Dict[str, Any], now: int) -> bool:
    """True when status.last_check is within HEALTH_TTL of `now`.

    last_check is a UTC ISO string (from _now_iso); calendar.timegm parses
    it back as UTC so the comparison is timezone-consistent with `now`
    (int(time.time()), also epoch/UTC).

    A missing, malformed, or non-string last_check counts as stale, so the
    caller re-probes rather than trusting it.
    """
    import calendar
    last = status.get('last_check')
    if not last:
        return False
    try:
        ts = calendar.timegm(time.strptime(last, '%Y-%m-%dT%H:%M:%SZ'))
        return (now - int(ts)) < self.HEALTH_TTL
    except (TypeError, ValueError, OverflowError):
        # TypeError: last_check is not a string (e.g. a hand-edited or
        # legacy numeric value); strptime rejects it before parsing.
        return False
def effective_failopen(self, peer: Dict[str, Any],
                       conn: Optional[Dict[str, Any]]) -> bool:
    """Decide whether a peer fails open over its connection.

    A boolean peer['exit_failopen'] wins outright. Otherwise the default
    for the connection's type is looked up in FAILOPEN_DEFAULTS; a missing
    connection or an unlisted type fails closed (False).
    """
    peer_choice = peer.get('exit_failopen')
    if isinstance(peer_choice, bool):
        return peer_choice
    kind = None if not conn else conn.get('type')
    return self.FAILOPEN_DEFAULTS.get(kind, False)
def set_peer_failopen(self, peer_name: str,
                      failopen: Optional[bool]) -> Dict[str, Any]:
    """Store a peer's fail-open override and re-apply routing.

    failopen is True/False to force the behaviour, or None to clear the
    override so the per-type default applies again.

    Returns ``{'ok': True, 'peer', 'exit_failopen'}`` on success or
    ``{'ok': False, 'error': ...}`` when the value is invalid, the peer
    registry is unavailable, the peer is unknown, or the update fails.
    A failing apply_routes afterwards is logged but does not fail the call.
    """
    value_ok = failopen is None or isinstance(failopen, bool)
    if not value_ok:
        return {'ok': False, 'error': 'failopen must be a boolean or null'}
    if self.peer_registry is None:
        return {'ok': False, 'error': 'peer_registry not available'}
    if not self._peer_exists(peer_name):
        return {'ok': False, 'error': f'peer {peer_name!r} not found'}

    try:
        self.peer_registry.update_peer(peer_name, {'exit_failopen': failopen})
    except Exception as e:
        logger.error(f"set_peer_failopen({peer_name}): {e}")
        return {'ok': False, 'error': str(e)}

    try:
        self.apply_routes()
    except Exception as e:
        logger.warning(f"set_peer_failopen: apply_routes failed (non-fatal): {e}")

    return {'ok': True, 'peer': peer_name, 'exit_failopen': failopen}
def _peer_source_ip(self, peer_name: str) -> Optional[str]:
    """Look up a peer's 'ip' in the registry and drop any /CIDR suffix.

    None is returned for an empty name, a missing registry, a lookup error,
    an unknown peer, or a peer without an ip.
    """
    registry = self.peer_registry
    if registry is None or not peer_name:
        return None
    try:
        record = registry.get_peer(peer_name)
    except Exception as e:
        logger.warning(f"_peer_source_ip({peer_name}): {e}")
        return None
    if not record:
        return None
    address = record.get('ip', '')
    return address.split('/')[0] if address else None
# ── Config validation ─────────────────────────────────────────────────
def _validate_wg_conf(self, text: str) -> str:
    """Strip Pre/Post-Up/Down hooks and reject a conflicting wg0 interface.

    Every ``key = value`` line whose key matches an entry of
    _WG_FORBIDDEN_PREFIXES is dropped. The match ignores case because
    wg-quick itself parses keys case-insensitively, so ``postup = …`` would
    otherwise slip through and be executed.

    Returns the remaining lines joined with newlines and terminated by
    exactly one trailing newline.

    Raises:
        ValueError: if text is not a string, or the config contains a
            ``Name = wg0`` / ``Interface = wg0`` directive (which would
            clash with the existing peer-server interface).
    """
    if not isinstance(text, str):
        raise ValueError('wg conf must be a string')

    forbidden_keys = {str(name).lower() for name in _WG_FORBIDDEN_PREFIXES}
    cleaned: List[str] = []
    for raw_line in text.splitlines():
        stripped = raw_line.strip()
        # Only assignments are inspected; section headers such as
        # [Interface] carry no '=' and are always kept.
        if '=' in stripped:
            key, _, value = stripped.partition('=')
            key = key.strip()
            if key.lower() in forbidden_keys:
                logger.info(f"_validate_wg_conf: dropped {key} hook")
                continue
            if key.lower() in ('name', 'interface') and \
                    value.strip().lower() == 'wg0':
                raise ValueError(
                    "config defines interface 'wg0' which conflicts "
                    "with the peer-server interface"
                )
        cleaned.append(raw_line)
    return '\n'.join(cleaned).rstrip() + '\n'
def _validate_ovpn(self, text: str) -> str:
    """Strip directives that execute external scripts/binaries.

    A line is dropped when its directive name (first whitespace-delimited
    token) is listed in _OVPN_FORBIDDEN_DIRECTIVES. Before the comparison
    the token is normalised the way OpenVPN reads it: surrounding quotes
    are removed and an optional leading ``--`` is ignored, so ``--up …`` or
    ``"up" …`` cannot bypass the filter. Lines starting with '#' and blank
    lines are kept untouched.

    Returns the remaining lines joined with newlines and terminated by
    exactly one trailing newline.

    Raises:
        ValueError: if text is not a string.
    """
    if not isinstance(text, str):
        raise ValueError('ovpn conf must be a string')
    cleaned: List[str] = []
    for raw_line in text.splitlines():
        stripped = raw_line.strip()
        if stripped and not stripped.startswith('#'):
            first = stripped.split(None, 1)[0]
            directive = first.strip('"\'')
            if directive.startswith('--'):
                directive = directive[2:]
            if directive in _OVPN_FORBIDDEN_DIRECTIVES:
                logger.info(f"_validate_ovpn: dropped {first} directive")
                continue
        cleaned.append(raw_line)
    return '\n'.join(cleaned).rstrip() + '\n'
# ── Filesystem helpers ────────────────────────────────────────────────
@staticmethod
def _write_secure(path: str, text: str) -> None:
    """Atomic 0o600 write — secrets in these configs must not be world-readable.

    The text is written to ``<path>.tmp`` and then moved over ``path`` with
    os.replace. Missing parent directories are created; a bare filename
    (no directory part) is written relative to the current directory.

    Raises whatever the underlying os calls raise; the temp file is removed
    when writing or the final replace fails.
    """
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    tmp = path + '.tmp'
    fd = os.open(tmp, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        with os.fdopen(fd, 'w') as f:
            # The mode passed to os.open only applies when the file is
            # created. Tighten a left-over temp file BEFORE any secret is
            # written into it.
            os.fchmod(f.fileno(), 0o600)
            f.write(text)
        os.replace(tmp, path)
    except Exception:
        try:
            os.unlink(tmp)
        except OSError:
            pass
        raise
    os.chmod(path, 0o600)