feat: connectivity redesign phase 5 — one container per connection instance
Unit Tests / test (push) Successful in 13m5s

instanceable rendering, per-instance up/down on create/delete,
store-service-installed gate, per-instance health

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 22:56:31 -04:00
parent d39c091cec
commit 603225694c
8 changed files with 688 additions and 8 deletions
+237 -5
View File
@@ -199,12 +199,15 @@ class ConnectivityManager(BaseServiceManager):
CONNECTIVITY_CHAIN = 'PIC_CONNECTIVITY'
def __init__(self, config_manager=None, peer_registry=None,
vault_manager=None,
vault_manager=None, service_composer=None,
data_dir: str = '/app/data', config_dir: str = '/app/config'):
super().__init__('connectivity', data_dir, config_dir)
self.config_manager = config_manager
self.peer_registry = peer_registry
self.vault_manager = vault_manager
# Set after construction in managers.py (composer is built later) — used
# to bring per-connection containers up/down out-of-process.
self.service_composer = service_composer
# Serializes connection CRUD + resource allocation across threads.
self._conn_lock = threading.RLock()
@@ -924,6 +927,206 @@ class ConnectivityManager(BaseServiceManager):
def _new_conn_id() -> str:
return f"conn_{secrets.token_hex(4)}"
# ── Connectivity v2 — per-instance container lifecycle ────────────────
#
# Each connection instance runs its own container, named
# cell-<short>-<instance_id>, derived from the connection record (NOT from
# the fixed EXIT_CONTAINERS map, which is the legacy single-container view).
# The backing store service must be installed first — its image and raw
# compose template come from the install record. Tor is single-instance.
# short container slug per type — must match the instanceable
# compose-template container_name prefixes in pic-services.
CONTAINER_SLUGS = {
"wireguard_ext": "wgext",
"openvpn": "ovpn",
"sshuttle": "sshuttle",
"proxy": "proxy",
}
def instance_container_name(self, conn: Dict[str, Any]) -> Optional[str]:
"""Derive the per-instance container name from a connection record.
Tor keeps its fixed single-instance name (cell-tor). All other types
get cell-<slug>-<instance_id>.
"""
conn_type = conn.get('type')
if conn_type == 'tor':
return self.EXIT_CONTAINERS.get('tor')
slug = self.CONTAINER_SLUGS.get(conn_type)
if not slug:
return None
return f"cell-{slug}-{self._instance_id(conn)}"
@staticmethod
def _instance_id(conn: Dict[str, Any]) -> str:
"""Short, docker-safe instance id derived from the connection id."""
return str(conn.get('id', '')).split('_')[-1][:12]
def _store_record_for(self, conn_type: str) -> Optional[Dict[str, Any]]:
"""Return the install record for the store service backing conn_type."""
svc_id = self.STORE_SERVICE_IDS.get(conn_type)
if not svc_id or self.config_manager is None:
return None
try:
installed = self.config_manager.get_installed_services()
except Exception as e:
logger.warning(f"_store_record_for({conn_type}): {e}")
return None
if not isinstance(installed, dict):
return None
return installed.get(svc_id)
def _materialize_instance_config(self, conn: Dict[str, Any],
config_dir: str) -> None:
"""Write the per-instance config files the container's entrypoint reads.
Pulls secrets from the vault by the connection's secret_refs and the
non-secret fields from the record's config. Files land in the
per-instance config dir (bind-mounted into the container).
"""
conn_type = conn.get('type')
config = conn.get('config', {}) or {}
secrets_map = self._load_instance_secrets(conn)
os.makedirs(config_dir, exist_ok=True)
if conn_type == 'wireguard_ext':
conf = secrets_map.get('conf', '')
if conf:
self._write_secure(os.path.join(config_dir, 'wg_ext0.conf'), conf)
elif conn_type == 'openvpn':
conf = secrets_map.get('conf', '')
if conf:
self._write_secure(os.path.join(config_dir, 'client.ovpn'), conf)
elif conn_type == 'sshuttle':
self._materialize_sshuttle_config(conn, config, secrets_map, config_dir)
elif conn_type == 'proxy':
self._write_secure(
os.path.join(config_dir, 'redsocks.conf'),
self._render_redsocks_for_instance(conn, config, secrets_map))
def _load_instance_secrets(self, conn: Dict[str, Any]) -> Dict[str, str]:
"""Resolve a connection's secret_refs to {field: value} from the vault."""
out: Dict[str, str] = {}
conn_id = str(conn.get('id', ''))
if self.vault_manager is None:
return out
for ref in conn.get('secret_refs', []):
field = ref[len(conn_id) + 1:] if ref.startswith(conn_id + '_') else ref
try:
value = self.vault_manager.get_secret(ref)
except Exception as e:
logger.warning(f"_load_instance_secrets: read {ref} failed: {e}")
value = None
if value is not None:
out[field] = value
return out
def _materialize_sshuttle_config(self, conn, config, secrets_map,
config_dir) -> None:
listen_port = conn.get('redirect_port') or self.SSHUTTLE_PORT
conf_lines = [
f"HOST={config.get('host', '')}",
f"PORT={config.get('port', 22)}",
f"USER={config.get('user', '')}",
f"AUTH={config.get('auth', 'key')}",
f"LISTEN_PORT={listen_port}",
f"EXCLUDE={','.join(config.get('exclude_subnets', []) or [])}",
]
if 'known_hosts' in secrets_map:
self._write_secure(os.path.join(config_dir, 'known_hosts'),
secrets_map['known_hosts'].rstrip('\n') + '\n')
if config.get('auth') == 'password':
if 'password' in secrets_map:
self._write_secure(os.path.join(config_dir, 'password'),
secrets_map['password'] + '\n')
else:
if 'private_key' in secrets_map:
self._write_secure(os.path.join(config_dir, 'id_pic'),
secrets_map['private_key'].rstrip('\n') + '\n')
self._write_secure(os.path.join(config_dir, 'sshuttle.conf'),
'\n'.join(conf_lines) + '\n')
def _render_redsocks_for_instance(self, conn, config, secrets_map) -> str:
local_port = conn.get('redirect_port') or self.REDSOCKS_PORT
redsocks_type = 'socks5' if config.get('scheme') == 'socks5' else 'http-connect'
lines = [
'base {',
' log_debug = off;',
' log_info = on;',
' log = stderr;',
' daemon = off;',
' redirector = iptables;',
'}',
'',
'redsocks {',
' local_ip = 0.0.0.0;',
f' local_port = {local_port};',
f" ip = {config.get('host', '')};",
f" port = {config.get('port', '')};",
f' type = {redsocks_type};',
]
if config.get('user'):
lines.append(f" login = \"{config['user']}\";")
if secrets_map.get('password'):
lines.append(f" password = \"{secrets_map['password']}\";")
lines.append('}')
return '\n'.join(lines) + '\n'
def up_connection_instance(self, conn: Dict[str, Any]) -> Dict[str, Any]:
"""Render config + compose for one connection and bring its container up.
Requires the backing store service to be installed (its image + raw
compose template come from the install record). Tor is single-instance
and is started through the plain store-service path, not here.
Returns {'ok': bool, ...}; never raises.
"""
conn_type = conn.get('type')
if conn_type in self.SINGLE_INSTANCE_TYPES:
return {'ok': True, 'single_instance': True}
if self.service_composer is None:
return {'ok': False, 'error': 'service_composer unavailable'}
record = self._store_record_for(conn_type)
if not record:
svc_id = self.STORE_SERVICE_IDS.get(conn_type, conn_type)
return {'ok': False,
'error': f"store service {svc_id!r} is not installed; "
f'install it before creating a {conn_type} connection'}
template = record.get('compose_template')
manifest = record.get('manifest') or {}
if not template:
return {'ok': False,
'error': f'store service for {conn_type} has no compose '
'template (reinstall required for per-instance support)'}
svc_id = self.STORE_SERVICE_IDS[conn_type]
instance_id = self._instance_id(conn)
try:
config_dir = self.service_composer.instance_config_dir(svc_id, instance_id)
self._materialize_instance_config(conn, config_dir)
except Exception as e:
logger.error(f"up_connection_instance: config write failed: {e}")
return {'ok': False, 'error': 'failed to write instance config'}
return self.service_composer.up_instance(
svc_id, instance_id, manifest, template,
redirect_port=conn.get('redirect_port'))
def down_connection_instance(self, conn: Dict[str, Any],
purge_data: bool = True) -> Dict[str, Any]:
"""Bring down + clean up one connection's container, compose and config."""
conn_type = conn.get('type')
if conn_type in self.SINGLE_INSTANCE_TYPES:
return {'ok': True, 'single_instance': True}
if self.service_composer is None:
return {'ok': False, 'error': 'service_composer unavailable'}
svc_id = self.STORE_SERVICE_IDS.get(conn_type)
if not svc_id:
return {'ok': True}
return self.service_composer.down_instance(
svc_id, self._instance_id(conn), purge_data=purge_data)
# ── Connectivity v2 — connection CRUD ─────────────────────────────────
def _compute_state(self, conn_type: str, config: Dict[str, Any],
@@ -978,6 +1181,15 @@ class ConnectivityManager(BaseServiceManager):
if err:
return {'ok': False, 'error': err}
# The backing store service must be installed before a connection of
# its type can run a container. Tor (single instance) installs its own
# container via the store path, so the gate applies to it too.
if not self._store_service_installed(conn_type):
svc_id = self.STORE_SERVICE_IDS.get(conn_type, conn_type)
return {'ok': False, 'error':
f"the {svc_id!r} service must be installed from the Service "
f'Store before creating a {conn_type} connection'}
with self._conn_lock:
existing = []
if self.config_manager is not None:
@@ -1059,7 +1271,18 @@ class ConnectivityManager(BaseServiceManager):
logger.info(f"connectivity: created connection {conn_id} "
f"({conn_type}/{name}) mark={hex(mark)} table={table}")
return {'ok': True, 'connection': self._public_record(record)}
# Bring up this connection's own container (cell-<slug>-<id>). Tor is
# single-instance and runs via the store path, so up is a no-op there.
# A failure here is non-fatal to the record — the connection still
# exists and can be retried via apply/up — but is surfaced to the
# caller so the UI can show it.
up = self.up_connection_instance(record)
result = {'ok': True, 'connection': self._public_record(record)}
if not up.get('ok') and not up.get('single_instance'):
result['container'] = {'ok': False,
'error': up.get('error') or up.get('stderr')}
return result
def update_connection(self, conn_id: str, name: Optional[str] = None,
config: Optional[Dict[str, Any]] = None,
@@ -1138,6 +1361,15 @@ class ConnectivityManager(BaseServiceManager):
return {'ok': False, 'error':
f'connection is in use by {ref}; detach it first'}
# Tear down this connection's container + its per-instance compose
# and config before forgetting the record (best-effort; a stale
# container must not block deletion of the config entry).
try:
self.down_connection_instance(record)
except Exception as e:
logger.warning(f"delete_connection: container teardown failed "
f"(non-fatal): {e}")
for secret_ref in record.get('secret_refs', []):
if self.vault_manager is not None:
try:
@@ -1719,7 +1951,7 @@ class ConnectivityManager(BaseServiceManager):
iface = conn.get('iface')
if not iface:
return 'unknown', 'no interface assigned'
container = self.EXIT_CONTAINERS.get('wireguard_ext')
container = self.instance_container_name(conn)
r = self._exec_in_container(
container, ['wg', 'show', iface, 'latest-handshakes'])
if r is None or r.returncode != 0:
@@ -1742,7 +1974,7 @@ class ConnectivityManager(BaseServiceManager):
def _probe_openvpn(self, conn: Dict[str, Any]) -> Tuple[str, Optional[str]]:
"""An OpenVPN exit is working when its tun iface exists and is UP."""
iface = conn.get('iface')
container = self.EXIT_CONTAINERS.get('openvpn')
container = self.instance_container_name(conn)
# The tun device lives in the openvpn container's net namespace.
r = self._exec_in_container(container, ['ip', 'link', 'show', iface]) \
if iface else None
@@ -1781,7 +2013,7 @@ class ConnectivityManager(BaseServiceManager):
if not self._tcp_reachable(host, int(port)):
return 'down', f'ssh host {host}:{port} unreachable'
listen_port = conn.get('redirect_port')
container = self.EXIT_CONTAINERS.get('sshuttle')
container = self.instance_container_name(conn)
if isinstance(listen_port, int) and not self._listener_reachable(
container, listen_port):
return 'down', f'sshuttle listener :{listen_port} down'