feat: connectivity redesign phase 5 — one container per connection instance
Unit Tests / test (push) Successful in 13m5s
Unit Tests / test (push) Successful in 13m5s
instanceable rendering, per-instance up/down on create/delete, store-service-installed gate, per-instance health Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
+237
-5
@@ -199,12 +199,15 @@ class ConnectivityManager(BaseServiceManager):
|
||||
CONNECTIVITY_CHAIN = 'PIC_CONNECTIVITY'
|
||||
|
||||
def __init__(self, config_manager=None, peer_registry=None,
|
||||
vault_manager=None,
|
||||
vault_manager=None, service_composer=None,
|
||||
data_dir: str = '/app/data', config_dir: str = '/app/config'):
|
||||
super().__init__('connectivity', data_dir, config_dir)
|
||||
self.config_manager = config_manager
|
||||
self.peer_registry = peer_registry
|
||||
self.vault_manager = vault_manager
|
||||
# Set after construction in managers.py (composer is built later) — used
|
||||
# to bring per-connection containers up/down out-of-process.
|
||||
self.service_composer = service_composer
|
||||
|
||||
# Serializes connection CRUD + resource allocation across threads.
|
||||
self._conn_lock = threading.RLock()
|
||||
@@ -924,6 +927,206 @@ class ConnectivityManager(BaseServiceManager):
|
||||
def _new_conn_id() -> str:
|
||||
return f"conn_{secrets.token_hex(4)}"
|
||||
|
||||
# ── Connectivity v2 — per-instance container lifecycle ────────────────
|
||||
#
|
||||
# Each connection instance runs its own container, named
|
||||
# cell-<short>-<instance_id>, derived from the connection record (NOT from
|
||||
# the fixed EXIT_CONTAINERS map, which is the legacy single-container view).
|
||||
# The backing store service must be installed first — its image and raw
|
||||
# compose template come from the install record. Tor is single-instance.
|
||||
|
||||
# short container slug per type — must match the instanceable
|
||||
# compose-template container_name prefixes in pic-services.
|
||||
CONTAINER_SLUGS = {
|
||||
"wireguard_ext": "wgext",
|
||||
"openvpn": "ovpn",
|
||||
"sshuttle": "sshuttle",
|
||||
"proxy": "proxy",
|
||||
}
|
||||
|
||||
def instance_container_name(self, conn: Dict[str, Any]) -> Optional[str]:
|
||||
"""Derive the per-instance container name from a connection record.
|
||||
|
||||
Tor keeps its fixed single-instance name (cell-tor). All other types
|
||||
get cell-<slug>-<instance_id>.
|
||||
"""
|
||||
conn_type = conn.get('type')
|
||||
if conn_type == 'tor':
|
||||
return self.EXIT_CONTAINERS.get('tor')
|
||||
slug = self.CONTAINER_SLUGS.get(conn_type)
|
||||
if not slug:
|
||||
return None
|
||||
return f"cell-{slug}-{self._instance_id(conn)}"
|
||||
|
||||
@staticmethod
|
||||
def _instance_id(conn: Dict[str, Any]) -> str:
|
||||
"""Short, docker-safe instance id derived from the connection id."""
|
||||
return str(conn.get('id', '')).split('_')[-1][:12]
|
||||
|
||||
def _store_record_for(self, conn_type: str) -> Optional[Dict[str, Any]]:
|
||||
"""Return the install record for the store service backing conn_type."""
|
||||
svc_id = self.STORE_SERVICE_IDS.get(conn_type)
|
||||
if not svc_id or self.config_manager is None:
|
||||
return None
|
||||
try:
|
||||
installed = self.config_manager.get_installed_services()
|
||||
except Exception as e:
|
||||
logger.warning(f"_store_record_for({conn_type}): {e}")
|
||||
return None
|
||||
if not isinstance(installed, dict):
|
||||
return None
|
||||
return installed.get(svc_id)
|
||||
|
||||
def _materialize_instance_config(self, conn: Dict[str, Any],
|
||||
config_dir: str) -> None:
|
||||
"""Write the per-instance config files the container's entrypoint reads.
|
||||
|
||||
Pulls secrets from the vault by the connection's secret_refs and the
|
||||
non-secret fields from the record's config. Files land in the
|
||||
per-instance config dir (bind-mounted into the container).
|
||||
"""
|
||||
conn_type = conn.get('type')
|
||||
config = conn.get('config', {}) or {}
|
||||
secrets_map = self._load_instance_secrets(conn)
|
||||
os.makedirs(config_dir, exist_ok=True)
|
||||
|
||||
if conn_type == 'wireguard_ext':
|
||||
conf = secrets_map.get('conf', '')
|
||||
if conf:
|
||||
self._write_secure(os.path.join(config_dir, 'wg_ext0.conf'), conf)
|
||||
elif conn_type == 'openvpn':
|
||||
conf = secrets_map.get('conf', '')
|
||||
if conf:
|
||||
self._write_secure(os.path.join(config_dir, 'client.ovpn'), conf)
|
||||
elif conn_type == 'sshuttle':
|
||||
self._materialize_sshuttle_config(conn, config, secrets_map, config_dir)
|
||||
elif conn_type == 'proxy':
|
||||
self._write_secure(
|
||||
os.path.join(config_dir, 'redsocks.conf'),
|
||||
self._render_redsocks_for_instance(conn, config, secrets_map))
|
||||
|
||||
def _load_instance_secrets(self, conn: Dict[str, Any]) -> Dict[str, str]:
|
||||
"""Resolve a connection's secret_refs to {field: value} from the vault."""
|
||||
out: Dict[str, str] = {}
|
||||
conn_id = str(conn.get('id', ''))
|
||||
if self.vault_manager is None:
|
||||
return out
|
||||
for ref in conn.get('secret_refs', []):
|
||||
field = ref[len(conn_id) + 1:] if ref.startswith(conn_id + '_') else ref
|
||||
try:
|
||||
value = self.vault_manager.get_secret(ref)
|
||||
except Exception as e:
|
||||
logger.warning(f"_load_instance_secrets: read {ref} failed: {e}")
|
||||
value = None
|
||||
if value is not None:
|
||||
out[field] = value
|
||||
return out
|
||||
|
||||
def _materialize_sshuttle_config(self, conn, config, secrets_map,
|
||||
config_dir) -> None:
|
||||
listen_port = conn.get('redirect_port') or self.SSHUTTLE_PORT
|
||||
conf_lines = [
|
||||
f"HOST={config.get('host', '')}",
|
||||
f"PORT={config.get('port', 22)}",
|
||||
f"USER={config.get('user', '')}",
|
||||
f"AUTH={config.get('auth', 'key')}",
|
||||
f"LISTEN_PORT={listen_port}",
|
||||
f"EXCLUDE={','.join(config.get('exclude_subnets', []) or [])}",
|
||||
]
|
||||
if 'known_hosts' in secrets_map:
|
||||
self._write_secure(os.path.join(config_dir, 'known_hosts'),
|
||||
secrets_map['known_hosts'].rstrip('\n') + '\n')
|
||||
if config.get('auth') == 'password':
|
||||
if 'password' in secrets_map:
|
||||
self._write_secure(os.path.join(config_dir, 'password'),
|
||||
secrets_map['password'] + '\n')
|
||||
else:
|
||||
if 'private_key' in secrets_map:
|
||||
self._write_secure(os.path.join(config_dir, 'id_pic'),
|
||||
secrets_map['private_key'].rstrip('\n') + '\n')
|
||||
self._write_secure(os.path.join(config_dir, 'sshuttle.conf'),
|
||||
'\n'.join(conf_lines) + '\n')
|
||||
|
||||
def _render_redsocks_for_instance(self, conn, config, secrets_map) -> str:
|
||||
local_port = conn.get('redirect_port') or self.REDSOCKS_PORT
|
||||
redsocks_type = 'socks5' if config.get('scheme') == 'socks5' else 'http-connect'
|
||||
lines = [
|
||||
'base {',
|
||||
' log_debug = off;',
|
||||
' log_info = on;',
|
||||
' log = stderr;',
|
||||
' daemon = off;',
|
||||
' redirector = iptables;',
|
||||
'}',
|
||||
'',
|
||||
'redsocks {',
|
||||
' local_ip = 0.0.0.0;',
|
||||
f' local_port = {local_port};',
|
||||
f" ip = {config.get('host', '')};",
|
||||
f" port = {config.get('port', '')};",
|
||||
f' type = {redsocks_type};',
|
||||
]
|
||||
if config.get('user'):
|
||||
lines.append(f" login = \"{config['user']}\";")
|
||||
if secrets_map.get('password'):
|
||||
lines.append(f" password = \"{secrets_map['password']}\";")
|
||||
lines.append('}')
|
||||
return '\n'.join(lines) + '\n'
|
||||
|
||||
def up_connection_instance(self, conn: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Render config + compose for one connection and bring its container up.
|
||||
|
||||
Requires the backing store service to be installed (its image + raw
|
||||
compose template come from the install record). Tor is single-instance
|
||||
and is started through the plain store-service path, not here.
|
||||
Returns {'ok': bool, ...}; never raises.
|
||||
"""
|
||||
conn_type = conn.get('type')
|
||||
if conn_type in self.SINGLE_INSTANCE_TYPES:
|
||||
return {'ok': True, 'single_instance': True}
|
||||
if self.service_composer is None:
|
||||
return {'ok': False, 'error': 'service_composer unavailable'}
|
||||
|
||||
record = self._store_record_for(conn_type)
|
||||
if not record:
|
||||
svc_id = self.STORE_SERVICE_IDS.get(conn_type, conn_type)
|
||||
return {'ok': False,
|
||||
'error': f"store service {svc_id!r} is not installed; "
|
||||
f'install it before creating a {conn_type} connection'}
|
||||
template = record.get('compose_template')
|
||||
manifest = record.get('manifest') or {}
|
||||
if not template:
|
||||
return {'ok': False,
|
||||
'error': f'store service for {conn_type} has no compose '
|
||||
'template (reinstall required for per-instance support)'}
|
||||
|
||||
svc_id = self.STORE_SERVICE_IDS[conn_type]
|
||||
instance_id = self._instance_id(conn)
|
||||
try:
|
||||
config_dir = self.service_composer.instance_config_dir(svc_id, instance_id)
|
||||
self._materialize_instance_config(conn, config_dir)
|
||||
except Exception as e:
|
||||
logger.error(f"up_connection_instance: config write failed: {e}")
|
||||
return {'ok': False, 'error': 'failed to write instance config'}
|
||||
|
||||
return self.service_composer.up_instance(
|
||||
svc_id, instance_id, manifest, template,
|
||||
redirect_port=conn.get('redirect_port'))
|
||||
|
||||
def down_connection_instance(self, conn: Dict[str, Any],
|
||||
purge_data: bool = True) -> Dict[str, Any]:
|
||||
"""Bring down + clean up one connection's container, compose and config."""
|
||||
conn_type = conn.get('type')
|
||||
if conn_type in self.SINGLE_INSTANCE_TYPES:
|
||||
return {'ok': True, 'single_instance': True}
|
||||
if self.service_composer is None:
|
||||
return {'ok': False, 'error': 'service_composer unavailable'}
|
||||
svc_id = self.STORE_SERVICE_IDS.get(conn_type)
|
||||
if not svc_id:
|
||||
return {'ok': True}
|
||||
return self.service_composer.down_instance(
|
||||
svc_id, self._instance_id(conn), purge_data=purge_data)
|
||||
|
||||
# ── Connectivity v2 — connection CRUD ─────────────────────────────────
|
||||
|
||||
def _compute_state(self, conn_type: str, config: Dict[str, Any],
|
||||
@@ -978,6 +1181,15 @@ class ConnectivityManager(BaseServiceManager):
|
||||
if err:
|
||||
return {'ok': False, 'error': err}
|
||||
|
||||
# The backing store service must be installed before a connection of
|
||||
# its type can run a container. Tor (single instance) installs its own
|
||||
# container via the store path, so the gate applies to it too.
|
||||
if not self._store_service_installed(conn_type):
|
||||
svc_id = self.STORE_SERVICE_IDS.get(conn_type, conn_type)
|
||||
return {'ok': False, 'error':
|
||||
f"the {svc_id!r} service must be installed from the Service "
|
||||
f'Store before creating a {conn_type} connection'}
|
||||
|
||||
with self._conn_lock:
|
||||
existing = []
|
||||
if self.config_manager is not None:
|
||||
@@ -1059,7 +1271,18 @@ class ConnectivityManager(BaseServiceManager):
|
||||
|
||||
logger.info(f"connectivity: created connection {conn_id} "
|
||||
f"({conn_type}/{name}) mark={hex(mark)} table={table}")
|
||||
return {'ok': True, 'connection': self._public_record(record)}
|
||||
|
||||
# Bring up this connection's own container (cell-<slug>-<id>). Tor is
|
||||
# single-instance and runs via the store path, so up is a no-op there.
|
||||
# A failure here is non-fatal to the record — the connection still
|
||||
# exists and can be retried via apply/up — but is surfaced to the
|
||||
# caller so the UI can show it.
|
||||
up = self.up_connection_instance(record)
|
||||
result = {'ok': True, 'connection': self._public_record(record)}
|
||||
if not up.get('ok') and not up.get('single_instance'):
|
||||
result['container'] = {'ok': False,
|
||||
'error': up.get('error') or up.get('stderr')}
|
||||
return result
|
||||
|
||||
def update_connection(self, conn_id: str, name: Optional[str] = None,
|
||||
config: Optional[Dict[str, Any]] = None,
|
||||
@@ -1138,6 +1361,15 @@ class ConnectivityManager(BaseServiceManager):
|
||||
return {'ok': False, 'error':
|
||||
f'connection is in use by {ref}; detach it first'}
|
||||
|
||||
# Tear down this connection's container + its per-instance compose
|
||||
# and config before forgetting the record (best-effort; a stale
|
||||
# container must not block deletion of the config entry).
|
||||
try:
|
||||
self.down_connection_instance(record)
|
||||
except Exception as e:
|
||||
logger.warning(f"delete_connection: container teardown failed "
|
||||
f"(non-fatal): {e}")
|
||||
|
||||
for secret_ref in record.get('secret_refs', []):
|
||||
if self.vault_manager is not None:
|
||||
try:
|
||||
@@ -1719,7 +1951,7 @@ class ConnectivityManager(BaseServiceManager):
|
||||
iface = conn.get('iface')
|
||||
if not iface:
|
||||
return 'unknown', 'no interface assigned'
|
||||
container = self.EXIT_CONTAINERS.get('wireguard_ext')
|
||||
container = self.instance_container_name(conn)
|
||||
r = self._exec_in_container(
|
||||
container, ['wg', 'show', iface, 'latest-handshakes'])
|
||||
if r is None or r.returncode != 0:
|
||||
@@ -1742,7 +1974,7 @@ class ConnectivityManager(BaseServiceManager):
|
||||
def _probe_openvpn(self, conn: Dict[str, Any]) -> Tuple[str, Optional[str]]:
|
||||
"""An OpenVPN exit is working when its tun iface exists and is UP."""
|
||||
iface = conn.get('iface')
|
||||
container = self.EXIT_CONTAINERS.get('openvpn')
|
||||
container = self.instance_container_name(conn)
|
||||
# The tun device lives in the openvpn container's net namespace.
|
||||
r = self._exec_in_container(container, ['ip', 'link', 'show', iface]) \
|
||||
if iface else None
|
||||
@@ -1781,7 +2013,7 @@ class ConnectivityManager(BaseServiceManager):
|
||||
if not self._tcp_reachable(host, int(port)):
|
||||
return 'down', f'ssh host {host}:{port} unreachable'
|
||||
listen_port = conn.get('redirect_port')
|
||||
container = self.EXIT_CONTAINERS.get('sshuttle')
|
||||
container = self.instance_container_name(conn)
|
||||
if isinstance(listen_port, int) and not self._listener_reachable(
|
||||
container, listen_port):
|
||||
return 'down', f'sshuttle listener :{listen_port} down'
|
||||
|
||||
Reference in New Issue
Block a user