feat: connectivity redesign phase 2 — instance-aware routing + reference connections by id
Unit Tests / test (push) Successful in 12m6s

apply_routes now iterates over connection instances rather than types:
each instance gets its own fwmark, routing table, interface, and
redirect_port via _routing_connections / _resolve_peer_connection /
_apply_connection_for_src; kill-switch is enforced per iface-instance.
Old per-type MARKS/TABLES constants are kept only as migration scaffolding.

peer_registry: exit_via is now stored as a connection id (or 'default');
_migrate_exit_via_to_connection_id runs on _load_peers to upgrade legacy
type-string values; set_peer_exit_via validates against known connection
ids; VALID_EXIT_VIA removed; config_manager wired in from managers.py.

egress_manager: egress_overrides keyed by service_id → connection_id;
local MARKS/TABLES/EXIT_TYPES/_REDIRECT_PORTS/_add_tor_redirect removed;
(mark, table, redirect_port) resolved at apply-time via
connectivity_manager.get_connection; manifest egress.allowed still
enforced by connection type.

api/app.py + api.js: PUT peer/service exit endpoints accept {connection_id};
back-compat shim resolves a legacy type string to its single active instance.

Tests extended: two same-type instances produce distinct marks/tables/ports;
peer exit_via and egress override id migrations round-trip correctly;
single-instance behaviour is equivalent to the old type-keyed path.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 17:35:28 -04:00
parent 5b9d20eeac
commit 89aed4efe0
12 changed files with 993 additions and 375 deletions
+143 -78
View File
@@ -9,8 +9,13 @@ for install/remove lifecycle hooks.
Rules live on the HOST in PIC_EGRESS chains in the mangle and nat
tables. Container IPs are discovered via docker inspect using the
container_name from the service manifest. Marks are distinct from
ConnectivityManager to prevent rule collisions.
container_name from the service manifest.
Connectivity v2: a service routes through a *connection instance* (by id),
sharing the same fwmark / routing table / redirect port as any peer that
egresses through the same connection. The (mark, table, redirect_port) for a
service are resolved from ConnectivityManager.get_connection(id) — EgressManager
no longer owns its own per-type MARKS/TABLES tables.
"""
import logging
import subprocess
@@ -19,34 +24,18 @@ from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
EXIT_TYPES = ("default", "wireguard_ext", "openvpn", "tor", "sshuttle", "proxy")
# fwmark values — must not collide with ConnectivityManager
# (0x10, 0x20, 0x30, 0x40, 0x50)
MARKS = {"wireguard_ext": 0x110, "openvpn": 0x120, "tor": 0x130,
"sshuttle": 0x140, "proxy": 0x150}
# Policy routing table IDs
TABLES = {"wireguard_ext": 210, "openvpn": 220, "tor": 230,
"sshuttle": 240, "proxy": 250}
EGRESS_CHAIN = "PIC_EGRESS"
# Transparent proxy port used by Tor
_TOR_TRANS_PORT = 9040
# Local transparent-proxy ports for redirect-style exits (no exit iface):
# traffic is REDIRECTed to the listener of the corresponding exit container.
_REDIRECT_PORTS = {"tor": _TOR_TRANS_PORT, "sshuttle": 12300, "proxy": 12345}
class EgressManager:
"""Per-service egress enforcement via host iptables fwmark policy-routing."""
def __init__(self, config_manager, service_store_manager=None,
connectivity_manager=None,
data_dir: str = "/app/data", config_dir: str = "/app/config"):
self.config_manager = config_manager
self.service_store_manager = service_store_manager
self.connectivity_manager = connectivity_manager
self._data_dir = data_dir
self._config_dir = config_dir
@@ -60,9 +49,10 @@ class EgressManager:
2. clear_service first (ensures idempotency).
3. If the manifest has no egress block, skip silently.
4. Discover the container IP.
5. Resolve the exit type (override > manifest default > 'default').
6. If exit is 'default', return early with no rules.
7. Otherwise create chains, ensure ip rules, add mark rules.
5. Resolve the connection id (override > manifest default > 'default').
6. If 'default', return early with no rules.
7. Otherwise resolve the connection's (mark, table, redirect_port),
create chains, ensure ip rules, add mark/redirect rules.
"""
manifest = self._get_manifest(service_id)
if manifest is None:
@@ -79,36 +69,39 @@ class EgressManager:
if not container_ip:
return {'ok': False, 'error': 'container IP not discoverable'}
exit_via = self._resolve_exit(service_id, manifest)
connection_id = self._resolve_exit(service_id, manifest)
# Validate exit_via is a known, non-default value
if exit_via not in EXIT_TYPES:
return {
'ok': False,
'error': f'unknown exit_via {exit_via!r}; must be one of {EXIT_TYPES}',
}
if exit_via == 'default':
if connection_id == 'default':
return {'ok': True, 'exit_via': 'default'}
if exit_via not in MARKS:
conn = self._get_connection(connection_id)
if conn is None:
return {
'ok': False,
'error': f'unknown exit_via {exit_via!r}; must be one of {EXIT_TYPES}',
'error': f'unknown connection {connection_id!r}',
}
mark = conn.get('mark')
table = conn.get('table')
if not isinstance(mark, int) or not isinstance(table, int):
return {
'ok': False,
'error': f'connection {connection_id!r} has no routing resources',
}
try:
self._ensure_chains()
self._ensure_host_ip_rules()
self._add_mark_rule(container_ip, MARKS[exit_via], service_id)
if exit_via in _REDIRECT_PORTS:
self._add_redirect(container_ip, _REDIRECT_PORTS[exit_via],
service_id)
self._ensure_host_ip_rule(mark, table)
self._add_mark_rule(container_ip, mark, service_id)
redirect_port = conn.get('redirect_port')
if isinstance(redirect_port, int):
self._add_redirect(container_ip, redirect_port, service_id)
except Exception as exc:
logger.error('apply_service(%s): %s', service_id, exc)
return {'ok': False, 'error': str(exc)}
return {'ok': True, 'exit_via': exit_via, 'container_ip': container_ip}
return {'ok': True, 'exit_via': connection_id,
'container_ip': container_ip}
def clear_service(self, service_id: str) -> Dict[str, Any]:
"""Remove all PIC_EGRESS rules tagged for this service."""
@@ -129,10 +122,13 @@ class EgressManager:
results[svc_id] = self.apply_service(svc_id)
return {'ok': True, 'services': results}
def set_service_exit(self, service_id: str, exit_type: str) -> Dict[str, Any]:
"""Persist a per-service egress override and immediately reapply rules.
def set_service_exit(self, service_id: str, connection_id: str) -> Dict[str, Any]:
"""Persist a per-service egress override (by connection id) and reapply.
exit_type must appear in the manifest's egress.allowed list.
`connection_id` is a real connection id or 'default'. A legacy exit
*type* string is accepted as a one-release back-compat shim and resolved
to the single connection instance of that type. The resolved
connection's type must be in the manifest's egress.allowed list.
"""
manifest = self._get_manifest(service_id)
if manifest is None:
@@ -141,31 +137,91 @@ class EgressManager:
if not self._has_egress(manifest):
return {'ok': False, 'error': f'service {service_id!r} has no egress configuration'}
if connection_id == 'default':
overrides = self._get_egress_overrides()
overrides[service_id] = 'default'
self._set_egress_overrides(overrides)
return self.apply_service(service_id)
resolved = self._resolve_connection_id(connection_id)
if resolved is None:
return {
'ok': False,
'error': f"unknown connection {connection_id!r}; "
f"must be a connection id or 'default'",
}
conn = self._get_connection(resolved)
egress = manifest.get('egress', {})
allowed = egress.get('allowed', list(EXIT_TYPES))
if exit_type not in allowed:
return {
'ok': False,
'error': (
f'exit_type {exit_type!r} is not in the allowed list '
f'for {service_id}: {allowed}'
),
}
if exit_type not in EXIT_TYPES:
return {
'ok': False,
'error': f'unknown exit_type {exit_type!r}; must be one of {EXIT_TYPES}',
}
allowed = egress.get('allowed')
if isinstance(allowed, list) and conn is not None:
if conn.get('type') not in allowed:
return {
'ok': False,
'error': (
f"connection type {conn.get('type')!r} is not in the "
f'allowed list for {service_id}: {allowed}'
),
}
# Persist the override so it survives restarts
overrides = self._get_egress_overrides()
overrides[service_id] = exit_type
overrides[service_id] = resolved
self._set_egress_overrides(overrides)
return self.apply_service(service_id)
def _connections(self) -> List[dict]:
    """List the v2 connection records from the first configured provider.

    ``connectivity_manager`` is consulted whenever it is set;
    ``config_manager`` is used only when there is no connectivity manager
    at all. There is deliberately no fall-through from one provider to the
    other on failure.

    Returns:
        The provider's list of connection records, or ``[]`` when no
        provider is configured, the lookup raises, or the provider returns
        something that is not a list.
    """
    provider = self.connectivity_manager
    if provider is None:
        provider = self.config_manager
    if provider is None:
        return []
    try:
        records = provider.list_connections()
    except Exception as exc:
        # Best-effort: an unavailable provider means "no connections known".
        logger.warning('egress: list_connections failed: %s', exc)
        return []
    return records if isinstance(records, list) else []
def _get_connection(self, connection_id: str) -> Optional[dict]:
    """Look up a single connection record by id.

    Callers read ``mark``, ``table``, ``redirect_port`` and ``type`` from
    the returned record. ``connectivity_manager`` is consulted whenever it
    is set; ``config_manager`` is used only when there is no connectivity
    manager at all (no fall-through on failure).

    Args:
        connection_id: Id of the connection instance to resolve.

    Returns:
        The record dict, or ``None`` when no provider is configured, the
        lookup raises, the provider reports no such connection, or the
        provider returns something that is not a dict.
    """
    provider = self.connectivity_manager
    if provider is None:
        provider = self.config_manager
    if provider is None:
        return None
    try:
        record = provider.get_connection(connection_id)
    except Exception as exc:
        logger.warning('egress: get_connection failed: %s', exc)
        return None
    if record is None or isinstance(record, dict):
        return record
    # Callers call .get() on the result without further checks, so a
    # malformed record is reported as "not found" instead of surfacing
    # later as an AttributeError.
    logger.warning('egress: get_connection(%r) returned %s, expected dict',
                   connection_id, type(record).__name__)
    return None
# Legacy exit *type* strings that are still accepted where a connection id is
# expected; they are resolved to the single connection instance of that type.
_LEGACY_EXIT_TYPES = ('wireguard_ext', 'openvpn', 'tor', 'sshuttle', 'proxy')
def _resolve_connection_id(self, value: str) -> Optional[str]:
    """Map a caller-supplied value onto a known connection id.

    An exact id match always wins. Otherwise, as a back-compat shim, a
    legacy type string resolves to the id of the connection of that type,
    but only when exactly one such connection exists.

    Args:
        value: A connection id or a legacy exit type string.

    Returns:
        The matching connection id, or ``None`` when nothing matches or a
        legacy type is ambiguous (zero or several instances).
    """
    records = self._connections()
    if any(rec.get('id') == value for rec in records):
        return value
    if value not in self._LEGACY_EXIT_TYPES:
        return None
    same_type = [rec for rec in records if rec.get('type') == value]
    if len(same_type) != 1:
        # Zero or several instances: the legacy name is not resolvable.
        return None
    return same_type[0].get('id')
def get_status(self) -> Dict[str, Any]:
"""Return egress status for every installed service that has egress config."""
installed = self.config_manager.get_installed_services()
@@ -201,15 +257,26 @@ class EgressManager:
return bool(manifest.get('has_egress', False) and manifest.get('egress'))
def _resolve_exit(self, service_id: str, manifest: dict) -> str:
"""Determine the effective exit for a service.
"""Determine the effective connection id for a service.
Priority: persisted override > manifest egress.default > 'default'.
Legacy type strings (from old overrides or a manifest default) are
resolved to the single connection instance of that type; if that can't
be resolved the service falls back to 'default'.
"""
overrides = self._get_egress_overrides()
if service_id in overrides:
return overrides[service_id]
egress = manifest.get('egress') or {}
return egress.get('default', 'default')
value = overrides[service_id]
else:
egress = manifest.get('egress') or {}
value = egress.get('default', 'default')
if value == 'default':
return 'default'
if value in self._LEGACY_EXIT_TYPES:
resolved = self._resolve_connection_id(value)
return resolved if resolved is not None else 'default'
return value
def _discover_container_ip(self, container_name: str,
retries: int = 5, delay: float = 0.2) -> Optional[str]:
@@ -254,16 +321,18 @@ class EgressManager:
['-t', table, '-I', 'PREROUTING', '1', '-j', EGRESS_CHAIN]
)
def _ensure_host_ip_rules(self) -> None:
"""Ensure `ip rule fwmark <mark> lookup <table>` exists for each exit."""
for exit_type, mark in MARKS.items():
table = TABLES[exit_type]
# Remove any existing duplicate rules first, then add once
for _ in range(8):
r = self._ip_rule(['del', 'fwmark', hex(mark), 'lookup', str(table)])
if r.returncode != 0:
break
self._ip_rule(['add', 'fwmark', hex(mark), 'lookup', str(table)])
def _ensure_host_ip_rule(self, mark: int, table: int) -> None:
    """Install exactly one `ip rule fwmark <mark> lookup <table>`.

    Existing copies of the rule are deleted first (at most 8 attempts,
    stopping at the first delete that reports failure), then the rule is
    added once. The mark/table pair belongs to the connection instance the
    service routes through, so a peer and a service on the same connection
    share this rule.

    Args:
        mark: fwmark value to match (passed to `ip rule` in hex).
        table: Routing table id the mark is looked up in.
    """
    selector = ['fwmark', hex(mark), 'lookup', str(table)]
    for _attempt in range(8):
        outcome = self._ip_rule(['del'] + selector)
        if outcome.returncode != 0:
            # Nothing left to delete (or the delete failed): stop draining.
            break
    self._ip_rule(['add'] + selector)
def _add_mark_rule(self, service_ip: str, mark: int, service_id: str) -> None:
"""Mark outbound packets from the service container with fwmark."""
@@ -283,10 +352,6 @@ class EgressManager:
'-m', 'comment', '--comment', self._tag(service_id),
])
def _add_tor_redirect(self, service_ip: str, service_id: str) -> None:
"""Redirect the service container's TCP traffic to the local Tor TransPort."""
self._add_redirect(service_ip, _TOR_TRANS_PORT, service_id)
def _clear_egress_rules(self, service_id: str) -> None:
"""Remove all rules tagged pic-egr-<service_id> from mangle and nat."""
import re as _re