feat: connectivity redesign phase 2 — instance-aware routing + reference connections by id
Unit Tests / test (push) Successful in 12m6s

apply_routes now iterates over connection instances rather than types:
each instance gets its own fwmark, routing table, interface, and
redirect_port via _routing_connections / _resolve_peer_connection /
_apply_connection_for_src; kill-switch is enforced per iface-instance.
Old per-type MARKS/TABLES constants are kept only as migration scaffolding.

peer_registry: exit_via is now stored as a connection id (or 'default');
_migrate_exit_via_to_connection_id runs on _load_peers to upgrade legacy
type-string values; set_peer_exit_via validates against known connection
ids; VALID_EXIT_VIA removed; config_manager wired in from managers.py.

egress_manager: egress_overrides now maps service_id → connection_id;
local MARKS/TABLES/EXIT_TYPES/_REDIRECT_PORTS/_add_tor_redirect removed;
(mark, table, redirect_port) resolved at apply-time via
connectivity_manager.get_connection; manifest egress.allowed still
enforced by connection type.

api/app.py + api.js: PUT peer/service exit endpoints accept {connection_id};
back-compat shim resolves a legacy type string to its single active instance.

Tests extended: two same-type instances produce distinct marks/tables/ports;
peer exit_via and egress override id migrations round-trip correctly;
single-instance behaviour is equivalent to the old type-keyed path.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 17:35:28 -04:00
parent 5b9d20eeac
commit 89aed4efe0
12 changed files with 993 additions and 375 deletions
+127 -46
View File
@@ -357,14 +357,16 @@ class ConnectivityManager(BaseServiceManager):
logger.warning(f"get_peer_exits: {e}")
return out
def set_peer_exit(self, peer_name: str, exit_type: str) -> Dict[str, Any]:
"""Assign a peer to an egress path and apply the rule changes."""
if exit_type not in self.EXIT_TYPES:
return {
'ok': False,
'error': f"invalid exit_type {exit_type!r}; "
f"must be one of {self.EXIT_TYPES}",
}
def set_peer_exit(self, peer_name: str, exit_via: str) -> Dict[str, Any]:
"""Assign a peer to a connection (by id) or 'default' and apply rules.
`exit_via` is a connection id, 'default', or — as a one-release
back-compat shim — a legacy exit *type* string, which is resolved to
the single connection instance of that type. Validation that the id
exists lives in peer_registry.set_peer_exit_via.
"""
if not isinstance(exit_via, str) or not exit_via:
return {'ok': False, 'error': 'connection_id is required'}
if not isinstance(peer_name, str) or not re.match(r'^[A-Za-z0-9_.-]{1,64}$', peer_name):
return {'ok': False, 'error': f'invalid peer_name {peer_name!r}'}
@@ -372,11 +374,16 @@ class ConnectivityManager(BaseServiceManager):
return {'ok': False, 'error': 'peer_registry not available'}
try:
ok = self.peer_registry.set_peer_exit_via(peer_name, exit_type)
ok = self.peer_registry.set_peer_exit_via(peer_name, exit_via)
except Exception as e:
logger.error(f"set_peer_exit: registry update failed: {e}")
return {'ok': False, 'error': str(e)}
if not ok:
# Distinguish "no such peer" from "no such connection".
if self._peer_exists(peer_name):
return {'ok': False, 'error':
f'unknown connection {exit_via!r}; '
f"must be a connection id or 'default'"}
return {'ok': False, 'error': f'peer {peer_name!r} not found'}
try:
@@ -384,7 +391,23 @@ class ConnectivityManager(BaseServiceManager):
except Exception as e:
logger.warning(f"set_peer_exit: apply_routes failed (non-fatal): {e}")
return {'ok': True, 'peer': peer_name, 'exit_via': exit_type}
resolved = 'default'
try:
peer = self.peer_registry.get_peer(peer_name)
if peer:
resolved = peer.get('exit_via', 'default')
except Exception:
pass
return {'ok': True, 'peer': peer_name, 'exit_via': resolved}
def _peer_exists(self, peer_name: str) -> bool:
    """Report whether the peer registry knows a peer called `peer_name`.

    Returns False when no registry is attached, when the registry returns
    None for the name, or when the lookup itself raises.
    """
    registry = self.peer_registry
    if registry is None:
        return False
    try:
        record = registry.get_peer(peer_name)
    except Exception:
        # A failing lookup is treated the same as "not registered".
        return False
    return record is not None
def upload_wireguard_ext(self, conf_text: str) -> Dict[str, Any]:
"""Validate and store an external WireGuard config."""
@@ -1121,17 +1144,26 @@ class ConnectivityManager(BaseServiceManager):
def _connection_reference(self, conn_id: str) -> Optional[str]:
    """Return a human description if a peer/egress references this connection.

    A peer references a connection through its exit_via field (a connection
    id); a service references one through the egress_overrides map. Either
    blocks deletion until the reference is detached.

    Peers are checked first (peer_registry.list_peers()), then the
    'egress_overrides' mapping in config_manager.configs; the first match
    wins. Returns None when nothing matches or when the corresponding
    manager is not attached. A lookup error is logged at debug level and
    treated as "no reference" from that source, so this never raises.
    """
    if self.peer_registry is not None:
        try:
            for peer in self.peer_registry.list_peers():
                # exit_via holds the connection id ('default' never matches
                # a real id), so a plain equality test is sufficient.
                if peer.get('exit_via') == conn_id:
                    return f"peer {peer.get('peer')!r}"
        except Exception as e:
            logger.debug(f"_connection_reference (peers): {e}")
    if self.config_manager is not None:
        try:
            overrides = self.config_manager.configs.get('egress_overrides')
            # Ignore a missing or malformed overrides entry.
            if isinstance(overrides, dict):
                for svc_id, cid in overrides.items():
                    if cid == conn_id:
                        return f"service {svc_id!r}"
        except Exception as e:
            logger.debug(f"_connection_reference (egress): {e}")
    return None
def list_connections(self) -> List[Dict[str, Any]]:
@@ -1303,7 +1335,14 @@ class ConnectivityManager(BaseServiceManager):
# ── Routing application ───────────────────────────────────────────────
def apply_routes(self) -> Dict[str, Any]:
"""Idempotently rebuild all connectivity rules and policy routing."""
"""Idempotently rebuild all connectivity rules and policy routing.
Connectivity v2: routing is driven by connection *instances*, not by
per-type constants. Each connection carries its own persisted mark,
table, iface and redirect_port; two instances of the same type route
through distinct tables/marks without collision. A peer's exit_via is
the id of the connection it egresses through.
"""
rules_applied = 0
try:
@@ -1319,18 +1358,23 @@ class ConnectivityManager(BaseServiceManager):
except Exception as e:
logger.warning(f"apply_routes: flush {table}/{chain} failed: {e}")
# Idempotent ip rule registration for each non-default exit
for exit_type in self.MARKS:
mark = self.MARKS[exit_type]
table = self.TABLES[exit_type]
connections = self._routing_connections()
# Idempotent ip rule registration: one fwmark→table rule per instance.
for conn in connections:
mark, table = conn.get('mark'), conn.get('table')
if not isinstance(mark, int) or not isinstance(table, int):
continue
try:
self._remove_ip_rule(mark, table)
self._add_ip_rule(mark, table)
rules_applied += 1
except Exception as e:
logger.warning(f"apply_routes: ip rule {exit_type} failed: {e}")
logger.warning(
f"apply_routes: ip rule {conn.get('id')} failed: {e}")
# Per-peer marking + nat redirect (Tor only)
# Per-peer marking + nat redirect, resolved through each peer's
# connection instance.
if self.peer_registry is not None:
try:
peers = self.peer_registry.list_peers()
@@ -1338,45 +1382,82 @@ class ConnectivityManager(BaseServiceManager):
logger.warning(f"apply_routes: list_peers failed: {e}")
peers = []
by_id = {c.get('id'): c for c in connections}
for peer in peers:
exit_via = peer.get('exit_via', 'default')
if exit_via == 'default' or exit_via not in self.MARKS:
conn = self._resolve_peer_connection(peer, by_id)
if conn is None:
continue
src_ip = self._peer_source_ip(peer.get('peer', ''))
if not src_ip:
continue
mark = self.MARKS[exit_via]
try:
self._add_mark_rule(src_ip, mark)
rules_applied += 1
except Exception as e:
logger.warning(
f"apply_routes: mark rule for {src_ip}/{exit_via}: {e}"
)
# Tor / sshuttle / proxy: redirect TCP to the local
# transparent-proxy port for that exit.
if exit_via in self.REDIRECT_PORTS:
try:
self._add_redirect(src_ip, self.REDIRECT_PORTS[exit_via])
rules_applied += 1
except Exception as e:
logger.warning(
f"apply_routes: {exit_via} redirect for {src_ip}: {e}"
)
rules_applied += self._apply_connection_for_src(src_ip, conn)
# Kill-switch: drop marked packets that would otherwise leak via the
# default route if the exit interface is down.
for exit_type, iface in self.IFACES.items():
mark = self.MARKS[exit_type]
# default route if an iface-based exit interface is down.
for conn in connections:
iface = conn.get('iface')
mark = conn.get('mark')
if not iface or not isinstance(mark, int):
continue
try:
self._add_killswitch(mark, iface)
rules_applied += 1
except Exception as e:
logger.warning(f"apply_routes: killswitch {exit_type}: {e}")
logger.warning(
f"apply_routes: killswitch {conn.get('id')}: {e}")
return {'ok': True, 'rules_applied': rules_applied}
def _routing_connections(self) -> List[Dict[str, Any]]:
    """Collect the connection instances that take part in routing.

    Asks config_manager.list_connections() for all instances and keeps
    those whose 'enabled' flag is truthy; a missing flag counts as enabled.
    Returns an empty list when no config manager is attached or when the
    listing call raises (the failure is logged as a warning).
    """
    manager = self.config_manager
    if manager is None:
        return []
    try:
        candidates = manager.list_connections()
    except Exception as e:
        logger.warning(f"apply_routes: list_connections failed: {e}")
        return []
    active = []
    for candidate in candidates:
        if candidate.get('enabled', True):
            active.append(candidate)
    return active
@staticmethod
def _resolve_peer_connection(
    peer: Dict[str, Any], by_id: Dict[str, Dict[str, Any]],
) -> Optional[Dict[str, Any]]:
    """Look up the connection record a peer egresses through.

    Reads the peer's exit_via (treated as 'default' when absent). Returns
    None for 'default' or when the id has no entry in `by_id`; otherwise
    returns the matching record from `by_id`.
    """
    target = peer.get('exit_via', 'default')
    return None if target == 'default' else by_id.get(target)
def _apply_connection_for_src(
    self, src_ip: str, conn: Dict[str, Any],
) -> int:
    """Install the per-source rules that steer `src_ip` through `conn`.

    Two independent steps, each counted only when it succeeds:
      1. If the connection has an integer 'mark', add a mark rule for
         src_ip via _add_mark_rule.
      2. If the connection's 'type' is in self.REDIRECT_TYPES and it has an
         integer 'redirect_port', add a redirect via _add_redirect.

    A failure in either step is logged as a warning and does not stop the
    other step. Returns the number of rules applied (0, 1 or 2).
    """
    rules = 0

    fwmark = conn.get('mark')
    if isinstance(fwmark, int):
        try:
            self._add_mark_rule(src_ip, fwmark)
        except Exception as e:
            logger.warning(
                f"apply_routes: mark rule for {src_ip}/{conn.get('id')}: {e}")
        else:
            rules += 1

    port = conn.get('redirect_port')
    wants_redirect = (
        conn.get('type') in self.REDIRECT_TYPES and isinstance(port, int)
    )
    if wants_redirect:
        try:
            self._add_redirect(src_ip, port)
        except Exception as e:
            logger.warning(
                f"apply_routes: redirect for {src_ip}/{conn.get('id')}: {e}")
        else:
            rules += 1

    return rules
# ── iptables / ip rule helpers ────────────────────────────────────────
def _wg_iptables(self, args: List[str], timeout: int = 10) -> subprocess.CompletedProcess: