Files
pic/api/egress_manager.py
T
roof 89aed4efe0
Unit Tests / test (push) Successful in 12m6s
feat: connectivity redesign phase 2 — instance-aware routing + reference connections by id
apply_routes now iterates over connection instances rather than types:
each instance gets its own fwmark, routing table, interface, and
redirect_port via _routing_connections / _resolve_peer_connection /
_apply_connection_for_src; kill-switch is enforced per iface-instance.
Old per-type MARKS/TABLES constants are kept only as migration scaffolding.

peer_registry: exit_via is now stored as a connection id (or 'default');
_migrate_exit_via_to_connection_id runs on _load_peers to upgrade legacy
type-string values; set_peer_exit_via validates against known connection
ids; VALID_EXIT_VIA removed; config_manager wired in from managers.py.

egress_manager: egress_overrides keyed by service_id → connection_id;
local MARKS/TABLES/EXIT_TYPES/_REDIRECT_PORTS/_add_tor_redirect removed;
(mark, table, redirect_port) resolved at apply-time via
connectivity_manager.get_connection; manifest egress.allowed still
enforced by connection type.

api/app.py + api.js: PUT peer/service exit endpoints accept {connection_id};
back-compat shim resolves a legacy type string to its single active instance.

Tests extended: two same-type instances produce distinct marks/tables/ports;
peer exit_via and egress override id migrations round-trip correctly;
single-instance behaviour is equivalent to the old type-keyed path.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 17:35:28 -04:00

430 lines
18 KiB
Python

#!/usr/bin/env python3
"""
EgressManager — per-service egress enforcement.
Routes outbound traffic from installed service containers through
alternate exits (wireguard_ext, openvpn, tor) using host-side
iptables fwmark policy-routing. Integrates with ServiceStoreManager
for install/remove lifecycle hooks.
Rules live on the HOST in PIC_EGRESS chains in the mangle and nat
tables. Container IPs are discovered via docker inspect using the
container_name from the service manifest.
Connectivity v2: a service routes through a *connection instance* (by id),
sharing the same fwmark / routing table / redirect port as any peer that
egresses through the same connection. The (mark, table, redirect_port) for a
service are resolved from ConnectivityManager.get_connection(id) — EgressManager
no longer owns its own per-type MARKS/TABLES tables.
"""
import logging
import subprocess
import time
from typing import Any, Dict, List, Optional
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Name of the custom iptables chain that holds every per-service egress rule.
# EgressManager creates a chain with this name in both the mangle and nat tables.
EGRESS_CHAIN = "PIC_EGRESS"
class EgressManager:
    """Per-service egress enforcement via host iptables fwmark policy-routing.

    A service is bound to a *connection instance* (by id). The connection
    record supplies the fwmark, routing table and optional redirect port;
    this class only translates that into host-side rules:

    * a MARK rule in ``mangle/PIC_EGRESS`` for the container's source IP,
    * an ``ip rule fwmark <mark> lookup <table>`` policy-routing rule,
    * optionally a REDIRECT rule in ``nat/PIC_EGRESS`` for TCP traffic.

    Every iptables rule is tagged with a ``pic-egr-<service_id>`` comment so
    it can be found and removed again.
    """

    # Legacy exit *type* strings. They are still accepted as a back-compat
    # shim and resolved to the single connection instance of that type.
    _LEGACY_EXIT_TYPES = ('wireguard_ext', 'openvpn', 'tor', 'sshuttle', 'proxy')

    def __init__(self, config_manager, service_store_manager=None,
                 connectivity_manager=None,
                 data_dir: str = "/app/data", config_dir: str = "/app/config"):
        """Store collaborators; performs no I/O.

        Args:
            config_manager: Provides ``get_installed_services()``, the
                ``configs`` mapping and ``_save_all_configs()``.
            service_store_manager: Stored for lifecycle integration; not
                used by the methods in this class.
            connectivity_manager: Preferred source of connection records
                (``list_connections()`` / ``get_connection(id)``). When it
                is ``None`` the config manager is asked instead.
            data_dir: Data directory path (stored only).
            config_dir: Config directory path (stored only).
        """
        self.config_manager = config_manager
        self.service_store_manager = service_store_manager
        self.connectivity_manager = connectivity_manager
        self._data_dir = data_dir
        self._config_dir = config_dir

    # ── Public API ─────────────────────────────────────────────────────────
    def apply_service(self, service_id: str) -> Dict[str, Any]:
        """Idempotently apply egress rules for one installed service.

        Steps:
          1. Look up the service manifest.
          2. clear_service first (ensures idempotency).
          3. If the manifest has no egress block, skip silently.
          4. Discover the container IP.
          5. Resolve the connection id (override > manifest default > 'default').
          6. If 'default', return early with no rules.
          7. Otherwise resolve the connection's (mark, table, redirect_port),
             create chains, ensure ip rules, add mark/redirect rules.

        Returns:
            A dict with ``'ok'``. Failures carry ``'error'``; success carries
            ``'exit_via'`` (and ``'container_ip'`` when rules were installed)
            or ``'skipped'`` when the manifest declares no egress.
        """
        manifest = self._get_manifest(service_id)
        if manifest is None:
            return {'ok': False, 'error': f'manifest not found for {service_id}'}

        # Always clear first so re-applying never stacks duplicate rules.
        self.clear_service(service_id)

        if not self._has_egress(manifest):
            return {'ok': True, 'skipped': True}

        container_ip = self._discover_container_ip(
            manifest.get('container_name', '')
        )
        if not container_ip:
            return {'ok': False, 'error': 'container IP not discoverable'}

        connection_id = self._resolve_exit(service_id, manifest)
        if connection_id == 'default':
            return {'ok': True, 'exit_via': 'default'}

        conn = self._get_connection(connection_id)
        if conn is None:
            return {
                'ok': False,
                'error': f'unknown connection {connection_id!r}',
            }

        mark = conn.get('mark')
        table = conn.get('table')
        if not isinstance(mark, int) or not isinstance(table, int):
            return {
                'ok': False,
                'error': f'connection {connection_id!r} has no routing resources',
            }

        try:
            self._ensure_chains()
            self._ensure_host_ip_rule(mark, table)
            self._add_mark_rule(container_ip, mark, service_id)
            redirect_port = conn.get('redirect_port')
            if isinstance(redirect_port, int):
                self._add_redirect(container_ip, redirect_port, service_id)
        except Exception as exc:
            # The helpers raise when a rule could not be installed; surface
            # that to the caller instead of claiming the service is routed.
            logger.error('apply_service(%s): %s', service_id, exc)
            return {'ok': False, 'error': str(exc)}

        return {'ok': True, 'exit_via': connection_id,
                'container_ip': container_ip}

    def clear_service(self, service_id: str) -> Dict[str, Any]:
        """Remove all PIC_EGRESS rules tagged for this service.

        Returns:
            ``{'ok': True}``, or ``{'ok': False, 'error': ...}`` if the
            removal helper raised.
        """
        try:
            self._clear_egress_rules(service_id)
        except Exception as exc:
            logger.error('clear_service(%s): %s', service_id, exc)
            return {'ok': False, 'error': str(exc)}
        return {'ok': True}

    def apply_all(self) -> Dict[str, Any]:
        """Apply egress rules for every installed service that has a manifest.

        Returns:
            ``{'ok': True, 'services': {service_id: apply_service result}}``.
        """
        results: Dict[str, Any] = {}
        for svc_id, record in self._installed_services().items():
            if not isinstance(record, dict) or not record.get('manifest'):
                continue
            results[svc_id] = self.apply_service(svc_id)
        return {'ok': True, 'services': results}

    def set_service_exit(self, service_id: str, connection_id: str) -> Dict[str, Any]:
        """Persist a per-service egress override (by connection id) and reapply.

        `connection_id` is a real connection id or 'default'. A legacy exit
        *type* string is accepted as a one-release back-compat shim and resolved
        to the single connection instance of that type. The resolved
        connection's type must be in the manifest's egress.allowed list
        (when the manifest provides one).

        Returns:
            The ``apply_service`` result on success, otherwise
            ``{'ok': False, 'error': ...}`` with nothing persisted.
        """
        manifest = self._get_manifest(service_id)
        if manifest is None:
            return {'ok': False, 'error': f'service {service_id!r} not installed'}
        if not self._has_egress(manifest):
            return {'ok': False, 'error': f'service {service_id!r} has no egress configuration'}

        if connection_id == 'default':
            return self._store_override_and_apply(service_id, 'default')

        resolved = self._resolve_connection_id(connection_id)
        if resolved is None:
            return {
                'ok': False,
                'error': f"unknown connection {connection_id!r}; "
                         f"must be a connection id or 'default'",
            }

        conn = self._get_connection(resolved)
        if conn is None:
            # Fail closed: without the record the allowed-list check cannot
            # run, so do not persist an override we were unable to validate.
            return {'ok': False, 'error': f'unknown connection {resolved!r}'}

        allowed = (manifest.get('egress') or {}).get('allowed')
        if isinstance(allowed, list) and conn.get('type') not in allowed:
            return {
                'ok': False,
                'error': (
                    f"connection type {conn.get('type')!r} is not in the "
                    f'allowed list for {service_id}: {allowed}'
                ),
            }

        # Persist the override so it survives restarts.
        return self._store_override_and_apply(service_id, resolved)

    def get_status(self) -> Dict[str, Any]:
        """Return egress status for every installed service that has egress config.

        Returns:
            ``{'ok': True, 'services': {service_id: {'exit_via',
            'container_ip', 'has_egress'}}}``. The container IP is looked up
            with a single attempt and may be ``None``.
        """
        statuses: Dict[str, Any] = {}
        for svc_id, record in self._installed_services().items():
            if not isinstance(record, dict):
                continue
            manifest = record.get('manifest')
            if not manifest or not self._has_egress(manifest):
                continue
            statuses[svc_id] = {
                'exit_via': self._resolve_exit(svc_id, manifest),
                'container_ip': self._discover_container_ip(
                    manifest.get('container_name', ''), retries=1
                ),
                'has_egress': True,
            }
        return {'ok': True, 'services': statuses}

    # ── Connection lookup ──────────────────────────────────────────────────
    def _connection_source(self):
        """Return the object to ask for connections, or None.

        The connectivity manager wins; the config manager is the fallback.
        """
        if self.connectivity_manager is not None:
            return self.connectivity_manager
        return self.config_manager

    def _connections(self) -> List[dict]:
        """Return the v2 connection records, or [] when unavailable."""
        source = self._connection_source()
        if source is None:
            return []
        try:
            conns = source.list_connections()
        except Exception as exc:
            logger.warning('egress: list_connections failed: %s', exc)
            return []
        return conns if isinstance(conns, list) else []

    def _get_connection(self, connection_id: str) -> Optional[dict]:
        """Resolve a connection record (with mark/table/redirect_port) by id.

        Returns None when no source is configured or the lookup raises.
        """
        source = self._connection_source()
        if source is None:
            return None
        try:
            return source.get_connection(connection_id)
        except Exception as exc:
            logger.warning('egress: get_connection failed: %s', exc)
            return None

    def _resolve_connection_id(self, value: str) -> Optional[str]:
        """Resolve a value to a valid connection id.

        Accepts a real connection id, or — as a back-compat shim — a legacy
        type string resolved to the single instance of that type. Returns None
        when nothing matches (including when a legacy type is ambiguous
        because several instances of it exist).
        """
        conns = self._connections()
        if any(c.get('id') == value for c in conns):
            return value
        if value in self._LEGACY_EXIT_TYPES:
            same_type = [c for c in conns if c.get('type') == value]
            if len(same_type) == 1:
                return same_type[0].get('id')
        return None

    # ── Internals ──────────────────────────────────────────────────────────
    def _installed_services(self) -> Dict[str, Any]:
        """Return the installed-services mapping, or {} if it is not a dict."""
        installed = self.config_manager.get_installed_services()
        return installed if isinstance(installed, dict) else {}

    def _get_manifest(self, service_id: str) -> Optional[dict]:
        """Retrieve the manifest for an installed service, if available.

        Returns None when the service is unknown or its record/manifest is
        not a dict, so callers never have to guard against malformed
        config entries.
        """
        record = self._installed_services().get(service_id)
        if not isinstance(record, dict):
            return None
        manifest = record.get('manifest')
        return manifest if isinstance(manifest, dict) else None

    def _has_egress(self, manifest: dict) -> bool:
        """Return True only when the manifest explicitly declares an egress block.

        Both a truthy ``has_egress`` flag and a non-empty ``egress`` entry
        are required.
        """
        return bool(manifest.get('has_egress', False) and manifest.get('egress'))

    def _resolve_exit(self, service_id: str, manifest: dict) -> str:
        """Determine the effective connection id for a service.

        Priority: persisted override > manifest egress.default > 'default'.
        Legacy type strings (from old overrides or a manifest default) are
        resolved to the single connection instance of that type; if that can't
        be resolved the service falls back to 'default'.
        """
        overrides = self._get_egress_overrides()
        if service_id in overrides:
            value = overrides[service_id]
        else:
            value = (manifest.get('egress') or {}).get('default', 'default')

        if value == 'default':
            return 'default'
        if value in self._LEGACY_EXIT_TYPES:
            resolved = self._resolve_connection_id(value)
            return resolved if resolved is not None else 'default'
        return value

    def _store_override_and_apply(self, service_id: str,
                                  connection_id: str) -> Dict[str, Any]:
        """Record ``service_id -> connection_id`` in the overrides and reapply."""
        overrides = self._get_egress_overrides()
        overrides[service_id] = connection_id
        self._set_egress_overrides(overrides)
        return self.apply_service(service_id)

    def _discover_container_ip(self, container_name: str,
                               retries: int = 5, delay: float = 0.2) -> Optional[str]:
        """Return the container's cell-network IP, retrying on transient failure.

        Args:
            container_name: Docker container name; empty means "unknown".
            retries: Maximum number of ``docker inspect`` attempts.
            delay: Seconds to sleep between attempts.

        Returns:
            The IP address string, or None if it could not be determined.
            A missing docker binary or a timed-out call counts as a failed
            attempt rather than an error.
        """
        if not container_name:
            return None
        cmd = [
            'docker', 'inspect',
            # The network name contains a hyphen, which a Go template cannot
            # parse as a dotted field; it has to be looked up via `index`.
            '-f', '{{with index .NetworkSettings.Networks "cell-network"}}'
                  '{{.IPAddress}}{{end}}',
            container_name,
        ]
        for attempt in range(retries):
            try:
                result = self._run(cmd)
            except (subprocess.SubprocessError, OSError) as exc:
                logger.warning('_discover_container_ip(%s): %s',
                               container_name, exc)
                result = None
            if result is not None and result.returncode == 0:
                ip = (result.stdout or '').strip()
                if ip:
                    return ip
            if attempt < retries - 1:
                time.sleep(delay)
        return None

    def _ensure_chains(self) -> None:
        """Idempotently create PIC_EGRESS chains in mangle and nat on the host.

        For each table the chain is created if listing it fails, and a jump
        from PREROUTING is inserted at position 1 unless one already exists.
        """
        for table in ('mangle', 'nat'):
            # Listing the chain fails when it does not exist yet.
            listed = self._iptables(['-t', table, '-L', EGRESS_CHAIN, '-n'])
            if listed.returncode != 0:
                created = self._iptables(['-t', table, '-N', EGRESS_CHAIN])
                if created.returncode != 0 and 'exists' not in (created.stderr or ''):
                    logger.warning(
                        '_ensure_chains: cannot create %s/%s: %s',
                        table, EGRESS_CHAIN, (created.stderr or '').strip(),
                    )
            # -C checks for the jump so it is inserted only once.
            has_jump = self._iptables(
                ['-t', table, '-C', 'PREROUTING', '-j', EGRESS_CHAIN]
            )
            if has_jump.returncode != 0:
                self._iptables(
                    ['-t', table, '-I', 'PREROUTING', '1', '-j', EGRESS_CHAIN]
                )

    def _ensure_host_ip_rule(self, mark: int, table: int) -> None:
        """Ensure a single `ip rule fwmark <mark> lookup <table>` exists.

        Idempotent: drains any duplicate rules first, then adds exactly one.
        The mark/table belong to the connection instance the service routes
        through, so a peer and a service on the same connection share the rule.

        Raises:
            RuntimeError: If the rule could not be added (an "already
                exists" answer is not treated as a failure).
        """
        selector = ['fwmark', hex(mark), 'lookup', str(table)]
        # Delete until `ip rule del` reports nothing left (bounded at 8).
        for _ in range(8):
            if self._ip_rule(['del'] + selector).returncode != 0:
                break
        added = self._ip_rule(['add'] + selector)
        stderr = (added.stderr or '').strip()
        if added.returncode != 0 and 'exists' not in stderr:
            raise RuntimeError(
                f'ip rule add fwmark {hex(mark)} lookup {table} failed: {stderr}'
            )

    def _add_mark_rule(self, service_ip: str, mark: int, service_id: str) -> None:
        """Mark outbound packets from the service container with fwmark.

        Raises:
            subprocess.CalledProcessError: If iptables rejects the rule.
        """
        self._iptables([
            '-t', 'mangle', '-A', EGRESS_CHAIN,
            '-s', service_ip,
            '-j', 'MARK', '--set-mark', hex(mark),
            '-m', 'comment', '--comment', self._tag(service_id),
        ], check=True)

    def _add_redirect(self, service_ip: str, port: int, service_id: str) -> None:
        """Redirect the container's TCP traffic to a local transparent-proxy port.

        Raises:
            subprocess.CalledProcessError: If iptables rejects the rule.
        """
        self._iptables([
            '-t', 'nat', '-A', EGRESS_CHAIN,
            '-s', service_ip, '-p', 'tcp',
            '-j', 'REDIRECT', '--to-ports', str(port),
            '-m', 'comment', '--comment', self._tag(service_id),
        ], check=True)

    def _clear_egress_rules(self, service_id: str) -> None:
        """Remove all rules tagged pic-egr-<service_id> from mangle and nat.

        Works per table: dump it with iptables-save, drop the lines carrying
        this service's comment tag, and feed the remainder back through
        iptables-restore. Best-effort: failures are logged, never raised.
        """
        import re as _re

        tag = self._tag(service_id)
        # The trailing (\s|$) keeps 'pic-egr-foo' from matching 'pic-egr-foobar'.
        comment_re = _re.compile(
            rf'--comment\s+["\']?{_re.escape(tag)}["\']?(\s|$)'
        )
        for table in ('mangle', 'nat'):
            try:
                save = self._run(['iptables-save', '-t', table])
                if save.returncode != 0:
                    continue
                lines = save.stdout.splitlines()
                kept = [ln for ln in lines if not comment_re.search(ln)]
                if len(kept) == len(lines):
                    continue  # nothing to remove
                # NOTE(review): save-then-restore is presumably not atomic
                # with respect to other processes editing the same table —
                # confirm whether that matters on this host.
                restore = self._run(
                    ['iptables-restore', '-T', table],
                    stdin_text='\n'.join(kept) + '\n',
                )
                if restore.returncode != 0:
                    logger.warning(
                        '_clear_egress_rules(%s): iptables-restore for %s failed: %s',
                        service_id, table, (restore.stderr or '').strip(),
                    )
            except Exception as exc:
                logger.error('_clear_egress_rules(%s, %s): %s', service_id, table, exc)

    @staticmethod
    def _tag(service_id: str) -> str:
        """iptables comment tag used to identify rules belonging to a service."""
        return f'pic-egr-{service_id}'

    @staticmethod
    def _run(cmd: List[str],
             stdin_text: Optional[str] = None) -> subprocess.CompletedProcess:
        """Run a host command with captured text output and a 10 s timeout.

        Args:
            cmd: Argument vector (never passed through a shell).
            stdin_text: Optional text to feed to the command's stdin.
        """
        kwargs: Dict[str, Any] = {
            'capture_output': True, 'text': True, 'timeout': 10,
        }
        if stdin_text is not None:
            kwargs['input'] = stdin_text
        return subprocess.run(cmd, **kwargs)

    def _iptables(self, args: List[str], check: bool = False) -> subprocess.CompletedProcess:
        """Run iptables on the host with the given arguments.

        Args:
            args: Arguments appended after ``iptables``.
            check: When True, a non-zero exit status raises instead of
                being returned.

        Raises:
            subprocess.CalledProcessError: Non-zero exit and ``check`` is True.
            Exception: Whatever launching the process raised (logged first).
        """
        cmd = ['iptables'] + args
        try:
            result = self._run(cmd)
        except Exception as exc:
            logger.error('_iptables %s: %s', args, exc)
            raise
        if check and result.returncode != 0:
            logger.error('_iptables %s failed (%s): %s', args,
                         result.returncode, (result.stderr or '').strip())
            raise subprocess.CalledProcessError(
                result.returncode, cmd,
                output=result.stdout, stderr=result.stderr,
            )
        return result

    def _ip_rule(self, args: List[str]) -> subprocess.CompletedProcess:
        """Run `ip rule` on the host with the given arguments.

        Launch failures are logged and re-raised; the exit status is left
        for the caller to interpret.
        """
        cmd = ['ip', 'rule'] + args
        try:
            return self._run(cmd)
        except Exception as exc:
            logger.error('_ip_rule %s: %s', args, exc)
            raise

    # ── Config persistence helpers ─────────────────────────────────────────
    def _get_egress_overrides(self) -> Dict[str, str]:
        """Return a copy of the persisted override map {service_id: connection_id}.

        Values are connection ids or 'default'; legacy type strings may
        still be present and are handled by ``_resolve_exit``. Returns {}
        when nothing usable is stored.
        """
        try:
            overrides = self.config_manager.configs.get('egress_overrides')
        except Exception as exc:
            logger.warning('_get_egress_overrides: %s', exc)
            return {}
        return dict(overrides) if isinstance(overrides, dict) else {}

    def _set_egress_overrides(self, overrides: Dict[str, str]) -> None:
        """Persist the egress override map to config (errors are logged)."""
        try:
            self.config_manager.configs['egress_overrides'] = overrides
            self.config_manager._save_all_configs()
        except Exception as exc:
            logger.error('_set_egress_overrides: %s', exc)