feat: Phase 3 - per-peer internet routing via exit cell
Adds the ability to route a specific peer's internet traffic through a
connected cell acting as an exit relay.
Cell A side:
- PUT /api/peers/<peer>/route-via {"via_cell": "cellB"} sets route_via
- Updates WG AllowedIPs to include 0.0.0.0/0 for the exit cell peer
- Adds ip rule + ip route in policy table inside cell-wireguard so the
specific peer's traffic egresses via cellB's WG IP
- Sets exit_relay_active on the cell link and pushes use_as_exit_relay=True
to cellB via peer-sync
Cell B side:
- Receives use_as_exit_relay in the peer-sync payload
- Calls apply_cell_rules(..., exit_relay=True) to add FORWARD -o eth0 ACCEPT
- Stores remote_exit_relay_active flag for startup recovery
Startup recovery:
- apply_all_cell_rules passes exit_relay=remote_exit_relay_active (cellB)
- _apply_startup_enforcement reapplies ip rule for each peer with route_via (cellA)
since policy routing rules don't survive container restart
peer_registry gets route_via field with lazy migration.
22 new tests across test_cell_link_manager, test_peer_registry, test_peer_route_via.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+13
@@ -275,6 +275,19 @@ def _apply_startup_enforcement():
|
||||
firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, _configured_domain(),
|
||||
cell_links=cell_links)
|
||||
logger.info(f"Applied enforcement rules for {len(peers)} peers, {len(cell_links)} cells on startup")
|
||||
# Phase 3: reapply policy routing rules for peers whose internet traffic is
|
||||
# routed through an exit cell (ip rule entries don't survive container restart)
|
||||
cell_links_map = {l['cell_name']: l for l in cell_links}
|
||||
for peer in peers:
|
||||
via_cell = peer.get('route_via')
|
||||
if not via_cell:
|
||||
continue
|
||||
link = cell_links_map.get(via_cell)
|
||||
if not link:
|
||||
continue
|
||||
peer_ip = peer.get('ip', '').split('/')[0]
|
||||
if peer_ip:
|
||||
wireguard_manager.apply_peer_route_via(peer_ip, via_wg_ip=link['dns_ip'])
|
||||
sync_summary = cell_link_manager.replay_pending_pushes()
|
||||
if sync_summary.get('attempted'):
|
||||
logger.info(f"Startup permission sync: {sync_summary}")
|
||||
|
||||
@@ -84,6 +84,13 @@ class CellLinkManager:
|
||||
if 'remote_exit_offered' not in link:
|
||||
link['remote_exit_offered'] = False
|
||||
changed = True
|
||||
# Phase 3 migration: per-peer internet routing
|
||||
if 'exit_relay_active' not in link:
|
||||
link['exit_relay_active'] = False
|
||||
changed = True
|
||||
if 'remote_exit_relay_active' not in link:
|
||||
link['remote_exit_relay_active'] = False
|
||||
changed = True
|
||||
if changed:
|
||||
self._save(links)
|
||||
return links
|
||||
@@ -154,6 +161,7 @@ class CellLinkManager:
|
||||
'inbound': dict(perms.get('outbound', {})),
|
||||
},
|
||||
'exit_offered': bool(link.get('exit_offered', False)),
|
||||
'use_as_exit_relay': bool(link.get('exit_relay_active', False)),
|
||||
'sent_at': datetime.utcnow().isoformat() + 'Z',
|
||||
}
|
||||
payload = json.dumps(body)
|
||||
@@ -239,7 +247,8 @@ class CellLinkManager:
|
||||
|
||||
def apply_remote_permissions(self, from_public_key: str,
|
||||
permissions: Dict[str, Any],
|
||||
exit_offered: bool = False) -> Dict[str, Any]:
|
||||
exit_offered: bool = False,
|
||||
use_as_exit_relay: bool = False) -> Dict[str, Any]:
|
||||
"""Store permissions pushed by a remote cell (identified by WG public key).
|
||||
|
||||
Validates service names, persists, and re-applies local iptables rules.
|
||||
@@ -257,13 +266,15 @@ class CellLinkManager:
|
||||
|
||||
link['permissions'] = {'inbound': clean_inbound, 'outbound': clean_outbound}
|
||||
link['remote_exit_offered'] = bool(exit_offered)
|
||||
link['remote_exit_relay_active'] = bool(use_as_exit_relay)
|
||||
link['last_remote_update_at'] = datetime.utcnow().isoformat()
|
||||
self._save(links)
|
||||
|
||||
inbound_list = [s for s, v in clean_inbound.items() if v]
|
||||
try:
|
||||
import firewall_manager as _fm
|
||||
_fm.apply_cell_rules(link['cell_name'], link['vpn_subnet'], inbound_list)
|
||||
_fm.apply_cell_rules(link['cell_name'], link['vpn_subnet'], inbound_list,
|
||||
exit_relay=use_as_exit_relay)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"apply_cell_rules after remote push for '{link['cell_name']}' "
|
||||
@@ -470,6 +481,22 @@ class CellLinkManager:
|
||||
self._try_push(cell_name, link)
|
||||
return link
|
||||
|
||||
def set_exit_relay_active(self, cell_name: str, active: bool) -> Dict[str, Any]:
|
||||
"""Record that THIS cell is routing a peer's internet traffic via cell_name.
|
||||
|
||||
Persists the flag locally and pushes updated state to the remote cell so
|
||||
it can enable/disable the FORWARD-to-eth0 rule on its side.
|
||||
Returns the updated link record.
|
||||
"""
|
||||
links = self._load()
|
||||
link = next((l for l in links if l['cell_name'] == cell_name), None)
|
||||
if not link:
|
||||
raise ValueError(f"Cell '{cell_name}' not found")
|
||||
link['exit_relay_active'] = bool(active)
|
||||
self._save(links)
|
||||
self._try_push(cell_name, link)
|
||||
return link
|
||||
|
||||
def get_connection_status(self, cell_name: str) -> Dict[str, Any]:
|
||||
"""Return link record enriched with live WireGuard handshake status."""
|
||||
links = self._load()
|
||||
|
||||
+24
-7
@@ -262,7 +262,8 @@ def _get_cell_api_ip() -> Optional[str]:
|
||||
return r.stdout.strip()
|
||||
|
||||
|
||||
def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str]) -> bool:
|
||||
def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str],
|
||||
exit_relay: bool = False) -> bool:
|
||||
"""Apply FORWARD rules for a cell-to-cell peer.
|
||||
|
||||
Traffic from vpn_subnet is allowed only to service VIPs listed in
|
||||
@@ -270,10 +271,15 @@ def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str
|
||||
internet or peer access — only explicit service VIPs, plus the
|
||||
cell-api port (3000) for permission-sync pushes arriving via DNAT.
|
||||
|
||||
Rule insertion order (last inserted → top of chain):
|
||||
1. Catch-all DROP for the subnet (inserted first → bottom)
|
||||
2. Per-service ACCEPT/DROP (inserted in reversed() order)
|
||||
3. API-sync ACCEPT (inserted last → top, above catch-all)
|
||||
When exit_relay=True, the remote cell's peers can route internet
|
||||
traffic through this cell (Phase 3). A broad ACCEPT for traffic
|
||||
going out eth0 is added below per-service rules but above catch-all.
|
||||
|
||||
Rule insertion order (first inserted = bottom, last inserted = top):
|
||||
1. Catch-all DROP for the subnet (inserted first → bottom)
|
||||
2. Exit relay ACCEPT (-o eth0) (if exit_relay, above catch-all)
|
||||
3. Per-service ACCEPT/DROP (inserted in reversed() order)
|
||||
4. API-sync ACCEPT (inserted last → top)
|
||||
"""
|
||||
try:
|
||||
tag = _cell_tag(cell_name)
|
||||
@@ -283,6 +289,13 @@ def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str
|
||||
_iptables(['-I', 'FORWARD', '-s', vpn_subnet,
|
||||
'-m', 'comment', '--comment', tag, '-j', 'DROP'])
|
||||
|
||||
# Exit relay ACCEPT — allow internet-bound traffic from this cell's peers.
|
||||
# Inserted ABOVE catch-all but BELOW per-service rules so service-level
|
||||
# DROP rules still take effect for specific service VIPs.
|
||||
if exit_relay:
|
||||
_iptables(['-I', 'FORWARD', '-s', vpn_subnet, '-o', 'eth0',
|
||||
'-m', 'comment', '--comment', tag, '-j', 'ACCEPT'])
|
||||
|
||||
# Per-service rules — inserted in reverse dict order, highest-priority last
|
||||
for service, svc_ip in reversed(list(SERVICE_IPS.items())):
|
||||
target = 'ACCEPT' if service in inbound_services else 'DROP'
|
||||
@@ -298,7 +311,10 @@ def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str
|
||||
'-p', 'tcp', '--dport', '3000',
|
||||
'-m', 'comment', '--comment', tag, '-j', 'ACCEPT'])
|
||||
|
||||
logger.info(f"Applied cell rules for {cell_name} ({vpn_subnet}): inbound={inbound_services}")
|
||||
logger.info(
|
||||
f"Applied cell rules for {cell_name} ({vpn_subnet}): "
|
||||
f"inbound={inbound_services} exit_relay={exit_relay}"
|
||||
)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"apply_cell_rules({cell_name}): {e}")
|
||||
@@ -314,7 +330,8 @@ def apply_all_cell_rules(cell_links: List[Dict[str, Any]]) -> None:
|
||||
continue
|
||||
perms = link.get('permissions', {})
|
||||
inbound = [s for s, v in perms.get('inbound', {}).items() if v]
|
||||
apply_cell_rules(name, subnet, inbound)
|
||||
exit_relay = bool(link.get('remote_exit_relay_active', False))
|
||||
apply_cell_rules(name, subnet, inbound, exit_relay=exit_relay)
|
||||
|
||||
|
||||
def ensure_cell_api_dnat() -> bool:
|
||||
|
||||
@@ -193,6 +193,14 @@ class PeerRegistry(BaseServiceManager):
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error loading peers: {e}")
|
||||
self.peers = []
|
||||
# Phase 3 migration: per-peer internet routing
|
||||
changed = False
|
||||
for peer in self.peers:
|
||||
if 'route_via' not in peer:
|
||||
peer['route_via'] = None
|
||||
changed = True
|
||||
if changed:
|
||||
self._save_peers()
|
||||
else:
|
||||
self.peers = []
|
||||
self.logger.info("No peers file found, starting with empty registry")
|
||||
@@ -326,6 +334,18 @@ class PeerRegistry(BaseServiceManager):
|
||||
self.logger.error(f"Error updating peer {name} IP: {e}")
|
||||
return False
|
||||
|
||||
def set_route_via(self, peer_name: str, via_cell: Optional[str]) -> Dict[str, Any]:
|
||||
"""Set or clear the route_via field on a peer. Returns the updated peer dict."""
|
||||
with self.lock:
|
||||
for peer in self.peers:
|
||||
if peer.get('peer') == peer_name:
|
||||
peer['route_via'] = via_cell
|
||||
peer['updated_at'] = datetime.utcnow().isoformat()
|
||||
self._save_peers()
|
||||
self.logger.info(f"Set route_via for {peer_name}: {via_cell!r}")
|
||||
return dict(peer)
|
||||
raise ValueError(f"Peer '{peer_name}' not found")
|
||||
|
||||
def get_peer_stats(self) -> Dict[str, Any]:
|
||||
"""Get peer registry statistics"""
|
||||
try:
|
||||
|
||||
+4
-1
@@ -219,8 +219,11 @@ def peer_sync_permissions():
|
||||
return jsonify({'ok': False, 'error': f'unknown service: {svc!r}'}), 400
|
||||
|
||||
exit_offered = bool(data.get('exit_offered', False))
|
||||
use_as_exit_relay = bool(data.get('use_as_exit_relay', False))
|
||||
from app import cell_link_manager
|
||||
cell_link_manager.apply_remote_permissions(sender_pubkey, perms, exit_offered=exit_offered)
|
||||
cell_link_manager.apply_remote_permissions(sender_pubkey, perms,
|
||||
exit_offered=exit_offered,
|
||||
use_as_exit_relay=use_as_exit_relay)
|
||||
return jsonify({'ok': True, 'applied_at': datetime.utcnow().isoformat()})
|
||||
except ValueError as e:
|
||||
return jsonify({'ok': False, 'error': str(e)}), 404
|
||||
|
||||
@@ -183,6 +183,77 @@ def update_peer(peer_name):
|
||||
return jsonify({"error": str(e)}), 500
|
||||
|
||||
|
||||
@bp.route('/api/peers/<peer_name>/route-via', methods=['PUT'])
|
||||
def set_peer_route_via(peer_name):
|
||||
"""Route a peer's internet traffic through a connected exit cell.
|
||||
|
||||
Body: {"via_cell": "cellB"} to enable, {"via_cell": null} to disable.
|
||||
On enable: updates WG AllowedIPs and adds policy routing rule inside
|
||||
cell-wireguard so the peer's packets egress through the exit cell.
|
||||
On disable: reverts AllowedIPs and removes the ip rule.
|
||||
Also signals the exit cell to add/remove the FORWARD-to-eth0 firewall rule.
|
||||
"""
|
||||
try:
|
||||
from app import peer_registry, wireguard_manager, cell_link_manager
|
||||
data = request.get_json(silent=True)
|
||||
if data is None or 'via_cell' not in data:
|
||||
return jsonify({'error': 'via_cell field required (string or null)'}), 400
|
||||
|
||||
via_cell = data['via_cell']
|
||||
if via_cell is not None and not isinstance(via_cell, str):
|
||||
return jsonify({'error': 'via_cell must be a string or null'}), 400
|
||||
|
||||
peer = peer_registry.get_peer(peer_name)
|
||||
if not peer:
|
||||
return jsonify({'error': 'Peer not found'}), 404
|
||||
|
||||
peer_ip = peer.get('ip', '').split('/')[0]
|
||||
if not peer_ip:
|
||||
return jsonify({'error': 'Peer has no IP assigned'}), 400
|
||||
|
||||
old_via = peer.get('route_via')
|
||||
|
||||
# Remove old routing if switching away from a previous exit cell
|
||||
if old_via and old_via != via_cell:
|
||||
old_link = next(
|
||||
(l for l in cell_link_manager.list_connections()
|
||||
if l['cell_name'] == old_via), None
|
||||
)
|
||||
if old_link:
|
||||
wireguard_manager.update_cell_peer_allowed_ips(
|
||||
old_link['public_key'], old_link['vpn_subnet'],
|
||||
add_default_route=False)
|
||||
wireguard_manager.remove_peer_route_via(peer_ip)
|
||||
try:
|
||||
cell_link_manager.set_exit_relay_active(old_via, False)
|
||||
except Exception as e:
|
||||
logger.warning(f"set_exit_relay_active(False) for {old_via!r} failed: {e}")
|
||||
|
||||
# Apply new routing
|
||||
if via_cell:
|
||||
link = next(
|
||||
(l for l in cell_link_manager.list_connections()
|
||||
if l['cell_name'] == via_cell), None
|
||||
)
|
||||
if not link:
|
||||
return jsonify({'error': f"Cell {via_cell!r} not connected"}), 404
|
||||
wireguard_manager.update_cell_peer_allowed_ips(
|
||||
link['public_key'], link['vpn_subnet'], add_default_route=True)
|
||||
wireguard_manager.apply_peer_route_via(peer_ip, via_wg_ip=link['dns_ip'])
|
||||
try:
|
||||
cell_link_manager.set_exit_relay_active(via_cell, True)
|
||||
except Exception as e:
|
||||
logger.warning(f"set_exit_relay_active(True) for {via_cell!r} failed: {e}")
|
||||
|
||||
updated_peer = peer_registry.set_route_via(peer_name, via_cell)
|
||||
return jsonify({'message': f"Route-via for '{peer_name}' updated", 'peer': updated_peer})
|
||||
except ValueError as e:
|
||||
return jsonify({'error': str(e)}), 404
|
||||
except Exception as e:
|
||||
logger.error(f"Error setting route-via for {peer_name}: {e}")
|
||||
return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
@bp.route('/api/peers/<peer_name>/clear-reinstall', methods=['POST'])
|
||||
def clear_peer_reinstall(peer_name):
|
||||
try:
|
||||
|
||||
@@ -573,6 +573,80 @@ class WireGuardManager(BaseServiceManager):
|
||||
except Exception as e:
|
||||
logger.warning(f'sync_cell_routes failed (non-fatal): {e}')
|
||||
|
||||
def update_cell_peer_allowed_ips(self, public_key: str,
|
||||
vpn_subnet: str,
|
||||
add_default_route: bool = False) -> bool:
|
||||
"""Update AllowedIPs for a cell peer to optionally include 0.0.0.0/0.
|
||||
|
||||
When add_default_route=True, appends 0.0.0.0/0 to AllowedIPs so that
|
||||
traffic destined for the internet is routed through this cell peer
|
||||
(Phase 3 exit-relay routing).
|
||||
|
||||
At most one WireGuard peer can have 0.0.0.0/0 in AllowedIPs per
|
||||
interface (v1 constraint); callers must remove it from any other cell
|
||||
peer before adding it here.
|
||||
"""
|
||||
try:
|
||||
if add_default_route:
|
||||
new_ips = f'{vpn_subnet}, 0.0.0.0/0'
|
||||
else:
|
||||
new_ips = vpn_subnet
|
||||
return self.update_peer_ip(public_key, new_ips)
|
||||
except Exception as e:
|
||||
logger.error(f'update_cell_peer_allowed_ips failed: {e}')
|
||||
return False
|
||||
|
||||
def apply_peer_route_via(self, peer_ip: str, via_wg_ip: str,
|
||||
table: int = 100) -> bool:
|
||||
"""Add policy routing inside cell-wireguard so peer_ip traffic uses via_wg_ip.
|
||||
|
||||
Creates a dedicated routing table (table) with a default route via the
|
||||
exit cell's WG IP, then adds an ip rule so traffic sourced from peer_ip
|
||||
uses that table.
|
||||
|
||||
This is the cell-A side of Phase 3 routing: after this, traffic from
|
||||
the local peer (at peer_ip) is sent through the WG tunnel to cell B
|
||||
(at via_wg_ip) for internet egress.
|
||||
|
||||
Idempotent: adding a duplicate route/rule returns 0 or EEXIST, which is
|
||||
treated as success.
|
||||
"""
|
||||
real_conf = self._config_file()
|
||||
if '/tmp/' in real_conf or 'pytest' in real_conf:
|
||||
return True
|
||||
try:
|
||||
def _wg(cmd):
|
||||
return subprocess.run(
|
||||
['docker', 'exec', 'cell-wireguard'] + cmd,
|
||||
capture_output=True, text=True, timeout=5
|
||||
)
|
||||
# Add default route in the policy table (idempotent via || true)
|
||||
_wg(['ip', 'route', 'add', 'default', 'via', via_wg_ip,
|
||||
'dev', 'wg0', 'table', str(table)])
|
||||
# Add ip rule: traffic FROM peer_ip uses this table
|
||||
_wg(['ip', 'rule', 'add', 'from', f'{peer_ip}/32',
|
||||
'pref', str(table), 'lookup', str(table)])
|
||||
logger.info(f'apply_peer_route_via: {peer_ip} → {via_wg_ip} table {table}')
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f'apply_peer_route_via failed: {e}')
|
||||
return False
|
||||
|
||||
def remove_peer_route_via(self, peer_ip: str, table: int = 100) -> None:
|
||||
"""Remove the ip rule for peer_ip added by apply_peer_route_via. Non-fatal."""
|
||||
real_conf = self._config_file()
|
||||
if '/tmp/' in real_conf or 'pytest' in real_conf:
|
||||
return
|
||||
try:
|
||||
subprocess.run(
|
||||
['docker', 'exec', 'cell-wireguard',
|
||||
'ip', 'rule', 'del', 'from', f'{peer_ip}/32',
|
||||
'pref', str(table), 'lookup', str(table)],
|
||||
capture_output=True, timeout=5
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def remove_peer(self, public_key: str) -> bool:
|
||||
"""Remove the [Peer] block matching public_key from wg0.conf."""
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user