#!/usr/bin/env python3
"""
CellLinkManager — manages site-to-site connections between PIC cells.

Each connection is stored in data/cell_links.json and manifests as:
  - A WireGuard [Peer] block  (AllowedIPs = remote cell's VPN subnet)
  - A CoreDNS forwarding block (remote domain → remote cell's DNS IP)
  - An iptables FORWARD rule set (service-level access control)
"""

import ipaddress
import json
import logging
import os
import random
import re
import subprocess
from datetime import datetime, timezone, timedelta
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)

VALID_SERVICES = ('calendar', 'files', 'mail', 'webdav')

_DEFAULT_PERMISSIONS = {
    'inbound': {s: False for s in VALID_SERVICES},
    'outbound': {s: False for s in VALID_SERVICES},
}

_PUSH_TIMEOUT = 5  # seconds
_BACKOFF_BASE_S = 60
_BACKOFF_MAX_S = 3600

# Strict formats for fields imported from a remote cell's invite. The domain and
# dns_ip flow into a `curl --resolve <domain>:443:<dns_ip>` argv (peer-sync push);
# anchoring them — domain must start alphanumeric, dns_ip must be an IP — prevents
# a malicious invite injecting a leading-dash value that curl reads as a flag.
# The patterns end in \Z rather than $ because $ also matches just before a
# trailing newline, which would let "example.com\n" through.
_INVITE_HOSTNAME_RE = re.compile(r'^[A-Za-z0-9]([A-Za-z0-9.-]{0,253}[A-Za-z0-9])?\Z')
_INVITE_CELL_NAME_RE = re.compile(r'^[A-Za-z0-9][A-Za-z0-9 _.-]{0,63}\Z')
_INVITE_ENDPOINT_RE = re.compile(r'^[A-Za-z0-9][A-Za-z0-9._-]*:[0-9]{1,5}\Z')

# Fields every invite must carry before a link record can be built from it.
_REQUIRED_INVITE_FIELDS = ('cell_name', 'public_key', 'vpn_subnet', 'dns_ip', 'domain')

# (key, default) pairs added to stored link records that predate the field.
# Order is the order the keys are inserted into a migrated record.
_LINK_FIELD_DEFAULTS = (
    # Phase 1: permission-sync tracking fields
    ('last_push_status', 'never'),
    ('last_push_at', None),
    ('last_push_error', None),
    # Existing links predate sync — mark pending so startup replay syncs them
    ('pending_push', True),
    ('last_remote_update_at', None),
    # Phase 2: exit-offer signaling fields
    ('exit_offered', False),
    ('remote_exit_offered', False),
    # Phase 3: per-peer internet routing
    ('exit_relay_active', False),
    ('remote_exit_relay_active', False),
    # Phase 4: retry/backoff state
    ('push_attempts', 0),
    ('next_retry_at', None),
)


def _utcnow() -> datetime:
    """Return the current UTC time as a naive datetime.

    Stands in for the deprecated ``datetime.utcnow()``. The tzinfo is dropped
    so ``isoformat()`` yields the same offset-less strings already stored in
    cell_links.json, which replay_pending_pushes compares as plain strings.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


def _validate_invite_fields(invite: Dict[str, Any]) -> None:
    """Reject a remote cell's invite whose fields aren't strictly well-formed.

    Defence-in-depth: these values come from another cell and reach iptables,
    DNS config, and a curl argv (the peer-sync push passes
    ``--resolve <domain>:443:<dns_ip>``). Anchoring domain/dns_ip/endpoint to
    start alphanumeric blocks a malicious leading-dash value that curl would
    read as a flag. The public_key is validated downstream by
    WireGuardManager.add_cell_peer.

    Checks, in order: cell_name, domain, dns_ip (must parse as an IP address),
    vpn_subnet (must parse as an IP network), and — only when present —
    endpoint (``host:port`` with a port in 1-65535).

    Raises:
        ValueError: on anything malformed.
    """
    name = invite.get('cell_name', '')
    if not isinstance(name, str) or not _INVITE_CELL_NAME_RE.fullmatch(name):
        raise ValueError(f'invalid cell_name {name!r}')

    domain = invite.get('domain', '')
    if not isinstance(domain, str) or not _INVITE_HOSTNAME_RE.fullmatch(domain):
        raise ValueError(f'invalid domain {domain!r}: must be a hostname')

    try:
        ipaddress.ip_address(str(invite.get('dns_ip', '')))
    except ValueError:
        raise ValueError(f"invalid dns_ip {invite.get('dns_ip')!r}") from None

    try:
        ipaddress.ip_network(str(invite.get('vpn_subnet', '')), strict=False)
    except ValueError:
        raise ValueError(f"invalid vpn_subnet {invite.get('vpn_subnet')!r}") from None

    endpoint = invite.get('endpoint')
    if endpoint:
        endpoint_text = str(endpoint)
        if not _INVITE_ENDPOINT_RE.fullmatch(endpoint_text):
            raise ValueError(f'invalid endpoint {endpoint!r}')
        # The pattern admits up to five digits, so 0 and 65536-99999 still
        # need to be excluded explicitly.
        port = int(endpoint_text.rsplit(':', 1)[1])
        if not 1 <= port <= 65535:
            raise ValueError(f'invalid endpoint {endpoint!r}')


def _remote_api_url(domain: Optional[str]) -> Optional[str]:
    """Base URL for a linked cell's API, reached over the WG tunnel.

    Cross-cell peer-sync goes to the remote's Caddy on 443 (the WireGuard
    server DNATs VPN-IP:443 → Caddy → API; the API's own :3000 binds 127.0.0.1
    and is unreachable from another cell). The URL uses the remote cell's
    DOMAIN — not its VPN IP — because Caddy only holds a certificate for the
    domain (ACME) or the .cell name (internal CA); a request by bare IP has no
    matching SNI and the TLS handshake fails. The push connects to the VPN IP
    over the tunnel via ``curl --resolve <domain>:443:<dns_ip>``
    (see _push_permissions_to_remote).

    Returns None when *domain* is empty or None.
    """
    return f"https://{domain}" if domain else None


def _compute_next_retry(attempts: int) -> str:
    """Return an ISO timestamp for the earliest next retry.

    Capped exponential backoff: the delay doubles per attempt starting at
    _BACKOFF_BASE_S, is capped at _BACKOFF_MAX_S, and then gets a random
    jitter of up to half the base added on top.
    """
    delay = min(_BACKOFF_BASE_S * (2 ** (attempts - 1)), _BACKOFF_MAX_S)
    delay += random.uniform(0, _BACKOFF_BASE_S / 2)
    return (_utcnow() + timedelta(seconds=delay)).isoformat()


def _default_perms() -> Dict[str, Any]:
    """Return a fresh all-False permissions dict (safe for callers to mutate)."""
    return {
        'inbound': {s: False for s in VALID_SERVICES},
        'outbound': {s: False for s in VALID_SERVICES},
    }


class CellLinkManager:
    """Create, persist, sync and tear down links to other cells.

    Link records are dicts kept as a JSON list in ``<data_dir>/cell_links.json``.
    WireGuard and DNS changes are delegated to the injected managers; firewall
    changes go through the lazily imported ``firewall_manager`` module.
    """

    def __init__(self, data_dir: str, config_dir: str, wireguard_manager, network_manager):
        self.data_dir = data_dir
        self.config_dir = config_dir
        self.wireguard_manager = wireguard_manager
        self.network_manager = network_manager
        self.links_file = os.path.join(data_dir, 'cell_links.json')

    # ── Storage ───────────────────────────────────────────────────────────────

    @staticmethod
    def _migrate_link(link: Dict[str, Any]) -> bool:
        """Bring one stored link record up to the current schema, in place.

        Returns True when the record was modified.
        """
        changed = False
        if 'permissions' not in link:
            link['permissions'] = _default_perms()
            changed = True

        # Domain-based HTTPS URL. Rebuild if missing, or if it's a legacy
        # form: http://<ip>:3000 (unreachable) or https://<ip> (no matching
        # Caddy cert by bare IP). Left alone when the link has no domain.
        want_url = _remote_api_url(link.get('domain'))
        if want_url and link.get('remote_api_url') != want_url:
            link['remote_api_url'] = want_url
            changed = True

        for key, default in _LINK_FIELD_DEFAULTS:
            if key not in link:
                link[key] = default
                changed = True
        return changed

    def _load(self) -> List[Dict[str, Any]]:
        """Read all link records, migrating old records to the current schema.

        Never raises. Returns an empty list when the file is missing,
        unreadable, or not a JSON list of records; the failure is logged so a
        corrupt file is not mistaken for "no links". When a migration changed
        something the file is rewritten; if that rewrite fails the migrated
        records are still returned.
        """
        if not os.path.exists(self.links_file):
            return []
        try:
            with open(self.links_file, encoding='utf-8') as f:
                links = json.load(f)
            if not isinstance(links, list):
                raise ValueError('top-level JSON value is not a list')
            changed = False
            for link in links:
                if self._migrate_link(link):
                    changed = True
        except Exception as e:
            logger.error("Could not load %s (treating as no links): %s", self.links_file, e)
            return []

        if changed:
            try:
                self._save(links)
            except Exception as e:
                # The records themselves are fine; only the write-back failed.
                logger.warning("Could not persist migrated %s: %s", self.links_file, e)
        return links

    def _save(self, links: List[Dict[str, Any]]):
        """Write all link records to the links file.

        The JSON is written to a sibling ``.tmp`` file and moved into place
        with os.replace, so a crash mid-write cannot leave a truncated
        cell_links.json. If that path fails with an OSError the file is
        written in place instead.
        """
        tmp_path = f'{self.links_file}.tmp'
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(links, f, indent=2)
                f.flush()
                os.fsync(f.fileno())
            os.replace(tmp_path, self.links_file)
            return
        except OSError as e:
            logger.debug("Atomic save of %s failed (%s); writing in place", self.links_file, e)
        finally:
            # After a successful replace the temp name no longer exists.
            if os.path.exists(tmp_path):
                try:
                    os.remove(tmp_path)
                except OSError:
                    pass

        with open(self.links_file, 'w', encoding='utf-8') as f:
            json.dump(links, f, indent=2)

    @staticmethod
    def _find_link(links: List[Dict[str, Any]], cell_name: str) -> Dict[str, Any]:
        """Return the record named *cell_name* from *links*.

        Raises:
            ValueError: if no record has that cell_name.
        """
        link = next((l for l in links if l['cell_name'] == cell_name), None)
        if not link:
            raise ValueError(f"Cell '{cell_name}' not found")
        return link

    @staticmethod
    def _new_link_record(invite: Dict[str, Any], permissions: Dict[str, Any]) -> Dict[str, Any]:
        """Build the record stored for a newly connected cell.

        The record starts with pending_push=True so the first permission sync
        is replayed if the initial push does not succeed.
        """
        return {
            'cell_name': invite['cell_name'],
            'public_key': invite['public_key'],
            'endpoint': invite.get('endpoint'),
            'vpn_subnet': invite['vpn_subnet'],
            'dns_ip': invite['dns_ip'],
            'domain': invite['domain'],
            'connected_at': _utcnow().isoformat(),
            'permissions': permissions,
            'remote_api_url': _remote_api_url(invite['domain']),
            'last_push_status': 'never',
            'last_push_at': None,
            'last_push_error': None,
            'pending_push': True,
            'last_remote_update_at': None,
        }

    # ── Sync helpers ──────────────────────────────────────────────────────────

    def _local_identity(self) -> Dict[str, str]:
        """Return this cell's name and WG public key for outbound peer-sync calls."""
        from app import config_manager
        identity = config_manager.configs.get('_identity', {})
        cell_name = identity.get('cell_name', os.environ.get('CELL_NAME', 'mycell'))
        keys = self.wireguard_manager.get_keys()
        return {'cell_name': cell_name, 'public_key': keys.get('public_key', '')}

    def _local_wg_ip(self) -> Optional[str]:
        """Return the local cell-wireguard wg0 IP (e.g. '10.0.0.1').

        Used to set X-Forwarded-For so the remote peer-sync endpoint can
        authenticate us by VPN subnet even after MASQUERADE changes the
        apparent source to the Docker bridge IP.

        Returns None when the address cannot be determined.
        """
        try:
            r = subprocess.run(
                ['docker', 'exec', 'cell-wireguard', 'ip', 'addr', 'show', 'wg0'],
                capture_output=True, text=True, timeout=5
            )
            for line in r.stdout.splitlines():
                line = line.strip()
                # First "inet <addr>/<prefix> ..." line → the address part.
                if line.startswith('inet '):
                    return line.split()[1].split('/')[0]
        except Exception as e:
            logger.debug("Could not determine local wg0 IP: %s", e)
        return None

    @staticmethod
    def _run_curl(cmd: List[str]) -> Dict[str, Any]:
        """Run a curl argv that prints only the HTTP status code.

        Returns {'ok': bool, 'error': str|None}. Never raises. 'ok' is True
        only when curl exits 0 and the printed status starts with '2'.
        """
        try:
            result = subprocess.run(
                cmd, capture_output=True, text=True, timeout=_PUSH_TIMEOUT + 5
            )
        except subprocess.TimeoutExpired:
            return {'ok': False, 'error': 'timeout'}
        except Exception as e:
            return {'ok': False, 'error': str(e)[:200]}

        if result.returncode != 0:
            err = (result.stderr or result.stdout or 'curl error').strip()[:200]
            return {'ok': False, 'error': err}
        status = result.stdout.strip()
        if status.startswith('2'):
            return {'ok': True, 'error': None}
        return {'ok': False, 'error': f'HTTP {status}'}

    def _push_permissions_to_remote(self, link: Dict[str, Any],
                                    from_cell: str, from_public_key: str) -> Dict[str, Any]:
        """POST mirrored permissions to the remote cell's peer-sync endpoint.

        Returns {'ok': bool, 'error': str|None}. Never raises.

        The body inverts inbound/outbound: our inbound (what we share with them)
        becomes their outbound (what they receive from us), and vice-versa.

        Uses 'docker exec cell-wireguard curl' so the HTTP request originates from
        inside cell-wireguard's network namespace, which has routes to remote cell
        VPN subnets that cell-api (on the Docker bridge) lacks.
        """
        url = link.get('remote_api_url')
        if not url:
            return {'ok': False, 'error': 'no remote_api_url'}

        perms = link.get('permissions') or _default_perms()
        body = {
            'version': 1,
            'from_cell': from_cell,
            'from_public_key': from_public_key,
            'permissions': {
                'outbound': dict(perms.get('inbound', {})),
                'inbound': dict(perms.get('outbound', {})),
            },
            'exit_offered': bool(link.get('exit_offered', False)),
            'use_as_exit_relay': bool(link.get('exit_relay_active', False)),
            'sent_at': _utcnow().isoformat() + 'Z',
        }
        payload = json.dumps(body)
        endpoint = url.rstrip('/') + '/api/cells/peer-sync/permissions'

        # Determine local WG IP for X-Forwarded-For (belt-and-suspenders for the
        # remote's source-subnet auth). With the peer-sync masquerade exclusion
        # the remote's Caddy already sees our real VPN source and appends it, but
        # passing it explicitly is harmless.
        local_wg_ip = self._local_wg_ip()
        xff_header = f'X-Forwarded-For: {local_wg_ip}' if local_wg_ip else None

        # Reach the remote over the WG tunnel by its VPN IP, but present the
        # cell's DOMAIN as SNI/Host so Caddy serves its certificate — a request
        # to a bare IP has no matching cert and the TLS handshake fails. -k still
        # covers LAN mode (internal-CA cert curl won't chain to).
        domain = link.get('domain')
        dns_ip = link.get('dns_ip')
        cmd = [
            'docker', 'exec', 'cell-wireguard',
            'curl', '-s', '-k', '-o', '/dev/null', '-w', '%{http_code}',
        ]
        if domain and dns_ip:
            cmd += ['--resolve', f'{domain}:443:{dns_ip}']
        cmd += [
            '-X', 'POST',
            '-H', 'Content-Type: application/json',
        ]
        if xff_header:
            cmd += ['-H', xff_header]
        cmd += [
            '-d', payload,
            '--max-time', str(_PUSH_TIMEOUT),
            '--connect-timeout', '3',
            endpoint,
        ]
        return self._run_curl(cmd)

    def _record_push_result(self, cell_name: str, result: Dict[str, Any]) -> None:
        """Persist last_push_* and pending_push after a push attempt.

        Success clears the pending flag and the retry state; failure keeps the
        link pending, bumps push_attempts and schedules next_retry_at.
        """
        links = self._load()
        for link in links:
            if link['cell_name'] != cell_name:
                continue
            if result.get('ok'):
                link['last_push_status'] = 'ok'
                link['last_push_at'] = _utcnow().isoformat()
                link['last_push_error'] = None
                link['pending_push'] = False
                link['push_attempts'] = 0
                link['next_retry_at'] = None
            else:
                link['last_push_status'] = 'failed'
                link['last_push_error'] = result.get('error')
                link['pending_push'] = True
                attempts = link.get('push_attempts', 0) + 1
                link['push_attempts'] = attempts
                link['next_retry_at'] = _compute_next_retry(attempts)
            break
        self._save(links)

    def _try_push(self, cell_name: str, link: Dict[str, Any]) -> None:
        """Mark pending, push, record result. Non-fatal."""
        # Mark dirty before pushing — a crash mid-push leaves it pending for replay
        links = self._load()
        for stored in links:
            if stored['cell_name'] == cell_name:
                stored['pending_push'] = True
                break
        self._save(links)

        try:
            identity = self._local_identity()
            result = self._push_permissions_to_remote(
                link, identity['cell_name'], identity['public_key']
            )
            self._record_push_result(cell_name, result)
            if not result['ok']:
                logger.warning(
                    f"Permission push to '{cell_name}' failed "
                    f"(will retry on startup): {result['error']}"
                )
        except Exception as e:
            logger.warning(f"Permission push to '{cell_name}' skipped (non-fatal): {e}")

    def apply_remote_permissions(self, from_public_key: str,
                                 permissions: Dict[str, Any],
                                 exit_offered: bool = False,
                                 use_as_exit_relay: bool = False) -> Dict[str, Any]:
        """Store permissions pushed by a remote cell (identified by WG public key).

        Only the services in VALID_SERVICES are kept, each coerced to bool; a
        *permissions* value (or an 'inbound'/'outbound' entry) that is not a
        dict is treated as empty. The result is persisted and the local
        firewall rules for the cell are re-applied (failure there is logged,
        not raised).

        Returns the updated link record.

        Raises:
            ValueError: if no link has *from_public_key*.
        """
        links = self._load()
        link = next((l for l in links if l['public_key'] == from_public_key), None)
        if not link:
            raise ValueError(f"No connection found for public_key '{from_public_key[:16]}...'")

        # The payload comes from another cell — never assume its shape.
        pushed = permissions if isinstance(permissions, dict) else {}
        in_raw = pushed.get('inbound', {})
        out_raw = pushed.get('outbound', {})
        if not isinstance(in_raw, dict):
            in_raw = {}
        if not isinstance(out_raw, dict):
            out_raw = {}
        clean_inbound = {s: bool(in_raw.get(s, False)) for s in VALID_SERVICES}
        clean_outbound = {s: bool(out_raw.get(s, False)) for s in VALID_SERVICES}

        link['permissions'] = {'inbound': clean_inbound, 'outbound': clean_outbound}
        link['remote_exit_offered'] = bool(exit_offered)
        link['remote_exit_relay_active'] = bool(use_as_exit_relay)
        link['last_remote_update_at'] = _utcnow().isoformat()
        self._save(links)

        # Soft loop-detection warning: if the remote is asking us to act as exit
        # AND we already have a peer routing via that cell, it's a potential cycle.
        if use_as_exit_relay:
            try:
                from peer_registry import PeerRegistry
                pr = PeerRegistry(os.environ.get('DATA_DIR', '/app/data'))
                loop_peers = [p['name'] for p in pr.list_peers()
                              if p.get('route_via') == link['cell_name']]
                if loop_peers:
                    logger.warning(
                        f"apply_remote_permissions: '{link['cell_name']}' asked us to act as "
                        f"its exit relay, but we already route peers {loop_peers} via it — "
                        f"potential routing loop detected"
                    )
            except Exception as e:
                logger.debug("Routing-loop check skipped: %s", e)

        inbound_list = [s for s, v in clean_inbound.items() if v]
        try:
            import firewall_manager as _fm
            _fm.apply_cell_rules(link['cell_name'], link['vpn_subnet'], inbound_list,
                                 exit_relay=use_as_exit_relay)
        except Exception as e:
            logger.warning(
                f"apply_cell_rules after remote push for '{link['cell_name']}' "
                f"failed (non-fatal): {e}"
            )
        return link

    def replay_pending_pushes(self) -> Dict[str, int]:
        """Retry permission pushes for any link with pending_push=True.

        Links whose next_retry_at is still in the future are skipped and
        counted as 'deferred'.

        Called from _apply_startup_enforcement. Returns counts for logging
        with the keys 'attempted', 'ok', 'failed' and 'deferred'.
        """
        summary = {'attempted': 0, 'ok': 0, 'failed': 0, 'deferred': 0}
        try:
            identity = self._local_identity()
        except Exception as e:
            logger.warning(f"replay_pending_pushes: cannot resolve identity ({e})")
            return summary

        # Both sides are offset-less ISO strings produced by this module, so a
        # plain string comparison orders them chronologically.
        now_iso = _utcnow().isoformat()
        for link in self._load():
            if not link.get('pending_push'):
                continue
            next_retry = link.get('next_retry_at')
            if next_retry and next_retry > now_iso:
                summary['deferred'] += 1
                logger.info(
                    f"replay: skipping '{link['cell_name']}' — backoff until {next_retry}"
                )
                continue

            summary['attempted'] += 1
            result = self._push_permissions_to_remote(
                link, identity['cell_name'], identity['public_key']
            )
            self._record_push_result(link['cell_name'], result)
            if result.get('ok'):
                summary['ok'] += 1
                logger.info(f"replay: synced permissions to '{link['cell_name']}'")
            else:
                summary['failed'] += 1
                logger.warning(
                    f"replay: push to '{link['cell_name']}' failed: {result.get('error')}"
                )

        if summary['attempted'] or summary['deferred']:
            logger.info(
                f"replay_pending_pushes: {summary['attempted']} attempted, "
                f"{summary['ok']} ok, {summary['failed']} failed, "
                f"{summary['deferred']} deferred (backoff)"
            )
        return summary

    # ── Public API ────────────────────────────────────────────────────────────

    def generate_invite(self, cell_name: str, domain: str) -> Dict[str, Any]:
        """Return an invite package describing this cell for another cell to import.

        The endpoint advertises the cell's public domain (when in a DDNS/ACME mode)
        plus this cell's own WireGuard port, rather than a raw external IP — so the
        remote cell reaches us by name and a NAT/router can forward each cell's
        distinct WG port to the right host.
        """
        keys = self.wireguard_manager.get_keys()
        server_vpn_ip = self.wireguard_manager._get_configured_address().split('/')[0]
        try:
            from app import config_manager as _cm
        except Exception:
            # The endpoint lookup is still attempted without a config manager.
            _cm = None
        endpoint = self.wireguard_manager.get_advertised_endpoint(_cm)
        return {
            'cell_name': cell_name,
            'public_key': keys['public_key'],
            'endpoint': endpoint,
            'vpn_subnet': self.wireguard_manager._get_configured_network(),
            'dns_ip': server_vpn_ip,
            'domain': domain,
            'version': 1,
        }

    def list_connections(self) -> List[Dict[str, Any]]:
        """Return all stored link records."""
        return self._load()

    def _check_invite_conflicts(self, invite: Dict[str, Any],
                                exclude_cell: str = '') -> None:
        """Raise ValueError if invite's subnet or domain conflicts with existing state.

        The subnet must not overlap this cell's own network or any connected
        cell's subnet; the domain must not equal this cell's own domain or any
        connected cell's domain (compared case-insensitively). The link named
        *exclude_cell* is ignored so an existing link can be re-checked
        against everything but itself.
        """
        links = self._load()
        remote_subnet = invite.get('vpn_subnet', '')
        remote_domain = invite.get('domain', '')

        # Check VPN subnet: must not overlap our own subnet or any existing cell's subnet
        try:
            remote_net = ipaddress.ip_network(remote_subnet, strict=False)
            own_net = ipaddress.ip_network(
                self.wireguard_manager._get_configured_network(), strict=False)
            if remote_net.overlaps(own_net):
                raise ValueError(
                    f"VPN subnet {remote_subnet!r} overlaps this cell's own subnet "
                    f"{str(own_net)!r} — the remote cell must use a distinct IP range"
                )
            for link in links:
                if link['cell_name'] == exclude_cell:
                    continue
                existing_subnet = link.get('vpn_subnet', '0.0.0.0/32')
                existing_net = ipaddress.ip_network(existing_subnet, strict=False)
                if remote_net.overlaps(existing_net):
                    raise ValueError(
                        f"VPN subnet {remote_subnet!r} overlaps already-connected cell "
                        f"'{link['cell_name']}' ({existing_subnet!r})"
                    )
        except ValueError:
            raise
        except Exception as e:
            logger.warning(f'_check_invite_conflicts subnet check skipped: {e}')

        # Check domain: must not match our own domain or any existing cell's domain
        if not remote_domain:
            return
        remote_key = str(remote_domain).lower()

        own_domain = ''
        try:
            from app import config_manager
            identity = config_manager.configs.get('_identity', {})
            own_domain = (identity.get('domain_name')
                          or identity.get('domain', os.environ.get('CELL_DOMAIN', '')))
        except Exception as e:
            # Best-effort: without our own identity only the per-link check runs.
            logger.debug("Own-domain lookup skipped: %s", e)
        if own_domain and str(own_domain).lower() == remote_key:
            raise ValueError(
                f"Domain {remote_domain!r} is the same as this cell's own domain — "
                f"the remote cell must use a different domain name"
            )

        for link in links:
            if link['cell_name'] == exclude_cell:
                continue
            if str(link.get('domain') or '').lower() == remote_key:
                raise ValueError(
                    f"Domain {remote_domain!r} is already used by connected cell "
                    f"'{link['cell_name']}' — each cell must have a unique domain"
                )

    def _push_invite_to_remote(self, link: Dict[str, Any]) -> Dict[str, Any]:
        """Send OUR invite to the remote cell so it can complete mutual WG pairing.

        Called immediately after adding the remote as our WG peer, before the WG
        tunnel is up. Reaches the remote over the PUBLIC path at its advertised
        endpoint host (a domain in DDNS/ACME modes) on Caddy/443 — the API's :3000
        is 127.0.0.1-only and not reachable across cells. Non-fatal — one-sided
        pairing degrades gracefully; the admin can pair from the other side.

        Returns {'ok': bool, 'error': str|None}. Never raises.
        """
        endpoint = link.get('endpoint') or ''
        if not endpoint:
            return {'ok': False, 'error': 'no endpoint'}
        # Host from endpoint (e.g. "alice.pic.ngo:51821" → "alice.pic.ngo").
        host = str(endpoint).rsplit(':', 1)[0].strip('[]')
        if not host:
            return {'ok': False, 'error': f'cannot parse endpoint {endpoint!r}'}

        try:
            identity = self._local_identity()
            from app import config_manager
            id_cfg = config_manager.configs.get('_identity', {})
            own_domain = (id_cfg.get('domain_name')
                          or id_cfg.get('domain', os.environ.get('CELL_DOMAIN', 'cell')))
            own_invite = self.generate_invite(identity['cell_name'], own_domain)
        except Exception as e:
            return {'ok': False, 'error': f'could not build own invite: {e}'}

        url = f'https://{host}/api/cells/peer-sync/accept-invite'
        payload = json.dumps({'invite': own_invite})
        cmd = [
            'docker', 'exec', 'cell-wireguard',
            # -k: endpoint may be a bare IP (LAN/fallback) whose cert won't match;
            # accept-invite carries only public keys and the WG handshake is the
            # real authentication.
            'curl', '-s', '-k', '-o', '/dev/null', '-w', '%{http_code}',
            '-X', 'POST',
            '-H', 'Content-Type: application/json',
            '-d', payload,
            '--max-time', str(_PUSH_TIMEOUT),
            '--connect-timeout', '3',
            url,
        ]
        return self._run_curl(cmd)

    def accept_invite(self, invite: Dict[str, Any]) -> Dict[str, Any]:
        """Accept a remote cell's invite and complete mutual WG pairing.

        Called by the /api/cells/peer-sync/accept-invite endpoint when the remote
        cell pushes its own invite after we connected to it. Idempotent: if we
        are already connected to this cell, returns the existing link (after
        updating its dns_ip / vpn_subnet / endpoint / domain if they changed).

        Raises:
            ValueError: on a missing or malformed field, a subnet/domain
                conflict, or when a cell of that name is already connected
                under a different public key.
            RuntimeError: if the WireGuard peer could not be added.
        """
        for field in _REQUIRED_INVITE_FIELDS:
            if field not in invite:
                raise ValueError(f"Invite missing field: {field!r}")
        _validate_invite_fields(invite)

        links = self._load()
        name = invite['cell_name']

        # Already connected — check whether the remote's endpoint, subnet, or domain changed
        # (e.g. the remote cell changed its WireGuard address or domain) and heal if so.
        existing = next((l for l in links if l['cell_name'] == name), None)
        if existing:
            # The cell name alone is not proof of identity: a same-named invite
            # carrying a different key must not be able to repoint this link's
            # DNS forward or AllowedIPs. The WG peer would keep the old key
            # anyway, so "healing" such a link could not produce a working tunnel.
            if invite['public_key'] != existing.get('public_key'):
                raise ValueError(
                    f"Cell '{name}' is already connected with a different public key"
                )

            dns_changed = existing.get('dns_ip') != invite['dns_ip']
            subnet_changed = existing.get('vpn_subnet') != invite['vpn_subnet']
            endpoint_changed = (invite.get('endpoint') and
                                invite['endpoint'] != existing.get('endpoint'))
            domain_changed = (invite.get('domain') and
                              invite['domain'] != existing.get('domain'))
            if dns_changed or subnet_changed or endpoint_changed or domain_changed:
                # Before healing, verify the updated invite doesn't conflict with
                # other connected cells (exclude this cell by name so it's not
                # self-blocking when only endpoint/dns_ip changed).
                self._check_invite_conflicts(invite, exclude_cell=name)
                logger.info(
                    f"accept_invite: updating existing cell '{name}' "
                    f"(dns_ip: {existing.get('dns_ip')} → {invite['dns_ip']}, "
                    f"vpn_subnet: {existing.get('vpn_subnet')} → {invite['vpn_subnet']}, "
                    f"domain: {existing.get('domain')} → {invite.get('domain')})"
                )
                old_subnet = existing.get('vpn_subnet', '')
                old_domain = existing.get('domain', '')
                existing['dns_ip'] = invite['dns_ip']
                existing['vpn_subnet'] = invite['vpn_subnet']
                existing['remote_api_url'] = _remote_api_url(invite['domain'])
                if invite.get('endpoint'):
                    existing['endpoint'] = invite['endpoint']
                if domain_changed:
                    existing['domain'] = invite['domain']
                self._save(links)

                # Update WG peer AllowedIPs to the new subnet
                if subnet_changed and old_subnet:
                    self.wireguard_manager.update_peer_ip(
                        existing['public_key'], invite['vpn_subnet'])

                # Update DNS forward rule — triggers on dns_ip OR domain change
                if dns_changed or domain_changed:
                    try:
                        self.network_manager.remove_cell_dns_forward(old_domain)
                    except Exception as e:
                        logger.debug("Removing old DNS forward for %r failed: %s",
                                     old_domain, e)
                    self.network_manager.add_cell_dns_forward(
                        domain=existing['domain'], dns_ip=invite['dns_ip'])

                # Reapply firewall rules with new subnet
                if subnet_changed:
                    try:
                        import firewall_manager as _fm
                        inbound_list = [s for s, v in
                                        existing.get('permissions', {}).get('inbound', {}).items()
                                        if v]
                        # NOTE(review): unlike apply_remote_permissions this call
                        # passes no exit_relay — confirm the default is intended
                        # when remote_exit_relay_active is set.
                        _fm.apply_cell_rules(name, invite['vpn_subnet'], inbound_list)
                    except Exception as e:
                        logger.warning(f"apply_cell_rules after subnet update failed: {e}")
            return existing

        # Conflict check (nothing to exclude: this cell is not in the list yet)
        self._check_invite_conflicts(invite)

        ok = self.wireguard_manager.add_cell_peer(
            name=name,
            public_key=invite['public_key'],
            endpoint=invite.get('endpoint', ''),
            vpn_subnet=invite['vpn_subnet'],
        )
        if not ok:
            raise RuntimeError(f"Failed to add WireGuard peer for cell '{name}'")

        dns_result = self.network_manager.add_cell_dns_forward(
            domain=invite['domain'],
            dns_ip=invite['dns_ip'],
        )
        if dns_result.get('warnings'):
            logger.warning('DNS forward warnings for %s (accept_invite): %s',
                           name, dns_result['warnings'])

        link = self._new_link_record(invite, _default_perms())
        links.append(link)
        self._save(links)

        try:
            import firewall_manager as _fm
            _fm.apply_cell_rules(name, invite['vpn_subnet'], [])
        except Exception as e:
            logger.warning(f"apply_cell_rules for {name} (accept_invite) failed: {e}")

        logger.info(f"accept_invite: mutual pairing completed for cell '{name}'")
        return link

    def add_connection(self, invite: Dict[str, Any],
                       inbound_services: Optional[List[str]] = None) -> Dict[str, Any]:
        """Import a remote cell's invite and establish the connection.

        Adds the WireGuard peer and DNS forward, stores the link with the
        requested *inbound_services* enabled (names outside VALID_SERVICES are
        ignored), applies firewall rules, then makes two best-effort pushes to
        the remote: our own invite (for mutual pairing) and the initial
        permissions. Failures of the firewall step and of both pushes are
        logged, not raised.

        Returns the new link record.

        Raises:
            ValueError: on a malformed invite, a missing public_key, an
                already-connected cell name, or a subnet/domain conflict.
            RuntimeError: if the WireGuard peer could not be added.
        """
        _validate_invite_fields(invite)
        if 'public_key' not in invite:
            raise ValueError("Invite missing field: 'public_key'")

        links = self._load()
        name = invite['cell_name']
        if any(l['cell_name'] == name for l in links):
            raise ValueError(f"Cell '{name}' is already connected")

        # Check for VPN subnet and domain conflicts before touching WG/DNS
        self._check_invite_conflicts(invite)

        ok = self.wireguard_manager.add_cell_peer(
            name=name,
            public_key=invite['public_key'],
            endpoint=invite.get('endpoint', ''),
            vpn_subnet=invite['vpn_subnet'],
        )
        if not ok:
            raise RuntimeError(f"Failed to add WireGuard peer for cell '{name}'")

        dns_result = self.network_manager.add_cell_dns_forward(
            domain=invite['domain'],
            dns_ip=invite['dns_ip'],
        )
        if dns_result.get('warnings'):
            logger.warning('DNS forward warnings for %s: %s', name, dns_result['warnings'])

        inbound = [s for s in (inbound_services or []) if s in VALID_SERVICES]
        perms = _default_perms()
        for s in inbound:
            perms['inbound'][s] = True

        link = self._new_link_record(invite, perms)
        links.append(link)
        self._save(links)

        try:
            import firewall_manager as _fm
            _fm.apply_cell_rules(name, invite['vpn_subnet'], inbound)
        except Exception as e:
            logger.warning(f"apply_cell_rules for {name} failed (non-fatal): {e}")

        # Push OUR invite to the remote so it can complete mutual WG pairing.
        # This goes over the public path to the remote's advertised endpoint
        # host (before the WG tunnel is up), so the remote doesn't need manual
        # action from both sides.
        try:
            inv_result = self._push_invite_to_remote(link)
            if inv_result.get('ok'):
                logger.info(f"Mutual pairing invite accepted by '{name}'")
            else:
                logger.warning(
                    f"Invite push to '{name}' failed (manual pairing from remote required): "
                    f"{inv_result.get('error')}"
                )
        except Exception as e:
            logger.warning(f"Invite push to '{name}' skipped (non-fatal): {e}")

        # Initial permission push (uses WG tunnel — may fail if tunnel not yet up)
        try:
            identity = self._local_identity()
            result = self._push_permissions_to_remote(
                link, identity['cell_name'], identity['public_key']
            )
            self._record_push_result(name, result)
            if not result['ok']:
                logger.warning(
                    f"Initial permission push to '{name}' failed "
                    f"(will retry on startup): {result['error']}"
                )
        except Exception as e:
            logger.warning(f"Initial permission push to '{name}' skipped (non-fatal): {e}")

        return link

    def remove_connection(self, cell_name: str):
        """Tear down a cell connection by name.

        Firewall teardown is best-effort; removing the WireGuard peer and the
        DNS forward is not, and the link is only dropped from storage after
        both succeed.

        Raises:
            ValueError: if *cell_name* is not connected.
        """
        links = self._load()
        link = self._find_link(links, cell_name)

        try:
            import firewall_manager as _fm
            _fm.clear_cell_rules(cell_name)
            _fm.remove_cell_subnet_route(link.get('vpn_subnet', ''))
        except Exception as e:
            logger.warning(f"firewall teardown for {cell_name} failed (non-fatal): {e}")

        self.wireguard_manager.remove_peer(link['public_key'])
        self.network_manager.remove_cell_dns_forward(link['domain'])
        remaining = [l for l in links if l['cell_name'] != cell_name]
        self._save(remaining)

    def update_permissions(self, cell_name: str, inbound: Dict[str, bool],
                           outbound: Dict[str, bool]) -> Dict[str, Any]:
        """Update service sharing permissions for a cell connection.

        Validates, persists, re-applies iptables, then pushes to remote.
        Services missing from *inbound* / *outbound* (or a None mapping) are
        stored as False. Returns the updated link record.

        Raises:
            ValueError: if *cell_name* is not connected.
        """
        links = self._load()
        link = self._find_link(links, cell_name)

        inbound = inbound or {}
        outbound = outbound or {}
        clean_inbound = {s: bool(inbound.get(s, False)) for s in VALID_SERVICES}
        clean_outbound = {s: bool(outbound.get(s, False)) for s in VALID_SERVICES}

        link['permissions'] = {'inbound': clean_inbound, 'outbound': clean_outbound}
        self._save(links)

        inbound_list = [s for s, v in clean_inbound.items() if v]
        try:
            import firewall_manager as _fm
            # NOTE(review): no exit_relay is passed here, unlike in
            # apply_remote_permissions — confirm this does not drop an active
            # exit-relay rule for the cell.
            _fm.apply_cell_rules(cell_name, link['vpn_subnet'], inbound_list)
        except Exception as e:
            logger.warning(f"apply_cell_rules for {cell_name} failed (non-fatal): {e}")

        # Push mirrored state to the remote cell (non-fatal)
        self._try_push(cell_name, link)

        return link

    def get_permissions(self, cell_name: str) -> Dict[str, Any]:
        """Return the permissions dict for a connected cell.

        Raises:
            ValueError: if *cell_name* is not connected.
        """
        link = self._find_link(self._load(), cell_name)
        return link.get('permissions', _default_perms())

    def set_exit_offered(self, cell_name: str, offered: bool) -> Dict[str, Any]:
        """Toggle whether THIS cell offers to route internet traffic for cell_name.

        The new value is persisted locally then pushed to the remote cell so it
        knows our offer changed. Returns the updated link record.

        Raises:
            ValueError: if *cell_name* is not connected.
        """
        links = self._load()
        link = self._find_link(links, cell_name)
        link['exit_offered'] = bool(offered)
        self._save(links)
        self._try_push(cell_name, link)
        return link

    def set_exit_relay_active(self, cell_name: str, active: bool) -> Dict[str, Any]:
        """Record that THIS cell is routing a peer's internet traffic via cell_name.

        Persists the flag locally and pushes updated state to the remote cell so it
        can enable/disable the FORWARD-to-eth0 rule on its side.
        Returns the updated link record.

        Raises:
            ValueError: if *cell_name* is not connected.
        """
        links = self._load()
        link = self._find_link(links, cell_name)
        link['exit_relay_active'] = bool(active)
        self._save(links)
        self._try_push(cell_name, link)
        return link

    def get_connection_status(self, cell_name: str) -> Dict[str, Any]:
        """Return link record enriched with live WireGuard handshake status.

        Adds 'online' and 'last_handshake'; if the status lookup fails the
        link is reported as offline with no handshake.

        Raises:
            ValueError: if *cell_name* is not connected.
        """
        link = self._find_link(self._load(), cell_name)
        try:
            st = self.wireguard_manager.get_peer_status(link['public_key'])
            return {**link, 'online': st.get('online', False),
                    'last_handshake': st.get('last_handshake')}
        except Exception:
            return {**link, 'online': False, 'last_handshake': None}