#!/usr/bin/env python3
"""
CellLinkManager — manages site-to-site connections between PIC cells.

Each connection is stored in data/cell_links.json and manifests as:
  - A WireGuard [Peer] block (AllowedIPs = remote cell's VPN subnet)
  - A CoreDNS forwarding block (remote domain → remote cell's DNS IP)
  - An iptables FORWARD rule set (service-level access control)
"""

import json
import logging
import os
import subprocess
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)

VALID_SERVICES = ('calendar', 'files', 'mail', 'webdav')

_DEFAULT_PERMISSIONS = {
    'inbound': {s: False for s in VALID_SERVICES},
    'outbound': {s: False for s in VALID_SERVICES},
}

_PUSH_TIMEOUT = 5  # seconds

# Fields every invite must carry before WireGuard/DNS state is touched.
_REQUIRED_INVITE_FIELDS = ('cell_name', 'public_key', 'vpn_subnet', 'dns_ip', 'domain')

# Simple (key, default) migrations applied to link records written by older
# versions. Order matters only for the key order of the re-saved JSON.
_MIGRATION_DEFAULTS = (
    # Phase 1: permission-sync tracking fields
    ('last_push_status', 'never'),
    ('last_push_at', None),
    ('last_push_error', None),
    # Existing links predate sync — mark pending so startup replay syncs them
    ('pending_push', True),
    ('last_remote_update_at', None),
    # Phase 2: exit-offer signaling fields
    ('exit_offered', False),
    ('remote_exit_offered', False),
    # Phase 3: per-peer internet routing
    ('exit_relay_active', False),
    ('remote_exit_relay_active', False),
)


def _default_perms() -> Dict[str, Any]:
    """Return a fresh permissions dict with every service disabled both ways."""
    return {
        'inbound': {s: False for s in VALID_SERVICES},
        'outbound': {s: False for s in VALID_SERVICES},
    }


def _utc_now_iso() -> str:
    """Return the current UTC time as a naive ISO-8601 string.

    Produces the same format as the deprecated ``datetime.utcnow().isoformat()``
    (no UTC offset suffix), so stored timestamps keep their shape.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()


def _clean_service_map(raw: Any) -> Dict[str, bool]:
    """Normalise a service→flag mapping to exactly the keys in VALID_SERVICES.

    Unknown services are dropped, missing ones default to False and values are
    coerced to bool. Anything that is not a dict (None, list, ...) is treated
    as empty so malformed input cannot raise.
    """
    source = raw if isinstance(raw, dict) else {}
    return {s: bool(source.get(s, False)) for s in VALID_SERVICES}


def _validate_invite(invite: Dict[str, Any]) -> None:
    """Raise ValueError naming the first required field missing from invite."""
    for field in _REQUIRED_INVITE_FIELDS:
        if field not in invite:
            raise ValueError(f"Invite missing field: {field!r}")


def _migrate_link(link: Dict[str, Any]) -> bool:
    """Fill in fields missing from an older link record; return True if changed."""
    changed = False
    if 'permissions' not in link:
        link['permissions'] = _default_perms()
        changed = True
    if 'remote_api_url' not in link:
        link['remote_api_url'] = (
            f"http://{link['dns_ip']}:3000" if link.get('dns_ip') else None
        )
        changed = True
    for key, default in _MIGRATION_DEFAULTS:
        if key not in link:
            link[key] = default
            changed = True
    return changed


def _new_link_record(invite: Dict[str, Any],
                     permissions: Dict[str, Any]) -> Dict[str, Any]:
    """Build a complete link record for a newly connected cell.

    Includes the exit-offer/relay fields so a fresh record has the same shape
    as one that has been through the _load() migration.
    """
    return {
        'cell_name': invite['cell_name'],
        'public_key': invite['public_key'],
        'endpoint': invite.get('endpoint'),
        'vpn_subnet': invite['vpn_subnet'],
        'dns_ip': invite['dns_ip'],
        'domain': invite['domain'],
        'connected_at': _utc_now_iso(),
        'permissions': permissions,
        'remote_api_url': f"http://{invite['dns_ip']}:3000",
        'last_push_status': 'never',
        'last_push_at': None,
        'last_push_error': None,
        'pending_push': True,
        'last_remote_update_at': None,
        'exit_offered': False,
        'remote_exit_offered': False,
        'exit_relay_active': False,
        'remote_exit_relay_active': False,
    }


class CellLinkManager:
    """Persist cell-to-cell links and keep WireGuard, DNS and firewall in step."""

    def __init__(self, data_dir: str, config_dir: str,
                 wireguard_manager, network_manager):
        self.data_dir = data_dir
        self.config_dir = config_dir
        self.wireguard_manager = wireguard_manager
        self.network_manager = network_manager
        self.links_file = os.path.join(data_dir, 'cell_links.json')

    # ── Storage ───────────────────────────────────────────────────────────────

    def _load(self) -> List[Dict[str, Any]]:
        """Read all link records, migrating older records in place.

        Returns an empty list when the file does not exist or cannot be read
        or parsed; the failure is logged rather than raised.
        """
        if not os.path.exists(self.links_file):
            return []
        try:
            with open(self.links_file, encoding='utf-8') as f:
                links = json.load(f)
            if not isinstance(links, list):
                raise ValueError('top-level JSON value is not a list')
            changed = False
            for link in links:
                if _migrate_link(link):
                    changed = True
        except Exception as e:
            logger.error(f"Failed to load cell links from {self.links_file}: {e}")
            return []
        if changed:
            try:
                self._save(links)
            except Exception as e:
                # The in-memory records are still valid; only the write-back failed.
                logger.warning(f"Could not persist migrated cell links: {e}")
        return links

    def _save(self, links: List[Dict[str, Any]]):
        """Write all link records atomically.

        The data is written to a sibling temp file and then renamed over the
        real file, so an interrupted write never leaves truncated JSON behind.
        """
        tmp_path = self.links_file + '.tmp'
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(links, f, indent=2)
        os.replace(tmp_path, self.links_file)

    # ── Sync helpers ──────────────────────────────────────────────────────────

    def _local_identity(self) -> Dict[str, str]:
        """Return this cell's name and WG public key for outbound peer-sync calls."""
        from app import config_manager
        identity = config_manager.configs.get('_identity', {})
        cell_name = identity.get('cell_name', os.environ.get('CELL_NAME', 'mycell'))
        keys = self.wireguard_manager.get_keys()
        return {'cell_name': cell_name, 'public_key': keys.get('public_key', '')}

    def _local_wg_ip(self) -> Optional[str]:
        """Return the local cell-wireguard wg0 IP (e.g. '10.0.0.1').

        Used to set X-Forwarded-For so the remote peer-sync endpoint can
        authenticate us by VPN subnet even after MASQUERADE changes the
        apparent source to the Docker bridge IP. Returns None when the address
        cannot be determined; never raises.
        """
        try:
            r = subprocess.run(
                ['docker', 'exec', 'cell-wireguard', 'ip', 'addr', 'show', 'wg0'],
                capture_output=True, text=True, timeout=5
            )
            for line in r.stdout.splitlines():
                line = line.strip()
                if line.startswith('inet '):
                    # "inet 10.0.0.1/24 scope ..." → "10.0.0.1"
                    return line.split()[1].split('/')[0]
        except Exception as e:
            logger.debug(f"_local_wg_ip: could not read wg0 address: {e}")
        return None

    @staticmethod
    def _curl_post(url: str, payload: str,
                   extra_headers: Optional[List[str]] = None) -> Dict[str, Any]:
        """POST a JSON payload via curl inside the cell-wireguard container.

        Returns {'ok': bool, 'error': str|None}. Never raises. 'ok' is True
        only when curl exits 0 and the HTTP status starts with '2'.
        """
        cmd = [
            'docker', 'exec', 'cell-wireguard',
            'curl', '-s', '-o', '/dev/null', '-w', '%{http_code}',
            '-X', 'POST',
            '-H', 'Content-Type: application/json',
        ]
        for header in extra_headers or ():
            cmd += ['-H', header]
        cmd += [
            '-d', payload,
            '--max-time', str(_PUSH_TIMEOUT),
            '--connect-timeout', '3',
            url,
        ]
        try:
            result = subprocess.run(
                cmd, capture_output=True, text=True, timeout=_PUSH_TIMEOUT + 5
            )
            if result.returncode != 0:
                err = (result.stderr or result.stdout or 'curl error').strip()[:200]
                return {'ok': False, 'error': err}
            status = result.stdout.strip()
            if status.startswith('2'):
                return {'ok': True, 'error': None}
            return {'ok': False, 'error': f'HTTP {status}'}
        except subprocess.TimeoutExpired:
            return {'ok': False, 'error': 'timeout'}
        except Exception as e:
            return {'ok': False, 'error': str(e)[:200]}

    def _push_permissions_to_remote(self, link: Dict[str, Any],
                                    from_cell: str,
                                    from_public_key: str) -> Dict[str, Any]:
        """POST mirrored permissions to the remote cell's peer-sync endpoint.

        Returns {'ok': bool, 'error': str|None}. Never raises.

        The body inverts inbound/outbound: our inbound (what we share with them)
        becomes their outbound (what they receive from us), and vice-versa.

        Uses 'docker exec cell-wireguard curl' so the HTTP request originates from
        inside cell-wireguard's network namespace, which has routes to remote cell
        VPN subnets that cell-api (on the Docker bridge) lacks.
        """
        url = link.get('remote_api_url')
        if not url:
            return {'ok': False, 'error': 'no remote_api_url'}

        perms = link.get('permissions') or _default_perms()
        body = {
            'version': 1,
            'from_cell': from_cell,
            'from_public_key': from_public_key,
            'permissions': {
                'outbound': dict(perms.get('inbound', {})),
                'inbound': dict(perms.get('outbound', {})),
            },
            'exit_offered': bool(link.get('exit_offered', False)),
            'use_as_exit_relay': bool(link.get('exit_relay_active', False)),
            'sent_at': _utc_now_iso() + 'Z',
        }
        payload = json.dumps(body)
        endpoint = url.rstrip('/') + '/api/cells/peer-sync/permissions'

        # Determine local WG IP so the remote can authenticate us by source subnet.
        # MASQUERADE rewrites source to cell-wireguard's eth0 IP (172.20.x.x), which
        # is NOT in the cell's vpn_subnet. Passing the true WG IP in X-Forwarded-For
        # lets _authenticate_peer_cell() find the matching cell link.
        local_wg_ip = self._local_wg_ip()
        headers = [f'X-Forwarded-For: {local_wg_ip}'] if local_wg_ip else None
        return self._curl_post(endpoint, payload, headers)

    def _record_push_result(self, cell_name: str, result: Dict[str, Any]) -> None:
        """Persist last_push_* and pending_push after a push attempt."""
        links = self._load()
        for link in links:
            if link['cell_name'] == cell_name:
                if result.get('ok'):
                    link['last_push_status'] = 'ok'
                    link['last_push_at'] = _utc_now_iso()
                    link['last_push_error'] = None
                    link['pending_push'] = False
                else:
                    link['last_push_status'] = 'failed'
                    link['last_push_error'] = result.get('error')
                    link['pending_push'] = True
                break
        self._save(links)

    def _try_push(self, cell_name: str, link: Dict[str, Any]) -> None:
        """Mark pending, push, record result. Non-fatal."""
        # Mark dirty before pushing — a crash mid-push leaves it pending for replay
        links = self._load()
        for stored in links:
            if stored['cell_name'] == cell_name:
                stored['pending_push'] = True
                break
        self._save(links)
        try:
            identity = self._local_identity()
            result = self._push_permissions_to_remote(
                link, identity['cell_name'], identity['public_key']
            )
            self._record_push_result(cell_name, result)
            if not result['ok']:
                logger.warning(
                    f"Permission push to '{cell_name}' failed "
                    f"(will retry on startup): {result['error']}"
                )
        except Exception as e:
            logger.warning(f"Permission push to '{cell_name}' skipped (non-fatal): {e}")

    def apply_remote_permissions(self, from_public_key: str,
                                 permissions: Dict[str, Any],
                                 exit_offered: bool = False,
                                 use_as_exit_relay: bool = False) -> Dict[str, Any]:
        """Store permissions pushed by a remote cell (identified by WG public key).

        Validates service names, persists, and re-applies local iptables rules.
        Malformed permission payloads (non-dict values) are treated as
        "nothing granted" instead of raising. Returns the updated link record.

        Raises:
            ValueError: no stored link has the given public key.
        """
        links = self._load()
        link = next((l for l in links if l['public_key'] == from_public_key), None)
        if not link:
            raise ValueError(f"No connection found for public_key '{from_public_key[:16]}...'")

        payload = permissions if isinstance(permissions, dict) else {}
        clean_inbound = _clean_service_map(payload.get('inbound', {}))
        clean_outbound = _clean_service_map(payload.get('outbound', {}))
        link['permissions'] = {'inbound': clean_inbound, 'outbound': clean_outbound}
        link['remote_exit_offered'] = bool(exit_offered)
        link['remote_exit_relay_active'] = bool(use_as_exit_relay)
        link['last_remote_update_at'] = _utc_now_iso()
        self._save(links)

        inbound_list = [s for s, v in clean_inbound.items() if v]
        try:
            import firewall_manager as _fm
            _fm.apply_cell_rules(link['cell_name'], link['vpn_subnet'], inbound_list,
                                 exit_relay=use_as_exit_relay)
        except Exception as e:
            logger.warning(
                f"apply_cell_rules after remote push for '{link['cell_name']}' "
                f"failed (non-fatal): {e}"
            )
        return link

    def replay_pending_pushes(self) -> Dict[str, int]:
        """Retry permission pushes for any link with pending_push=True.

        Called from _apply_startup_enforcement. Returns counts for logging.
        """
        summary = {'attempted': 0, 'ok': 0, 'failed': 0}
        try:
            identity = self._local_identity()
        except Exception as e:
            logger.warning(f"replay_pending_pushes: cannot resolve identity ({e})")
            return summary

        for link in self._load():
            if not link.get('pending_push'):
                continue
            summary['attempted'] += 1
            result = self._push_permissions_to_remote(
                link, identity['cell_name'], identity['public_key']
            )
            self._record_push_result(link['cell_name'], result)
            if result.get('ok'):
                summary['ok'] += 1
                logger.info(f"replay: synced permissions to '{link['cell_name']}'")
            else:
                summary['failed'] += 1
                logger.warning(
                    f"replay: push to '{link['cell_name']}' failed: {result.get('error')}"
                )

        if summary['attempted']:
            logger.info(
                f"replay_pending_pushes: {summary['attempted']} attempted, "
                f"{summary['ok']} ok, {summary['failed']} failed"
            )
        return summary

    # ── Public API ────────────────────────────────────────────────────────────

    def generate_invite(self, cell_name: str, domain: str) -> Dict[str, Any]:
        """Return an invite package describing this cell for another cell to import."""
        keys = self.wireguard_manager.get_keys()
        srv = self.wireguard_manager.get_server_config()
        server_vpn_ip = self.wireguard_manager._get_configured_address().split('/')[0]
        return {
            'cell_name': cell_name,
            'public_key': keys['public_key'],
            'endpoint': srv.get('endpoint'),
            'vpn_subnet': self.wireguard_manager._get_configured_network(),
            'dns_ip': server_vpn_ip,
            'domain': domain,
            'version': 1,
        }

    def list_connections(self) -> List[Dict[str, Any]]:
        """Return every stored link record."""
        return self._load()

    def _check_invite_conflicts(self, invite: Dict[str, Any],
                                exclude_cell: str = '') -> None:
        """Raise ValueError if invite's subnet or domain conflicts with existing state.

        Links whose cell_name equals exclude_cell are ignored in both checks.
        """
        import ipaddress as _ip
        links = self._load()
        remote_subnet = invite.get('vpn_subnet', '')
        remote_domain = invite.get('domain', '')

        # Check VPN subnet: must not overlap our own subnet or any existing cell's subnet
        try:
            remote_net = _ip.ip_network(remote_subnet, strict=False)
            own_net = _ip.ip_network(
                self.wireguard_manager._get_configured_network(), strict=False)
            if remote_net.overlaps(own_net):
                raise ValueError(
                    f"VPN subnet {remote_subnet!r} overlaps this cell's own subnet "
                    f"{str(own_net)!r} — the remote cell must use a distinct IP range"
                )
            for link in links:
                if link['cell_name'] == exclude_cell:
                    continue
                existing_net = _ip.ip_network(
                    link.get('vpn_subnet', '0.0.0.0/32'), strict=False)
                if remote_net.overlaps(existing_net):
                    raise ValueError(
                        f"VPN subnet {remote_subnet!r} overlaps already-connected cell "
                        f"'{link['cell_name']}' ({link['vpn_subnet']!r})"
                    )
        except ValueError:
            # Covers both our conflict errors and ipaddress parse errors.
            raise
        except Exception as e:
            logger.warning(f'_check_invite_conflicts subnet check skipped: {e}')

        # Check domain: must not match our own domain or any existing cell's domain
        if remote_domain:
            try:
                from app import config_manager
                identity = config_manager.configs.get('_identity', {})
                own_domain = identity.get('domain', os.environ.get('CELL_DOMAIN', ''))
                if own_domain and remote_domain == own_domain:
                    raise ValueError(
                        f"Domain {remote_domain!r} is the same as this cell's own domain — "
                        f"the remote cell must use a different domain name"
                    )
            except ValueError:
                raise
            except Exception as e:
                # Own identity unavailable — skip only the own-domain comparison.
                logger.debug(f'_check_invite_conflicts own-domain check skipped: {e}')
            for link in links:
                if link['cell_name'] == exclude_cell:
                    continue
                if link.get('domain') == remote_domain:
                    raise ValueError(
                        f"Domain {remote_domain!r} is already used by connected cell "
                        f"'{link['cell_name']}' — each cell must have a unique domain"
                    )

    def _push_invite_to_remote(self, link: Dict[str, Any]) -> Dict[str, Any]:
        """Send OUR invite to the remote cell so it can complete mutual WG pairing.

        Called immediately after adding the remote as our WG peer. Uses the
        remote's endpoint IP (LAN-reachable before the WG tunnel is up) rather
        than the WG-internal dns_ip.

        Returns {'ok': bool, 'error': str|None}. Non-fatal — one-sided pairing
        degrades gracefully; the admin can pair from the other side manually.
        """
        endpoint = link.get('endpoint') or ''
        if not endpoint:
            return {'ok': False, 'error': 'no endpoint'}

        # Parse LAN IP from endpoint (e.g. "192.168.31.52:51820" → "192.168.31.52")
        try:
            host = endpoint.rsplit(':', 1)[0].strip('[]')
        except Exception:
            return {'ok': False, 'error': f'cannot parse endpoint {endpoint!r}'}
        if not host:
            return {'ok': False, 'error': f'cannot parse endpoint {endpoint!r}'}
        if ':' in host:
            # IPv6 literal ("[fe80::1]:51820") — a URL needs the brackets back.
            host = f'[{host}]'

        try:
            identity = self._local_identity()
            from app import config_manager
            id_cfg = config_manager.configs.get('_identity', {})
            own_domain = id_cfg.get('domain', os.environ.get('CELL_DOMAIN', 'cell'))
            own_invite = self.generate_invite(identity['cell_name'], own_domain)
        except Exception as e:
            return {'ok': False, 'error': f'could not build own invite: {e}'}

        url = f'http://{host}:3000/api/cells/peer-sync/accept-invite'
        payload = json.dumps({'invite': own_invite})
        return self._curl_post(url, payload)

    def accept_invite(self, invite: Dict[str, Any]) -> Dict[str, Any]:
        """Accept a remote cell's invite and complete mutual WG pairing.

        Called by the /api/cells/peer-sync/accept-invite endpoint when the remote
        cell pushes its own invite after we connected to it. Idempotent: if we are
        already connected to this cell, returns the existing link (after healing
        a changed dns_ip / vpn_subnet / endpoint).

        Raises:
            ValueError: a required field is missing, or the invite conflicts
                with this cell or an already-connected cell.
            RuntimeError: the WireGuard peer could not be added.
        """
        _validate_invite(invite)

        links = self._load()
        name = invite['cell_name']

        # Already connected — check whether the remote's endpoint or subnet changed
        # (e.g. the remote cell changed its WireGuard address) and heal if so.
        existing = next((l for l in links if l['cell_name'] == name), None)
        if existing:
            dns_changed = existing.get('dns_ip') != invite['dns_ip']
            subnet_changed = existing.get('vpn_subnet') != invite['vpn_subnet']
            endpoint_changed = (invite.get('endpoint') and
                                invite['endpoint'] != existing.get('endpoint'))
            if dns_changed or subnet_changed or endpoint_changed:
                logger.info(
                    f"accept_invite: updating existing cell '{name}' "
                    f"(dns_ip: {existing.get('dns_ip')} → {invite['dns_ip']}, "
                    f"vpn_subnet: {existing.get('vpn_subnet')} → {invite['vpn_subnet']})"
                )
                # NOTE(review): the new subnet is accepted without running
                # _check_invite_conflicts(invite, exclude_cell=name) — confirm
                # whether overlap with other cells should be rejected here.
                old_subnet = existing.get('vpn_subnet', '')
                existing['dns_ip'] = invite['dns_ip']
                existing['vpn_subnet'] = invite['vpn_subnet']
                existing['remote_api_url'] = f"http://{invite['dns_ip']}:3000"
                if invite.get('endpoint'):
                    existing['endpoint'] = invite['endpoint']
                self._save(links)
                # Update WG peer AllowedIPs to the new subnet
                if subnet_changed and old_subnet:
                    self.wireguard_manager.update_peer_ip(
                        existing['public_key'], invite['vpn_subnet'])
                # Update DNS forward rule (remove old, add new)
                if dns_changed:
                    try:
                        self.network_manager.remove_cell_dns_forward(existing['domain'])
                    except Exception as e:
                        logger.debug(
                            f"accept_invite: removing old DNS forward failed (ignored): {e}")
                    self.network_manager.add_cell_dns_forward(
                        domain=existing['domain'], dns_ip=invite['dns_ip'])
                # Reapply firewall rules with new subnet
                if subnet_changed:
                    try:
                        import firewall_manager as _fm
                        inbound_list = [
                            s for s, v in
                            existing.get('permissions', {}).get('inbound', {}).items()
                            if v
                        ]
                        _fm.apply_cell_rules(name, invite['vpn_subnet'], inbound_list)
                    except Exception as e:
                        logger.warning(f"apply_cell_rules after subnet update failed: {e}")
            return existing

        # Conflict check (nothing to exclude — this cell is not connected yet)
        self._check_invite_conflicts(invite)

        ok = self.wireguard_manager.add_cell_peer(
            name=name,
            public_key=invite['public_key'],
            endpoint=invite.get('endpoint', ''),
            vpn_subnet=invite['vpn_subnet'],
        )
        if not ok:
            raise RuntimeError(f"Failed to add WireGuard peer for cell '{name}'")

        dns_result = self.network_manager.add_cell_dns_forward(
            domain=invite['domain'],
            dns_ip=invite['dns_ip'],
        )
        if dns_result.get('warnings'):
            logger.warning('DNS forward warnings for %s (accept_invite): %s',
                           name, dns_result['warnings'])

        link = _new_link_record(invite, _default_perms())
        links.append(link)
        self._save(links)

        try:
            import firewall_manager as _fm
            _fm.apply_cell_rules(name, invite['vpn_subnet'], [])
        except Exception as e:
            logger.warning(f"apply_cell_rules for {name} (accept_invite) failed: {e}")

        logger.info(f"accept_invite: mutual pairing completed for cell '{name}'")
        return link

    def add_connection(self, invite: Dict[str, Any],
                       inbound_services: Optional[List[str]] = None) -> Dict[str, Any]:
        """Import a remote cell's invite and establish the connection.

        Args:
            invite: invite package produced by the remote cell.
            inbound_services: services to share with the remote from the start;
                names not in VALID_SERVICES are ignored.

        Raises:
            ValueError: a required field is missing, the cell is already
                connected, or the invite conflicts with existing state.
            RuntimeError: the WireGuard peer could not be added.
        """
        # Validate first so a malformed invite cannot leave a WG peer added
        # without a matching link record.
        _validate_invite(invite)

        links = self._load()
        name = invite['cell_name']
        if any(l['cell_name'] == name for l in links):
            raise ValueError(f"Cell '{name}' is already connected")

        # Check for VPN subnet and domain conflicts before touching WG/DNS
        self._check_invite_conflicts(invite)

        ok = self.wireguard_manager.add_cell_peer(
            name=name,
            public_key=invite['public_key'],
            endpoint=invite.get('endpoint', ''),
            vpn_subnet=invite['vpn_subnet'],
        )
        if not ok:
            raise RuntimeError(f"Failed to add WireGuard peer for cell '{name}'")

        dns_result = self.network_manager.add_cell_dns_forward(
            domain=invite['domain'],
            dns_ip=invite['dns_ip'],
        )
        if dns_result.get('warnings'):
            logger.warning('DNS forward warnings for %s: %s', name, dns_result['warnings'])

        inbound = [s for s in (inbound_services or []) if s in VALID_SERVICES]
        perms = _default_perms()
        for s in inbound:
            perms['inbound'][s] = True

        link = _new_link_record(invite, perms)
        links.append(link)
        self._save(links)

        try:
            import firewall_manager as _fm
            _fm.apply_cell_rules(name, invite['vpn_subnet'], inbound)
        except Exception as e:
            logger.warning(f"apply_cell_rules for {name} failed (non-fatal): {e}")

        # Push OUR invite to the remote so it can complete mutual WG pairing.
        # This is done over the LAN (before the WG tunnel is up) using the
        # endpoint IP, so the remote doesn't need manual action from both sides.
        try:
            inv_result = self._push_invite_to_remote(link)
            if inv_result.get('ok'):
                logger.info(f"Mutual pairing invite accepted by '{name}'")
            else:
                logger.warning(
                    f"Invite push to '{name}' failed (manual pairing from remote required): "
                    f"{inv_result.get('error')}"
                )
        except Exception as e:
            logger.warning(f"Invite push to '{name}' skipped (non-fatal): {e}")

        # Initial permission push (uses WG tunnel — may fail if tunnel not yet up)
        try:
            identity = self._local_identity()
            result = self._push_permissions_to_remote(
                link, identity['cell_name'], identity['public_key']
            )
            self._record_push_result(name, result)
            if not result['ok']:
                logger.warning(
                    f"Initial permission push to '{name}' failed "
                    f"(will retry on startup): {result['error']}"
                )
        except Exception as e:
            logger.warning(f"Initial permission push to '{name}' skipped (non-fatal): {e}")

        return link

    def remove_connection(self, cell_name: str):
        """Tear down a cell connection by name.

        Raises:
            ValueError: no link with that name exists.
        """
        links = self._load()
        link = next((l for l in links if l['cell_name'] == cell_name), None)
        if not link:
            raise ValueError(f"Cell '{cell_name}' not found")

        try:
            import firewall_manager as _fm
            _fm.clear_cell_rules(cell_name)
        except Exception as e:
            logger.warning(f"clear_cell_rules for {cell_name} failed (non-fatal): {e}")

        self.wireguard_manager.remove_peer(link['public_key'])
        self.network_manager.remove_cell_dns_forward(link['domain'])
        links = [l for l in links if l['cell_name'] != cell_name]
        self._save(links)

    def update_permissions(self, cell_name: str, inbound: Dict[str, bool],
                           outbound: Dict[str, bool]) -> Dict[str, Any]:
        """Update service sharing permissions for a cell connection.

        Validates, persists, re-applies iptables, then pushes to remote.
        Returns the updated link record.

        Raises:
            ValueError: no link with that name exists.
        """
        links = self._load()
        link = next((l for l in links if l['cell_name'] == cell_name), None)
        if not link:
            raise ValueError(f"Cell '{cell_name}' not found")

        clean_inbound = _clean_service_map(inbound)
        clean_outbound = _clean_service_map(outbound)
        link['permissions'] = {'inbound': clean_inbound, 'outbound': clean_outbound}
        self._save(links)

        inbound_list = [s for s, v in clean_inbound.items() if v]
        try:
            import firewall_manager as _fm
            _fm.apply_cell_rules(cell_name, link['vpn_subnet'], inbound_list)
        except Exception as e:
            logger.warning(f"apply_cell_rules for {cell_name} failed (non-fatal): {e}")

        # Push mirrored state to the remote cell (non-fatal)
        self._try_push(cell_name, link)
        return link

    def get_permissions(self, cell_name: str) -> Dict[str, Any]:
        """Return the permissions dict for a connected cell.

        Raises:
            ValueError: no link with that name exists.
        """
        links = self._load()
        link = next((l for l in links if l['cell_name'] == cell_name), None)
        if not link:
            raise ValueError(f"Cell '{cell_name}' not found")
        return link.get('permissions', _default_perms())

    def set_exit_offered(self, cell_name: str, offered: bool) -> Dict[str, Any]:
        """Toggle whether THIS cell offers to route internet traffic for cell_name.

        The new value is persisted locally then pushed to the remote cell so it
        knows our offer changed. Returns the updated link record.

        Raises:
            ValueError: no link with that name exists.
        """
        links = self._load()
        link = next((l for l in links if l['cell_name'] == cell_name), None)
        if not link:
            raise ValueError(f"Cell '{cell_name}' not found")
        link['exit_offered'] = bool(offered)
        self._save(links)
        self._try_push(cell_name, link)
        return link

    def set_exit_relay_active(self, cell_name: str, active: bool) -> Dict[str, Any]:
        """Record that THIS cell is routing a peer's internet traffic via cell_name.

        Persists the flag locally and pushes updated state to the remote cell so
        it can enable/disable the FORWARD-to-eth0 rule on its side.
        Returns the updated link record.

        Raises:
            ValueError: no link with that name exists.
        """
        links = self._load()
        link = next((l for l in links if l['cell_name'] == cell_name), None)
        if not link:
            raise ValueError(f"Cell '{cell_name}' not found")
        link['exit_relay_active'] = bool(active)
        self._save(links)
        self._try_push(cell_name, link)
        return link

    def get_connection_status(self, cell_name: str) -> Dict[str, Any]:
        """Return link record enriched with live WireGuard handshake status.

        If the status lookup fails the link is reported as offline.

        Raises:
            ValueError: no link with that name exists.
        """
        links = self._load()
        link = next((l for l in links if l['cell_name'] == cell_name), None)
        if not link:
            raise ValueError(f"Cell '{cell_name}' not found")
        try:
            st = self.wireguard_manager.get_peer_status(link['public_key'])
            return {**link, 'online': st.get('online', False),
                    'last_handshake': st.get('last_handshake')}
        except Exception:
            return {**link, 'online': False, 'last_handshake': None}