1bb8a5eb59
Unit Tests / test (push) Successful in 9m50s
Three related cell-link/peer-config fixes (the peer and cell endpoints were showing the raw external IP, which confused public-vs-internal addressing): 1. Peer WireGuard configs now embed the cell's effective domain (DDNS/ACME modes) instead of the detected external IP, via the new WireGuardManager.get_advertised_endpoint(). A name that resolves to the public IP survives IP changes and lets the datacenter forward each cell's WG port to the right host. LAN mode still falls back to the IP; an admin wireguard_endpoint override still wins. 2. Cell invites advertise <effective-domain>:<this cell's WG port> (was the external IP + a default/possibly-wrong port), so a remote cell pairs to the right host and port over the public path. 3. Cross-cell peer-sync no longer targets http://<ip>:3000 (the API binds 127.0.0.1 and is unreachable across cells). It targets the remote's Caddy on HTTPS/443 — which the WireGuard server already DNATs over the tunnel — and the initial pre-tunnel invite push goes to https://<endpoint-host>/... ; legacy http://<ip>:3000 link URLs migrate to https on load. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
840 lines
37 KiB
Python
840 lines
37 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
CellLinkManager — manages site-to-site connections between PIC cells.
|
|
|
|
Each connection is stored in data/cell_links.json and manifests as:
|
|
- A WireGuard [Peer] block (AllowedIPs = remote cell's VPN subnet)
|
|
- A CoreDNS forwarding block (remote domain → remote cell's DNS IP)
|
|
- An iptables FORWARD rule set (service-level access control)
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
import random
|
|
import subprocess
|
|
from datetime import datetime, timezone, timedelta
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
logger = logging.getLogger(__name__)

# Services whose sharing can be switched on/off per direction on a cell link.
VALID_SERVICES = ('calendar', 'files', 'mail', 'webdav')

# Deny-everything permission template: one {service: False} map per direction.
_DEFAULT_PERMISSIONS = {
    direction: dict.fromkeys(VALID_SERVICES, False)
    for direction in ('inbound', 'outbound')
}

# curl --max-time (seconds) for cross-cell pushes.
_PUSH_TIMEOUT = 5  # seconds
# Retry backoff bounds, in seconds (first delay / upper cap before jitter).
_BACKOFF_BASE_S = 60
_BACKOFF_MAX_S = 3600
|
|
|
|
|
|
def _remote_api_url(dns_ip: Optional[str]) -> Optional[str]:
    """Base URL for a linked cell's API, reached over the WG tunnel.

    Cross-cell peer-sync goes to the remote's Caddy on 443 (the WireGuard server
    DNATs VPN-IP:443 → Caddy → API). The API's own :3000 is bound to 127.0.0.1
    and is NOT reachable from another cell, so we must target HTTPS/443, not
    http://<ip>:3000.

    Args:
        dns_ip: The remote cell's VPN-side address, or None/'' when unknown.

    Returns:
        ``https://<host>`` or None when no address is available. IPv6
        literals are wrapped in brackets so the result is a valid URL.
    """
    if not dns_ip:
        return None
    host = dns_ip
    # A bare IPv6 literal must be bracketed inside a URL authority (RFC 3986).
    if ':' in host and not host.startswith('['):
        host = f'[{host}]'
    return f"https://{host}"
|
|
|
|
|
|
def _compute_next_retry(attempts: int) -> str:
    """Return an ISO timestamp for the earliest next retry using capped exponential backoff.

    The delay doubles per attempt starting at ``_BACKOFF_BASE_S``, is capped at
    ``_BACKOFF_MAX_S``, and then gets up to ``_BACKOFF_BASE_S / 2`` seconds of
    random jitter added.

    Args:
        attempts: Number of consecutive failed pushes so far. Values below 1
            are treated as 1 so the delay never drops under the base.

    Returns:
        A naive-UTC ``datetime.isoformat()`` string (no offset suffix), the
        same shape the rest of this module stores and compares as text.
    """
    # Clamp the exponent: <1 would shrink the delay below the base, and a very
    # large value would build an enormous integer only to be capped by min().
    exponent = min(max(int(attempts), 1) - 1, 32)
    delay = min(_BACKOFF_BASE_S * (2 ** exponent), _BACKOFF_MAX_S)
    delay += random.uniform(0, _BACKOFF_BASE_S / 2)
    # datetime.utcnow() is deprecated; strip tzinfo to keep the naive format.
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    return (now + timedelta(seconds=delay)).isoformat()
|
|
|
|
|
|
def _default_perms() -> Dict[str, Any]:
    """Build a fresh deny-all permissions record.

    Returns a new dict on every call with an 'inbound' and an 'outbound'
    section, each mapping every entry of VALID_SERVICES to False.
    """
    perms: Dict[str, Any] = {}
    for direction in ('inbound', 'outbound'):
        perms[direction] = dict.fromkeys(VALID_SERVICES, False)
    return perms
|
|
|
|
|
|
class CellLinkManager:
|
|
def __init__(self, data_dir: str, config_dir: str, wireguard_manager, network_manager):
    """Store collaborators and derive the path of the link store.

    Args:
        data_dir: Directory that holds ``cell_links.json``.
        config_dir: Configuration directory (kept on the instance).
        wireguard_manager: Object used for WireGuard peer/key operations.
        network_manager: Object used for DNS-forward operations.
    """
    self.data_dir, self.config_dir = data_dir, config_dir
    self.wireguard_manager, self.network_manager = wireguard_manager, network_manager
    # All link records live in a single JSON file under data_dir.
    self.links_file = os.path.join(self.data_dir, 'cell_links.json')
|
|
|
|
# ── Storage ───────────────────────────────────────────────────────────────
|
|
|
|
def _load(self) -> List[Dict[str, Any]]:
    """Load all cell links from ``links_file``, migrating old records in place.

    Records written by earlier versions are upgraded by adding any missing
    fields with their defaults and by rewriting legacy ``http://`` API URLs
    to the HTTPS form. When anything was upgraded the file is re-saved.

    Returns:
        The list of link dicts, or ``[]`` when the file is absent, unreadable
        or not a JSON list. Never raises.
    """
    if not os.path.exists(self.links_file):
        return []

    # (key, default) pairs introduced by successive schema phases. Order is
    # preserved so migrated records get their keys in the established order.
    field_defaults = (
        # Phase 1 migration: permission-sync tracking fields
        ('last_push_status', 'never'),
        ('last_push_at', None),
        ('last_push_error', None),
        # Existing links predate sync — mark pending so startup replay syncs them
        ('pending_push', True),
        ('last_remote_update_at', None),
        # Phase 2 migration: exit-offer signaling fields
        ('exit_offered', False),
        ('remote_exit_offered', False),
        # Phase 3 migration: per-peer internet routing
        ('exit_relay_active', False),
        ('remote_exit_relay_active', False),
        # Phase 4 migration: retry/backoff state
        ('push_attempts', 0),
        ('next_retry_at', None),
    )

    try:
        with open(self.links_file) as f:
            links = json.load(f)
        if not isinstance(links, list):
            raise ValueError(f"expected a JSON list, got {type(links).__name__}")

        changed = False
        for link in links:
            if 'permissions' not in link:
                link['permissions'] = _default_perms()
                changed = True
            # Missing URL, or a legacy http://<ip>:3000 URL (unreachable
            # across cells): derive the HTTPS/Caddy form from dns_ip.
            if ('remote_api_url' not in link
                    or str(link.get('remote_api_url', '')).startswith('http://')):
                link['remote_api_url'] = _remote_api_url(link.get('dns_ip'))
                changed = True
            for key, default in field_defaults:
                if key not in link:
                    link[key] = default
                    changed = True
    except Exception as e:
        # Keep the historical "never raises" contract, but leave a trace:
        # an unreadable store otherwise looks identical to "no links".
        logger.error(f"Could not load cell links from {self.links_file}: {e}")
        return []

    if changed:
        # Persisting the migration is best-effort; a write failure must not
        # throw away links that were read successfully.
        try:
            self._save(links)
        except Exception as e:
            logger.warning(f"Could not persist migrated cell links: {e}")
    return links
|
|
|
|
def _save(self, links: List[Dict[str, Any]]):
    """Atomically write ``links`` to ``links_file`` as indented JSON.

    The data is first written to a sibling temp file and then moved over the
    real file with ``os.replace``, so a crash or a serialization error can
    never leave a truncated/partial ``cell_links.json`` behind.

    Raises:
        OSError: If the file cannot be written or replaced.
        TypeError / ValueError: If ``links`` is not JSON-serializable.
    """
    tmp_path = f"{self.links_file}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, 'w') as f:
            json.dump(links, f, indent=2)
            f.flush()
            os.fsync(f.fileno())
        os.replace(tmp_path, self.links_file)
    except BaseException:
        # Do not leave a stray temp file next to the store on failure.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
|
|
|
|
# ── Sync helpers ──────────────────────────────────────────────────────────
|
|
|
|
def _local_identity(self) -> Dict[str, str]:
    """Describe this cell for outbound peer-sync calls.

    Returns a dict with 'cell_name' (from the stored '_identity' config,
    falling back to the CELL_NAME environment variable, then 'mycell') and
    'public_key' (from the WireGuard manager's keys, '' when absent).
    """
    from app import config_manager

    stored = config_manager.configs.get('_identity', {})
    env_name = os.environ.get('CELL_NAME', 'mycell')
    wg_keys = self.wireguard_manager.get_keys()
    return {
        'cell_name': stored.get('cell_name', env_name),
        'public_key': wg_keys.get('public_key', ''),
    }
|
|
|
|
def _local_wg_ip(self) -> Optional[str]:
    """Return the local cell-wireguard wg0 IP (e.g. '10.0.0.1').

    The address is read by running ``ip addr show wg0`` inside the
    cell-wireguard container and taking the first ``inet`` entry, without its
    prefix length. It is used to set X-Forwarded-For so the remote peer-sync
    endpoint can authenticate us by VPN subnet even after MASQUERADE changes
    the apparent source to the Docker bridge IP.

    Returns None when the command fails or no ``inet`` line is present.
    """
    argv = ['docker', 'exec', 'cell-wireguard', 'ip', 'addr', 'show', 'wg0']
    try:
        proc = subprocess.run(argv, capture_output=True, text=True, timeout=5)
        rows = (row.strip() for row in proc.stdout.splitlines())
        cidr = next((row.split()[1] for row in rows if row.startswith('inet ')), None)
        return cidr.split('/')[0] if cidr is not None else None
    except Exception:
        # Best-effort lookup: any failure just means "unknown".
        return None
|
|
|
|
def _push_permissions_to_remote(self, link: Dict[str, Any],
                                from_cell: str,
                                from_public_key: str) -> Dict[str, Any]:
    """POST mirrored permissions to the remote cell's peer-sync endpoint.

    The body inverts inbound/outbound: our inbound (what we share with them)
    becomes their outbound (what they receive from us), and vice-versa.

    Uses 'docker exec cell-wireguard curl' so the HTTP request originates
    from inside cell-wireguard's network namespace, which has routes to
    remote cell VPN subnets that cell-api (on the Docker bridge) lacks.

    Args:
        link: The link record; needs 'remote_api_url', optionally
            'permissions', 'exit_offered' and 'exit_relay_active'.
        from_cell: This cell's name, sent as 'from_cell'.
        from_public_key: This cell's WG public key, sent as 'from_public_key'.

    Returns:
        {'ok': bool, 'error': str|None}. 'ok' is True only for a 2xx status.
        Subprocess failures are reported in the result rather than raised.
    """
    url = link.get('remote_api_url')
    if not url:
        return {'ok': False, 'error': 'no remote_api_url'}

    perms = link.get('permissions') or _default_perms()
    body = {
        'version': 1,
        'from_cell': from_cell,
        'from_public_key': from_public_key,
        'permissions': {
            'outbound': dict(perms.get('inbound', {})),
            'inbound': dict(perms.get('outbound', {})),
        },
        'exit_offered': bool(link.get('exit_offered', False)),
        'use_as_exit_relay': bool(link.get('exit_relay_active', False)),
        # Naive UTC + 'Z'; datetime.utcnow() is deprecated.
        'sent_at': datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z',
    }
    payload = json.dumps(body)
    endpoint = url.rstrip('/') + '/api/cells/peer-sync/permissions'

    # Determine local WG IP so the remote can authenticate us by source subnet.
    # MASQUERADE rewrites source to cell-wireguard's eth0 IP (172.20.x.x), which
    # is NOT in the cell's vpn_subnet. Passing the true WG IP in X-Forwarded-For
    # lets _authenticate_peer_cell() find the matching cell link.
    local_wg_ip = self._local_wg_ip()

    cmd = [
        'docker', 'exec', 'cell-wireguard',
        # -k: the request reaches Caddy by the remote's VPN IP over the
        # encrypted WG tunnel, so the TLS cert (issued for the cell's domain)
        # won't match the IP — the tunnel already authenticates the peer.
        # -S: with -s alone curl prints no error text, so a failed connect
        # would only be recorded as the write-out value "000".
        'curl', '-s', '-S', '-k', '-o', '/dev/null', '-w', '%{http_code}',
        '-X', 'POST',
        '-H', 'Content-Type: application/json',
    ]
    if local_wg_ip:
        cmd += ['-H', f'X-Forwarded-For: {local_wg_ip}']
    cmd += [
        '-d', payload,
        '--max-time', str(_PUSH_TIMEOUT),
        '--connect-timeout', '3',
        endpoint,
    ]

    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=_PUSH_TIMEOUT + 5
        )
    except subprocess.TimeoutExpired:
        return {'ok': False, 'error': 'timeout'}
    except Exception as e:
        return {'ok': False, 'error': str(e)[:200]}

    if result.returncode != 0:
        err = (result.stderr or result.stdout or 'curl error').strip()[:200]
        return {'ok': False, 'error': err}
    status = result.stdout.strip()
    if status.startswith('2'):
        return {'ok': True, 'error': None}
    return {'ok': False, 'error': f'HTTP {status}'}
|
|
|
|
def _record_push_result(self, cell_name: str, result: Dict[str, Any]) -> None:
    """Write the outcome of a push attempt onto the named link and save.

    On success the link is marked synced (status 'ok', timestamp set, error,
    retry counter and next-retry cleared). On failure it stays pending, the
    error is stored, the attempt counter is incremented and the next retry
    time is scheduled. The store is saved even when no link matches.
    """
    links = self._load()
    target = next((entry for entry in links if entry['cell_name'] == cell_name), None)

    if target is not None:
        if result.get('ok'):
            target.update({
                'last_push_status': 'ok',
                'last_push_at': datetime.utcnow().isoformat(),
                'last_push_error': None,
                'pending_push': False,
                'push_attempts': 0,
                'next_retry_at': None,
            })
        else:
            attempts = target.get('push_attempts', 0) + 1
            target.update({
                'last_push_status': 'failed',
                'last_push_error': result.get('error'),
                'pending_push': True,
                'push_attempts': attempts,
                'next_retry_at': _compute_next_retry(attempts),
            })

    self._save(links)
|
|
|
|
def _try_push(self, cell_name: str, link: Dict[str, Any]) -> None:
    """Flag the link as pending, attempt a push, and record the outcome.

    Failures of the push itself are logged and swallowed; only the initial
    load/save of the pending flag can raise.
    """
    # Persist the dirty flag first — a crash mid-push leaves it pending for replay.
    links = self._load()
    entry = next((item for item in links if item['cell_name'] == cell_name), None)
    if entry is not None:
        entry['pending_push'] = True
    self._save(links)

    try:
        me = self._local_identity()
        outcome = self._push_permissions_to_remote(
            link, me['cell_name'], me['public_key']
        )
        self._record_push_result(cell_name, outcome)
        if outcome['ok']:
            return
        logger.warning(
            f"Permission push to '{cell_name}' failed "
            f"(will retry on startup): {outcome['error']}"
        )
    except Exception as e:
        logger.warning(f"Permission push to '{cell_name}' skipped (non-fatal): {e}")
|
|
|
|
def apply_remote_permissions(self, from_public_key: str,
                             permissions: Dict[str, Any],
                             exit_offered: bool = False,
                             use_as_exit_relay: bool = False) -> Dict[str, Any]:
    """Store permissions pushed by a remote cell (identified by WG public key).

    Validates service names, persists, and re-applies local iptables rules.

    Args:
        from_public_key: WG public key of the pushing cell; selects the link.
        permissions: ``{'inbound': {...}, 'outbound': {...}}`` as sent by the
            remote. Anything that is not a dict (at either level) is treated
            as empty, and only services in VALID_SERVICES are kept.
        exit_offered: Whether the remote offers to act as our exit.
        use_as_exit_relay: Whether the remote asks us to relay its traffic.

    Returns:
        The updated link record.

    Raises:
        ValueError: If no link has the given public key.
    """
    links = self._load()
    link = next((l for l in links if l['public_key'] == from_public_key), None)
    if not link:
        raise ValueError(f"No connection found for public_key '{from_public_key[:16]}...'")

    def _section(name: str) -> Dict[str, Any]:
        # The payload comes from another cell: tolerate missing/malformed
        # sections instead of crashing on .get() of a non-dict.
        raw = permissions.get(name) if isinstance(permissions, dict) else None
        return raw if isinstance(raw, dict) else {}

    in_raw = _section('inbound')
    out_raw = _section('outbound')
    clean_inbound = {s: bool(in_raw.get(s, False)) for s in VALID_SERVICES}
    clean_outbound = {s: bool(out_raw.get(s, False)) for s in VALID_SERVICES}

    link['permissions'] = {'inbound': clean_inbound, 'outbound': clean_outbound}
    link['remote_exit_offered'] = bool(exit_offered)
    link['remote_exit_relay_active'] = bool(use_as_exit_relay)
    # Naive UTC timestamp; datetime.utcnow() is deprecated.
    link['last_remote_update_at'] = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    self._save(links)

    # Soft loop-detection warning: if the remote is asking us to act as exit
    # AND we already have a peer routing via that cell, it's a potential cycle.
    if use_as_exit_relay:
        try:
            from peer_registry import PeerRegistry
            pr = PeerRegistry(os.environ.get('DATA_DIR', '/app/data'))
            loop_peers = [p['name'] for p in pr.list_peers()
                          if p.get('route_via') == link['cell_name']]
            if loop_peers:
                logger.warning(
                    f"apply_remote_permissions: '{link['cell_name']}' asked us to act as "
                    f"its exit relay, but we already route peers {loop_peers} via it — "
                    f"potential routing loop detected"
                )
        except Exception as e:
            # Advisory check only — never block the update on it.
            logger.debug(f"apply_remote_permissions: loop check skipped: {e}")

    inbound_list = [s for s, v in clean_inbound.items() if v]
    try:
        import firewall_manager as _fm
        _fm.apply_cell_rules(link['cell_name'], link['vpn_subnet'], inbound_list,
                             exit_relay=use_as_exit_relay)
    except Exception as e:
        logger.warning(
            f"apply_cell_rules after remote push for '{link['cell_name']}' "
            f"failed (non-fatal): {e}"
        )

    return link
|
|
|
|
def replay_pending_pushes(self) -> Dict[str, int]:
    """Retry permission pushes for any link with pending_push=True.

    Called from _apply_startup_enforcement. Links whose ``next_retry_at`` is
    still in the future are skipped and counted as deferred.

    Returns:
        Counts for logging: ``{'attempted', 'ok', 'failed', 'deferred'}``.
        All four keys are always present, including when the local identity
        cannot be resolved and nothing is attempted.
    """
    summary = {'attempted': 0, 'ok': 0, 'failed': 0, 'deferred': 0}
    try:
        identity = self._local_identity()
    except Exception as e:
        logger.warning(f"replay_pending_pushes: cannot resolve identity ({e})")
        return summary

    # Stored retry times are naive-UTC ISO strings, so a plain string
    # comparison against "now" in the same format orders them correctly.
    now_iso = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    for link in self._load():
        if not link.get('pending_push'):
            continue
        next_retry = link.get('next_retry_at')
        if next_retry and next_retry > now_iso:
            summary['deferred'] += 1
            logger.info(
                f"replay: skipping '{link['cell_name']}' — backoff until {next_retry}"
            )
            continue
        summary['attempted'] += 1
        result = self._push_permissions_to_remote(
            link, identity['cell_name'], identity['public_key']
        )
        self._record_push_result(link['cell_name'], result)
        if result.get('ok'):
            summary['ok'] += 1
            logger.info(f"replay: synced permissions to '{link['cell_name']}'")
        else:
            summary['failed'] += 1
            logger.warning(
                f"replay: push to '{link['cell_name']}' failed: {result.get('error')}"
            )

    if summary['attempted'] or summary['deferred']:
        logger.info(
            f"replay_pending_pushes: {summary['attempted']} attempted, "
            f"{summary['ok']} ok, {summary['failed']} failed, "
            f"{summary['deferred']} deferred (backoff)"
        )
    return summary
|
|
|
|
# ── Public API ────────────────────────────────────────────────────────────
|
|
|
|
def generate_invite(self, cell_name: str, domain: str) -> Dict[str, Any]:
    """Build the invite package another cell imports to link with this one.

    The 'endpoint' value comes from the WireGuard manager's
    get_advertised_endpoint() (the cell's public domain in DDNS/ACME modes
    plus this cell's own WireGuard port, rather than a raw external IP), so
    the remote cell reaches us by name and a NAT/router can forward each
    cell's distinct WG port to the right host. 'dns_ip' is the configured
    WireGuard address without its prefix length.
    """
    wg = self.wireguard_manager
    keys = wg.get_keys()
    server_vpn_ip = wg._get_configured_address().partition('/')[0]

    # The config manager is optional here; without it the WireGuard manager
    # is simply asked for the endpoint with None.
    try:
        from app import config_manager as _cm
    except Exception:
        _cm = None
    advertised = wg.get_advertised_endpoint(_cm)

    return {
        'cell_name': cell_name,
        'public_key': keys['public_key'],
        'endpoint': advertised,
        'vpn_subnet': wg._get_configured_network(),
        'dns_ip': server_vpn_ip,
        'domain': domain,
        'version': 1,
    }
|
|
|
|
def list_connections(self) -> List[Dict[str, Any]]:
    """Return every stored cell link record, as read (and migrated) by _load()."""
    connections = self._load()
    return connections
|
|
|
|
def _check_invite_conflicts(self, invite: Dict[str, Any],
                            exclude_cell: str = '') -> None:
    """Raise ValueError if invite's subnet or domain conflicts with existing state.

    Args:
        invite: Remote invite; only 'vpn_subnet' and 'domain' are read here.
        exclude_cell: Name of a stored link to ignore during the comparison
            (used when re-validating an already-connected cell).

    Raises:
        ValueError: When the subnet overlaps this cell's network or another
            link's subnet, or when the domain equals this cell's domain or
            another link's domain.
    """
    import ipaddress as _ip
    links = self._load()
    remote_subnet = invite.get('vpn_subnet', '')
    remote_domain = invite.get('domain', '')

    # Check VPN subnet: must not overlap our own subnet or any existing cell's subnet
    try:
        # NOTE: ip_network() itself raises ValueError for a malformed string,
        # and every ValueError is re-raised below — so a malformed subnet
        # (the invite's or a stored link's) surfaces as a ValueError rather
        # than being logged as "skipped".
        remote_net = _ip.ip_network(remote_subnet, strict=False)
        own_net = _ip.ip_network(
            self.wireguard_manager._get_configured_network(), strict=False)
        if remote_net.overlaps(own_net):
            raise ValueError(
                f"VPN subnet {remote_subnet!r} overlaps this cell's own subnet "
                f"{str(own_net)!r} — the remote cell must use a distinct IP range"
            )
        for link in links:
            if link['cell_name'] == exclude_cell:
                continue
            # Links without a stored subnet fall back to the single address
            # 0.0.0.0/32 for the overlap test.
            existing_net = _ip.ip_network(link.get('vpn_subnet', '0.0.0.0/32'),
                                          strict=False)
            if remote_net.overlaps(existing_net):
                raise ValueError(
                    f"VPN subnet {remote_subnet!r} overlaps already-connected cell "
                    f"'{link['cell_name']}' ({link['vpn_subnet']!r})"
                )
    except ValueError:
        raise
    except Exception as e:
        # Any non-ValueError failure (e.g. the WireGuard manager lookup)
        # downgrades the subnet check to a warning.
        logger.warning(f'_check_invite_conflicts subnet check skipped: {e}')

    # Check domain: must not match our own domain or any existing cell's domain
    if remote_domain:
        try:
            from app import config_manager
            identity = config_manager.configs.get('_identity', {})
            own_domain = identity.get('domain_name') or identity.get('domain', os.environ.get('CELL_DOMAIN', ''))
            # Exact string equality — the comparison is case-sensitive.
            if own_domain and remote_domain == own_domain:
                raise ValueError(
                    f"Domain {remote_domain!r} is the same as this cell's own domain — "
                    f"the remote cell must use a different domain name"
                )
        except ValueError:
            raise
        except Exception:
            # Own-domain lookup is best-effort (e.g. config not importable).
            pass
        for link in links:
            if link['cell_name'] == exclude_cell:
                continue
            if link.get('domain') == remote_domain:
                raise ValueError(
                    f"Domain {remote_domain!r} is already used by connected cell "
                    f"'{link['cell_name']}' — each cell must have a unique domain"
                )
|
|
|
|
def _push_invite_to_remote(self, link: Dict[str, Any]) -> Dict[str, Any]:
    """Send OUR invite to the remote cell so it can complete mutual WG pairing.

    Called immediately after adding the remote as our WG peer, before the WG
    tunnel is up. Reaches the remote over the PUBLIC path at its advertised
    endpoint host (a domain in DDNS/ACME modes) on Caddy/443 — the API's :3000
    is 127.0.0.1-only and not reachable across cells. Non-fatal — one-sided
    pairing degrades gracefully; the admin can pair from the other side.

    Args:
        link: Link record; its 'endpoint' ("host:port", "[v6]:port" or a bare
            host) supplies the host to contact.

    Returns:
        {'ok': bool, 'error': str|None}. 'ok' is True only for a 2xx status.
    """
    endpoint = link.get('endpoint') or ''
    if not endpoint:
        return {'ok': False, 'error': 'no endpoint'}

    # Host from endpoint (e.g. "alice.pic.ngo:51821" → "alice.pic.ngo").
    text = str(endpoint).strip()
    if text.startswith('['):
        # "[fd00::1]:51821" or "[fd00::1]"
        host = text[1:].partition(']')[0]
    elif text.count(':') > 1:
        # Bare IPv6 literal without a port.
        host = text
    else:
        host = text.rsplit(':', 1)[0]
    host = host.strip()
    if not host:
        return {'ok': False, 'error': f'cannot parse endpoint {endpoint!r}'}
    # IPv6 literals must be bracketed inside a URL authority.
    url_host = f'[{host}]' if ':' in host else host

    try:
        identity = self._local_identity()
        from app import config_manager
        id_cfg = config_manager.configs.get('_identity', {})
        own_domain = id_cfg.get('domain_name') or id_cfg.get('domain', os.environ.get('CELL_DOMAIN', 'cell'))
        own_invite = self.generate_invite(identity['cell_name'], own_domain)
    except Exception as e:
        return {'ok': False, 'error': f'could not build own invite: {e}'}

    url = f'https://{url_host}/api/cells/peer-sync/accept-invite'
    payload = json.dumps({'invite': own_invite})
    cmd = [
        'docker', 'exec', 'cell-wireguard',
        # -k: endpoint may be a bare IP (LAN/fallback) whose cert won't match;
        # accept-invite carries only public keys and the WG handshake is the
        # real authentication.
        # -S: with -s alone curl prints no error text, so a failed connect
        # would only be reported as the write-out value "000".
        'curl', '-s', '-S', '-k', '-o', '/dev/null', '-w', '%{http_code}',
        '-X', 'POST',
        '-H', 'Content-Type: application/json',
        '-d', payload,
        '--max-time', str(_PUSH_TIMEOUT),
        '--connect-timeout', '3',
        url,
    ]
    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=_PUSH_TIMEOUT + 5
        )
    except subprocess.TimeoutExpired:
        # Same short marker the permissions push reports, instead of the
        # full command line embedded in the exception text.
        return {'ok': False, 'error': 'timeout'}
    except Exception as e:
        return {'ok': False, 'error': str(e)[:200]}

    if result.returncode != 0:
        err = (result.stderr or result.stdout or 'curl error').strip()[:200]
        return {'ok': False, 'error': err}
    status = result.stdout.strip()
    if status.startswith('2'):
        return {'ok': True, 'error': None}
    return {'ok': False, 'error': f'HTTP {status}'}
|
|
|
|
def accept_invite(self, invite: Dict[str, Any]) -> Dict[str, Any]:
    """Accept a remote cell's invite and complete mutual WG pairing.

    Called by the /api/cells/peer-sync/accept-invite endpoint when the remote
    cell pushes its own invite after we connected to it. Idempotent: if we
    are already connected to this cell, returns the existing link (after
    updating it when the remote's dns_ip, subnet, endpoint or domain changed).

    Args:
        invite: Must contain 'cell_name', 'public_key', 'vpn_subnet',
            'dns_ip' and 'domain'; 'endpoint' is optional.

    Returns:
        The existing (possibly updated) or newly created link record.

    Raises:
        ValueError: On a missing field, on a subnet/domain conflict, or when
            a cell of that name is already linked under a different public
            key.
        RuntimeError: If the WireGuard peer cannot be added.
    """
    for field in ('cell_name', 'public_key', 'vpn_subnet', 'dns_ip', 'domain'):
        if field not in invite:
            raise ValueError(f"Invite missing field: {field!r}")

    links = self._load()
    name = invite['cell_name']

    # Already connected — check whether the remote's endpoint, subnet, or domain changed
    # (e.g. the remote cell changed its WireGuard address or domain) and heal if so.
    existing = next((l for l in links if l['cell_name'] == name), None)
    if existing:
        # The name alone must not be enough to rewrite an existing link's
        # routing/DNS data: the invite has to carry the key we paired with.
        if existing.get('public_key') != invite['public_key']:
            raise ValueError(
                f"Cell '{name}' is already connected with a different public key — "
                f"remove the existing connection before pairing again"
            )

        dns_changed = existing.get('dns_ip') != invite['dns_ip']
        subnet_changed = existing.get('vpn_subnet') != invite['vpn_subnet']
        endpoint_changed = (invite.get('endpoint') and
                            invite['endpoint'] != existing.get('endpoint'))
        domain_changed = (invite.get('domain') and
                          invite['domain'] != existing.get('domain'))
        if dns_changed or subnet_changed or endpoint_changed or domain_changed:
            # Before healing, verify the updated invite doesn't conflict with
            # other connected cells (exclude this cell by name so it's not
            # self-blocking when only endpoint/dns_ip changed).
            self._check_invite_conflicts(invite, exclude_cell=name)

            logger.info(
                f"accept_invite: updating existing cell '{name}' "
                f"(dns_ip: {existing.get('dns_ip')} → {invite['dns_ip']}, "
                f"vpn_subnet: {existing.get('vpn_subnet')} → {invite['vpn_subnet']}, "
                f"domain: {existing.get('domain')} → {invite.get('domain')})"
            )
            old_subnet = existing.get('vpn_subnet', '')
            old_domain = existing.get('domain', '')
            existing['dns_ip'] = invite['dns_ip']
            existing['vpn_subnet'] = invite['vpn_subnet']
            existing['remote_api_url'] = _remote_api_url(invite['dns_ip'])
            # NOTE(review): a changed endpoint is only stored here; nothing in
            # this method pushes it to the WireGuard peer — confirm intended.
            if invite.get('endpoint'):
                existing['endpoint'] = invite['endpoint']
            if domain_changed:
                existing['domain'] = invite['domain']
            self._save(links)

            # Update WG peer AllowedIPs to the new subnet
            if subnet_changed and old_subnet:
                self.wireguard_manager.update_peer_ip(
                    existing['public_key'], invite['vpn_subnet'])

            # Update DNS forward rule — triggers on dns_ip OR domain change
            if dns_changed or domain_changed:
                try:
                    self.network_manager.remove_cell_dns_forward(old_domain)
                except Exception as e:
                    # The old rule may already be gone; adding the new one is
                    # what matters.
                    logger.debug(f"remove_cell_dns_forward({old_domain!r}) failed: {e}")
                self.network_manager.add_cell_dns_forward(
                    domain=existing['domain'], dns_ip=invite['dns_ip'])

            # Reapply firewall rules with new subnet
            if subnet_changed:
                try:
                    import firewall_manager as _fm
                    inbound_list = [s for s, v in
                                    existing.get('permissions', {}).get('inbound', {}).items() if v]
                    _fm.apply_cell_rules(name, invite['vpn_subnet'], inbound_list)
                except Exception as e:
                    logger.warning(f"apply_cell_rules after subnet update failed: {e}")

        return existing

    # Conflict check (no exclusion: this cell is being added for the first time)
    self._check_invite_conflicts(invite)

    ok = self.wireguard_manager.add_cell_peer(
        name=name,
        public_key=invite['public_key'],
        endpoint=invite.get('endpoint', ''),
        vpn_subnet=invite['vpn_subnet'],
    )
    if not ok:
        raise RuntimeError(f"Failed to add WireGuard peer for cell '{name}'")

    dns_result = self.network_manager.add_cell_dns_forward(
        domain=invite['domain'],
        dns_ip=invite['dns_ip'],
    )
    if dns_result.get('warnings'):
        logger.warning('DNS forward warnings for %s (accept_invite): %s',
                       name, dns_result['warnings'])

    link = {
        'cell_name': name,
        'public_key': invite['public_key'],
        'endpoint': invite.get('endpoint'),
        'vpn_subnet': invite['vpn_subnet'],
        'dns_ip': invite['dns_ip'],
        'domain': invite['domain'],
        # Naive UTC timestamp; datetime.utcnow() is deprecated.
        'connected_at': datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
        'permissions': _default_perms(),
        'remote_api_url': _remote_api_url(invite['dns_ip']),
        'last_push_status': 'never',
        'last_push_at': None,
        'last_push_error': None,
        'pending_push': True,
        'last_remote_update_at': None,
    }
    links.append(link)
    self._save(links)

    # A freshly accepted cell starts with no inbound services allowed.
    try:
        import firewall_manager as _fm
        _fm.apply_cell_rules(name, invite['vpn_subnet'], [])
    except Exception as e:
        logger.warning(f"apply_cell_rules for {name} (accept_invite) failed: {e}")

    logger.info(f"accept_invite: mutual pairing completed for cell '{name}'")
    return link
|
|
|
|
def add_connection(self, invite: Dict[str, Any],
                   inbound_services: Optional[List[str]] = None) -> Dict[str, Any]:
    """Import a remote cell's invite and establish the connection.

    Adds the WireGuard peer and DNS forward, stores the link, applies the
    firewall rules, then (best-effort) pushes our own invite and our
    permissions to the remote.

    Args:
        invite: Must contain 'cell_name', 'public_key', 'vpn_subnet',
            'dns_ip' and 'domain'; 'endpoint' is optional.
        inbound_services: Services to allow inbound from the start; entries
            not in VALID_SERVICES are ignored.

    Returns:
        The newly stored link record.

    Raises:
        ValueError: On a missing invite field, when the cell is already
            connected, or on a subnet/domain conflict.
        RuntimeError: If the WireGuard peer cannot be added.
    """
    # Validate up front: a missing 'domain'/'dns_ip' would otherwise surface
    # as a KeyError only after the WireGuard peer had already been added.
    for field in ('cell_name', 'public_key', 'vpn_subnet', 'dns_ip', 'domain'):
        if field not in invite:
            raise ValueError(f"Invite missing field: {field!r}")

    links = self._load()
    name = invite['cell_name']
    if any(l['cell_name'] == name for l in links):
        raise ValueError(f"Cell '{name}' is already connected")

    # Check for VPN subnet and domain conflicts before touching WG/DNS
    self._check_invite_conflicts(invite)

    ok = self.wireguard_manager.add_cell_peer(
        name=name,
        public_key=invite['public_key'],
        endpoint=invite.get('endpoint', ''),
        vpn_subnet=invite['vpn_subnet'],
    )
    if not ok:
        raise RuntimeError(f"Failed to add WireGuard peer for cell '{name}'")

    dns_result = self.network_manager.add_cell_dns_forward(
        domain=invite['domain'],
        dns_ip=invite['dns_ip'],
    )
    if dns_result.get('warnings'):
        logger.warning('DNS forward warnings for %s: %s', name, dns_result['warnings'])

    inbound = [s for s in (inbound_services or []) if s in VALID_SERVICES]
    perms = _default_perms()
    for s in inbound:
        perms['inbound'][s] = True

    link = {
        'cell_name': name,
        'public_key': invite['public_key'],
        'endpoint': invite.get('endpoint'),
        'vpn_subnet': invite['vpn_subnet'],
        'dns_ip': invite['dns_ip'],
        'domain': invite['domain'],
        # Naive UTC timestamp; datetime.utcnow() is deprecated.
        'connected_at': datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
        'permissions': perms,
        'remote_api_url': _remote_api_url(invite['dns_ip']),
        'last_push_status': 'never',
        'last_push_at': None,
        'last_push_error': None,
        'pending_push': True,
        'last_remote_update_at': None,
    }
    links.append(link)
    self._save(links)

    try:
        import firewall_manager as _fm
        _fm.apply_cell_rules(name, invite['vpn_subnet'], inbound)
    except Exception as e:
        logger.warning(f"apply_cell_rules for {name} failed (non-fatal): {e}")

    # Push OUR invite to the remote so it can complete mutual WG pairing.
    # This goes over the public path (before the WG tunnel is up) to the
    # remote's advertised endpoint host, so pairing doesn't need manual
    # action on both sides.
    try:
        inv_result = self._push_invite_to_remote(link)
        if inv_result.get('ok'):
            logger.info(f"Mutual pairing invite accepted by '{name}'")
        else:
            logger.warning(
                f"Invite push to '{name}' failed (manual pairing from remote required): "
                f"{inv_result.get('error')}"
            )
    except Exception as e:
        logger.warning(f"Invite push to '{name}' skipped (non-fatal): {e}")

    # Initial permission push (uses WG tunnel — may fail if tunnel not yet up)
    try:
        identity = self._local_identity()
        result = self._push_permissions_to_remote(
            link, identity['cell_name'], identity['public_key']
        )
        self._record_push_result(name, result)
        if not result['ok']:
            logger.warning(
                f"Initial permission push to '{name}' failed "
                f"(will retry on startup): {result['error']}"
            )
    except Exception as e:
        logger.warning(f"Initial permission push to '{name}' skipped (non-fatal): {e}")

    return link
|
|
|
|
def remove_connection(self, cell_name: str):
    """Disconnect the named cell and drop its link record.

    Firewall rules are cleared first (failure is only logged), then the
    WireGuard peer and the DNS forward are removed, and finally the record
    is deleted from the store.

    Raises:
        ValueError: If no link with that name exists.
    """
    links = self._load()
    target = None
    for entry in links:
        if entry['cell_name'] == cell_name:
            target = entry
            break
    if target is None:
        raise ValueError(f"Cell '{cell_name}' not found")

    try:
        import firewall_manager as _fm
        _fm.clear_cell_rules(cell_name)
    except Exception as e:
        logger.warning(f"clear_cell_rules for {cell_name} failed (non-fatal): {e}")

    self.wireguard_manager.remove_peer(target['public_key'])
    self.network_manager.remove_cell_dns_forward(target['domain'])

    remaining = [entry for entry in links if entry['cell_name'] != cell_name]
    self._save(remaining)
|
|
|
|
def update_permissions(self, cell_name: str,
                       inbound: Dict[str, bool],
                       outbound: Dict[str, bool]) -> Dict[str, Any]:
    """Replace the sharing permissions of a connected cell.

    Both maps are reduced to the services in VALID_SERVICES (missing ones
    become False), the link is saved, firewall rules are re-applied for the
    enabled inbound services, and the new state is pushed to the remote.

    Returns:
        The updated link record.

    Raises:
        ValueError: If no link with that name exists.
    """
    links = self._load()
    link = None
    for entry in links:
        if entry['cell_name'] == cell_name:
            link = entry
            break
    if link is None:
        raise ValueError(f"Cell '{cell_name}' not found")

    clean_inbound = {}
    clean_outbound = {}
    for service in VALID_SERVICES:
        clean_inbound[service] = bool(inbound.get(service, False))
    for service in VALID_SERVICES:
        clean_outbound[service] = bool(outbound.get(service, False))
    link['permissions'] = {'inbound': clean_inbound, 'outbound': clean_outbound}
    self._save(links)

    enabled_inbound = [service for service, allowed in clean_inbound.items() if allowed]
    try:
        import firewall_manager as _fm
        # NOTE(review): no exit_relay argument is passed here, unlike in
        # apply_remote_permissions — confirm this cannot reset a relay rule.
        _fm.apply_cell_rules(cell_name, link['vpn_subnet'], enabled_inbound)
    except Exception as e:
        logger.warning(f"apply_cell_rules for {cell_name} failed (non-fatal): {e}")

    # Mirror the new state to the remote cell; failures are non-fatal.
    self._try_push(cell_name, link)

    return link
|
|
|
|
def get_permissions(self, cell_name: str) -> Dict[str, Any]:
    """Look up the permissions of a connected cell.

    Returns the stored 'permissions' dict, or a fresh deny-all default when
    the record has none.

    Raises:
        ValueError: If no link with that name exists.
    """
    for entry in self._load():
        if entry['cell_name'] == cell_name:
            return entry.get('permissions', _default_perms())
    raise ValueError(f"Cell '{cell_name}' not found")
|
|
|
|
def set_exit_offered(self, cell_name: str, offered: bool) -> Dict[str, Any]:
    """Set whether THIS cell offers to route internet traffic for cell_name.

    The flag is stored on the link as a bool, saved, and then pushed to the
    remote cell so it learns that our offer changed.

    Returns:
        The updated link record.

    Raises:
        ValueError: If no link with that name exists.
    """
    links = self._load()
    for entry in links:
        if entry['cell_name'] != cell_name:
            continue
        entry['exit_offered'] = bool(offered)
        self._save(links)
        self._try_push(cell_name, entry)
        return entry
    raise ValueError(f"Cell '{cell_name}' not found")
|
|
|
|
def set_exit_relay_active(self, cell_name: str, active: bool) -> Dict[str, Any]:
    """Record whether THIS cell routes a peer's internet traffic via cell_name.

    The flag is stored on the link as a bool, saved, and then pushed to the
    remote cell so it can enable/disable the FORWARD-to-eth0 rule on its side.

    Returns:
        The updated link record.

    Raises:
        ValueError: If no link with that name exists.
    """
    links = self._load()
    for entry in links:
        if entry['cell_name'] != cell_name:
            continue
        entry['exit_relay_active'] = bool(active)
        self._save(links)
        self._try_push(cell_name, entry)
        return entry
    raise ValueError(f"Cell '{cell_name}' not found")
|
|
|
|
def get_connection_status(self, cell_name: str) -> Dict[str, Any]:
    """Return the link record plus live WireGuard handshake status.

    The result is a copy of the record with 'online' and 'last_handshake'
    added from the WireGuard manager's peer status. If that lookup fails the
    cell is reported as offline with no handshake.

    Raises:
        ValueError: If no link with that name exists.
    """
    record = None
    for entry in self._load():
        if entry['cell_name'] == cell_name:
            record = entry
            break
    if record is None:
        raise ValueError(f"Cell '{cell_name}' not found")

    try:
        peer_state = self.wireguard_manager.get_peer_status(record['public_key'])
        online = peer_state.get('online', False)
        last_handshake = peer_state.get('last_handshake')
    except Exception:
        online, last_handshake = False, None

    status = dict(record)
    status['online'] = online
    status['last_handshake'] = last_handshake
    return status
|