Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 2ab6e715d8 | |||
| 639fb66e5b | |||
| 714fb9b1a9 | |||
| c7e01d4aa7 | |||
| 1bb8a5eb59 |
+14
-2
@@ -310,7 +310,15 @@ class CaddyManager(BaseServiceManager):
|
||||
service_routes: str, core_routes: str,
|
||||
cert_path: str = _CADDY_INTERNAL_CERT,
|
||||
key_path: str = _CADDY_INTERNAL_KEY) -> str:
|
||||
"""LAN mode: HTTP only + internal-CA TLS, no ACME."""
|
||||
"""LAN mode: internal-CA TLS on 443, plain HTTP on 80, no ACME.
|
||||
|
||||
The same routes are served on both an HTTPS site (the internal-CA cert)
|
||||
and an HTTP site. They must be SEPARATE site blocks: a `tls` directive on
|
||||
an `http://` (port 80) address is rejected by Caddy ("server listening on
|
||||
[:80] is HTTP, but attempts to configure TLS connection policies"). Both
|
||||
are needed because the WireGuard server DNATs peer traffic to Caddy on
|
||||
both 80 and 443.
|
||||
"""
|
||||
body = []
|
||||
if service_routes:
|
||||
body.append(self._indent_routes(service_routes))
|
||||
@@ -325,10 +333,14 @@ class CaddyManager(BaseServiceManager):
|
||||
" auto_https off\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
f"http://{cell_name}.cell, http://172.20.0.2:80 {{\n"
|
||||
f"https://{cell_name}.cell {{\n"
|
||||
f" tls {cert_path} {key_path}\n"
|
||||
f"{inner}\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
f"http://{cell_name}.cell, http://172.20.0.2:80 {{\n"
|
||||
f"{inner}\n"
|
||||
"}\n"
|
||||
)
|
||||
|
||||
def _caddyfile_pic_ngo(self, cell_name: str,
|
||||
|
||||
+106
-24
@@ -8,10 +8,12 @@ Each connection is stored in data/cell_links.json and manifests as:
|
||||
- An iptables FORWARD rule set (service-level access control)
|
||||
"""
|
||||
|
||||
import ipaddress
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import subprocess
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from typing import Any, Dict, List, Optional
|
||||
@@ -30,6 +32,58 @@ _BACKOFF_BASE_S = 60
|
||||
_BACKOFF_MAX_S = 3600
|
||||
|
||||
|
||||
# Strict formats for fields imported from a remote cell's invite. The domain and
|
||||
# dns_ip flow into a `curl --resolve <domain>:443:<dns_ip>` argv (peer-sync push);
|
||||
# anchoring them — domain must start alphanumeric, dns_ip must be an IP — prevents
|
||||
# a malicious invite injecting a leading-dash value that curl reads as a flag.
|
||||
_INVITE_HOSTNAME_RE = re.compile(r'^[A-Za-z0-9]([A-Za-z0-9.-]{0,253}[A-Za-z0-9])?$')
|
||||
_INVITE_CELL_NAME_RE = re.compile(r'^[A-Za-z0-9][A-Za-z0-9 _.-]{0,63}$')
|
||||
_INVITE_ENDPOINT_RE = re.compile(r'^[A-Za-z0-9][A-Za-z0-9._-]*:\d{1,5}$')
|
||||
|
||||
|
||||
def _validate_invite_fields(invite: Dict[str, Any]) -> None:
|
||||
"""Reject a remote cell's invite whose fields aren't strictly well-formed.
|
||||
|
||||
Defence-in-depth: these values come from another cell and reach iptables,
|
||||
DNS config, and a curl argv (the peer-sync push --resolves <domain>:443:
|
||||
<dns_ip>). Anchoring domain/dns_ip/endpoint to start alphanumeric blocks a
|
||||
malicious leading-dash value that curl would read as a flag. The public_key
|
||||
is validated downstream by WireGuardManager.add_cell_peer. Raise ValueError
|
||||
on anything malformed.
|
||||
"""
|
||||
name = invite.get('cell_name', '')
|
||||
if not isinstance(name, str) or not _INVITE_CELL_NAME_RE.match(name):
|
||||
raise ValueError(f'invalid cell_name {name!r}')
|
||||
domain = invite.get('domain', '')
|
||||
if not isinstance(domain, str) or not _INVITE_HOSTNAME_RE.match(domain):
|
||||
raise ValueError(f'invalid domain {domain!r}: must be a hostname')
|
||||
try:
|
||||
ipaddress.ip_address(str(invite.get('dns_ip', '')))
|
||||
except ValueError:
|
||||
raise ValueError(f"invalid dns_ip {invite.get('dns_ip')!r}")
|
||||
try:
|
||||
ipaddress.ip_network(str(invite.get('vpn_subnet', '')), strict=False)
|
||||
except ValueError:
|
||||
raise ValueError(f"invalid vpn_subnet {invite.get('vpn_subnet')!r}")
|
||||
endpoint = invite.get('endpoint')
|
||||
if endpoint and not _INVITE_ENDPOINT_RE.match(str(endpoint)):
|
||||
raise ValueError(f'invalid endpoint {endpoint!r}')
|
||||
|
||||
|
||||
def _remote_api_url(domain: Optional[str]) -> Optional[str]:
|
||||
"""Base URL for a linked cell's API, reached over the WG tunnel.
|
||||
|
||||
Cross-cell peer-sync goes to the remote's Caddy on 443 (the WireGuard server
|
||||
DNATs VPN-IP:443 → Caddy → API; the API's own :3000 binds 127.0.0.1 and is
|
||||
unreachable from another cell). The URL uses the remote cell's DOMAIN — not
|
||||
its VPN IP — because Caddy only holds a certificate for the domain (ACME) or
|
||||
the .cell name (internal CA); a request by bare IP has no matching SNI and the
|
||||
TLS handshake fails. The push connects to the VPN IP over the tunnel via
|
||||
`curl --resolve <domain>:443:<dns_ip>` (see _push_permissions_to_remote).
|
||||
"""
|
||||
return f"https://{domain}" if domain else None
|
||||
|
||||
|
||||
def _compute_next_retry(attempts: int) -> str:
|
||||
"""Return an ISO timestamp for the earliest next retry using capped exponential backoff."""
|
||||
delay = min(_BACKOFF_BASE_S * (2 ** (attempts - 1)), _BACKOFF_MAX_S)
|
||||
@@ -65,11 +119,12 @@ class CellLinkManager:
|
||||
link['permissions'] = _default_perms()
|
||||
changed = True
|
||||
# Phase 1 migration: permission-sync tracking fields
|
||||
if 'remote_api_url' not in link:
|
||||
link['remote_api_url'] = (
|
||||
f"http://{link['dns_ip']}:3000"
|
||||
if link.get('dns_ip') else None
|
||||
)
|
||||
# Domain-based HTTPS URL. Rebuild if missing, or if it's a
|
||||
# legacy form: http://<ip>:3000 (unreachable) or https://<ip>
|
||||
# (no matching Caddy cert by bare IP).
|
||||
_want_url = _remote_api_url(link.get('domain'))
|
||||
if link.get('remote_api_url') != _want_url and _want_url:
|
||||
link['remote_api_url'] = _want_url
|
||||
changed = True
|
||||
if 'last_push_status' not in link:
|
||||
link['last_push_status'] = 'never'
|
||||
@@ -184,16 +239,26 @@ class CellLinkManager:
|
||||
payload = json.dumps(body)
|
||||
endpoint = url.rstrip('/') + '/api/cells/peer-sync/permissions'
|
||||
|
||||
# Determine local WG IP so the remote can authenticate us by source subnet.
|
||||
# MASQUERADE rewrites source to cell-wireguard's eth0 IP (172.20.x.x), which
|
||||
# is NOT in the cell's vpn_subnet. Passing the true WG IP in X-Forwarded-For
|
||||
# lets _authenticate_peer_cell() find the matching cell link.
|
||||
# Determine local WG IP for X-Forwarded-For (belt-and-suspenders for the
|
||||
# remote's source-subnet auth). With the peer-sync masquerade exclusion
|
||||
# the remote's Caddy already sees our real VPN source and appends it, but
|
||||
# passing it explicitly is harmless.
|
||||
local_wg_ip = self._local_wg_ip()
|
||||
xff_header = f'X-Forwarded-For: {local_wg_ip}' if local_wg_ip else None
|
||||
|
||||
# Reach the remote over the WG tunnel by its VPN IP, but present the
|
||||
# cell's DOMAIN as SNI/Host so Caddy serves its certificate — a request
|
||||
# to a bare IP has no matching cert and the TLS handshake fails. -k still
|
||||
# covers LAN mode (internal-CA cert curl won't chain to).
|
||||
domain = link.get('domain')
|
||||
dns_ip = link.get('dns_ip')
|
||||
cmd = [
|
||||
'docker', 'exec', 'cell-wireguard',
|
||||
'curl', '-s', '-o', '/dev/null', '-w', '%{http_code}',
|
||||
'curl', '-s', '-k', '-o', '/dev/null', '-w', '%{http_code}',
|
||||
]
|
||||
if domain and dns_ip:
|
||||
cmd += ['--resolve', f'{domain}:443:{dns_ip}']
|
||||
cmd += [
|
||||
'-X', 'POST',
|
||||
'-H', 'Content-Type: application/json',
|
||||
]
|
||||
@@ -371,14 +436,24 @@ class CellLinkManager:
|
||||
# ── Public API ────────────────────────────────────────────────────────────
|
||||
|
||||
def generate_invite(self, cell_name: str, domain: str) -> Dict[str, Any]:
|
||||
"""Return an invite package describing this cell for another cell to import."""
|
||||
"""Return an invite package describing this cell for another cell to import.
|
||||
|
||||
The endpoint advertises the cell's public domain (when in a DDNS/ACME
|
||||
mode) plus this cell's own WireGuard port, rather than a raw external IP —
|
||||
so the remote cell reaches us by name and a NAT/router can forward each
|
||||
cell's distinct WG port to the right host.
|
||||
"""
|
||||
keys = self.wireguard_manager.get_keys()
|
||||
srv = self.wireguard_manager.get_server_config()
|
||||
server_vpn_ip = self.wireguard_manager._get_configured_address().split('/')[0]
|
||||
try:
|
||||
from app import config_manager as _cm
|
||||
except Exception:
|
||||
_cm = None
|
||||
endpoint = self.wireguard_manager.get_advertised_endpoint(_cm)
|
||||
return {
|
||||
'cell_name': cell_name,
|
||||
'public_key': keys['public_key'],
|
||||
'endpoint': srv.get('endpoint'),
|
||||
'endpoint': endpoint,
|
||||
'vpn_subnet': self.wireguard_manager._get_configured_network(),
|
||||
'dns_ip': server_vpn_ip,
|
||||
'domain': domain,
|
||||
@@ -448,15 +523,16 @@ class CellLinkManager:
|
||||
def _push_invite_to_remote(self, link: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Send OUR invite to the remote cell so it can complete mutual WG pairing.
|
||||
|
||||
Called immediately after adding the remote as our WG peer. Uses the
|
||||
remote's endpoint IP (LAN-reachable before the WG tunnel is up) rather
|
||||
than the WG-internal dns_ip. Non-fatal — one-sided pairing degrades
|
||||
gracefully; the admin can pair from the other side manually.
|
||||
Called immediately after adding the remote as our WG peer, before the WG
|
||||
tunnel is up. Reaches the remote over the PUBLIC path at its advertised
|
||||
endpoint host (a domain in DDNS/ACME modes) on Caddy/443 — the API's :3000
|
||||
is 127.0.0.1-only and not reachable across cells. Non-fatal — one-sided
|
||||
pairing degrades gracefully; the admin can pair from the other side.
|
||||
"""
|
||||
endpoint = link.get('endpoint') or ''
|
||||
if not endpoint:
|
||||
return {'ok': False, 'error': 'no endpoint'}
|
||||
# Parse LAN IP from endpoint (e.g. "192.168.31.52:51820" → "192.168.31.52")
|
||||
# Host from endpoint (e.g. "alice.pic.ngo:51821" → "alice.pic.ngo").
|
||||
try:
|
||||
host = endpoint.rsplit(':', 1)[0].strip('[]')
|
||||
except Exception:
|
||||
@@ -471,11 +547,14 @@ class CellLinkManager:
|
||||
except Exception as e:
|
||||
return {'ok': False, 'error': f'could not build own invite: {e}'}
|
||||
|
||||
url = f'http://{host}:3000/api/cells/peer-sync/accept-invite'
|
||||
url = f'https://{host}/api/cells/peer-sync/accept-invite'
|
||||
payload = json.dumps({'invite': own_invite})
|
||||
cmd = [
|
||||
'docker', 'exec', 'cell-wireguard',
|
||||
'curl', '-s', '-o', '/dev/null', '-w', '%{http_code}',
|
||||
# -k: endpoint may be a bare IP (LAN/fallback) whose cert won't match;
|
||||
# accept-invite carries only public keys and the WG handshake is the
|
||||
# real authentication.
|
||||
'curl', '-s', '-k', '-o', '/dev/null', '-w', '%{http_code}',
|
||||
'-X', 'POST',
|
||||
'-H', 'Content-Type: application/json',
|
||||
'-d', payload,
|
||||
@@ -507,6 +586,7 @@ class CellLinkManager:
|
||||
for field in ('cell_name', 'public_key', 'vpn_subnet', 'dns_ip', 'domain'):
|
||||
if field not in invite:
|
||||
raise ValueError(f"Invite missing field: {field!r}")
|
||||
_validate_invite_fields(invite)
|
||||
|
||||
links = self._load()
|
||||
name = invite['cell_name']
|
||||
@@ -537,7 +617,7 @@ class CellLinkManager:
|
||||
old_domain = existing.get('domain', '')
|
||||
existing['dns_ip'] = invite['dns_ip']
|
||||
existing['vpn_subnet'] = invite['vpn_subnet']
|
||||
existing['remote_api_url'] = f"http://{invite['dns_ip']}:3000"
|
||||
existing['remote_api_url'] = _remote_api_url(invite['domain'])
|
||||
if invite.get('endpoint'):
|
||||
existing['endpoint'] = invite['endpoint']
|
||||
if domain_changed:
|
||||
@@ -599,7 +679,7 @@ class CellLinkManager:
|
||||
'domain': invite['domain'],
|
||||
'connected_at': datetime.utcnow().isoformat(),
|
||||
'permissions': _default_perms(),
|
||||
'remote_api_url': f"http://{invite['dns_ip']}:3000",
|
||||
'remote_api_url': _remote_api_url(invite['domain']),
|
||||
'last_push_status': 'never',
|
||||
'last_push_at': None,
|
||||
'last_push_error': None,
|
||||
@@ -621,6 +701,7 @@ class CellLinkManager:
|
||||
def add_connection(self, invite: Dict[str, Any],
|
||||
inbound_services: Optional[List[str]] = None) -> Dict[str, Any]:
|
||||
"""Import a remote cell's invite and establish the connection."""
|
||||
_validate_invite_fields(invite)
|
||||
links = self._load()
|
||||
name = invite['cell_name']
|
||||
if any(l['cell_name'] == name for l in links):
|
||||
@@ -659,7 +740,7 @@ class CellLinkManager:
|
||||
'domain': invite['domain'],
|
||||
'connected_at': datetime.utcnow().isoformat(),
|
||||
'permissions': perms,
|
||||
'remote_api_url': f"http://{invite['dns_ip']}:3000",
|
||||
'remote_api_url': _remote_api_url(invite['domain']),
|
||||
'last_push_status': 'never',
|
||||
'last_push_at': None,
|
||||
'last_push_error': None,
|
||||
@@ -717,8 +798,9 @@ class CellLinkManager:
|
||||
try:
|
||||
import firewall_manager as _fm
|
||||
_fm.clear_cell_rules(cell_name)
|
||||
_fm.remove_cell_subnet_route(link.get('vpn_subnet', ''))
|
||||
except Exception as e:
|
||||
logger.warning(f"clear_cell_rules for {cell_name} failed (non-fatal): {e}")
|
||||
logger.warning(f"firewall teardown for {cell_name} failed (non-fatal): {e}")
|
||||
|
||||
self.wireguard_manager.remove_peer(link['public_key'])
|
||||
self.network_manager.remove_cell_dns_forward(link['domain'])
|
||||
|
||||
@@ -1437,6 +1437,18 @@ class ConnectivityManager(BaseServiceManager):
|
||||
except Exception as e:
|
||||
logger.warning(f"delete_connection: killswitch cleanup failed "
|
||||
f"(non-fatal): {e}")
|
||||
elif (record.get('type') == self.CELL_RELAY_TYPE
|
||||
and isinstance(table, int)
|
||||
and self.wireguard_manager is not None):
|
||||
# A cell_relay policy-routes peers via a source ip rule + a
|
||||
# shared default route in its table inside cell-wireguard. Per-peer
|
||||
# detach removes the rules; the table's default route only goes
|
||||
# away here, when the connection is gone — otherwise it leaks.
|
||||
try:
|
||||
self.wireguard_manager.teardown_route_table(table)
|
||||
except Exception as e:
|
||||
logger.warning(f"delete_connection: cell_relay route table "
|
||||
f"cleanup failed (non-fatal): {e}")
|
||||
|
||||
for secret_ref in record.get('secret_refs', []):
|
||||
if self.vault_manager is not None:
|
||||
@@ -1554,6 +1566,18 @@ class ConnectivityManager(BaseServiceManager):
|
||||
f"{cell_name!r} no longer offered but still "
|
||||
f"referenced; keeping")
|
||||
continue
|
||||
# Flush the relay's policy-routing table (shared default route)
|
||||
# before forgetting the record — this path deletes the config
|
||||
# entry directly rather than via delete_connection, so it must
|
||||
# do the same host-routing teardown or the route leaks.
|
||||
rtable = rec.get('table')
|
||||
if self.wireguard_manager is not None and isinstance(rtable, int):
|
||||
try:
|
||||
self.wireguard_manager.teardown_route_table(rtable)
|
||||
except Exception as e:
|
||||
logger.warning(f"reconcile_cell_relays: route table "
|
||||
f"cleanup for {cell_name!r} failed "
|
||||
f"(non-fatal): {e}")
|
||||
try:
|
||||
self.config_manager.delete_connection(rec.get('id'))
|
||||
removed.append(rec.get('id'))
|
||||
|
||||
+48
-9
@@ -374,7 +374,8 @@ def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str
|
||||
Traffic from vpn_subnet is allowed only to service VIPs listed in
|
||||
inbound_services; all other cell traffic is DROPped. Cells get no
|
||||
internet or peer access — only explicit service access via Caddy on
|
||||
port 80, plus the cell-api port (3000) for permission-sync pushes.
|
||||
port 80, plus Caddy on 443 for cross-cell peer-sync pushes (offer/
|
||||
permission state) which reach cell-api through Caddy.
|
||||
|
||||
DNS (port 53) is always allowed so cell peers can resolve service names.
|
||||
Service names resolve to the WG server IP; ensure_service_dnat() routes
|
||||
@@ -388,7 +389,7 @@ def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str
|
||||
2. Exit relay ACCEPT (-o eth0) (if exit_relay, above catch-all)
|
||||
3. Service ACCEPT to Caddy port 80 (if any inbound_services)
|
||||
4. DNS ACCEPT to cell-dns port 53 (UDP + TCP)
|
||||
5. API-sync ACCEPT (inserted last → top)
|
||||
5. Peer-sync ACCEPT to Caddy port 443 (inserted last → top)
|
||||
"""
|
||||
try:
|
||||
tag = _cell_tag(cell_name)
|
||||
@@ -425,19 +426,38 @@ def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str
|
||||
'-p', proto, '--dport', '53',
|
||||
'-m', 'comment', '--comment', tag, '-j', 'ACCEPT'])
|
||||
|
||||
# API permission-sync ACCEPT — inserted LAST so it goes to position 1 (above
|
||||
# the catch-all DROP). Remote cells push permissions to our cell-api via the
|
||||
# WG tunnel; iptables sees source=cell_subnet dst=api_ip after DNAT.
|
||||
api_ip = _get_cell_api_ip()
|
||||
if api_ip:
|
||||
_iptables(['-I', 'FORWARD', '-s', vpn_subnet, '-d', api_ip,
|
||||
'-p', 'tcp', '--dport', '3000',
|
||||
# Peer-sync ACCEPT — inserted LAST so it goes to position 1 (above the
|
||||
# catch-all DROP). Remote cells push offer/permission state to our API over
|
||||
# the WG tunnel. The push targets the remote's Caddy on 443 (DNAT wg0:443 →
|
||||
# Caddy → cell-api), NOT cell-api:3000 directly: the API binds 127.0.0.1
|
||||
# only and is reachable solely through Caddy. After DNAT iptables sees
|
||||
# source=cell_subnet dst=caddy_ip:443; the existing `-o eth0 MASQUERADE`
|
||||
# routes Caddy's reply back through the tunnel.
|
||||
caddy_ip = _get_caddy_container_ip()
|
||||
if caddy_ip:
|
||||
_iptables(['-I', 'FORWARD', '-s', vpn_subnet, '-d', caddy_ip,
|
||||
'-p', 'tcp', '--dport', '443',
|
||||
'-m', 'comment', '--comment', tag, '-j', 'ACCEPT'])
|
||||
# Preserve the linked cell's real VPN source on peer-sync traffic:
|
||||
# the blanket `-o eth0 MASQUERADE` would rewrite it to cell-wireguard's
|
||||
# bridge IP, and the remote side authenticates the push by matching the
|
||||
# source (via X-Forwarded-For) to the cell's VPN subnet. RETURN before
|
||||
# the MASQUERADE (inserted at the top of nat POSTROUTING). Caddy's reply
|
||||
# to the real VPN IP routes back via the cell-subnet host route
|
||||
# (ensure_cell_subnet_routes). The :80 service path keeps masquerade.
|
||||
_iptables(['-t', 'nat', '-I', 'POSTROUTING', '-s', vpn_subnet,
|
||||
'-d', caddy_ip, '-p', 'tcp', '--dport', '443',
|
||||
'-m', 'comment', '--comment', tag, '-j', 'RETURN'])
|
||||
|
||||
# Ensure reply traffic (e.g. ICMP, TCP ACKs) for connections initiated
|
||||
# by local peers to this cell is not dropped by the cell's catch-all DROP.
|
||||
ensure_forward_stateful()
|
||||
|
||||
# Host route so Caddy's peer-sync reply (to the linked cell's un-masqueraded
|
||||
# VPN IP) leaves via cell-wireguard rather than the default gateway. Added at
|
||||
# startup for all links; ensure it on runtime link-add too. Idempotent.
|
||||
ensure_cell_subnet_routes([{'vpn_subnet': vpn_subnet}])
|
||||
|
||||
logger.info(
|
||||
f"Applied cell rules for {cell_name} ({vpn_subnet}): "
|
||||
f"inbound={inbound_services} exit_relay={exit_relay}"
|
||||
@@ -684,6 +704,25 @@ def ensure_cell_subnet_routes(cell_links: List[Dict[str, Any]]) -> None:
|
||||
logger.warning(f'ensure_cell_subnet_routes: {subnet}: {e}')
|
||||
|
||||
|
||||
def remove_cell_subnet_route(vpn_subnet: str) -> None:
|
||||
"""Remove the host route for a disconnected cell's VPN subnet (idempotent).
|
||||
|
||||
Counterpart to ensure_cell_subnet_routes. Without it the route lingers after a
|
||||
cell is unlinked — blackholing that subnet via cell-wireguard, and (on a host
|
||||
that runs the API/tests directly, e.g. a dev box) making is_local_request /
|
||||
_local_subnets treat the stale subnet as locally attached.
|
||||
"""
|
||||
if not vpn_subnet:
|
||||
return
|
||||
WG_BRIDGE_IP = '172.20.0.9'
|
||||
try:
|
||||
_run(['docker', 'run', '--rm', '--network', 'host', '--cap-add', 'NET_ADMIN',
|
||||
'alpine', 'ip', 'route', 'del', vpn_subnet, 'via', WG_BRIDGE_IP],
|
||||
check=False)
|
||||
except Exception as e:
|
||||
logger.warning(f'remove_cell_subnet_route: {vpn_subnet}: {e}')
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DNS ACL (CoreDNS Corefile generation)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -357,6 +357,13 @@ def remove_peer(peer_name):
|
||||
if success:
|
||||
if peer_ip:
|
||||
firewall_manager.clear_peer_rules(peer_ip)
|
||||
# Clear any cell_relay / route-via policy rule for this peer so a
|
||||
# deleted-while-assigned peer doesn't leave a stale source ip rule
|
||||
# (which could later misroute a new peer that reuses the IP).
|
||||
try:
|
||||
wireguard_manager.remove_peer_route_via(peer_ip)
|
||||
except Exception as wg_err:
|
||||
logger.warning(f"Peer {peer_name}: relay route cleanup failed (non-fatal): {wg_err}")
|
||||
_dns_primary, _dns_szones = _configured_dns_params()
|
||||
firewall_manager.apply_all_dns_rules(peer_registry.list_peers(), COREFILE_PATH, _dns_primary,
|
||||
cell_links=cell_link_manager.list_connections(),
|
||||
|
||||
@@ -8,15 +8,11 @@ bp = Blueprint('wireguard', __name__)
|
||||
def _effective_endpoint(wireguard_manager, config_manager) -> str:
|
||||
"""Return the WireGuard endpoint to embed in peer configs.
|
||||
|
||||
Uses wireguard_endpoint from identity config when set (admin override),
|
||||
falling back to get_external_ip() detection.
|
||||
Prefers the cell's public domain (DDNS/ACME modes) or an admin override over
|
||||
the raw external IP, so a peer config points at a name that resolves to the
|
||||
cell rather than a bare IP. See WireGuardManager.get_advertised_endpoint.
|
||||
"""
|
||||
srv = wireguard_manager.get_server_config()
|
||||
override = (config_manager.get_identity().get('wireguard_endpoint') or '').strip()
|
||||
if override:
|
||||
port = srv.get('port', 51820)
|
||||
return override if ':' in override else f'{override}:{port}'
|
||||
return srv.get('endpoint') or '<SERVER_IP>'
|
||||
return wireguard_manager.get_advertised_endpoint(config_manager) or '<SERVER_IP>'
|
||||
|
||||
@bp.route('/api/wireguard/keys', methods=['GET'])
|
||||
def get_wireguard_keys():
|
||||
|
||||
@@ -786,21 +786,60 @@ class WireGuardManager(BaseServiceManager):
|
||||
logger.error(f'apply_peer_route_via failed: {e}')
|
||||
return False
|
||||
|
||||
def remove_peer_route_via(self, peer_ip: str, table: int = 100) -> None:
|
||||
"""Remove the ip rule for peer_ip added by apply_peer_route_via. Non-fatal."""
|
||||
def remove_peer_route_via(self, peer_ip: str) -> None:
|
||||
"""Remove the policy-routing ip rule(s) for peer_ip. Non-fatal.
|
||||
|
||||
Deletes every `ip rule from peer_ip/32` regardless of which table it
|
||||
points at: the v2 cell_relay path adds the rule with the connection's
|
||||
own table (1000+) while the legacy route-via path uses table 100, so a
|
||||
caller clearing a peer's exit does not reliably know the table. Matching
|
||||
by source alone removes the rule in both cases (and any duplicate). The
|
||||
shared routing *table* itself is torn down separately at connection
|
||||
teardown — see teardown_route_table.
|
||||
"""
|
||||
real_conf = self._config_file()
|
||||
if '/tmp/' in real_conf or 'pytest' in real_conf or 'wg_confs' not in real_conf:
|
||||
return
|
||||
try:
|
||||
subprocess.run(
|
||||
['docker', 'exec', 'cell-wireguard',
|
||||
'ip', 'rule', 'del', 'from', f'{peer_ip}/32',
|
||||
'pref', str(table), 'lookup', str(table)],
|
||||
capture_output=True, timeout=5
|
||||
)
|
||||
for _ in range(32):
|
||||
r = subprocess.run(
|
||||
['docker', 'exec', 'cell-wireguard',
|
||||
'ip', 'rule', 'del', 'from', f'{peer_ip}/32'],
|
||||
capture_output=True, timeout=5
|
||||
)
|
||||
if r.returncode != 0:
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def teardown_route_table(self, table: int) -> None:
|
||||
"""Tear down a relay routing table when its connection is removed. Non-fatal.
|
||||
|
||||
Removes any remaining `ip rule ... lookup <table>` entries (e.g. one left
|
||||
by a peer deleted while still assigned) and flushes the table's routes —
|
||||
notably the `default via <cell-ip>` route that apply_peer_route_via
|
||||
installs. That route is shared by every peer routed through the relay, so
|
||||
no per-peer detach may remove it; it can only be cleared once the
|
||||
connection itself is gone, or it leaks (stale default route + a possible
|
||||
blackhole if a rule survives).
|
||||
"""
|
||||
real_conf = self._config_file()
|
||||
if '/tmp/' in real_conf or 'pytest' in real_conf or 'wg_confs' not in real_conf:
|
||||
return
|
||||
try:
|
||||
def _wg(cmd):
|
||||
return subprocess.run(
|
||||
['docker', 'exec', 'cell-wireguard'] + cmd,
|
||||
capture_output=True, timeout=5
|
||||
)
|
||||
for _ in range(64):
|
||||
r = _wg(['ip', 'rule', 'del', 'lookup', str(table)])
|
||||
if r.returncode != 0:
|
||||
break
|
||||
_wg(['ip', 'route', 'flush', 'table', str(table)])
|
||||
except Exception as e:
|
||||
logger.warning(f'teardown_route_table({table}) failed: {e}')
|
||||
|
||||
def remove_peer(self, public_key: str) -> bool:
|
||||
"""Remove the [Peer] block matching public_key from wg0.conf."""
|
||||
try:
|
||||
@@ -1054,6 +1093,38 @@ class WireGuardManager(BaseServiceManager):
|
||||
'vpn_network': self._get_configured_network(),
|
||||
}
|
||||
|
||||
# Domain modes whose effective domain is a publicly-resolvable FQDN that the
|
||||
# WireGuard endpoint should advertise instead of a raw IP. In these modes the
|
||||
# domain resolves (via DDNS/ACME) to the cell's public IP, so peers and linked
|
||||
# cells reach the cell by name — which survives IP changes and lets a NAT/router
|
||||
# forward each cell's WG port to the right host.
|
||||
PUBLIC_DOMAIN_MODES = ('pic_ngo', 'cloudflare', 'duckdns', 'http01')
|
||||
|
||||
def get_advertised_endpoint(self, config_manager=None) -> Optional[str]:
|
||||
"""Return the WireGuard endpoint (host:port) to advertise to peers/cells.
|
||||
|
||||
Preference order:
|
||||
1. an explicit admin override (`_identity.wireguard_endpoint`),
|
||||
2. the cell's public domain in a DDNS/ACME mode (`<domain>:<port>`),
|
||||
3. the detected external IP (`<ip>:<port>`) — LAN/fallback.
|
||||
|
||||
The port is always this cell's own configured WireGuard port, so a cell
|
||||
on a non-default port advertises it correctly (the router forwards that
|
||||
public port to this host).
|
||||
"""
|
||||
port = self._get_configured_port()
|
||||
identity = config_manager.get_identity() if config_manager is not None else {}
|
||||
override = (identity.get('wireguard_endpoint') or '').strip()
|
||||
if override:
|
||||
return override if ':' in override else f'{override}:{port}'
|
||||
mode = identity.get('domain_mode', 'lan')
|
||||
if mode in self.PUBLIC_DOMAIN_MODES and config_manager is not None:
|
||||
host = (config_manager.get_effective_domain() or '').strip()
|
||||
if host:
|
||||
return f'{host}:{port}'
|
||||
ext = self.get_external_ip()
|
||||
return f'{ext}:{port}' if ext else None
|
||||
|
||||
def get_peer_status(self, public_key: str) -> Dict[str, Any]:
|
||||
"""Return live handshake + transfer stats for a peer from `wg show`."""
|
||||
try:
|
||||
|
||||
@@ -48,12 +48,16 @@ class TestGenerateCaddyfileLan(unittest.TestCase):
|
||||
self.assertNotIn('acme_email', out)
|
||||
self.assertNotIn('dns pic_ngo', out)
|
||||
self.assertNotIn('dns cloudflare', out)
|
||||
# Internal-CA TLS pair
|
||||
# Internal-CA TLS pair, on an HTTPS (443) site — never on an http:// one.
|
||||
self.assertIn('tls /etc/caddy/internal/cert.pem '
|
||||
'/etc/caddy/internal/key.pem', out)
|
||||
# Cell hostname plus virtual IP listener
|
||||
self.assertIn('http://mycell.cell', out)
|
||||
self.assertIn('http://172.20.0.2:80', out)
|
||||
self.assertIn('https://mycell.cell {', out)
|
||||
# Cell hostname plus virtual IP listener on plain HTTP (80)
|
||||
self.assertIn('http://mycell.cell, http://172.20.0.2:80 {', out)
|
||||
# The HTTP (:80) block must NOT carry a tls directive — Caddy rejects
|
||||
# "server listening on [:80] is HTTP, but attempts to configure TLS".
|
||||
http_block = out.split('http://mycell.cell, http://172.20.0.2:80 {', 1)[1]
|
||||
self.assertNotIn('tls ', http_block)
|
||||
|
||||
|
||||
class TestGenerateCaddyfilePicNgo(unittest.TestCase):
|
||||
|
||||
+117
-12
@@ -14,9 +14,39 @@ import json
|
||||
import shutil
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import cell_link_manager
|
||||
from cell_link_manager import CellLinkManager
|
||||
|
||||
|
||||
_fw_patch = None
|
||||
|
||||
|
||||
def setUpModule():
|
||||
"""Stop cell-link unit tests from running real firewall side-effects.
|
||||
|
||||
add_connection/remove_connection call into firewall_manager, which shells out
|
||||
to `docker exec cell-wireguard iptables` and `docker run` host-route changes.
|
||||
On the dev/CI host those mutate live routes — a stale cell-subnet route once
|
||||
made is_local_request treat a VPN subnet as local and broke the full suite.
|
||||
Tests that assert specific firewall calls use their own local patch, which
|
||||
takes precedence within its context.
|
||||
"""
|
||||
global _fw_patch
|
||||
_fw_patch = patch.multiple(
|
||||
'firewall_manager',
|
||||
apply_cell_rules=MagicMock(return_value=True),
|
||||
clear_cell_rules=MagicMock(),
|
||||
ensure_cell_subnet_routes=MagicMock(),
|
||||
remove_cell_subnet_route=MagicMock(),
|
||||
)
|
||||
_fw_patch.start()
|
||||
|
||||
|
||||
def tearDownModule():
|
||||
if _fw_patch is not None:
|
||||
_fw_patch.stop()
|
||||
|
||||
|
||||
def _make_wg_mock():
|
||||
wg = MagicMock()
|
||||
wg.get_keys.return_value = {'public_key': 'serverpubkey=', 'private_key': 'serverprivkey='}
|
||||
@@ -26,6 +56,7 @@ def _make_wg_mock():
|
||||
}
|
||||
wg._get_configured_network.return_value = '10.0.0.0/24'
|
||||
wg._get_configured_address.return_value = '10.0.0.1/24'
|
||||
wg.get_advertised_endpoint.return_value = '1.2.3.4:51820'
|
||||
wg.add_cell_peer.return_value = True
|
||||
wg.remove_peer.return_value = True
|
||||
return wg
|
||||
@@ -49,6 +80,37 @@ SAMPLE_INVITE = {
|
||||
}
|
||||
|
||||
|
||||
class TestInviteFieldValidation(unittest.TestCase):
|
||||
"""_validate_invite_fields rejects malformed remote-invite fields.
|
||||
|
||||
The domain/dns_ip flow into a `curl --resolve` argv on peer-sync push, so a
|
||||
leading-dash domain (argument injection) and non-IP dns_ip must be rejected.
|
||||
"""
|
||||
|
||||
def test_valid_invite_passes(self):
|
||||
cell_link_manager._validate_invite_fields(SAMPLE_INVITE) # no raise
|
||||
|
||||
def test_rejects_leading_dash_domain(self):
|
||||
bad = {**SAMPLE_INVITE, 'domain': '-oProxyCommand=evil'}
|
||||
with self.assertRaises(ValueError):
|
||||
cell_link_manager._validate_invite_fields(bad)
|
||||
|
||||
def test_rejects_non_ip_dns_ip(self):
|
||||
bad = {**SAMPLE_INVITE, 'dns_ip': '-x'}
|
||||
with self.assertRaises(ValueError):
|
||||
cell_link_manager._validate_invite_fields(bad)
|
||||
|
||||
def test_rejects_bad_subnet(self):
|
||||
bad = {**SAMPLE_INVITE, 'vpn_subnet': 'not-a-cidr'}
|
||||
with self.assertRaises(ValueError):
|
||||
cell_link_manager._validate_invite_fields(bad)
|
||||
|
||||
def test_rejects_bad_endpoint(self):
|
||||
bad = {**SAMPLE_INVITE, 'endpoint': '-evil:51820'}
|
||||
with self.assertRaises(ValueError):
|
||||
cell_link_manager._validate_invite_fields(bad)
|
||||
|
||||
|
||||
class TestCellLinkManagerInvite(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
@@ -82,6 +144,13 @@ class TestCellLinkManagerInvite(unittest.TestCase):
|
||||
self.assertEqual(invite['cell_name'], 'myhome')
|
||||
self.assertEqual(invite['domain'], 'myhome.local')
|
||||
|
||||
def test_generate_invite_endpoint_from_advertised_endpoint(self):
|
||||
"""The invite endpoint comes from get_advertised_endpoint (domain-aware),
|
||||
not a raw external IP — so the remote cell reaches us by name + our port."""
|
||||
self.wg.get_advertised_endpoint.return_value = 'myhome.pic.ngo:51821'
|
||||
invite = self.mgr.generate_invite('myhome', 'myhome.pic.ngo')
|
||||
self.assertEqual(invite['endpoint'], 'myhome.pic.ngo:51821')
|
||||
|
||||
|
||||
class TestCellLinkManagerConnections(unittest.TestCase):
|
||||
|
||||
@@ -138,6 +207,15 @@ class TestCellLinkManagerConnections(unittest.TestCase):
|
||||
self.mgr.remove_connection('office')
|
||||
self.nm.remove_cell_dns_forward.assert_called_once_with('office.cell')
|
||||
|
||||
def test_remove_connection_removes_host_subnet_route(self):
|
||||
"""Unlinking a cell removes its host route so the subnet isn't left
|
||||
blackholed / treated as locally attached."""
|
||||
import firewall_manager as _fm
|
||||
self.mgr.add_connection(SAMPLE_INVITE)
|
||||
_fm.remove_cell_subnet_route.reset_mock()
|
||||
self.mgr.remove_connection('office')
|
||||
_fm.remove_cell_subnet_route.assert_called_once_with('10.1.0.0/24')
|
||||
|
||||
def test_remove_connection_deletes_from_list(self):
|
||||
self.mgr.add_connection(SAMPLE_INVITE)
|
||||
self.mgr.remove_connection('office')
|
||||
@@ -182,7 +260,9 @@ class TestCellLinkManagerConnections(unittest.TestCase):
|
||||
result = self.mgr.accept_invite(updated_invite)
|
||||
|
||||
self.assertEqual(result['dns_ip'], '10.1.0.2')
|
||||
self.assertEqual(result['remote_api_url'], 'http://10.1.0.2:3000')
|
||||
# remote_api_url is domain-based (the push --resolves it to the VPN IP),
|
||||
# so a dns_ip change does not alter it.
|
||||
self.assertEqual(result['remote_api_url'], 'https://office.cell')
|
||||
self.nm.remove_cell_dns_forward.assert_called()
|
||||
self.nm.add_cell_dns_forward.assert_called_with(
|
||||
domain='office.cell', dns_ip='10.1.0.2')
|
||||
@@ -470,9 +550,10 @@ class TestPushInviteToRemote(unittest.TestCase):
|
||||
result = self.mgr._push_invite_to_remote(link)
|
||||
self.assertFalse(result['ok'])
|
||||
|
||||
def test_push_invite_sends_to_correct_lan_host(self):
|
||||
"""The curl URL must use the LAN IP from the endpoint, not the WG dns_ip."""
|
||||
link = self._make_link(endpoint='192.168.31.52:51820')
|
||||
def test_push_invite_sends_to_endpoint_host_over_https(self):
|
||||
"""The curl targets the endpoint host on Caddy/HTTPS (443), not the WG
|
||||
dns_ip and not the internal :3000 API port."""
|
||||
link = self._make_link(endpoint='alice.pic.ngo:51821')
|
||||
captured = {}
|
||||
|
||||
def fake_run(cmd, **kw):
|
||||
@@ -493,10 +574,11 @@ class TestPushInviteToRemote(unittest.TestCase):
|
||||
self.mgr._push_invite_to_remote(link)
|
||||
|
||||
url_in_cmd = captured['cmd'][-1]
|
||||
self.assertIn('192.168.31.52', url_in_cmd)
|
||||
self.assertIn('accept-invite', url_in_cmd)
|
||||
# Must NOT use the WG dns_ip (10.1.0.1)
|
||||
self.assertNotIn('10.1.0.1', url_in_cmd)
|
||||
self.assertEqual(url_in_cmd,
|
||||
'https://alice.pic.ngo/api/cells/peer-sync/accept-invite')
|
||||
self.assertNotIn(':3000', url_in_cmd)
|
||||
self.assertNotIn('10.1.0.1', url_in_cmd) # not the WG dns_ip
|
||||
self.assertIn('-k', captured['cmd']) # cert may not match a bare IP
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -605,7 +687,7 @@ class TestAcceptInviteNew(unittest.TestCase):
|
||||
with patch('firewall_manager.apply_cell_rules'):
|
||||
result = self.mgr.accept_invite(updated)
|
||||
self.assertEqual(result['dns_ip'], '10.1.0.5')
|
||||
self.assertEqual(result['remote_api_url'], 'http://10.1.0.5:3000')
|
||||
self.assertEqual(result['remote_api_url'], 'https://office.cell')
|
||||
self.nm.remove_cell_dns_forward.assert_called()
|
||||
self.nm.add_cell_dns_forward.assert_called_with(
|
||||
domain='office.cell', dns_ip='10.1.0.5')
|
||||
@@ -1045,9 +1127,11 @@ class TestPermissionSync(unittest.TestCase):
|
||||
self.assertIn('last_push_at', link)
|
||||
self.assertIn('last_remote_update_at', link)
|
||||
|
||||
def test_add_connection_sets_remote_api_url_from_dns_ip(self):
|
||||
def test_add_connection_sets_remote_api_url_from_domain(self):
|
||||
link = self._add_office()
|
||||
self.assertEqual(link['remote_api_url'], 'http://10.1.0.1:3000')
|
||||
# Cross-cell API is reached via the remote's domain over Caddy/443 (the
|
||||
# push --resolves the domain to the VPN IP over the tunnel).
|
||||
self.assertEqual(link['remote_api_url'], 'https://office.cell')
|
||||
|
||||
def test_add_connection_triggers_push(self):
|
||||
push_mock = MagicMock(return_value={'ok': True, 'error': None})
|
||||
@@ -1321,7 +1405,7 @@ class TestPermissionSync(unittest.TestCase):
|
||||
self.assertIn('last_push_status', link)
|
||||
self.assertIn('last_push_at', link)
|
||||
self.assertIn('last_remote_update_at', link)
|
||||
self.assertEqual(link['remote_api_url'], 'http://10.1.0.1:3000')
|
||||
self.assertEqual(link['remote_api_url'], 'https://office.cell')
|
||||
self.assertTrue(link['pending_push']) # pre-existing → marked pending
|
||||
self.assertEqual(link['last_push_status'], 'never')
|
||||
|
||||
@@ -1330,6 +1414,27 @@ class TestPermissionSync(unittest.TestCase):
|
||||
raw = json.load(f)
|
||||
self.assertIn('pending_push', raw[0])
|
||||
|
||||
def test_load_migrates_legacy_url_forms_to_https_domain(self):
|
||||
"""Legacy remote_api_url forms — http://<ip>:3000 (unreachable) and
|
||||
https://<ip> (no matching Caddy cert by bare IP) — are rewritten on load to
|
||||
the domain-based HTTPS form."""
|
||||
for legacy_url in ('http://10.1.0.9:3000', 'https://10.1.0.9'):
|
||||
legacy = [{
|
||||
'cell_name': 'office',
|
||||
'public_key': 'officepubkey=',
|
||||
'vpn_subnet': '10.1.0.0/24',
|
||||
'dns_ip': '10.1.0.9',
|
||||
'domain': 'office.cell',
|
||||
'permissions': {'inbound': {}, 'outbound': {}},
|
||||
'remote_api_url': legacy_url,
|
||||
}]
|
||||
links_file = os.path.join(self.test_dir, 'cell_links.json')
|
||||
with open(links_file, 'w') as f:
|
||||
json.dump(legacy, f)
|
||||
link = self.mgr.list_connections()[0]
|
||||
self.assertEqual(link['remote_api_url'], 'https://office.cell',
|
||||
f'failed to migrate {legacy_url!r}')
|
||||
|
||||
|
||||
class TestExitOffer(unittest.TestCase):
|
||||
"""Tests for Phase 2: exit-offer signaling."""
|
||||
|
||||
@@ -319,5 +319,56 @@ class TestHealth(_Base):
|
||||
self.assertEqual(health, 'down')
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Teardown cleanup — regression for the confirmed cell_relay routing leak.
|
||||
#
|
||||
# A cell_relay policy-routes a peer with `ip rule from <peer> lookup <table>`
|
||||
# plus a shared `default via <cell-ip>` route in that table, inside
|
||||
# cell-wireguard. Before the fix, detaching/deleting the peer left the rule
|
||||
# (remove_peer_route_via used the wrong default table) and nothing ever flushed
|
||||
# the table's default route — both leaked, confirmed on hardware.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestTeardownCleanup(_Base):
|
||||
|
||||
def _relay(self):
|
||||
self.cell_link.list_connections.return_value = [_link('alpha')]
|
||||
self.mgr.reconcile_cell_relays()
|
||||
return self._raw_relays()[0]
|
||||
|
||||
def test_detach_removes_peer_ip_rule(self):
|
||||
relay = self._relay()
|
||||
peer = {'peer': 'laptop', 'ip': '10.0.0.5/32',
|
||||
'exit_via': relay['id'], 'route_via': 'alpha'}
|
||||
self.peer_registry.get_peer.return_value = peer
|
||||
self.peer_registry.set_peer_exit_via.return_value = True
|
||||
with patch.object(self.mgr, 'apply_routes'):
|
||||
res = self.mgr.set_peer_exit('laptop', 'default')
|
||||
self.assertTrue(res['ok'])
|
||||
# The peer's source ip rule is cleared by source (table-agnostic), so it
|
||||
# matches the relay's allocated table rather than the old default 100.
|
||||
self.wg.remove_peer_route_via.assert_called_once_with('10.0.0.5')
|
||||
|
||||
def test_delete_connection_flushes_relay_route_table(self):
|
||||
relay = self._relay()
|
||||
# Not referenced by any peer (detached) → deletable.
|
||||
self.peer_registry.list_peers.return_value = []
|
||||
res = self.mgr.delete_connection(relay['id'])
|
||||
self.assertTrue(res['ok'])
|
||||
self.wg.teardown_route_table.assert_called_once_with(relay['table'])
|
||||
|
||||
def test_reconcile_removal_flushes_relay_route_table(self):
|
||||
relay = self._relay()
|
||||
table = relay['table']
|
||||
# Offer withdrawn and not referenced → reconcile removes the relay and
|
||||
# must flush its routing table (this path bypasses delete_connection).
|
||||
self.cell_link.list_connections.return_value = [
|
||||
_link('alpha', remote_exit_offered=False)]
|
||||
self.peer_registry.list_peers.return_value = []
|
||||
out = self.mgr.reconcile_cell_relays()
|
||||
self.assertIn(relay['id'], out['removed'])
|
||||
self.wg.teardown_route_table.assert_called_once_with(table)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -652,6 +652,7 @@ class TestCellRules(unittest.TestCase):
|
||||
patch.object(firewall_manager, '_get_cell_api_ip', return_value=self._FAKE_API_IP), \
|
||||
patch.object(firewall_manager, '_get_caddy_container_ip', return_value=self._FAKE_CADDY_IP), \
|
||||
patch.object(firewall_manager, '_get_dns_container_ip', return_value=self._FAKE_DNS_IP), \
|
||||
patch.object(firewall_manager, 'ensure_cell_subnet_routes', return_value=None), \
|
||||
patch.object(firewall_manager, 'ensure_forward_stateful', return_value=True):
|
||||
firewall_manager.apply_cell_rules(cell_name, vpn_subnet, inbound_services)
|
||||
|
||||
@@ -702,32 +703,59 @@ class TestCellRules(unittest.TestCase):
|
||||
]
|
||||
self.assertTrue(subnet_drops, "Expected a catch-all DROP rule for the subnet")
|
||||
|
||||
def test_apply_cell_rules_sends_accept_for_allowed_service(self):
|
||||
"""apply_cell_rules inserts Caddy ACCEPT when inbound_services is non-empty."""
|
||||
calls = self._capture_apply('office', '10.0.1.0/24', ['calendar'])
|
||||
caddy_targets = self._targets_for_dest(calls, self._FAKE_CADDY_IP)
|
||||
self.assertIn('ACCEPT', caddy_targets,
|
||||
"Expected ACCEPT to Caddy when inbound_services is non-empty")
|
||||
|
||||
def test_apply_cell_rules_no_caddy_accept_when_no_inbound(self):
|
||||
"""apply_cell_rules does NOT insert Caddy ACCEPT when inbound_services is empty."""
|
||||
calls = self._capture_apply('office', '10.0.1.0/24', [])
|
||||
caddy_targets = self._targets_for_dest(calls, self._FAKE_CADDY_IP)
|
||||
self.assertNotIn('ACCEPT', caddy_targets,
|
||||
"No Caddy ACCEPT expected when inbound_services is empty")
|
||||
|
||||
def test_apply_cell_rules_accepts_api_sync_traffic(self):
|
||||
"""apply_cell_rules inserts ACCEPT for cell-api:3000 so permission-sync pushes pass."""
|
||||
calls = self._capture_apply('office', '10.0.1.0/24', [])
|
||||
api_ip = self._FAKE_API_IP
|
||||
api_accepts = [
|
||||
def _caddy_accepts_on_port(self, calls, port):
|
||||
"""Caddy-dest ACCEPT calls matching --dport <port>."""
|
||||
return [
|
||||
c for c in calls
|
||||
if '-s' in c and '10.0.1.0/24' in c
|
||||
and '-d' in c and api_ip in c
|
||||
and '--dport' in c and '3000' in c
|
||||
if '-d' in c and self._FAKE_CADDY_IP in c
|
||||
and '--dport' in c and str(port) in c
|
||||
and '-j' in c and c[c.index('-j') + 1] == 'ACCEPT'
|
||||
]
|
||||
self.assertTrue(api_accepts, 'Expected an ACCEPT rule for cell-api:3000')
|
||||
|
||||
def test_apply_cell_rules_sends_accept_for_allowed_service(self):
|
||||
"""apply_cell_rules inserts a Caddy:80 ACCEPT when inbound_services is non-empty."""
|
||||
calls = self._capture_apply('office', '10.0.1.0/24', ['calendar'])
|
||||
self.assertTrue(self._caddy_accepts_on_port(calls, 80),
|
||||
"Expected ACCEPT to Caddy:80 for an inbound service")
|
||||
|
||||
def test_apply_cell_rules_no_service_accept_when_no_inbound(self):
|
||||
"""No Caddy:80 (service) ACCEPT when inbound_services is empty.
|
||||
|
||||
The :443 peer-sync ACCEPT is separate and always present (below).
|
||||
"""
|
||||
calls = self._capture_apply('office', '10.0.1.0/24', [])
|
||||
self.assertFalse(self._caddy_accepts_on_port(calls, 80),
|
||||
"No Caddy:80 service ACCEPT expected with empty inbound")
|
||||
|
||||
def test_apply_cell_rules_accepts_peer_sync_to_caddy_443(self):
|
||||
"""Cross-cell peer-sync ACCEPT to Caddy:443 is always added (the push reaches
|
||||
cell-api through Caddy, since the API binds 127.0.0.1 only)."""
|
||||
calls = self._capture_apply('office', '10.0.1.0/24', [])
|
||||
peer_sync = [
|
||||
c for c in self._caddy_accepts_on_port(calls, 443)
|
||||
if '-s' in c and '10.0.1.0/24' in c
|
||||
]
|
||||
self.assertTrue(peer_sync, 'Expected ACCEPT to Caddy:443 for peer-sync')
|
||||
# And it must NOT target the (127.0.0.1-only) cell-api on :3000 anymore.
|
||||
api_3000 = [
|
||||
c for c in calls
|
||||
if '-d' in c and self._FAKE_API_IP in c and '--dport' in c and '3000' in c
|
||||
]
|
||||
self.assertFalse(api_3000, 'Peer-sync must not target cell-api:3000')
|
||||
|
||||
def test_apply_cell_rules_excludes_peer_sync_from_masquerade(self):
|
||||
"""Peer-sync to Caddy:443 must RETURN in nat POSTROUTING (skip the blanket
|
||||
MASQUERADE) so the remote sees the linked cell's real VPN source for auth."""
|
||||
calls = self._capture_apply('office', '10.0.1.0/24', [])
|
||||
returns = [
|
||||
c for c in calls
|
||||
if '-t' in c and 'nat' in c and 'POSTROUTING' in c
|
||||
and '-s' in c and '10.0.1.0/24' in c
|
||||
and '-d' in c and self._FAKE_CADDY_IP in c
|
||||
and '--dport' in c and '443' in c
|
||||
and '-j' in c and c[c.index('-j') + 1] == 'RETURN'
|
||||
]
|
||||
self.assertTrue(returns, 'Expected nat POSTROUTING RETURN to preserve peer-sync source')
|
||||
|
||||
def test_apply_cell_rules_api_sync_accept_before_catchall_drop(self):
|
||||
"""The API-sync ACCEPT must be inserted after service rules so it ends up above DROP."""
|
||||
@@ -743,6 +771,7 @@ class TestCellRules(unittest.TestCase):
|
||||
patch.object(firewall_manager, '_get_cell_api_ip', return_value='172.20.0.10'), \
|
||||
patch.object(firewall_manager, '_get_caddy_container_ip', return_value='172.20.0.2'), \
|
||||
patch.object(firewall_manager, '_get_dns_container_ip', return_value='172.20.0.3'), \
|
||||
patch.object(firewall_manager, 'ensure_cell_subnet_routes', return_value=None), \
|
||||
patch.object(firewall_manager, 'ensure_forward_stateful', return_value=True):
|
||||
firewall_manager.apply_cell_rules('office', '10.0.1.0/24', [])
|
||||
|
||||
@@ -754,12 +783,12 @@ class TestCellRules(unittest.TestCase):
|
||||
# ── apply_cell_rules — empty inbound (all-deny) ───────────────────────────
|
||||
|
||||
def test_apply_cell_rules_empty_inbound_no_service_accept(self):
|
||||
"""With inbound_services=[], no service ACCEPT is added; catch-all DROP blocks traffic."""
|
||||
"""With inbound_services=[], no Caddy:80 service ACCEPT is added; the catch-all
|
||||
DROP blocks service traffic (only the :443 peer-sync ACCEPT is present)."""
|
||||
calls = self._capture_apply('office', '10.0.1.0/24', [])
|
||||
# No ACCEPT to Caddy
|
||||
caddy_targets = self._targets_for_dest(calls, self._FAKE_CADDY_IP)
|
||||
self.assertNotIn('ACCEPT', caddy_targets,
|
||||
"No Caddy ACCEPT expected with empty inbound_services")
|
||||
# No service ACCEPT to Caddy on :80
|
||||
self.assertFalse(self._caddy_accepts_on_port(calls, 80),
|
||||
"No Caddy:80 ACCEPT expected with empty inbound_services")
|
||||
# No per-VIP rules at all
|
||||
for service, svc_ip in firewall_manager.SERVICE_IPS.items():
|
||||
svc_targets = self._targets_for_dest(calls, svc_ip)
|
||||
@@ -839,6 +868,31 @@ class TestCellRules(unittest.TestCase):
|
||||
# peer rule for a different entity must survive
|
||||
self.assertIn('pic-peer-10-0-0-2/32', content)
|
||||
|
||||
# ── remove_cell_subnet_route ──────────────────────────────────────────────
|
||||
|
||||
def test_remove_cell_subnet_route_issues_ip_route_del(self):
|
||||
"""remove_cell_subnet_route deletes the host route for the cell's subnet."""
|
||||
captured = {}
|
||||
|
||||
def fake_run(cmd, **kw):
|
||||
captured['cmd'] = cmd
|
||||
return MagicMock(returncode=0, stdout='', stderr='')
|
||||
|
||||
with patch.object(firewall_manager, '_run', side_effect=fake_run):
|
||||
firewall_manager.remove_cell_subnet_route('10.1.0.0/24')
|
||||
cmd = captured.get('cmd', [])
|
||||
self.assertIn('ip', cmd)
|
||||
self.assertIn('route', cmd)
|
||||
self.assertIn('del', cmd)
|
||||
self.assertIn('10.1.0.0/24', cmd)
|
||||
self.assertIn('172.20.0.9', cmd)
|
||||
|
||||
def test_remove_cell_subnet_route_noop_on_empty(self):
|
||||
"""An empty subnet is a no-op (no docker call)."""
|
||||
with patch.object(firewall_manager, '_run') as run:
|
||||
firewall_manager.remove_cell_subnet_route('')
|
||||
run.assert_not_called()
|
||||
|
||||
# ── apply_all_cell_rules ──────────────────────────────────────────────────
|
||||
|
||||
def test_apply_all_cell_rules_calls_apply_for_each(self):
|
||||
@@ -1117,6 +1171,7 @@ class TestEnsureForwardStateful(unittest.TestCase):
|
||||
patch.object(firewall_manager, '_get_caddy_container_ip', return_value='172.20.0.2'), \
|
||||
patch.object(firewall_manager, '_get_dns_container_ip', return_value='172.20.0.3'), \
|
||||
patch.object(firewall_manager, '_get_cell_api_ip', return_value='172.20.0.10'), \
|
||||
patch.object(firewall_manager, 'ensure_cell_subnet_routes', return_value=None), \
|
||||
patch.object(firewall_manager, 'ensure_forward_stateful') as mock_stateful:
|
||||
firewall_manager.apply_cell_rules('testcell', '10.0.0.0/24', [])
|
||||
mock_stateful.assert_called_once()
|
||||
|
||||
@@ -90,11 +90,13 @@ class TestWireGuardEndpoints(unittest.TestCase):
|
||||
'endpoint': '1.2.3.4:51820',
|
||||
'port': 51820,
|
||||
}
|
||||
mock_wg.get_advertised_endpoint.return_value = '1.2.3.4:51820'
|
||||
r = self.client.get('/api/wireguard/server-config')
|
||||
self.assertEqual(r.status_code, 200)
|
||||
data = json.loads(r.data)
|
||||
self.assertIn('public_key', data)
|
||||
self.assertIn('endpoint', data)
|
||||
self.assertEqual(data.get('effective_endpoint'), '1.2.3.4:51820')
|
||||
|
||||
@patch('app.wireguard_manager')
|
||||
def test_server_config_returns_500_on_exception(self, mock_wg):
|
||||
|
||||
@@ -885,5 +885,60 @@ class TestCellRoutes(unittest.TestCase):
|
||||
mock_route.assert_called_once_with('10.1.0.0/24')
|
||||
|
||||
|
||||
class _FakeCM:
|
||||
"""Minimal config_manager stand-in for get_advertised_endpoint tests."""
|
||||
def __init__(self, identity, effective_domain):
|
||||
self._identity = identity
|
||||
self._effective = effective_domain
|
||||
|
||||
def get_identity(self):
|
||||
return self._identity
|
||||
|
||||
def get_effective_domain(self):
|
||||
return self._effective
|
||||
|
||||
|
||||
class TestAdvertisedEndpoint(unittest.TestCase):
|
||||
"""get_advertised_endpoint prefers domain/override over the raw external IP."""
|
||||
|
||||
def setUp(self):
|
||||
self.test_dir = tempfile.mkdtemp()
|
||||
patcher = patch.object(WireGuardManager, '_syncconf', return_value=None)
|
||||
patcher.start()
|
||||
self.addCleanup(patcher.stop)
|
||||
self.wg = WireGuardManager(self.test_dir, self.test_dir)
|
||||
# Pin the configured port and external IP for deterministic endpoints.
|
||||
self.wg._get_configured_port = MagicMock(return_value=51821)
|
||||
self.wg.get_external_ip = MagicMock(return_value='198.51.100.7')
|
||||
|
||||
def tearDown(self):
|
||||
shutil.rmtree(self.test_dir, ignore_errors=True)
|
||||
|
||||
def test_public_mode_uses_effective_domain_and_own_port(self):
|
||||
cm = _FakeCM({'domain_mode': 'pic_ngo'}, 'alice.pic.ngo')
|
||||
self.assertEqual(self.wg.get_advertised_endpoint(cm), 'alice.pic.ngo:51821')
|
||||
|
||||
def test_lan_mode_falls_back_to_external_ip(self):
|
||||
cm = _FakeCM({'domain_mode': 'lan'}, 'cell')
|
||||
self.assertEqual(self.wg.get_advertised_endpoint(cm), '198.51.100.7:51821')
|
||||
|
||||
def test_admin_override_wins(self):
|
||||
cm = _FakeCM({'domain_mode': 'pic_ngo', 'wireguard_endpoint': 'vpn.example.com'}, 'alice.pic.ngo')
|
||||
self.assertEqual(self.wg.get_advertised_endpoint(cm), 'vpn.example.com:51821')
|
||||
|
||||
def test_override_with_explicit_port_kept(self):
|
||||
cm = _FakeCM({'domain_mode': 'lan', 'wireguard_endpoint': 'vpn.example.com:7777'}, 'cell')
|
||||
self.assertEqual(self.wg.get_advertised_endpoint(cm), 'vpn.example.com:7777')
|
||||
|
||||
def test_none_when_no_domain_and_no_external_ip(self):
|
||||
self.wg.get_external_ip = MagicMock(return_value=None)
|
||||
cm = _FakeCM({'domain_mode': 'lan'}, 'cell')
|
||||
self.assertIsNone(self.wg.get_advertised_endpoint(cm))
|
||||
|
||||
def test_public_mode_without_domain_falls_back_to_ip(self):
|
||||
cm = _FakeCM({'domain_mode': 'cloudflare'}, '')
|
||||
self.assertEqual(self.wg.get_advertised_endpoint(cm), '198.51.100.7:51821')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -1,6 +1,11 @@
|
||||
FROM alpine:3.20@sha256:d9e853e87e55526f6b2917df91a2115c36dd7c696a35be12163d44e6e2a4b6bc
|
||||
|
||||
RUN apk add --no-cache wireguard-tools iptables ip6tables iproute2
|
||||
# curl + ca-certificates: cell-to-cell peer-sync pushes (offer/permission state)
|
||||
# originate from this container's network namespace — the only one with routes to
|
||||
# remote-cell VPN subnets over the tunnel — and go over HTTPS to the remote's
|
||||
# Caddy. busybox wget here has no TLS, so curl is required (~5MB over the slim
|
||||
# base; the alternative is no automatic cross-cell sync).
|
||||
RUN apk add --no-cache wireguard-tools iptables ip6tables iproute2 curl ca-certificates
|
||||
|
||||
COPY entrypoint.sh /entrypoint.sh
|
||||
RUN chmod +x /entrypoint.sh
|
||||
|
||||
Reference in New Issue
Block a user