Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | 2ab6e715d8 | | |
| | 639fb66e5b | | |
| | 714fb9b1a9 | | |
| | c7e01d4aa7 | | |
| | 1bb8a5eb59 | | |
| | fa746a3b30 | | |
| | c806a9bb54 | | |
| | 6bc1d625bf | | |
@@ -8,7 +8,11 @@ WORKDIR /app/api
|
||||
|
||||
# The API runs as root by design: it drives iptables, the docker socket, and
|
||||
# docker-execs into sibling containers. Non-root is not feasible here.
|
||||
# The Compose v2 plugin is a separate binary under cli-plugins/ — ServiceComposer
|
||||
# shells out to `docker compose` for every store-service lifecycle op, so it must
|
||||
# be copied alongside the docker CLI, not just the docker binary.
|
||||
COPY --from=dockercli /usr/local/bin/docker /usr/local/bin/docker
|
||||
COPY --from=dockercli /usr/local/libexec/docker/cli-plugins/docker-compose /usr/local/libexec/docker/cli-plugins/docker-compose
|
||||
|
||||
# cosign verifies store-service image signatures against the bundled public key
|
||||
# (config/cosign/cosign.pub) before ServiceComposer starts a container.
|
||||
|
||||
+14
-2
@@ -310,7 +310,15 @@ class CaddyManager(BaseServiceManager):
|
||||
service_routes: str, core_routes: str,
|
||||
cert_path: str = _CADDY_INTERNAL_CERT,
|
||||
key_path: str = _CADDY_INTERNAL_KEY) -> str:
|
||||
"""LAN mode: HTTP only + internal-CA TLS, no ACME."""
|
||||
"""LAN mode: internal-CA TLS on 443, plain HTTP on 80, no ACME.
|
||||
|
||||
The same routes are served on both an HTTPS site (the internal-CA cert)
|
||||
and an HTTP site. They must be SEPARATE site blocks: a `tls` directive on
|
||||
an `http://` (port 80) address is rejected by Caddy ("server listening on
|
||||
[:80] is HTTP, but attempts to configure TLS connection policies"). Both
|
||||
are needed because the WireGuard server DNATs peer traffic to Caddy on
|
||||
both 80 and 443.
|
||||
"""
|
||||
body = []
|
||||
if service_routes:
|
||||
body.append(self._indent_routes(service_routes))
|
||||
@@ -325,10 +333,14 @@ class CaddyManager(BaseServiceManager):
|
||||
" auto_https off\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
f"http://{cell_name}.cell, http://172.20.0.2:80 {{\n"
|
||||
f"https://{cell_name}.cell {{\n"
|
||||
f" tls {cert_path} {key_path}\n"
|
||||
f"{inner}\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
f"http://{cell_name}.cell, http://172.20.0.2:80 {{\n"
|
||||
f"{inner}\n"
|
||||
"}\n"
|
||||
)
|
||||
|
||||
def _caddyfile_pic_ngo(self, cell_name: str,
|
||||
|
||||
+106
-24
@@ -8,10 +8,12 @@ Each connection is stored in data/cell_links.json and manifests as:
|
||||
- An iptables FORWARD rule set (service-level access control)
|
||||
"""
|
||||
|
||||
import ipaddress
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import subprocess
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from typing import Any, Dict, List, Optional
|
||||
@@ -30,6 +32,58 @@ _BACKOFF_BASE_S = 60
|
||||
_BACKOFF_MAX_S = 3600
|
||||
|
||||
|
||||
# Strict formats for fields imported from a remote cell's invite. The domain and
|
||||
# dns_ip flow into a `curl --resolve <domain>:443:<dns_ip>` argv (peer-sync push);
|
||||
# anchoring them — domain must start alphanumeric, dns_ip must be an IP — prevents
|
||||
# a malicious invite injecting a leading-dash value that curl reads as a flag.
|
||||
# NOTE: the patterns end in \Z rather than $ — in Python `$` also matches just
# before a trailing newline, so "alice.pic.ngo\n" would otherwise be accepted.
# The port uses [0-9] rather than \d because \d also matches non-ASCII digits.
_INVITE_HOSTNAME_RE = re.compile(r'^[A-Za-z0-9]([A-Za-z0-9.-]{0,253}[A-Za-z0-9])?\Z')
_INVITE_CELL_NAME_RE = re.compile(r'^[A-Za-z0-9][A-Za-z0-9 _.-]{0,63}\Z')
_INVITE_ENDPOINT_RE = re.compile(r'^[A-Za-z0-9][A-Za-z0-9._-]*:[0-9]{1,5}\Z')


def _validate_invite_fields(invite: Dict[str, Any]) -> None:
    """Reject a remote cell's invite whose fields aren't strictly well-formed.

    Defence-in-depth: these values come from another cell and reach iptables,
    DNS config, and a curl argv (the peer-sync push --resolves <domain>:443:
    <dns_ip>). Anchoring domain/dns_ip/endpoint to start alphanumeric blocks a
    malicious leading-dash value that curl would read as a flag. The public_key
    is not checked here (per the original note it is validated downstream by
    WireGuardManager.add_cell_peer).

    Checks performed:
      * cell_name  - str matching _INVITE_CELL_NAME_RE (whole string)
      * domain     - str matching _INVITE_HOSTNAME_RE (whole string)
      * dns_ip     - parseable by ipaddress.ip_address
      * vpn_subnet - parseable by ipaddress.ip_network (host bits allowed)
      * endpoint   - optional; when truthy must be ``host:port`` with the
                     port in 1..65535

    Args:
        invite: the decoded invite mapping received from the remote cell.

    Raises:
        ValueError: on any malformed field.
    """
    name = invite.get('cell_name', '')
    # fullmatch: the entire value must match, so a trailing newline is rejected.
    if not isinstance(name, str) or not _INVITE_CELL_NAME_RE.fullmatch(name):
        raise ValueError(f'invalid cell_name {name!r}')

    domain = invite.get('domain', '')
    if not isinstance(domain, str) or not _INVITE_HOSTNAME_RE.fullmatch(domain):
        raise ValueError(f'invalid domain {domain!r}: must be a hostname')

    try:
        ipaddress.ip_address(str(invite.get('dns_ip', '')))
    except ValueError:
        # `from None`: the ipaddress parse error adds nothing for the caller.
        raise ValueError(f"invalid dns_ip {invite.get('dns_ip')!r}") from None

    try:
        ipaddress.ip_network(str(invite.get('vpn_subnet', '')), strict=False)
    except ValueError:
        raise ValueError(f"invalid vpn_subnet {invite.get('vpn_subnet')!r}") from None

    endpoint = invite.get('endpoint')
    if endpoint:
        endpoint_text = str(endpoint)
        if not _INVITE_ENDPOINT_RE.fullmatch(endpoint_text):
            raise ValueError(f'invalid endpoint {endpoint!r}')
        # The regex only bounds the digit count; enforce a real port number.
        port = int(endpoint_text.rsplit(':', 1)[1])
        if not 1 <= port <= 65535:
            raise ValueError(f'invalid endpoint {endpoint!r}')
|
||||
|
||||
|
||||
def _remote_api_url(domain: Optional[str]) -> Optional[str]:
    """Build the HTTPS base URL of a linked cell's API from its domain.

    Returns ``https://<domain>`` for a non-empty domain and ``None`` when the
    domain is missing or empty.

    Why the domain and not the VPN IP (per the design notes for this change):
    cross-cell peer-sync is sent to the remote's Caddy on 443, and Caddy only
    holds a certificate for the domain (ACME) or the .cell name (internal CA),
    so a request by bare IP has no matching SNI and the TLS handshake fails.
    The caller still connects to the VPN IP over the tunnel by passing
    `curl --resolve <domain>:443:<dns_ip>` (see _push_permissions_to_remote).
    """
    if not domain:
        return None
    return f"https://{domain}"
|
||||
|
||||
|
||||
def _compute_next_retry(attempts: int) -> str:
|
||||
"""Return an ISO timestamp for the earliest next retry using capped exponential backoff."""
|
||||
delay = min(_BACKOFF_BASE_S * (2 ** (attempts - 1)), _BACKOFF_MAX_S)
|
||||
@@ -65,11 +119,12 @@ class CellLinkManager:
|
||||
link['permissions'] = _default_perms()
|
||||
changed = True
|
||||
# Phase 1 migration: permission-sync tracking fields
|
||||
if 'remote_api_url' not in link:
|
||||
link['remote_api_url'] = (
|
||||
f"http://{link['dns_ip']}:3000"
|
||||
if link.get('dns_ip') else None
|
||||
)
|
||||
# Domain-based HTTPS URL. Rebuild if missing, or if it's a
|
||||
# legacy form: http://<ip>:3000 (unreachable) or https://<ip>
|
||||
# (no matching Caddy cert by bare IP).
|
||||
_want_url = _remote_api_url(link.get('domain'))
|
||||
if link.get('remote_api_url') != _want_url and _want_url:
|
||||
link['remote_api_url'] = _want_url
|
||||
changed = True
|
||||
if 'last_push_status' not in link:
|
||||
link['last_push_status'] = 'never'
|
||||
@@ -184,16 +239,26 @@ class CellLinkManager:
|
||||
payload = json.dumps(body)
|
||||
endpoint = url.rstrip('/') + '/api/cells/peer-sync/permissions'
|
||||
|
||||
# Determine local WG IP so the remote can authenticate us by source subnet.
|
||||
# MASQUERADE rewrites source to cell-wireguard's eth0 IP (172.20.x.x), which
|
||||
# is NOT in the cell's vpn_subnet. Passing the true WG IP in X-Forwarded-For
|
||||
# lets _authenticate_peer_cell() find the matching cell link.
|
||||
# Determine local WG IP for X-Forwarded-For (belt-and-suspenders for the
|
||||
# remote's source-subnet auth). With the peer-sync masquerade exclusion
|
||||
# the remote's Caddy already sees our real VPN source and appends it, but
|
||||
# passing it explicitly is harmless.
|
||||
local_wg_ip = self._local_wg_ip()
|
||||
xff_header = f'X-Forwarded-For: {local_wg_ip}' if local_wg_ip else None
|
||||
|
||||
# Reach the remote over the WG tunnel by its VPN IP, but present the
|
||||
# cell's DOMAIN as SNI/Host so Caddy serves its certificate — a request
|
||||
# to a bare IP has no matching cert and the TLS handshake fails. -k still
|
||||
# covers LAN mode (internal-CA cert curl won't chain to).
|
||||
domain = link.get('domain')
|
||||
dns_ip = link.get('dns_ip')
|
||||
cmd = [
|
||||
'docker', 'exec', 'cell-wireguard',
|
||||
'curl', '-s', '-o', '/dev/null', '-w', '%{http_code}',
|
||||
'curl', '-s', '-k', '-o', '/dev/null', '-w', '%{http_code}',
|
||||
]
|
||||
if domain and dns_ip:
|
||||
cmd += ['--resolve', f'{domain}:443:{dns_ip}']
|
||||
cmd += [
|
||||
'-X', 'POST',
|
||||
'-H', 'Content-Type: application/json',
|
||||
]
|
||||
@@ -371,14 +436,24 @@ class CellLinkManager:
|
||||
# ── Public API ────────────────────────────────────────────────────────────
|
||||
|
||||
def generate_invite(self, cell_name: str, domain: str) -> Dict[str, Any]:
|
||||
"""Return an invite package describing this cell for another cell to import."""
|
||||
"""Return an invite package describing this cell for another cell to import.
|
||||
|
||||
The endpoint advertises the cell's public domain (when in a DDNS/ACME
|
||||
mode) plus this cell's own WireGuard port, rather than a raw external IP —
|
||||
so the remote cell reaches us by name and a NAT/router can forward each
|
||||
cell's distinct WG port to the right host.
|
||||
"""
|
||||
keys = self.wireguard_manager.get_keys()
|
||||
srv = self.wireguard_manager.get_server_config()
|
||||
server_vpn_ip = self.wireguard_manager._get_configured_address().split('/')[0]
|
||||
try:
|
||||
from app import config_manager as _cm
|
||||
except Exception:
|
||||
_cm = None
|
||||
endpoint = self.wireguard_manager.get_advertised_endpoint(_cm)
|
||||
return {
|
||||
'cell_name': cell_name,
|
||||
'public_key': keys['public_key'],
|
||||
'endpoint': srv.get('endpoint'),
|
||||
'endpoint': endpoint,
|
||||
'vpn_subnet': self.wireguard_manager._get_configured_network(),
|
||||
'dns_ip': server_vpn_ip,
|
||||
'domain': domain,
|
||||
@@ -448,15 +523,16 @@ class CellLinkManager:
|
||||
def _push_invite_to_remote(self, link: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Send OUR invite to the remote cell so it can complete mutual WG pairing.
|
||||
|
||||
Called immediately after adding the remote as our WG peer. Uses the
|
||||
remote's endpoint IP (LAN-reachable before the WG tunnel is up) rather
|
||||
than the WG-internal dns_ip. Non-fatal — one-sided pairing degrades
|
||||
gracefully; the admin can pair from the other side manually.
|
||||
Called immediately after adding the remote as our WG peer, before the WG
|
||||
tunnel is up. Reaches the remote over the PUBLIC path at its advertised
|
||||
endpoint host (a domain in DDNS/ACME modes) on Caddy/443 — the API's :3000
|
||||
is 127.0.0.1-only and not reachable across cells. Non-fatal — one-sided
|
||||
pairing degrades gracefully; the admin can pair from the other side.
|
||||
"""
|
||||
endpoint = link.get('endpoint') or ''
|
||||
if not endpoint:
|
||||
return {'ok': False, 'error': 'no endpoint'}
|
||||
# Parse LAN IP from endpoint (e.g. "192.168.31.52:51820" → "192.168.31.52")
|
||||
# Host from endpoint (e.g. "alice.pic.ngo:51821" → "alice.pic.ngo").
|
||||
try:
|
||||
host = endpoint.rsplit(':', 1)[0].strip('[]')
|
||||
except Exception:
|
||||
@@ -471,11 +547,14 @@ class CellLinkManager:
|
||||
except Exception as e:
|
||||
return {'ok': False, 'error': f'could not build own invite: {e}'}
|
||||
|
||||
url = f'http://{host}:3000/api/cells/peer-sync/accept-invite'
|
||||
url = f'https://{host}/api/cells/peer-sync/accept-invite'
|
||||
payload = json.dumps({'invite': own_invite})
|
||||
cmd = [
|
||||
'docker', 'exec', 'cell-wireguard',
|
||||
'curl', '-s', '-o', '/dev/null', '-w', '%{http_code}',
|
||||
# -k: endpoint may be a bare IP (LAN/fallback) whose cert won't match;
|
||||
# accept-invite carries only public keys and the WG handshake is the
|
||||
# real authentication.
|
||||
'curl', '-s', '-k', '-o', '/dev/null', '-w', '%{http_code}',
|
||||
'-X', 'POST',
|
||||
'-H', 'Content-Type: application/json',
|
||||
'-d', payload,
|
||||
@@ -507,6 +586,7 @@ class CellLinkManager:
|
||||
for field in ('cell_name', 'public_key', 'vpn_subnet', 'dns_ip', 'domain'):
|
||||
if field not in invite:
|
||||
raise ValueError(f"Invite missing field: {field!r}")
|
||||
_validate_invite_fields(invite)
|
||||
|
||||
links = self._load()
|
||||
name = invite['cell_name']
|
||||
@@ -537,7 +617,7 @@ class CellLinkManager:
|
||||
old_domain = existing.get('domain', '')
|
||||
existing['dns_ip'] = invite['dns_ip']
|
||||
existing['vpn_subnet'] = invite['vpn_subnet']
|
||||
existing['remote_api_url'] = f"http://{invite['dns_ip']}:3000"
|
||||
existing['remote_api_url'] = _remote_api_url(invite['domain'])
|
||||
if invite.get('endpoint'):
|
||||
existing['endpoint'] = invite['endpoint']
|
||||
if domain_changed:
|
||||
@@ -599,7 +679,7 @@ class CellLinkManager:
|
||||
'domain': invite['domain'],
|
||||
'connected_at': datetime.utcnow().isoformat(),
|
||||
'permissions': _default_perms(),
|
||||
'remote_api_url': f"http://{invite['dns_ip']}:3000",
|
||||
'remote_api_url': _remote_api_url(invite['domain']),
|
||||
'last_push_status': 'never',
|
||||
'last_push_at': None,
|
||||
'last_push_error': None,
|
||||
@@ -621,6 +701,7 @@ class CellLinkManager:
|
||||
def add_connection(self, invite: Dict[str, Any],
|
||||
inbound_services: Optional[List[str]] = None) -> Dict[str, Any]:
|
||||
"""Import a remote cell's invite and establish the connection."""
|
||||
_validate_invite_fields(invite)
|
||||
links = self._load()
|
||||
name = invite['cell_name']
|
||||
if any(l['cell_name'] == name for l in links):
|
||||
@@ -659,7 +740,7 @@ class CellLinkManager:
|
||||
'domain': invite['domain'],
|
||||
'connected_at': datetime.utcnow().isoformat(),
|
||||
'permissions': perms,
|
||||
'remote_api_url': f"http://{invite['dns_ip']}:3000",
|
||||
'remote_api_url': _remote_api_url(invite['domain']),
|
||||
'last_push_status': 'never',
|
||||
'last_push_at': None,
|
||||
'last_push_error': None,
|
||||
@@ -717,8 +798,9 @@ class CellLinkManager:
|
||||
try:
|
||||
import firewall_manager as _fm
|
||||
_fm.clear_cell_rules(cell_name)
|
||||
_fm.remove_cell_subnet_route(link.get('vpn_subnet', ''))
|
||||
except Exception as e:
|
||||
logger.warning(f"clear_cell_rules for {cell_name} failed (non-fatal): {e}")
|
||||
logger.warning(f"firewall teardown for {cell_name} failed (non-fatal): {e}")
|
||||
|
||||
self.wireguard_manager.remove_peer(link['public_key'])
|
||||
self.network_manager.remove_cell_dns_forward(link['domain'])
|
||||
|
||||
@@ -1419,6 +1419,37 @@ class ConnectivityManager(BaseServiceManager):
|
||||
logger.warning(f"delete_connection: container teardown failed "
|
||||
f"(non-fatal): {e}")
|
||||
|
||||
# Free this connection's host policy-routing rule and kill-switch.
|
||||
# apply_routes only re-adds rules for *existing* connections and only
|
||||
# flushes the PIC_CONNECTIVITY chains — it never removes the deleted
|
||||
# connection's individually-managed `ip rule fwmark→table` or its
|
||||
# FORWARD kill-switch, so they must be torn down here or they leak.
|
||||
mark, table = record.get('mark'), record.get('table')
|
||||
if (record.get('type') != self.CELL_RELAY_TYPE
|
||||
and isinstance(mark, int) and isinstance(table, int)):
|
||||
try:
|
||||
self._remove_ip_rule(mark, table)
|
||||
except Exception as e:
|
||||
logger.warning(f"delete_connection: ip rule cleanup failed "
|
||||
f"(non-fatal): {e}")
|
||||
try:
|
||||
self._remove_killswitch(mark, record.get('iface'))
|
||||
except Exception as e:
|
||||
logger.warning(f"delete_connection: killswitch cleanup failed "
|
||||
f"(non-fatal): {e}")
|
||||
elif (record.get('type') == self.CELL_RELAY_TYPE
|
||||
and isinstance(table, int)
|
||||
and self.wireguard_manager is not None):
|
||||
# A cell_relay policy-routes peers via a source ip rule + a
|
||||
# shared default route in its table inside cell-wireguard. Per-peer
|
||||
# detach removes the rules; the table's default route only goes
|
||||
# away here, when the connection is gone — otherwise it leaks.
|
||||
try:
|
||||
self.wireguard_manager.teardown_route_table(table)
|
||||
except Exception as e:
|
||||
logger.warning(f"delete_connection: cell_relay route table "
|
||||
f"cleanup failed (non-fatal): {e}")
|
||||
|
||||
for secret_ref in record.get('secret_refs', []):
|
||||
if self.vault_manager is not None:
|
||||
try:
|
||||
@@ -1535,6 +1566,18 @@ class ConnectivityManager(BaseServiceManager):
|
||||
f"{cell_name!r} no longer offered but still "
|
||||
f"referenced; keeping")
|
||||
continue
|
||||
# Flush the relay's policy-routing table (shared default route)
|
||||
# before forgetting the record — this path deletes the config
|
||||
# entry directly rather than via delete_connection, so it must
|
||||
# do the same host-routing teardown or the route leaks.
|
||||
rtable = rec.get('table')
|
||||
if self.wireguard_manager is not None and isinstance(rtable, int):
|
||||
try:
|
||||
self.wireguard_manager.teardown_route_table(rtable)
|
||||
except Exception as e:
|
||||
logger.warning(f"reconcile_cell_relays: route table "
|
||||
f"cleanup for {cell_name!r} failed "
|
||||
f"(non-fatal): {e}")
|
||||
try:
|
||||
self.config_manager.delete_connection(rec.get('id'))
|
||||
removed.append(rec.get('id'))
|
||||
@@ -2138,6 +2181,24 @@ class ConnectivityManager(BaseServiceManager):
|
||||
'-m', 'mark', '--mark', hex(mark),
|
||||
'!', '-o', iface, '-j', 'DROP'])
|
||||
|
||||
def _remove_killswitch(self, mark: int, iface: Optional[str]) -> None:
    """Delete a connection's kill-switch DROP rule from FORWARD (idempotent).

    The kill-switch is appended straight to FORWARD rather than to the
    PIC_CONNECTIVITY chains, so the chain flush done by apply_routes does not
    clear it; left in place, a stale DROP would block a later connection that
    reuses the same mark.

    Args:
        mark: firewall mark of the connection; rendered with ``hex()``.
        iface: the connection's interface. Nothing is done when falsy.

    The delete is repeated to drain duplicate rules: up to 8 attempts,
    stopping at the first one whose return code is non-zero.
    """
    if iface:
        delete_rule = ['-D', 'FORWARD',
                       '-m', 'mark', '--mark', hex(mark),
                       '!', '-o', iface, '-j', 'DROP']
        attempts_left = 8
        while attempts_left > 0:
            # Hand over a fresh list each time, as the original did.
            outcome = self._wg_iptables(list(delete_rule))
            if outcome.returncode != 0:
                break
            attempts_left -= 1
|
||||
|
||||
def _exit_status(self, exit_type: str) -> Dict[str, Any]:
|
||||
"""Return per-exit status (config presence + interface up/down).
|
||||
|
||||
|
||||
+48
-9
@@ -374,7 +374,8 @@ def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str
|
||||
Traffic from vpn_subnet is allowed only to service VIPs listed in
|
||||
inbound_services; all other cell traffic is DROPped. Cells get no
|
||||
internet or peer access — only explicit service access via Caddy on
|
||||
port 80, plus the cell-api port (3000) for permission-sync pushes.
|
||||
port 80, plus Caddy on 443 for cross-cell peer-sync pushes (offer/
|
||||
permission state) which reach cell-api through Caddy.
|
||||
|
||||
DNS (port 53) is always allowed so cell peers can resolve service names.
|
||||
Service names resolve to the WG server IP; ensure_service_dnat() routes
|
||||
@@ -388,7 +389,7 @@ def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str
|
||||
2. Exit relay ACCEPT (-o eth0) (if exit_relay, above catch-all)
|
||||
3. Service ACCEPT to Caddy port 80 (if any inbound_services)
|
||||
4. DNS ACCEPT to cell-dns port 53 (UDP + TCP)
|
||||
5. API-sync ACCEPT (inserted last → top)
|
||||
5. Peer-sync ACCEPT to Caddy port 443 (inserted last → top)
|
||||
"""
|
||||
try:
|
||||
tag = _cell_tag(cell_name)
|
||||
@@ -425,19 +426,38 @@ def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str
|
||||
'-p', proto, '--dport', '53',
|
||||
'-m', 'comment', '--comment', tag, '-j', 'ACCEPT'])
|
||||
|
||||
# API permission-sync ACCEPT — inserted LAST so it goes to position 1 (above
|
||||
# the catch-all DROP). Remote cells push permissions to our cell-api via the
|
||||
# WG tunnel; iptables sees source=cell_subnet dst=api_ip after DNAT.
|
||||
api_ip = _get_cell_api_ip()
|
||||
if api_ip:
|
||||
_iptables(['-I', 'FORWARD', '-s', vpn_subnet, '-d', api_ip,
|
||||
'-p', 'tcp', '--dport', '3000',
|
||||
# Peer-sync ACCEPT — inserted LAST so it goes to position 1 (above the
|
||||
# catch-all DROP). Remote cells push offer/permission state to our API over
|
||||
# the WG tunnel. The push targets the remote's Caddy on 443 (DNAT wg0:443 →
|
||||
# Caddy → cell-api), NOT cell-api:3000 directly: the API binds 127.0.0.1
|
||||
# only and is reachable solely through Caddy. After DNAT iptables sees
|
||||
# source=cell_subnet dst=caddy_ip:443; the existing `-o eth0 MASQUERADE`
|
||||
# routes Caddy's reply back through the tunnel.
|
||||
caddy_ip = _get_caddy_container_ip()
|
||||
if caddy_ip:
|
||||
_iptables(['-I', 'FORWARD', '-s', vpn_subnet, '-d', caddy_ip,
|
||||
'-p', 'tcp', '--dport', '443',
|
||||
'-m', 'comment', '--comment', tag, '-j', 'ACCEPT'])
|
||||
# Preserve the linked cell's real VPN source on peer-sync traffic:
|
||||
# the blanket `-o eth0 MASQUERADE` would rewrite it to cell-wireguard's
|
||||
# bridge IP, and the remote side authenticates the push by matching the
|
||||
# source (via X-Forwarded-For) to the cell's VPN subnet. RETURN before
|
||||
# the MASQUERADE (inserted at the top of nat POSTROUTING). Caddy's reply
|
||||
# to the real VPN IP routes back via the cell-subnet host route
|
||||
# (ensure_cell_subnet_routes). The :80 service path keeps masquerade.
|
||||
_iptables(['-t', 'nat', '-I', 'POSTROUTING', '-s', vpn_subnet,
|
||||
'-d', caddy_ip, '-p', 'tcp', '--dport', '443',
|
||||
'-m', 'comment', '--comment', tag, '-j', 'RETURN'])
|
||||
|
||||
# Ensure reply traffic (e.g. ICMP, TCP ACKs) for connections initiated
|
||||
# by local peers to this cell is not dropped by the cell's catch-all DROP.
|
||||
ensure_forward_stateful()
|
||||
|
||||
# Host route so Caddy's peer-sync reply (to the linked cell's un-masqueraded
|
||||
# VPN IP) leaves via cell-wireguard rather than the default gateway. Added at
|
||||
# startup for all links; ensure it on runtime link-add too. Idempotent.
|
||||
ensure_cell_subnet_routes([{'vpn_subnet': vpn_subnet}])
|
||||
|
||||
logger.info(
|
||||
f"Applied cell rules for {cell_name} ({vpn_subnet}): "
|
||||
f"inbound={inbound_services} exit_relay={exit_relay}"
|
||||
@@ -684,6 +704,25 @@ def ensure_cell_subnet_routes(cell_links: List[Dict[str, Any]]) -> None:
|
||||
logger.warning(f'ensure_cell_subnet_routes: {subnet}: {e}')
|
||||
|
||||
|
||||
def remove_cell_subnet_route(vpn_subnet: str) -> None:
    """Delete the host route for a disconnected cell's VPN subnet (idempotent).

    Counterpart to ensure_cell_subnet_routes. Per the notes for this change,
    a route left behind after unlinking keeps sending that subnet via
    cell-wireguard and, on a host running the API directly, can make the
    local-subnet checks treat the stale subnet as locally attached.

    Args:
        vpn_subnet: the subnet whose route is removed. A falsy value is a
            no-op.

    The deletion runs `ip route del <subnet> via 172.20.0.9` in a throwaway
    host-network alpine container with NET_ADMIN. It is invoked with
    ``check=False``, and any exception is logged as a warning, never raised.
    """
    if vpn_subnet:
        bridge_gateway = '172.20.0.9'
        route_del_cmd = [
            'docker', 'run', '--rm', '--network', 'host', '--cap-add', 'NET_ADMIN',
            'alpine', 'ip', 'route', 'del', vpn_subnet, 'via', bridge_gateway,
        ]
        try:
            _run(route_del_cmd, check=False)
        except Exception as e:
            logger.warning(f'remove_cell_subnet_route: {vpn_subnet}: {e}')
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DNS ACL (CoreDNS Corefile generation)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -45,7 +45,7 @@ _HOOK_BINARY_RE = re.compile(r'^[a-z][a-z0-9_-]{0,31}$')
|
||||
_CAP_NAME_RE = re.compile(r'^[A-Z_]+$')
|
||||
_ID_RE = re.compile(r'^[a-z][a-z0-9_-]{0,30}$')
|
||||
_IMAGE_DIGEST_RE = re.compile(
|
||||
r'^git\.pic\.ngo/roof/[a-zA-Z0-9._/-]+@sha256:[0-9a-f]{64}$'
|
||||
r'^git\.pic\.ngo/roof/[a-zA-Z0-9._/-]+(:[a-zA-Z0-9._-]+)?@sha256:[0-9a-f]{64}$'
|
||||
)
|
||||
|
||||
# ── Build-context (Dockerfile) lint ───────────────────────────────────────
|
||||
|
||||
@@ -357,6 +357,13 @@ def remove_peer(peer_name):
|
||||
if success:
|
||||
if peer_ip:
|
||||
firewall_manager.clear_peer_rules(peer_ip)
|
||||
# Clear any cell_relay / route-via policy rule for this peer so a
|
||||
# deleted-while-assigned peer doesn't leave a stale source ip rule
|
||||
# (which could later misroute a new peer that reuses the IP).
|
||||
try:
|
||||
wireguard_manager.remove_peer_route_via(peer_ip)
|
||||
except Exception as wg_err:
|
||||
logger.warning(f"Peer {peer_name}: relay route cleanup failed (non-fatal): {wg_err}")
|
||||
_dns_primary, _dns_szones = _configured_dns_params()
|
||||
firewall_manager.apply_all_dns_rules(peer_registry.list_peers(), COREFILE_PATH, _dns_primary,
|
||||
cell_links=cell_link_manager.list_connections(),
|
||||
|
||||
@@ -8,15 +8,11 @@ bp = Blueprint('wireguard', __name__)
|
||||
def _effective_endpoint(wireguard_manager, config_manager) -> str:
|
||||
"""Return the WireGuard endpoint to embed in peer configs.
|
||||
|
||||
Uses wireguard_endpoint from identity config when set (admin override),
|
||||
falling back to get_external_ip() detection.
|
||||
Prefers the cell's public domain (DDNS/ACME modes) or an admin override over
|
||||
the raw external IP, so a peer config points at a name that resolves to the
|
||||
cell rather than a bare IP. See WireGuardManager.get_advertised_endpoint.
|
||||
"""
|
||||
srv = wireguard_manager.get_server_config()
|
||||
override = (config_manager.get_identity().get('wireguard_endpoint') or '').strip()
|
||||
if override:
|
||||
port = srv.get('port', 51820)
|
||||
return override if ':' in override else f'{override}:{port}'
|
||||
return srv.get('endpoint') or '<SERVER_IP>'
|
||||
return wireguard_manager.get_advertised_endpoint(config_manager) or '<SERVER_IP>'
|
||||
|
||||
@bp.route('/api/wireguard/keys', methods=['GET'])
|
||||
def get_wireguard_keys():
|
||||
|
||||
@@ -35,9 +35,10 @@ _SAFE_ID_RE = re.compile(r'^[a-z0-9][a-z0-9_-]{0,63}$')
|
||||
_DIGEST_RE = re.compile(r'@sha256:[0-9a-f]{64}$')
|
||||
|
||||
# Bundled cosign public key — shipped in the repo (config/cosign/cosign.pub) so
|
||||
# every cell can verify store-service image signatures offline. install.sh keeps
|
||||
# it at /opt/pic/config/cosign/cosign.pub; in the cell-api container it is
|
||||
# COPYed to /app/config/cosign/cosign.pub.
|
||||
# every cell can verify store-service image signatures offline. It is bind-mounted
|
||||
# into cell-api at /app/config/cosign/cosign.pub (see docker-compose.yml). Because
|
||||
# `make reinstall`/`uninstall` run `rm -rf config/`, setup_cell.ensure_cosign_pubkey()
|
||||
# restores it from git on every setup so the mount is never empty.
|
||||
_COSIGN_PUBKEY_PATH = os.environ.get(
|
||||
'PIC_COSIGN_PUBKEY', '/app/config/cosign/cosign.pub'
|
||||
)
|
||||
@@ -157,6 +158,11 @@ class ServiceComposer:
|
||||
result = result.replace('${PIC_CELL_NAME}', cell_name)
|
||||
result = result.replace('${PIC_SERVICE_ID}', service_id)
|
||||
result = result.replace('${PIC_DATA_DIR}', str(Path(self.data_dir).resolve()))
|
||||
# ${PIC_IMAGE} resolves to the manifest's image — the digest-pinned,
|
||||
# cosign-verified reference. Templates (especially instanceable ones)
|
||||
# MUST use this rather than hardcoding an image:tag, so the container
|
||||
# that actually runs is the same image the store verified at install.
|
||||
result = result.replace('${PIC_IMAGE}', str(manifest.get('image', '')))
|
||||
|
||||
if instance_vars:
|
||||
for var in ('INSTANCE_ID', 'REDIRECT_PORT'):
|
||||
|
||||
@@ -333,16 +333,32 @@ class ServiceStoreManager(BaseServiceManager):
|
||||
except Exception as e:
|
||||
return {'ok': False, 'error': f'Failed to fetch compose template: {e}'}
|
||||
|
||||
# Write compose file and start containers (validation inside write_compose)
|
||||
# Write compose file and start containers (validation inside write_compose).
|
||||
# Instanceable connectivity services back one container PER connection
|
||||
# instance, rendered later by ConnectivityManager with a concrete
|
||||
# ${INSTANCE_ID}/${REDIRECT_PORT}. Their base template still contains
|
||||
# those placeholders, so there is no base container to bring up at
|
||||
# install time — rendering/pulling/up-ing it here fails on the unset
|
||||
# variables. Verify the image signature now (the enforce gate still
|
||||
# applies), but defer the container to connection creation.
|
||||
if self.service_composer is not None:
|
||||
try:
|
||||
result = self.service_composer.install(service_id, manifest, template_content)
|
||||
except ValueError as e:
|
||||
return {'ok': False, 'error': str(e)}
|
||||
except Exception as e:
|
||||
return {'ok': False, 'error': f'Failed to start service: {e}'}
|
||||
if not result.get('ok'):
|
||||
return {'ok': False, 'error': result.get('error') or result.get('stderr', 'docker up failed')}
|
||||
if manifest.get('instanceable'):
|
||||
try:
|
||||
verify = self.service_composer.verify_image(service_id, manifest)
|
||||
except Exception as e:
|
||||
return {'ok': False, 'error': f'image verification failed: {e}'}
|
||||
if not verify.get('ok'):
|
||||
return {'ok': False,
|
||||
'error': verify.get('error', 'image verification failed')}
|
||||
else:
|
||||
try:
|
||||
result = self.service_composer.install(service_id, manifest, template_content)
|
||||
except ValueError as e:
|
||||
return {'ok': False, 'error': str(e)}
|
||||
except Exception as e:
|
||||
return {'ok': False, 'error': f'Failed to start service: {e}'}
|
||||
if not result.get('ok'):
|
||||
return {'ok': False, 'error': result.get('error') or result.get('stderr', 'docker up failed')}
|
||||
|
||||
# Persist minimal install record. For instanceable connectivity
|
||||
# services the raw compose template is stored so ConnectivityManager
|
||||
|
||||
@@ -786,21 +786,60 @@ class WireGuardManager(BaseServiceManager):
|
||||
logger.error(f'apply_peer_route_via failed: {e}')
|
||||
return False
|
||||
|
||||
def remove_peer_route_via(self, peer_ip: str, table: int = 100) -> None:
|
||||
"""Remove the ip rule for peer_ip added by apply_peer_route_via. Non-fatal."""
|
||||
def remove_peer_route_via(self, peer_ip: str) -> None:
|
||||
"""Remove the policy-routing ip rule(s) for peer_ip. Non-fatal.
|
||||
|
||||
Deletes every `ip rule from peer_ip/32` regardless of which table it
|
||||
points at: the v2 cell_relay path adds the rule with the connection's
|
||||
own table (1000+) while the legacy route-via path uses table 100, so a
|
||||
caller clearing a peer's exit does not reliably know the table. Matching
|
||||
by source alone removes the rule in both cases (and any duplicate). The
|
||||
shared routing *table* itself is torn down separately at connection
|
||||
teardown — see teardown_route_table.
|
||||
"""
|
||||
real_conf = self._config_file()
|
||||
if '/tmp/' in real_conf or 'pytest' in real_conf or 'wg_confs' not in real_conf:
|
||||
return
|
||||
try:
|
||||
subprocess.run(
|
||||
['docker', 'exec', 'cell-wireguard',
|
||||
'ip', 'rule', 'del', 'from', f'{peer_ip}/32',
|
||||
'pref', str(table), 'lookup', str(table)],
|
||||
capture_output=True, timeout=5
|
||||
)
|
||||
for _ in range(32):
|
||||
r = subprocess.run(
|
||||
['docker', 'exec', 'cell-wireguard',
|
||||
'ip', 'rule', 'del', 'from', f'{peer_ip}/32'],
|
||||
capture_output=True, timeout=5
|
||||
)
|
||||
if r.returncode != 0:
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def teardown_route_table(self, table: int) -> None:
    """Tear down a relay routing table once its connection is gone. Non-fatal.

    Two steps run inside the `cell-wireguard` container:

    1. Repeatedly delete `ip rule ... lookup <table>` entries until the
       delete fails (at most 64 attempts) — this catches a rule left behind
       by a peer deleted while still assigned.
    2. Flush the table's routes, notably the `default via <cell-ip>` route
       that apply_peer_route_via installs. That route is shared by every
       peer routed through the relay, so a per-peer detach must not remove
       it; it can only be cleared when the connection itself is removed,
       otherwise it leaks (stale default route, and a possible blackhole if
       a rule survives).

    Args:
        table: Numeric routing-table id of the connection being removed.
    """
    conf_path = self._config_file()
    looks_live = ('wg_confs' in conf_path
                  and '/tmp/' not in conf_path
                  and 'pytest' not in conf_path)
    if not looks_live:
        # Temp/pytest-looking config path: do not touch the container.
        return

    exec_prefix = ['docker', 'exec', 'cell-wireguard']
    try:
        table_id = str(table)
        attempts = 0
        while attempts < 64:
            deleted = subprocess.run(
                exec_prefix + ['ip', 'rule', 'del', 'lookup', table_id],
                capture_output=True, timeout=5
            )
            if deleted.returncode != 0:
                # Nothing (more) to delete.
                break
            attempts += 1
        subprocess.run(
            exec_prefix + ['ip', 'route', 'flush', 'table', table_id],
            capture_output=True, timeout=5
        )
    except Exception as e:
        logger.warning(f'teardown_route_table({table}) failed: {e}')
|
||||
|
||||
def remove_peer(self, public_key: str) -> bool:
|
||||
"""Remove the [Peer] block matching public_key from wg0.conf."""
|
||||
try:
|
||||
@@ -1054,6 +1093,38 @@ class WireGuardManager(BaseServiceManager):
|
||||
'vpn_network': self._get_configured_network(),
|
||||
}
|
||||
|
||||
# Domain modes whose effective domain is a publicly-resolvable FQDN that the
|
||||
# WireGuard endpoint should advertise instead of a raw IP. In these modes the
|
||||
# domain resolves (via DDNS/ACME) to the cell's public IP, so peers and linked
|
||||
# cells reach the cell by name — which survives IP changes and lets a NAT/router
|
||||
# forward each cell's WG port to the right host.
|
||||
PUBLIC_DOMAIN_MODES = ('pic_ngo', 'cloudflare', 'duckdns', 'http01')
|
||||
|
||||
def get_advertised_endpoint(self, config_manager=None) -> Optional[str]:
    """Return the WireGuard endpoint (host:port) to advertise to peers/cells.

    Preference order:
      1. an explicit admin override (`_identity.wireguard_endpoint`),
      2. the cell's public domain in a DDNS/ACME mode (`<domain>:<port>`),
      3. the detected external IP (`<ip>:<port>`) — LAN/fallback.

    The port is always this cell's own configured WireGuard port, so a cell
    on a non-default port advertises it correctly (the router forwards that
    public port to this host). An override that already carries a port is
    returned unchanged. IPv6 literals are wrapped in brackets
    (`[addr]:port`) so the address's own colons are not mistaken for the
    port separator.

    Args:
        config_manager: Optional object providing `get_identity()` and
            `get_effective_domain()`. When omitted, only the external-IP
            fallback applies.

    Returns:
        The endpoint string, or None when no override, public domain, or
        external IP is available.
    """
    import ipaddress  # local import: only this method needs it

    port = self._get_configured_port()

    def _ipv6_literal(host):
        """Return the bare IPv6 address if host is one (bracketed or not)."""
        bare = host[1:-1] if host.startswith('[') and host.endswith(']') else host
        try:
            return bare if ipaddress.ip_address(bare).version == 6 else None
        except ValueError:
            return None  # a hostname, host:port, or IPv4 address

    def _join(host):
        """Combine host with this cell's port, bracketing IPv6 literals."""
        v6 = _ipv6_literal(host)
        return f'[{v6}]:{port}' if v6 else f'{host}:{port}'

    # Tolerate a config manager that returns None for a missing identity.
    identity = (config_manager.get_identity() or {}) if config_manager is not None else {}
    override = (identity.get('wireguard_endpoint') or '').strip()
    if override:
        # A colon means "port already given" — unless the whole value is a
        # bare IPv6 address, which still needs the port appended.
        if ':' in override and _ipv6_literal(override) is None:
            return override
        return _join(override)
    mode = identity.get('domain_mode', 'lan')
    if mode in self.PUBLIC_DOMAIN_MODES and config_manager is not None:
        host = (config_manager.get_effective_domain() or '').strip()
        if host:
            return _join(host)
    ext = self.get_external_ip()
    return _join(ext) if ext else None
|
||||
|
||||
def get_peer_status(self, public_key: str) -> Dict[str, Any]:
|
||||
"""Return live handshake + transfer stats for a peer from `wg show`."""
|
||||
try:
|
||||
|
||||
@@ -62,6 +62,38 @@ def ensure_file(rel):
|
||||
print(f'[EXISTS] {rel}')
|
||||
|
||||
|
||||
def ensure_cosign_pubkey():
    """Put back the git-tracked cosign public key when it has gone missing.

    `config/cosign/cosign.pub` is a tracked asset that is bind-mounted into
    cell-api and used to verify store-service image signatures. `make
    reinstall`/`uninstall` run `rm -rf config/`, which removes it from the
    working tree, and nothing else recreates it — so every store install
    would break under the default enforce mode. Setup runs on every
    install/reinstall, so the key is restored from HEAD here.

    Best-effort: a missing or empty key is rewritten from `git show`; if
    that is not possible (e.g. not a git checkout) a warning is printed
    instead of failing — install.sh surfaces the same warning.
    """
    rel = os.path.join('config', 'cosign', 'cosign.pub')
    target = os.path.join(ROOT, rel)

    # An empty file counts as missing.
    already_present = os.path.exists(target) and os.path.getsize(target) > 0
    if already_present:
        print(f'[EXISTS] {rel}')
        return

    os.makedirs(os.path.dirname(target), exist_ok=True)
    try:
        shown = subprocess.run(
            ['git', '-C', ROOT, 'show', 'HEAD:config/cosign/cosign.pub'],
            capture_output=True, check=True)
        key_bytes = shown.stdout
        if key_bytes:
            with open(target, 'wb') as out:
                out.write(key_bytes)
            print(f'[RESTORED] {rel} (from git HEAD)')
            return
    except Exception as e:
        print(f'[WARN] could not restore {rel} from git: {e}')

    # Reached when git failed or returned no content.
    print(f'[WARN] {rel} is missing — store-service image signature '
          'verification will fail under enforce mode until it is provided')
|
||||
|
||||
|
||||
def ensure_caddy_ca_cert():
|
||||
cert_dir = os.path.join(ROOT, 'config', 'caddy', 'certs')
|
||||
ca_key = os.path.join(cert_dir, 'ca.key')
|
||||
@@ -402,6 +434,7 @@ def main():
|
||||
for f in REQUIRED_FILES:
|
||||
ensure_file(f)
|
||||
|
||||
ensure_cosign_pubkey()
|
||||
ensure_caddy_ca_cert()
|
||||
priv, _pub = generate_wg_keys()
|
||||
write_wg0_conf(priv, vpn_address, wg_port)
|
||||
|
||||
@@ -48,12 +48,16 @@ class TestGenerateCaddyfileLan(unittest.TestCase):
|
||||
self.assertNotIn('acme_email', out)
|
||||
self.assertNotIn('dns pic_ngo', out)
|
||||
self.assertNotIn('dns cloudflare', out)
|
||||
# Internal-CA TLS pair
|
||||
# Internal-CA TLS pair, on an HTTPS (443) site — never on an http:// one.
|
||||
self.assertIn('tls /etc/caddy/internal/cert.pem '
|
||||
'/etc/caddy/internal/key.pem', out)
|
||||
# Cell hostname plus virtual IP listener
|
||||
self.assertIn('http://mycell.cell', out)
|
||||
self.assertIn('http://172.20.0.2:80', out)
|
||||
self.assertIn('https://mycell.cell {', out)
|
||||
# Cell hostname plus virtual IP listener on plain HTTP (80)
|
||||
self.assertIn('http://mycell.cell, http://172.20.0.2:80 {', out)
|
||||
# The HTTP (:80) block must NOT carry a tls directive — Caddy rejects
|
||||
# "server listening on [:80] is HTTP, but attempts to configure TLS".
|
||||
http_block = out.split('http://mycell.cell, http://172.20.0.2:80 {', 1)[1]
|
||||
self.assertNotIn('tls ', http_block)
|
||||
|
||||
|
||||
class TestGenerateCaddyfilePicNgo(unittest.TestCase):
|
||||
|
||||
+117
-12
@@ -14,9 +14,39 @@ import json
|
||||
import shutil
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import cell_link_manager
|
||||
from cell_link_manager import CellLinkManager
|
||||
|
||||
|
||||
_fw_patch = None
|
||||
|
||||
|
||||
def setUpModule():
    """Neutralise firewall side effects for every test in this module.

    add_connection/remove_connection call into firewall_manager, which shells
    out to `docker exec cell-wireguard iptables` and `docker run` host-route
    changes. On a dev/CI host those would mutate live routes — a stale
    cell-subnet route once made is_local_request treat a VPN subnet as local
    and broke the full suite. Tests that assert specific firewall calls apply
    their own local patch, which takes precedence inside its context.
    """
    global _fw_patch
    stubs = {
        'apply_cell_rules': MagicMock(return_value=True),
        'clear_cell_rules': MagicMock(),
        'ensure_cell_subnet_routes': MagicMock(),
        'remove_cell_subnet_route': MagicMock(),
    }
    _fw_patch = patch.multiple('firewall_manager', **stubs)
    _fw_patch.start()
|
||||
|
||||
|
||||
def tearDownModule():
    """Undo the module-wide firewall_manager patch, if one was started."""
    if _fw_patch is None:
        return
    _fw_patch.stop()
|
||||
|
||||
|
||||
def _make_wg_mock():
|
||||
wg = MagicMock()
|
||||
wg.get_keys.return_value = {'public_key': 'serverpubkey=', 'private_key': 'serverprivkey='}
|
||||
@@ -26,6 +56,7 @@ def _make_wg_mock():
|
||||
}
|
||||
wg._get_configured_network.return_value = '10.0.0.0/24'
|
||||
wg._get_configured_address.return_value = '10.0.0.1/24'
|
||||
wg.get_advertised_endpoint.return_value = '1.2.3.4:51820'
|
||||
wg.add_cell_peer.return_value = True
|
||||
wg.remove_peer.return_value = True
|
||||
return wg
|
||||
@@ -49,6 +80,37 @@ SAMPLE_INVITE = {
|
||||
}
|
||||
|
||||
|
||||
class TestInviteFieldValidation(unittest.TestCase):
    """_validate_invite_fields must refuse malformed remote-invite fields.

    The domain/dns_ip end up in a `curl --resolve` argv on peer-sync push, so
    a leading-dash domain (argument injection) and a non-IP dns_ip have to be
    rejected.
    """

    def _assert_rejected(self, **overrides):
        """Assert SAMPLE_INVITE with `overrides` applied raises ValueError."""
        tampered = dict(SAMPLE_INVITE, **overrides)
        with self.assertRaises(ValueError):
            cell_link_manager._validate_invite_fields(tampered)

    def test_valid_invite_passes(self):
        # Must simply not raise.
        cell_link_manager._validate_invite_fields(SAMPLE_INVITE)

    def test_rejects_leading_dash_domain(self):
        self._assert_rejected(domain='-oProxyCommand=evil')

    def test_rejects_non_ip_dns_ip(self):
        self._assert_rejected(dns_ip='-x')

    def test_rejects_bad_subnet(self):
        self._assert_rejected(vpn_subnet='not-a-cidr')

    def test_rejects_bad_endpoint(self):
        self._assert_rejected(endpoint='-evil:51820')
|
||||
|
||||
|
||||
class TestCellLinkManagerInvite(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
@@ -82,6 +144,13 @@ class TestCellLinkManagerInvite(unittest.TestCase):
|
||||
self.assertEqual(invite['cell_name'], 'myhome')
|
||||
self.assertEqual(invite['domain'], 'myhome.local')
|
||||
|
||||
def test_generate_invite_endpoint_from_advertised_endpoint(self):
    """The invite's endpoint is whatever get_advertised_endpoint returns.

    That value is domain-aware (not a raw external IP), so the remote cell
    reaches us by name and on our own port.
    """
    advertised = 'myhome.pic.ngo:51821'
    self.wg.get_advertised_endpoint.return_value = advertised
    invite = self.mgr.generate_invite('myhome', 'myhome.pic.ngo')
    self.assertEqual(invite['endpoint'], advertised)
|
||||
|
||||
|
||||
class TestCellLinkManagerConnections(unittest.TestCase):
|
||||
|
||||
@@ -138,6 +207,15 @@ class TestCellLinkManagerConnections(unittest.TestCase):
|
||||
self.mgr.remove_connection('office')
|
||||
self.nm.remove_cell_dns_forward.assert_called_once_with('office.cell')
|
||||
|
||||
def test_remove_connection_removes_host_subnet_route(self):
    """Unlinking a cell drops its host route.

    Otherwise the subnet would stay blackholed / be treated as locally
    attached.
    """
    import firewall_manager as _fm
    self.mgr.add_connection(SAMPLE_INVITE)
    route_remover = _fm.remove_cell_subnet_route
    # Ignore anything recorded while the link was being added.
    route_remover.reset_mock()
    self.mgr.remove_connection('office')
    route_remover.assert_called_once_with('10.1.0.0/24')
|
||||
|
||||
def test_remove_connection_deletes_from_list(self):
|
||||
self.mgr.add_connection(SAMPLE_INVITE)
|
||||
self.mgr.remove_connection('office')
|
||||
@@ -182,7 +260,9 @@ class TestCellLinkManagerConnections(unittest.TestCase):
|
||||
result = self.mgr.accept_invite(updated_invite)
|
||||
|
||||
self.assertEqual(result['dns_ip'], '10.1.0.2')
|
||||
self.assertEqual(result['remote_api_url'], 'http://10.1.0.2:3000')
|
||||
# remote_api_url is domain-based (the push --resolves it to the VPN IP),
|
||||
# so a dns_ip change does not alter it.
|
||||
self.assertEqual(result['remote_api_url'], 'https://office.cell')
|
||||
self.nm.remove_cell_dns_forward.assert_called()
|
||||
self.nm.add_cell_dns_forward.assert_called_with(
|
||||
domain='office.cell', dns_ip='10.1.0.2')
|
||||
@@ -470,9 +550,10 @@ class TestPushInviteToRemote(unittest.TestCase):
|
||||
result = self.mgr._push_invite_to_remote(link)
|
||||
self.assertFalse(result['ok'])
|
||||
|
||||
def test_push_invite_sends_to_correct_lan_host(self):
|
||||
"""The curl URL must use the LAN IP from the endpoint, not the WG dns_ip."""
|
||||
link = self._make_link(endpoint='192.168.31.52:51820')
|
||||
def test_push_invite_sends_to_endpoint_host_over_https(self):
|
||||
"""The curl targets the endpoint host on Caddy/HTTPS (443), not the WG
|
||||
dns_ip and not the internal :3000 API port."""
|
||||
link = self._make_link(endpoint='alice.pic.ngo:51821')
|
||||
captured = {}
|
||||
|
||||
def fake_run(cmd, **kw):
|
||||
@@ -493,10 +574,11 @@ class TestPushInviteToRemote(unittest.TestCase):
|
||||
self.mgr._push_invite_to_remote(link)
|
||||
|
||||
url_in_cmd = captured['cmd'][-1]
|
||||
self.assertIn('192.168.31.52', url_in_cmd)
|
||||
self.assertIn('accept-invite', url_in_cmd)
|
||||
# Must NOT use the WG dns_ip (10.1.0.1)
|
||||
self.assertNotIn('10.1.0.1', url_in_cmd)
|
||||
self.assertEqual(url_in_cmd,
|
||||
'https://alice.pic.ngo/api/cells/peer-sync/accept-invite')
|
||||
self.assertNotIn(':3000', url_in_cmd)
|
||||
self.assertNotIn('10.1.0.1', url_in_cmd) # not the WG dns_ip
|
||||
self.assertIn('-k', captured['cmd']) # cert may not match a bare IP
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -605,7 +687,7 @@ class TestAcceptInviteNew(unittest.TestCase):
|
||||
with patch('firewall_manager.apply_cell_rules'):
|
||||
result = self.mgr.accept_invite(updated)
|
||||
self.assertEqual(result['dns_ip'], '10.1.0.5')
|
||||
self.assertEqual(result['remote_api_url'], 'http://10.1.0.5:3000')
|
||||
self.assertEqual(result['remote_api_url'], 'https://office.cell')
|
||||
self.nm.remove_cell_dns_forward.assert_called()
|
||||
self.nm.add_cell_dns_forward.assert_called_with(
|
||||
domain='office.cell', dns_ip='10.1.0.5')
|
||||
@@ -1045,9 +1127,11 @@ class TestPermissionSync(unittest.TestCase):
|
||||
self.assertIn('last_push_at', link)
|
||||
self.assertIn('last_remote_update_at', link)
|
||||
|
||||
def test_add_connection_sets_remote_api_url_from_dns_ip(self):
|
||||
def test_add_connection_sets_remote_api_url_from_domain(self):
|
||||
link = self._add_office()
|
||||
self.assertEqual(link['remote_api_url'], 'http://10.1.0.1:3000')
|
||||
# Cross-cell API is reached via the remote's domain over Caddy/443 (the
|
||||
# push --resolves the domain to the VPN IP over the tunnel).
|
||||
self.assertEqual(link['remote_api_url'], 'https://office.cell')
|
||||
|
||||
def test_add_connection_triggers_push(self):
|
||||
push_mock = MagicMock(return_value={'ok': True, 'error': None})
|
||||
@@ -1321,7 +1405,7 @@ class TestPermissionSync(unittest.TestCase):
|
||||
self.assertIn('last_push_status', link)
|
||||
self.assertIn('last_push_at', link)
|
||||
self.assertIn('last_remote_update_at', link)
|
||||
self.assertEqual(link['remote_api_url'], 'http://10.1.0.1:3000')
|
||||
self.assertEqual(link['remote_api_url'], 'https://office.cell')
|
||||
self.assertTrue(link['pending_push']) # pre-existing → marked pending
|
||||
self.assertEqual(link['last_push_status'], 'never')
|
||||
|
||||
@@ -1330,6 +1414,27 @@ class TestPermissionSync(unittest.TestCase):
|
||||
raw = json.load(f)
|
||||
self.assertIn('pending_push', raw[0])
|
||||
|
||||
def test_load_migrates_legacy_url_forms_to_https_domain(self):
    """Both legacy remote_api_url shapes are rewritten on load.

    `http://<ip>:3000` (unreachable) and `https://<ip>` (no matching Caddy
    cert for a bare IP) must each come back as the domain-based HTTPS form.
    """
    links_file = os.path.join(self.test_dir, 'cell_links.json')
    for legacy_url in ('http://10.1.0.9:3000', 'https://10.1.0.9'):
        stored_link = {
            'cell_name': 'office',
            'public_key': 'officepubkey=',
            'vpn_subnet': '10.1.0.0/24',
            'dns_ip': '10.1.0.9',
            'domain': 'office.cell',
            'permissions': {'inbound': {}, 'outbound': {}},
            'remote_api_url': legacy_url,
        }
        # Write the legacy record straight to disk, then read it back
        # through the manager.
        with open(links_file, 'w') as f:
            json.dump([stored_link], f)
        link = self.mgr.list_connections()[0]
        self.assertEqual(link['remote_api_url'], 'https://office.cell',
                         f'failed to migrate {legacy_url!r}')
|
||||
|
||||
|
||||
class TestExitOffer(unittest.TestCase):
|
||||
"""Tests for Phase 2: exit-offer signaling."""
|
||||
|
||||
@@ -319,5 +319,56 @@ class TestHealth(_Base):
|
||||
self.assertEqual(health, 'down')
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Teardown cleanup — regression for the confirmed cell_relay routing leak.
|
||||
#
|
||||
# A cell_relay policy-routes a peer with `ip rule from <peer> lookup <table>`
|
||||
# plus a shared `default via <cell-ip>` route in that table, inside
|
||||
# cell-wireguard. Before the fix, detaching/deleting the peer left the rule
|
||||
# (remove_peer_route_via used the wrong default table) and nothing ever flushed
|
||||
# the table's default route — both leaked, confirmed on hardware.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestTeardownCleanup(_Base):
    """Regression tests: relay teardown must clean up rule and route table."""

    def _relay(self):
        """Reconcile a single 'alpha' link and return the first raw relay."""
        self.cell_link.list_connections.return_value = [_link('alpha')]
        self.mgr.reconcile_cell_relays()
        return self._raw_relays()[0]

    def test_detach_removes_peer_ip_rule(self):
        relay = self._relay()
        self.peer_registry.get_peer.return_value = {
            'peer': 'laptop',
            'ip': '10.0.0.5/32',
            'exit_via': relay['id'],
            'route_via': 'alpha',
        }
        self.peer_registry.set_peer_exit_via.return_value = True
        with patch.object(self.mgr, 'apply_routes'):
            res = self.mgr.set_peer_exit('laptop', 'default')
        self.assertTrue(res['ok'])
        # The rule is cleared by source address only (table-agnostic), so it
        # matches the relay's allocated table rather than the old default 100.
        self.wg.remove_peer_route_via.assert_called_once_with('10.0.0.5')

    def test_delete_connection_flushes_relay_route_table(self):
        relay = self._relay()
        # No peer references the relay (detached), so it is deletable.
        self.peer_registry.list_peers.return_value = []
        res = self.mgr.delete_connection(relay['id'])
        self.assertTrue(res['ok'])
        self.wg.teardown_route_table.assert_called_once_with(relay['table'])

    def test_reconcile_removal_flushes_relay_route_table(self):
        relay = self._relay()
        table = relay['table']
        # Offer withdrawn and unreferenced: reconcile removes the relay and
        # must flush its routing table (this path bypasses delete_connection).
        withdrawn = _link('alpha', remote_exit_offered=False)
        self.cell_link.list_connections.return_value = [withdrawn]
        self.peer_registry.list_peers.return_value = []
        out = self.mgr.reconcile_cell_relays()
        self.assertIn(relay['id'], out['removed'])
        self.wg.teardown_route_table.assert_called_once_with(table)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -110,6 +110,13 @@ class _Base(unittest.TestCase):
|
||||
for _svc in ('wireguard-ext', 'openvpn-client', 'tor', 'sshuttle', 'proxy'):
|
||||
self.cm.set_installed_service(_svc, {'id': _svc, 'manifest': {}})
|
||||
self.cm._save_all_configs()
|
||||
# No test in this module should shell into the WireGuard container. Stub
|
||||
# the exec helpers so host-rule cleanup paths (delete_connection) never
|
||||
# touch real docker/iptables; returncode 1 makes the drain loops stop.
|
||||
self.mgr._wg_ip = MagicMock(
|
||||
return_value=MagicMock(returncode=1, stdout='', stderr=''))
|
||||
self.mgr._wg_iptables = MagicMock(
|
||||
return_value=MagicMock(returncode=1, stdout='', stderr=''))
|
||||
|
||||
def tearDown(self):
|
||||
shutil.rmtree(self.tmp, ignore_errors=True)
|
||||
@@ -296,6 +303,42 @@ class TestDeleteConnection(_Base):
|
||||
out = self.mgr.delete_connection('conn_nope')
|
||||
self.assertFalse(out['ok'])
|
||||
|
||||
def test_delete_removes_host_ip_rule(self):
    """Deleting a connection must also delete its fwmark->table ip rule.

    apply_routes only re-adds rules for the connections that survive and
    only flushes the PIC_CONNECTIVITY chains, so without an explicit delete
    the removed connection's individually-managed `ip rule` would leak in
    cell-wireguard.
    """
    created = self.mgr.create_connection('proxy', 'gone', _proxy_cfg())
    conn = created['connection']
    expected_del = ['rule', 'del', 'fwmark', hex(conn['mark']),
                    'lookup', str(conn['table'])]
    # Only calls made during the delete are of interest.
    self.mgr._wg_ip.reset_mock()
    out = self.mgr.delete_connection(conn['id'])
    self.assertTrue(out['ok'], out)
    issued = [call.args[0] for call in self.mgr._wg_ip.call_args_list]
    self.assertIn(expected_del, issued)
|
||||
|
||||
def test_delete_removes_iface_killswitch(self):
    """Deleting an iface-type connection removes its FORWARD kill-switch."""
    created = self.mgr.create_connection(
        'wireguard_ext', 'wgks', {},
        secrets={'conf': '[Interface]\nPrivateKey = x\n'})
    conn = created['connection']
    mark_hex, iface = hex(conn['mark']), conn['iface']
    self.mgr._wg_iptables.reset_mock()
    out = self.mgr.delete_connection(conn['id'])
    self.assertTrue(out['ok'], out)
    ipt_calls = [call.args[0] for call in self.mgr._wg_iptables.call_args_list]

    def _is_killswitch_delete(argv):
        # A `-D FORWARD` carrying this connection's mark and interface.
        return (argv[:2] == ['-D', 'FORWARD'] and '--mark' in argv
                and mark_hex in argv and iface in argv)

    self.assertTrue(any(_is_killswitch_delete(argv) for argv in ipt_calls),
                    ipt_calls)
|
||||
|
||||
|
||||
class TestUpdateConnection(_Base):
|
||||
|
||||
|
||||
@@ -652,6 +652,7 @@ class TestCellRules(unittest.TestCase):
|
||||
patch.object(firewall_manager, '_get_cell_api_ip', return_value=self._FAKE_API_IP), \
|
||||
patch.object(firewall_manager, '_get_caddy_container_ip', return_value=self._FAKE_CADDY_IP), \
|
||||
patch.object(firewall_manager, '_get_dns_container_ip', return_value=self._FAKE_DNS_IP), \
|
||||
patch.object(firewall_manager, 'ensure_cell_subnet_routes', return_value=None), \
|
||||
patch.object(firewall_manager, 'ensure_forward_stateful', return_value=True):
|
||||
firewall_manager.apply_cell_rules(cell_name, vpn_subnet, inbound_services)
|
||||
|
||||
@@ -702,32 +703,59 @@ class TestCellRules(unittest.TestCase):
|
||||
]
|
||||
self.assertTrue(subnet_drops, "Expected a catch-all DROP rule for the subnet")
|
||||
|
||||
def test_apply_cell_rules_sends_accept_for_allowed_service(self):
|
||||
"""apply_cell_rules inserts Caddy ACCEPT when inbound_services is non-empty."""
|
||||
calls = self._capture_apply('office', '10.0.1.0/24', ['calendar'])
|
||||
caddy_targets = self._targets_for_dest(calls, self._FAKE_CADDY_IP)
|
||||
self.assertIn('ACCEPT', caddy_targets,
|
||||
"Expected ACCEPT to Caddy when inbound_services is non-empty")
|
||||
|
||||
def test_apply_cell_rules_no_caddy_accept_when_no_inbound(self):
|
||||
"""apply_cell_rules does NOT insert Caddy ACCEPT when inbound_services is empty."""
|
||||
calls = self._capture_apply('office', '10.0.1.0/24', [])
|
||||
caddy_targets = self._targets_for_dest(calls, self._FAKE_CADDY_IP)
|
||||
self.assertNotIn('ACCEPT', caddy_targets,
|
||||
"No Caddy ACCEPT expected when inbound_services is empty")
|
||||
|
||||
def test_apply_cell_rules_accepts_api_sync_traffic(self):
|
||||
"""apply_cell_rules inserts ACCEPT for cell-api:3000 so permission-sync pushes pass."""
|
||||
calls = self._capture_apply('office', '10.0.1.0/24', [])
|
||||
api_ip = self._FAKE_API_IP
|
||||
api_accepts = [
|
||||
def _caddy_accepts_on_port(self, calls, port):
|
||||
"""Caddy-dest ACCEPT calls matching --dport <port>."""
|
||||
return [
|
||||
c for c in calls
|
||||
if '-s' in c and '10.0.1.0/24' in c
|
||||
and '-d' in c and api_ip in c
|
||||
and '--dport' in c and '3000' in c
|
||||
if '-d' in c and self._FAKE_CADDY_IP in c
|
||||
and '--dport' in c and str(port) in c
|
||||
and '-j' in c and c[c.index('-j') + 1] == 'ACCEPT'
|
||||
]
|
||||
self.assertTrue(api_accepts, 'Expected an ACCEPT rule for cell-api:3000')
|
||||
|
||||
def test_apply_cell_rules_sends_accept_for_allowed_service(self):
    """A non-empty inbound_services list yields a Caddy:80 ACCEPT rule."""
    calls = self._capture_apply('office', '10.0.1.0/24', ['calendar'])
    http_accepts = self._caddy_accepts_on_port(calls, 80)
    self.assertTrue(http_accepts,
                    "Expected ACCEPT to Caddy:80 for an inbound service")
|
||||
|
||||
def test_apply_cell_rules_no_service_accept_when_no_inbound(self):
    """An empty inbound_services list yields no Caddy:80 (service) ACCEPT.

    The :443 peer-sync ACCEPT is a separate rule and is always present; it
    is covered by its own test.
    """
    calls = self._capture_apply('office', '10.0.1.0/24', [])
    http_accepts = self._caddy_accepts_on_port(calls, 80)
    self.assertFalse(http_accepts,
                     "No Caddy:80 service ACCEPT expected with empty inbound")
|
||||
|
||||
def test_apply_cell_rules_accepts_peer_sync_to_caddy_443(self):
    """The cross-cell peer-sync ACCEPT to Caddy:443 is always added.

    The push reaches cell-api through Caddy, since the API binds 127.0.0.1
    only.
    """
    subnet = '10.0.1.0/24'
    calls = self._capture_apply('office', subnet, [])

    peer_sync = []
    for rule in self._caddy_accepts_on_port(calls, 443):
        if '-s' in rule and subnet in rule:
            peer_sync.append(rule)
    self.assertTrue(peer_sync, 'Expected ACCEPT to Caddy:443 for peer-sync')

    # And nothing may target the (127.0.0.1-only) cell-api on :3000 anymore.
    api_3000 = []
    for rule in calls:
        if ('-d' in rule and self._FAKE_API_IP in rule
                and '--dport' in rule and '3000' in rule):
            api_3000.append(rule)
    self.assertFalse(api_3000, 'Peer-sync must not target cell-api:3000')
|
||||
|
||||
def test_apply_cell_rules_excludes_peer_sync_from_masquerade(self):
    """Peer-sync to Caddy:443 must RETURN in nat POSTROUTING.

    Skipping the blanket MASQUERADE lets the remote see the linked cell's
    real VPN source address for auth.
    """
    calls = self._capture_apply('office', '10.0.1.0/24', [])
    # Tokens that must all appear in the rule; '-j' is last so the jump
    # target is only inspected once its presence is established.
    required = ('-t', 'nat', 'POSTROUTING',
                '-s', '10.0.1.0/24',
                '-d', self._FAKE_CADDY_IP,
                '--dport', '443',
                '-j')
    returns = [
        rule for rule in calls
        if all(token in rule for token in required)
        and rule[rule.index('-j') + 1] == 'RETURN'
    ]
    self.assertTrue(returns, 'Expected nat POSTROUTING RETURN to preserve peer-sync source')
|
||||
|
||||
def test_apply_cell_rules_api_sync_accept_before_catchall_drop(self):
|
||||
"""The API-sync ACCEPT must be inserted after service rules so it ends up above DROP."""
|
||||
@@ -743,6 +771,7 @@ class TestCellRules(unittest.TestCase):
|
||||
patch.object(firewall_manager, '_get_cell_api_ip', return_value='172.20.0.10'), \
|
||||
patch.object(firewall_manager, '_get_caddy_container_ip', return_value='172.20.0.2'), \
|
||||
patch.object(firewall_manager, '_get_dns_container_ip', return_value='172.20.0.3'), \
|
||||
patch.object(firewall_manager, 'ensure_cell_subnet_routes', return_value=None), \
|
||||
patch.object(firewall_manager, 'ensure_forward_stateful', return_value=True):
|
||||
firewall_manager.apply_cell_rules('office', '10.0.1.0/24', [])
|
||||
|
||||
@@ -754,12 +783,12 @@ class TestCellRules(unittest.TestCase):
|
||||
# ── apply_cell_rules — empty inbound (all-deny) ───────────────────────────
|
||||
|
||||
def test_apply_cell_rules_empty_inbound_no_service_accept(self):
|
||||
"""With inbound_services=[], no service ACCEPT is added; catch-all DROP blocks traffic."""
|
||||
"""With inbound_services=[], no Caddy:80 service ACCEPT is added; the catch-all
|
||||
DROP blocks service traffic (only the :443 peer-sync ACCEPT is present)."""
|
||||
calls = self._capture_apply('office', '10.0.1.0/24', [])
|
||||
# No ACCEPT to Caddy
|
||||
caddy_targets = self._targets_for_dest(calls, self._FAKE_CADDY_IP)
|
||||
self.assertNotIn('ACCEPT', caddy_targets,
|
||||
"No Caddy ACCEPT expected with empty inbound_services")
|
||||
# No service ACCEPT to Caddy on :80
|
||||
self.assertFalse(self._caddy_accepts_on_port(calls, 80),
|
||||
"No Caddy:80 ACCEPT expected with empty inbound_services")
|
||||
# No per-VIP rules at all
|
||||
for service, svc_ip in firewall_manager.SERVICE_IPS.items():
|
||||
svc_targets = self._targets_for_dest(calls, svc_ip)
|
||||
@@ -839,6 +868,31 @@ class TestCellRules(unittest.TestCase):
|
||||
# peer rule for a different entity must survive
|
||||
self.assertIn('pic-peer-10-0-0-2/32', content)
|
||||
|
||||
# ── remove_cell_subnet_route ──────────────────────────────────────────────
|
||||
|
||||
def test_remove_cell_subnet_route_issues_ip_route_del(self):
    """remove_cell_subnet_route issues an `ip route del` for the subnet."""
    seen = []

    def fake_run(cmd, **kw):
        seen.append(cmd)
        return MagicMock(returncode=0, stdout='', stderr='')

    with patch.object(firewall_manager, '_run', side_effect=fake_run):
        firewall_manager.remove_cell_subnet_route('10.1.0.0/24')

    # Inspect the most recent command handed to _run (empty if none).
    cmd = seen[-1] if seen else []
    for token in ('ip', 'route', 'del', '10.1.0.0/24', '172.20.0.9'):
        self.assertIn(token, cmd)
|
||||
|
||||
def test_remove_cell_subnet_route_noop_on_empty(self):
    """With an empty subnet nothing is executed (no docker call)."""
    with patch.object(firewall_manager, '_run') as run_mock:
        firewall_manager.remove_cell_subnet_route('')
    run_mock.assert_not_called()
|
||||
|
||||
# ── apply_all_cell_rules ──────────────────────────────────────────────────
|
||||
|
||||
def test_apply_all_cell_rules_calls_apply_for_each(self):
|
||||
@@ -1117,6 +1171,7 @@ class TestEnsureForwardStateful(unittest.TestCase):
|
||||
patch.object(firewall_manager, '_get_caddy_container_ip', return_value='172.20.0.2'), \
|
||||
patch.object(firewall_manager, '_get_dns_container_ip', return_value='172.20.0.3'), \
|
||||
patch.object(firewall_manager, '_get_cell_api_ip', return_value='172.20.0.10'), \
|
||||
patch.object(firewall_manager, 'ensure_cell_subnet_routes', return_value=None), \
|
||||
patch.object(firewall_manager, 'ensure_forward_stateful') as mock_stateful:
|
||||
firewall_manager.apply_cell_rules('testcell', '10.0.0.0/24', [])
|
||||
mock_stateful.assert_called_once()
|
||||
|
||||
@@ -175,6 +175,19 @@ class TestValidateManifest(unittest.TestCase):
|
||||
self.assertTrue(ok)
|
||||
self.assertEqual(errs, [])
|
||||
|
||||
def test_image_tag_and_digest_passes(self):
    # The publish pipeline writes back name:tag@sha256:<digest>, which is a
    # valid OCI reference. The validator has to accept a tag next to the
    # digest — service_store_manager already does, and rejecting it here
    # would block every published store image from installing.
    image_ref = 'git.pic.ngo/roof/svc-proxy:latest@sha256:' + 'a' * 64
    manifest = _minimal_manifest(image=image_ref)
    ok, errs = validate_manifest(manifest)
    self.assertTrue(ok, errs)
    self.assertEqual(errs, [])
|
||||
|
||||
def test_image_wrong_registry_rejected(self):
|
||||
digest = 'a' * 64
|
||||
ok, errs = validate_manifest(
|
||||
|
||||
@@ -88,6 +88,23 @@ class TestRenderTemplate(unittest.TestCase):
|
||||
result = self.composer.render_template('myservice', manifest, template)
|
||||
self.assertEqual(result, 'ID=myservice')
|
||||
|
||||
def test_pic_image_substituted_from_manifest(self):
    # ${PIC_IMAGE} has to expand to the manifest's digest-pinned image, so
    # the per-instance container runs exactly the ref the store verified
    # rather than an unpinned :latest hardcoded in the template.
    pinned_ref = 'git.pic.ngo/roof/svc-proxy:latest@sha256:' + 'a' * 64
    manifest = _make_manifest()
    manifest['image'] = pinned_ref
    rendered = self.composer.render_template(
        'myservice', manifest, 'image: ${PIC_IMAGE}')
    self.assertEqual(rendered, f'image: {pinned_ref}')
|
||||
|
||||
def test_pic_image_empty_when_manifest_has_no_image(self):
    """``${PIC_IMAGE}`` renders as an empty string for the default test manifest."""
    template = 'image: ${PIC_IMAGE}'
    # No 'image' key is set on the manifest here, unlike the substitution test.
    manifest = _make_manifest()

    rendered = self.composer.render_template('myservice', manifest, template)

    self.assertEqual(rendered, 'image: ')
|
||||
|
||||
def test_pic_secret_generated_and_substituted(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
composer = _composer(data_dir=tmpdir)
|
||||
|
||||
@@ -726,6 +726,43 @@ class TestInstall(unittest.TestCase):
|
||||
self.assertIn('digest', result['error'].lower())
|
||||
composer.install.assert_not_called()
|
||||
|
||||
def test_install_instanceable_verifies_image_but_does_not_up_container(self):
    """Installing an instanceable service verifies the image but starts nothing.

    The base compose template of an instanceable service still contains
    ${INSTANCE_ID}/${REDIRECT_PORT}, so install must not render, pull or up a
    base container. It only verifies the image signature and stores the
    record, raw template included, for ConnectivityManager to render per
    instance.
    """
    proxy_manifest = _valid_manifest(
        id='proxy',
        container_name='cell-proxy-${INSTANCE_ID}',
        instanceable=True,
    )
    ssm, cm, _, composer = _make_ssm(manifest=proxy_manifest)
    composer.verify_image.return_value = {'ok': True}
    cm.get_image_verification_mode.return_value = 'enforce'

    outcome = ssm.install('proxy')

    self.assertTrue(outcome['ok'], outcome)
    composer.verify_image.assert_called_once()
    composer.install.assert_not_called()

    # The raw template is persisted so per-instance rendering needs no refetch.
    # call_args[0] is the positional-args tuple; index 1 is the stored record.
    stored_record = cm.set_installed_service.call_args[0][1]
    self.assertIn('compose_template', stored_record)
|
||||
|
||||
def test_install_instanceable_aborts_when_image_verification_fails(self):
    """A failed image verification aborts the install and stores no record."""
    proxy_manifest = _valid_manifest(
        id='proxy',
        container_name='cell-proxy-${INSTANCE_ID}',
        instanceable=True,
    )
    ssm, cm, _, composer = _make_ssm(manifest=proxy_manifest)
    composer.verify_image.return_value = {'ok': False, 'error': 'signature verification failed'}
    cm.get_image_verification_mode.return_value = 'enforce'

    outcome = ssm.install('proxy')

    self.assertFalse(outcome['ok'])
    # Neither the container nor the install record may exist after a failure.
    composer.install.assert_not_called()
    cm.set_installed_service.assert_not_called()
|
||||
|
||||
def test_install_without_composer_stores_record(self):
|
||||
"""When service_composer=None, skip compose but still store the install record."""
|
||||
manifest = _valid_manifest(id='myapp', container_name='cell-myapp')
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
"""Tests for scripts/setup_cell.py setup helpers."""
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / 'scripts'))
|
||||
import setup_cell # noqa: E402
|
||||
|
||||
|
||||
class TestEnsureCosignPubkey(unittest.TestCase):
    """ensure_cosign_pubkey restores the tracked key after a `rm -rf config/`.

    Regression: `make reinstall`/`uninstall` wipe config/, deleting the tracked
    config/cosign/cosign.pub; without restore, enforce-mode store installs break.

    Each test runs against a throwaway git repository that tracks a fake key,
    with ``setup_cell.ROOT`` pointed at it for the duration of the test.
    """

    KEY_REL = os.path.join('config', 'cosign', 'cosign.pub')
    KEY_BODY = '-----BEGIN PUBLIC KEY-----\nTESTKEYDATA\n-----END PUBLIC KEY-----\n'

    def setUp(self):
        """Create a git repo that tracks KEY_REL and point setup_cell.ROOT at it."""
        self.tmp = tempfile.mkdtemp()
        # Registered immediately: tearDown is skipped when setUp raises, but
        # cleanups still run, so a failing git command cannot leak the directory.
        self.addCleanup(shutil.rmtree, self.tmp, ignore_errors=True)

        # Ignore the developer's global/system git config for every git call.
        env = {**os.environ, 'GIT_CONFIG_GLOBAL': '/dev/null', 'GIT_CONFIG_SYSTEM': '/dev/null'}

        def git(*args):
            subprocess.run(['git', *args], check=True, env=env)

        git('init', '-q', self.tmp)
        git('-C', self.tmp, 'config', 'user.email', 't@t')
        git('-C', self.tmp, 'config', 'user.name', 't')

        self.key = os.path.join(self.tmp, self.KEY_REL)
        os.makedirs(os.path.dirname(self.key))
        with open(self.key, 'w') as f:
            f.write(self.KEY_BODY)

        git('-C', self.tmp, 'add', '-A')
        git('-C', self.tmp, 'commit', '-qm', 'init')

        self._root = setup_cell.ROOT
        setup_cell.ROOT = self.tmp
        # Cleanups run LIFO: ROOT is restored before the temp repo is removed.
        self.addCleanup(setattr, setup_cell, 'ROOT', self._root)

    def _read_key(self):
        """Return the key file's contents, closing the handle deterministically."""
        with open(self.key) as f:
            return f.read()

    def test_restores_key_when_wiped(self):
        os.remove(self.key)
        shutil.rmtree(os.path.dirname(self.key))  # mimic `rm -rf config/`
        self.assertFalse(os.path.exists(self.key))
        setup_cell.ensure_cosign_pubkey()
        self.assertTrue(os.path.exists(self.key))
        self.assertEqual(self._read_key(), self.KEY_BODY)

    def test_noop_when_key_present(self):
        setup_cell.ensure_cosign_pubkey()
        self.assertEqual(self._read_key(), self.KEY_BODY)

    def test_warns_not_raises_outside_git(self):
        # Not a git checkout and key missing → must warn, never raise.
        non_git = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, non_git, ignore_errors=True)
        # The cleanup registered in setUp restores the original ROOT.
        setup_cell.ROOT = non_git
        setup_cell.ensure_cosign_pubkey()  # should not raise
        self.assertFalse(os.path.exists(os.path.join(non_git, self.KEY_REL)))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -90,11 +90,13 @@ class TestWireGuardEndpoints(unittest.TestCase):
|
||||
'endpoint': '1.2.3.4:51820',
|
||||
'port': 51820,
|
||||
}
|
||||
mock_wg.get_advertised_endpoint.return_value = '1.2.3.4:51820'
|
||||
r = self.client.get('/api/wireguard/server-config')
|
||||
self.assertEqual(r.status_code, 200)
|
||||
data = json.loads(r.data)
|
||||
self.assertIn('public_key', data)
|
||||
self.assertIn('endpoint', data)
|
||||
self.assertEqual(data.get('effective_endpoint'), '1.2.3.4:51820')
|
||||
|
||||
@patch('app.wireguard_manager')
|
||||
def test_server_config_returns_500_on_exception(self, mock_wg):
|
||||
|
||||
@@ -885,5 +885,60 @@ class TestCellRoutes(unittest.TestCase):
|
||||
mock_route.assert_called_once_with('10.1.0.0/24')
|
||||
|
||||
|
||||
class _FakeCM:
    """Minimal config_manager stand-in for get_advertised_endpoint tests.

    Holds the two values passed to the constructor and hands them back
    unchanged through ``get_identity`` and ``get_effective_domain``.
    """

    def __init__(self, identity, effective_domain):
        self._identity_cfg = identity
        self._effective_domain = effective_domain

    def get_identity(self):
        """Return the identity object given at construction."""
        return self._identity_cfg

    def get_effective_domain(self):
        """Return the effective domain given at construction."""
        return self._effective_domain
|
||||
|
||||
|
||||
class TestAdvertisedEndpoint(unittest.TestCase):
    """get_advertised_endpoint prefers domain/override over the raw external IP."""

    def setUp(self):
        """Build a WireGuardManager with a stubbed port, external IP and _syncconf."""
        self.test_dir = tempfile.mkdtemp()
        syncconf_patch = patch.object(WireGuardManager, '_syncconf', return_value=None)
        syncconf_patch.start()
        self.addCleanup(syncconf_patch.stop)
        self.wg = WireGuardManager(self.test_dir, self.test_dir)
        # Pin the configured port and external IP for deterministic endpoints.
        self.wg._get_configured_port = MagicMock(return_value=51821)
        self.wg.get_external_ip = MagicMock(return_value='198.51.100.7')

    def tearDown(self):
        shutil.rmtree(self.test_dir, ignore_errors=True)

    def _advertised(self, identity, effective_domain):
        """Return the endpoint advertised for a fake config with these values."""
        return self.wg.get_advertised_endpoint(_FakeCM(identity, effective_domain))

    def test_public_mode_uses_effective_domain_and_own_port(self):
        endpoint = self._advertised({'domain_mode': 'pic_ngo'}, 'alice.pic.ngo')
        self.assertEqual(endpoint, 'alice.pic.ngo:51821')

    def test_lan_mode_falls_back_to_external_ip(self):
        endpoint = self._advertised({'domain_mode': 'lan'}, 'cell')
        self.assertEqual(endpoint, '198.51.100.7:51821')

    def test_admin_override_wins(self):
        identity = {'domain_mode': 'pic_ngo', 'wireguard_endpoint': 'vpn.example.com'}
        endpoint = self._advertised(identity, 'alice.pic.ngo')
        self.assertEqual(endpoint, 'vpn.example.com:51821')

    def test_override_with_explicit_port_kept(self):
        identity = {'domain_mode': 'lan', 'wireguard_endpoint': 'vpn.example.com:7777'}
        endpoint = self._advertised(identity, 'cell')
        self.assertEqual(endpoint, 'vpn.example.com:7777')

    def test_none_when_no_domain_and_no_external_ip(self):
        self.wg.get_external_ip = MagicMock(return_value=None)
        endpoint = self._advertised({'domain_mode': 'lan'}, 'cell')
        self.assertIsNone(endpoint)

    def test_public_mode_without_domain_falls_back_to_ip(self):
        endpoint = self._advertised({'domain_mode': 'cloudflare'}, '')
        self.assertEqual(endpoint, '198.51.100.7:51821')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -1,6 +1,11 @@
|
||||
FROM alpine:3.20@sha256:d9e853e87e55526f6b2917df91a2115c36dd7c696a35be12163d44e6e2a4b6bc
|
||||
|
||||
RUN apk add --no-cache wireguard-tools iptables ip6tables iproute2
|
||||
# curl + ca-certificates: cell-to-cell peer-sync pushes (offer/permission state)
|
||||
# originate from this container's network namespace — the only one with routes to
|
||||
# remote-cell VPN subnets over the tunnel — and go over HTTPS to the remote's
|
||||
# Caddy. busybox wget here has no TLS, so curl is required (~5MB over the slim
|
||||
# base; the alternative is no automatic cross-cell sync).
|
||||
RUN apk add --no-cache wireguard-tools iptables ip6tables iproute2 curl ca-certificates
|
||||
|
||||
COPY entrypoint.sh /entrypoint.sh
|
||||
RUN chmod +x /entrypoint.sh
|
||||
|
||||
Reference in New Issue
Block a user