2 Commits

Author SHA1 Message Date
roof 714fb9b1a9 fix: make cross-cell peer-sync push actually reach the remote cell's API
Unit Tests / test (push) Successful in 9m48s
The offer/permission push between linked cells never worked end-to-end. Two
fixes complete the transport (the push already targets the remote over the WG
tunnel, and the earlier fix #3 pointed it at HTTPS):

1. The slim WireGuard image (where the push originates — the only namespace with
   routes to remote-cell VPN subnets) had no TLS-capable HTTP client: busybox
   wget lacks TLS and curl was not installed. Add curl + ca-certificates (~5MB).

2. The receiving cell's cell-link firewall allowed the linked subnet to reach
   cell-api:3000 — a dead path (the API binds 127.0.0.1 only; nothing DNATs
   :3000). Move the peer-sync ACCEPT to Caddy:443, which the WG server already
   DNATs (wg0:443 → Caddy → cell-api) and whose replies the existing
   `-o eth0 MASQUERADE` routes back through the tunnel. Source auth (cell VPN
   subnet via X-Forwarded-For) is preserved; the API stays 127.0.0.1-only.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-16 10:01:56 -04:00
roof c7e01d4aa7 fix: LAN Caddyfile serves TLS on an https:// site, not an http:// one
Unit Tests / test (push) Successful in 9m46s
_caddyfile_lan emitted the internal-CA `tls` directive inside an
`http://<cell>.cell, http://172.20.0.2:80` block. Caddy rejects a tls
directive on a port-80 (HTTP) listener ("server listening on [:80] is HTTP,
but attempts to configure TLS connection policies"), so cell-caddy crash-looped
in LAN mode. Split into a `https://<cell>.cell` site (internal-CA tls) plus a
separate plain-HTTP block for :80 — both needed because the WireGuard server
DNATs peer traffic to Caddy on 80 and 443.

Note: LAN mode still needs the internal serving cert wired to the mounted certs
dir (a separate gap) before cell-caddy comes fully up.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-16 07:26:15 -04:00
5 changed files with 83 additions and 44 deletions
+14 -2
View File
@@ -310,7 +310,15 @@ class CaddyManager(BaseServiceManager):
service_routes: str, core_routes: str,
cert_path: str = _CADDY_INTERNAL_CERT,
key_path: str = _CADDY_INTERNAL_KEY) -> str:
"""LAN mode: HTTP only + internal-CA TLS, no ACME."""
"""LAN mode: internal-CA TLS on 443, plain HTTP on 80, no ACME.
The same routes are served on both an HTTPS site (the internal-CA cert)
and an HTTP site. They must be SEPARATE site blocks: a `tls` directive on
an `http://` (port 80) address is rejected by Caddy ("server listening on
[:80] is HTTP, but attempts to configure TLS connection policies"). Both
are needed because the WireGuard server DNATs peer traffic to Caddy on
both 80 and 443.
"""
body = []
if service_routes:
body.append(self._indent_routes(service_routes))
@@ -325,10 +333,14 @@ class CaddyManager(BaseServiceManager):
" auto_https off\n"
"}\n"
"\n"
f"http://{cell_name}.cell, http://172.20.0.2:80 {{\n"
f"https://{cell_name}.cell {{\n"
f" tls {cert_path} {key_path}\n"
f"{inner}\n"
"}\n"
"\n"
f"http://{cell_name}.cell, http://172.20.0.2:80 {{\n"
f"{inner}\n"
"}\n"
)
def _caddyfile_pic_ngo(self, cell_name: str,
+14 -9
View File
@@ -374,7 +374,8 @@ def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str
Traffic from vpn_subnet is allowed only to service VIPs listed in
inbound_services; all other cell traffic is DROPped. Cells get no
internet or peer access — only explicit service access via Caddy on
port 80, plus the cell-api port (3000) for permission-sync pushes.
port 80, plus Caddy on 443 for cross-cell peer-sync pushes (offer/
permission state) which reach cell-api through Caddy.
DNS (port 53) is always allowed so cell peers can resolve service names.
Service names resolve to the WG server IP; ensure_service_dnat() routes
@@ -388,7 +389,7 @@ def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str
2. Exit relay ACCEPT (-o eth0) (if exit_relay, above catch-all)
3. Service ACCEPT to Caddy port 80 (if any inbound_services)
4. DNS ACCEPT to cell-dns port 53 (UDP + TCP)
5. API-sync ACCEPT (inserted last → top)
5. Peer-sync ACCEPT to Caddy port 443 (inserted last → top)
"""
try:
tag = _cell_tag(cell_name)
@@ -425,13 +426,17 @@ def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str
'-p', proto, '--dport', '53',
'-m', 'comment', '--comment', tag, '-j', 'ACCEPT'])
# API permission-sync ACCEPT — inserted LAST so it goes to position 1 (above
# the catch-all DROP). Remote cells push permissions to our cell-api via the
# WG tunnel; iptables sees source=cell_subnet dst=api_ip after DNAT.
api_ip = _get_cell_api_ip()
if api_ip:
_iptables(['-I', 'FORWARD', '-s', vpn_subnet, '-d', api_ip,
'-p', 'tcp', '--dport', '3000',
# Peer-sync ACCEPT — inserted LAST so it goes to position 1 (above the
# catch-all DROP). Remote cells push offer/permission state to our API over
# the WG tunnel. The push targets the remote's Caddy on 443 (DNAT wg0:443 →
# Caddy → cell-api), NOT cell-api:3000 directly: the API binds 127.0.0.1
# only and is reachable solely through Caddy. After DNAT iptables sees
# source=cell_subnet dst=caddy_ip:443; the existing `-o eth0 MASQUERADE`
# routes Caddy's reply back through the tunnel.
caddy_ip = _get_caddy_container_ip()
if caddy_ip:
_iptables(['-I', 'FORWARD', '-s', vpn_subnet, '-d', caddy_ip,
'-p', 'tcp', '--dport', '443',
'-m', 'comment', '--comment', tag, '-j', 'ACCEPT'])
# Ensure reply traffic (e.g. ICMP, TCP ACKs) for connections initiated
+8 -4
View File
@@ -48,12 +48,16 @@ class TestGenerateCaddyfileLan(unittest.TestCase):
self.assertNotIn('acme_email', out)
self.assertNotIn('dns pic_ngo', out)
self.assertNotIn('dns cloudflare', out)
# Internal-CA TLS pair
# Internal-CA TLS pair, on an HTTPS (443) site — never on an http:// one.
self.assertIn('tls /etc/caddy/internal/cert.pem '
'/etc/caddy/internal/key.pem', out)
# Cell hostname plus virtual IP listener
self.assertIn('http://mycell.cell', out)
self.assertIn('http://172.20.0.2:80', out)
self.assertIn('https://mycell.cell {', out)
# Cell hostname plus virtual IP listener on plain HTTP (80)
self.assertIn('http://mycell.cell, http://172.20.0.2:80 {', out)
# The HTTP (:80) block must NOT carry a tls directive — Caddy rejects
# "server listening on [:80] is HTTP, but attempts to configure TLS".
http_block = out.split('http://mycell.cell, http://172.20.0.2:80 {', 1)[1]
self.assertNotIn('tls ', http_block)
class TestGenerateCaddyfilePicNgo(unittest.TestCase):
+41 -28
View File
@@ -702,32 +702,45 @@ class TestCellRules(unittest.TestCase):
]
self.assertTrue(subnet_drops, "Expected a catch-all DROP rule for the subnet")
def test_apply_cell_rules_sends_accept_for_allowed_service(self):
"""apply_cell_rules inserts Caddy ACCEPT when inbound_services is non-empty."""
calls = self._capture_apply('office', '10.0.1.0/24', ['calendar'])
caddy_targets = self._targets_for_dest(calls, self._FAKE_CADDY_IP)
self.assertIn('ACCEPT', caddy_targets,
"Expected ACCEPT to Caddy when inbound_services is non-empty")
def test_apply_cell_rules_no_caddy_accept_when_no_inbound(self):
"""apply_cell_rules does NOT insert Caddy ACCEPT when inbound_services is empty."""
calls = self._capture_apply('office', '10.0.1.0/24', [])
caddy_targets = self._targets_for_dest(calls, self._FAKE_CADDY_IP)
self.assertNotIn('ACCEPT', caddy_targets,
"No Caddy ACCEPT expected when inbound_services is empty")
def test_apply_cell_rules_accepts_api_sync_traffic(self):
"""apply_cell_rules inserts ACCEPT for cell-api:3000 so permission-sync pushes pass."""
calls = self._capture_apply('office', '10.0.1.0/24', [])
api_ip = self._FAKE_API_IP
api_accepts = [
def _caddy_accepts_on_port(self, calls, port):
"""Caddy-dest ACCEPT calls matching --dport <port>."""
return [
c for c in calls
if '-s' in c and '10.0.1.0/24' in c
and '-d' in c and api_ip in c
and '--dport' in c and '3000' in c
if '-d' in c and self._FAKE_CADDY_IP in c
and '--dport' in c and str(port) in c
and '-j' in c and c[c.index('-j') + 1] == 'ACCEPT'
]
self.assertTrue(api_accepts, 'Expected an ACCEPT rule for cell-api:3000')
def test_apply_cell_rules_sends_accept_for_allowed_service(self):
"""apply_cell_rules inserts a Caddy:80 ACCEPT when inbound_services is non-empty."""
calls = self._capture_apply('office', '10.0.1.0/24', ['calendar'])
self.assertTrue(self._caddy_accepts_on_port(calls, 80),
"Expected ACCEPT to Caddy:80 for an inbound service")
def test_apply_cell_rules_no_service_accept_when_no_inbound(self):
"""No Caddy:80 (service) ACCEPT when inbound_services is empty.
The :443 peer-sync ACCEPT is separate and always present (below).
"""
calls = self._capture_apply('office', '10.0.1.0/24', [])
self.assertFalse(self._caddy_accepts_on_port(calls, 80),
"No Caddy:80 service ACCEPT expected with empty inbound")
def test_apply_cell_rules_accepts_peer_sync_to_caddy_443(self):
"""Cross-cell peer-sync ACCEPT to Caddy:443 is always added (the push reaches
cell-api through Caddy, since the API binds 127.0.0.1 only)."""
calls = self._capture_apply('office', '10.0.1.0/24', [])
peer_sync = [
c for c in self._caddy_accepts_on_port(calls, 443)
if '-s' in c and '10.0.1.0/24' in c
]
self.assertTrue(peer_sync, 'Expected ACCEPT to Caddy:443 for peer-sync')
# And it must NOT target the (127.0.0.1-only) cell-api on :3000 anymore.
api_3000 = [
c for c in calls
if '-d' in c and self._FAKE_API_IP in c and '--dport' in c and '3000' in c
]
self.assertFalse(api_3000, 'Peer-sync must not target cell-api:3000')
def test_apply_cell_rules_api_sync_accept_before_catchall_drop(self):
"""The API-sync ACCEPT must be inserted after service rules so it ends up above DROP."""
@@ -754,12 +767,12 @@ class TestCellRules(unittest.TestCase):
# ── apply_cell_rules — empty inbound (all-deny) ───────────────────────────
def test_apply_cell_rules_empty_inbound_no_service_accept(self):
"""With inbound_services=[], no service ACCEPT is added; catch-all DROP blocks traffic."""
"""With inbound_services=[], no Caddy:80 service ACCEPT is added; the catch-all
DROP blocks service traffic (only the :443 peer-sync ACCEPT is present)."""
calls = self._capture_apply('office', '10.0.1.0/24', [])
# No ACCEPT to Caddy
caddy_targets = self._targets_for_dest(calls, self._FAKE_CADDY_IP)
self.assertNotIn('ACCEPT', caddy_targets,
"No Caddy ACCEPT expected with empty inbound_services")
# No service ACCEPT to Caddy on :80
self.assertFalse(self._caddy_accepts_on_port(calls, 80),
"No Caddy:80 ACCEPT expected with empty inbound_services")
# No per-VIP rules at all
for service, svc_ip in firewall_manager.SERVICE_IPS.items():
svc_targets = self._targets_for_dest(calls, svc_ip)
+6 -1
View File
@@ -1,6 +1,11 @@
FROM alpine:3.20@sha256:d9e853e87e55526f6b2917df91a2115c36dd7c696a35be12163d44e6e2a4b6bc
RUN apk add --no-cache wireguard-tools iptables ip6tables iproute2
# curl + ca-certificates: cell-to-cell peer-sync pushes (offer/permission state)
# originate from this container's network namespace — the only one with routes to
# remote-cell VPN subnets over the tunnel — and go over HTTPS to the remote's
# Caddy. busybox wget here has no TLS, so curl is required (~5MB over the slim
# base; the alternative is no automatic cross-cell sync).
RUN apk add --no-cache wireguard-tools iptables ip6tables iproute2 curl ca-certificates
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh