Fix Phase 1 permission sync: route push via cell-wireguard + DNAT receive

cell-api has no route to remote WG tunnel IPs — only cell-wireguard does.
Fix _push_permissions_to_remote() to use 'docker exec cell-wireguard curl'
so outbound sync HTTP traverses the WG tunnel from the right namespace.

On the receive side, add ensure_cell_api_dnat() which installs three
iptables rules inside cell-wireguard on startup:
  - PREROUTING DNAT: wg0:3000 → cell-api:3000 (Docker bridge IP)
  - POSTROUTING MASQUERADE: so cell-api's reply routes back via wg0
  - FORWARD ACCEPT: allow the wg0→eth0 forwarded traffic

Called from _apply_startup_enforcement() so rules survive container restarts.
Tests updated to mock subprocess.run instead of urllib.request.urlopen.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-01 13:48:49 -04:00
parent a3d0cd5a48
commit 4ba79fd614
5 changed files with 149 additions and 28 deletions
+1
View File
@@ -264,6 +264,7 @@ def _apply_startup_enforcement():
cell_links = cell_link_manager.list_connections()
firewall_manager.apply_all_peer_rules(peers)
firewall_manager.apply_all_cell_rules(cell_links)
firewall_manager.ensure_cell_api_dnat()
firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, _configured_domain(),
cell_links=cell_links)
logger.info(f"Applied enforcement rules for {len(peers)} peers, {len(cell_links)} cells on startup")
+30 -17
View File
@@ -11,8 +11,7 @@ Each connection is stored in data/cell_links.json and manifests as:
import json
import logging
import os
import urllib.error
import urllib.request
import subprocess
from datetime import datetime
from typing import Any, Dict, List, Optional
@@ -108,6 +107,10 @@ class CellLinkManager:
The body inverts inbound/outbound: our inbound (what we share with them)
becomes their outbound (what they receive from us), and vice-versa.
Uses 'docker exec cell-wireguard curl' so the HTTP request originates
from inside cell-wireguard's network namespace, which has routes to
remote cell VPN subnets that cell-api (on the Docker bridge) lacks.
"""
url = link.get('remote_api_url')
if not url:
@@ -124,22 +127,32 @@ class CellLinkManager:
},
'sent_at': datetime.utcnow().isoformat() + 'Z',
}
payload = json.dumps(body).encode('utf-8')
req = urllib.request.Request(
url.rstrip('/') + '/api/cells/peer-sync/permissions',
data=payload,
method='POST',
headers={'Content-Type': 'application/json'},
)
payload = json.dumps(body)
endpoint = url.rstrip('/') + '/api/cells/peer-sync/permissions'
cmd = [
'docker', 'exec', 'cell-wireguard',
'curl', '-s', '-o', '/dev/null', '-w', '%{http_code}',
'-X', 'POST',
'-H', 'Content-Type: application/json',
'-d', payload,
'--max-time', str(_PUSH_TIMEOUT),
'--connect-timeout', '3',
endpoint,
]
try:
with urllib.request.urlopen(req, timeout=_PUSH_TIMEOUT) as resp:
if 200 <= resp.status < 300:
return {'ok': True, 'error': None}
return {'ok': False, 'error': f'HTTP {resp.status}'}
except urllib.error.HTTPError as e:
return {'ok': False, 'error': f'HTTP {e.code}'}
except urllib.error.URLError as e:
return {'ok': False, 'error': str(e.reason)[:200]}
result = subprocess.run(
cmd, capture_output=True, text=True, timeout=_PUSH_TIMEOUT + 5
)
if result.returncode != 0:
err = (result.stderr or result.stdout or 'curl error').strip()[:200]
return {'ok': False, 'error': err}
status = result.stdout.strip()
if status.startswith('2'):
return {'ok': True, 'error': None}
return {'ok': False, 'error': f'HTTP {status}'}
except subprocess.TimeoutExpired:
return {'ok': False, 'error': 'timeout'}
except Exception as e:
return {'ok': False, 'error': str(e)[:200]}
+45
View File
@@ -298,6 +298,51 @@ def apply_all_cell_rules(cell_links: List[Dict[str, Any]]) -> None:
apply_cell_rules(name, subnet, inbound)
def ensure_cell_api_dnat() -> bool:
"""DNAT wg0:3000 → cell-api:3000 inside cell-wireguard.
Remote cells push permission updates over the WireGuard tunnel to our
wg0 interface on port 3000. Since cell-api only listens on the Docker
bridge, we need a DNAT rule inside cell-wireguard's namespace to forward
that traffic. Called on every startup so rules survive container restarts.
"""
try:
r = _run(['docker', 'inspect', '--format',
'{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}',
'cell-api'], check=False)
api_ip = r.stdout.strip()
if not api_ip:
logger.warning('ensure_cell_api_dnat: cell-api container not found or no IP')
return False
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
'--dport', '3000', '-j', 'DNAT', '--to-destination', f'{api_ip}:3000']
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
'--dport', '3000', '-j', 'DNAT', '--to-destination', f'{api_ip}:3000']
if _wg_exec(['iptables'] + dnat_check).returncode != 0:
_wg_exec(['iptables'] + dnat_add)
masq_check = ['-t', 'nat', '-C', 'POSTROUTING', '-o', 'eth0', '-d', api_ip,
'-p', 'tcp', '--dport', '3000', '-j', 'MASQUERADE']
masq_add = ['-t', 'nat', '-A', 'POSTROUTING', '-o', 'eth0', '-d', api_ip,
'-p', 'tcp', '--dport', '3000', '-j', 'MASQUERADE']
if _wg_exec(['iptables'] + masq_check).returncode != 0:
_wg_exec(['iptables'] + masq_add)
fwd_check = ['-C', 'FORWARD', '-i', 'wg0', '-o', 'eth0',
'-p', 'tcp', '--dport', '3000', '-j', 'ACCEPT']
fwd_add = ['-I', 'FORWARD', '-i', 'wg0', '-o', 'eth0',
'-p', 'tcp', '--dport', '3000', '-j', 'ACCEPT']
if _wg_exec(['iptables'] + fwd_check).returncode != 0:
_wg_exec(['iptables'] + fwd_add)
logger.info(f'ensure_cell_api_dnat: wg0:3000 → {api_ip}:3000')
return True
except Exception as e:
logger.error(f'ensure_cell_api_dnat: {e}')
return False
# ---------------------------------------------------------------------------
# DNS ACL (CoreDNS Corefile generation)
# ---------------------------------------------------------------------------