Fix Phase 1 permission sync: route push via cell-wireguard + DNAT receive

cell-api has no route to remote WG tunnel IPs — only cell-wireguard does.
Fix _push_permissions_to_remote() to use 'docker exec cell-wireguard curl'
so outbound sync HTTP traverses the WG tunnel from the right namespace.

On the receive side, add ensure_cell_api_dnat() which installs three
iptables rules inside cell-wireguard on startup:
  - PREROUTING DNAT: wg0:3000 → cell-api:3000 (Docker bridge IP)
  - POSTROUTING MASQUERADE: so cell-api's reply routes back via wg0
  - FORWARD ACCEPT: allow the wg0→eth0 forwarded traffic

Called from _apply_startup_enforcement() so rules survive container restarts.
Tests updated to mock subprocess.run instead of urllib.request.urlopen.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-01 13:48:49 -04:00
parent a3d0cd5a48
commit 4ba79fd614
5 changed files with 149 additions and 28 deletions
+1
View File
@@ -264,6 +264,7 @@ def _apply_startup_enforcement():
cell_links = cell_link_manager.list_connections() cell_links = cell_link_manager.list_connections()
firewall_manager.apply_all_peer_rules(peers) firewall_manager.apply_all_peer_rules(peers)
firewall_manager.apply_all_cell_rules(cell_links) firewall_manager.apply_all_cell_rules(cell_links)
firewall_manager.ensure_cell_api_dnat()
firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, _configured_domain(), firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, _configured_domain(),
cell_links=cell_links) cell_links=cell_links)
logger.info(f"Applied enforcement rules for {len(peers)} peers, {len(cell_links)} cells on startup") logger.info(f"Applied enforcement rules for {len(peers)} peers, {len(cell_links)} cells on startup")
+29 -16
View File
@@ -11,8 +11,7 @@ Each connection is stored in data/cell_links.json and manifests as:
import json import json
import logging import logging
import os import os
import urllib.error import subprocess
import urllib.request
from datetime import datetime from datetime import datetime
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
@@ -108,6 +107,10 @@ class CellLinkManager:
The body inverts inbound/outbound: our inbound (what we share with them) The body inverts inbound/outbound: our inbound (what we share with them)
becomes their outbound (what they receive from us), and vice-versa. becomes their outbound (what they receive from us), and vice-versa.
Uses 'docker exec cell-wireguard curl' so the HTTP request originates
from inside cell-wireguard's network namespace, which has routes to
remote cell VPN subnets that cell-api (on the Docker bridge) lacks.
""" """
url = link.get('remote_api_url') url = link.get('remote_api_url')
if not url: if not url:
@@ -124,22 +127,32 @@ class CellLinkManager:
}, },
'sent_at': datetime.utcnow().isoformat() + 'Z', 'sent_at': datetime.utcnow().isoformat() + 'Z',
} }
payload = json.dumps(body).encode('utf-8') payload = json.dumps(body)
req = urllib.request.Request( endpoint = url.rstrip('/') + '/api/cells/peer-sync/permissions'
url.rstrip('/') + '/api/cells/peer-sync/permissions',
data=payload, cmd = [
method='POST', 'docker', 'exec', 'cell-wireguard',
headers={'Content-Type': 'application/json'}, 'curl', '-s', '-o', '/dev/null', '-w', '%{http_code}',
) '-X', 'POST',
'-H', 'Content-Type: application/json',
'-d', payload,
'--max-time', str(_PUSH_TIMEOUT),
'--connect-timeout', '3',
endpoint,
]
try: try:
with urllib.request.urlopen(req, timeout=_PUSH_TIMEOUT) as resp: result = subprocess.run(
if 200 <= resp.status < 300: cmd, capture_output=True, text=True, timeout=_PUSH_TIMEOUT + 5
)
if result.returncode != 0:
err = (result.stderr or result.stdout or 'curl error').strip()[:200]
return {'ok': False, 'error': err}
status = result.stdout.strip()
if status.startswith('2'):
return {'ok': True, 'error': None} return {'ok': True, 'error': None}
return {'ok': False, 'error': f'HTTP {resp.status}'} return {'ok': False, 'error': f'HTTP {status}'}
except urllib.error.HTTPError as e: except subprocess.TimeoutExpired:
return {'ok': False, 'error': f'HTTP {e.code}'} return {'ok': False, 'error': 'timeout'}
except urllib.error.URLError as e:
return {'ok': False, 'error': str(e.reason)[:200]}
except Exception as e: except Exception as e:
return {'ok': False, 'error': str(e)[:200]} return {'ok': False, 'error': str(e)[:200]}
+45
View File
@@ -298,6 +298,51 @@ def apply_all_cell_rules(cell_links: List[Dict[str, Any]]) -> None:
apply_cell_rules(name, subnet, inbound) apply_cell_rules(name, subnet, inbound)
def ensure_cell_api_dnat() -> bool:
"""DNAT wg0:3000 → cell-api:3000 inside cell-wireguard.
Remote cells push permission updates over the WireGuard tunnel to our
wg0 interface on port 3000. Since cell-api only listens on the Docker
bridge, we need a DNAT rule inside cell-wireguard's namespace to forward
that traffic. Called on every startup so rules survive container restarts.
"""
try:
r = _run(['docker', 'inspect', '--format',
'{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}',
'cell-api'], check=False)
api_ip = r.stdout.strip()
if not api_ip:
logger.warning('ensure_cell_api_dnat: cell-api container not found or no IP')
return False
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
'--dport', '3000', '-j', 'DNAT', '--to-destination', f'{api_ip}:3000']
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
'--dport', '3000', '-j', 'DNAT', '--to-destination', f'{api_ip}:3000']
if _wg_exec(['iptables'] + dnat_check).returncode != 0:
_wg_exec(['iptables'] + dnat_add)
masq_check = ['-t', 'nat', '-C', 'POSTROUTING', '-o', 'eth0', '-d', api_ip,
'-p', 'tcp', '--dport', '3000', '-j', 'MASQUERADE']
masq_add = ['-t', 'nat', '-A', 'POSTROUTING', '-o', 'eth0', '-d', api_ip,
'-p', 'tcp', '--dport', '3000', '-j', 'MASQUERADE']
if _wg_exec(['iptables'] + masq_check).returncode != 0:
_wg_exec(['iptables'] + masq_add)
fwd_check = ['-C', 'FORWARD', '-i', 'wg0', '-o', 'eth0',
'-p', 'tcp', '--dport', '3000', '-j', 'ACCEPT']
fwd_add = ['-I', 'FORWARD', '-i', 'wg0', '-o', 'eth0',
'-p', 'tcp', '--dport', '3000', '-j', 'ACCEPT']
if _wg_exec(['iptables'] + fwd_check).returncode != 0:
_wg_exec(['iptables'] + fwd_add)
logger.info(f'ensure_cell_api_dnat: wg0:3000 → {api_ip}:3000')
return True
except Exception as e:
logger.error(f'ensure_cell_api_dnat: {e}')
return False
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# DNS ACL (CoreDNS Corefile generation) # DNS ACL (CoreDNS Corefile generation)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
+15 -11
View File
@@ -522,16 +522,18 @@ class TestPermissionSync(unittest.TestCase):
sent_body = {} sent_body = {}
def fake_urlopen(req, timeout=None): def fake_run(cmd, **kwargs):
import json as _j import json as _j
sent_body.update(_j.loads(req.data)) # Extract the -d payload from curl args
resp = MagicMock() d_idx = cmd.index('-d')
resp.__enter__ = lambda s: s sent_body.update(_j.loads(cmd[d_idx + 1]))
resp.__exit__ = MagicMock(return_value=False) r = MagicMock()
resp.status = 200 r.returncode = 0
return resp r.stdout = '200'
r.stderr = ''
return r
with patch('urllib.request.urlopen', fake_urlopen): with patch('subprocess.run', fake_run):
result = self.mgr._push_permissions_to_remote(link, 'home', 'homepubkey=') result = self.mgr._push_permissions_to_remote(link, 'home', 'homepubkey=')
self.assertTrue(result['ok']) self.assertTrue(result['ok'])
@@ -544,9 +546,11 @@ class TestPermissionSync(unittest.TestCase):
def test_push_http_error_returns_not_ok(self): def test_push_http_error_returns_not_ok(self):
self._add_office() self._add_office()
link = self.mgr.list_connections()[0] link = self.mgr.list_connections()[0]
with patch('urllib.request.urlopen', mock_result = MagicMock()
side_effect=__import__('urllib.error', fromlist=['HTTPError']).HTTPError( mock_result.returncode = 0
url='', code=503, msg='Service Unavailable', hdrs=None, fp=None)): mock_result.stdout = '503'
mock_result.stderr = ''
with patch('subprocess.run', return_value=mock_result):
result = self.mgr._push_permissions_to_remote(link, 'home', 'homepubkey=') result = self.mgr._push_permissions_to_remote(link, 'home', 'homepubkey=')
self.assertFalse(result['ok']) self.assertFalse(result['ok'])
self.assertIn('503', result['error']) self.assertIn('503', result['error'])
+58
View File
@@ -622,5 +622,63 @@ class TestCellRules(unittest.TestCase):
self.assertEqual(mock_apply.call_args.args[0], 'office') self.assertEqual(mock_apply.call_args.args[0], 'office')
class TestEnsureCellApiDnat(unittest.TestCase):
"""Tests for ensure_cell_api_dnat — DNAT wg0:3000 → cell-api:3000."""
def _wg_exec_no_existing_rules(self, args):
r = MagicMock()
r.returncode = 1 if '-C' in args else 0 # -C = check: fail = not present
r.stdout = ''
r.stderr = ''
return r
def _wg_exec_all_rules_exist(self, args):
r = MagicMock()
r.returncode = 0 # -C succeeds = rule already present
r.stdout = ''
return r
def _inspect_ok(self, api_ip='172.20.0.10'):
r = MagicMock()
r.returncode = 0
r.stdout = api_ip
return r
def test_dnat_rules_added_when_not_present(self):
with patch.object(firewall_manager, '_run', return_value=self._inspect_ok()), \
patch.object(firewall_manager, '_wg_exec',
side_effect=self._wg_exec_no_existing_rules) as wg_mock:
result = firewall_manager.ensure_cell_api_dnat()
self.assertTrue(result)
calls_args = [c.args[0] for c in wg_mock.call_args_list]
dnat_adds = [a for a in calls_args if 'DNAT' in a and '-A' in a]
self.assertTrue(len(dnat_adds) >= 1, 'DNAT -A rule must be added')
def test_dnat_skipped_if_already_present(self):
with patch.object(firewall_manager, '_run', return_value=self._inspect_ok()), \
patch.object(firewall_manager, '_wg_exec',
side_effect=self._wg_exec_all_rules_exist) as wg_mock:
result = firewall_manager.ensure_cell_api_dnat()
self.assertTrue(result)
calls_args = [c.args[0] for c in wg_mock.call_args_list]
add_calls = [a for a in calls_args if '-A' in a or '-I' in a]
self.assertEqual(len(add_calls), 0, 'No rules should be added when they already exist')
def test_returns_false_when_cell_api_not_found(self):
r = MagicMock()
r.returncode = 0
r.stdout = ''
with patch.object(firewall_manager, '_run', return_value=r):
result = firewall_manager.ensure_cell_api_dnat()
self.assertFalse(result)
def test_returns_false_on_exception(self):
with patch.object(firewall_manager, '_run', side_effect=RuntimeError('docker gone')):
result = firewall_manager.ensure_cell_api_dnat()
self.assertFalse(result)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()