Fix Phase 1 permission sync: route push via cell-wireguard + DNAT receive
cell-api has no route to remote WG tunnel IPs — only cell-wireguard does. Fix _push_permissions_to_remote() to use 'docker exec cell-wireguard curl' so outbound sync HTTP traverses the WG tunnel from the right namespace. On the receive side, add ensure_cell_api_dnat() which installs three iptables rules inside cell-wireguard on startup: - PREROUTING DNAT: wg0:3000 → cell-api:3000 (Docker bridge IP) - POSTROUTING MASQUERADE: so cell-api's reply routes back via wg0 - FORWARD ACCEPT: allow the wg0→eth0 forwarded traffic Called from _apply_startup_enforcement() so rules survive container restarts. Tests updated to mock subprocess.run instead of urllib.request.urlopen. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -264,6 +264,7 @@ def _apply_startup_enforcement():
|
||||
cell_links = cell_link_manager.list_connections()
|
||||
firewall_manager.apply_all_peer_rules(peers)
|
||||
firewall_manager.apply_all_cell_rules(cell_links)
|
||||
firewall_manager.ensure_cell_api_dnat()
|
||||
firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, _configured_domain(),
|
||||
cell_links=cell_links)
|
||||
logger.info(f"Applied enforcement rules for {len(peers)} peers, {len(cell_links)} cells on startup")
|
||||
|
||||
+29
-16
@@ -11,8 +11,7 @@ Each connection is stored in data/cell_links.json and manifests as:
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
@@ -108,6 +107,10 @@ class CellLinkManager:
|
||||
|
||||
The body inverts inbound/outbound: our inbound (what we share with them)
|
||||
becomes their outbound (what they receive from us), and vice-versa.
|
||||
|
||||
Uses 'docker exec cell-wireguard curl' so the HTTP request originates
|
||||
from inside cell-wireguard's network namespace, which has routes to
|
||||
remote cell VPN subnets that cell-api (on the Docker bridge) lacks.
|
||||
"""
|
||||
url = link.get('remote_api_url')
|
||||
if not url:
|
||||
@@ -124,22 +127,32 @@ class CellLinkManager:
|
||||
},
|
||||
'sent_at': datetime.utcnow().isoformat() + 'Z',
|
||||
}
|
||||
payload = json.dumps(body).encode('utf-8')
|
||||
req = urllib.request.Request(
|
||||
url.rstrip('/') + '/api/cells/peer-sync/permissions',
|
||||
data=payload,
|
||||
method='POST',
|
||||
headers={'Content-Type': 'application/json'},
|
||||
)
|
||||
payload = json.dumps(body)
|
||||
endpoint = url.rstrip('/') + '/api/cells/peer-sync/permissions'
|
||||
|
||||
cmd = [
|
||||
'docker', 'exec', 'cell-wireguard',
|
||||
'curl', '-s', '-o', '/dev/null', '-w', '%{http_code}',
|
||||
'-X', 'POST',
|
||||
'-H', 'Content-Type: application/json',
|
||||
'-d', payload,
|
||||
'--max-time', str(_PUSH_TIMEOUT),
|
||||
'--connect-timeout', '3',
|
||||
endpoint,
|
||||
]
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=_PUSH_TIMEOUT) as resp:
|
||||
if 200 <= resp.status < 300:
|
||||
result = subprocess.run(
|
||||
cmd, capture_output=True, text=True, timeout=_PUSH_TIMEOUT + 5
|
||||
)
|
||||
if result.returncode != 0:
|
||||
err = (result.stderr or result.stdout or 'curl error').strip()[:200]
|
||||
return {'ok': False, 'error': err}
|
||||
status = result.stdout.strip()
|
||||
if status.startswith('2'):
|
||||
return {'ok': True, 'error': None}
|
||||
return {'ok': False, 'error': f'HTTP {resp.status}'}
|
||||
except urllib.error.HTTPError as e:
|
||||
return {'ok': False, 'error': f'HTTP {e.code}'}
|
||||
except urllib.error.URLError as e:
|
||||
return {'ok': False, 'error': str(e.reason)[:200]}
|
||||
return {'ok': False, 'error': f'HTTP {status}'}
|
||||
except subprocess.TimeoutExpired:
|
||||
return {'ok': False, 'error': 'timeout'}
|
||||
except Exception as e:
|
||||
return {'ok': False, 'error': str(e)[:200]}
|
||||
|
||||
|
||||
@@ -298,6 +298,51 @@ def apply_all_cell_rules(cell_links: List[Dict[str, Any]]) -> None:
|
||||
apply_cell_rules(name, subnet, inbound)
|
||||
|
||||
|
||||
def ensure_cell_api_dnat() -> bool:
|
||||
"""DNAT wg0:3000 → cell-api:3000 inside cell-wireguard.
|
||||
|
||||
Remote cells push permission updates over the WireGuard tunnel to our
|
||||
wg0 interface on port 3000. Since cell-api only listens on the Docker
|
||||
bridge, we need a DNAT rule inside cell-wireguard's namespace to forward
|
||||
that traffic. Called on every startup so rules survive container restarts.
|
||||
"""
|
||||
try:
|
||||
r = _run(['docker', 'inspect', '--format',
|
||||
'{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}',
|
||||
'cell-api'], check=False)
|
||||
api_ip = r.stdout.strip()
|
||||
if not api_ip:
|
||||
logger.warning('ensure_cell_api_dnat: cell-api container not found or no IP')
|
||||
return False
|
||||
|
||||
dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
|
||||
'--dport', '3000', '-j', 'DNAT', '--to-destination', f'{api_ip}:3000']
|
||||
dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
|
||||
'--dport', '3000', '-j', 'DNAT', '--to-destination', f'{api_ip}:3000']
|
||||
if _wg_exec(['iptables'] + dnat_check).returncode != 0:
|
||||
_wg_exec(['iptables'] + dnat_add)
|
||||
|
||||
masq_check = ['-t', 'nat', '-C', 'POSTROUTING', '-o', 'eth0', '-d', api_ip,
|
||||
'-p', 'tcp', '--dport', '3000', '-j', 'MASQUERADE']
|
||||
masq_add = ['-t', 'nat', '-A', 'POSTROUTING', '-o', 'eth0', '-d', api_ip,
|
||||
'-p', 'tcp', '--dport', '3000', '-j', 'MASQUERADE']
|
||||
if _wg_exec(['iptables'] + masq_check).returncode != 0:
|
||||
_wg_exec(['iptables'] + masq_add)
|
||||
|
||||
fwd_check = ['-C', 'FORWARD', '-i', 'wg0', '-o', 'eth0',
|
||||
'-p', 'tcp', '--dport', '3000', '-j', 'ACCEPT']
|
||||
fwd_add = ['-I', 'FORWARD', '-i', 'wg0', '-o', 'eth0',
|
||||
'-p', 'tcp', '--dport', '3000', '-j', 'ACCEPT']
|
||||
if _wg_exec(['iptables'] + fwd_check).returncode != 0:
|
||||
_wg_exec(['iptables'] + fwd_add)
|
||||
|
||||
logger.info(f'ensure_cell_api_dnat: wg0:3000 → {api_ip}:3000')
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f'ensure_cell_api_dnat: {e}')
|
||||
return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DNS ACL (CoreDNS Corefile generation)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -522,16 +522,18 @@ class TestPermissionSync(unittest.TestCase):
|
||||
|
||||
sent_body = {}
|
||||
|
||||
def fake_urlopen(req, timeout=None):
|
||||
def fake_run(cmd, **kwargs):
|
||||
import json as _j
|
||||
sent_body.update(_j.loads(req.data))
|
||||
resp = MagicMock()
|
||||
resp.__enter__ = lambda s: s
|
||||
resp.__exit__ = MagicMock(return_value=False)
|
||||
resp.status = 200
|
||||
return resp
|
||||
# Extract the -d payload from curl args
|
||||
d_idx = cmd.index('-d')
|
||||
sent_body.update(_j.loads(cmd[d_idx + 1]))
|
||||
r = MagicMock()
|
||||
r.returncode = 0
|
||||
r.stdout = '200'
|
||||
r.stderr = ''
|
||||
return r
|
||||
|
||||
with patch('urllib.request.urlopen', fake_urlopen):
|
||||
with patch('subprocess.run', fake_run):
|
||||
result = self.mgr._push_permissions_to_remote(link, 'home', 'homepubkey=')
|
||||
|
||||
self.assertTrue(result['ok'])
|
||||
@@ -544,9 +546,11 @@ class TestPermissionSync(unittest.TestCase):
|
||||
def test_push_http_error_returns_not_ok(self):
|
||||
self._add_office()
|
||||
link = self.mgr.list_connections()[0]
|
||||
with patch('urllib.request.urlopen',
|
||||
side_effect=__import__('urllib.error', fromlist=['HTTPError']).HTTPError(
|
||||
url='', code=503, msg='Service Unavailable', hdrs=None, fp=None)):
|
||||
mock_result = MagicMock()
|
||||
mock_result.returncode = 0
|
||||
mock_result.stdout = '503'
|
||||
mock_result.stderr = ''
|
||||
with patch('subprocess.run', return_value=mock_result):
|
||||
result = self.mgr._push_permissions_to_remote(link, 'home', 'homepubkey=')
|
||||
self.assertFalse(result['ok'])
|
||||
self.assertIn('503', result['error'])
|
||||
|
||||
@@ -622,5 +622,63 @@ class TestCellRules(unittest.TestCase):
|
||||
self.assertEqual(mock_apply.call_args.args[0], 'office')
|
||||
|
||||
|
||||
class TestEnsureCellApiDnat(unittest.TestCase):
|
||||
"""Tests for ensure_cell_api_dnat — DNAT wg0:3000 → cell-api:3000."""
|
||||
|
||||
def _wg_exec_no_existing_rules(self, args):
|
||||
r = MagicMock()
|
||||
r.returncode = 1 if '-C' in args else 0 # -C = check: fail = not present
|
||||
r.stdout = ''
|
||||
r.stderr = ''
|
||||
return r
|
||||
|
||||
def _wg_exec_all_rules_exist(self, args):
|
||||
r = MagicMock()
|
||||
r.returncode = 0 # -C succeeds = rule already present
|
||||
r.stdout = ''
|
||||
return r
|
||||
|
||||
def _inspect_ok(self, api_ip='172.20.0.10'):
|
||||
r = MagicMock()
|
||||
r.returncode = 0
|
||||
r.stdout = api_ip
|
||||
return r
|
||||
|
||||
def test_dnat_rules_added_when_not_present(self):
|
||||
with patch.object(firewall_manager, '_run', return_value=self._inspect_ok()), \
|
||||
patch.object(firewall_manager, '_wg_exec',
|
||||
side_effect=self._wg_exec_no_existing_rules) as wg_mock:
|
||||
result = firewall_manager.ensure_cell_api_dnat()
|
||||
|
||||
self.assertTrue(result)
|
||||
calls_args = [c.args[0] for c in wg_mock.call_args_list]
|
||||
dnat_adds = [a for a in calls_args if 'DNAT' in a and '-A' in a]
|
||||
self.assertTrue(len(dnat_adds) >= 1, 'DNAT -A rule must be added')
|
||||
|
||||
def test_dnat_skipped_if_already_present(self):
|
||||
with patch.object(firewall_manager, '_run', return_value=self._inspect_ok()), \
|
||||
patch.object(firewall_manager, '_wg_exec',
|
||||
side_effect=self._wg_exec_all_rules_exist) as wg_mock:
|
||||
result = firewall_manager.ensure_cell_api_dnat()
|
||||
|
||||
self.assertTrue(result)
|
||||
calls_args = [c.args[0] for c in wg_mock.call_args_list]
|
||||
add_calls = [a for a in calls_args if '-A' in a or '-I' in a]
|
||||
self.assertEqual(len(add_calls), 0, 'No rules should be added when they already exist')
|
||||
|
||||
def test_returns_false_when_cell_api_not_found(self):
|
||||
r = MagicMock()
|
||||
r.returncode = 0
|
||||
r.stdout = ''
|
||||
with patch.object(firewall_manager, '_run', return_value=r):
|
||||
result = firewall_manager.ensure_cell_api_dnat()
|
||||
self.assertFalse(result)
|
||||
|
||||
def test_returns_false_on_exception(self):
|
||||
with patch.object(firewall_manager, '_run', side_effect=RuntimeError('docker gone')):
|
||||
result = firewall_manager.ensure_cell_api_dnat()
|
||||
self.assertFalse(result)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user