diff --git a/api/firewall_manager.py b/api/firewall_manager.py
index 8e8804b..8e003a1 100644
--- a/api/firewall_manager.py
+++ b/api/firewall_manager.py
@@ -363,8 +363,12 @@ def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str
     tag = _cell_tag(cell_name)
     clear_cell_rules(cell_name)
 
-    # Catch-all DROP — inserted first so it ends up at the bottom
+    # Catch-all DROP for new connections only — inserted first so it ends up at the bottom.
+    # Using state=NEW,INVALID preserves ESTABLISHED/RELATED packets (ICMP replies,
+    # TCP ACKs) for connections initiated by local peers to this cell, which would
+    # otherwise be dropped before reaching the stateful ACCEPT rule.
     _iptables(['-I', 'FORWARD', '-s', vpn_subnet,
+               '-m', 'state', '--state', 'NEW,INVALID',
                '-m', 'comment', '--comment', tag, '-j', 'DROP'])
 
     # Exit relay ACCEPT — allow internet-bound traffic from this cell's peers.
diff --git a/tests/e2e/wg/test_cell_to_cell_routing.py b/tests/e2e/wg/test_cell_to_cell_routing.py
new file mode 100644
index 0000000..8135d3c
--- /dev/null
+++ b/tests/e2e/wg/test_cell_to_cell_routing.py
@@ -0,0 +1,334 @@
+"""
+E2E test: cross-cell routing for a split-tunnel VPN peer.
+
+Creates a temporary WireGuard peer on cell2 (pic1 / test), brings up a real
+WireGuard tunnel from the test-runner host, and verifies that cell1 (pic0 / dev)
+is reachable end-to-end via the cell-to-cell link.
+
+Why this test is meaningful
+---------------------------
+10.0.0.1 is cell1's WireGuard server IP, reachable ONLY inside cell1's
+cell-wireguard Docker container. It is NOT reachable directly from the
+test-runner host (verified: 100% packet loss without VPN).
+
+If a ping to 10.0.0.1 succeeds during the test, the full path was taken:
+
+  [test-runner pic-e2e-c2c] → 192.168.31.52:51821 → [pic1 cell-wireguard FORWARD]
+    → [cell-to-cell WG tunnel] → [pic0 cell-wireguard] → 10.0.0.1
+
+Prerequisites
+-------------
+  * SSH access to 192.168.31.52 (pic1) as 'roof' with no passphrase
+  * `wg-quick` and passwordless `sudo` available on the test runner (pic0)
+  * Both cells must have an active cell-to-cell WireGuard handshake
+
+Skip conditions are checked at fixture time; no manual flag needed.
+"""
+import functools
+import json
+import os
+import subprocess
+import secrets
+import time
+import urllib.error
+import urllib.request
+
+import pytest
+
+# -------------------------------------------------------------------------
+# Constants
+# -------------------------------------------------------------------------
+
+PIC1_LAN = '192.168.31.52'       # test cell (cell2)
+PIC1_WG_PORT = 51821             # WireGuard ListenPort on pic1
+PIC1_WG_PUBKEY = 'ITl3+KfcNjsDq9ztE+1TC10rmeqaLmpGgTXEEk07BiE='
+
+PIC1_WG_SERVER_IP = '10.0.2.1'   # cell2's WireGuard server IP
+PIC0_WG_SERVER_IP = '10.0.0.1'   # cell1's WireGuard server IP (cross-cell target)
+
+TEST_PEER_IP = '10.0.2.250'      # unused IP in cell2's VPN subnet
+TEST_PEER_CIDR = f'{TEST_PEER_IP}/32'
+IFACE_NAME = 'pic-e2e-c2c'
+
+# AllowedIPs for the test peer: cell2's local subnet + cell1's subnet (cross-cell)
+SPLIT_TUNNEL_ALLOWED_IPS = '10.0.2.0/24, 10.0.0.0/24'
+
+IPTABLES_COMMENT = 'pic-e2e-c2c-test'
+
+pytestmark = pytest.mark.wg
+
+
+# -------------------------------------------------------------------------
+# Helpers
+# -------------------------------------------------------------------------
+
+def _run(cmd, **kw):
+    """Run argv list *cmd*, capturing stdout/stderr as text; return the CompletedProcess."""
+    return subprocess.run(cmd, capture_output=True, text=True, **kw)
+
+
+def _ssh(cmd, timeout=15):
+    """Run a command on pic1 via SSH and return the CompletedProcess."""
+    return _run(
+        ['ssh', '-o', 'StrictHostKeyChecking=no', '-o', 'BatchMode=yes',
+         '-o', 'ConnectTimeout=5', f'roof@{PIC1_LAN}', cmd],
+        timeout=timeout,
+    )
+
+
+def _pic1_wg(args, timeout=10):
+    """Run a command inside pic1's cell-wireguard container via SSH."""
+    return _ssh('docker exec cell-wireguard ' + args, timeout=timeout)
+
+
+def _ping(ip, count=3, wait=2):
+    """Return True if `ping` to *ip* exits 0; a ping that overruns its budget counts as failure."""
+    try:
+        r = _run(['ping', '-c', str(count), '-W', str(wait), ip],
+                 timeout=count * wait + 5)
+    except subprocess.TimeoutExpired:
+        return False
+    return r.returncode == 0
+
+
+def _cleanup_iface():
+    """Delete the test WireGuard link on the runner (fallback when wg-quick down fails)."""
+    _run(['sudo', 'ip', 'link', 'delete', IFACE_NAME], timeout=5)
+
+
+def _cleanup_pic1_peer(pubkey):
+    """Remove the temporary peer from pic1's wg0."""
+    _pic1_wg(f'wg set wg0 peer {pubkey} remove')
+
+
+def _cleanup_pic1_iptables():
+    """Delete the permissive FORWARD rule that wg_setup inserts on pic1."""
+    _pic1_wg(f'iptables -D FORWARD -s {TEST_PEER_IP} -j ACCEPT '
+             f'-m comment --comment {IPTABLES_COMMENT}')
+
+
+def _best_effort(step, *args, **kw):
+    """Run one cleanup step; a failed or hung command must not block the remaining steps."""
+    try:
+        return step(*args, **kw)
+    except (OSError, subprocess.TimeoutExpired):
+        return None
+
+
+def _write_wg_conf(conf_path, privkey):
+    """Write the wg-quick config for the test peer; the file holds the private key."""
+    conf = (
+        f'[Interface]\n'
+        f'PrivateKey = {privkey}\n'
+        f'Address = {TEST_PEER_IP}/32\n'
+        f'\n'
+        f'[Peer]\n'
+        f'PublicKey = {PIC1_WG_PUBKEY}\n'
+        f'Endpoint = {PIC1_LAN}:{PIC1_WG_PORT}\n'
+        f'AllowedIPs = {SPLIT_TUNNEL_ALLOWED_IPS}\n'
+        f'PersistentKeepalive = 25\n'
+    )
+    # Create the file 0600 from the start instead of chmod-ing after the write,
+    # so the key is never readable through a wider default mode.
+    fd = os.open(conf_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+    with os.fdopen(fd, 'w') as f:
+        f.write(conf)
+
+
+# -------------------------------------------------------------------------
+# Prerequisite check (lazy)
+# -------------------------------------------------------------------------
+
+@functools.lru_cache(maxsize=None)
+def _check_prerequisites():
+    """
+    Return a skip reason string, or None if all prereqs are met.
+
+    Cached and called from the fixture rather than at import, so merely
+    collecting this module never runs sudo or SSH.
+    """
+    # Check wg-quick
+    if _run(['which', 'wg-quick']).returncode != 0:
+        return 'wg-quick not found on test runner'
+    # Check sudo
+    if _run(['sudo', '-n', 'true']).returncode != 0:
+        return 'passwordless sudo not available on test runner'
+    # Check SSH to pic1
+    try:
+        r = _ssh('echo ok', timeout=6)
+    except subprocess.TimeoutExpired:
+        return f'SSH to {PIC1_LAN} timed out'
+    if r.returncode != 0 or 'ok' not in r.stdout:
+        return f'SSH to {PIC1_LAN} failed: {r.stderr.strip() or r.stdout.strip()}'
+    # That 10.0.0.1 is NOT reachable directly is asserted by
+    # test_prerequisites_10_0_0_1_not_reachable_directly, not here.
+    return None
+
+
+# -------------------------------------------------------------------------
+# Fixtures
+# -------------------------------------------------------------------------
+
+@pytest.fixture(scope='module')
+def wg_setup(tmp_path_factory):
+    """
+    Module-scoped fixture: adds test peer to pic1, brings up wg interface on
+    pic0 host, yields, then tears everything down.
+
+    Teardown runs in a ``finally`` so a failed setup step does not leave the
+    peer or the iptables rule behind on pic1.
+
+    Yields a dict:
+      {
+        'peer_ip':     '10.0.2.250',
+        'allowed_ips': '10.0.2.0/24, 10.0.0.0/24',
+        'privkey':     generated WireGuard private key,
+        'pubkey':      matching public key,
+        'conf_path':   path of the wg-quick config file,
+      }
+    """
+    reason = _check_prerequisites()
+    if reason:
+        pytest.skip(reason)
+
+    tmp_path = tmp_path_factory.mktemp('wg_e2e_c2c')
+    conf_path = str(tmp_path / f'{IFACE_NAME}.conf')
+    pubkey = None
+
+    try:
+        # --- Generate a WireGuard key pair ---
+        priv_r = _run(['wg', 'genkey'], timeout=5)
+        assert priv_r.returncode == 0, f'wg genkey failed: {priv_r.stderr}'
+        privkey = priv_r.stdout.strip()
+
+        pub_r = _run(['wg', 'pubkey'], input=privkey, timeout=5)
+        assert pub_r.returncode == 0, f'wg pubkey failed: {pub_r.stderr}'
+        pubkey = pub_r.stdout.strip()
+
+        # --- Add peer to pic1's wg0 (live, no restart needed) ---
+        r = _pic1_wg(f'wg set wg0 peer {pubkey} allowed-ips {TEST_PEER_CIDR} persistent-keepalive 25')
+        assert r.returncode == 0, f'wg set peer failed on pic1: {r.stderr}'
+
+        # --- Add permissive iptables rule so test traffic passes FORWARD ---
+        r = _pic1_wg(
+            f'iptables -I FORWARD 1 -s {TEST_PEER_IP} -j ACCEPT '
+            f'-m comment --comment {IPTABLES_COMMENT}'
+        )
+        assert r.returncode == 0, f'iptables -I FORWARD failed on pic1: {r.stderr}'
+
+        # --- Write wg-quick config on the test runner ---
+        _write_wg_conf(conf_path, privkey)
+
+        # --- Bring up the WireGuard interface ---
+        up_r = _run(['sudo', 'wg-quick', 'up', conf_path], timeout=15)
+        assert up_r.returncode == 0, f'wg-quick up failed: {up_r.stderr}\n{up_r.stdout}'
+
+        # Give WireGuard a moment to establish the handshake
+        time.sleep(3)
+
+        yield {
+            'peer_ip': TEST_PEER_IP,
+            'allowed_ips': SPLIT_TUNNEL_ALLOWED_IPS,
+            'privkey': privkey,
+            'pubkey': pubkey,
+            'conf_path': conf_path,
+        }
+    finally:
+        # --- Teardown (each step best-effort so one failure cannot skip the rest) ---
+        down_r = _best_effort(_run, ['sudo', 'wg-quick', 'down', conf_path], timeout=15)
+        if down_r is None or down_r.returncode != 0:
+            _best_effort(_cleanup_iface)
+        _best_effort(os.unlink, conf_path)
+        _best_effort(_cleanup_pic1_iptables)
+        if pubkey is not None:
+            _best_effort(_cleanup_pic1_peer, pubkey)
+
+
+# -------------------------------------------------------------------------
+# Tests
+# -------------------------------------------------------------------------
+
+class TestCellToCellRouting:
+    """
+    Full end-to-end: split-tunnel peer on cell2 reaches cell1 via cell-to-cell tunnel.
+    """
+
+    def test_prerequisites_10_0_0_1_not_reachable_directly(self):
+        """Confirm 10.0.0.1 is NOT reachable from host without VPN (test validity check)."""
+        assert not _ping(PIC0_WG_SERVER_IP, count=1, wait=1), (
+            f'{PIC0_WG_SERVER_IP} is reachable WITHOUT the VPN — the test would be '
+            f'a false positive. The test is only meaningful when this IP is unreachable '
+            f'without the tunnel.'
+        )
+
+    def test_cell2_wg_ip_reachable(self, wg_setup):
+        """Cell2's WireGuard server IP is reachable (basic tunnel sanity)."""
+        assert _ping(PIC1_WG_SERVER_IP), (
+            f'Cell2 WG server IP {PIC1_WG_SERVER_IP} not reachable. '
+            f'Handshake may not have established. '
+            f'Peer allowed-ips: {wg_setup["allowed_ips"]}'
+        )
+
+    def test_handshake_established(self, wg_setup):
+        """A WireGuard handshake with pic1 has completed (within 30 s)."""
+        deadline = time.time() + 30
+        while time.time() < deadline:
+            r = _run(['sudo', 'wg', 'show', IFACE_NAME], timeout=5)
+            if 'latest handshake' in r.stdout:
+                return
+            time.sleep(2)
+        pytest.fail(
+            f'No WireGuard handshake with pic1 after 30 s.\n'
+            f'wg show output:\n{r.stdout}'
+        )
+
+    def test_cross_cell_wg_ip_reachable(self, wg_setup):
+        """
+        Cell1's WireGuard IP (10.0.0.1) is reachable from a peer connected to cell2.
+
+        This is the critical cross-cell routing test. The full path is:
+          test-runner → pic-e2e-c2c → pic1 cell-wireguard FORWARD → cell-to-cell tunnel → pic0 10.0.0.1
+        """
+        assert _ping(PIC0_WG_SERVER_IP, count=3, wait=3), (
+            f'Cell1 WG IP {PIC0_WG_SERVER_IP} NOT reachable from split-tunnel peer on cell2. '
+            f'\nAllowed IPs: {wg_setup["allowed_ips"]}'
+            f'\nThis means the cell-to-cell routing is broken. Check:'
+            f'\n 1. pic1 FORWARD chain has ESTABLISHED,RELATED ACCEPT'
+            f'\n 2. pic1 wg0.conf has AllowedIPs=10.0.0.0/24 for the dev cell peer'
+            f'\n 3. Cell-to-cell WireGuard handshake is recent (wg show on pic1)'
+        )
+
+    def test_cross_cell_api_reachable(self, wg_setup):
+        """Cell1's API /health is reachable through the cell-to-cell tunnel."""
+        url = f'http://{PIC0_WG_SERVER_IP}:3000/health'
+        try:
+            with urllib.request.urlopen(url, timeout=8) as resp:
+                body = json.loads(resp.read())
+        except urllib.error.URLError as e:
+            pytest.fail(
+                f'Cell1 API at {url} not reachable via cell-to-cell tunnel: {e}. '
+                f'\nNote: if test_cross_cell_wg_ip_reachable passed but this fails, '
+                f'the tunnel is up but port 3000 may be blocked by cell1\'s firewall.'
+            )
+        assert body.get('status') == 'healthy', (
+            f'Cell1 API returned unexpected health: {body}'
+        )
+
+    def test_cross_cell_web_reachable(self, wg_setup):
+        """Cell1's web service (port 80 via Caddy) is reachable through the tunnel."""
+        # Port 80 goes to Caddy → services. We expect any HTTP response (even a redirect).
+        url = f'http://{PIC0_WG_SERVER_IP}/'
+        try:
+            with urllib.request.urlopen(url, timeout=8) as resp:
+                assert resp.status in (200, 301, 302, 307, 308), (
+                    f'Unexpected HTTP status from cell1 Caddy: {resp.status}'
+                )
+        except urllib.error.HTTPError as e:
+            # HTTPError means we got a response — tunnel works even if it's a 4xx/5xx
+            assert e.code < 500, (
+                f'Cell1 Caddy returned server error {e.code} — may indicate a Caddy issue'
+            )
+        except urllib.error.URLError as e:
+            pytest.fail(
+                f'Cell1 web (Caddy) at {url} not reachable via tunnel: {e}'
+            )