diff --git a/api/cell_link_manager.py b/api/cell_link_manager.py index c6ec132..46300ac 100644 --- a/api/cell_link_manager.py +++ b/api/cell_link_manager.py @@ -30,6 +30,17 @@ _BACKOFF_BASE_S = 60 _BACKOFF_MAX_S = 3600 +def _remote_api_url(dns_ip: Optional[str]) -> Optional[str]: + """Base URL for a linked cell's API, reached over the WG tunnel. + + Cross-cell peer-sync goes to the remote's Caddy on 443 (the WireGuard server + DNATs VPN-IP:443 → Caddy → API). The API's own :3000 is bound to 127.0.0.1 + and is NOT reachable from another cell, so we must target HTTPS/443, not + http://:3000. + """ + return f"https://{dns_ip}" if dns_ip else None + + def _compute_next_retry(attempts: int) -> str: """Return an ISO timestamp for the earliest next retry using capped exponential backoff.""" delay = min(_BACKOFF_BASE_S * (2 ** (attempts - 1)), _BACKOFF_MAX_S) @@ -66,10 +77,12 @@ class CellLinkManager: changed = True # Phase 1 migration: permission-sync tracking fields if 'remote_api_url' not in link: - link['remote_api_url'] = ( - f"http://{link['dns_ip']}:3000" - if link.get('dns_ip') else None - ) + link['remote_api_url'] = _remote_api_url(link.get('dns_ip')) + changed = True + # Migrate legacy http://:3000 URLs (unreachable across + # cells) to the HTTPS/Caddy form. + elif str(link.get('remote_api_url', '')).startswith('http://'): + link['remote_api_url'] = _remote_api_url(link.get('dns_ip')) changed = True if 'last_push_status' not in link: link['last_push_status'] = 'never' @@ -193,7 +206,10 @@ class CellLinkManager: cmd = [ 'docker', 'exec', 'cell-wireguard', - 'curl', '-s', '-o', '/dev/null', '-w', '%{http_code}', + # -k: the request reaches Caddy by the remote's VPN IP over the + # encrypted WG tunnel, so the TLS cert (issued for the cell's domain) + # won't match the IP — the tunnel already authenticates the peer. + 'curl', '-s', '-k', '-o', '/dev/null', '-w', '%{http_code}', '-X', 'POST', '-H', 'Content-Type: application/json', ] @@ -371,14 +387,24 @@ class CellLinkManager: # ── Public API ──────────────────────────────────────────────────────────── def generate_invite(self, cell_name: str, domain: str) -> Dict[str, Any]: - """Return an invite package describing this cell for another cell to import.""" + """Return an invite package describing this cell for another cell to import. + + The endpoint advertises the cell's public domain (when in a DDNS/ACME + mode) plus this cell's own WireGuard port, rather than a raw external IP — + so the remote cell reaches us by name and a NAT/router can forward each + cell's distinct WG port to the right host. + """ keys = self.wireguard_manager.get_keys() - srv = self.wireguard_manager.get_server_config() server_vpn_ip = self.wireguard_manager._get_configured_address().split('/')[0] + try: + from app import config_manager as _cm + except Exception: + _cm = None + endpoint = self.wireguard_manager.get_advertised_endpoint(_cm) return { 'cell_name': cell_name, 'public_key': keys['public_key'], - 'endpoint': srv.get('endpoint'), + 'endpoint': endpoint, 'vpn_subnet': self.wireguard_manager._get_configured_network(), 'dns_ip': server_vpn_ip, 'domain': domain, @@ -448,15 +474,16 @@ class CellLinkManager: def _push_invite_to_remote(self, link: Dict[str, Any]) -> Dict[str, Any]: """Send OUR invite to the remote cell so it can complete mutual WG pairing. - Called immediately after adding the remote as our WG peer. Uses the - remote's endpoint IP (LAN-reachable before the WG tunnel is up) rather - than the WG-internal dns_ip. Non-fatal — one-sided pairing degrades - gracefully; the admin can pair from the other side manually. + Called immediately after adding the remote as our WG peer, before the WG + tunnel is up. Reaches the remote over the PUBLIC path at its advertised + endpoint host (a domain in DDNS/ACME modes) on Caddy/443 — the API's :3000 + is 127.0.0.1-only and not reachable across cells. Non-fatal — one-sided + pairing degrades gracefully; the admin can pair from the other side. """ endpoint = link.get('endpoint') or '' if not endpoint: return {'ok': False, 'error': 'no endpoint'} - # Parse LAN IP from endpoint (e.g. "192.168.31.52:51820" → "192.168.31.52") + # Host from endpoint (e.g. "alice.pic.ngo:51821" → "alice.pic.ngo"). try: host = endpoint.rsplit(':', 1)[0].strip('[]') except Exception: @@ -471,11 +498,14 @@ class CellLinkManager: except Exception as e: return {'ok': False, 'error': f'could not build own invite: {e}'} - url = f'http://{host}:3000/api/cells/peer-sync/accept-invite' + url = f'https://{host}/api/cells/peer-sync/accept-invite' payload = json.dumps({'invite': own_invite}) cmd = [ 'docker', 'exec', 'cell-wireguard', - 'curl', '-s', '-o', '/dev/null', '-w', '%{http_code}', + # -k: endpoint may be a bare IP (LAN/fallback) whose cert won't match; + # accept-invite carries only public keys and the WG handshake is the + # real authentication. + 'curl', '-s', '-k', '-o', '/dev/null', '-w', '%{http_code}', '-X', 'POST', '-H', 'Content-Type: application/json', '-d', payload, @@ -537,7 +567,7 @@ class CellLinkManager: old_domain = existing.get('domain', '') existing['dns_ip'] = invite['dns_ip'] existing['vpn_subnet'] = invite['vpn_subnet'] - existing['remote_api_url'] = f"http://{invite['dns_ip']}:3000" + existing['remote_api_url'] = _remote_api_url(invite['dns_ip']) if invite.get('endpoint'): existing['endpoint'] = invite['endpoint'] if domain_changed: @@ -599,7 +629,7 @@ class CellLinkManager: 'domain': invite['domain'], 'connected_at': datetime.utcnow().isoformat(), 'permissions': _default_perms(), - 'remote_api_url': f"http://{invite['dns_ip']}:3000", + 'remote_api_url': _remote_api_url(invite['dns_ip']), 'last_push_status': 'never', 'last_push_at': None, 'last_push_error': None, @@ -659,7 +689,7 @@ class CellLinkManager: 'domain': invite['domain'], 'connected_at': datetime.utcnow().isoformat(), 'permissions': perms, - 'remote_api_url': f"http://{invite['dns_ip']}:3000", + 'remote_api_url': _remote_api_url(invite['dns_ip']), 'last_push_status': 'never', 'last_push_at': None, 'last_push_error': None, diff --git a/api/routes/wireguard.py b/api/routes/wireguard.py index d0a9026..c0ee9cb 100644 --- a/api/routes/wireguard.py +++ b/api/routes/wireguard.py @@ -8,15 +8,11 @@ bp = Blueprint('wireguard', __name__) def _effective_endpoint(wireguard_manager, config_manager) -> str: """Return the WireGuard endpoint to embed in peer configs. - Uses wireguard_endpoint from identity config when set (admin override), - falling back to get_external_ip() detection. + Prefers the cell's public domain (DDNS/ACME modes) or an admin override over + the raw external IP, so a peer config points at a name that resolves to the + cell rather than a bare IP. See WireGuardManager.get_advertised_endpoint. """ - srv = wireguard_manager.get_server_config() - override = (config_manager.get_identity().get('wireguard_endpoint') or '').strip() - if override: - port = srv.get('port', 51820) - return override if ':' in override else f'{override}:{port}' - return srv.get('endpoint') or '' + return wireguard_manager.get_advertised_endpoint(config_manager) or '' @bp.route('/api/wireguard/keys', methods=['GET']) def get_wireguard_keys(): diff --git a/api/wireguard_manager.py b/api/wireguard_manager.py index 142214f..e4524e5 100644 --- a/api/wireguard_manager.py +++ b/api/wireguard_manager.py @@ -1054,6 +1054,38 @@ class WireGuardManager(BaseServiceManager): 'vpn_network': self._get_configured_network(), } + # Domain modes whose effective domain is a publicly-resolvable FQDN that the + # WireGuard endpoint should advertise instead of a raw IP. In these modes the + # domain resolves (via DDNS/ACME) to the cell's public IP, so peers and linked + # cells reach the cell by name — which survives IP changes and lets a NAT/router + # forward each cell's WG port to the right host. + PUBLIC_DOMAIN_MODES = ('pic_ngo', 'cloudflare', 'duckdns', 'http01') + + def get_advertised_endpoint(self, config_manager=None) -> Optional[str]: + """Return the WireGuard endpoint (host:port) to advertise to peers/cells. + + Preference order: + 1. an explicit admin override (`_identity.wireguard_endpoint`), + 2. the cell's public domain in a DDNS/ACME mode (`:`), + 3. the detected external IP (`:`) — LAN/fallback. + + The port is always this cell's own configured WireGuard port, so a cell + on a non-default port advertises it correctly (the router forwards that + public port to this host). + """ + port = self._get_configured_port() + identity = config_manager.get_identity() if config_manager is not None else {} + override = (identity.get('wireguard_endpoint') or '').strip() + if override: + return override if ':' in override else f'{override}:{port}' + mode = identity.get('domain_mode', 'lan') + if mode in self.PUBLIC_DOMAIN_MODES and config_manager is not None: + host = (config_manager.get_effective_domain() or '').strip() + if host: + return f'{host}:{port}' + ext = self.get_external_ip() + return f'{ext}:{port}' if ext else None + def get_peer_status(self, public_key: str) -> Dict[str, Any]: """Return live handshake + transfer stats for a peer from `wg show`.""" try: diff --git a/tests/test_cell_link_manager.py b/tests/test_cell_link_manager.py index b95e5fe..c4e86d3 100644 --- a/tests/test_cell_link_manager.py +++ b/tests/test_cell_link_manager.py @@ -26,6 +26,7 @@ def _make_wg_mock(): } wg._get_configured_network.return_value = '10.0.0.0/24' wg._get_configured_address.return_value = '10.0.0.1/24' + wg.get_advertised_endpoint.return_value = '1.2.3.4:51820' wg.add_cell_peer.return_value = True wg.remove_peer.return_value = True return wg @@ -82,6 +83,13 @@ class TestCellLinkManagerInvite(unittest.TestCase): self.assertEqual(invite['cell_name'], 'myhome') self.assertEqual(invite['domain'], 'myhome.local') + def test_generate_invite_endpoint_from_advertised_endpoint(self): + """The invite endpoint comes from get_advertised_endpoint (domain-aware), + not a raw external IP — so the remote cell reaches us by name + our port.""" + self.wg.get_advertised_endpoint.return_value = 'myhome.pic.ngo:51821' + invite = self.mgr.generate_invite('myhome', 'myhome.pic.ngo') + self.assertEqual(invite['endpoint'], 'myhome.pic.ngo:51821') + class TestCellLinkManagerConnections(unittest.TestCase): @@ -182,7 +190,7 @@ class TestCellLinkManagerConnections(unittest.TestCase): result = self.mgr.accept_invite(updated_invite) self.assertEqual(result['dns_ip'], '10.1.0.2') - self.assertEqual(result['remote_api_url'], 'http://10.1.0.2:3000') + self.assertEqual(result['remote_api_url'], 'https://10.1.0.2') self.nm.remove_cell_dns_forward.assert_called() self.nm.add_cell_dns_forward.assert_called_with( domain='office.cell', dns_ip='10.1.0.2') @@ -470,9 +478,10 @@ class TestPushInviteToRemote(unittest.TestCase): result = self.mgr._push_invite_to_remote(link) self.assertFalse(result['ok']) - def test_push_invite_sends_to_correct_lan_host(self): - """The curl URL must use the LAN IP from the endpoint, not the WG dns_ip.""" - link = self._make_link(endpoint='192.168.31.52:51820') + def test_push_invite_sends_to_endpoint_host_over_https(self): + """The curl targets the endpoint host on Caddy/HTTPS (443), not the WG + dns_ip and not the internal :3000 API port.""" + link = self._make_link(endpoint='alice.pic.ngo:51821') captured = {} def fake_run(cmd, **kw): @@ -493,10 +502,11 @@ class TestPushInviteToRemote(unittest.TestCase): self.mgr._push_invite_to_remote(link) url_in_cmd = captured['cmd'][-1] - self.assertIn('192.168.31.52', url_in_cmd) - self.assertIn('accept-invite', url_in_cmd) - # Must NOT use the WG dns_ip (10.1.0.1) - self.assertNotIn('10.1.0.1', url_in_cmd) + self.assertEqual(url_in_cmd, + 'https://alice.pic.ngo/api/cells/peer-sync/accept-invite') + self.assertNotIn(':3000', url_in_cmd) + self.assertNotIn('10.1.0.1', url_in_cmd) # not the WG dns_ip + self.assertIn('-k', captured['cmd']) # cert may not match a bare IP # --------------------------------------------------------------------------- @@ -605,7 +615,7 @@ class TestAcceptInviteNew(unittest.TestCase): with patch('firewall_manager.apply_cell_rules'): result = self.mgr.accept_invite(updated) self.assertEqual(result['dns_ip'], '10.1.0.5') - self.assertEqual(result['remote_api_url'], 'http://10.1.0.5:3000') + self.assertEqual(result['remote_api_url'], 'https://10.1.0.5') self.nm.remove_cell_dns_forward.assert_called() self.nm.add_cell_dns_forward.assert_called_with( domain='office.cell', dns_ip='10.1.0.5') @@ -1047,7 +1057,8 @@ class TestPermissionSync(unittest.TestCase): def test_add_connection_sets_remote_api_url_from_dns_ip(self): link = self._add_office() - self.assertEqual(link['remote_api_url'], 'http://10.1.0.1:3000') + # Cross-cell API is reached over the tunnel via Caddy/443, not :3000. + self.assertEqual(link['remote_api_url'], 'https://10.1.0.1') def test_add_connection_triggers_push(self): push_mock = MagicMock(return_value={'ok': True, 'error': None}) @@ -1321,7 +1332,7 @@ class TestPermissionSync(unittest.TestCase): self.assertIn('last_push_status', link) self.assertIn('last_push_at', link) self.assertIn('last_remote_update_at', link) - self.assertEqual(link['remote_api_url'], 'http://10.1.0.1:3000') + self.assertEqual(link['remote_api_url'], 'https://10.1.0.1') self.assertTrue(link['pending_push']) # pre-existing → marked pending self.assertEqual(link['last_push_status'], 'never') @@ -1330,6 +1341,24 @@ class TestPermissionSync(unittest.TestCase): raw = json.load(f) self.assertIn('pending_push', raw[0]) + def test_load_migrates_legacy_http_3000_url_to_https(self): + """An existing link with the old http://:3000 URL (unreachable across + cells) is rewritten to the HTTPS/Caddy form on load.""" + legacy = [{ + 'cell_name': 'office', + 'public_key': 'officepubkey=', + 'vpn_subnet': '10.1.0.0/24', + 'dns_ip': '10.1.0.9', + 'domain': 'office.cell', + 'permissions': {'inbound': {}, 'outbound': {}}, + 'remote_api_url': 'http://10.1.0.9:3000', + }] + links_file = os.path.join(self.test_dir, 'cell_links.json') + with open(links_file, 'w') as f: + json.dump(legacy, f) + link = self.mgr.list_connections()[0] + self.assertEqual(link['remote_api_url'], 'https://10.1.0.9') + class TestExitOffer(unittest.TestCase): """Tests for Phase 2: exit-offer signaling.""" diff --git a/tests/test_wireguard_endpoints.py b/tests/test_wireguard_endpoints.py index c9f6b5a..1a16b0f 100644 --- a/tests/test_wireguard_endpoints.py +++ b/tests/test_wireguard_endpoints.py @@ -90,11 +90,13 @@ class TestWireGuardEndpoints(unittest.TestCase): 'endpoint': '1.2.3.4:51820', 'port': 51820, } + mock_wg.get_advertised_endpoint.return_value = '1.2.3.4:51820' r = self.client.get('/api/wireguard/server-config') self.assertEqual(r.status_code, 200) data = json.loads(r.data) self.assertIn('public_key', data) self.assertIn('endpoint', data) + self.assertEqual(data.get('effective_endpoint'), '1.2.3.4:51820') @patch('app.wireguard_manager') def test_server_config_returns_500_on_exception(self, mock_wg): diff --git a/tests/test_wireguard_manager.py b/tests/test_wireguard_manager.py index edfcd2a..2bd1433 100644 --- a/tests/test_wireguard_manager.py +++ b/tests/test_wireguard_manager.py @@ -885,5 +885,60 @@ class TestCellRoutes(unittest.TestCase): mock_route.assert_called_once_with('10.1.0.0/24') +class _FakeCM: + """Minimal config_manager stand-in for get_advertised_endpoint tests.""" + def __init__(self, identity, effective_domain): + self._identity = identity + self._effective = effective_domain + + def get_identity(self): + return self._identity + + def get_effective_domain(self): + return self._effective + + +class TestAdvertisedEndpoint(unittest.TestCase): + """get_advertised_endpoint prefers domain/override over the raw external IP.""" + + def setUp(self): + self.test_dir = tempfile.mkdtemp() + patcher = patch.object(WireGuardManager, '_syncconf', return_value=None) + patcher.start() + self.addCleanup(patcher.stop) + self.wg = WireGuardManager(self.test_dir, self.test_dir) + # Pin the configured port and external IP for deterministic endpoints. + self.wg._get_configured_port = MagicMock(return_value=51821) + self.wg.get_external_ip = MagicMock(return_value='198.51.100.7') + + def tearDown(self): + shutil.rmtree(self.test_dir, ignore_errors=True) + + def test_public_mode_uses_effective_domain_and_own_port(self): + cm = _FakeCM({'domain_mode': 'pic_ngo'}, 'alice.pic.ngo') + self.assertEqual(self.wg.get_advertised_endpoint(cm), 'alice.pic.ngo:51821') + + def test_lan_mode_falls_back_to_external_ip(self): + cm = _FakeCM({'domain_mode': 'lan'}, 'cell') + self.assertEqual(self.wg.get_advertised_endpoint(cm), '198.51.100.7:51821') + + def test_admin_override_wins(self): + cm = _FakeCM({'domain_mode': 'pic_ngo', 'wireguard_endpoint': 'vpn.example.com'}, 'alice.pic.ngo') + self.assertEqual(self.wg.get_advertised_endpoint(cm), 'vpn.example.com:51821') + + def test_override_with_explicit_port_kept(self): + cm = _FakeCM({'domain_mode': 'lan', 'wireguard_endpoint': 'vpn.example.com:7777'}, 'cell') + self.assertEqual(self.wg.get_advertised_endpoint(cm), 'vpn.example.com:7777') + + def test_none_when_no_domain_and_no_external_ip(self): + self.wg.get_external_ip = MagicMock(return_value=None) + cm = _FakeCM({'domain_mode': 'lan'}, 'cell') + self.assertIsNone(self.wg.get_advertised_endpoint(cm)) + + def test_public_mode_without_domain_falls_back_to_ip(self): + cm = _FakeCM({'domain_mode': 'cloudflare'}, '') + self.assertEqual(self.wg.get_advertised_endpoint(cm), '198.51.100.7:51821') + + if __name__ == '__main__': unittest.main() \ No newline at end of file