Files
pic/tests/test_connectivity_health.py
T
roof d39c091cec
Unit Tests / test (push) Successful in 13m15s
feat: connectivity redesign phase 3+4 — per-connection health, per-peer fallback, connection CRUD API
Health probes (probe_health/refresh_health) are type-aware: WireGuard
checks the last WG handshake timestamp, OpenVPN checks the tun/tap
interface, Tor checks the control-port GETINFO, and sshuttle/proxy
types do a TCP reachability probe to the remote endpoint. Results are
persisted via set_connection_status and wired into the health_monitor_loop
so the UI always has a current health snapshot without polling.

Per-peer fail-open semantics: VPN, SSH, and proxy connections default to
fail-closed (kill-switch stays active even when the tunnel is down).
Tor defaults to fail-open. The default can be overridden per-peer via
set_peer_failopen/effective_failopen. apply_routes skips the fwmark and
kill-switch rules for any fail-open peer whose connection health is
"down", letting traffic fall back to direct routing transparently; a
connection whose health is still "unknown" is routed normally.

New generic admin-only connection CRUD endpoints (GET/POST/PUT/DELETE
/api/connectivity/connections, GET /<id>/health, PUT
/api/connectivity/peers/<peer>/failopen) are guarded by the existing
admin role check. connection.create, connection.update, connection.delete,
and peer.failopen are all registered in ROUTE_ACTION_MAP for the audit
hook so every change is recorded in the owner-visible change log.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 21:50:45 -04:00

454 lines
19 KiB
Python

"""
Phase 3 tests for ConnectivityManager — per-connection health probing,
refresh_health persistence + TTL cache, per-peer configurable fallback
(exit_failopen + type defaults), and apply_routes fail-open/fail-closed
behaviour while a connection is DOWN.
All real subprocess/socket access is mocked via the small helper methods
(_exec_in_container, _tcp_reachable, _container_running, _listener_reachable,
_wg_ip) so no live infrastructure is touched.
"""
import os
import sys
import shutil
import tempfile
import time
import unittest
from unittest.mock import MagicMock, patch
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'api'))
import connectivity_manager as cm_module
from connectivity_manager import ConnectivityManager
def _make_manager(config_manager=None, peer_registry=None, tmp_dir=None):
    """Construct a ConnectivityManager backed by mocks for use in tests.

    Any collaborator left as ``None`` is replaced by a ``MagicMock``: the
    default config manager reports a fixed identity and an empty connection
    list, and the default peer registry reports no peers. ``tmp_dir`` is
    passed as both ``data_dir`` and ``config_dir``; a new temporary directory
    is created when it is omitted.

    ``ConnectivityManager._subscribe_to_events`` is swapped for a no-op while
    the instance is being constructed.
    """
    workdir = tempfile.mkdtemp() if tmp_dir is None else tmp_dir

    if config_manager is None:
        config_manager = MagicMock(**{
            'get_identity.return_value': {
                'cell_name': 'test', 'ip_range': '172.20.0.0/16'},
            'list_connections.return_value': [],
        })

    if peer_registry is None:
        peer_registry = MagicMock(**{'list_peers.return_value': []})

    # The patch only needs to be active during __init__; leaving the
    # ``with`` block on return restores the real method.
    no_subscription = patch.object(
        ConnectivityManager, '_subscribe_to_events', lambda self: None)
    with no_subscription:
        return ConnectivityManager(
            config_manager=config_manager,
            peer_registry=peer_registry,
            data_dir=workdir,
            config_dir=workdir,
        )
def _cp(returncode=0, stdout='', stderr=''):
return MagicMock(returncode=returncode, stdout=stdout, stderr=stderr)
# ---------------------------------------------------------------------------
# probe_health per type
# ---------------------------------------------------------------------------
class TestProbeHealth(unittest.TestCase):
    """Health verdicts returned by ``probe_health`` for each connection type.

    Every test drives the manager through patched helper methods
    (``_exec_in_container``, ``_wg_ip``, ``_container_running``,
    ``_tcp_reachable``, ``_listener_reachable``) and checks the resulting
    health string.
    """

    def setUp(self):
        self.tmp = tempfile.mkdtemp()
        # Registered before the manager is built: cleanups still run when
        # setUp raises, whereas tearDown would be skipped and the directory
        # would be left behind.
        self.addCleanup(shutil.rmtree, self.tmp, ignore_errors=True)
        self.mgr = _make_manager(tmp_dir=self.tmp)

    # fixtures / helpers ----------------------------------------------------
    # Factories return a fresh dict per call so tests never share state.

    @staticmethod
    def _wg_conn(iface='wgext_a'):
        return {'id': 'c1', 'type': 'wireguard_ext', 'iface': iface}

    @staticmethod
    def _ovpn_conn():
        return {'id': 'c2', 'type': 'openvpn', 'iface': 'ovpn_x'}

    @staticmethod
    def _tor_conn():
        return {'id': 'c3', 'type': 'tor'}

    @staticmethod
    def _sshuttle_conn():
        return {'id': 'c4', 'type': 'sshuttle', 'redirect_port': 9100,
                'config': {'host': 'ssh.example.com', 'port': 22}}

    @staticmethod
    def _proxy_conn():
        return {'id': 'c5', 'type': 'proxy',
                'config': {'host': 'proxy.example.com', 'port': 3128}}

    def _probe(self, conn):
        """Probe ``conn`` and return only the health string.

        The unpacking insists that ``probe_health`` returns exactly a
        ``(health, detail)`` pair.
        """
        health, _detail = self.mgr.probe_health(conn)
        return health

    def _probe_with_exec(self, conn, exec_result):
        """Probe ``conn`` while ``_exec_in_container`` returns ``exec_result``."""
        with patch.object(self.mgr, '_exec_in_container',
                          return_value=exec_result):
            return self._probe(conn)

    # wireguard_ext ---------------------------------------------------------

    def test_wireguard_recent_handshake_working(self):
        handshake = int(time.time()) - 10
        health = self._probe_with_exec(
            self._wg_conn(), _cp(stdout=f'PUBKEY\t{handshake}\n'))
        self.assertEqual(health, 'working')

    def test_wireguard_stale_handshake_down(self):
        handshake = int(time.time()) - 9999
        health = self._probe_with_exec(
            self._wg_conn(), _cp(stdout=f'PUBKEY\t{handshake}\n'))
        self.assertEqual(health, 'down')

    def test_wireguard_no_handshake_down(self):
        health = self._probe_with_exec(
            self._wg_conn(), _cp(stdout='PUBKEY\t0\n'))
        self.assertEqual(health, 'down')

    def test_wireguard_exec_fails_down(self):
        # The exec helper yielding None stands in for a failed exec.
        health = self._probe_with_exec(self._wg_conn(), None)
        self.assertEqual(health, 'down')

    def test_wireguard_no_iface_unknown(self):
        self.assertEqual(self._probe(self._wg_conn(iface=None)), 'unknown')

    # openvpn ---------------------------------------------------------------

    def test_openvpn_tun_up_working(self):
        health = self._probe_with_exec(
            self._ovpn_conn(), _cp(stdout='5: ovpn_x: <UP,LOWER_UP>'))
        self.assertEqual(health, 'working')

    def test_openvpn_tun_down(self):
        health = self._probe_with_exec(
            self._ovpn_conn(), _cp(stdout='5: ovpn_x: <DOWN>'))
        self.assertEqual(health, 'down')

    def test_openvpn_falls_back_to_wg_container_tun(self):
        # With no exec result available, the verdict comes from _wg_ip.
        with patch.object(self.mgr, '_exec_in_container', return_value=None), \
                patch.object(self.mgr, '_wg_ip',
                             return_value=_cp(stdout='tun0: <UP>')):
            health = self._probe(self._ovpn_conn())
        self.assertEqual(health, 'working')

    # tor -------------------------------------------------------------------

    def test_tor_bootstrapped_working(self):
        with patch.object(self.mgr, '_container_running', return_value=True), \
                patch.object(self.mgr, '_exec_in_container',
                             return_value=_cp(stdout='Bootstrapped 100% (done)')):
            health = self._probe(self._tor_conn())
        self.assertEqual(health, 'working')

    def test_tor_container_down(self):
        with patch.object(self.mgr, '_container_running', return_value=False):
            health = self._probe(self._tor_conn())
        self.assertEqual(health, 'down')

    def test_tor_running_but_no_bootstrap_log_still_working(self):
        with patch.object(self.mgr, '_container_running', return_value=True), \
                patch.object(self.mgr, '_exec_in_container',
                             return_value=_cp(stdout='')):
            health = self._probe(self._tor_conn())
        self.assertEqual(health, 'working')

    # sshuttle --------------------------------------------------------------

    def test_sshuttle_host_and_listener_working(self):
        with patch.object(self.mgr, '_tcp_reachable', return_value=True), \
                patch.object(self.mgr, '_listener_reachable', return_value=True):
            health = self._probe(self._sshuttle_conn())
        self.assertEqual(health, 'working')

    def test_sshuttle_host_unreachable_down(self):
        with patch.object(self.mgr, '_tcp_reachable', return_value=False):
            health = self._probe(self._sshuttle_conn())
        self.assertEqual(health, 'down')

    def test_sshuttle_listener_down(self):
        with patch.object(self.mgr, '_tcp_reachable', return_value=True), \
                patch.object(self.mgr, '_listener_reachable', return_value=False):
            health = self._probe(self._sshuttle_conn())
        self.assertEqual(health, 'down')

    def test_sshuttle_no_host_unknown(self):
        conn = {'id': 'c4', 'type': 'sshuttle', 'config': {}}
        self.assertEqual(self._probe(conn), 'unknown')

    # proxy -----------------------------------------------------------------

    def test_proxy_reachable_working(self):
        with patch.object(self.mgr, '_tcp_reachable', return_value=True):
            health = self._probe(self._proxy_conn())
        self.assertEqual(health, 'working')

    def test_proxy_unreachable_down(self):
        with patch.object(self.mgr, '_tcp_reachable', return_value=False):
            health = self._probe(self._proxy_conn())
        self.assertEqual(health, 'down')

    def test_proxy_missing_config_unknown(self):
        conn = {'id': 'c5', 'type': 'proxy', 'config': {}}
        self.assertEqual(self._probe(conn), 'unknown')

    # generic ---------------------------------------------------------------

    def test_unknown_type_returns_unknown(self):
        self.assertEqual(
            self._probe({'id': 'x', 'type': 'bogus'}), 'unknown')

    def test_probe_never_raises(self):
        # A helper blowing up must surface as an 'unknown' verdict whose
        # detail carries the exception text, not as a raised exception.
        conn = {'id': 'c5', 'type': 'proxy',
                'config': {'host': 'h', 'port': 1}}
        with patch.object(self.mgr, '_tcp_reachable',
                          side_effect=RuntimeError('boom')):
            health, detail = self.mgr.probe_health(conn)
        self.assertEqual(health, 'unknown')
        self.assertIn('boom', detail)
# ---------------------------------------------------------------------------
# refresh_health — persistence + TTL cache
# ---------------------------------------------------------------------------
class TestRefreshHealth(unittest.TestCase):
    """``refresh_health``: status persistence, targeting, TTL and force.

    The config manager is a ``MagicMock`` whose ``list_connections`` returns
    ``self.conns``; tests mutate that list in place to shape the scenario.
    """

    def setUp(self):
        self.tmp = tempfile.mkdtemp()
        # Registered before the manager is built: cleanups still run when
        # setUp raises, whereas tearDown would be skipped and the directory
        # would be left behind.
        self.addCleanup(shutil.rmtree, self.tmp, ignore_errors=True)
        self.conns = [
            {'id': 'c1', 'type': 'proxy', 'enabled': True,
             'config': {'host': 'p', 'port': 3128}, 'status': {}},
        ]
        self.cm = MagicMock()
        self.cm.list_connections.return_value = self.conns
        self.mgr = _make_manager(config_manager=self.cm, tmp_dir=self.tmp)

    def _mark_first_connection_fresh(self):
        """Give c1 a 'working' status stamped with the manager's current time."""
        self.conns[0]['status'] = {
            'health': 'working', 'last_check': self.mgr._now_iso()}

    def test_refresh_persists_status(self):
        with patch.object(self.mgr, '_tcp_reachable', return_value=True):
            result = self.mgr.refresh_health()
        self.assertEqual(result['c1'], 'working')
        self.cm.set_connection_status.assert_called_once()
        conn_id, status = self.cm.set_connection_status.call_args.args
        self.assertEqual(conn_id, 'c1')
        self.assertEqual(status['health'], 'working')
        self.assertIsNotNone(status['last_check'])

    def test_refresh_single_connection_only(self):
        self.conns.append({'id': 'c2', 'type': 'proxy', 'enabled': True,
                           'config': {'host': 'p2', 'port': 3128},
                           'status': {}})
        with patch.object(self.mgr, '_tcp_reachable', return_value=True):
            result = self.mgr.refresh_health(connection_id='c2')
        self.assertEqual(list(result.keys()), ['c2'])

    def test_ttl_skips_fresh_connections(self):
        self._mark_first_connection_fresh()
        with patch.object(self.mgr, '_tcp_reachable') as tcp:
            result = self.mgr.refresh_health()
        # A just-checked connection is served from its stored status:
        # no probe is made and nothing is written back.
        tcp.assert_not_called()
        self.assertEqual(result['c1'], 'working')
        self.cm.set_connection_status.assert_not_called()

    def test_force_reprobes_even_when_fresh(self):
        self._mark_first_connection_fresh()
        with patch.object(self.mgr, '_tcp_reachable', return_value=False):
            self.mgr.refresh_health(force=True)
        self.cm.set_connection_status.assert_called_once()

    def test_disabled_connection_skipped(self):
        self.conns[0]['enabled'] = False
        with patch.object(self.mgr, '_tcp_reachable', return_value=True) as tcp:
            result = self.mgr.refresh_health()
        tcp.assert_not_called()
        self.assertEqual(result, {})
# ---------------------------------------------------------------------------
# per-peer fail-open resolution
# ---------------------------------------------------------------------------
class TestFailopenResolution(unittest.TestCase):
    """Resolution of a peer's fail-open setting and the setter that stores it.

    ``effective_failopen`` is checked against the per-type defaults (tor
    fails open, wireguard_ext fails closed) and against an explicit
    ``exit_failopen`` override on the peer. ``set_peer_failopen`` is checked
    for the update it sends to the peer registry and for its error results.
    """

    def setUp(self):
        self.tmp = tempfile.mkdtemp()
        # Registered before the manager is built: cleanups still run when
        # setUp raises, whereas tearDown would be skipped and the directory
        # would be left behind.
        self.addCleanup(shutil.rmtree, self.tmp, ignore_errors=True)
        self.pr = MagicMock()
        self.mgr = _make_manager(peer_registry=self.pr, tmp_dir=self.tmp)

    def _set_failopen_with_routes_stubbed(self, peer_name, value):
        """Call ``set_peer_failopen`` with ``apply_routes`` replaced by a stub."""
        with patch.object(self.mgr, 'apply_routes',
                          return_value={'ok': True}):
            return self.mgr.set_peer_failopen(peer_name, value)

    # effective_failopen ----------------------------------------------------

    def test_type_default_tor_fails_open(self):
        self.assertTrue(
            self.mgr.effective_failopen({'peer': 'a'}, {'type': 'tor'}))

    def test_type_default_wireguard_fails_closed(self):
        self.assertFalse(
            self.mgr.effective_failopen(
                {'peer': 'a'}, {'type': 'wireguard_ext'}))

    def test_override_true_beats_type_default(self):
        peer = {'peer': 'a', 'exit_failopen': True}
        self.assertTrue(
            self.mgr.effective_failopen(peer, {'type': 'wireguard_ext'}))

    def test_override_false_beats_tor_default(self):
        peer = {'peer': 'a', 'exit_failopen': False}
        self.assertFalse(self.mgr.effective_failopen(peer, {'type': 'tor'}))

    def test_none_override_uses_type_default(self):
        # An explicit None is treated like a missing override.
        peer = {'peer': 'a', 'exit_failopen': None}
        self.assertTrue(self.mgr.effective_failopen(peer, {'type': 'tor'}))

    # set_peer_failopen -----------------------------------------------------

    def test_set_peer_failopen_updates_peer(self):
        self.pr.get_peer.return_value = {'peer': 'a'}
        result = self._set_failopen_with_routes_stubbed('a', True)
        self.assertTrue(result['ok'])
        self.pr.update_peer.assert_called_once_with(
            'a', {'exit_failopen': True})

    def test_set_peer_failopen_clear_with_null(self):
        self.pr.get_peer.return_value = {'peer': 'a'}
        result = self._set_failopen_with_routes_stubbed('a', None)
        self.assertTrue(result['ok'])
        self.pr.update_peer.assert_called_once_with(
            'a', {'exit_failopen': None})

    def test_set_peer_failopen_unknown_peer(self):
        self.pr.get_peer.return_value = None
        result = self.mgr.set_peer_failopen('ghost', True)
        self.assertFalse(result['ok'])
        self.assertIn('not found', result['error'])

    def test_set_peer_failopen_rejects_non_bool(self):
        self.pr.get_peer.return_value = {'peer': 'a'}
        result = self.mgr.set_peer_failopen('a', 'yes')
        self.assertFalse(result['ok'])
# ---------------------------------------------------------------------------
# apply_routes — fail-open / fail-closed under DOWN connections
# ---------------------------------------------------------------------------
class TestApplyRoutesFallback(unittest.TestCase):
    """Which rules ``apply_routes`` installs depending on connection health.

    Each scenario uses one wireguard_ext connection (``c1``) and one or two
    peers that exit through it. ``_add_mark_rule``, ``_add_killswitch`` and
    the manager module's ``subprocess`` are always mocked, and the tests
    assert on which of the two rule helpers were called.
    """

    MARK = 0x1000
    IFACE = 'wgext_a'
    PEER_A_IP = '172.20.0.50'
    PEER_B_IP = '172.20.0.51'

    def setUp(self):
        self.tmp = tempfile.mkdtemp()
        # addCleanup runs even when a later setUp step fails, and keeps the
        # temp-dir handling identical across the test classes in this file.
        self.addCleanup(shutil.rmtree, self.tmp, ignore_errors=True)

    # fixtures / helpers ----------------------------------------------------

    def _mgr(self, conns, peers, peer_ips):
        """Build a manager whose mocks serve the given connections and peers.

        ``peer_ips`` maps a peer name to the record ``get_peer`` returns for
        it; unknown names yield ``None``.
        """
        cm = MagicMock()
        cm.get_identity.return_value = {
            'cell_name': 't', 'ip_range': '172.20.0.0/16'}
        cm.list_connections.return_value = conns
        pr = MagicMock()
        pr.list_peers.return_value = peers
        pr.get_peer.side_effect = lambda n: peer_ips.get(n)
        return _make_manager(
            config_manager=cm, peer_registry=pr, tmp_dir=self.tmp)

    def _conns(self, health):
        """Return the single-connection list with the given stored health."""
        return [{'id': 'c1', 'type': 'wireguard_ext', 'enabled': True,
                 'mark': self.MARK, 'table': 1000, 'iface': self.IFACE,
                 'redirect_port': None, 'status': {'health': health}}]

    def _single_peer_mgr(self, health, **peer_extra):
        """Manager with peer 'a' exiting via c1; ``peer_extra`` adds peer keys."""
        peers = [{'peer': 'a', 'exit_via': 'c1', **peer_extra}]
        ips = {'a': {'peer': 'a', 'ip': f'{self.PEER_A_IP}/32'}}
        return self._mgr(self._conns(health), peers, ips)

    def _apply(self, mgr, mark_side_effect=None):
        """Run ``apply_routes`` with both rule helpers and subprocess mocked.

        Returns the ``(_add_mark_rule, _add_killswitch)`` mocks so callers
        can assert on them after the patches are lifted.
        """
        with patch.object(mgr, '_add_mark_rule',
                          side_effect=mark_side_effect) as mark, \
                patch.object(mgr, '_add_killswitch') as ks, \
                patch.object(cm_module, 'subprocess') as sp:
            sp.run.return_value = _cp()
            mgr.apply_routes()
        return mark, ks

    # scenarios -------------------------------------------------------------

    def test_down_failopen_peer_skips_mark_and_killswitch(self):
        mgr = self._single_peer_mgr('down', exit_failopen=True)
        mark, ks = self._apply(mgr)
        mark.assert_not_called()
        ks.assert_not_called()

    def test_down_failclosed_peer_keeps_mark_and_killswitch(self):
        # No override -> wireguard_ext default is fail-closed.
        mgr = self._single_peer_mgr('down')
        mark, ks = self._apply(mgr)
        mark.assert_called_once_with(self.PEER_A_IP, self.MARK)
        ks.assert_called_once_with(self.MARK, self.IFACE)

    def test_working_failopen_peer_routes_normally(self):
        mgr = self._single_peer_mgr('working', exit_failopen=True)
        mark, ks = self._apply(mgr)
        mark.assert_called_once_with(self.PEER_A_IP, self.MARK)
        ks.assert_called_once_with(self.MARK, self.IFACE)

    def test_unknown_health_routes_normally(self):
        """A never-probed connection (health unknown) must not silently drop."""
        mgr = self._single_peer_mgr('unknown', exit_failopen=True)
        # _add_killswitch is mocked here as in every other scenario; only the
        # mark rule is asserted on.
        mark, _ks = self._apply(mgr)
        mark.assert_called_once_with(self.PEER_A_IP, self.MARK)

    def test_mixed_peers_failclosed_keeps_killswitch(self):
        """When one peer fails open and another fails closed on the same DOWN
        connection, the fail-closed peer keeps its mark and the killswitch
        stays so its traffic is blocked while the tunnel is down."""
        peers = [
            {'peer': 'a', 'exit_via': 'c1', 'exit_failopen': True},
            {'peer': 'b', 'exit_via': 'c1', 'exit_failopen': False},
        ]
        ips = {'a': {'peer': 'a', 'ip': f'{self.PEER_A_IP}/32'},
               'b': {'peer': 'b', 'ip': f'{self.PEER_B_IP}/32'}}
        mgr = self._mgr(self._conns('down'), peers, ips)
        marked = []
        _mark, ks = self._apply(
            mgr, mark_side_effect=lambda ip, m: marked.append(ip))
        self.assertEqual(marked, [self.PEER_B_IP])
        ks.assert_called_once_with(self.MARK, self.IFACE)
# Run the tests in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()