fix: WG address change now queues pending restart + heals cell connections

Three issues fixed together:

1. WireGuard address changes now go through the pending-restart queue
   (shown in the UI banner) instead of restarting cell-wireguard immediately.
   Only private_key changes still restart immediately; address and port
   changes both defer to the user-initiated Apply flow.  Previously the
   address change was silently applied and never appeared in Settings →
   Pending Configuration.

2. When the WG address changes, the API spawns a background thread that
   pushes the updated invite to all connected cells (over LAN, before the
   WG tunnel is back up).  This lets remote cells automatically update
   their dns_ip, AllowedIPs, and CoreDNS forwarding rules without manual
   re-pairing.

3. accept_invite now handles the "already connected but changed" case:
   if the remote cell re-sends an invite with a different dns_ip, vpn_subnet
   or endpoint, we update the stored link, the WG AllowedIPs, and the
   CoreDNS forward rule in place — no delete/re-add required.  Previously
   such changes were ignored and the stale record was returned unchanged.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-02 08:29:18 -04:00
parent 0e16d6968a
commit 960a4ecc51
4 changed files with 128 additions and 4 deletions
+44 -1
View File
@@ -461,9 +461,52 @@ class CellLinkManager:
links = self._load()
name = invite['cell_name']
# Idempotent: already connected
# Already connected — check whether the remote's endpoint or subnet changed
# (e.g. the remote cell changed its WireGuard address) and heal if so.
existing = next((l for l in links if l['cell_name'] == name), None)
if existing:
dns_changed = existing.get('dns_ip') != invite['dns_ip']
subnet_changed = existing.get('vpn_subnet') != invite['vpn_subnet']
endpoint_changed = (invite.get('endpoint') and
invite['endpoint'] != existing.get('endpoint'))
if dns_changed or subnet_changed or endpoint_changed:
logger.info(
f"accept_invite: updating existing cell '{name}' "
f"(dns_ip: {existing.get('dns_ip')}{invite['dns_ip']}, "
f"vpn_subnet: {existing.get('vpn_subnet')}{invite['vpn_subnet']})"
)
old_subnet = existing.get('vpn_subnet', '')
existing['dns_ip'] = invite['dns_ip']
existing['vpn_subnet'] = invite['vpn_subnet']
existing['remote_api_url'] = f"http://{invite['dns_ip']}:3000"
if invite.get('endpoint'):
existing['endpoint'] = invite['endpoint']
self._save(links)
# Update WG peer AllowedIPs to the new subnet
if subnet_changed and old_subnet:
self.wireguard_manager.update_peer_ip(
existing['public_key'], invite['vpn_subnet'])
# Update DNS forward rule (remove old, add new)
if dns_changed:
try:
self.network_manager.remove_cell_dns_forward(existing['domain'])
except Exception:
pass
self.network_manager.add_cell_dns_forward(
domain=existing['domain'], dns_ip=invite['dns_ip'])
# Reapply firewall rules with new subnet
if subnet_changed:
try:
import firewall_manager as _fm
inbound_list = [s for s, v in
existing.get('permissions', {}).get('inbound', {}).items() if v]
_fm.apply_cell_rules(name, invite['vpn_subnet'], inbound_list)
except Exception as e:
logger.warning(f"apply_cell_rules after subnet update failed: {e}")
return existing
# Conflict check (exclude by name since we're adding for the first time)
+31
View File
@@ -359,6 +359,37 @@ def update_config():
port_changed_containers.add('wireguard')
port_change_messages.append(f'wireguard_port: {old_wg}{new_wg}')
# WireGuard address change — queue a wireguard container restart and push
# the updated invite to all connected cells so they can update their
# dns_ip, vpn_subnet, and WG AllowedIPs without manual re-pairing.
_wg_address_changed = (
'wireguard' in data
and isinstance(data.get('wireguard'), dict)
and 'address' in data['wireguard']
and data['wireguard']['address'] != old_svc_configs.get('wireguard', {}).get('address', '')
)
if _wg_address_changed:
_new_addr = data['wireguard']['address']
_old_addr = old_svc_configs.get('wireguard', {}).get('address', '(unknown)')
port_changed_containers.add('wireguard')
port_change_messages.append(f'wireguard address: {_old_addr}{_new_addr}')
# Push updated invite to connected cells in the background so they can
# heal their dns_ip / AllowedIPs without any manual action.
def _push_cell_invites():
    """Push this cell's invite to every connected cell, best-effort.

    Used as the target of a daemon thread. Every failure is logged as a
    warning and swallowed: a failing push to one cell does not prevent
    pushes to the remaining cells, and nothing is ever raised to the caller.
    """
    import time as _time
    _time.sleep(3)  # brief wait to let wg0.conf settle
    try:
        from app import cell_link_manager as _clm
        for _link in _clm.list_connections():
            # Resolve the name once with .get() so that a link record without
            # 'cell_name' cannot raise KeyError *after* a successful push and
            # be misreported as a failed push.
            _name = _link.get('cell_name')
            try:
                _clm._push_invite_to_remote(_link)
            except Exception as _e:
                logger.warning(f"Post-address-change invite push to '{_name}' failed: {_e}")
            else:
                # Only reached when the push itself did not raise.
                logger.info(f"Pushed updated invite to cell '{_name}' after address change")
    except Exception as _e:
        # Import failure, list_connections() failure, or a malformed link record.
        logger.warning(f"_push_cell_invites failed: {_e}")
threading.Thread(target=_push_cell_invites, daemon=True).start()
if port_changed_containers:
env_file = os.environ.get('COMPOSE_ENV_FILE', '/app/.env.compose')
_ip_range = config_manager.configs.get('_identity', {}).get(
+5 -3
View File
@@ -426,9 +426,11 @@ class WireGuardManager(BaseServiceManager):
if changed:
with open(cf, 'w') as f:
f.writelines(lines)
# Port-only changes: docker binding must be updated first via pending restart.
# Non-port changes (address, private_key) can restart immediately.
if not port_only_change:
# private_key changes require an immediate restart (new key takes effect only
# after the WireGuard interface is recreated).
# address and port changes go through the pending-restart queue so the admin
# can review before the tunnel is bounced — callers must set _pending_restart.
if 'private_key' in config and config['private_key']:
self._restart_container('cell-wireguard')
restarted.append('cell-wireguard')
except Exception as e:
+48
View File
@@ -157,6 +157,54 @@ class TestCellLinkManagerConnections(unittest.TestCase):
self.mgr.add_connection(second)
self.assertEqual(len(self.mgr.list_connections()), 2)
# accept_invite — new connection
def test_accept_invite_adds_new_connection(self):
# Accepting an invite for a cell that is not yet linked must store exactly
# one connection, named after the invite's cell ('office' for SAMPLE_INVITE).
# firewall_manager.apply_cell_rules is patched out for the accept call.
# NOTE(review): indentation is not visible in this view; presumably only the
# accept_invite call sits inside the patch context — confirm.
with patch('firewall_manager.apply_cell_rules'):
self.mgr.accept_invite(SAMPLE_INVITE)
links = self.mgr.list_connections()
self.assertEqual(len(links), 1)
self.assertEqual(links[0]['cell_name'], 'office')
def test_accept_invite_idempotent_no_change(self):
# Accepting the very same invite twice must not touch the WireGuard peer:
# the first call creates the link, the WG mock is then reset so that only
# calls made by the second (identical) accept are observed.
with patch('firewall_manager.apply_cell_rules'):
self.mgr.accept_invite(SAMPLE_INVITE)
self.wg.reset_mock()
self.mgr.accept_invite(SAMPLE_INVITE)
# No WG update for identical invite
self.wg.update_peer_ip.assert_not_called()
def test_accept_invite_updates_dns_ip_on_existing(self):
# Re-accepting an invite for an already linked cell with a different dns_ip
# must update the stored link in place.
with patch('firewall_manager.apply_cell_rules'):
self.mgr.accept_invite(SAMPLE_INVITE)
# Same invite as before, only the dns_ip differs.
updated_invite = {**SAMPLE_INVITE, 'dns_ip': '10.1.0.2'}
with patch('firewall_manager.apply_cell_rules'):
result = self.mgr.accept_invite(updated_invite)
# The returned record carries the new dns_ip and an API URL built from it.
self.assertEqual(result['dns_ip'], '10.1.0.2')
self.assertEqual(result['remote_api_url'], 'http://10.1.0.2:3000')
# The DNS forward rule is replaced: a removal happened and the most recent
# add call targets the new dns_ip.
self.nm.remove_cell_dns_forward.assert_called()
self.nm.add_cell_dns_forward.assert_called_with(
domain='office.cell', dns_ip='10.1.0.2')
def test_accept_invite_updates_vpn_subnet_on_existing(self):
# Re-accepting an invite for an already linked cell with a different
# vpn_subnet must update the stored link and the WireGuard peer.
with patch('firewall_manager.apply_cell_rules'):
self.mgr.accept_invite(SAMPLE_INVITE)
# Fresh mock installed after the initial accept, so the call-count
# assertion below only sees calls made by the second accept.
self.wg.update_peer_ip = MagicMock(return_value=True)
updated_invite = {**SAMPLE_INVITE, 'vpn_subnet': '10.5.0.0/24'}
with patch('firewall_manager.apply_cell_rules'):
result = self.mgr.accept_invite(updated_invite)
self.assertEqual(result['vpn_subnet'], '10.5.0.0/24')
# Exactly one peer update, passing the peer public key and the new subnet.
self.wg.update_peer_ip.assert_called_once_with('officepubkey=', '10.5.0.0/24')
def test_accept_invite_does_not_duplicate_link(self):
# A second invite for the same cell with a different dns_ip must leave
# exactly one stored link rather than appending a second one.
# NOTE(review): indentation is not visible in this view; presumably both
# accept_invite calls run inside the patch context — confirm.
with patch('firewall_manager.apply_cell_rules'):
self.mgr.accept_invite(SAMPLE_INVITE)
self.mgr.accept_invite({**SAMPLE_INVITE, 'dns_ip': '10.1.0.99'})
self.assertEqual(len(self.mgr.list_connections()), 1)
# Run the unittest runner when this file is executed as a script.
if __name__ == '__main__':
unittest.main()