feat: auto mutual WG pairing + subnet/domain conflict detection

**Auto mutual pairing**
When Cell A imports Cell B's invite (POST /api/cells on A), A now
immediately pushes its own invite to Cell B over the LAN (using the
endpoint IP, before the WG tunnel exists) via the new endpoint:
  POST /api/cells/peer-sync/accept-invite

Cell B automatically adds Cell A as a WireGuard peer and adds a DNS forward
for Cell A's domain, completing the bidirectional tunnel without any manual action in Cell B's UI.
The endpoint is idempotent and unauthenticated (it runs before the WG tunnel exists).

Previously, the pairing was one-sided: Cell A had Cell B as a WG peer
but Cell B never had Cell A — the tunnel never established and all
cross-cell operations silently failed.

**Conflict detection (add_connection + accept-invite)**
_check_invite_conflicts() now validates before connecting:
  - VPN subnet must not overlap own subnet or any already-connected cell's subnet
  - Domain must not match own domain or any already-connected cell's domain
Returns clear error messages so the admin knows which cell to reconfigure.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-02 06:24:46 -04:00
parent 1a611e0474
commit 99c1d9cd92
2 changed files with 234 additions and 3 deletions
+197 -3
View File
@@ -338,6 +338,182 @@ class CellLinkManager:
def list_connections(self) -> List[Dict[str, Any]]:
    """Return the stored cell links exactly as ``self._load()`` provides them."""
    connections = self._load()
    return connections
def _check_invite_conflicts(self, invite: Dict[str, Any],
                            exclude_cell: str = '') -> None:
    """Raise ValueError if the invite's VPN subnet or domain conflicts with local state.

    The invite is compared against this cell's own WireGuard network (read
    from ``self.wireguard_manager._get_configured_network()``), this cell's
    own domain, and every stored link returned by ``self._load()``.

    Args:
        invite: Remote cell invite; only ``vpn_subnet`` and ``domain`` are read.
        exclude_cell: ``cell_name`` of a stored link to leave out of the
            comparison.

    Raises:
        ValueError: The invite's subnet is not a valid network, overlaps this
            cell's subnet or a stored link's subnet, or its domain equals
            this cell's domain or a stored link's domain.
    """
    import ipaddress as _ip

    def _norm_domain(value: Any) -> str:
        # DNS names compare case-insensitively and may carry a trailing root
        # dot, so "Beta.Cell." and "beta.cell" are the same name.
        if not isinstance(value, str):
            return ''
        return value.strip().rstrip('.').lower()

    links = [link for link in self._load()
             if link.get('cell_name') != exclude_cell]

    # --- VPN subnet -------------------------------------------------------
    remote_subnet = invite.get('vpn_subnet', '')
    try:
        if not isinstance(remote_subnet, str):
            raise ValueError('expected a CIDR string')
        remote_net = _ip.ip_network(remote_subnet, strict=False)
    except ValueError as e:
        raise ValueError(
            f"Invite VPN subnet {remote_subnet!r} is not a valid network: {e}"
        ) from e

    # A broken *local* configuration must not be reported as a problem with
    # the invite: log it and keep checking the stored links.
    own_net = None
    try:
        own_net = _ip.ip_network(
            self.wireguard_manager._get_configured_network(), strict=False)
    except Exception as e:
        logger.warning(
            '_check_invite_conflicts own-subnet check skipped: %s', e)
    if own_net is not None and remote_net.overlaps(own_net):
        raise ValueError(
            f"VPN subnet {remote_subnet!r} overlaps this cell's own subnet "
            f"{str(own_net)!r} — the remote cell must use a distinct IP range"
        )

    for link in links:
        stored_subnet = link.get('vpn_subnet')
        if not stored_subnet:
            continue
        try:
            existing_net = _ip.ip_network(stored_subnet, strict=False)
        except ValueError as e:
            # One malformed stored link should not block every new invite.
            logger.warning(
                "_check_invite_conflicts: ignoring invalid subnet %r of cell %r: %s",
                stored_subnet, link.get('cell_name'), e)
            continue
        if remote_net.overlaps(existing_net):
            raise ValueError(
                f"VPN subnet {remote_subnet!r} overlaps already-connected cell "
                f"'{link.get('cell_name')}' ({stored_subnet!r})"
            )

    # --- Domain -----------------------------------------------------------
    remote_domain = invite.get('domain', '')
    remote_key = _norm_domain(remote_domain)
    if not remote_key:
        return

    # Best effort: the identity config may be unavailable; fall back to the
    # CELL_DOMAIN environment variable in that case.
    env_domain = os.environ.get('CELL_DOMAIN', '')
    try:
        from app import config_manager
        identity = config_manager.configs.get('_identity', {})
        own_domain = identity.get('domain', env_domain)
    except Exception as e:
        logger.debug(
            '_check_invite_conflicts: identity config unavailable: %s', e)
        own_domain = env_domain
    if remote_key == _norm_domain(own_domain):
        raise ValueError(
            f"Domain {remote_domain!r} is the same as this cell's own domain — "
            f"the remote cell must use a different domain name"
        )

    for link in links:
        if _norm_domain(link.get('domain')) == remote_key:
            raise ValueError(
                f"Domain {remote_domain!r} is already used by connected cell "
                f"'{link.get('cell_name')}' — each cell must have a unique domain"
            )
def _push_invite_to_remote(self, link: Dict[str, Any]) -> Dict[str, Any]:
    """Send OUR invite to the remote cell so it can complete mutual WG pairing.

    Called immediately after adding the remote as our WG peer. Uses the host
    part of the remote's ``endpoint`` rather than the WG-internal ``dns_ip``
    because the tunnel is not up yet. The request is a ``curl`` POST to port
    3000 of that host, run via ``docker exec`` in the ``cell-wireguard``
    container. Non-fatal: every failure is reported in the return value
    instead of being raised.

    Args:
        link: Stored link for the remote cell; only ``endpoint`` is read.
            Accepted forms: ``host``, ``host:port``, ``[ipv6]``,
            ``[ipv6]:port`` and a bare IPv6 address.

    Returns:
        ``{'ok': True, 'error': None}`` when the remote answered with a 2xx
        status, otherwise ``{'ok': False, 'error': <short reason>}``.
    """
    import ipaddress as _ip

    endpoint = link.get('endpoint') or ''
    if not endpoint:
        return {'ok': False, 'error': 'no endpoint'}
    unparsable = {'ok': False, 'error': f'cannot parse endpoint {endpoint!r}'}
    if not isinstance(endpoint, str):
        return unparsable

    # Split the host from the optional port. Bracketed and bare IPv6 need
    # special care: splitting on the last ':' would cut a bare IPv6 address.
    raw = endpoint.strip()
    if raw.startswith('['):
        host = raw[1:].partition(']')[0]
    elif raw.count(':') == 1:
        host = raw.partition(':')[0]
    else:
        host = raw
    host = host.strip()

    if ':' in host:
        try:
            _ip.IPv6Address(host)
        except ValueError:
            return unparsable
        # IPv6 literals must be bracketed inside a URL.
        url_host = f'[{host}]'
    elif not host or any(ch in host for ch in '/?#@\\ \t'):
        # Characters that would change the meaning of the URL are rejected.
        return unparsable
    else:
        url_host = host

    try:
        identity = self._local_identity()
        from app import config_manager
        id_cfg = config_manager.configs.get('_identity', {})
        own_domain = id_cfg.get('domain', os.environ.get('CELL_DOMAIN', 'cell'))
        own_invite = self.generate_invite(identity['cell_name'], own_domain)
    except Exception as e:
        return {'ok': False, 'error': f'could not build own invite: {e}'}

    url = f'http://{url_host}:3000/api/cells/peer-sync/accept-invite'
    payload = json.dumps({'invite': own_invite})
    cmd = [
        'docker', 'exec', 'cell-wireguard',
        # -S keeps curl's error text on stderr even though -s silences progress.
        'curl', '-s', '-S', '-o', '/dev/null', '-w', '%{http_code}',
        '-X', 'POST',
        '-H', 'Content-Type: application/json',
        '-d', payload,
        '--max-time', str(_PUSH_TIMEOUT),
        '--connect-timeout', '3',
        url,
    ]
    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=_PUSH_TIMEOUT + 5
        )
    except Exception as e:
        return {'ok': False, 'error': str(e)[:200]}

    if result.returncode != 0:
        err = (result.stderr or result.stdout or 'curl error').strip()[:200]
        return {'ok': False, 'error': err}
    # stdout carries only the HTTP status code (see -w above).
    status = result.stdout.strip()
    if status.startswith('2'):
        return {'ok': True, 'error': None}
    return {'ok': False, 'error': f'HTTP {status}'}
def accept_invite(self, invite: Dict[str, Any]) -> Dict[str, Any]:
    """Register the cell described by ``invite`` as a peer of this cell.

    Used by the /api/cells/peer-sync/accept-invite endpoint when a remote
    cell pushes its own invite after connecting to us. If a link with the
    same ``cell_name`` is already stored, that link is returned and nothing
    else happens.

    Raises:
        ValueError: A required invite field is missing, or the conflict
            check rejects the invite.
        RuntimeError: The WireGuard peer could not be added.
    """
    required_fields = ('cell_name', 'public_key', 'vpn_subnet', 'dns_ip', 'domain')
    absent = next((key for key in required_fields if key not in invite), None)
    if absent is not None:
        raise ValueError(f"Invite missing field: {absent!r}")

    known_links = self._load()
    cell = invite['cell_name']

    # Repeat call for a cell we already know: hand back the stored link.
    already = None
    for entry in known_links:
        if entry['cell_name'] == cell:
            already = entry
            break
    if already:
        return already

    # New cell: check subnet/domain against all stored links (none excluded).
    self._check_invite_conflicts(invite)

    public_key = invite['public_key']
    subnet = invite['vpn_subnet']
    peer_added = self.wireguard_manager.add_cell_peer(
        name=cell,
        public_key=public_key,
        endpoint=invite.get('endpoint', ''),
        vpn_subnet=subnet,
    )
    if not peer_added:
        raise RuntimeError(f"Failed to add WireGuard peer for cell '{cell}'")

    domain = invite['domain']
    dns_ip = invite['dns_ip']
    forward = self.network_manager.add_cell_dns_forward(domain=domain, dns_ip=dns_ip)
    if forward.get('warnings'):
        logger.warning('DNS forward warnings for %s (accept_invite): %s',
                       cell, forward['warnings'])

    # Fields copied from the invite first, then the local bookkeeping fields.
    record: Dict[str, Any] = {
        'cell_name': cell,
        'public_key': public_key,
        'endpoint': invite.get('endpoint'),
        'vpn_subnet': subnet,
        'dns_ip': dns_ip,
        'domain': domain,
    }
    record.update(
        connected_at=datetime.utcnow().isoformat(),
        permissions=_default_perms(),
        remote_api_url=f"http://{dns_ip}:3000",
        last_push_status='never',
        last_push_at=None,
        last_push_error=None,
        pending_push=True,
        last_remote_update_at=None,
    )
    known_links.append(record)
    self._save(known_links)

    # Firewall rules are best effort; a failure is logged, not raised.
    try:
        import firewall_manager as fw
        fw.apply_cell_rules(cell, subnet, [])
    except Exception as e:
        logger.warning(f"apply_cell_rules for {cell} (accept_invite) failed: {e}")

    logger.info(f"accept_invite: mutual pairing completed for cell '{cell}'")
    return record
def add_connection(self, invite: Dict[str, Any],
inbound_services: Optional[List[str]] = None) -> Dict[str, Any]:
"""Import a remote cell's invite and establish the connection."""
@@ -346,6 +522,9 @@ class CellLinkManager:
if any(l['cell_name'] == name for l in links):
raise ValueError(f"Cell '{name}' is already connected")
# Check for VPN subnet and domain conflicts before touching WG/DNS
self._check_invite_conflicts(invite)
ok = self.wireguard_manager.add_cell_peer(
name=name,
public_key=invite['public_key'],
@@ -392,7 +571,22 @@ class CellLinkManager:
except Exception as e:
logger.warning(f"apply_cell_rules for {name} failed (non-fatal): {e}")
# Initial push so the remote immediately knows our permission state
# Push OUR invite to the remote so it can complete mutual WG pairing.
# This is done over the LAN (before the WG tunnel is up) using the
# endpoint IP, so the remote doesn't need manual action from both sides.
try:
inv_result = self._push_invite_to_remote(link)
if inv_result.get('ok'):
logger.info(f"Mutual pairing invite accepted by '{name}'")
else:
logger.warning(
f"Invite push to '{name}' failed (manual pairing from remote required): "
f"{inv_result.get('error')}"
)
except Exception as e:
logger.warning(f"Invite push to '{name}' skipped (non-fatal): {e}")
# Initial permission push (uses WG tunnel — may fail if tunnel not yet up)
try:
identity = self._local_identity()
result = self._push_permissions_to_remote(
@@ -401,11 +595,11 @@ class CellLinkManager:
self._record_push_result(name, result)
if not result['ok']:
logger.warning(
f"Initial push to '{name}' failed "
f"Initial permission push to '{name}' failed "
f"(will retry on startup): {result['error']}"
)
except Exception as e:
logger.warning(f"Initial push to '{name}' skipped (non-fatal): {e}")
logger.warning(f"Initial permission push to '{name}' skipped (non-fatal): {e}")
return link
+37
View File
@@ -183,6 +183,43 @@ def set_exit_offer(cell_name):
return jsonify({'error': str(e)}), 500
@bp.route('/api/cells/peer-sync/accept-invite', methods=['POST'])
def peer_sync_accept_invite():
    """Machine-to-machine: a newly-connected cell pushes its own invite for mutual WG pairing.

    Called by Cell A over the LAN (before the WG tunnel exists) immediately after Cell A
    imports Cell B's invite. Cell B uses this to add Cell A as a WireGuard peer and
    complete the bidirectional tunnel setup without manual admin action on Cell B.

    No session auth — the request arrives before the WG tunnel is up, so the body is
    untrusted. It must be a JSON object of the form ``{"invite": {...}}`` whose required
    fields are non-empty strings; subnet/domain conflicts are checked by
    ``cell_link_manager.accept_invite``.

    Returns:
        201 with ``{'ok': True, 'cell_name': ...}`` on success (also when the cell was
        already connected), 400 with ``{'ok': False, 'error': ...}`` for a bad or
        rejected invite, 500 for unexpected errors.
    """
    required = ('cell_name', 'public_key', 'vpn_subnet', 'dns_ip', 'domain')
    try:
        from app import cell_link_manager
        data = request.get_json(silent=True) or {}
        # A JSON list/string/number body has no .get(); treat it as a bad request
        # instead of letting it surface as a 500.
        invite = data.get('invite') if isinstance(data, dict) else None
        if not invite or not isinstance(invite, dict):
            return jsonify({'ok': False, 'error': 'invite object required'}), 400
        for field in required:
            if field not in invite:
                return jsonify({'ok': False, 'error': f'invite missing field: {field!r}'}), 400
            value = invite[field]
            # These values are handed to the WireGuard and DNS managers; reject
            # anything that is not a plain non-empty string before it gets there.
            if not isinstance(value, str) or not value.strip():
                return jsonify({
                    'ok': False,
                    'error': f'invite field {field!r} must be a non-empty string',
                }), 400
        endpoint = invite.get('endpoint')
        if endpoint is not None and not isinstance(endpoint, str):
            return jsonify({'ok': False, 'error': "invite field 'endpoint' must be a string"}), 400
        if invite.get('version') not in (1, None):
            return jsonify({'ok': False, 'error': 'unsupported invite version'}), 400
        link = cell_link_manager.accept_invite(invite)
        return jsonify({'ok': True, 'cell_name': link['cell_name']}), 201
    except ValueError as e:
        return jsonify({'ok': False, 'error': str(e)}), 400
    except RuntimeError as e:
        # NOTE(review): RuntimeError here means the WireGuard peer could not be added,
        # which is a server-side failure; status kept at 400 for compatibility.
        return jsonify({'ok': False, 'error': str(e)}), 400
    except Exception as e:
        # logger.exception records the traceback as well as the message.
        logger.exception('accept-invite error: %s', e)
        return jsonify({'ok': False, 'error': 'internal error'}), 500
@bp.route('/api/cells/peer-sync/permissions', methods=['POST'])
def peer_sync_permissions():
"""Machine-to-machine endpoint: a connected cell pushes its mirrored permission state.