feat: auto mutual WG pairing + subnet/domain conflict detection
**Auto mutual pairing**

When Cell A imports Cell B's invite (POST /api/cells on A), A now immediately pushes its own invite to Cell B over the LAN (using the endpoint IP, before the WG tunnel exists) via the new endpoint `POST /api/cells/peer-sync/accept-invite`. Cell B then auto-adds Cell A as a WireGuard peer and DNS forward, completing the bidirectional tunnel without any manual action in Cell B's UI. The endpoint is idempotent and unauthenticated (it runs before the WG tunnel exists).

Previously, the pairing was one-sided: Cell A had Cell B as a WG peer, but Cell B never had Cell A, so the tunnel was never established and all cross-cell operations silently failed.

**Conflict detection (add_connection + accept-invite)**

`_check_invite_conflicts()` now validates the invite before connecting:
- The VPN subnet must not overlap this cell's own subnet or any already-connected cell's subnet.
- The domain must not match this cell's own domain or any already-connected cell's domain.

It returns clear error messages so the admin knows which cell to reconfigure.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+197
-3
@@ -338,6 +338,182 @@ class CellLinkManager:
|
||||
# Return every stored cell link, exactly as produced by self._load().
# NOTE(review): the `|` / `||||` lines are residue from the rendered diff
# table this text was extracted from; they are not part of the method.
def list_connections(self) -> List[Dict[str, Any]]:
|
||||
return self._load()
|
||||
|
||||
def _check_invite_conflicts(self, invite: Dict[str, Any],
                            exclude_cell: str = '') -> None:
    """Reject an invite whose VPN subnet or domain clashes with existing state.

    The invite's ``vpn_subnet`` is compared against this cell's own
    WireGuard network and against the subnet of every stored link; its
    ``domain`` is compared against this cell's own domain and the domain
    of every stored link.  Domains are compared case-insensitively and
    without a trailing dot, because DNS names are case-insensitive.

    Args:
        invite: Invite dict; only ``vpn_subnet`` and ``domain`` are read.
        exclude_cell: ``cell_name`` of a stored link that is left out of
            the comparison.

    Raises:
        ValueError: If the invite's subnet cannot be parsed, or if the
            subnet or domain conflicts with this cell or a stored link.
    """
    import ipaddress as _ip

    def _norm(domain: Any) -> str:
        # DNS names are case-insensitive and may carry a root dot.
        return str(domain or '').strip().rstrip('.').lower()

    peers = [l for l in self._load() if l.get('cell_name') != exclude_cell]
    remote_subnet = invite.get('vpn_subnet', '')
    remote_domain = invite.get('domain', '')

    # --- VPN subnet -----------------------------------------------------
    try:
        remote_net = _ip.ip_network(remote_subnet, strict=False)
    except ValueError as exc:
        raise ValueError(
            f"Invite has an invalid VPN subnet {remote_subnet!r}: {exc}"
        ) from exc

    # A failure to read our own network must not disable the comparison
    # against already-connected cells, so it is handled on its own.
    own_net = None
    try:
        own_net = _ip.ip_network(
            self.wireguard_manager._get_configured_network(), strict=False)
    except Exception as e:
        logger.warning(f'_check_invite_conflicts own-subnet check skipped: {e}')

    if own_net is not None and remote_net.overlaps(own_net):
        raise ValueError(
            f"VPN subnet {remote_subnet!r} overlaps this cell's own subnet "
            f"{str(own_net)!r} — the remote cell must use a distinct IP range"
        )

    for link in peers:
        stored_subnet = link.get('vpn_subnet')
        if not stored_subnet:
            continue
        try:
            existing_net = _ip.ip_network(stored_subnet, strict=False)
        except ValueError as e:
            # One corrupt stored record must not block every new pairing.
            logger.warning(
                f"_check_invite_conflicts: ignoring unparsable subnet "
                f"{stored_subnet!r} of cell '{link.get('cell_name')}': {e}"
            )
            continue
        if remote_net.overlaps(existing_net):
            raise ValueError(
                f"VPN subnet {remote_subnet!r} overlaps already-connected cell "
                f"'{link.get('cell_name')}' ({stored_subnet!r})"
            )

    # --- Domain ---------------------------------------------------------
    wanted = _norm(remote_domain)
    if not wanted:
        return

    # Environment value is the fallback when the identity config is
    # unavailable or carries no domain.
    own_domain = os.environ.get('CELL_DOMAIN', '')
    try:
        from app import config_manager
        identity = config_manager.configs.get('_identity', {})
        own_domain = identity.get('domain', own_domain)
    except Exception as e:
        logger.warning(
            f'_check_invite_conflicts: identity config unavailable, '
            f'using CELL_DOMAIN for own-domain check: {e}'
        )

    if own_domain and wanted == _norm(own_domain):
        raise ValueError(
            f"Domain {remote_domain!r} is the same as this cell's own domain — "
            f"the remote cell must use a different domain name"
        )

    for link in peers:
        if _norm(link.get('domain')) == wanted:
            raise ValueError(
                f"Domain {remote_domain!r} is already used by connected cell "
                f"'{link.get('cell_name')}' — each cell must have a unique domain"
            )
|
||||
|
||||
def _push_invite_to_remote(self, link: Dict[str, Any]) -> Dict[str, Any]:
    """Send OUR invite to the remote cell so it can complete mutual WG pairing.

    The host part of ``link['endpoint']`` is used as the target (not the
    WG-internal ``dns_ip``), and the invite is POSTed to
    ``/api/cells/peer-sync/accept-invite`` on port 3000 by running ``curl``
    inside the ``cell-wireguard`` container.  The call never raises: every
    failure is reported through the returned dict.

    Args:
        link: Stored link dict; only ``endpoint`` is read
            (e.g. ``"192.168.31.52:51820"`` or ``"[fe80::1]:51820"``).

    Returns:
        ``{'ok': True, 'error': None}`` on a 2xx response, otherwise
        ``{'ok': False, 'error': <short description>}``.
    """
    endpoint = str(link.get('endpoint') or '').strip()
    if not endpoint:
        return {'ok': False, 'error': 'no endpoint'}

    # Split "host:port" while keeping IPv6 literals intact.
    if endpoint.startswith('['):
        host = endpoint[1:].partition(']')[0]
    elif endpoint.count(':') == 1:
        host = endpoint.partition(':')[0]
    else:
        # Bare hostname / IPv4 without port, or an unbracketed IPv6 address.
        host = endpoint
    host = host.strip()
    # The host is interpolated into a URL; refuse anything that could
    # change the URL's structure.
    if not host or any(ch in host for ch in '/?#@ \t'):
        return {'ok': False, 'error': f'cannot parse endpoint {endpoint!r}'}

    try:
        identity = self._local_identity()
        from app import config_manager
        id_cfg = config_manager.configs.get('_identity', {})
        own_domain = id_cfg.get('domain', os.environ.get('CELL_DOMAIN', 'cell'))
        own_invite = self.generate_invite(identity['cell_name'], own_domain)
    except Exception as e:
        return {'ok': False, 'error': f'could not build own invite: {e}'}

    # IPv6 literals must be bracketed inside a URL authority.
    url_host = f'[{host}]' if ':' in host else host
    url = f'http://{url_host}:3000/api/cells/peer-sync/accept-invite'
    payload = json.dumps({'invite': own_invite})
    cmd = [
        'docker', 'exec', 'cell-wireguard',
        # -S keeps curl's error text on stderr even though -s silences
        # progress output, so transport failures are reported usefully.
        'curl', '-sS', '-o', '/dev/null', '-w', '%{http_code}',
        '-X', 'POST',
        '-H', 'Content-Type: application/json',
        '-d', payload,
        '--max-time', str(_PUSH_TIMEOUT),
        '--connect-timeout', '3',
        url,
    ]
    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=_PUSH_TIMEOUT + 5
        )
    except Exception as e:
        return {'ok': False, 'error': str(e)[:200]}

    if result.returncode != 0:
        err = (result.stderr or result.stdout or 'curl error').strip()[:200]
        return {'ok': False, 'error': err}

    # With -o /dev/null and -w '%{http_code}', stdout is just the status code.
    status = result.stdout.strip()
    if status.startswith('2'):
        return {'ok': True, 'error': None}
    return {'ok': False, 'error': f'HTTP {status}'}
|
||||
|
||||
def accept_invite(self, invite: Dict[str, Any]) -> Dict[str, Any]:
    """Accept a remote cell's invite and complete mutual WG pairing.

    Called by the /api/cells/peer-sync/accept-invite endpoint when the remote
    cell pushes its own invite after we connected to it. Idempotent: if a
    link with the same ``cell_name`` is already stored, that link is
    returned unchanged and nothing is reconfigured.

    Args:
        invite: Invite dict. ``cell_name``, ``public_key``, ``vpn_subnet``,
            ``dns_ip`` and ``domain`` are required and must be non-empty;
            ``endpoint`` is optional.

    Returns:
        The stored link dict (existing or newly created).

    Raises:
        ValueError: If the invite is not a dict, a required field is
            missing or empty, or the conflict check rejects it.
        RuntimeError: If the WireGuard peer could not be added.
    """
    # The endpoint is reachable without authentication, so the payload is
    # validated strictly before any WireGuard/DNS state is touched.
    if not isinstance(invite, dict):
        raise ValueError("Invite must be a JSON object")
    for field in ('cell_name', 'public_key', 'vpn_subnet', 'dns_ip', 'domain'):
        if not invite.get(field):
            raise ValueError(f"Invite missing field: {field!r}")

    links = self._load()
    name = invite['cell_name']

    # Idempotent: already connected
    existing = next((entry for entry in links if entry['cell_name'] == name), None)
    if existing:
        return existing

    # Validate against our own subnet/domain and every stored link.
    self._check_invite_conflicts(invite)

    ok = self.wireguard_manager.add_cell_peer(
        name=name,
        public_key=invite['public_key'],
        endpoint=invite.get('endpoint', ''),
        vpn_subnet=invite['vpn_subnet'],
    )
    if not ok:
        raise RuntimeError(f"Failed to add WireGuard peer for cell '{name}'")

    dns_result = self.network_manager.add_cell_dns_forward(
        domain=invite['domain'],
        dns_ip=invite['dns_ip'],
    )
    if dns_result.get('warnings'):
        logger.warning('DNS forward warnings for %s (accept_invite): %s',
                       name, dns_result['warnings'])

    link = {
        'cell_name': name,
        'public_key': invite['public_key'],
        'endpoint': invite.get('endpoint'),
        'vpn_subnet': invite['vpn_subnet'],
        'dns_ip': invite['dns_ip'],
        'domain': invite['domain'],
        'connected_at': datetime.utcnow().isoformat(),
        'permissions': _default_perms(),
        'remote_api_url': f"http://{invite['dns_ip']}:3000",
        'last_push_status': 'never',
        'last_push_at': None,
        'last_push_error': None,
        'pending_push': True,
        'last_remote_update_at': None,
    }
    links.append(link)
    self._save(links)

    # Firewall rules are best-effort: the link is already persisted, so a
    # failure here is logged rather than raised.
    try:
        import firewall_manager as _fm
        _fm.apply_cell_rules(name, invite['vpn_subnet'], [])
    except Exception as e:
        logger.warning(f"apply_cell_rules for {name} (accept_invite) failed: {e}")

    logger.info(f"accept_invite: mutual pairing completed for cell '{name}'")
    return link
|
||||
|
||||
def add_connection(self, invite: Dict[str, Any],
|
||||
inbound_services: Optional[List[str]] = None) -> Dict[str, Any]:
|
||||
"""Import a remote cell's invite and establish the connection."""
|
||||
@@ -346,6 +522,9 @@ class CellLinkManager:
|
||||
if any(l['cell_name'] == name for l in links):
|
||||
raise ValueError(f"Cell '{name}' is already connected")
|
||||
|
||||
# Check for VPN subnet and domain conflicts before touching WG/DNS
|
||||
self._check_invite_conflicts(invite)
|
||||
|
||||
ok = self.wireguard_manager.add_cell_peer(
|
||||
name=name,
|
||||
public_key=invite['public_key'],
|
||||
@@ -392,7 +571,22 @@ class CellLinkManager:
|
||||
except Exception as e:
|
||||
logger.warning(f"apply_cell_rules for {name} failed (non-fatal): {e}")
|
||||
|
||||
# Initial push so the remote immediately knows our permission state
|
||||
# Push OUR invite to the remote so it can complete mutual WG pairing.
|
||||
# This is done over the LAN (before the WG tunnel is up) using the
|
||||
# endpoint IP, so the remote doesn't need manual action from both sides.
|
||||
try:
|
||||
inv_result = self._push_invite_to_remote(link)
|
||||
if inv_result.get('ok'):
|
||||
logger.info(f"Mutual pairing invite accepted by '{name}'")
|
||||
else:
|
||||
logger.warning(
|
||||
f"Invite push to '{name}' failed (manual pairing from remote required): "
|
||||
f"{inv_result.get('error')}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Invite push to '{name}' skipped (non-fatal): {e}")
|
||||
|
||||
# Initial permission push (uses WG tunnel — may fail if tunnel not yet up)
|
||||
try:
|
||||
identity = self._local_identity()
|
||||
result = self._push_permissions_to_remote(
|
||||
@@ -401,11 +595,11 @@ class CellLinkManager:
|
||||
self._record_push_result(name, result)
|
||||
if not result['ok']:
|
||||
logger.warning(
|
||||
f"Initial push to '{name}' failed "
|
||||
f"Initial permission push to '{name}' failed "
|
||||
f"(will retry on startup): {result['error']}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Initial push to '{name}' skipped (non-fatal): {e}")
|
||||
logger.warning(f"Initial permission push to '{name}' skipped (non-fatal): {e}")
|
||||
|
||||
return link
|
||||
|
||||
|
||||
Reference in New Issue
Block a user