feat(cells): fix PIC-to-PIC connection + add service-sharing permissions

Phase 1 — connection fixes:
- routing_manager.stop(): remove iptables -F / -t nat -F nuclear flush that
  would wipe WireGuard MASQUERADE and all peer rules on any UI stop action
- wireguard_manager.add_cell_peer(): reject vpn_subnet that overlaps the local
  WG network (routing blackhole — was the root cause of no handshake)
- wireguard_manager._syncconf(): pass Endpoint to 'wg set' so cell peers with
  static endpoints are synced to the kernel (not just AllowedIPs)

Phase 2 — service-sharing permissions backend:
- firewall_manager: add _cell_tag(), clear_cell_rules(), apply_cell_rules(),
  apply_all_cell_rules() — iptables FORWARD rules for cell-to-cell traffic
  using 'pic-cell-<name>' comment tags, distinct from 'pic-peer-*'
- app.py startup enforcement: call apply_all_cell_rules(cell_links) so rules
  survive API restarts
- cell_link_manager: permissions schema {inbound, outbound} per service;
  lazy migration for existing entries; update_permissions(), get_permissions();
  apply_cell_rules wired into add_connection/remove_connection
- routes/cells.py: GET /api/cells/services, GET+PUT /api/cells/<n>/permissions;
  RuntimeError now returns 400 (not 500) from add_connection

Removed broken 'test' cell (subnet 10.0.0.0/24 collided with local WG network).
Second PIC must use a distinct subnet (e.g. 10.0.1.0/24) before reconnecting.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-01 08:35:24 -04:00
parent f3118ff401
commit 0b103ffafb
6 changed files with 256 additions and 30 deletions
+4 -2
View File
@@ -261,10 +261,12 @@ def _configured_domain() -> str:
def _apply_startup_enforcement():
    """Re-apply peer, cell and DNS enforcement rules when the API starts.

    Loads the peer list and the cell links once, then pushes the per-peer
    firewall rules, the per-cell firewall rules and the DNS rules through
    firewall_manager. Any exception is logged as a warning and swallowed,
    so a failure here never prevents the API from starting.
    """
    try:
        peers = peer_registry.list_peers()
        # Read the cell links once and hand the same snapshot to both the
        # firewall pass and the DNS pass.
        cell_links = cell_link_manager.list_connections()
        firewall_manager.apply_all_peer_rules(peers)
        firewall_manager.apply_all_cell_rules(cell_links)
        firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, _configured_domain(),
                                             cell_links=cell_links)
        logger.info(f"Applied enforcement rules for {len(peers)} peers, {len(cell_links)} cells on startup")
    except Exception as e:
        logger.warning(f"Startup enforcement failed (non-fatal): {e}")
+90 -3
View File
@@ -5,6 +5,7 @@ CellLinkManager — manages site-to-site connections between PIC cells.
Each connection is stored in data/cell_links.json and manifests as:
- A WireGuard [Peer] block (AllowedIPs = remote cell's VPN subnet)
- A CoreDNS forwarding block (remote domain → remote cell's DNS IP)
- An iptables FORWARD rule set (service-level access control)
"""
import os
@@ -15,6 +16,20 @@ from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
# Names of the services that can be shared between cells.
VALID_SERVICES = ('calendar', 'files', 'mail', 'webdav')


def _default_perms() -> Dict[str, Any]:
    """Build a fresh all-deny permissions record.

    The result has an 'inbound' and an 'outbound' map, each holding every
    name in VALID_SERVICES set to False. New dict objects are created on
    every call, so the caller may mutate the result freely.
    """
    perms: Dict[str, Any] = {}
    for direction in ('inbound', 'outbound'):
        perms[direction] = dict.fromkeys(VALID_SERVICES, False)
    return perms


# Module-level all-deny record with the same shape as _default_perms().
_DEFAULT_PERMISSIONS = _default_perms()
class CellLinkManager:
def __init__(self, data_dir: str, config_dir: str, wireguard_manager, network_manager):
@@ -30,7 +45,16 @@ class CellLinkManager:
if os.path.exists(self.links_file):
try:
with open(self.links_file) as f:
return json.load(f)
links = json.load(f)
# Lazy migration: inject permissions field if missing
changed = False
for link in links:
if 'permissions' not in link:
link['permissions'] = _default_perms()
changed = True
if changed:
self._save(links)
return links
except Exception:
return []
return []
@@ -59,8 +83,13 @@ class CellLinkManager:
def list_connections(self) -> List[Dict[str, Any]]:
return self._load()
def add_connection(self, invite: Dict[str, Any]) -> Dict[str, Any]:
"""Import a remote cell's invite and establish the connection."""
def add_connection(self, invite: Dict[str, Any],
inbound_services: Optional[List[str]] = None) -> Dict[str, Any]:
"""Import a remote cell's invite and establish the connection.
inbound_services: which of THIS cell's services to share with the remote
cell immediately. Defaults to none (all-deny).
"""
links = self._load()
name = invite['cell_name']
if any(l['cell_name'] == name for l in links):
@@ -82,6 +111,11 @@ class CellLinkManager:
if dns_result.get('warnings'):
logger.warning('DNS forward warnings for %s: %s', name, dns_result['warnings'])
inbound = [s for s in (inbound_services or []) if s in VALID_SERVICES]
perms = _default_perms()
for s in inbound:
perms['inbound'][s] = True
link = {
'cell_name': name,
'public_key': invite['public_key'],
@@ -90,9 +124,18 @@ class CellLinkManager:
'dns_ip': invite['dns_ip'],
'domain': invite['domain'],
'connected_at': datetime.utcnow().isoformat(),
'permissions': perms,
}
links.append(link)
self._save(links)
# Apply iptables rules for the new cell (non-fatal if it fails)
try:
import firewall_manager as _fm
_fm.apply_cell_rules(name, invite['vpn_subnet'], inbound)
except Exception as e:
logger.warning(f"apply_cell_rules for {name} failed (non-fatal): {e}")
return link
def remove_connection(self, cell_name: str):
@@ -102,12 +145,56 @@ class CellLinkManager:
if not link:
raise ValueError(f"Cell '{cell_name}' not found")
# Clear firewall rules first (non-fatal)
try:
import firewall_manager as _fm
_fm.clear_cell_rules(cell_name)
except Exception as e:
logger.warning(f"clear_cell_rules for {cell_name} failed (non-fatal): {e}")
self.wireguard_manager.remove_peer(link['public_key'])
self.network_manager.remove_cell_dns_forward(link['domain'])
links = [l for l in links if l['cell_name'] != cell_name]
self._save(links)
def update_permissions(self, cell_name: str,
                       inbound: Dict[str, bool],
                       outbound: Dict[str, bool]) -> Dict[str, Any]:
    """Update service sharing permissions for a cell connection.

    Both maps are normalised to exactly the services in VALID_SERVICES,
    stored on the link record and saved. The cell's firewall rules are then
    re-applied from the inbound map; a failure of that step is logged and
    does not undo the save.

    Args:
        cell_name: name of an existing cell connection.
        inbound: service name -> bool; services set to True are passed to
            the firewall as the allowed inbound services.
        outbound: service name -> bool; stored on the link record. No
            firewall rule is derived from it in this method.

    Returns:
        The updated link record.

    Raises:
        ValueError: if no connection is named ``cell_name``.
    """
    links = self._load()
    link = next((l for l in links if l['cell_name'] == cell_name), None)
    if not link:
        raise ValueError(f"Cell '{cell_name}' not found")

    # Only a literal True grants access. bool() would turn any non-empty
    # string (including "false" or "0") into True and open the firewall,
    # so anything that is not exactly True is stored as False. A missing
    # (None) map is treated as "deny everything".
    requested_in = inbound or {}
    requested_out = outbound or {}
    clean_inbound = {s: requested_in.get(s) is True for s in VALID_SERVICES}
    clean_outbound = {s: requested_out.get(s) is True for s in VALID_SERVICES}

    link['permissions'] = {'inbound': clean_inbound, 'outbound': clean_outbound}
    self._save(links)

    # Re-apply firewall rules (non-fatal: the stored permissions are kept
    # even if the firewall could not be updated).
    inbound_list = [s for s, v in clean_inbound.items() if v]
    try:
        import firewall_manager as _fm
        _fm.apply_cell_rules(cell_name, link['vpn_subnet'], inbound_list)
    except Exception as e:
        logger.warning(f"apply_cell_rules for {cell_name} failed (non-fatal): {e}")

    return link
def get_permissions(self, cell_name: str) -> Dict[str, Any]:
"""Return the permissions dict for a connected cell."""
links = self._load()
link = next((l for l in links if l['cell_name'] == cell_name), None)
if not link:
raise ValueError(f"Cell '{cell_name}' not found")
return link.get('permissions', _default_perms())
def get_connection_status(self, cell_name: str) -> Dict[str, Any]:
"""Return link record enriched with live WireGuard handshake status."""
links = self._load()
+77
View File
@@ -221,6 +221,83 @@ def apply_all_peer_rules(peers: List[Dict[str, Any]]) -> None:
})
# ---------------------------------------------------------------------------
# Cell-to-cell firewall rules
# ---------------------------------------------------------------------------
def _cell_tag(cell_name: str) -> str:
    """Derive the iptables comment tag used to mark a cell's rules.

    The name is lower-cased and every character outside a-z / 0-9 becomes
    '-'; the result is prefixed with 'pic-cell-' so it cannot be confused
    with the 'pic-peer-*' tags.
    """
    slug_chars = []
    for ch in cell_name.lower():
        keep = ('a' <= ch <= 'z') or ('0' <= ch <= '9')
        slug_chars.append(ch if keep else '-')
    return 'pic-cell-' + ''.join(slug_chars)
def clear_cell_rules(cell_name: str) -> None:
    """Remove every saved iptables rule tagged for this cell.

    Dumps the ruleset with ``iptables-save``, drops each line carrying this
    cell's comment tag, and loads the remainder with ``iptables-restore``
    inside the WireGuard container. Nothing is restored when no line
    carries the tag. All failures are logged; none are raised.

    NOTE(review): the dump and the restore are two separate commands, so a
    rule added by someone else in between would presumably be lost —
    confirm this is acceptable for the deployment.
    """
    tag = _cell_tag(cell_name)
    # Match the tag as a whole comment value (optionally quoted) so that
    # 'pic-cell-a' does not also match 'pic-cell-a-b'.
    comment_re = re.compile(rf'--comment\s+["\']?{re.escape(tag)}["\']?(\s|$)')
    try:
        save = _wg_exec(['iptables-save'])
        if save.returncode != 0:
            # Previously a silent return: stale rules stayed in place with
            # no trace in the log.
            detail = (getattr(save, 'stderr', '') or '').strip()
            logger.warning(f"clear_cell_rules({cell_name}): iptables-save failed: {detail}")
            return
        lines = save.stdout.splitlines()
        filtered = [l for l in lines if not comment_re.search(l)]
        if len(filtered) == len(lines):
            # No tagged rule present — avoid a pointless restore.
            return
        restore_input = '\n'.join(filtered) + '\n'
        restore = subprocess.run(
            ['docker', 'exec', '-i', WIREGUARD_CONTAINER, 'iptables-restore'],
            input=restore_input, capture_output=True, text=True, timeout=10
        )
        if restore.returncode != 0:
            logger.warning(f"clear_cell_rules iptables-restore failed: {restore.stderr.strip()}")
    except Exception as e:
        logger.error(f"clear_cell_rules({cell_name}): {e}")
def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str]) -> bool:
    """Install the FORWARD rules that govern one remote cell's traffic.

    Packets whose source is ``vpn_subnet`` are accepted only when their
    destination is the address of a service named in ``inbound_services``;
    every other packet from that subnet is dropped.

    Every rule is added with ``-I`` (insert at the head of the chain), so
    the rule inserted last is evaluated first:
      1. the catch-all DROP goes in first and ends up below the others;
      2. one ACCEPT/DROP rule per entry of SERVICE_IPS follows, walked in
         reverse so the first entry finishes on top.

    Returns True when all steps ran without raising, False otherwise (the
    exception is logged).

    NOTE(review): the catch-all DROP matches on source subnet only, so it
    presumably also drops reply packets for connections opened from this
    side — confirm whether an ESTABLISHED/RELATED accept is needed.
    """
    try:
        tag = _cell_tag(cell_name)
        # Start from a clean slate so repeated calls do not stack rules.
        clear_cell_rules(cell_name)

        def _insert(match: List[str], verdict: str) -> None:
            _iptables(['-I', 'FORWARD', *match,
                       '-m', 'comment', '--comment', tag, '-j', verdict])

        # Bottom of the cell's rule group: drop anything not accepted above.
        _insert(['-s', vpn_subnet], 'DROP')

        # Service rules, last-to-first, so they stack in SERVICE_IPS order.
        for service, svc_ip in list(SERVICE_IPS.items())[::-1]:
            verdict = 'ACCEPT' if service in inbound_services else 'DROP'
            _insert(['-s', vpn_subnet, '-d', svc_ip], verdict)

        logger.info(f"Applied cell rules for {cell_name} ({vpn_subnet}): inbound={inbound_services}")
        return True
    except Exception as e:
        logger.error(f"apply_cell_rules({cell_name}): {e}")
        return False
def apply_all_cell_rules(cell_links: List[Dict[str, Any]]) -> None:
    """Re-apply firewall rules for all cell connections (called on startup).

    Links without a 'cell_name' or 'vpn_subnet' are skipped. For every
    other link the services whose inbound permission is truthy are passed
    to apply_cell_rules().

    A missing or null 'permissions' / 'inbound' entry is treated as "no
    service shared", so the cell still receives its deny rules instead of
    an AttributeError aborting the loop for all remaining cells.
    """
    for link in cell_links or []:
        name = link.get('cell_name')
        subnet = link.get('vpn_subnet')
        if not name or not subnet:
            continue
        # `or {}` also covers keys that are present but null.
        perms = link.get('permissions') or {}
        inbound_map = perms.get('inbound') or {}
        inbound = [s for s, v in inbound_map.items() if v]
        apply_cell_rules(name, subnet, inbound)
# ---------------------------------------------------------------------------
# DNS ACL (CoreDNS Corefile generation)
# ---------------------------------------------------------------------------
+62 -1
View File
@@ -1,6 +1,7 @@
import logging
import os
from flask import Blueprint, request, jsonify
from cell_link_manager import VALID_SERVICES
logger = logging.getLogger('picell')
bp = Blueprint('cells', __name__)
@@ -16,6 +17,15 @@ def get_cell_invite():
logger.error(f"Error generating cell invite: {e}")
return jsonify({'error': str(e)}), 500
@bp.route('/api/cells/services', methods=['GET'])
def list_shareable_services():
    """Report the service names known to the firewall layer.

    Responds with ``{"services": [...]}`` built from the keys of
    firewall_manager.SERVICE_IPS, or ``{"error": ...}`` with status 500
    when anything raises.

    NOTE(review): the permissions endpoint validates against
    VALID_SERVICES, not SERVICE_IPS — confirm both always hold the same
    names.
    """
    try:
        from firewall_manager import SERVICE_IPS
        names = [name for name in SERVICE_IPS.keys()]
        payload = {'services': names}
        return jsonify(payload)
    except Exception as failure:
        return jsonify({'error': str(failure)}), 500
@bp.route('/api/cells', methods=['GET'])
def list_cell_connections():
try:
@@ -34,10 +44,13 @@ def add_cell_connection():
for field in ('cell_name', 'public_key', 'vpn_subnet', 'dns_ip', 'domain'):
if field not in data:
return jsonify({'error': f'Missing field: {field}'}), 400
link = cell_link_manager.add_connection(data)
inbound_services = data.get('inbound_services', [])
link = cell_link_manager.add_connection(data, inbound_services=inbound_services)
return jsonify({'message': f"Connected to cell '{data['cell_name']}'", 'link': link}), 201
except ValueError as e:
return jsonify({'error': str(e)}), 400
except RuntimeError as e:
return jsonify({'error': str(e)}), 400
except Exception as e:
logger.error(f"Error adding cell connection: {e}")
return jsonify({'error': str(e)}), 500
@@ -63,3 +76,51 @@ def get_cell_connection_status(cell_name):
return jsonify({'error': str(e)}), 404
except Exception as e:
return jsonify({'error': str(e)}), 500
@bp.route('/api/cells/<cell_name>/permissions', methods=['GET'])
def get_cell_permissions(cell_name):
    """Return the stored permissions of the cell named in the URL.

    Status 404 when the cell link manager raises ValueError (unknown
    cell), 500 for any other exception.
    """
    try:
        # Imported here, at request time, as the other handlers in this file do.
        from app import cell_link_manager
        return jsonify(cell_link_manager.get_permissions(cell_name))
    except ValueError as not_found:
        return jsonify({'error': str(not_found)}), 404
    except Exception as failure:
        return jsonify({'error': str(failure)}), 500
@bp.route('/api/cells/<cell_name>/permissions', methods=['PUT'])
def update_cell_permissions(cell_name):
    """Replace the sharing permissions of the cell named in the URL.

    Expects a JSON object with optional 'inbound' and 'outbound' members,
    each an object mapping a service name from VALID_SERVICES to a boolean.
    A direction that is omitted is passed on as an empty map.

    Responses:
        200 — permissions stored; body carries the updated link record.
        400 — empty or non-object body, a direction that is not an object,
              an unknown service name, or a non-boolean value.
        404 — the cell link manager raised ValueError (unknown cell).
        500 — any other failure.

    After a successful update the DNS rules are regenerated; a failure of
    that step is logged and does not change the response.
    """
    try:
        from app import cell_link_manager, firewall_manager, peer_registry
        from app import COREFILE_PATH
        data = request.get_json(silent=True)
        if not data:
            return jsonify({'error': 'No data provided'}), 400
        # A JSON array or scalar has no .get(); reject it here instead of
        # failing later with a 500.
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        # Validate shape, service names and value types for both directions
        requested = {}
        for direction in ('inbound', 'outbound'):
            mapping = data.get(direction, {})
            if not isinstance(mapping, dict):
                return jsonify({'error': f'{direction} must be an object of service: boolean'}), 400
            for service, allowed in mapping.items():
                if service not in VALID_SERVICES:
                    return jsonify({'error': f'Unknown service: {service!r}'}), 400
                # Strings such as "false" are truthy in Python; only real
                # JSON booleans are accepted for an access-control flag.
                if not isinstance(allowed, bool):
                    return jsonify({'error': f'Value for {direction}.{service} must be true or false'}), 400
            requested[direction] = mapping

        link = cell_link_manager.update_permissions(
            cell_name, requested['inbound'], requested['outbound'])

        # Regenerate Corefile so outbound DNS changes take effect
        try:
            from app import config_manager
            domain = config_manager.configs.get('_identity', {}).get('domain', 'cell')
            peers = peer_registry.list_peers()
            cell_links = cell_link_manager.list_connections()
            firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, domain,
                                                 cell_links=cell_links)
        except Exception as e:
            logger.warning(f"DNS regen after permission update failed (non-fatal): {e}")

        return jsonify({'message': f"Permissions updated for '{cell_name}'", 'link': link})
    except ValueError as e:
        return jsonify({'error': str(e)}), 404
    except Exception as e:
        logger.error(f"Error updating cell permissions: {e}")
        return jsonify({'error': str(e)}), 500
+7 -23
View File
@@ -1074,33 +1074,17 @@ class RoutingManager(BaseServiceManager):
return False
def stop(self) -> bool:
"""Stop routing service"""
"""Stop routing service (state only — iptables rules are NOT flushed).
Flushing iptables here would destroy WireGuard MASQUERADE and all peer
FORWARD rules applied by firewall_manager. Individual rule removal is
handled by remove_nat_rule() / remove_firewall_rule().
"""
try:
# Set internal state to stopped
self._service_running = False
self._save_service_state()
# Try to clear all iptables rules (may fail in Docker without privileges)
try:
subprocess.run(['iptables', '-t', 'nat', '-F'],
check=True, timeout=10)
subprocess.run(['iptables', '-F'],
check=True, timeout=10)
except (subprocess.CalledProcessError, FileNotFoundError) as e:
logger.warning(f"Could not clear iptables rules: {e}")
# Continue anyway - service is considered stopped
# Try to disable IP forwarding (may fail in Docker without privileges)
try:
subprocess.run(['sysctl', '-w', 'net.ipv4.ip_forward=0'],
check=True, timeout=10)
except (subprocess.CalledProcessError, FileNotFoundError) as e:
logger.warning(f"Could not disable IP forwarding: {e}")
# Continue anyway - service is considered stopped
logger.info("Routing service stopped successfully")
logger.info("Routing service stopped (state only; iptables untouched)")
return True
except Exception as e:
logger.error(f"Failed to stop routing service: {e}")
# Even if system commands fail, we consider the service stopped
+16 -1
View File
@@ -365,6 +365,8 @@ class WireGuardManager(BaseServiceManager):
current_peer['ips'] = line.split('=', 1)[1].strip()
elif line.startswith('PersistentKeepalive'):
current_peer['ka'] = line.split('=', 1)[1].strip()
elif line.startswith('Endpoint'):
current_peer['endpoint'] = line.split('=', 1)[1].strip()
elif line == '' and 'pub' in current_peer:
desired[current_peer['pub']] = current_peer
current_peer = None
@@ -397,6 +399,8 @@ class WireGuardManager(BaseServiceManager):
'peer', pub,
'allowed-ips', p.get('ips', ''),
'persistent-keepalive', p.get('ka', '25')]
if p.get('endpoint'):
args += ['endpoint', p['endpoint']]
subprocess.run(args, capture_output=True, timeout=5)
logger.info(f'wg set applied: {len(desired)} peers')
@@ -483,7 +487,7 @@ class WireGuardManager(BaseServiceManager):
logger.error(f'add_cell_peer: invalid endpoint port: {endpoint!r}')
return False
try:
ipaddress.ip_network(vpn_subnet, strict=False)
remote_net = ipaddress.ip_network(vpn_subnet, strict=False)
except ValueError as e:
logger.error(f'add_cell_peer: invalid vpn_subnet {vpn_subnet!r}: {e}')
return False
@@ -491,6 +495,17 @@ class WireGuardManager(BaseServiceManager):
if any(c.isspace() for c in vpn_subnet):
logger.error(f'add_cell_peer: vpn_subnet contains whitespace: {vpn_subnet!r}')
return False
# Reject subnets that overlap the local WG network — would create a routing blackhole
try:
local_net = ipaddress.ip_network(self._get_configured_network(), strict=False)
if local_net.overlaps(remote_net):
logger.error(
f'add_cell_peer: vpn_subnet {vpn_subnet!r} overlaps local WG network '
f'{str(local_net)!r} — use a distinct subnet on the remote cell'
)
return False
except Exception:
pass
try:
content = self._read_config()
peer_block = (