feat: fix cross-cell service access — DNS DNAT, service DNAT, Caddy routing
DNS A records now return the WireGuard server IP (10.0.0.1) instead of Docker bridge VIPs so cross-cell peers resolve service names correctly regardless of their bridge subnet. DNAT rules (wg0:53→cell-dns:53 and wg0:80→cell-caddy:80) are applied at startup. Caddy routes by Host header, eliminating the Docker bridge subnet conflict. Firewall cell rules allow DNS and service (Caddy) traffic from linked cell subnets. Split-tunnel AllowedIPs now dynamically includes connected-cell VPN subnets and drops the 172.20.0.0/16 range. Peers with route_via set now receive full-tunnel config (0.0.0.0/0) so all their traffic exits via the remote cell. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+4
-1
@@ -305,6 +305,8 @@ def _apply_startup_enforcement():
|
||||
firewall_manager.apply_all_peer_rules(peers)
|
||||
firewall_manager.apply_all_cell_rules(cell_links)
|
||||
firewall_manager.ensure_cell_api_dnat()
|
||||
firewall_manager.ensure_dns_dnat()
|
||||
firewall_manager.ensure_service_dnat()
|
||||
# Restore any cell link WireGuard peers that were lost from wg0.conf
|
||||
# (happens if the container was rebuilt, wg0.conf was reset, etc.)
|
||||
_restore_cell_wg_peers(cell_links)
|
||||
@@ -337,7 +339,8 @@ def _bootstrap_dns():
|
||||
cell_name = identity.get('cell_name', os.environ.get('CELL_NAME', 'mycell'))
|
||||
domain = identity.get('domain', os.environ.get('CELL_DOMAIN', 'cell'))
|
||||
ip_range = identity.get('ip_range', os.environ.get('CELL_IP_RANGE', '172.20.0.0/16'))
|
||||
network_manager.bootstrap_dns_records(cell_name, domain, ip_range)
|
||||
# Bootstrap on first start; then always regenerate to ensure A records use WG server IP.
|
||||
network_manager.apply_ip_range(ip_range, cell_name, domain)
|
||||
except Exception as e:
|
||||
logger.warning(f"DNS bootstrap failed (non-fatal): {e}")
|
||||
|
||||
|
||||
+119
-17
@@ -193,10 +193,14 @@ def apply_peer_rules(peer_ip: str, settings: Dict[str, Any]) -> bool:
|
||||
'-m', 'comment', '--comment', comment, '-j', target])
|
||||
|
||||
# --- Step 3 (inserted last → ends up at TOP of chain) ---
|
||||
# Per-service rules — inserted in reverse dict order so first service ends up at top
|
||||
for service, svc_ip in reversed(list(SERVICE_IPS.items())):
|
||||
target = 'ACCEPT' if service in service_access else 'DROP'
|
||||
_iptables(['-I', 'FORWARD', '-s', peer_ip, '-d', svc_ip,
|
||||
# Service access via Caddy: DNS returns WG server IP for all services;
|
||||
# ensure_service_dnat() routes wg0:80 to Caddy. One ACCEPT/DROP rule
|
||||
# controls service access; CoreDNS ACL enforces per-name granularity.
|
||||
caddy_ip = _get_caddy_container_ip()
|
||||
if caddy_ip:
|
||||
target = 'ACCEPT' if service_access else 'DROP'
|
||||
_iptables(['-I', 'FORWARD', '-s', peer_ip, '-d', caddy_ip,
|
||||
'-p', 'tcp', '--dport', '80',
|
||||
'-m', 'comment', '--comment', comment, '-j', target])
|
||||
|
||||
logger.info(f"Applied rules for {peer_ip}: internet={internet_access} "
|
||||
@@ -298,24 +302,50 @@ def _get_cell_api_ip() -> Optional[str]:
|
||||
return r.stdout.strip()
|
||||
|
||||
|
||||
def _get_dns_container_ip() -> str:
    """Return cell-dns container's Docker bridge IP. Falls back to 172.20.0.3.

    Used by ensure_dns_dnat() and the cell firewall rules to target the
    CoreDNS container by its current bridge address.
    """
    try:
        # docker inspect prints the container's IP on its (single) attached
        # network; check=False so a missing/stopped container yields empty
        # stdout rather than raising.
        r = _run(['docker', 'inspect', '--format',
                  '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}',
                  'cell-dns'], check=False)
        # Empty output → fall back to the conventional compose-assigned address.
        return r.stdout.strip() or '172.20.0.3'
    except Exception:
        # docker binary unavailable or inspect blew up entirely — use the default.
        return '172.20.0.3'
|
||||
|
||||
|
||||
def _get_caddy_container_ip() -> str:
    """Return cell-caddy container's Docker bridge IP. Falls back to 172.20.0.2.

    Queries `docker inspect` for the container's address on its attached
    network; any failure (container absent, docker unavailable) yields the
    conventional compose-assigned fallback address.
    """
    fallback = '172.20.0.2'
    try:
        result = _run(['docker', 'inspect', '--format',
                       '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}',
                       'cell-caddy'], check=False)
        ip = result.stdout.strip()
        return ip if ip else fallback
    except Exception:
        return fallback
|
||||
|
||||
|
||||
def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str],
|
||||
exit_relay: bool = False) -> bool:
|
||||
"""Apply FORWARD rules for a cell-to-cell peer.
|
||||
|
||||
Traffic from vpn_subnet is allowed only to service VIPs listed in
|
||||
inbound_services; all other cell traffic is DROPped. Cells get no
|
||||
internet or peer access — only explicit service VIPs, plus the
|
||||
cell-api port (3000) for permission-sync pushes arriving via DNAT.
|
||||
internet or peer access — only explicit service access via Caddy on
|
||||
port 80, plus the cell-api port (3000) for permission-sync pushes.
|
||||
|
||||
DNS (port 53) is always allowed so cell peers can resolve service names.
|
||||
Service names resolve to the WG server IP; ensure_service_dnat() routes
|
||||
wg0:80 to Caddy, which routes by Host header.
|
||||
|
||||
When exit_relay=True, the remote cell's peers can route internet
|
||||
traffic through this cell (Phase 3). A broad ACCEPT for traffic
|
||||
going out eth0 is added below per-service rules but above catch-all.
|
||||
traffic through this cell (Phase 3).
|
||||
|
||||
Rule insertion order (first inserted = bottom, last inserted = top):
|
||||
1. Catch-all DROP for the subnet (inserted first → bottom)
|
||||
2. Exit relay ACCEPT (-o eth0) (if exit_relay, above catch-all)
|
||||
3. Per-service ACCEPT/DROP (inserted in reversed() order)
|
||||
4. API-sync ACCEPT (inserted last → top)
|
||||
3. Service ACCEPT to Caddy port 80 (if any inbound_services)
|
||||
4. DNS ACCEPT to cell-dns port 53 (UDP + TCP)
|
||||
5. API-sync ACCEPT (inserted last → top)
|
||||
"""
|
||||
try:
|
||||
tag = _cell_tag(cell_name)
|
||||
@@ -326,17 +356,27 @@ def apply_cell_rules(cell_name: str, vpn_subnet: str, inbound_services: List[str
|
||||
'-m', 'comment', '--comment', tag, '-j', 'DROP'])
|
||||
|
||||
# Exit relay ACCEPT — allow internet-bound traffic from this cell's peers.
|
||||
# Inserted ABOVE catch-all but BELOW per-service rules so service-level
|
||||
# DROP rules still take effect for specific service VIPs.
|
||||
if exit_relay:
|
||||
_iptables(['-I', 'FORWARD', '-s', vpn_subnet, '-o', 'eth0',
|
||||
'-m', 'comment', '--comment', tag, '-j', 'ACCEPT'])
|
||||
|
||||
# Per-service rules — inserted in reverse dict order, highest-priority last
|
||||
for service, svc_ip in reversed(list(SERVICE_IPS.items())):
|
||||
target = 'ACCEPT' if service in inbound_services else 'DROP'
|
||||
_iptables(['-I', 'FORWARD', '-s', vpn_subnet, '-d', svc_ip,
|
||||
'-m', 'comment', '--comment', tag, '-j', target])
|
||||
# Service access via Caddy — DNAT wg0:80 → Caddy; Host header routes to service.
|
||||
# Only add ACCEPT if this cell has any inbound services granted.
|
||||
if inbound_services:
|
||||
caddy_ip = _get_caddy_container_ip()
|
||||
if caddy_ip:
|
||||
_iptables(['-I', 'FORWARD', '-s', vpn_subnet, '-d', caddy_ip,
|
||||
'-p', 'tcp', '--dport', '80',
|
||||
'-m', 'comment', '--comment', tag, '-j', 'ACCEPT'])
|
||||
|
||||
# DNS ACCEPT — allow cross-cell peers to query CoreDNS via the WG server IP.
|
||||
# ensure_dns_dnat() routes wg0:53 to cell-dns; FORWARD must allow it.
|
||||
dns_ip = _get_dns_container_ip()
|
||||
if dns_ip:
|
||||
for proto in ('udp', 'tcp'):
|
||||
_iptables(['-I', 'FORWARD', '-s', vpn_subnet, '-d', dns_ip,
|
||||
'-p', proto, '--dport', '53',
|
||||
'-m', 'comment', '--comment', tag, '-j', 'ACCEPT'])
|
||||
|
||||
# API permission-sync ACCEPT — inserted LAST so it goes to position 1 (above
|
||||
# the catch-all DROP). Remote cells push permissions to our cell-api via the
|
||||
@@ -415,6 +455,68 @@ def ensure_cell_api_dnat() -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def ensure_dns_dnat() -> bool:
    """DNAT wg0:53 (UDP+TCP) → cell-dns:53 so VPN peers use the WG server IP for DNS.

    Peers are configured with DNS = <wg_server_ip>. Their DNS queries arrive on
    wg0:53 and must be forwarded to cell-dns inside the Docker bridge.

    Returns:
        True once the DNAT and FORWARD-accept rules are in place (idempotent —
        existing rules are detected with `-C` and not duplicated), False when
        cell-dns has no resolvable IP or iptables manipulation raised.
    """
    try:
        dns_ip = _get_dns_container_ip()
        if not dns_ip:
            # Defensive: without a destination IP there is nothing to DNAT to.
            logger.warning('ensure_dns_dnat: cell-dns not found')
            return False
        for proto in ('udp', 'tcp'):
            # `-C` probes for an identical existing rule; append (`-A`) only
            # when the probe fails so repeated startups don't stack duplicates.
            dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-p', proto,
                          '--dport', '53', '-j', 'DNAT', '--to-destination', f'{dns_ip}:53']
            dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-p', proto,
                        '--dport', '53', '-j', 'DNAT', '--to-destination', f'{dns_ip}:53']
            if _wg_exec(['iptables'] + dnat_check).returncode != 0:
                _wg_exec(['iptables'] + dnat_add)
        for proto in ('udp', 'tcp'):
            # Matching FORWARD accept so the DNAT'd packets aren't dropped.
            # NOTE(review): this matches `-o eth0`, but traffic DNAT'd to a
            # Docker bridge IP may egress the bridge interface instead —
            # confirm the egress interface on the wg container.
            fwd_check = ['-C', 'FORWARD', '-i', 'wg0', '-o', 'eth0',
                         '-p', proto, '--dport', '53', '-j', 'ACCEPT']
            fwd_add = ['-I', 'FORWARD', '-i', 'wg0', '-o', 'eth0',
                       '-p', proto, '--dport', '53', '-j', 'ACCEPT']
            if _wg_exec(['iptables'] + fwd_check).returncode != 0:
                _wg_exec(['iptables'] + fwd_add)
        logger.info(f'ensure_dns_dnat: wg0:53 → {dns_ip}:53')
        return True
    except Exception as e:
        # Best-effort at startup: log and report failure rather than crash.
        logger.error(f'ensure_dns_dnat: {e}')
        return False
|
||||
|
||||
|
||||
def ensure_service_dnat() -> bool:
    """DNAT wg0:80 → cell-caddy:80 so VPN peers reach services via Host-header routing.

    All service DNS names resolve to the WG server IP. Traffic to wg0:80 is
    forwarded to Caddy, which routes to the correct backend by Host header.

    Returns:
        True once the DNAT and FORWARD-accept rules are in place (idempotent —
        `-C` probes prevent duplicate rules), False when cell-caddy has no
        resolvable IP or iptables manipulation raised.
    """
    try:
        caddy_ip = _get_caddy_container_ip()
        if not caddy_ip:
            # Defensive: no Caddy address means nothing to DNAT to.
            logger.warning('ensure_service_dnat: cell-caddy not found')
            return False
        # Probe with `-C` first; append only when the rule is absent so the
        # function stays idempotent across restarts.
        dnat_check = ['-t', 'nat', '-C', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
                      '--dport', '80', '-j', 'DNAT', '--to-destination', f'{caddy_ip}:80']
        dnat_add = ['-t', 'nat', '-A', 'PREROUTING', '-i', 'wg0', '-p', 'tcp',
                    '--dport', '80', '-j', 'DNAT', '--to-destination', f'{caddy_ip}:80']
        if _wg_exec(['iptables'] + dnat_check).returncode != 0:
            _wg_exec(['iptables'] + dnat_add)
        # Matching FORWARD accept for the DNAT'd traffic.
        # NOTE(review): matches `-o eth0`; traffic DNAT'd to a Docker bridge IP
        # may actually egress the bridge interface — confirm on the wg container.
        fwd_check = ['-C', 'FORWARD', '-i', 'wg0', '-o', 'eth0',
                     '-p', 'tcp', '--dport', '80', '-j', 'ACCEPT']
        fwd_add = ['-I', 'FORWARD', '-i', 'wg0', '-o', 'eth0',
                   '-p', 'tcp', '--dport', '80', '-j', 'ACCEPT']
        if _wg_exec(['iptables'] + fwd_check).returncode != 0:
            _wg_exec(['iptables'] + fwd_add)
        logger.info(f'ensure_service_dnat: wg0:80 → {caddy_ip}:80')
        return True
    except Exception as e:
        # Best-effort at startup: log and report failure rather than crash.
        logger.error(f'ensure_service_dnat: {e}')
        return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DNS ACL (CoreDNS Corefile generation)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
+30
-18
@@ -179,27 +179,39 @@ class NetworkManager(BaseServiceManager):
|
||||
warnings.append(f'apply_ip_range failed: {e}')
|
||||
return {'restarted': restarted, 'warnings': warnings}
|
||||
|
||||
def _build_dns_records(self, cell_name: str, ip_range: str) -> List[Dict]:
|
||||
"""Build the standard set of DNS A records for the given subnet.
|
||||
def _get_wg_server_ip(self) -> str:
    """Return the WireGuard server IP by reading wg0.conf. Falls back to 10.0.0.1.

    Used so DNS A records can point service names at the WG server address
    rather than a Docker bridge VIP.
    """
    try:
        import ipaddress
        conf = os.path.join(self.config_dir, 'wireguard', 'wg_confs', 'wg0.conf')
        with open(conf) as f:
            for line in f:
                stripped = line.strip()
                # The [Interface] "Address = 10.0.0.1/24" line carries the
                # server address; match case-insensitively to be tolerant of
                # hand-edited configs.
                if stripped.lower().startswith('address'):
                    addr = stripped.split('=', 1)[1].strip()
                    # ip_interface strips the /prefix — callers want the bare IP.
                    return str(ipaddress.ip_interface(addr).ip)
    except Exception:
        # Missing/unreadable config or malformed address — use the default below.
        pass
    return '10.0.0.1'
|
||||
|
||||
All user-facing names resolve to the Caddy reverse proxy (caddy IP) so
|
||||
the Host header is passed through and Caddy routes based on it.
|
||||
Exception: calendar/files/mail/webdav use dedicated virtual IPs so that
|
||||
iptables per-service firewall rules can target them by destination IP.
|
||||
api and webui also go through Caddy — they don't have their own VIPs and
|
||||
their containers don't serve HTTP on port 80.
|
||||
def _build_dns_records(self, cell_name: str, ip_range: str) -> List[Dict]:
|
||||
"""Build the standard set of DNS A records.
|
||||
|
||||
All service names resolve to the WG server IP so they are reachable
|
||||
from both local WG peers and cross-cell peers without Docker bridge
|
||||
subnet conflicts. ensure_service_dnat() routes wg0:80 to Caddy, which
|
||||
routes requests to the correct backend by Host header.
|
||||
"""
|
||||
import ip_utils
|
||||
ips = ip_utils.get_service_ips(ip_range)
|
||||
wg_ip = self._get_wg_server_ip()
|
||||
return [
|
||||
{'name': cell_name, 'type': 'A', 'value': ips['caddy']},
|
||||
{'name': 'api', 'type': 'A', 'value': ips['caddy']},
|
||||
{'name': 'webui', 'type': 'A', 'value': ips['caddy']},
|
||||
{'name': 'calendar', 'type': 'A', 'value': ips['vip_calendar']},
|
||||
{'name': 'files', 'type': 'A', 'value': ips['vip_files']},
|
||||
{'name': 'mail', 'type': 'A', 'value': ips['vip_mail']},
|
||||
{'name': 'webmail', 'type': 'A', 'value': ips['vip_mail']},
|
||||
{'name': 'webdav', 'type': 'A', 'value': ips['vip_webdav']},
|
||||
{'name': cell_name, 'type': 'A', 'value': wg_ip},
|
||||
{'name': 'api', 'type': 'A', 'value': wg_ip},
|
||||
{'name': 'webui', 'type': 'A', 'value': wg_ip},
|
||||
{'name': 'calendar', 'type': 'A', 'value': wg_ip},
|
||||
{'name': 'files', 'type': 'A', 'value': wg_ip},
|
||||
{'name': 'mail', 'type': 'A', 'value': wg_ip},
|
||||
{'name': 'webmail', 'type': 'A', 'value': wg_ip},
|
||||
{'name': 'webdav', 'type': 'A', 'value': wg_ip},
|
||||
]
|
||||
|
||||
def get_dns_records(self, zone: str = 'cell') -> List[Dict]:
|
||||
|
||||
@@ -77,7 +77,9 @@ def peer_services():
|
||||
if peer_private_key:
|
||||
try:
|
||||
internet_access = peer.get('internet_access', True)
|
||||
allowed_ips = wireguard_manager.FULL_TUNNEL_IPS if internet_access else wireguard_manager.get_split_tunnel_ips()
|
||||
route_via = peer.get('route_via')
|
||||
use_full = internet_access or bool(route_via)
|
||||
allowed_ips = wireguard_manager.FULL_TUNNEL_IPS if use_full else wireguard_manager.get_split_tunnel_ips()
|
||||
wg_config = wireguard_manager.get_peer_config(
|
||||
peer_name=peer_name,
|
||||
peer_ip=peer_ip,
|
||||
|
||||
@@ -176,7 +176,11 @@ def get_peer_config():
|
||||
allowed_ips = data.get('allowed_ips') or None
|
||||
if not allowed_ips and registered:
|
||||
internet_access = registered.get('internet_access', True)
|
||||
allowed_ips = wireguard_manager.FULL_TUNNEL_IPS if internet_access else wireguard_manager.get_split_tunnel_ips()
|
||||
route_via = registered.get('route_via')
|
||||
# Full tunnel when internet is allowed OR when route_via is set
|
||||
# (route_via exits via a remote cell — all traffic must go through the tunnel)
|
||||
use_full = internet_access or bool(route_via)
|
||||
allowed_ips = wireguard_manager.FULL_TUNNEL_IPS if use_full else wireguard_manager.get_split_tunnel_ips()
|
||||
|
||||
result = wireguard_manager.get_peer_config(
|
||||
peer_name=peer_name,
|
||||
|
||||
@@ -203,8 +203,25 @@ class WireGuardManager(BaseServiceManager):
|
||||
return SERVER_NETWORK
|
||||
|
||||
def get_split_tunnel_ips(self) -> str:
|
||||
"""Return split-tunnel AllowedIPs: VPN subnet + Docker bridge."""
|
||||
return f'{self._get_configured_network()}, 172.20.0.0/16'
|
||||
"""Return split-tunnel AllowedIPs: local VPN subnet + all connected cell VPN subnets.
|
||||
|
||||
172.20.0.0/16 is intentionally excluded — all services are accessed via the
|
||||
WG server IP (ensure_service_dnat routes wg0:80 to Caddy). Including the
|
||||
Docker bridge subnet would cause routing conflicts when cells share the same range.
|
||||
"""
|
||||
local_net = self._get_configured_network()
|
||||
cell_links_file = os.path.join(self.data_dir, 'api', 'cell_links.json')
|
||||
cell_nets = []
|
||||
try:
|
||||
with open(cell_links_file) as f:
|
||||
links = json.load(f)
|
||||
for link in links:
|
||||
subnet = link.get('vpn_subnet', '')
|
||||
if subnet and subnet != local_net:
|
||||
cell_nets.append(subnet)
|
||||
except Exception:
|
||||
pass
|
||||
return ', '.join([local_net] + cell_nets)
|
||||
|
||||
def _load_registered_peers(self) -> list:
|
||||
"""Read active peers from peers.json for wg0.conf reconstruction after bootstrap."""
|
||||
@@ -733,7 +750,14 @@ class WireGuardManager(BaseServiceManager):
|
||||
if allowed_ips is None:
|
||||
allowed_ips = self.FULL_TUNNEL_IPS
|
||||
server_keys = self.get_keys()
|
||||
peer_dns = _resolve_peer_dns()
|
||||
# Use WG server IP for DNS: ensure_dns_dnat() routes wg0:53 → cell-dns.
|
||||
# This works for both split-tunnel (10.0.x.x in AllowedIPs) and cross-cell peers.
|
||||
addr_str = self._get_configured_address()
|
||||
try:
|
||||
import ipaddress as _ipaddress
|
||||
peer_dns = str(_ipaddress.ip_interface(addr_str).ip)
|
||||
except Exception:
|
||||
peer_dns = _resolve_peer_dns()
|
||||
port = self._get_configured_port()
|
||||
endpoint = server_endpoint if ':' in server_endpoint else f'{server_endpoint}:{port}'
|
||||
addr = peer_ip if '/' in peer_ip else f'{peer_ip}/32'
|
||||
|
||||
Reference in New Issue
Block a user