From c9ed28f258a6fca0a77c4af87bf0609275e36876 Mon Sep 17 00:00:00 2001 From: Dmitrii Iurco Date: Wed, 22 Apr 2026 15:02:26 -0400 Subject: [PATCH 1/3] fix: spawn helper container for all-services restart so API survives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When containers=['*'] (ip_range change or full restart), the previous code ran docker compose down/up in a background thread inside cell-api. docker compose down killed cell-api, terminating the thread before docker compose up could run — leaving all containers stopped. Fix: spawn an independent docker run --rm container (pic_api:latest) that has the docker socket and project dir mounted. This helper outlives cell-api being stopped and completes the up -d independently. For specific-container restarts (port changes), keep the direct approach since the API container is not in the affected set. Co-Authored-By: Claude Sonnet 4.6 --- api/app.py | 84 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 54 insertions(+), 30 deletions(-) diff --git a/api/app.py b/api/app.py index 07507de..87baa41 100644 --- a/api/app.py +++ b/api/app.py @@ -692,40 +692,64 @@ def apply_pending_config(): # existing network's subnet in-place, so we need `down` + `up` in that case. needs_network_recreate = pending.get('network_recreate', False) + host_env = os.path.join(project_dir, '.env') + host_compose = os.path.join(project_dir, 'docker-compose.yml') + if '*' in containers: + # All-services restart: `docker compose down` or `up -d` may stop/recreate the + # API container itself, killing this background thread mid-operation. + # Spawn an independent helper container using pic_api:latest that has docker CLI + # and survives cell-api being stopped/recreated. if needs_network_recreate: - # down removes containers AND the bridge network; up recreates everything - compose_down_args = ['down'] - compose_up_args = ['up', '-d'] + helper_script = ( + f'sleep 2' + f' && docker compose --project-directory {project_dir}' + f' -f {host_compose} --env-file {host_env} down' + f' && docker compose --project-directory {project_dir}' + f' -f {host_compose} --env-file {host_env} up -d' + ) else: - compose_down_args = None - compose_up_args = ['up', '-d'] + helper_script = ( + f'sleep 2' + f' && docker compose --project-directory {project_dir}' + f' -f {host_compose} --env-file {host_env} up -d' + ) + + def _do_apply(): + import subprocess as _subprocess + _subprocess.Popen( + ['docker', 'run', '--rm', + '-v', '/var/run/docker.sock:/var/run/docker.sock', + '-v', f'{project_dir}:{project_dir}', + '--entrypoint', 'sh', + 'pic_api:latest', + '-c', helper_script], + close_fds=True, + stdout=_subprocess.DEVNULL, + stderr=_subprocess.DEVNULL, + ) + logger.info( + 'spawned helper container for all-services restart' + + (' (network_recreate)' if needs_network_recreate else '') + ) else: - compose_down_args = None - compose_up_args = ['up', '-d', '--no-deps'] + containers - - base_cmd = ['docker', 'compose', - '--project-directory', project_dir, - '-f', '/app/docker-compose.yml', - '--env-file', '/app/.env.compose'] - - # Run in a background thread; 0.3 s delay lets Flask send this response first. - def _do_apply(): - import time as _time - import subprocess as _subprocess - _time.sleep(0.3) - if compose_down_args: - r = _subprocess.run(base_cmd + compose_down_args, - capture_output=True, text=True, timeout=60) - if r.returncode != 0: - logger.error(f"docker compose down failed: {r.stderr.strip()}") - return - result = _subprocess.run(base_cmd + compose_up_args, - capture_output=True, text=True, timeout=120) - if result.returncode != 0: - logger.error(f"docker compose up failed: {result.stderr.strip()}") - else: - logger.info(f'docker compose {" ".join(compose_up_args)} completed successfully') + # Specific containers only — API is not affected, run directly from here. + def _do_apply(): + import time as _time + import subprocess as _subprocess + _time.sleep(0.3) + result = _subprocess.run( + ['docker', 'compose', + '--project-directory', project_dir, + '-f', '/app/docker-compose.yml', + '--env-file', '/app/.env.compose', + 'up', '-d', '--no-deps'] + containers, + capture_output=True, text=True, timeout=120, + ) + if result.returncode != 0: + logger.error(f"docker compose up failed: {result.stderr.strip()}") + else: + logger.info(f'docker compose up completed for: {containers}') threading.Thread(target=_do_apply, daemon=False).start() From e74d5e05041dc03d0d07d871557eab3e30118804 Mon Sep 17 00:00:00 2001 From: Dmitrii Iurco Date: Wed, 22 Apr 2026 15:18:37 -0400 Subject: [PATCH 2/3] fix: generate Caddyfile in setup and on identity changes `make reinstall` wipes config/ then `make setup` creates an empty Caddyfile (ensure_file just touches it). Add write_caddyfile() to ip_utils.py that generates the full reverse-proxy config from ip_range, cell_name, and domain. Call it from setup_cell.py so fresh installs always get a valid Caddyfile. Also regenerate it in app.py whenever ip_range, domain, or cell_name changes so Caddy stays in sync. Co-Authored-By: Claude Sonnet 4.6 --- api/app.py | 15 +++++++++ api/ip_utils.py | 78 +++++++++++++++++++++++++++++++++++++++++++ scripts/setup_cell.py | 13 +++++++- 3 files changed, 105 insertions(+), 1 deletion(-) diff --git a/api/app.py b/api/app.py index 87baa41..0f44975 100644 --- a/api/app.py +++ b/api/app.py @@ -487,6 +487,12 @@ def update_config(): net_result = network_manager.apply_domain(domain) all_restarted.extend(net_result.get('restarted', [])) all_warnings.extend(net_result.get('warnings', [])) + # Regenerate Caddyfile — virtual host names change with the domain + import ip_utils as _ip_domain + _cur_id = config_manager.configs.get('_identity', {}) + _cur_range = _cur_id.get('ip_range', os.environ.get('CELL_IP_RANGE', '172.20.0.0/16')) + _cur_name = _cur_id.get('cell_name', os.environ.get('CELL_NAME', 'mycell')) + _ip_domain.write_caddyfile(_cur_range, _cur_name, domain, '/app/config/caddy/Caddyfile') # Apply cell name change to DNS hostname record if identity_updates.get('cell_name'): @@ -496,6 +502,12 @@ def update_config(): cn_result = network_manager.apply_cell_name(old_name, new_name) all_restarted.extend(cn_result.get('restarted', [])) all_warnings.extend(cn_result.get('warnings', [])) + # Regenerate Caddyfile — main virtual host name changes with cell_name + import ip_utils as _ip_name + _cur_id2 = config_manager.configs.get('_identity', {}) + _cur_range2 = _cur_id2.get('ip_range', os.environ.get('CELL_IP_RANGE', '172.20.0.0/16')) + _cur_domain2 = identity_updates.get('domain') or _cur_id2.get('domain', os.environ.get('CELL_DOMAIN', 'cell')) + _ip_name.write_caddyfile(_cur_range2, new_name, _cur_domain2, '/app/config/caddy/Caddyfile') # Apply ip_range change: regenerate DNS records, update virtual IPs + firewall rules if identity_updates.get('ip_range'): @@ -514,6 +526,9 @@ def update_config(): # Write new .env with updated IPs (and current ports) for next container start env_file = os.environ.get('COMPOSE_ENV_FILE', '/app/.env.compose') ip_utils.write_env_file(new_range, env_file, _collect_service_ports(config_manager.configs)) + # Regenerate Caddyfile with new VIPs + ip_utils.write_caddyfile(new_range, cur_cell_name, cur_domain, + '/app/config/caddy/Caddyfile') # Mark ALL containers as needing restart; network_recreate signals that # docker compose down is required before up (Docker can't change subnet in-place) _set_pending_restart( diff --git a/api/ip_utils.py b/api/ip_utils.py index 60491e2..007d17e 100644 --- a/api/ip_utils.py +++ b/api/ip_utils.py @@ -129,6 +129,84 @@ def get_virtual_ips(ip_range: str) -> Dict[str, str]: } +def write_caddyfile(ip_range: str, cell_name: str, domain: str, path: str) -> bool: + """ + Generate the Caddy reverse-proxy config from the current ip_range, cell_name, and domain. + + Must be called after any ip_range or domain change so Caddy routes correctly. + Container-internal ports are fixed by docker-compose and never change. + Returns True on success. + """ + try: + ips = get_service_ips(ip_range) + caddy_ip = ips['caddy'] + vip_calendar = ips['vip_calendar'] + vip_files = ips['vip_files'] + vip_mail = ips['vip_mail'] + vip_webdav = ips['vip_webdav'] + content = f"""\ +{{ + auto_https off +}} + +# Main cell domain — no service-IP restriction needed +http://{cell_name}.{domain}, http://{caddy_ip}:80 {{ + handle /api/* {{ + reverse_proxy cell-api:3000 + }} + handle /calendar* {{ + reverse_proxy cell-radicale:5232 + }} + handle /files* {{ + reverse_proxy cell-filegator:8080 + }} + handle /webmail* {{ + reverse_proxy cell-rainloop:8888 + }} + handle {{ + reverse_proxy cell-webui:80 + }} +}} + +# Per-service virtual IPs — each gets its own IP so iptables can target them +http://calendar.{domain}, http://{vip_calendar}:80 {{ + reverse_proxy cell-radicale:5232 +}} + +http://files.{domain}, http://{vip_files}:80 {{ + reverse_proxy cell-filegator:8080 +}} + +http://mail.{domain}, http://webmail.{domain}, http://{vip_mail}:80 {{ + reverse_proxy cell-rainloop:8888 +}} + +http://webdav.{domain}, http://{vip_webdav}:80 {{ + reverse_proxy cell-webdav:80 +}} + +http://api.{domain} {{ + reverse_proxy cell-api:3000 +}} + +# Catch-all for direct IP / localhost +:80 {{ + handle /api/* {{ + reverse_proxy cell-api:3000 + }} + handle {{ + reverse_proxy cell-webui:80 + }} +}} +""" + os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True) + with open(path, 'w') as f: + f.write(content) + return True + except Exception: + return False + + def write_env_file(ip_range: str, path: str, ports: Optional[Dict[str, int]] = None) -> bool: """ Write (or overwrite) the docker-compose .env file with IPs and ports. diff --git a/scripts/setup_cell.py b/scripts/setup_cell.py index 78111fb..adb01b6 100644 --- a/scripts/setup_cell.py +++ b/scripts/setup_cell.py @@ -44,7 +44,6 @@ REQUIRED_DIRS = [ ] REQUIRED_FILES = [ - 'config/caddy/Caddyfile', 'config/dns/Corefile', 'config/dhcp/dnsmasq.conf', 'config/ntp/chrony.conf', @@ -205,6 +204,17 @@ def write_compose_env(ip_range: str): print(f'[WARN] Could not write .env — containers will use built-in default IPs/ports') +def write_caddy_config(ip_range: str, cell_name: str, domain: str): + """Generate Caddyfile with correct VIPs and hostnames for this cell.""" + sys.path.insert(0, os.path.join(ROOT, 'api')) + import ip_utils + caddyfile = os.path.join(ROOT, 'config', 'caddy', 'Caddyfile') + if ip_utils.write_caddyfile(ip_range, cell_name, domain, caddyfile): + print(f'[CREATED] config/caddy/Caddyfile (subnet={ip_range} domain={domain})') + else: + print(f'[WARN] Could not write Caddyfile') + + def _read_existing_ip_range() -> str: """Read ip_range from existing cell_config.json if present, else return None.""" cfg_path = os.path.join(ROOT, 'config', 'api', 'cell_config.json') @@ -237,6 +247,7 @@ def main(): write_wg0_conf(priv, vpn_address, wg_port) write_cell_config(cell_name, domain, wg_port) write_compose_env(ip_range) + write_caddy_config(ip_range, cell_name, domain) print() print('--- Setup complete! Run: make start ---') From 50671f71cbac0474293ab50eb6a77eec0ea9e1de Mon Sep 17 00:00:00 2001 From: Dmitrii Iurco Date: Wed, 22 Apr 2026 15:32:23 -0400 Subject: [PATCH 3/3] fix: use configured domain in CoreDNS Corefile generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two bugs caused DNS to fail when the domain name changes: 1. generate_corefile() hardcoded 'cell' as the zone name instead of using the configured domain — on startup it would silently reset any domain change back to 'cell' 2. apply_domain() regex replaced ALL non-dot zones (including local.cell) with the new domain → duplicate zone blocks → CoreDNS crash Fix: add a domain parameter to generate_corefile/apply_all_dns_rules, add _configured_domain() helper in app.py, and delegate Corefile updates in apply_domain() to generate_corefile() so the logic is in one place. Also parameterise SERVICE_HOSTS ACL entries via the domain argument. Co-Authored-By: Claude Sonnet 4.6 --- api/app.py | 12 ++++++++---- api/firewall_manager.py | 42 ++++++++++++++++++++--------------------- api/network_manager.py | 31 +++++++++--------------------- config/dns/Corefile | 6 +++--- 4 files changed, 41 insertions(+), 50 deletions(-) diff --git a/api/app.py b/api/app.py index 0f44975..18a0520 100644 --- a/api/app.py +++ b/api/app.py @@ -188,11 +188,15 @@ cell_link_manager = CellLinkManager( ) # Apply firewall + DNS rules from stored peer settings (survives API restarts) +def _configured_domain() -> str: + return config_manager.configs.get('_identity', {}).get('domain', 'cell') + + def _apply_startup_enforcement(): try: peers = peer_registry.list_peers() firewall_manager.apply_all_peer_rules(peers) - firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH) + firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, _configured_domain()) logger.info(f"Applied enforcement rules for {len(peers)} peers on startup") except Exception as e: logger.warning(f"Startup enforcement failed (non-fatal): {e}") @@ -1387,7 +1391,7 @@ def apply_wireguard_enforcement(): try: peers = peer_registry.list_peers() firewall_manager.apply_all_peer_rules(peers) - firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH) + firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, _configured_domain()) return jsonify({'ok': True, 'peers': len(peers)}) except Exception as e: return jsonify({'error': str(e)}), 500 @@ -1528,7 +1532,7 @@ def add_peer(): if success: # Apply server-side enforcement immediately firewall_manager.apply_peer_rules(peer_info['ip'], peer_info) - firewall_manager.apply_all_dns_rules(peer_registry.list_peers(), COREFILE_PATH) + firewall_manager.apply_all_dns_rules(peer_registry.list_peers(), COREFILE_PATH, _configured_domain()) return jsonify({"message": f"Peer {data['name']} added successfully", "ip": assigned_ip}), 201 else: return jsonify({"error": f"Peer {data['name']} already exists"}), 400 @@ -1564,7 +1568,7 @@ def update_peer(peer_name): updated_peer = peer_registry.get_peer(peer_name) if updated_peer: firewall_manager.apply_peer_rules(updated_peer['ip'], updated_peer) - firewall_manager.apply_all_dns_rules(peer_registry.list_peers(), COREFILE_PATH) + firewall_manager.apply_all_dns_rules(peer_registry.list_peers(), COREFILE_PATH, _configured_domain()) result = {"message": f"Peer {peer_name} updated", "config_changed": config_changed} return jsonify(result) else: diff --git a/api/firewall_manager.py b/api/firewall_manager.py index 32e57d9..51d65e1 100644 --- a/api/firewall_manager.py +++ b/api/firewall_manager.py @@ -212,30 +212,27 @@ def apply_all_peer_rules(peers: List[Dict[str, Any]]) -> None: # DNS ACL (CoreDNS Corefile generation) # --------------------------------------------------------------------------- -# Map service name → DNS hostname in .cell zone -SERVICE_HOSTS = { - 'calendar': 'calendar.cell.', - 'files': 'files.cell.', - 'mail': 'mail.cell.', - 'webdav': 'webdav.cell.', -} +# Service subdomains that get per-peer ACL rules in the CoreDNS zone block +_ACL_SERVICES = ('calendar', 'files', 'mail', 'webdav') -def _build_acl_block(blocked_peers_by_service: Dict[str, List[str]]) -> str: +def _build_acl_block(blocked_peers_by_service: Dict[str, List[str]], + domain: str = 'cell') -> str: """ Build CoreDNS ACL plugin stanzas. blocked_peers_by_service: { 'calendar': ['10.0.0.2', '10.0.0.3'], ... } - Returns a string to embed in the `cell { }` zone block. + Returns a string to embed in the primary zone block. """ if not blocked_peers_by_service: return '' lines = [] - for service, peer_ips in blocked_peers_by_service.items(): - host = SERVICE_HOSTS.get(service) - if not host or not peer_ips: + for service in _ACL_SERVICES: + peer_ips = blocked_peers_by_service.get(service, []) + if not peer_ips: continue + host = f'{service}.{domain}.' for ip in peer_ips: lines.append(f' acl {host} {{') lines.append(f' block net {ip}/32') @@ -245,10 +242,12 @@ def _build_acl_block(blocked_peers_by_service: Dict[str, List[str]]) -> str: return '\n'.join(lines) -def generate_corefile(peers: List[Dict[str, Any]], corefile_path: str = COREFILE_PATH) -> bool: +def generate_corefile(peers: List[Dict[str, Any]], corefile_path: str = COREFILE_PATH, + domain: str = 'cell') -> bool: """ Rewrite the CoreDNS Corefile with per-peer ACL rules and reload plugin. The file is written to corefile_path (API-side path mapped into CoreDNS container). + domain: the configured cell domain (e.g. 'cell', 'dev') — must match zone file names. """ try: # Collect which peers block which services @@ -262,12 +261,12 @@ def generate_corefile(peers: List[Dict[str, Any]], corefile_path: str = COREFILE if service not in allowed_services: blocked[service].append(ip) - acl_block = _build_acl_block(blocked) + acl_block = _build_acl_block(blocked, domain) - cell_zone_block = 'cell {\n file /data/cell.zone\n log\n' + primary_zone_block = f'{domain} {{\n file /data/{domain}.zone\n log\n' if acl_block: - cell_zone_block += acl_block + '\n' - cell_zone_block += '}\n' + primary_zone_block += acl_block + '\n' + primary_zone_block += '}\n' corefile = f""". {{ forward . 8.8.8.8 1.1.1.1 @@ -276,8 +275,8 @@ def generate_corefile(peers: List[Dict[str, Any]], corefile_path: str = COREFILE health }} -{cell_zone_block} -local.cell {{ +{primary_zone_block} +local.{domain} {{ file /data/local.zone log }} @@ -307,9 +306,10 @@ def reload_coredns() -> bool: return False -def apply_all_dns_rules(peers: List[Dict[str, Any]], corefile_path: str = COREFILE_PATH) -> bool: +def apply_all_dns_rules(peers: List[Dict[str, Any]], corefile_path: str = COREFILE_PATH, + domain: str = 'cell') -> bool: """Regenerate Corefile and reload CoreDNS.""" - ok = generate_corefile(peers, corefile_path) + ok = generate_corefile(peers, corefile_path, domain) if ok: reload_coredns() return ok diff --git a/api/network_manager.py b/api/network_manager.py index 75038b0..6721ec6 100644 --- a/api/network_manager.py +++ b/api/network_manager.py @@ -401,30 +401,17 @@ class NetworkManager(BaseServiceManager): except Exception as e: warnings.append(f"dnsmasq domain update failed: {e}") - # 2. Update Corefile: replace old primary zone block with new domain + # 2. Regenerate Corefile using generate_corefile so it always stays consistent try: + import firewall_manager as _fm corefile = os.path.join(self.config_dir, 'dns', 'Corefile') - if os.path.exists(corefile): - with open(corefile) as f: - content = f.read() - import re - # Replace first named zone block (not the catch-all .) with new domain - # Matches: { ... } blocks (zone names like "cell", "oldname") - def replace_zone(m): - zone = m.group(1) - if zone == '.': - return m.group(0) # keep catch-all - # Replace zone name with new domain; update file path reference - body = m.group(2) - body = re.sub(r'file\s+/data/\S+\.zone', - f'file /data/{domain}.zone', body) - return f'{domain} {{{body}}}' - new_content = re.sub( - r'(\S+)\s*\{([^}]*)\}', - replace_zone, content, flags=re.DOTALL - ) - with open(corefile, 'w') as f: - f.write(new_content) + peers_file = os.path.join(self.data_dir, 'peers.json') + try: + import json as _json + peers = _json.loads(open(peers_file).read()) if os.path.exists(peers_file) else [] + except Exception: + peers = [] + _fm.generate_corefile(peers, corefile, domain) except Exception as e: warnings.append(f"Corefile domain update failed: {e}") diff --git a/config/dns/Corefile b/config/dns/Corefile index b7001b5..ad1f4c2 100644 --- a/config/dns/Corefile +++ b/config/dns/Corefile @@ -5,12 +5,12 @@ health } -cell { - file /data/cell.zone +dev { + file /data/dev.zone log } -local.cell { +local.dev { file /data/local.zone log }