Merge branch 'feature/fix-apply-network-recreate' into 'main'

fix: spawn helper container for all-services restart so API survives

See merge request root/pic!7
This commit is contained in:
Administrator
2026-04-22 19:41:51 +00:00
6 changed files with 200 additions and 81 deletions
+70 -27
View File
@@ -188,11 +188,15 @@ cell_link_manager = CellLinkManager(
) )
# Apply firewall + DNS rules from stored peer settings (survives API restarts) # Apply firewall + DNS rules from stored peer settings (survives API restarts)
def _configured_domain() -> str:
    """Return the cell domain stored in identity config, defaulting to 'cell'."""
    identity = config_manager.configs.get('_identity', {})
    return identity.get('domain', 'cell')
def _apply_startup_enforcement(): def _apply_startup_enforcement():
try: try:
peers = peer_registry.list_peers() peers = peer_registry.list_peers()
firewall_manager.apply_all_peer_rules(peers) firewall_manager.apply_all_peer_rules(peers)
firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH) firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, _configured_domain())
logger.info(f"Applied enforcement rules for {len(peers)} peers on startup") logger.info(f"Applied enforcement rules for {len(peers)} peers on startup")
except Exception as e: except Exception as e:
logger.warning(f"Startup enforcement failed (non-fatal): {e}") logger.warning(f"Startup enforcement failed (non-fatal): {e}")
@@ -487,6 +491,12 @@ def update_config():
net_result = network_manager.apply_domain(domain) net_result = network_manager.apply_domain(domain)
all_restarted.extend(net_result.get('restarted', [])) all_restarted.extend(net_result.get('restarted', []))
all_warnings.extend(net_result.get('warnings', [])) all_warnings.extend(net_result.get('warnings', []))
# Regenerate Caddyfile — virtual host names change with the domain
import ip_utils as _ip_domain
_cur_id = config_manager.configs.get('_identity', {})
_cur_range = _cur_id.get('ip_range', os.environ.get('CELL_IP_RANGE', '172.20.0.0/16'))
_cur_name = _cur_id.get('cell_name', os.environ.get('CELL_NAME', 'mycell'))
_ip_domain.write_caddyfile(_cur_range, _cur_name, domain, '/app/config/caddy/Caddyfile')
# Apply cell name change to DNS hostname record # Apply cell name change to DNS hostname record
if identity_updates.get('cell_name'): if identity_updates.get('cell_name'):
@@ -496,6 +506,12 @@ def update_config():
cn_result = network_manager.apply_cell_name(old_name, new_name) cn_result = network_manager.apply_cell_name(old_name, new_name)
all_restarted.extend(cn_result.get('restarted', [])) all_restarted.extend(cn_result.get('restarted', []))
all_warnings.extend(cn_result.get('warnings', [])) all_warnings.extend(cn_result.get('warnings', []))
# Regenerate Caddyfile — main virtual host name changes with cell_name
import ip_utils as _ip_name
_cur_id2 = config_manager.configs.get('_identity', {})
_cur_range2 = _cur_id2.get('ip_range', os.environ.get('CELL_IP_RANGE', '172.20.0.0/16'))
_cur_domain2 = identity_updates.get('domain') or _cur_id2.get('domain', os.environ.get('CELL_DOMAIN', 'cell'))
_ip_name.write_caddyfile(_cur_range2, new_name, _cur_domain2, '/app/config/caddy/Caddyfile')
# Apply ip_range change: regenerate DNS records, update virtual IPs + firewall rules # Apply ip_range change: regenerate DNS records, update virtual IPs + firewall rules
if identity_updates.get('ip_range'): if identity_updates.get('ip_range'):
@@ -514,6 +530,9 @@ def update_config():
# Write new .env with updated IPs (and current ports) for next container start # Write new .env with updated IPs (and current ports) for next container start
env_file = os.environ.get('COMPOSE_ENV_FILE', '/app/.env.compose') env_file = os.environ.get('COMPOSE_ENV_FILE', '/app/.env.compose')
ip_utils.write_env_file(new_range, env_file, _collect_service_ports(config_manager.configs)) ip_utils.write_env_file(new_range, env_file, _collect_service_ports(config_manager.configs))
# Regenerate Caddyfile with new VIPs
ip_utils.write_caddyfile(new_range, cur_cell_name, cur_domain,
'/app/config/caddy/Caddyfile')
# Mark ALL containers as needing restart; network_recreate signals that # Mark ALL containers as needing restart; network_recreate signals that
# docker compose down is required before up (Docker can't change subnet in-place) # docker compose down is required before up (Docker can't change subnet in-place)
_set_pending_restart( _set_pending_restart(
@@ -692,40 +711,64 @@ def apply_pending_config():
# existing network's subnet in-place, so we need `down` + `up` in that case. # existing network's subnet in-place, so we need `down` + `up` in that case.
needs_network_recreate = pending.get('network_recreate', False) needs_network_recreate = pending.get('network_recreate', False)
host_env = os.path.join(project_dir, '.env')
host_compose = os.path.join(project_dir, 'docker-compose.yml')
if '*' in containers: if '*' in containers:
# All-services restart: `docker compose down` or `up -d` may stop/recreate the
# API container itself, killing this background thread mid-operation.
# Spawn an independent helper container using pic_api:latest that has docker CLI
# and survives cell-api being stopped/recreated.
if needs_network_recreate: if needs_network_recreate:
# down removes containers AND the bridge network; up recreates everything helper_script = (
compose_down_args = ['down'] f'sleep 2'
compose_up_args = ['up', '-d'] f' && docker compose --project-directory {project_dir}'
f' -f {host_compose} --env-file {host_env} down'
f' && docker compose --project-directory {project_dir}'
f' -f {host_compose} --env-file {host_env} up -d'
)
else: else:
compose_down_args = None helper_script = (
compose_up_args = ['up', '-d'] f'sleep 2'
f' && docker compose --project-directory {project_dir}'
f' -f {host_compose} --env-file {host_env} up -d'
)
def _do_apply():
import subprocess as _subprocess
_subprocess.Popen(
['docker', 'run', '--rm',
'-v', '/var/run/docker.sock:/var/run/docker.sock',
'-v', f'{project_dir}:{project_dir}',
'--entrypoint', 'sh',
'pic_api:latest',
'-c', helper_script],
close_fds=True,
stdout=_subprocess.DEVNULL,
stderr=_subprocess.DEVNULL,
)
logger.info(
'spawned helper container for all-services restart'
+ (' (network_recreate)' if needs_network_recreate else '')
)
else: else:
compose_down_args = None # Specific containers only — API is not affected, run directly from here.
compose_up_args = ['up', '-d', '--no-deps'] + containers
base_cmd = ['docker', 'compose',
'--project-directory', project_dir,
'-f', '/app/docker-compose.yml',
'--env-file', '/app/.env.compose']
# Run in a background thread; 0.3 s delay lets Flask send this response first.
def _do_apply(): def _do_apply():
import time as _time import time as _time
import subprocess as _subprocess import subprocess as _subprocess
_time.sleep(0.3) _time.sleep(0.3)
if compose_down_args: result = _subprocess.run(
r = _subprocess.run(base_cmd + compose_down_args, ['docker', 'compose',
capture_output=True, text=True, timeout=60) '--project-directory', project_dir,
if r.returncode != 0: '-f', '/app/docker-compose.yml',
logger.error(f"docker compose down failed: {r.stderr.strip()}") '--env-file', '/app/.env.compose',
return 'up', '-d', '--no-deps'] + containers,
result = _subprocess.run(base_cmd + compose_up_args, capture_output=True, text=True, timeout=120,
capture_output=True, text=True, timeout=120) )
if result.returncode != 0: if result.returncode != 0:
logger.error(f"docker compose up failed: {result.stderr.strip()}") logger.error(f"docker compose up failed: {result.stderr.strip()}")
else: else:
logger.info(f'docker compose {" ".join(compose_up_args)} completed successfully') logger.info(f'docker compose up completed for: {containers}')
threading.Thread(target=_do_apply, daemon=False).start() threading.Thread(target=_do_apply, daemon=False).start()
@@ -1348,7 +1391,7 @@ def apply_wireguard_enforcement():
try: try:
peers = peer_registry.list_peers() peers = peer_registry.list_peers()
firewall_manager.apply_all_peer_rules(peers) firewall_manager.apply_all_peer_rules(peers)
firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH) firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, _configured_domain())
return jsonify({'ok': True, 'peers': len(peers)}) return jsonify({'ok': True, 'peers': len(peers)})
except Exception as e: except Exception as e:
return jsonify({'error': str(e)}), 500 return jsonify({'error': str(e)}), 500
@@ -1489,7 +1532,7 @@ def add_peer():
if success: if success:
# Apply server-side enforcement immediately # Apply server-side enforcement immediately
firewall_manager.apply_peer_rules(peer_info['ip'], peer_info) firewall_manager.apply_peer_rules(peer_info['ip'], peer_info)
firewall_manager.apply_all_dns_rules(peer_registry.list_peers(), COREFILE_PATH) firewall_manager.apply_all_dns_rules(peer_registry.list_peers(), COREFILE_PATH, _configured_domain())
return jsonify({"message": f"Peer {data['name']} added successfully", "ip": assigned_ip}), 201 return jsonify({"message": f"Peer {data['name']} added successfully", "ip": assigned_ip}), 201
else: else:
return jsonify({"error": f"Peer {data['name']} already exists"}), 400 return jsonify({"error": f"Peer {data['name']} already exists"}), 400
@@ -1525,7 +1568,7 @@ def update_peer(peer_name):
updated_peer = peer_registry.get_peer(peer_name) updated_peer = peer_registry.get_peer(peer_name)
if updated_peer: if updated_peer:
firewall_manager.apply_peer_rules(updated_peer['ip'], updated_peer) firewall_manager.apply_peer_rules(updated_peer['ip'], updated_peer)
firewall_manager.apply_all_dns_rules(peer_registry.list_peers(), COREFILE_PATH) firewall_manager.apply_all_dns_rules(peer_registry.list_peers(), COREFILE_PATH, _configured_domain())
result = {"message": f"Peer {peer_name} updated", "config_changed": config_changed} result = {"message": f"Peer {peer_name} updated", "config_changed": config_changed}
return jsonify(result) return jsonify(result)
else: else:
+21 -21
View File
@@ -212,30 +212,27 @@ def apply_all_peer_rules(peers: List[Dict[str, Any]]) -> None:
# DNS ACL (CoreDNS Corefile generation) # DNS ACL (CoreDNS Corefile generation)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Map service name → DNS hostname in .cell zone # Service subdomains that get per-peer ACL rules in the CoreDNS zone block
SERVICE_HOSTS = { _ACL_SERVICES = ('calendar', 'files', 'mail', 'webdav')
'calendar': 'calendar.cell.',
'files': 'files.cell.',
'mail': 'mail.cell.',
'webdav': 'webdav.cell.',
}
def _build_acl_block(blocked_peers_by_service: Dict[str, List[str]]) -> str: def _build_acl_block(blocked_peers_by_service: Dict[str, List[str]],
domain: str = 'cell') -> str:
""" """
Build CoreDNS ACL plugin stanzas. Build CoreDNS ACL plugin stanzas.
blocked_peers_by_service: { 'calendar': ['10.0.0.2', '10.0.0.3'], ... } blocked_peers_by_service: { 'calendar': ['10.0.0.2', '10.0.0.3'], ... }
Returns a string to embed in the `cell { }` zone block. Returns a string to embed in the primary zone block.
""" """
if not blocked_peers_by_service: if not blocked_peers_by_service:
return '' return ''
lines = [] lines = []
for service, peer_ips in blocked_peers_by_service.items(): for service in _ACL_SERVICES:
host = SERVICE_HOSTS.get(service) peer_ips = blocked_peers_by_service.get(service, [])
if not host or not peer_ips: if not peer_ips:
continue continue
host = f'{service}.{domain}.'
for ip in peer_ips: for ip in peer_ips:
lines.append(f' acl {host} {{') lines.append(f' acl {host} {{')
lines.append(f' block net {ip}/32') lines.append(f' block net {ip}/32')
@@ -245,10 +242,12 @@ def _build_acl_block(blocked_peers_by_service: Dict[str, List[str]]) -> str:
return '\n'.join(lines) return '\n'.join(lines)
def generate_corefile(peers: List[Dict[str, Any]], corefile_path: str = COREFILE_PATH) -> bool: def generate_corefile(peers: List[Dict[str, Any]], corefile_path: str = COREFILE_PATH,
domain: str = 'cell') -> bool:
""" """
Rewrite the CoreDNS Corefile with per-peer ACL rules and reload plugin. Rewrite the CoreDNS Corefile with per-peer ACL rules and reload plugin.
The file is written to corefile_path (API-side path mapped into CoreDNS container). The file is written to corefile_path (API-side path mapped into CoreDNS container).
domain: the configured cell domain (e.g. 'cell', 'dev') must match zone file names.
""" """
try: try:
# Collect which peers block which services # Collect which peers block which services
@@ -262,12 +261,12 @@ def generate_corefile(peers: List[Dict[str, Any]], corefile_path: str = COREFILE
if service not in allowed_services: if service not in allowed_services:
blocked[service].append(ip) blocked[service].append(ip)
acl_block = _build_acl_block(blocked) acl_block = _build_acl_block(blocked, domain)
cell_zone_block = 'cell {\n file /data/cell.zone\n log\n' primary_zone_block = f'{domain} {{\n file /data/{domain}.zone\n log\n'
if acl_block: if acl_block:
cell_zone_block += acl_block + '\n' primary_zone_block += acl_block + '\n'
cell_zone_block += '}\n' primary_zone_block += '}\n'
corefile = f""". {{ corefile = f""". {{
forward . 8.8.8.8 1.1.1.1 forward . 8.8.8.8 1.1.1.1
@@ -276,8 +275,8 @@ def generate_corefile(peers: List[Dict[str, Any]], corefile_path: str = COREFILE
health health
}} }}
{cell_zone_block} {primary_zone_block}
local.cell {{ local.{domain} {{
file /data/local.zone file /data/local.zone
log log
}} }}
@@ -307,9 +306,10 @@ def reload_coredns() -> bool:
return False return False
def apply_all_dns_rules(peers: List[Dict[str, Any]], corefile_path: str = COREFILE_PATH) -> bool: def apply_all_dns_rules(peers: List[Dict[str, Any]], corefile_path: str = COREFILE_PATH,
domain: str = 'cell') -> bool:
"""Regenerate Corefile and reload CoreDNS.""" """Regenerate Corefile and reload CoreDNS."""
ok = generate_corefile(peers, corefile_path) ok = generate_corefile(peers, corefile_path, domain)
if ok: if ok:
reload_coredns() reload_coredns()
return ok return ok
+78
View File
@@ -129,6 +129,84 @@ def get_virtual_ips(ip_range: str) -> Dict[str, str]:
} }
def write_caddyfile(ip_range: str, cell_name: str, domain: str, path: str) -> bool:
    """
    Render the Caddy reverse-proxy configuration for the current cell identity.

    The generated file depends on ip_range (drives the per-service virtual
    IPs), cell_name (main virtual host), and domain (hostname suffix), so this
    must be called after any ip_range, cell_name, or domain change for Caddy
    to keep routing correctly.  Container-internal ports are fixed by
    docker-compose and never change.

    Returns True on success, False on any failure (best-effort: callers treat
    a failed write as a warning, not a fatal error).
    """
    try:
        service_ips = get_service_ips(ip_range)
        caddy_ip = service_ips['caddy']
        vip_calendar = service_ips['vip_calendar']
        vip_files = service_ips['vip_files']
        vip_mail = service_ips['vip_mail']
        vip_webdav = service_ips['vip_webdav']
        content = f"""\
{{
    auto_https off
}}
# Main cell domain — no service-IP restriction needed
http://{cell_name}.{domain}, http://{caddy_ip}:80 {{
    handle /api/* {{
        reverse_proxy cell-api:3000
    }}
    handle /calendar* {{
        reverse_proxy cell-radicale:5232
    }}
    handle /files* {{
        reverse_proxy cell-filegator:8080
    }}
    handle /webmail* {{
        reverse_proxy cell-rainloop:8888
    }}
    handle {{
        reverse_proxy cell-webui:80
    }}
}}
# Per-service virtual IPs — each gets its own IP so iptables can target them
http://calendar.{domain}, http://{vip_calendar}:80 {{
    reverse_proxy cell-radicale:5232
}}
http://files.{domain}, http://{vip_files}:80 {{
    reverse_proxy cell-filegator:8080
}}
http://mail.{domain}, http://webmail.{domain}, http://{vip_mail}:80 {{
    reverse_proxy cell-rainloop:8888
}}
http://webdav.{domain}, http://{vip_webdav}:80 {{
    reverse_proxy cell-webdav:80
}}
http://api.{domain} {{
    reverse_proxy cell-api:3000
}}
# Catch-all for direct IP / localhost
:80 {{
    handle /api/* {{
        reverse_proxy cell-api:3000
    }}
    handle {{
        reverse_proxy cell-webui:80
    }}
}}
"""
        # Parent dir may not exist on a fresh install; create it first.
        os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
        with open(path, 'w') as fh:
            fh.write(content)
        return True
    except Exception:
        # Best-effort: signal failure to the caller instead of raising.
        return False
def write_env_file(ip_range: str, path: str, ports: Optional[Dict[str, int]] = None) -> bool: def write_env_file(ip_range: str, path: str, ports: Optional[Dict[str, int]] = None) -> bool:
""" """
Write (or overwrite) the docker-compose .env file with IPs and ports. Write (or overwrite) the docker-compose .env file with IPs and ports.
+9 -22
View File
@@ -401,30 +401,17 @@ class NetworkManager(BaseServiceManager):
except Exception as e: except Exception as e:
warnings.append(f"dnsmasq domain update failed: {e}") warnings.append(f"dnsmasq domain update failed: {e}")
# 2. Update Corefile: replace old primary zone block with new domain # 2. Regenerate Corefile using generate_corefile so it always stays consistent
try: try:
import firewall_manager as _fm
corefile = os.path.join(self.config_dir, 'dns', 'Corefile') corefile = os.path.join(self.config_dir, 'dns', 'Corefile')
if os.path.exists(corefile): peers_file = os.path.join(self.data_dir, 'peers.json')
with open(corefile) as f: try:
content = f.read() import json as _json
import re peers = _json.loads(open(peers_file).read()) if os.path.exists(peers_file) else []
# Replace first named zone block (not the catch-all .) with new domain except Exception:
# Matches: <word> { ... } blocks (zone names like "cell", "oldname") peers = []
def replace_zone(m): _fm.generate_corefile(peers, corefile, domain)
zone = m.group(1)
if zone == '.':
return m.group(0) # keep catch-all
# Replace zone name with new domain; update file path reference
body = m.group(2)
body = re.sub(r'file\s+/data/\S+\.zone',
f'file /data/{domain}.zone', body)
return f'{domain} {{{body}}}'
new_content = re.sub(
r'(\S+)\s*\{([^}]*)\}',
replace_zone, content, flags=re.DOTALL
)
with open(corefile, 'w') as f:
f.write(new_content)
except Exception as e: except Exception as e:
warnings.append(f"Corefile domain update failed: {e}") warnings.append(f"Corefile domain update failed: {e}")
+3 -3
View File
@@ -5,12 +5,12 @@
health health
} }
cell { dev {
file /data/cell.zone file /data/dev.zone
log log
} }
local.cell { local.dev {
file /data/local.zone file /data/local.zone
log log
} }
+12 -1
View File
@@ -44,7 +44,6 @@ REQUIRED_DIRS = [
] ]
REQUIRED_FILES = [ REQUIRED_FILES = [
'config/caddy/Caddyfile',
'config/dns/Corefile', 'config/dns/Corefile',
'config/dhcp/dnsmasq.conf', 'config/dhcp/dnsmasq.conf',
'config/ntp/chrony.conf', 'config/ntp/chrony.conf',
@@ -205,6 +204,17 @@ def write_compose_env(ip_range: str):
print(f'[WARN] Could not write .env — containers will use built-in default IPs/ports') print(f'[WARN] Could not write .env — containers will use built-in default IPs/ports')
def write_caddy_config(ip_range: str, cell_name: str, domain: str):
    """Generate Caddyfile with correct VIPs and hostnames for this cell."""
    # ip_utils lives under api/ — make it importable from the setup script.
    sys.path.insert(0, os.path.join(ROOT, 'api'))
    import ip_utils
    caddyfile_path = os.path.join(ROOT, 'config', 'caddy', 'Caddyfile')
    wrote_ok = ip_utils.write_caddyfile(ip_range, cell_name, domain, caddyfile_path)
    if wrote_ok:
        print(f'[CREATED] config/caddy/Caddyfile (subnet={ip_range} domain={domain})')
    else:
        print(f'[WARN] Could not write Caddyfile')
def _read_existing_ip_range() -> str: def _read_existing_ip_range() -> str:
"""Read ip_range from existing cell_config.json if present, else return None.""" """Read ip_range from existing cell_config.json if present, else return None."""
cfg_path = os.path.join(ROOT, 'config', 'api', 'cell_config.json') cfg_path = os.path.join(ROOT, 'config', 'api', 'cell_config.json')
@@ -237,6 +247,7 @@ def main():
write_wg0_conf(priv, vpn_address, wg_port) write_wg0_conf(priv, vpn_address, wg_port)
write_cell_config(cell_name, domain, wg_port) write_cell_config(cell_name, domain, wg_port)
write_compose_env(ip_range) write_compose_env(ip_range)
write_caddy_config(ip_range, cell_name, domain)
print() print()
print('--- Setup complete! Run: make start ---') print('--- Setup complete! Run: make start ---')