Merge branch 'feature/fix-apply-network-recreate' into 'main'

fix: spawn helper container for all-services restart so API survives

See merge request root/pic!7
This commit is contained in:
Administrator
2026-04-22 19:41:51 +00:00
6 changed files with 200 additions and 81 deletions
+70 -27
View File
@@ -188,11 +188,15 @@ cell_link_manager = CellLinkManager(
)
# Apply firewall + DNS rules from stored peer settings (survives API restarts)
def _configured_domain() -> str:
    """Return the cell domain from stored identity settings ('cell' if unset)."""
    identity = config_manager.configs.get('_identity', {})
    return identity.get('domain', 'cell')
def _apply_startup_enforcement():
    """Re-apply firewall and DNS ACL rules from stored peer settings at startup.

    Ensures enforcement survives API restarts. Any failure is logged as a
    warning and never propagates — the API must still come up.
    """
    try:
        peers = peer_registry.list_peers()
        firewall_manager.apply_all_peer_rules(peers)
        # Pass the configured domain so DNS ACL hostnames match the zone files;
        # the superseded call without the domain argument is dropped (it would
        # regenerate the Corefile twice, once with the wrong default zone).
        firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, _configured_domain())
        logger.info(f"Applied enforcement rules for {len(peers)} peers on startup")
    except Exception as e:
        logger.warning(f"Startup enforcement failed (non-fatal): {e}")
@@ -487,6 +491,12 @@ def update_config():
net_result = network_manager.apply_domain(domain)
all_restarted.extend(net_result.get('restarted', []))
all_warnings.extend(net_result.get('warnings', []))
# Regenerate Caddyfile — virtual host names change with the domain
import ip_utils as _ip_domain
_cur_id = config_manager.configs.get('_identity', {})
_cur_range = _cur_id.get('ip_range', os.environ.get('CELL_IP_RANGE', '172.20.0.0/16'))
_cur_name = _cur_id.get('cell_name', os.environ.get('CELL_NAME', 'mycell'))
_ip_domain.write_caddyfile(_cur_range, _cur_name, domain, '/app/config/caddy/Caddyfile')
# Apply cell name change to DNS hostname record
if identity_updates.get('cell_name'):
@@ -496,6 +506,12 @@ def update_config():
cn_result = network_manager.apply_cell_name(old_name, new_name)
all_restarted.extend(cn_result.get('restarted', []))
all_warnings.extend(cn_result.get('warnings', []))
# Regenerate Caddyfile — main virtual host name changes with cell_name
import ip_utils as _ip_name
_cur_id2 = config_manager.configs.get('_identity', {})
_cur_range2 = _cur_id2.get('ip_range', os.environ.get('CELL_IP_RANGE', '172.20.0.0/16'))
_cur_domain2 = identity_updates.get('domain') or _cur_id2.get('domain', os.environ.get('CELL_DOMAIN', 'cell'))
_ip_name.write_caddyfile(_cur_range2, new_name, _cur_domain2, '/app/config/caddy/Caddyfile')
# Apply ip_range change: regenerate DNS records, update virtual IPs + firewall rules
if identity_updates.get('ip_range'):
@@ -514,6 +530,9 @@ def update_config():
# Write new .env with updated IPs (and current ports) for next container start
env_file = os.environ.get('COMPOSE_ENV_FILE', '/app/.env.compose')
ip_utils.write_env_file(new_range, env_file, _collect_service_ports(config_manager.configs))
# Regenerate Caddyfile with new VIPs
ip_utils.write_caddyfile(new_range, cur_cell_name, cur_domain,
'/app/config/caddy/Caddyfile')
# Mark ALL containers as needing restart; network_recreate signals that
# docker compose down is required before up (Docker can't change subnet in-place)
_set_pending_restart(
@@ -692,40 +711,64 @@ def apply_pending_config():
# existing network's subnet in-place, so we need `down` + `up` in that case.
needs_network_recreate = pending.get('network_recreate', False)
host_env = os.path.join(project_dir, '.env')
host_compose = os.path.join(project_dir, 'docker-compose.yml')
if '*' in containers:
# All-services restart: `docker compose down` or `up -d` may stop/recreate the
# API container itself, killing this background thread mid-operation.
# Spawn an independent helper container using pic_api:latest that has docker CLI
# and survives cell-api being stopped/recreated.
if needs_network_recreate:
# down removes containers AND the bridge network; up recreates everything
compose_down_args = ['down']
compose_up_args = ['up', '-d']
helper_script = (
f'sleep 2'
f' && docker compose --project-directory {project_dir}'
f' -f {host_compose} --env-file {host_env} down'
f' && docker compose --project-directory {project_dir}'
f' -f {host_compose} --env-file {host_env} up -d'
)
else:
compose_down_args = None
compose_up_args = ['up', '-d']
helper_script = (
f'sleep 2'
f' && docker compose --project-directory {project_dir}'
f' -f {host_compose} --env-file {host_env} up -d'
)
def _do_apply():
import subprocess as _subprocess
_subprocess.Popen(
['docker', 'run', '--rm',
'-v', '/var/run/docker.sock:/var/run/docker.sock',
'-v', f'{project_dir}:{project_dir}',
'--entrypoint', 'sh',
'pic_api:latest',
'-c', helper_script],
close_fds=True,
stdout=_subprocess.DEVNULL,
stderr=_subprocess.DEVNULL,
)
logger.info(
'spawned helper container for all-services restart'
+ (' (network_recreate)' if needs_network_recreate else '')
)
else:
compose_down_args = None
compose_up_args = ['up', '-d', '--no-deps'] + containers
base_cmd = ['docker', 'compose',
'--project-directory', project_dir,
'-f', '/app/docker-compose.yml',
'--env-file', '/app/.env.compose']
# Run in a background thread; 0.3 s delay lets Flask send this response first.
# Specific containers only — API is not affected, run directly from here.
def _do_apply():
import time as _time
import subprocess as _subprocess
_time.sleep(0.3)
if compose_down_args:
r = _subprocess.run(base_cmd + compose_down_args,
capture_output=True, text=True, timeout=60)
if r.returncode != 0:
logger.error(f"docker compose down failed: {r.stderr.strip()}")
return
result = _subprocess.run(base_cmd + compose_up_args,
capture_output=True, text=True, timeout=120)
result = _subprocess.run(
['docker', 'compose',
'--project-directory', project_dir,
'-f', '/app/docker-compose.yml',
'--env-file', '/app/.env.compose',
'up', '-d', '--no-deps'] + containers,
capture_output=True, text=True, timeout=120,
)
if result.returncode != 0:
logger.error(f"docker compose up failed: {result.stderr.strip()}")
else:
logger.info(f'docker compose {" ".join(compose_up_args)} completed successfully')
logger.info(f'docker compose up completed for: {containers}')
threading.Thread(target=_do_apply, daemon=False).start()
@@ -1348,7 +1391,7 @@ def apply_wireguard_enforcement():
try:
peers = peer_registry.list_peers()
firewall_manager.apply_all_peer_rules(peers)
firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH)
firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, _configured_domain())
return jsonify({'ok': True, 'peers': len(peers)})
except Exception as e:
return jsonify({'error': str(e)}), 500
@@ -1489,7 +1532,7 @@ def add_peer():
if success:
# Apply server-side enforcement immediately
firewall_manager.apply_peer_rules(peer_info['ip'], peer_info)
firewall_manager.apply_all_dns_rules(peer_registry.list_peers(), COREFILE_PATH)
firewall_manager.apply_all_dns_rules(peer_registry.list_peers(), COREFILE_PATH, _configured_domain())
return jsonify({"message": f"Peer {data['name']} added successfully", "ip": assigned_ip}), 201
else:
return jsonify({"error": f"Peer {data['name']} already exists"}), 400
@@ -1525,7 +1568,7 @@ def update_peer(peer_name):
updated_peer = peer_registry.get_peer(peer_name)
if updated_peer:
firewall_manager.apply_peer_rules(updated_peer['ip'], updated_peer)
firewall_manager.apply_all_dns_rules(peer_registry.list_peers(), COREFILE_PATH)
firewall_manager.apply_all_dns_rules(peer_registry.list_peers(), COREFILE_PATH, _configured_domain())
result = {"message": f"Peer {peer_name} updated", "config_changed": config_changed}
return jsonify(result)
else:
+21 -21
View File
@@ -212,30 +212,27 @@ def apply_all_peer_rules(peers: List[Dict[str, Any]]) -> None:
# DNS ACL (CoreDNS Corefile generation)
# ---------------------------------------------------------------------------
# Map service name → DNS hostname in .cell zone
SERVICE_HOSTS = {
'calendar': 'calendar.cell.',
'files': 'files.cell.',
'mail': 'mail.cell.',
'webdav': 'webdav.cell.',
}
# Service subdomains that get per-peer ACL rules in the CoreDNS zone block
_ACL_SERVICES = ('calendar', 'files', 'mail', 'webdav')
def _build_acl_block(blocked_peers_by_service: Dict[str, List[str]]) -> str:
def _build_acl_block(blocked_peers_by_service: Dict[str, List[str]],
domain: str = 'cell') -> str:
"""
Build CoreDNS ACL plugin stanzas.
blocked_peers_by_service: { 'calendar': ['10.0.0.2', '10.0.0.3'], ... }
Returns a string to embed in the `cell { }` zone block.
Returns a string to embed in the primary zone block.
"""
if not blocked_peers_by_service:
return ''
lines = []
for service, peer_ips in blocked_peers_by_service.items():
host = SERVICE_HOSTS.get(service)
if not host or not peer_ips:
for service in _ACL_SERVICES:
peer_ips = blocked_peers_by_service.get(service, [])
if not peer_ips:
continue
host = f'{service}.{domain}.'
for ip in peer_ips:
lines.append(f' acl {host} {{')
lines.append(f' block net {ip}/32')
@@ -245,10 +242,12 @@ def _build_acl_block(blocked_peers_by_service: Dict[str, List[str]]) -> str:
return '\n'.join(lines)
def generate_corefile(peers: List[Dict[str, Any]], corefile_path: str = COREFILE_PATH) -> bool:
def generate_corefile(peers: List[Dict[str, Any]], corefile_path: str = COREFILE_PATH,
domain: str = 'cell') -> bool:
"""
Rewrite the CoreDNS Corefile with per-peer ACL rules and reload plugin.
The file is written to corefile_path (API-side path mapped into CoreDNS container).
domain: the configured cell domain (e.g. 'cell', 'dev') must match zone file names.
"""
try:
# Collect which peers block which services
@@ -262,12 +261,12 @@ def generate_corefile(peers: List[Dict[str, Any]], corefile_path: str = COREFILE
if service not in allowed_services:
blocked[service].append(ip)
acl_block = _build_acl_block(blocked)
acl_block = _build_acl_block(blocked, domain)
cell_zone_block = 'cell {\n file /data/cell.zone\n log\n'
primary_zone_block = f'{domain} {{\n file /data/{domain}.zone\n log\n'
if acl_block:
cell_zone_block += acl_block + '\n'
cell_zone_block += '}\n'
primary_zone_block += acl_block + '\n'
primary_zone_block += '}\n'
corefile = f""". {{
forward . 8.8.8.8 1.1.1.1
@@ -276,8 +275,8 @@ def generate_corefile(peers: List[Dict[str, Any]], corefile_path: str = COREFILE
health
}}
{cell_zone_block}
local.cell {{
{primary_zone_block}
local.{domain} {{
file /data/local.zone
log
}}
@@ -307,9 +306,10 @@ def reload_coredns() -> bool:
return False
def apply_all_dns_rules(peers: List[Dict[str, Any]], corefile_path: str = COREFILE_PATH,
                        domain: str = 'cell') -> bool:
    """Regenerate the Corefile and reload CoreDNS.

    Args:
        peers: peer records as returned by the peer registry.
        corefile_path: where to write the Corefile (API-side path mapped
            into the CoreDNS container).
        domain: the configured cell domain; must match the zone file names.

    Returns True when the Corefile was written successfully (a failed
    CoreDNS reload does not affect the return value).
    """
    # The superseded pre-change call without `domain` is dropped; keeping it
    # would regenerate the Corefile with the hard-coded default zone.
    ok = generate_corefile(peers, corefile_path, domain)
    if ok:
        reload_coredns()
    return ok
+78
View File
@@ -129,6 +129,84 @@ def get_virtual_ips(ip_range: str) -> Dict[str, str]:
}
def write_caddyfile(ip_range: str, cell_name: str, domain: str, path: str) -> bool:
    """
    Generate the Caddy reverse-proxy config from the current ip_range, cell_name, and domain.

    Must be called after any ip_range or domain change so Caddy routes correctly.
    Container-internal ports are fixed by docker-compose and never change.

    Args:
        ip_range: cell subnet in CIDR form (e.g. '172.20.0.0/16').
        cell_name: short cell name used for the main virtual host.
        domain: cell DNS zone (e.g. 'cell', 'dev').
        path: destination file path; parent directories are created as needed.

    Returns True on success, False on any failure. Best-effort by design:
    callers treat a failed write as a warning, not a fatal error — but the
    failure is now logged instead of being swallowed silently.
    """
    try:
        ips = get_service_ips(ip_range)
        caddy_ip = ips['caddy']
        vip_calendar = ips['vip_calendar']
        vip_files = ips['vip_files']
        vip_mail = ips['vip_mail']
        vip_webdav = ips['vip_webdav']
        content = f"""\
{{
auto_https off
}}
# Main cell domain — no service-IP restriction needed
http://{cell_name}.{domain}, http://{caddy_ip}:80 {{
handle /api/* {{
reverse_proxy cell-api:3000
}}
handle /calendar* {{
reverse_proxy cell-radicale:5232
}}
handle /files* {{
reverse_proxy cell-filegator:8080
}}
handle /webmail* {{
reverse_proxy cell-rainloop:8888
}}
handle {{
reverse_proxy cell-webui:80
}}
}}
# Per-service virtual IPs — each gets its own IP so iptables can target them
http://calendar.{domain}, http://{vip_calendar}:80 {{
reverse_proxy cell-radicale:5232
}}
http://files.{domain}, http://{vip_files}:80 {{
reverse_proxy cell-filegator:8080
}}
http://mail.{domain}, http://webmail.{domain}, http://{vip_mail}:80 {{
reverse_proxy cell-rainloop:8888
}}
http://webdav.{domain}, http://{vip_webdav}:80 {{
reverse_proxy cell-webdav:80
}}
http://api.{domain} {{
reverse_proxy cell-api:3000
}}
# Catch-all for direct IP / localhost
:80 {{
handle /api/* {{
reverse_proxy cell-api:3000
}}
handle {{
reverse_proxy cell-webui:80
}}
}}
"""
        os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
        with open(path, 'w') as f:
            f.write(content)
        return True
    except Exception as e:
        # Best-effort: never crash the caller over a Caddyfile write, but
        # surface the cause instead of hiding it behind a bare `return False`.
        import logging
        logging.getLogger(__name__).warning("write_caddyfile failed for %s: %s", path, e)
        return False
def write_env_file(ip_range: str, path: str, ports: Optional[Dict[str, int]] = None) -> bool:
"""
Write (or overwrite) the docker-compose .env file with IPs and ports.
+9 -22
View File
@@ -401,30 +401,17 @@ class NetworkManager(BaseServiceManager):
except Exception as e:
warnings.append(f"dnsmasq domain update failed: {e}")
# 2. Update Corefile: replace old primary zone block with new domain
# 2. Regenerate Corefile using generate_corefile so it always stays consistent
try:
import firewall_manager as _fm
corefile = os.path.join(self.config_dir, 'dns', 'Corefile')
if os.path.exists(corefile):
with open(corefile) as f:
content = f.read()
import re
# Replace first named zone block (not the catch-all .) with new domain
# Matches: <word> { ... } blocks (zone names like "cell", "oldname")
def replace_zone(m):
zone = m.group(1)
if zone == '.':
return m.group(0) # keep catch-all
# Replace zone name with new domain; update file path reference
body = m.group(2)
body = re.sub(r'file\s+/data/\S+\.zone',
f'file /data/{domain}.zone', body)
return f'{domain} {{{body}}}'
new_content = re.sub(
r'(\S+)\s*\{([^}]*)\}',
replace_zone, content, flags=re.DOTALL
)
with open(corefile, 'w') as f:
f.write(new_content)
peers_file = os.path.join(self.data_dir, 'peers.json')
try:
import json as _json
peers = _json.loads(open(peers_file).read()) if os.path.exists(peers_file) else []
except Exception:
peers = []
_fm.generate_corefile(peers, corefile, domain)
except Exception as e:
warnings.append(f"Corefile domain update failed: {e}")
+3 -3
View File
@@ -5,12 +5,12 @@
health
}
cell {
file /data/cell.zone
dev {
file /data/dev.zone
log
}
local.cell {
local.dev {
file /data/local.zone
log
}
+12 -1
View File
@@ -44,7 +44,6 @@ REQUIRED_DIRS = [
]
REQUIRED_FILES = [
'config/caddy/Caddyfile',
'config/dns/Corefile',
'config/dhcp/dnsmasq.conf',
'config/ntp/chrony.conf',
@@ -205,6 +204,17 @@ def write_compose_env(ip_range: str):
print(f'[WARN] Could not write .env — containers will use built-in default IPs/ports')
def write_caddy_config(ip_range: str, cell_name: str, domain: str):
    """Generate Caddyfile with correct VIPs and hostnames for this cell."""
    # The generator lives in the API package; make it importable from here.
    sys.path.insert(0, os.path.join(ROOT, 'api'))
    import ip_utils

    target = os.path.join(ROOT, 'config', 'caddy', 'Caddyfile')
    ok = ip_utils.write_caddyfile(ip_range, cell_name, domain, target)
    if ok:
        print(f'[CREATED] config/caddy/Caddyfile (subnet={ip_range} domain={domain})')
    else:
        print(f'[WARN] Could not write Caddyfile')
def _read_existing_ip_range() -> str:
"""Read ip_range from existing cell_config.json if present, else return None."""
cfg_path = os.path.join(ROOT, 'config', 'api', 'cell_config.json')
@@ -237,6 +247,7 @@ def main():
write_wg0_conf(priv, vpn_address, wg_port)
write_cell_config(cell_name, domain, wg_port)
write_compose_env(ip_range)
write_caddy_config(ip_range, cell_name, domain)
print()
print('--- Setup complete! Run: make start ---')