Merge branch 'feature/fix-apply-network-recreate' into 'main'
fix: spawn helper container for all-services restart so API survives. See merge request root/pic!7
This commit is contained in:
+77
-34
@@ -188,11 +188,15 @@ cell_link_manager = CellLinkManager(
|
||||
)
|
||||
|
||||
# Apply firewall + DNS rules from stored peer settings (survives API restarts)
|
||||
def _configured_domain() -> str:
|
||||
return config_manager.configs.get('_identity', {}).get('domain', 'cell')
|
||||
|
||||
|
||||
def _apply_startup_enforcement():
|
||||
try:
|
||||
peers = peer_registry.list_peers()
|
||||
firewall_manager.apply_all_peer_rules(peers)
|
||||
firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH)
|
||||
firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, _configured_domain())
|
||||
logger.info(f"Applied enforcement rules for {len(peers)} peers on startup")
|
||||
except Exception as e:
|
||||
logger.warning(f"Startup enforcement failed (non-fatal): {e}")
|
||||
@@ -487,6 +491,12 @@ def update_config():
|
||||
net_result = network_manager.apply_domain(domain)
|
||||
all_restarted.extend(net_result.get('restarted', []))
|
||||
all_warnings.extend(net_result.get('warnings', []))
|
||||
# Regenerate Caddyfile — virtual host names change with the domain
|
||||
import ip_utils as _ip_domain
|
||||
_cur_id = config_manager.configs.get('_identity', {})
|
||||
_cur_range = _cur_id.get('ip_range', os.environ.get('CELL_IP_RANGE', '172.20.0.0/16'))
|
||||
_cur_name = _cur_id.get('cell_name', os.environ.get('CELL_NAME', 'mycell'))
|
||||
_ip_domain.write_caddyfile(_cur_range, _cur_name, domain, '/app/config/caddy/Caddyfile')
|
||||
|
||||
# Apply cell name change to DNS hostname record
|
||||
if identity_updates.get('cell_name'):
|
||||
@@ -496,6 +506,12 @@ def update_config():
|
||||
cn_result = network_manager.apply_cell_name(old_name, new_name)
|
||||
all_restarted.extend(cn_result.get('restarted', []))
|
||||
all_warnings.extend(cn_result.get('warnings', []))
|
||||
# Regenerate Caddyfile — main virtual host name changes with cell_name
|
||||
import ip_utils as _ip_name
|
||||
_cur_id2 = config_manager.configs.get('_identity', {})
|
||||
_cur_range2 = _cur_id2.get('ip_range', os.environ.get('CELL_IP_RANGE', '172.20.0.0/16'))
|
||||
_cur_domain2 = identity_updates.get('domain') or _cur_id2.get('domain', os.environ.get('CELL_DOMAIN', 'cell'))
|
||||
_ip_name.write_caddyfile(_cur_range2, new_name, _cur_domain2, '/app/config/caddy/Caddyfile')
|
||||
|
||||
# Apply ip_range change: regenerate DNS records, update virtual IPs + firewall rules
|
||||
if identity_updates.get('ip_range'):
|
||||
@@ -514,6 +530,9 @@ def update_config():
|
||||
# Write new .env with updated IPs (and current ports) for next container start
|
||||
env_file = os.environ.get('COMPOSE_ENV_FILE', '/app/.env.compose')
|
||||
ip_utils.write_env_file(new_range, env_file, _collect_service_ports(config_manager.configs))
|
||||
# Regenerate Caddyfile with new VIPs
|
||||
ip_utils.write_caddyfile(new_range, cur_cell_name, cur_domain,
|
||||
'/app/config/caddy/Caddyfile')
|
||||
# Mark ALL containers as needing restart; network_recreate signals that
|
||||
# docker compose down is required before up (Docker can't change subnet in-place)
|
||||
_set_pending_restart(
|
||||
@@ -692,40 +711,64 @@ def apply_pending_config():
|
||||
# existing network's subnet in-place, so we need `down` + `up` in that case.
|
||||
needs_network_recreate = pending.get('network_recreate', False)
|
||||
|
||||
host_env = os.path.join(project_dir, '.env')
|
||||
host_compose = os.path.join(project_dir, 'docker-compose.yml')
|
||||
|
||||
if '*' in containers:
|
||||
# All-services restart: `docker compose down` or `up -d` may stop/recreate the
|
||||
# API container itself, killing this background thread mid-operation.
|
||||
# Spawn an independent helper container using pic_api:latest that has docker CLI
|
||||
# and survives cell-api being stopped/recreated.
|
||||
if needs_network_recreate:
|
||||
# down removes containers AND the bridge network; up recreates everything
|
||||
compose_down_args = ['down']
|
||||
compose_up_args = ['up', '-d']
|
||||
helper_script = (
|
||||
f'sleep 2'
|
||||
f' && docker compose --project-directory {project_dir}'
|
||||
f' -f {host_compose} --env-file {host_env} down'
|
||||
f' && docker compose --project-directory {project_dir}'
|
||||
f' -f {host_compose} --env-file {host_env} up -d'
|
||||
)
|
||||
else:
|
||||
compose_down_args = None
|
||||
compose_up_args = ['up', '-d']
|
||||
helper_script = (
|
||||
f'sleep 2'
|
||||
f' && docker compose --project-directory {project_dir}'
|
||||
f' -f {host_compose} --env-file {host_env} up -d'
|
||||
)
|
||||
|
||||
def _do_apply():
|
||||
import subprocess as _subprocess
|
||||
_subprocess.Popen(
|
||||
['docker', 'run', '--rm',
|
||||
'-v', '/var/run/docker.sock:/var/run/docker.sock',
|
||||
'-v', f'{project_dir}:{project_dir}',
|
||||
'--entrypoint', 'sh',
|
||||
'pic_api:latest',
|
||||
'-c', helper_script],
|
||||
close_fds=True,
|
||||
stdout=_subprocess.DEVNULL,
|
||||
stderr=_subprocess.DEVNULL,
|
||||
)
|
||||
logger.info(
|
||||
'spawned helper container for all-services restart'
|
||||
+ (' (network_recreate)' if needs_network_recreate else '')
|
||||
)
|
||||
else:
|
||||
compose_down_args = None
|
||||
compose_up_args = ['up', '-d', '--no-deps'] + containers
|
||||
|
||||
base_cmd = ['docker', 'compose',
|
||||
'--project-directory', project_dir,
|
||||
'-f', '/app/docker-compose.yml',
|
||||
'--env-file', '/app/.env.compose']
|
||||
|
||||
# Run in a background thread; 0.3 s delay lets Flask send this response first.
|
||||
def _do_apply():
|
||||
import time as _time
|
||||
import subprocess as _subprocess
|
||||
_time.sleep(0.3)
|
||||
if compose_down_args:
|
||||
r = _subprocess.run(base_cmd + compose_down_args,
|
||||
capture_output=True, text=True, timeout=60)
|
||||
if r.returncode != 0:
|
||||
logger.error(f"docker compose down failed: {r.stderr.strip()}")
|
||||
return
|
||||
result = _subprocess.run(base_cmd + compose_up_args,
|
||||
capture_output=True, text=True, timeout=120)
|
||||
if result.returncode != 0:
|
||||
logger.error(f"docker compose up failed: {result.stderr.strip()}")
|
||||
else:
|
||||
logger.info(f'docker compose {" ".join(compose_up_args)} completed successfully')
|
||||
# Specific containers only — API is not affected, run directly from here.
|
||||
def _do_apply():
|
||||
import time as _time
|
||||
import subprocess as _subprocess
|
||||
_time.sleep(0.3)
|
||||
result = _subprocess.run(
|
||||
['docker', 'compose',
|
||||
'--project-directory', project_dir,
|
||||
'-f', '/app/docker-compose.yml',
|
||||
'--env-file', '/app/.env.compose',
|
||||
'up', '-d', '--no-deps'] + containers,
|
||||
capture_output=True, text=True, timeout=120,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
logger.error(f"docker compose up failed: {result.stderr.strip()}")
|
||||
else:
|
||||
logger.info(f'docker compose up completed for: {containers}')
|
||||
|
||||
threading.Thread(target=_do_apply, daemon=False).start()
|
||||
|
||||
@@ -1348,7 +1391,7 @@ def apply_wireguard_enforcement():
|
||||
try:
|
||||
peers = peer_registry.list_peers()
|
||||
firewall_manager.apply_all_peer_rules(peers)
|
||||
firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH)
|
||||
firewall_manager.apply_all_dns_rules(peers, COREFILE_PATH, _configured_domain())
|
||||
return jsonify({'ok': True, 'peers': len(peers)})
|
||||
except Exception as e:
|
||||
return jsonify({'error': str(e)}), 500
|
||||
@@ -1489,7 +1532,7 @@ def add_peer():
|
||||
if success:
|
||||
# Apply server-side enforcement immediately
|
||||
firewall_manager.apply_peer_rules(peer_info['ip'], peer_info)
|
||||
firewall_manager.apply_all_dns_rules(peer_registry.list_peers(), COREFILE_PATH)
|
||||
firewall_manager.apply_all_dns_rules(peer_registry.list_peers(), COREFILE_PATH, _configured_domain())
|
||||
return jsonify({"message": f"Peer {data['name']} added successfully", "ip": assigned_ip}), 201
|
||||
else:
|
||||
return jsonify({"error": f"Peer {data['name']} already exists"}), 400
|
||||
@@ -1525,7 +1568,7 @@ def update_peer(peer_name):
|
||||
updated_peer = peer_registry.get_peer(peer_name)
|
||||
if updated_peer:
|
||||
firewall_manager.apply_peer_rules(updated_peer['ip'], updated_peer)
|
||||
firewall_manager.apply_all_dns_rules(peer_registry.list_peers(), COREFILE_PATH)
|
||||
firewall_manager.apply_all_dns_rules(peer_registry.list_peers(), COREFILE_PATH, _configured_domain())
|
||||
result = {"message": f"Peer {peer_name} updated", "config_changed": config_changed}
|
||||
return jsonify(result)
|
||||
else:
|
||||
|
||||
+21
-21
@@ -212,30 +212,27 @@ def apply_all_peer_rules(peers: List[Dict[str, Any]]) -> None:
|
||||
# DNS ACL (CoreDNS Corefile generation)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Map service name → DNS hostname in .cell zone
|
||||
SERVICE_HOSTS = {
|
||||
'calendar': 'calendar.cell.',
|
||||
'files': 'files.cell.',
|
||||
'mail': 'mail.cell.',
|
||||
'webdav': 'webdav.cell.',
|
||||
}
|
||||
# Service subdomains that get per-peer ACL rules in the CoreDNS zone block
|
||||
_ACL_SERVICES = ('calendar', 'files', 'mail', 'webdav')
|
||||
|
||||
|
||||
def _build_acl_block(blocked_peers_by_service: Dict[str, List[str]]) -> str:
|
||||
def _build_acl_block(blocked_peers_by_service: Dict[str, List[str]],
|
||||
domain: str = 'cell') -> str:
|
||||
"""
|
||||
Build CoreDNS ACL plugin stanzas.
|
||||
|
||||
blocked_peers_by_service: { 'calendar': ['10.0.0.2', '10.0.0.3'], ... }
|
||||
Returns a string to embed in the `cell { }` zone block.
|
||||
Returns a string to embed in the primary zone block.
|
||||
"""
|
||||
if not blocked_peers_by_service:
|
||||
return ''
|
||||
|
||||
lines = []
|
||||
for service, peer_ips in blocked_peers_by_service.items():
|
||||
host = SERVICE_HOSTS.get(service)
|
||||
if not host or not peer_ips:
|
||||
for service in _ACL_SERVICES:
|
||||
peer_ips = blocked_peers_by_service.get(service, [])
|
||||
if not peer_ips:
|
||||
continue
|
||||
host = f'{service}.{domain}.'
|
||||
for ip in peer_ips:
|
||||
lines.append(f' acl {host} {{')
|
||||
lines.append(f' block net {ip}/32')
|
||||
@@ -245,10 +242,12 @@ def _build_acl_block(blocked_peers_by_service: Dict[str, List[str]]) -> str:
|
||||
return '\n'.join(lines)
|
||||
|
||||
|
||||
def generate_corefile(peers: List[Dict[str, Any]], corefile_path: str = COREFILE_PATH) -> bool:
|
||||
def generate_corefile(peers: List[Dict[str, Any]], corefile_path: str = COREFILE_PATH,
|
||||
domain: str = 'cell') -> bool:
|
||||
"""
|
||||
Rewrite the CoreDNS Corefile with per-peer ACL rules and reload plugin.
|
||||
The file is written to corefile_path (API-side path mapped into CoreDNS container).
|
||||
domain: the configured cell domain (e.g. 'cell', 'dev') — must match zone file names.
|
||||
"""
|
||||
try:
|
||||
# Collect which peers block which services
|
||||
@@ -262,12 +261,12 @@ def generate_corefile(peers: List[Dict[str, Any]], corefile_path: str = COREFILE
|
||||
if service not in allowed_services:
|
||||
blocked[service].append(ip)
|
||||
|
||||
acl_block = _build_acl_block(blocked)
|
||||
acl_block = _build_acl_block(blocked, domain)
|
||||
|
||||
cell_zone_block = 'cell {\n file /data/cell.zone\n log\n'
|
||||
primary_zone_block = f'{domain} {{\n file /data/{domain}.zone\n log\n'
|
||||
if acl_block:
|
||||
cell_zone_block += acl_block + '\n'
|
||||
cell_zone_block += '}\n'
|
||||
primary_zone_block += acl_block + '\n'
|
||||
primary_zone_block += '}\n'
|
||||
|
||||
corefile = f""". {{
|
||||
forward . 8.8.8.8 1.1.1.1
|
||||
@@ -276,8 +275,8 @@ def generate_corefile(peers: List[Dict[str, Any]], corefile_path: str = COREFILE
|
||||
health
|
||||
}}
|
||||
|
||||
{cell_zone_block}
|
||||
local.cell {{
|
||||
{primary_zone_block}
|
||||
local.{domain} {{
|
||||
file /data/local.zone
|
||||
log
|
||||
}}
|
||||
@@ -307,9 +306,10 @@ def reload_coredns() -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def apply_all_dns_rules(peers: List[Dict[str, Any]], corefile_path: str = COREFILE_PATH) -> bool:
|
||||
def apply_all_dns_rules(peers: List[Dict[str, Any]], corefile_path: str = COREFILE_PATH,
|
||||
domain: str = 'cell') -> bool:
|
||||
"""Regenerate Corefile and reload CoreDNS."""
|
||||
ok = generate_corefile(peers, corefile_path)
|
||||
ok = generate_corefile(peers, corefile_path, domain)
|
||||
if ok:
|
||||
reload_coredns()
|
||||
return ok
|
||||
|
||||
@@ -129,6 +129,84 @@ def get_virtual_ips(ip_range: str) -> Dict[str, str]:
|
||||
}
|
||||
|
||||
|
||||
def write_caddyfile(ip_range: str, cell_name: str, domain: str, path: str) -> bool:
|
||||
"""
|
||||
Generate the Caddy reverse-proxy config from the current ip_range, cell_name, and domain.
|
||||
|
||||
Must be called after any ip_range or domain change so Caddy routes correctly.
|
||||
Container-internal ports are fixed by docker-compose and never change.
|
||||
Returns True on success.
|
||||
"""
|
||||
try:
|
||||
ips = get_service_ips(ip_range)
|
||||
caddy_ip = ips['caddy']
|
||||
vip_calendar = ips['vip_calendar']
|
||||
vip_files = ips['vip_files']
|
||||
vip_mail = ips['vip_mail']
|
||||
vip_webdav = ips['vip_webdav']
|
||||
content = f"""\
|
||||
{{
|
||||
auto_https off
|
||||
}}
|
||||
|
||||
# Main cell domain — no service-IP restriction needed
|
||||
http://{cell_name}.{domain}, http://{caddy_ip}:80 {{
|
||||
handle /api/* {{
|
||||
reverse_proxy cell-api:3000
|
||||
}}
|
||||
handle /calendar* {{
|
||||
reverse_proxy cell-radicale:5232
|
||||
}}
|
||||
handle /files* {{
|
||||
reverse_proxy cell-filegator:8080
|
||||
}}
|
||||
handle /webmail* {{
|
||||
reverse_proxy cell-rainloop:8888
|
||||
}}
|
||||
handle {{
|
||||
reverse_proxy cell-webui:80
|
||||
}}
|
||||
}}
|
||||
|
||||
# Per-service virtual IPs — each gets its own IP so iptables can target them
|
||||
http://calendar.{domain}, http://{vip_calendar}:80 {{
|
||||
reverse_proxy cell-radicale:5232
|
||||
}}
|
||||
|
||||
http://files.{domain}, http://{vip_files}:80 {{
|
||||
reverse_proxy cell-filegator:8080
|
||||
}}
|
||||
|
||||
http://mail.{domain}, http://webmail.{domain}, http://{vip_mail}:80 {{
|
||||
reverse_proxy cell-rainloop:8888
|
||||
}}
|
||||
|
||||
http://webdav.{domain}, http://{vip_webdav}:80 {{
|
||||
reverse_proxy cell-webdav:80
|
||||
}}
|
||||
|
||||
http://api.{domain} {{
|
||||
reverse_proxy cell-api:3000
|
||||
}}
|
||||
|
||||
# Catch-all for direct IP / localhost
|
||||
:80 {{
|
||||
handle /api/* {{
|
||||
reverse_proxy cell-api:3000
|
||||
}}
|
||||
handle {{
|
||||
reverse_proxy cell-webui:80
|
||||
}}
|
||||
}}
|
||||
"""
|
||||
os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
|
||||
with open(path, 'w') as f:
|
||||
f.write(content)
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def write_env_file(ip_range: str, path: str, ports: Optional[Dict[str, int]] = None) -> bool:
|
||||
"""
|
||||
Write (or overwrite) the docker-compose .env file with IPs and ports.
|
||||
|
||||
+9
-22
@@ -401,30 +401,17 @@ class NetworkManager(BaseServiceManager):
|
||||
except Exception as e:
|
||||
warnings.append(f"dnsmasq domain update failed: {e}")
|
||||
|
||||
# 2. Update Corefile: replace old primary zone block with new domain
|
||||
# 2. Regenerate Corefile using generate_corefile so it always stays consistent
|
||||
try:
|
||||
import firewall_manager as _fm
|
||||
corefile = os.path.join(self.config_dir, 'dns', 'Corefile')
|
||||
if os.path.exists(corefile):
|
||||
with open(corefile) as f:
|
||||
content = f.read()
|
||||
import re
|
||||
# Replace first named zone block (not the catch-all .) with new domain
|
||||
# Matches: <word> { ... } blocks (zone names like "cell", "oldname")
|
||||
def replace_zone(m):
|
||||
zone = m.group(1)
|
||||
if zone == '.':
|
||||
return m.group(0) # keep catch-all
|
||||
# Replace zone name with new domain; update file path reference
|
||||
body = m.group(2)
|
||||
body = re.sub(r'file\s+/data/\S+\.zone',
|
||||
f'file /data/{domain}.zone', body)
|
||||
return f'{domain} {{{body}}}'
|
||||
new_content = re.sub(
|
||||
r'(\S+)\s*\{([^}]*)\}',
|
||||
replace_zone, content, flags=re.DOTALL
|
||||
)
|
||||
with open(corefile, 'w') as f:
|
||||
f.write(new_content)
|
||||
peers_file = os.path.join(self.data_dir, 'peers.json')
|
||||
try:
|
||||
import json as _json
|
||||
peers = _json.loads(open(peers_file).read()) if os.path.exists(peers_file) else []
|
||||
except Exception:
|
||||
peers = []
|
||||
_fm.generate_corefile(peers, corefile, domain)
|
||||
except Exception as e:
|
||||
warnings.append(f"Corefile domain update failed: {e}")
|
||||
|
||||
|
||||
+3
-3
@@ -5,12 +5,12 @@
|
||||
health
|
||||
}
|
||||
|
||||
cell {
|
||||
file /data/cell.zone
|
||||
dev {
|
||||
file /data/dev.zone
|
||||
log
|
||||
}
|
||||
|
||||
local.cell {
|
||||
local.dev {
|
||||
file /data/local.zone
|
||||
log
|
||||
}
|
||||
|
||||
+12
-1
@@ -44,7 +44,6 @@ REQUIRED_DIRS = [
|
||||
]
|
||||
|
||||
REQUIRED_FILES = [
|
||||
'config/caddy/Caddyfile',
|
||||
'config/dns/Corefile',
|
||||
'config/dhcp/dnsmasq.conf',
|
||||
'config/ntp/chrony.conf',
|
||||
@@ -205,6 +204,17 @@ def write_compose_env(ip_range: str):
|
||||
print(f'[WARN] Could not write .env — containers will use built-in default IPs/ports')
|
||||
|
||||
|
||||
def write_caddy_config(ip_range: str, cell_name: str, domain: str):
|
||||
"""Generate Caddyfile with correct VIPs and hostnames for this cell."""
|
||||
sys.path.insert(0, os.path.join(ROOT, 'api'))
|
||||
import ip_utils
|
||||
caddyfile = os.path.join(ROOT, 'config', 'caddy', 'Caddyfile')
|
||||
if ip_utils.write_caddyfile(ip_range, cell_name, domain, caddyfile):
|
||||
print(f'[CREATED] config/caddy/Caddyfile (subnet={ip_range} domain={domain})')
|
||||
else:
|
||||
print(f'[WARN] Could not write Caddyfile')
|
||||
|
||||
|
||||
def _read_existing_ip_range() -> str:
|
||||
"""Read ip_range from existing cell_config.json if present, else return None."""
|
||||
cfg_path = os.path.join(ROOT, 'config', 'api', 'cell_config.json')
|
||||
@@ -237,6 +247,7 @@ def main():
|
||||
write_wg0_conf(priv, vpn_address, wg_port)
|
||||
write_cell_config(cell_name, domain, wg_port)
|
||||
write_compose_env(ip_range)
|
||||
write_caddy_config(ip_range, cell_name, domain)
|
||||
|
||||
print()
|
||||
print('--- Setup complete! Run: make start ---')
|
||||
|
||||
Reference in New Issue
Block a user