refactor: Network Services rebuilt, DHCP decommissioned, infra cleanup

Network Services page is rebuilt around real API data: GET /api/dns/overview
returns provider-aware records; per-service Cloudflare sync is exposed via
POST /api/ddns/sync; effective domain is displayed so operators can verify
what external name resolves to the cell; NTP status reflects the actual
systemd-timesyncd state rather than a hardcoded boolean.

DHCP is fully decommissioned: the cell-dhcp container is removed from
docker-compose.yml, DHCP methods are stripped from network_manager, the
setup_cell script no longer seeds DHCP config, and the Settings DHCP field
is gone. DHCP was never a PIC responsibility and the container was consuming
resources for no benefit.

Dead code removed: api/config.py (superseded by config_manager), the
standalone Email/Calendar/Files pages (these are now optional store services
and do not need dedicated pages). api/constants.py is introduced to hold
RESERVED_SUBDOMAINS in one place rather than scattered literals.

Docker resource limits (mem_limit, cpus, pids_limit) are added to all
compose services so a runaway process cannot starve the host.

Makefile gains a warning before the backup target so operators are not
surprised by the archive path. The Settings save/accept state fix ensures
the Cell Identity section correctly shows the accept/discard banner and
does not flash a false-positive change indicator on first load.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 08:50:00 -04:00
parent 6232ef23a9
commit c41cadafb4
16 changed files with 575 additions and 1059 deletions
+150 -203
View File
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
Network Manager for Personal Internet Cell
Handles DNS, DHCP, and NTP functionality
Handles DNS and NTP functionality
"""
import os
@@ -11,23 +11,24 @@ import subprocess
import logging
from datetime import datetime
from typing import Dict, List, Optional, Tuple, Any
import requests
from base_service_manager import BaseServiceManager
logger = logging.getLogger(__name__)
class NetworkManager(BaseServiceManager):
"""Manages network services (DNS, DHCP, NTP)"""
"""Manages network services (DNS, NTP)"""
def __init__(self, data_dir: str = '/app/data', config_dir: str = '/app/config',
service_registry=None):
super().__init__('network', data_dir, config_dir)
self.dns_zones_dir = os.path.join(data_dir, 'dns')
self.dhcp_leases_file = os.path.join(data_dir, 'dhcp', 'leases')
self._service_registry = service_registry
# Ensure directories exist
self.safe_makedirs(self.dns_zones_dir)
self.safe_makedirs(os.path.dirname(self.dhcp_leases_file))
def update_dns_zone(self, zone_name: str, records: List[Dict]) -> bool:
"""Update DNS zone file with new records"""
@@ -309,13 +310,137 @@ class NetworkManager(BaseServiceManager):
logger.error(f"Failed to list DNS records: {e}")
return all_records
def _service_subdomain_routes(self) -> List[Dict[str, str]]:
"""Return validated service subdomain → backend pairs from the registry."""
registry = getattr(self, '_service_registry', None)
if registry is None:
return []
try:
routes: List[Dict[str, str]] = []
for route in registry.get_caddy_routes():
pairs = [(route['subdomain'], route.get('backend', ''))]
extra_backends = route.get('extra_backends') or {}
for sub in route.get('extra_subdomains') or []:
pairs.append((sub, extra_backends.get(sub, route.get('backend', ''))))
for sub, backend in pairs:
if self._SUBDOMAIN_RE.match(sub):
routes.append({'subdomain': sub, 'backend': backend})
else:
logger.warning('_service_subdomain_routes: skipping invalid subdomain %r', sub)
return routes
except Exception as exc:
logger.warning('_service_subdomain_routes: registry error: %s', exc)
return []
def get_dns_overview(self, config_manager, ddns_manager=None,
                     public_ip: Optional[str] = None) -> Dict[str, Any]:
    """Compose a provider-aware DNS overview from the existing managers.

    Does NOT write DNS — it only reads from config_manager (identity/effective
    domain), the service registry (subdomains), the internal zone files, and the
    DDNS manager (registration status).

    Args:
        config_manager: Object providing ``get_identity()``,
            ``get_effective_domain()``, ``get_internal_domain()``,
            ``get_ddns_token()`` and a ``configs`` mapping.
        ddns_manager: Optional object providing ``get_status()``. When it is
            None the cell is reported as unregistered.
        public_ip: May be supplied by the caller (cached); otherwise it is
            fetched on demand, except in ``'lan'`` mode where no lookup is made.

    Returns:
        A dict with the keys ``mode``, ``provider``, ``effective_domain``,
        ``internal_domain``, ``public_ip``, ``public_records``,
        ``internal_records``, ``service_subdomains`` and
        ``registration_status``.
    """
    identity = config_manager.get_identity() or {}
    mode = identity.get('domain_mode', 'lan')
    effective_domain = config_manager.get_effective_domain()
    internal_domain = config_manager.get_internal_domain()
    ddns_cfg = config_manager.configs.get('ddns', {}) or {}
    provider = ddns_cfg.get('provider', '') or ''

    if public_ip is None and mode != 'lan':
        public_ip = self._fetch_public_ip()

    service_subdomains = [
        {
            'subdomain': route['subdomain'],
            'fqdn': f"{route['subdomain']}.{effective_domain}",
            'backend': route['backend'],
        }
        for route in self._service_subdomain_routes()
    ]

    registration_status: Dict[str, Any] = {}
    registered = False
    if ddns_manager is not None:
        try:
            # Copy the status so the 'registered' key added below never
            # leaks into a dict the DDNS manager may still be holding.
            registration_status = dict(ddns_manager.get_status() or {})
        except Exception as exc:
            logger.warning('get_dns_overview: ddns_manager.get_status failed: %s', exc)
        try:
            # A stored DDNS token is treated as "registered".
            registered = bool(config_manager.get_ddns_token())
        except Exception as exc:
            logger.warning('get_dns_overview: get_ddns_token failed: %s', exc)
            registered = False
    # Do not override a 'registered' value the DDNS manager reported itself.
    registration_status.setdefault('registered', registered)

    public_records = self._build_public_records(
        mode, effective_domain, public_ip, service_subdomains, registered)

    return {
        'mode': mode,
        'provider': provider,
        'effective_domain': effective_domain,
        'internal_domain': internal_domain,
        'public_ip': public_ip,
        'public_records': public_records,
        'internal_records': self.get_dns_records(),
        'service_subdomains': service_subdomains,
        'registration_status': registration_status,
    }
def _build_public_records(self, mode: str, effective_domain: str,
public_ip: Optional[str],
service_subdomains: List[Dict[str, str]],
registered: bool) -> List[Dict[str, str]]:
"""Derive the public A records the cell publishes (or should publish) per mode."""
ip = public_ip or ''
status = 'registered' if registered else 'unregistered'
records: List[Dict[str, str]] = []
if mode == 'lan':
return records
if mode == 'pic_ngo':
records.append({'name': effective_domain, 'type': 'A',
'value': ip, 'status': status})
records.append({'name': f'*.{effective_domain}', 'type': 'A',
'value': ip, 'status': status})
return records
if mode in ('cloudflare', 'custom'):
records.append({'name': effective_domain, 'type': 'A',
'value': ip, 'status': status})
for svc in service_subdomains:
records.append({'name': svc['fqdn'], 'type': 'A',
'value': ip, 'status': status})
return records
if mode == 'duckdns':
records.append({'name': effective_domain, 'type': 'A',
'value': ip, 'status': status})
records.append({'name': f'*.{effective_domain}', 'type': 'A',
'value': ip, 'status': status})
return records
return records
def _fetch_public_ip(self) -> Optional[str]:
    """Return the current public IPv4 address using ipify, or None on failure.

    Makes one HTTPS GET to ``https://api.ipify.org`` with a 5-second timeout.
    The response body is accepted only if it parses as an IPv4 address, so an
    empty body or an HTML page served with status 200 is never handed to
    callers as an address. This method does not raise; every failure is
    logged and reported as None.
    """
    import ipaddress  # local import: only this helper needs it

    try:
        resp = requests.get('https://api.ipify.org', timeout=5)
        if not resp.ok:
            logger.warning('get_dns_overview: could not determine public IP: HTTP %s',
                           resp.status_code)
            return None
        candidate = resp.text.strip()
    except Exception as exc:
        logger.warning('get_dns_overview: could not determine public IP: %s', exc)
        return None

    try:
        ipaddress.IPv4Address(candidate)
    except ValueError:
        # AddressValueError subclasses ValueError; log a bounded preview only.
        logger.warning('get_dns_overview: could not determine public IP: '
                       'unexpected response %r', candidate[:64])
        return None
    return candidate
def _load_dns_records(self, zone: str) -> List[Dict]:
"""Load DNS records from zone file"""
zone_file = os.path.join(self.dns_zones_dir, f'{zone}.zone')
if not os.path.exists(zone_file):
return []
records = []
try:
with open(zone_file, 'r') as f:
@@ -344,80 +469,6 @@ class NetworkManager(BaseServiceManager):
return records
def get_dhcp_leases(self) -> List[Dict]:
"""Get current DHCP leases"""
leases = []
try:
if os.path.exists(self.dhcp_leases_file):
with open(self.dhcp_leases_file, 'r') as f:
for line in f:
line = line.strip()
if line and not line.startswith('#'):
parts = line.split()
if len(parts) >= 4:
leases.append({
'mac': parts[1],
'ip': parts[2],
'hostname': parts[3] if len(parts) > 3 else '',
'timestamp': parts[0]
})
except Exception as e:
logger.error(f"Failed to load DHCP leases: {e}")
return leases
def add_dhcp_reservation(self, mac: str, ip: str, hostname: str = '') -> bool:
"""Add a DHCP reservation"""
try:
reservation_file = os.path.join(self.config_dir, 'dhcp', 'reservations.conf')
# Ensure directory exists
self.safe_makedirs(os.path.dirname(reservation_file))
# Add reservation
with open(reservation_file, 'a') as f:
f.write(f"dhcp-host={mac},{ip},{hostname}\n")
# Reload DHCP service
self._reload_dhcp_service()
logger.info(f"Added DHCP reservation: {mac} -> {ip}")
return True
except Exception as e:
logger.error(f"Failed to add DHCP reservation: {e}")
return False
def remove_dhcp_reservation(self, mac: str) -> bool:
"""Remove a DHCP reservation"""
try:
reservation_file = os.path.join(self.config_dir, 'dhcp', 'reservations.conf')
if not os.path.exists(reservation_file):
return True
# Read existing reservations
with open(reservation_file, 'r') as f:
lines = f.readlines()
# Remove matching reservation
lines = [line for line in lines if not line.startswith(f"dhcp-host={mac},")]
# Write back
with open(reservation_file, 'w') as f:
f.writelines(lines)
# Reload DHCP service
self._reload_dhcp_service()
logger.info(f"Removed DHCP reservation: {mac}")
return True
except Exception as e:
logger.error(f"Failed to remove DHCP reservation: {e}")
return False
def get_ntp_status(self) -> Dict:
"""Get NTP service status"""
try:
@@ -460,36 +511,10 @@ class NetworkManager(BaseServiceManager):
except Exception as e:
logger.error(f"Failed to reload DNS service: {e}")
def _reload_dhcp_service(self):
"""Reload DHCP service"""
try:
subprocess.run(['docker', 'exec', 'cell-dhcp', 'kill', '-HUP', '1'],
capture_output=True, timeout=10)
except Exception as e:
logger.error(f"Failed to reload DHCP service: {e}")
def apply_config(self, config: Dict[str, Any]) -> Dict[str, Any]:
"""Write config to real service files and reload/restart affected containers."""
restarted = []
warnings = []
dnsmasq_changed = False
# DHCP range
if 'dhcp_range' in config:
try:
dhcp_conf = os.path.join(self.config_dir, 'dhcp', 'dnsmasq.conf')
if os.path.exists(dhcp_conf):
with open(dhcp_conf) as f:
lines = f.readlines()
lines = [
f"dhcp-range={config['dhcp_range']}\n" if l.startswith('dhcp-range=') else l
for l in lines
]
with open(dhcp_conf, 'w') as f:
f.writelines(lines)
dnsmasq_changed = True
except Exception as e:
warnings.append(f"dhcp_range write failed: {e}")
# NTP servers
if 'ntp_servers' in config and config['ntp_servers']:
@@ -509,39 +534,17 @@ class NetworkManager(BaseServiceManager):
except Exception as e:
warnings.append(f"ntp_servers write failed: {e}")
if dnsmasq_changed:
self._reload_dhcp_service()
restarted.append('cell-dhcp (reloaded)')
return {'restarted': restarted, 'warnings': warnings}
def apply_domain(self, domain: str, reload: bool = True) -> Dict[str, Any]:
"""Update domain across dnsmasq, Corefile, and zone file; reload DNS + DHCP.
"""Update domain across the Corefile and zone file; reload DNS.
reload=False writes config files only — use when deferring container restart.
"""
restarted = []
warnings = []
# 1. Update dnsmasq.conf domain= line
try:
dhcp_conf = os.path.join(self.config_dir, 'dhcp', 'dnsmasq.conf')
if os.path.exists(dhcp_conf):
with open(dhcp_conf) as f:
lines = f.readlines()
lines = [
f"domain={domain}\n" if l.startswith('domain=') else l
for l in lines
]
with open(dhcp_conf, 'w') as f:
f.writelines(lines)
if reload:
self._reload_dhcp_service()
restarted.append('cell-dhcp (reloaded)')
except Exception as e:
warnings.append(f"dnsmasq domain update failed: {e}")
# 2. Regenerate Corefile — include cell-to-cell forwarding stanzas so a
# 1. Regenerate Corefile — include cell-to-cell forwarding stanzas so a
# domain/ip_range change doesn't wipe cross-cell DNS forwarding zones.
try:
import firewall_manager as _fm
@@ -562,7 +565,7 @@ class NetworkManager(BaseServiceManager):
except Exception as e:
warnings.append(f"Corefile domain update failed: {e}")
# 3. Update zone file: rename and rewrite $ORIGIN / SOA, remove stale zones
# 2. Update zone file: rename and rewrite $ORIGIN / SOA, remove stale zones
try:
dns_data = os.path.join(self.data_dir, 'dns')
if os.path.isdir(dns_data):
@@ -599,7 +602,7 @@ class NetworkManager(BaseServiceManager):
except Exception as e:
warnings.append(f"zone file domain update failed: {e}")
# 4. Reload CoreDNS (only when not deferring to Apply)
# 3. Reload CoreDNS (only when not deferring to Apply)
if reload:
try:
self._reload_dns_service()
@@ -758,29 +761,6 @@ class NetworkManager(BaseServiceManager):
except Exception as e:
return {'success': False, 'output': '', 'error': str(e)}
def test_dhcp_functionality(self) -> Dict:
"""Test DHCP functionality"""
try:
# Check if DHCP service is running
result = subprocess.run(['docker', 'ps', '--filter', 'name=cell-dhcp', '--format', '{{.Names}}'],
capture_output=True, text=True)
is_running = len(result.stdout.strip()) > 0
# Get DHCP leases
leases = self.get_dhcp_leases()
return {
'success': is_running,
'running': is_running,
'leases_count': len(leases),
'leases': leases
}
except Exception as e:
logger.error(f"Failed to test DHCP functionality: {e}")
return {'success': False, 'running': False, 'leases_count': 0, 'leases': []}
def test_ntp_functionality(self) -> Dict:
"""Test NTP functionality"""
try:
@@ -879,19 +859,16 @@ class NetworkManager(BaseServiceManager):
if is_docker:
# Check if network containers are actually running
dns_running = self._check_dns_container_status()
dhcp_running = self._check_dhcp_container_status()
ntp_running = self._check_ntp_container_status()
all_running = dns_running and dhcp_running and ntp_running
all_running = dns_running and ntp_running
status = {
'dns_running': dns_running,
'dhcp_running': dhcp_running,
'ntp_running': ntp_running,
'running': all_running,
'status': 'online' if all_running else 'offline',
'network': {
'dns_running': dns_running,
'dhcp_running': dhcp_running,
'ntp_running': ntp_running,
'running': all_running,
'status': 'online' if all_running else 'offline'
@@ -901,25 +878,22 @@ class NetworkManager(BaseServiceManager):
else:
# Check actual service status in production
dns_running = self._check_dns_status()
dhcp_running = self._check_dhcp_status()
ntp_running = self._check_ntp_status()
status = {
'dns_running': dns_running,
'dhcp_running': dhcp_running,
'ntp_running': ntp_running,
'running': dns_running and dhcp_running and ntp_running,
'status': 'online' if (dns_running and dhcp_running and ntp_running) else 'offline',
'running': dns_running and ntp_running,
'status': 'online' if (dns_running and ntp_running) else 'offline',
'network': {
'dns_running': dns_running,
'dhcp_running': dhcp_running,
'ntp_running': ntp_running,
'running': dns_running and dhcp_running and ntp_running,
'status': 'online' if (dns_running and dhcp_running and ntp_running) else 'offline'
'running': dns_running and ntp_running,
'status': 'online' if (dns_running and ntp_running) else 'offline'
},
'timestamp': datetime.utcnow().isoformat()
}
return status
except Exception as e:
return self.handle_error(e, "get_status")
@@ -934,16 +908,6 @@ class NetworkManager(BaseServiceManager):
except Exception:
return False
def _check_dhcp_container_status(self) -> bool:
"""Check if DHCP Docker container is running"""
try:
import docker
client = docker.from_env()
containers = client.containers.list(filters={'name': 'cell-dhcp'})
return len(containers) > 0
except Exception:
return False
def _check_ntp_container_status(self) -> bool:
"""Check if NTP Docker container is running"""
try:
@@ -958,31 +922,28 @@ class NetworkManager(BaseServiceManager):
"""Test network service connectivity"""
try:
dns_test = self.test_dns_resolution('google.com')
dhcp_test = self.test_dhcp_functionality()
ntp_test = self.test_ntp_functionality()
results = {
'dns_test': dns_test,
'dhcp_test': dhcp_test,
'ntp_test': ntp_test,
'timestamp': datetime.utcnow().isoformat()
}
# Determine overall success
success = all(
result.get('success', False)
for result in [dns_test, dhcp_test, ntp_test]
result.get('success', False)
for result in [dns_test, ntp_test]
)
results['success'] = success
# Add network key for compatibility
results['network'] = {
'dns_test': dns_test,
'dhcp_test': dhcp_test,
'ntp_test': ntp_test,
'success': success
}
return results
except Exception as e:
return self.handle_error(e, "test_connectivity")
@@ -1001,20 +962,6 @@ class NetworkManager(BaseServiceManager):
except Exception:
return False
def _check_dhcp_status(self) -> bool:
"""Check if DHCP service is running"""
try:
result = subprocess.run(['systemctl', 'is-active', 'dnsmasq'],
capture_output=True, text=True, timeout=5)
return result.returncode == 0 and result.stdout.strip() == 'active'
except Exception:
# Fallback: check if port 67 is listening
try:
result = subprocess.run(['netstat', '-tuln'], capture_output=True, text=True)
return ':67 ' in result.stdout
except Exception:
return False
def _check_ntp_status(self) -> bool:
"""Check if NTP service is running"""
try: