feat: replace hardcoded service names with ServiceRegistry-driven Caddy and CoreDNS config
Unit Tests / test (push) Failing after 11s
Unit Tests / test (push) Failing after 11s
Previously, CaddyManager and NetworkManager contained hardcoded lists of service names (calendar, files, mail, webdav, etc.), meaning every new service required a code change to appear in Caddy routes and DNS records. Now both managers accept a service_registry parameter and derive their service lists dynamically from the registry at runtime.

- CaddyManager: new _build_registry_service_routes() and _http01_service_pairs() methods pull routes from the registry
- NetworkManager: new _get_service_subdomains() method returns registry subdomains with a hardcoded fallback when no registry is wired in; _build_dns_records, stale-record detection, and service name sets all use the registry
- managers.py: service_registry constructed before network_manager so it can be injected into both CaddyManager and NetworkManager
- service_registry.py: validation chokepoint in get_caddy_routes() rejects invalid subdomain/backend values and reserved service names
- service_store_manager.py: _validate_manifest now validates top-level subdomain, backend, extra_subdomains, and extra_backends fields
- tests: 24 new tests covering registry-driven routing and DNS subdomain generation (test_caddy_registry_integration.py)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+139
-4
@@ -6,6 +6,8 @@ Centralized configuration management for all services
|
||||
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
import yaml
|
||||
import shutil
|
||||
import hashlib
|
||||
@@ -14,6 +16,9 @@ from typing import Dict, List, Optional, Any
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
_SAFE_CONTAINER_RE = re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_.-]{0,63}$')
|
||||
_SAFE_VOL_NAME_RE = re.compile(r'^[a-zA-Z0-9_.-]{1,64}$')
|
||||
|
||||
# The Caddyfile lives on a separate volume mount from the rest of config
|
||||
LIVE_CADDYFILE = os.environ.get('CADDYFILE_PATH', '/app/config-caddy/Caddyfile')
|
||||
|
||||
@@ -228,8 +233,128 @@ class ConfigManager:
|
||||
"warnings": warnings
|
||||
}
|
||||
|
||||
def backup_config(self) -> str:
|
||||
"""Create a backup of cell_config.json, secrets, Caddyfile, .env, Corefile, and DNS zones."""
|
||||
@staticmethod
def _validate_vol_entry(service_id: str, vol: dict) -> bool:
    """Return True if a backup volume entry is safe to use; log and return False otherwise.

    Never raises on malformed input: a non-dict entry or a non-string field is
    rejected the same way as an unsafe value.

    Checks, in order:
      * ``vol`` is a dict.
      * ``container`` is a string fully matching ``_SAFE_CONTAINER_RE``.
      * ``path`` is a string that starts with ``/``, has no ``..`` component
        and contains no NUL byte.
      * ``name`` is a string fully matching ``_SAFE_VOL_NAME_RE``.

    Args:
        service_id: Service the entry belongs to; used only in log messages.
        vol: Volume entry expected to carry ``container``, ``path`` and ``name``.

    Returns:
        True when every check passes, False (after logging a warning) otherwise.
    """
    if not isinstance(vol, dict):
        logger.warning('Backup: malformed volume entry %r for %s — skipping', vol, service_id)
        return False

    container = vol.get('container', '')
    path = vol.get('path', '')
    name = vol.get('name', '')

    # fullmatch() instead of match(): with a '$'-terminated pattern, match()
    # would accept a value that ends in a newline.
    if not isinstance(container, str) or not _SAFE_CONTAINER_RE.fullmatch(container):
        logger.warning('Backup: unsafe container name %r for %s — skipping', container, service_id)
        return False

    if (not isinstance(path, str)
            or not path.startswith('/')
            or '..' in path.split('/')
            or '\x00' in path):
        logger.warning('Backup: unsafe volume path %r for %s — skipping', path, service_id)
        return False

    if not isinstance(name, str) or not _SAFE_VOL_NAME_RE.fullmatch(name):
        logger.warning('Backup: unsafe volume name %r for %s — skipping', name, service_id)
        return False

    return True
|
||||
|
||||
def _backup_service_volumes(self, backup_path: Path, service_registry) -> None:
    """Stream service data out of each container via 'docker exec tar'.

    Archives are relative (created with -C <path> .) so they can be safely
    restored with -C <path> without risk of path traversal outside the volume.
    Each archive is written to ``<name>.tar.gz.partial`` and moved to
    ``<name>.tar.gz`` with ``os.replace`` only when tar exits with status 0.

    This is best-effort: a failing plan lookup, a malformed plan entry, an
    unsafe ``service_id``, a directory that cannot be created, a rejected
    volume entry, a non-zero tar exit, a timeout or any other error while
    archiving is logged as a warning and skipped, so one bad service does
    not abort the rest.

    Args:
        backup_path: Directory of the backup being created; archives go under
            ``<backup_path>/service_data/<service_id>/``.
        service_registry: Object providing ``get_backup_plan()``, which yields
            entries with ``service_id`` and an optional ``volumes`` list.
    """
    try:
        plan = service_registry.get_backup_plan()
    except Exception as e:
        logger.warning('_backup_service_volumes: could not get backup plan: %s', e)
        return

    for entry in plan or []:
        service_id = entry.get('service_id') if isinstance(entry, dict) else None
        # service_id becomes a directory name below backup_path, so it gets the
        # same scrutiny as the volume fields: it must be one plain path component.
        if (not isinstance(service_id, str)
                or service_id in ('', '.', '..')
                or any(ch in service_id for ch in '/\\\x00')):
            logger.warning('Backup: unsafe or missing service_id %r in backup plan — skipping',
                           service_id)
            continue

        volumes = entry.get('volumes') or []
        if not volumes:
            continue

        svc_dir = backup_path / 'service_data' / service_id
        try:
            svc_dir.mkdir(parents=True, exist_ok=True)
        except OSError as e:
            logger.warning('Backup: could not create %s for %s: %s (skipping)',
                           svc_dir, service_id, e)
            continue

        for vol in volumes:
            if not self._validate_vol_entry(service_id, vol):
                continue
            container = vol['container']
            path = vol['path']
            name = vol['name']
            archive_path = svc_dir / f'{name}.tar.gz'
            tmp_path = svc_dir / f'{name}.tar.gz.partial'
            try:
                with open(tmp_path, 'wb') as af:
                    result = subprocess.run(
                        # -C path; then '.' archives the whole dir with relative entries.
                        # '--' is passed so docker stops option parsing before the
                        # container name; path cannot look like a tar option because
                        # _validate_vol_entry requires it to start with '/'.
                        ['docker', 'exec', '--', container,
                         'tar', '-C', path, '-czf', '-', '.'],
                        stdout=af,
                        stderr=subprocess.PIPE,
                        timeout=300,
                    )
                # The file is closed here, so the rename sees fully flushed data.
                if result.returncode != 0:
                    logger.warning(
                        'Backup: docker exec tar failed for %s/%s: %s',
                        service_id, name, result.stderr.decode(errors='replace'),
                    )
                else:
                    os.replace(tmp_path, archive_path)
                    logger.info('Backup: archived %s/%s', service_id, name)
            except subprocess.TimeoutExpired:
                logger.warning('Backup: timed out streaming %s/%s', service_id, name)
            except Exception as e:
                logger.warning('Backup: failed to archive %s/%s: %s', service_id, name, e)
            finally:
                # Single cleanup point: a no-op after a successful os.replace(),
                # otherwise removes the incomplete .partial file.
                try:
                    tmp_path.unlink(missing_ok=True)
                except OSError as e:
                    logger.warning('Backup: could not remove %s: %s', tmp_path, e)
|
||||
|
||||
def _restore_service_volumes(self, backup_path: Path, service_registry) -> None:
    """Feed archived service data back into containers with 'docker exec -i tar'.

    Walks ``<backup_path>/service_data/<service_id>/`` and, for each service
    the registry returns a definition for, pipes ``<name>.tar.gz`` into
    ``tar -C <path> -xzf -`` inside the volume's container. Extraction uses the
    same ``-C <path>`` the archives were created with, so members are unpacked
    relative to the declared volume directory.

    Returns immediately when there is no ``service_data`` directory. Services
    unknown to the registry, entries rejected by ``_validate_vol_entry`` and
    volumes without an archive are skipped; a non-zero tar exit, a timeout or
    any other error around the docker call is logged and the loop continues.

    Args:
        backup_path: Directory of the backup being restored.
        service_registry: Object whose ``get(service_id)`` returns the service
            definition (read here as ``['backup']['volumes']``) or a falsy value.
    """
    data_root = backup_path / 'service_data'
    if not data_root.is_dir():
        return

    for service_dir in data_root.iterdir():
        if not service_dir.is_dir():
            continue

        service_id = service_dir.name
        definition = service_registry.get(service_id)
        if not definition:
            logger.warning('Restore: unknown service %s in backup, skipping', service_id)
            continue

        backup_spec = definition.get('backup') or {}
        for vol in backup_spec.get('volumes') or []:
            if not self._validate_vol_entry(service_id, vol):
                continue

            container, path, name = vol['container'], vol['path'], vol['name']
            archive = service_dir / f'{name}.tar.gz'
            if not archive.exists():
                continue

            command = ['docker', 'exec', '-i', '--', container,
                       'tar', '-C', path, '-xzf', '-']
            try:
                with open(archive, 'rb') as stream:
                    completed = subprocess.run(
                        command,
                        stdin=stream,
                        stderr=subprocess.PIPE,
                        timeout=300,
                    )
                if completed.returncode == 0:
                    logger.info('Restore: restored %s/%s', service_id, name)
                else:
                    logger.warning(
                        'Restore: docker exec tar failed for %s/%s: %s',
                        service_id, name, completed.stderr.decode(errors='replace'),
                    )
            except subprocess.TimeoutExpired:
                logger.warning('Restore: timed out restoring %s/%s', service_id, name)
            except Exception as e:
                logger.warning('Restore: failed to restore %s/%s: %s', service_id, name, e)
|
||||
|
||||
def backup_config(self, service_registry=None) -> str:
|
||||
"""Create a backup of cell_config.json, secrets, Caddyfile, .env, Corefile, DNS zones,
|
||||
and (when service_registry is provided) live service data volumes."""
|
||||
try:
|
||||
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||
backup_id = f"backup_{timestamp}"
|
||||
@@ -278,12 +403,17 @@ class ConfigManager:
|
||||
except (PermissionError, OSError) as e:
|
||||
logger.warning(f"Could not back up {src.name}: {e} (skipping)")
|
||||
|
||||
# Live service data volumes (streamed via docker exec)
|
||||
if service_registry is not None:
|
||||
self._backup_service_volumes(backup_path, service_registry)
|
||||
|
||||
services = ['identity'] + list(self.service_schemas.keys())
|
||||
manifest = {
|
||||
"backup_id": backup_id,
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"services": services,
|
||||
"files": [f.name for f in backup_path.iterdir()],
|
||||
"includes_service_data": service_registry is not None,
|
||||
}
|
||||
with open(backup_path / 'manifest.json', 'w') as f:
|
||||
json.dump(manifest, f, indent=2)
|
||||
@@ -294,8 +424,9 @@ class ConfigManager:
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating backup: {e}")
|
||||
raise
|
||||
|
||||
def restore_config(self, backup_id: str, services: list = None) -> bool:
|
||||
|
||||
def restore_config(self, backup_id: str, services: list = None,
|
||||
service_registry=None) -> bool:
|
||||
"""Restore from backup. If services list given, only restore those service configs (selective)."""
|
||||
try:
|
||||
backup_path = self.backup_dir / backup_id
|
||||
@@ -373,6 +504,10 @@ class ConfigManager:
|
||||
except (PermissionError, OSError) as e:
|
||||
logger.warning(f"Could not restore {dest.name}: {e} (skipping)")
|
||||
|
||||
# Live service data volumes
|
||||
if service_registry is not None:
|
||||
self._restore_service_volumes(backup_path, service_registry)
|
||||
|
||||
self.configs = self._load_all_configs()
|
||||
logger.info(f"Restored configuration from backup: {backup_id}")
|
||||
return True
|
||||
|
||||
Reference in New Issue
Block a user