238db60702
Unit Tests / test (push) Successful in 13m28s
- config/cosign/cosign.pub: public verification key committed to repo (safe); cosign private key lives in /home/roof/.pic-secrets/ and is NEVER committed - api/config_manager.py: image_verification config block (modes: off|warn|enforce, default: warn) so existing deployments are unaffected until images are signed - api/service_composer.py: cosign verify before pull/up; enforce aborts the operation, warn logs and proceeds, off skips entirely; also fixes the prior unsafe proceed-on-pull-failure path - api/service_store_manager.py: store-image digest requirement (warn default, reject under enforce) - api/Dockerfile: cosign binary copied from the official cosign image - docker-compose.yml: config/cosign/ bind-mounted into cell-api container - install.sh: ensure/verify bundled cosign pubkey on new cell installs - api/manifest_validator.py: validate_build_context() — Dockerfile lint - tests: full coverage for config modes, composer verify paths, store digest guard, and validate_build_context Verification defaults to warn so nothing breaks in production until images are signed (phase 2). Private key stored outside git at /home/roof/.pic-secrets/. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
1343 lines
60 KiB
Python
1343 lines
60 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Configuration Manager for Personal Internet Cell
|
|
Centralized configuration management for all services
|
|
"""
|
|
|
|
import os
|
|
import json
|
|
import re
|
|
import subprocess
|
|
import tarfile
|
|
import io
|
|
import fnmatch
|
|
import yaml
|
|
import shutil
|
|
import hashlib
|
|
import threading
|
|
from datetime import datetime
|
|
from typing import Dict, List, Optional, Any
|
|
from pathlib import Path
|
|
import logging
|
|
|
|
import backup_crypto
|
|
|
|
# Allowed container names for backup volume entries: one leading alphanumeric
# followed by up to 63 alphanumerics, '_', '.' or '-'.
# NOTE: in Python `$` also matches just before a trailing newline, so callers
# that need an exact match should prefer fullmatch() over match().
_SAFE_CONTAINER_RE = re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_.-]{0,63}$')
# Allowed backup volume names (1-64 chars). The name becomes part of the
# "<name>.tar.gz" archive filename written by the volume backup.
_SAFE_VOL_NAME_RE = re.compile(r'^[a-zA-Z0-9_.-]{1,64}$')

# The Caddyfile lives on a separate volume mount from the rest of config;
# the location can be overridden with the CADDYFILE_PATH environment variable.
LIVE_CADDYFILE = os.environ.get('CADDYFILE_PATH', '/app/config-caddy/Caddyfile')

# Trash that must never end up inside a backup. Matched against each file's
# path relative to the data dir (posix-style), and bare filenames.
_BACKUP_EXCLUDE_GLOBS = (
    'logs/*', 'logs/**',
    'api/config_backups/*', 'api/config_backups/**',
    '*.tmp', '*.partial',
    '__pycache__/*', '**/__pycache__/**',
)
# Specific files (by path relative to data dir) to never copy.
_BACKUP_EXCLUDE_FILES = (
    'api/.test_admin_pass',
    'api/.gitkeep',
)

# Module-level logger shared by everything in this file.
logger = logging.getLogger(__name__)

# Valid Python logging levels for the `logging` config section.
_VALID_LOG_LEVELS = ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')

# Image signature verification modes (see get/set_image_verification).
_IMAGE_VERIFY_MODES = ('off', 'warn', 'enforce')

# Per-service Python loggers exposed in the verbosity panel.
_LOGGING_PYTHON_SERVICES = (
    'network', 'wireguard', 'email', 'calendar',
    'files', 'routing', 'vault', 'api',
)

# Container services whose log level we can influence (hot for caddy/coredns,
# pending_restart for env-driven containers).
_LOGGING_CONTAINERS = ('caddy', 'coredns', 'wireguard', 'mailserver', 'api')
|
|
|
|
|
|
def _default_logging_config() -> Dict[str, Any]:
    """Build a fresh default `logging` section for cell_config.

    The root logger, every per-service Python logger and every known
    container all start out at ``INFO``.
    """
    service_levels = dict.fromkeys(_LOGGING_PYTHON_SERVICES, 'INFO')
    container_levels = dict.fromkeys(_LOGGING_CONTAINERS, 'INFO')
    python_section = {'root': 'INFO', 'services': service_levels}
    return {'python': python_section, 'containers': container_levels}
|
|
|
|
|
|
class ConfigManager:
|
|
"""Centralized configuration management for all services (unified config)"""
|
|
|
|
def __init__(self, config_file: str = '/app/config/cell_config.json', data_dir: str = '/app/data'):
    """Load (or initialise) the unified cell configuration.

    Args:
        config_file: Path to ``cell_config.json``; if it names a directory,
            ``cell_config.json`` inside that directory is used.
        data_dir: Root of the data directory. Backups are kept in
            ``<data_dir>/config_backups``.

    Side effects: creates the backup directory (best-effort), writes the
    config file if it does not exist yet, and may rewrite it once when the
    internal-domain migration below applies.
    """
    config_file = Path(config_file)
    if config_file.is_dir():
        config_file = config_file / 'cell_config.json'
    # Diagnostics go through the module logger instead of stdout.
    logger.debug('ConfigManager.__init__: config_file = %s', config_file)
    self.config_file = config_file
    self.data_dir = Path(data_dir)
    self.backup_dir = self.data_dir / 'config_backups'
    self.secrets_file = self.config_file.parent / 'secrets.yaml'
    try:
        self.backup_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:  # PermissionError is a subclass of OSError
        # Best-effort: construction must still succeed when the data dir is
        # not writable, but the operator should be able to see why backups
        # will fail later.
        logger.warning('ConfigManager: could not create backup dir %s: %s',
                       self.backup_dir, e)
    self.service_schemas = self._load_service_schemas()
    self.configs = self._load_all_configs()
    # Guards concurrent reads/writes of the connectivity v2 section.
    self._connectivity_lock = threading.RLock()
    # Optional callback invoked to migrate the legacy connectivity section
    # to v2 on first access. Wired by ConnectivityManager (which owns the
    # resource-allocation logic). Until set, get_connectivity() returns the
    # raw (possibly legacy) section without migrating.
    self._connectivity_migrator = None
    # Ensure _identity key always exists
    if '_identity' not in self.configs:
        self.configs['_identity'] = {}
    # Phase 5: ensure connectivity section exists with empty defaults.
    if 'connectivity' not in self.configs:
        self.configs['connectivity'] = {'exits': {}, 'peer_exit_map': {}}
    self._ensure_logging_config()
    if not self.config_file.exists():
        self._save_all_configs()
    # Migration: when DDNS is active but the internal domain is still the
    # generic "cell" default, give CoreDNS a unique zone name so multiple
    # cells on the same LAN don't collide.
    try:
        _ident = self.configs.get('_identity', {})
        _mode = _ident.get('domain_mode', 'lan')
        _domain = _ident.get('domain', '')
        _cell_name = _ident.get('cell_name', '')
        if (_mode != 'lan' and _cell_name
                and (_domain in ('cell', '', None))):
            _new_domain = f'{_cell_name}.local'
            self.configs['_identity']['domain'] = _new_domain
            self._save_all_configs()
    except Exception as e:
        # Still non-fatal, but no longer invisible.
        logger.warning('ConfigManager: internal-domain migration skipped: %s', e)
|
|
|
|
def _load_service_schemas(self) -> Dict[str, Dict]:
|
|
"""Load configuration schemas for all services"""
|
|
return {
|
|
'network': {
|
|
'required': ['dns_port', 'ntp_servers'],
|
|
'optional': ['dns_zones'],
|
|
'types': {
|
|
'dns_port': int,
|
|
'ntp_servers': list
|
|
}
|
|
},
|
|
'wireguard': {
|
|
'required': ['port', 'private_key', 'address'],
|
|
'optional': ['peers', 'allowed_ips'],
|
|
'types': {
|
|
'port': int,
|
|
'private_key': str,
|
|
'address': str
|
|
}
|
|
},
|
|
'email': {
|
|
'required': ['domain', 'smtp_port', 'imap_port'],
|
|
'optional': ['users', 'ssl_cert', 'ssl_key', 'submission_port', 'webmail_port'],
|
|
'types': {
|
|
'smtp_port': int,
|
|
'submission_port': int,
|
|
'imap_port': int,
|
|
'webmail_port': int,
|
|
'domain': str
|
|
}
|
|
},
|
|
'calendar': {
|
|
'required': ['port', 'data_dir'],
|
|
'optional': ['users', 'calendars'],
|
|
'types': {
|
|
'port': int,
|
|
'data_dir': str
|
|
}
|
|
},
|
|
'files': {
|
|
'required': ['port', 'data_dir'],
|
|
'optional': ['users', 'quota', 'manager_port'],
|
|
'types': {
|
|
'port': int,
|
|
'manager_port': int,
|
|
'data_dir': str,
|
|
'quota': int
|
|
}
|
|
},
|
|
'routing': {
|
|
'required': ['nat_enabled', 'firewall_enabled'],
|
|
'optional': ['nat_rules', 'firewall_rules', 'peer_routes'],
|
|
'types': {
|
|
'nat_enabled': bool,
|
|
'firewall_enabled': bool
|
|
}
|
|
},
|
|
'vault': {
|
|
'required': ['ca_configured', 'fernet_configured'],
|
|
'optional': ['certificates', 'trusted_keys'],
|
|
'types': {
|
|
'ca_configured': bool,
|
|
'fernet_configured': bool
|
|
}
|
|
},
|
|
'connectivity': {
|
|
'required': [],
|
|
'optional': ['exits', 'peer_exit_map'],
|
|
'types': {
|
|
'exits': dict,
|
|
'peer_exit_map': dict,
|
|
}
|
|
}
|
|
}
|
|
|
|
def _load_all_configs(self) -> Dict[str, Dict]:
|
|
"""Load all existing service configurations"""
|
|
if self.config_file.exists():
|
|
try:
|
|
with open(self.config_file, 'r') as f:
|
|
return json.load(f)
|
|
except Exception as e:
|
|
logger.error(f"Error loading unified config: {e}")
|
|
return {}
|
|
return {}
|
|
|
|
def _save_all_configs(self):
|
|
"""Save all service configurations to the unified config file (atomic write)."""
|
|
try:
|
|
self.config_file.parent.mkdir(parents=True, exist_ok=True)
|
|
tmp = self.config_file.with_suffix('.tmp')
|
|
with open(tmp, 'w') as f:
|
|
json.dump(self.configs, f, indent=2)
|
|
f.flush()
|
|
os.fsync(f.fileno())
|
|
os.replace(tmp, self.config_file)
|
|
except (PermissionError, OSError) as e:
|
|
logger.error('_save_all_configs: write failed — config NOT persisted to disk: %s', e)
|
|
|
|
def get_service_config(self, service: str) -> Dict[str, Any]:
|
|
"""Get configuration for a specific service"""
|
|
if service not in self.service_schemas:
|
|
raise ValueError(f"Unknown service: {service}")
|
|
return self.configs.get(service, {})
|
|
|
|
def update_service_config(self, service: str, config: Dict[str, Any]) -> bool:
|
|
"""Update configuration for a specific service"""
|
|
if service not in self.service_schemas:
|
|
raise ValueError(f"Unknown service: {service}")
|
|
try:
|
|
# Validate types only (required fields are checked by validate_config, not here)
|
|
schema = self.service_schemas[service]
|
|
for field, expected_type in schema['types'].items():
|
|
if field in config and not isinstance(config[field], expected_type):
|
|
logger.error(f"Invalid type for {field}: expected {expected_type.__name__}")
|
|
return False
|
|
|
|
# Backup current config
|
|
self._backup_service_config(service)
|
|
|
|
# Update configuration
|
|
self.configs[service] = config
|
|
self._save_all_configs()
|
|
|
|
logger.info(f"Updated configuration for {service}")
|
|
return True
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error updating config for {service}: {e}")
|
|
return False
|
|
|
|
def validate_config(self, service: str, config: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Validate configuration for a service"""
|
|
if service not in self.service_schemas:
|
|
return {
|
|
"valid": False,
|
|
"errors": [f"Unknown service: {service}"],
|
|
"warnings": []
|
|
}
|
|
|
|
schema = self.service_schemas[service]
|
|
errors = []
|
|
warnings = []
|
|
|
|
# Check required fields (missing = error, wrong type = error)
|
|
for field in schema['required']:
|
|
if field not in config:
|
|
errors.append(f"Missing required field: {field}")
|
|
elif field in schema['types']:
|
|
expected_type = schema['types'][field]
|
|
if not isinstance(config[field], expected_type):
|
|
errors.append(f"Field {field} must be of type {expected_type.__name__}")
|
|
|
|
# Check optional fields
|
|
for field in schema['optional']:
|
|
if field in config and field in schema['types']:
|
|
expected_type = schema['types'][field]
|
|
if not isinstance(config[field], expected_type):
|
|
warnings.append(f"Field {field} should be of type {expected_type.__name__}")
|
|
|
|
return {
|
|
"valid": len(errors) == 0,
|
|
"errors": errors,
|
|
"warnings": warnings
|
|
}
|
|
|
|
@staticmethod
def _validate_vol_entry(service_id: str, vol: dict) -> bool:
    """Return True if a backup volume entry is safe to use; log and return False otherwise.

    An entry is safe when:
      * ``container`` is a string that matches ``_SAFE_CONTAINER_RE`` in full,
      * ``path`` is an absolute string path with no ``..`` component and no NUL,
      * ``name`` is a string that matches ``_SAFE_VOL_NAME_RE`` in full.

    Malformed entries (not a dict, or non-string values) are rejected rather
    than raising, so one bad entry cannot abort a whole backup or restore.
    """
    if not isinstance(vol, dict):
        logger.warning('Backup: malformed volume entry %r for %s — skipping', vol, service_id)
        return False
    container = vol.get('container', '')
    path = vol.get('path', '')
    name = vol.get('name', '')
    # fullmatch(), not match(): with match() the pattern's trailing `$` would
    # also accept a value that ends in a newline.
    if not isinstance(container, str) or not _SAFE_CONTAINER_RE.fullmatch(container):
        logger.warning('Backup: unsafe container name %r for %s — skipping', container, service_id)
        return False
    if (not isinstance(path, str) or not path.startswith('/')
            or '..' in path.split('/') or '\x00' in path):
        logger.warning('Backup: unsafe volume path %r for %s — skipping', path, service_id)
        return False
    if not isinstance(name, str) or not _SAFE_VOL_NAME_RE.fullmatch(name):
        logger.warning('Backup: unsafe volume name %r for %s — skipping', name, service_id)
        return False
    return True
|
|
|
|
@staticmethod
def _is_excluded(rel_path: str) -> bool:
    """Tell whether a data-relative path must be left out of backups.

    The path is normalised to posix separators, then checked against the
    exact-path exclude list and against every exclude glob (both as a full
    relative path and as a bare filename).
    """
    posix_path = rel_path.replace(os.sep, '/')
    if posix_path in _BACKUP_EXCLUDE_FILES:
        return True
    basename = posix_path.rsplit('/', 1)[-1]

    def hit(pattern: str) -> bool:
        if fnmatch.fnmatch(posix_path, pattern):
            return True
        if fnmatch.fnmatch(basename, pattern):
            return True
        # '**' segments: also match any path that has the prefix dir
        return pattern.endswith('/**') and posix_path.startswith(pattern[:-3] + '/')

    return any(hit(pattern) for pattern in _BACKUP_EXCLUDE_GLOBS)
|
|
|
|
def _copy_data_path(self, rel_src: str, backup_path: Path) -> None:
    """Copy a file or directory tree from data_dir/<rel_src> into the backup
    under data/<rel_src>, honouring the exclude list.

    Skips silently if the source does not exist. Files that cannot be read
    are logged and skipped; a failure to create a destination directory is
    logged and ends the copy of this path.

    Args:
        rel_src: Path relative to ``self.data_dir`` (file or directory).
        backup_path: Root of the staged backup.
    """
    src = self.data_dir / rel_src
    if not src.exists():
        return
    data_root = backup_path / 'data'
    try:
        if src.is_file():
            if self._is_excluded(rel_src):
                return
            dest = data_root / rel_src
            dest.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(src, dest)
            return
        rel_base = Path(rel_src)
        for root, dirs, files in os.walk(src):
            root_p = Path(root)
            # pathlib drops '.' components and never produces '//', so the
            # relative names need no string clean-up. (str.lstrip('./')
            # strips a *set of characters* and would eat the leading dot of
            # names such as '.ssh' or '.env'.)
            rel_dir = rel_base / root_p.relative_to(src)
            # Prune excluded directories in place so os.walk skips them.
            dirs[:] = [d for d in dirs
                       if not self._is_excluded((rel_dir / d).as_posix())]
            for fname in files:
                rel_file = (rel_dir / fname).as_posix()
                if self._is_excluded(rel_file):
                    continue
                dest = data_root / rel_file
                dest.parent.mkdir(parents=True, exist_ok=True)
                try:
                    shutil.copy2(root_p / fname, dest)
                except (PermissionError, OSError) as e:
                    logger.warning('Backup: could not copy %s: %s (skipping)', rel_file, e)
    except (PermissionError, OSError) as e:
        logger.warning('Backup: could not copy %s: %s (skipping)', rel_src, e)
|
|
|
|
def _backup_service_volumes(self, backup_path: Path, service_registry) -> None:
|
|
"""Stream service data out of each container via 'docker exec tar'.
|
|
|
|
Archives are relative (created with -C <path> .) so they can be safely
|
|
restored with -C <path> without risk of path traversal outside the volume.
|
|
Writes to a .partial temp file then renames atomically on success.
|
|
"""
|
|
try:
|
|
plan = service_registry.get_backup_plan()
|
|
except Exception as e:
|
|
logger.warning('_backup_service_volumes: could not get backup plan: %s', e)
|
|
return
|
|
for entry in plan:
|
|
service_id = entry['service_id']
|
|
volumes = entry.get('volumes') or []
|
|
if not volumes:
|
|
continue
|
|
svc_dir = backup_path / 'service_data' / service_id
|
|
svc_dir.mkdir(parents=True, exist_ok=True)
|
|
for vol in volumes:
|
|
if not self._validate_vol_entry(service_id, vol):
|
|
continue
|
|
container = vol['container']
|
|
path = vol['path']
|
|
name = vol['name']
|
|
archive_path = svc_dir / f'{name}.tar.gz'
|
|
tmp_path = svc_dir / f'{name}.tar.gz.partial'
|
|
try:
|
|
with open(tmp_path, 'wb') as af:
|
|
result = subprocess.run(
|
|
# -C path; then '.' archives the whole dir with relative entries.
|
|
# '--' prevents path/container from being parsed as options.
|
|
['docker', 'exec', '--', container,
|
|
'tar', '-C', path, '-czf', '-', '.'],
|
|
stdout=af,
|
|
stderr=subprocess.PIPE,
|
|
timeout=300,
|
|
)
|
|
if result.returncode != 0:
|
|
logger.warning(
|
|
'Backup: docker exec tar failed for %s/%s: %s',
|
|
service_id, name, result.stderr.decode(errors='replace'),
|
|
)
|
|
tmp_path.unlink(missing_ok=True)
|
|
else:
|
|
os.replace(tmp_path, archive_path)
|
|
logger.info('Backup: archived %s/%s', service_id, name)
|
|
except subprocess.TimeoutExpired:
|
|
logger.warning('Backup: timed out streaming %s/%s', service_id, name)
|
|
tmp_path.unlink(missing_ok=True)
|
|
except Exception as e:
|
|
logger.warning('Backup: failed to archive %s/%s: %s', service_id, name, e)
|
|
tmp_path.unlink(missing_ok=True)
|
|
|
|
def _restore_service_volumes(self, backup_path: Path, service_registry) -> None:
|
|
"""Pipe archived service data back into containers via 'docker exec -i tar'.
|
|
|
|
Extracts with -C <path>, matching how archives were created (relative paths).
|
|
This bounds extraction to within the declared volume directory.
|
|
"""
|
|
svc_data_dir = backup_path / 'service_data'
|
|
if not svc_data_dir.is_dir():
|
|
return
|
|
for svc_dir in svc_data_dir.iterdir():
|
|
if not svc_dir.is_dir():
|
|
continue
|
|
service_id = svc_dir.name
|
|
svc = service_registry.get(service_id)
|
|
if not svc:
|
|
logger.warning('Restore: unknown service %s in backup, skipping', service_id)
|
|
continue
|
|
volumes = (svc.get('backup') or {}).get('volumes') or []
|
|
for vol in volumes:
|
|
if not self._validate_vol_entry(service_id, vol):
|
|
continue
|
|
container = vol['container']
|
|
path = vol['path']
|
|
name = vol['name']
|
|
archive_path = svc_dir / f'{name}.tar.gz'
|
|
if not archive_path.exists():
|
|
continue
|
|
try:
|
|
with open(archive_path, 'rb') as af:
|
|
result = subprocess.run(
|
|
['docker', 'exec', '-i', '--', container,
|
|
'tar', '-C', path, '-xzf', '-'],
|
|
stdin=af,
|
|
stderr=subprocess.PIPE,
|
|
timeout=300,
|
|
)
|
|
if result.returncode != 0:
|
|
logger.warning(
|
|
'Restore: docker exec tar failed for %s/%s: %s',
|
|
service_id, name, result.stderr.decode(errors='replace'),
|
|
)
|
|
else:
|
|
logger.info('Restore: restored %s/%s', service_id, name)
|
|
except subprocess.TimeoutExpired:
|
|
logger.warning('Restore: timed out restoring %s/%s', service_id, name)
|
|
except Exception as e:
|
|
logger.warning('Restore: failed to restore %s/%s: %s', service_id, name, e)
|
|
|
|
def backup_config(self, service_registry=None, passphrase: Optional[str] = None) -> str:
    """Create a backup of cell_config.json, all critical secrets/keys, runtime
    config and (when service_registry is provided) live service data volumes.

    When *passphrase* is supplied the staged backup directory is packed into an
    encrypted archive (<backup_id>.tar.gz.age) and the plaintext staging dir is
    removed. The archive contains key material; it is written mode 0600.

    The staging directory is restricted to mode 0700 before anything is
    copied into it, and a staging directory created by this call is removed
    again if the backup fails part-way.

    Args:
        service_registry: Optional registry used to stream service volumes.
        passphrase: Optional passphrase; a non-empty value selects the
            encrypted archive format.

    Returns:
        The backup id (plaintext directory name) or the encrypted archive
        file name.

    Raises:
        Exception: any error during the backup is logged and re-raised.
    """
    backup_path = None
    staging_created = False
    try:
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        backup_id = f"backup_{timestamp}"
        backup_path = self.backup_dir / backup_id
        try:
            backup_path.mkdir(mode=0o700, parents=True)
            staging_created = True
        except FileExistsError:
            # A backup started within the same second reuses the directory,
            # as before; anything else occupying the name is an error.
            if not backup_path.is_dir():
                raise
        # Lock the staging dir down up front — it is about to hold key
        # material. mkdir's mode is subject to the umask and is not applied
        # to a reused directory, hence the explicit chmod.
        try:
            os.chmod(backup_path, 0o700)
        except OSError as e:
            logger.warning('Backup: could not restrict permissions on %s: %s', backup_path, e)

        # Primary config and secrets
        if self.config_file.exists():
            shutil.copy2(self.config_file, backup_path / 'cell_config.json')
        if self.secrets_file.exists():
            shutil.copy2(self.secrets_file, backup_path / 'secrets.yaml')

        # Runtime-generated files that must match cell_config.json after restore
        config_dir = Path(os.environ.get('CONFIG_DIR', '/app/config'))
        env_file = Path(os.environ.get('ENV_FILE', '/app/.env'))

        extra = [
            (Path(LIVE_CADDYFILE), 'Caddyfile'),
            (config_dir / 'dns' / 'Corefile', 'Corefile'),
            (env_file, '.env'),
        ]
        for src, dest_name in extra:
            if src.exists():
                shutil.copy2(src, backup_path / dest_name)

        # DNS zone files
        dns_data = self.data_dir / 'dns'
        if dns_data.is_dir():
            zones_dir = backup_path / 'dns_zones'
            zones_dir.mkdir(exist_ok=True)
            for zone_file in dns_data.glob('*.zone'):
                shutil.copy2(zone_file, zones_dir / zone_file.name)

        # Service-specific user account files (authoritative source of truth —
        # cell_config.json only carries a best-effort sync of these).
        svc_user_files = [
            (self.data_dir / 'email' / 'users.json', 'email_users.json'),
            (self.data_dir / 'calendar' / 'users.json', 'calendar_users.json'),
            (self.data_dir / 'calendar' / 'calendars.json', 'calendar_calendars.json'),
        ]
        for src, dest_name in svc_user_files:
            if src.exists():
                try:
                    shutil.copy2(src, backup_path / dest_name)
                except (PermissionError, OSError) as e:
                    logger.warning(f"Could not back up {src.name}: {e} (skipping)")

        # CRITICAL secrets, keys and state under data/. Losing any of these on a
        # restore would lock out the admin, re-provision all WireGuard peers, or
        # render vault-encrypted secrets unrecoverable. Each path is copied under
        # data/<rel> in the archive and skipped gracefully if absent.
        critical_data_paths = [
            # API auth + identity
            'api/auth_users.json',
            'api/.flask_secret_key',
            'api/peers.json',
            'api/peer_service_credentials.json',
            'api/cell_links.json',
            'api/ddns_token',
            # Append-only audit trail (who changed what) + rotated segments
            'api/audit',
            # WireGuard key material (server + peers) and live confs
            'wireguard/keys',
            'wireguard/wg_confs',
            'api/wireguard/keys',
            # Vault: internal CA, certs, fernet.key, trust, encrypted secrets.
            # Without keys/fernet.key all vault secrets are unrecoverable.
            'vault',
            # Connectivity instance configs (host bind-mounts, not docker volumes):
            # wg_ext0.conf, redsocks.conf, sshuttle keys/known_hosts, etc.
            'api/services',
            'services',
            # Caddy issued certs / ACME state (avoid re-issuance + rate-limits)
            'caddy',
        ]
        for rel in critical_data_paths:
            self._copy_data_path(rel, backup_path)

        # Live service data volumes (streamed via docker exec)
        if service_registry is not None:
            self._backup_service_volumes(backup_path, service_registry)

        services = ['identity'] + list(self.service_schemas.keys())
        encrypted = bool(passphrase)
        manifest = {
            "backup_id": backup_id,
            "timestamp": datetime.now().isoformat(),
            "services": services,
            "files": sorted(p.relative_to(backup_path).as_posix()
                            for p in backup_path.rglob('*') if p.is_file()),
            "includes_service_data": service_registry is not None,
            "encrypted": encrypted,
            "contains_secrets": True,
        }
        with open(backup_path / 'manifest.json', 'w') as f:
            json.dump(manifest, f, indent=2)

        if encrypted:
            archive_id = self._pack_and_encrypt(backup_path, backup_id, passphrase)
            logger.info(f"Created encrypted configuration backup: {archive_id}")
            return archive_id

        logger.info(f"Created configuration backup: {backup_id}")
        return backup_id

    except Exception as e:
        logger.error(f"Error creating backup: {e}")
        if staging_created:
            # Do not leave a half-built plaintext copy of the secrets behind.
            shutil.rmtree(backup_path, ignore_errors=True)
        raise
|
|
|
|
def _pack_and_encrypt(self, backup_path: Path, backup_id: str,
                      passphrase: str) -> str:
    """Bundle the staged backup directory into an encrypted archive.

    The directory is tarred and gzipped in memory, encrypted with the
    passphrase and written to <backup_id>.tar.gz.age (mode 0600) inside the
    backup directory. The plaintext staging dir is then removed.

    Returns:
        The archive file name (not the full path).
    """
    archive_name = f'{backup_id}.tar.gz.age'
    target = self.backup_dir / archive_name

    staged = io.BytesIO()
    with tarfile.open(fileobj=staged, mode='w:gz') as bundle:
        bundle.add(backup_path, arcname=backup_id)
    ciphertext = backup_crypto.encrypt_bytes(staged.getvalue(), passphrase)

    # Create with 0600 from the start; the chmod afterwards also covers the
    # case where the file already existed with other permissions.
    open_flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
    with os.fdopen(os.open(str(target), open_flags, 0o600), 'wb') as out:
        out.write(ciphertext)
    os.chmod(str(target), 0o600)

    shutil.rmtree(backup_path, ignore_errors=True)
    return archive_name
|
|
|
|
def _resolve_backup_dir(self, backup_id: str, passphrase: Optional[str]):
|
|
"""Return (backup_path, cleanup_dir) for a backup id.
|
|
|
|
For a plaintext backup, backup_path is the on-disk directory and
|
|
cleanup_dir is None. For an encrypted archive (<id>.tar.gz.age, detected
|
|
either by the id ending in .age or by an archive file existing), the
|
|
archive is decrypted and extracted to a temp dir which the caller must
|
|
remove via cleanup_dir. Raises PermissionError on a bad/missing
|
|
passphrase so the route can return 400.
|
|
"""
|
|
import tempfile
|
|
archive_path = None
|
|
if backup_id.endswith('.age'):
|
|
archive_path = self.backup_dir / backup_id
|
|
else:
|
|
candidate = self.backup_dir / f'{backup_id}.tar.gz.age'
|
|
if candidate.exists() and not (self.backup_dir / backup_id).is_dir():
|
|
archive_path = candidate
|
|
if archive_path is None:
|
|
return self.backup_dir / backup_id, None
|
|
|
|
if not archive_path.exists():
|
|
raise ValueError(f"Backup {backup_id} not found")
|
|
blob = archive_path.read_bytes()
|
|
try:
|
|
plaintext = backup_crypto.decrypt_bytes(blob, passphrase or '')
|
|
except backup_crypto.BackupDecryptError as e:
|
|
raise PermissionError(str(e)) from e
|
|
tmpdir = Path(tempfile.mkdtemp(prefix='pic_restore_'))
|
|
with tarfile.open(fileobj=io.BytesIO(plaintext), mode='r:gz') as tar:
|
|
tar.extractall(tmpdir)
|
|
inner = [p for p in tmpdir.iterdir() if p.is_dir()]
|
|
backup_path = inner[0] if len(inner) == 1 else tmpdir
|
|
return backup_path, tmpdir
|
|
|
|
def restore_config(self, backup_id: str, services: list = None,
|
|
service_registry=None, passphrase: Optional[str] = None) -> bool:
|
|
"""Restore from backup. If services list given, only restore those service configs (selective).
|
|
|
|
Encrypted archives (<id>.tar.gz.age) are auto-detected and require the
|
|
passphrase; a wrong/missing passphrase raises PermissionError (route → 400).
|
|
"""
|
|
cleanup_dir = None
|
|
try:
|
|
backup_path, cleanup_dir = self._resolve_backup_dir(backup_id, passphrase)
|
|
if not backup_path.exists():
|
|
raise ValueError(f"Backup {backup_id} not found")
|
|
manifest_file = backup_path / 'manifest.json'
|
|
if not manifest_file.exists():
|
|
raise ValueError(f"Backup manifest not found")
|
|
|
|
if services is not None:
|
|
# Selective restore: only update specified services in running config
|
|
backup_cfg_path = backup_path / 'cell_config.json'
|
|
if backup_cfg_path.exists():
|
|
with open(backup_cfg_path) as f:
|
|
backup_cfg = json.load(f)
|
|
for svc in services:
|
|
if svc == 'identity':
|
|
if '_identity' in backup_cfg:
|
|
self.configs['_identity'] = backup_cfg['_identity']
|
|
elif svc in backup_cfg:
|
|
self.configs[svc] = backup_cfg[svc]
|
|
self._save_all_configs()
|
|
logger.info(f"Selectively restored {services} from backup: {backup_id}")
|
|
return True
|
|
|
|
# ── Full restore ─────────────────────────────────────────────────
|
|
# Ordering matters: vault (incl. fernet.key) is restored FIRST because
|
|
# everything else's secrets are encrypted with it; then identity/.env;
|
|
# then WireGuard key material; then cell links; then generated config;
|
|
# then per-service connectivity configs; then auth/ddns.
|
|
config_dir = Path(os.environ.get('CONFIG_DIR', '/app/config'))
|
|
env_file = Path(os.environ.get('ENV_FILE', '/app/.env'))
|
|
|
|
# (1) Vault FIRST — internal CA, certs, fernet.key, trust, secrets.
|
|
self._restore_data_path(backup_path, 'vault')
|
|
|
|
# (2) Identity / primary config + secrets + .env
|
|
config_backup = backup_path / 'cell_config.json'
|
|
if config_backup.exists():
|
|
shutil.copy2(config_backup, self.config_file)
|
|
secrets_backup = backup_path / 'secrets.yaml'
|
|
if secrets_backup.exists():
|
|
shutil.copy2(secrets_backup, self.secrets_file)
|
|
if (backup_path / '.env').exists():
|
|
try:
|
|
env_file.parent.mkdir(parents=True, exist_ok=True)
|
|
shutil.copy2(backup_path / '.env', env_file)
|
|
except (PermissionError, OSError) as e:
|
|
logger.warning(f"Could not restore .env: {e} (skipping)")
|
|
|
|
# (3) WireGuard key material + live confs, then peers.json
|
|
for rel in ('wireguard/keys', 'wireguard/wg_confs', 'api/wireguard/keys'):
|
|
self._restore_data_path(backup_path, rel)
|
|
for rel in ('api/peers.json', 'api/peer_service_credentials.json'):
|
|
self._restore_data_path(backup_path, rel)
|
|
|
|
# (4) Cell-to-cell links / permissions + audit trail
|
|
self._restore_data_path(backup_path, 'api/cell_links.json')
|
|
self._restore_data_path(backup_path, 'api/audit')
|
|
|
|
# (5) Caddy issued certs/ACME, DNS Corefile + zones (generated files are
|
|
# reapplied below, but restoring them gives a correct starting point).
|
|
self._restore_data_path(backup_path, 'caddy')
|
|
if (backup_path / 'Caddyfile').exists():
|
|
try:
|
|
Path(LIVE_CADDYFILE).parent.mkdir(parents=True, exist_ok=True)
|
|
shutil.copy2(backup_path / 'Caddyfile', Path(LIVE_CADDYFILE))
|
|
except (PermissionError, OSError) as e:
|
|
logger.warning(f"Could not restore Caddyfile: {e} (skipping)")
|
|
if (backup_path / 'Corefile').exists():
|
|
try:
|
|
dest = config_dir / 'dns' / 'Corefile'
|
|
dest.parent.mkdir(parents=True, exist_ok=True)
|
|
shutil.copy2(backup_path / 'Corefile', dest)
|
|
except (PermissionError, OSError) as e:
|
|
logger.warning(f"Could not restore Corefile: {e} (skipping)")
|
|
zones_backup = backup_path / 'dns_zones'
|
|
if zones_backup.is_dir():
|
|
dns_data = self.data_dir / 'dns'
|
|
try:
|
|
dns_data.mkdir(parents=True, exist_ok=True)
|
|
for zone_file in zones_backup.glob('*.zone'):
|
|
try:
|
|
shutil.copy2(zone_file, dns_data / zone_file.name)
|
|
except (PermissionError, OSError) as zone_err:
|
|
logger.warning(f"Could not restore zone {zone_file.name}: {zone_err} (skipping)")
|
|
except (PermissionError, OSError) as dir_err:
|
|
logger.warning(f"Could not create dns data dir {dns_data}: {dir_err} (skipping)")
|
|
|
|
# (6) Per-service connectivity configs (host bind-mounts)
|
|
for rel in ('api/services', 'services'):
|
|
self._restore_data_path(backup_path, rel)
|
|
|
|
# (7) Auth users, flask secret, ddns token (after vault, before recompose)
|
|
for rel in ('api/auth_users.json', 'api/.flask_secret_key', 'api/ddns_token'):
|
|
self._restore_data_path(backup_path, rel)
|
|
|
|
# Service-specific user account files
|
|
svc_restore_map = [
|
|
(backup_path / 'email_users.json', self.data_dir / 'email' / 'users.json'),
|
|
(backup_path / 'calendar_users.json', self.data_dir / 'calendar' / 'users.json'),
|
|
(backup_path / 'calendar_calendars.json', self.data_dir / 'calendar' / 'calendars.json'),
|
|
]
|
|
for src, dest in svc_restore_map:
|
|
if src.exists():
|
|
try:
|
|
dest.parent.mkdir(parents=True, exist_ok=True)
|
|
shutil.copy2(src, dest)
|
|
except (PermissionError, OSError) as e:
|
|
logger.warning(f"Could not restore {dest.name}: {e} (skipping)")
|
|
|
|
# Reload config now that cell_config.json is restored.
|
|
self.configs = self._load_all_configs()
|
|
|
|
# (8) Live service data volumes (after containers exist — best-effort)
|
|
if service_registry is not None:
|
|
self._restore_service_volumes(backup_path, service_registry)
|
|
|
|
# (9) Reapply runtime state: regenerate generated config from the
|
|
# restored source-of-truth and re-apply routing/links.
|
|
self._reapply_runtime_state()
|
|
|
|
logger.info(f"Restored configuration from backup: {backup_id}")
|
|
return True
|
|
except PermissionError:
|
|
raise
|
|
except Exception as e:
|
|
logger.error(f"Error restoring backup {backup_id}: {e}")
|
|
return False
|
|
finally:
|
|
if cleanup_dir is not None:
|
|
shutil.rmtree(cleanup_dir, ignore_errors=True)
|
|
|
|
def _restore_data_path(self, backup_path: Path, rel: str) -> None:
|
|
"""Restore data/<rel> from the backup into self.data_dir/<rel>.
|
|
Handles both files and directory trees. Skips silently if absent."""
|
|
src = backup_path / 'data' / rel
|
|
if not src.exists():
|
|
return
|
|
dest = self.data_dir / rel
|
|
try:
|
|
if src.is_dir():
|
|
dest.mkdir(parents=True, exist_ok=True)
|
|
for root, _dirs, files in os.walk(src):
|
|
root_p = Path(root)
|
|
rel_root = root_p.relative_to(src)
|
|
for fname in files:
|
|
out = dest / rel_root / fname
|
|
out.parent.mkdir(parents=True, exist_ok=True)
|
|
shutil.copy2(root_p / fname, out)
|
|
else:
|
|
dest.parent.mkdir(parents=True, exist_ok=True)
|
|
shutil.copy2(src, dest)
|
|
except (PermissionError, OSError) as e:
|
|
logger.warning(f"Could not restore {rel}: {e} (skipping)")
|
|
|
|
def _reapply_runtime_state(self) -> None:
    """Rebuild generated config and re-apply routing / cell links after a restore.

    Regenerates the Caddyfile and Corefile from the restored source-of-truth
    and then re-applies active services, routes and pending cell-link pushes
    through the live managers. Each step is best-effort: a failure is logged
    as a warning and the remaining steps still run. If the ``managers``
    module cannot be imported at all (partial/offline restore) the whole
    reapply is skipped.

    NOTE: this does NOT stop/start containers. A full restore should be
    followed by `make restart` so containers pick up restored key material
    and regenerated config. See restore_config docstring / README.
    """
    try:
        from managers import (caddy_manager, firewall_manager,
                              connectivity_manager, cell_link_manager,
                              service_composer, peer_registry)
    except Exception as e:
        logger.warning(f"Reapply: managers unavailable ({e}); skipping reapply")
        return

    def regenerate_caddyfile() -> None:
        caddy_manager.regenerate_with_installed([])

    def regenerate_corefile() -> None:
        peers = peer_registry.list_peers() if peer_registry else []
        cell_links = cell_link_manager.list_connections() if cell_link_manager else None
        firewall_manager.generate_corefile(
            peers, domain=self.get_internal_domain(), cell_links=cell_links)

    def reapply_services() -> None:
        if service_composer is not None:
            service_composer.reapply_active_services()

    def apply_routes() -> None:
        if connectivity_manager is not None:
            connectivity_manager.apply_routes()

    def replay_pushes() -> None:
        if cell_link_manager is not None:
            cell_link_manager.replay_pending_pushes()

    # Order matters: generated config first, then services, routes, links.
    steps = (
        ('regenerate Caddyfile', regenerate_caddyfile),
        ('regenerate Corefile', regenerate_corefile),
        ('reapply_active_services', reapply_services),
        ('apply_routes', apply_routes),
        ('replay_pending_pushes', replay_pushes),
    )
    for label, step in steps:
        try:
            step()
        except Exception as e:
            logger.warning(f"Reapply: {label} failed: {e}")
|
|
|
|
def list_backups(self) -> List[Dict[str, Any]]:
    """List all available backups (plaintext dirs and encrypted archives).

    Plaintext backups are sub-directories of ``self.backup_dir`` containing a
    ``manifest.json``; the parsed manifest is returned as the listing entry.
    Directories without a manifest are ignored, and manifests that cannot be
    read or are not a JSON object are logged and skipped.

    Encrypted backups are ``*.tar.gz.age`` files. Their manifest cannot be
    read without the passphrase, so an entry is synthesised from the file
    name (``backup_<date>_<time>.tar.gz.age`` gives ``<date>T<time>``).

    Returns:
        Listing entries sorted by ``timestamp``, newest first. Entries with a
        missing or null timestamp sort last instead of breaking the sort.
    """
    encrypted_suffix = '.tar.gz.age'
    backups: List[Dict[str, Any]] = []

    for entry in self.backup_dir.iterdir():
        if entry.is_dir():
            manifest_file = entry / 'manifest.json'
            if not manifest_file.exists():
                continue
            try:
                with open(manifest_file, 'r') as f:
                    manifest = json.load(f)
                if not isinstance(manifest, dict):
                    raise ValueError("manifest is not a JSON object")
                backups.append(manifest)
            except Exception as e:
                logger.error(f"Error reading backup manifest {entry.name}: {e}")
        elif entry.is_file() and entry.name.endswith(encrypted_suffix):
            # backup_<ts>.tar.gz.age → backup_<ts>
            stem = entry.name[:-len(encrypted_suffix)]
            ts = stem.replace('backup_', '').replace('_', 'T', 1)
            backups.append({
                'backup_id': entry.name,
                'timestamp': ts,
                'encrypted': True,
                'contains_secrets': True,
            })

    # Coerce the sort key to str so a null/non-string timestamp in one
    # manifest cannot raise TypeError and take down the whole listing.
    return sorted(backups, key=lambda x: str(x.get('timestamp') or ''), reverse=True)
|
|
|
|
def delete_backup(self, backup_id: str) -> bool:
    """Delete a backup (plaintext directory or encrypted archive).

    Args:
        backup_id: Name of the backup entry directly inside
            ``self.backup_dir`` (a directory name or an archive file name).

    Returns:
        True when the backup was removed. False when the id is invalid, the
        backup does not exist, or removal failed; the reason is logged.
    """
    try:
        # The id must be one plain path component. Without this check an
        # empty id or '.' resolves to backup_dir itself (deleting every
        # backup), and '..', absolute paths or embedded separators escape
        # the backup directory entirely.
        if (not isinstance(backup_id, str)
                or backup_id in ('', '.', '..')
                or Path(backup_id).name != backup_id):
            raise ValueError(f"Invalid backup id: {backup_id!r}")

        backup_path = self.backup_dir / backup_id
        if backup_path.is_dir():
            shutil.rmtree(backup_path)
        elif backup_path.is_file():
            backup_path.unlink()
        else:
            raise ValueError(f"Backup {backup_id} not found")
        logger.info(f"Deleted backup: {backup_id}")
        return True

    except Exception as e:
        logger.error(f"Error deleting backup {backup_id}: {e}")
        return False
|
|
|
|
def get_config_hash(self, service: str) -> str:
    """Return the SHA-256 hex digest of a service's config, for change detection.

    The config is serialised as JSON with sorted keys before hashing, so the
    order in which keys were inserted does not affect the digest.
    """
    canonical = json.dumps(self.get_service_config(service), sort_keys=True)
    digest = hashlib.sha256()
    digest.update(canonical.encode())
    return digest.hexdigest()
|
|
|
|
def has_config_changed(self, service: str, previous_hash: str) -> bool:
    """Return True when the service's current config hash differs from ``previous_hash``."""
    return self.get_config_hash(service) != previous_hash
|
|
|
|
def export_config(self, format: str = 'json', services: list = None) -> str:
    """Serialise service configurations to a JSON or YAML string.

    The ``_identity`` section is exported under the clean key ``identity``.
    Every other key starting with ``_`` is internal state and is left out.

    Args:
        format: ``'json'`` (default) or ``'yaml'``.
        services: Optional list of service keys to include; ``None`` exports
            every service. The filter does not apply to ``identity``.

    Raises:
        ValueError: If ``format`` is not supported. Any error is logged and
            then re-raised.
    """
    try:
        export_data = {}
        if '_identity' in self.configs:
            export_data['identity'] = dict(self.configs['_identity'])

        export_data.update(
            (name, section)
            for name, section in self.configs.items()
            if not name.startswith('_')
            and (services is None or name in services)
        )

        if format == 'json':
            return json.dumps(export_data, indent=2)
        if format == 'yaml':
            return yaml.dump(export_data, default_flow_style=False)
        raise ValueError(f"Unsupported format: {format}")
    except Exception as e:
        logger.error(f"Error exporting config: {e}")
        raise
|
|
|
|
def import_config(self, config_data: str, format: str = 'json', services: list = None) -> bool:
    """Import configurations from a string, merging into the existing config.

    Only keys present in the import are overwritten; existing fields that the
    import does not mention are kept. The exported ``identity`` section is
    merged into ``_identity`` (limited to ``cell_name``, ``domain``,
    ``ip_range`` and ``wireguard_port``). Service sections are merged only
    for keys known to ``self.service_schemas``.

    The import is all-or-nothing with respect to ``self.configs``: every
    section is validated and merged into a staging dict first, so a malformed
    section cannot leave the in-memory config partially updated.

    Args:
        config_data: Serialised configuration.
        format: ``'json'`` (default) or ``'yaml'``.
        services: Optional list restricting which sections are imported
            (use ``'identity'`` to include the identity section).

    Returns:
        True on success, False on any error (the error is logged).
    """
    try:
        if format == 'json':
            configs = json.loads(config_data)
        elif format == 'yaml':
            configs = yaml.safe_load(config_data)
        else:
            raise ValueError(f"Unsupported format: {format}")

        if not isinstance(configs, dict):
            raise ValueError("Imported config must be a mapping at the top level")

        staged = {}

        # Handle identity (exported as 'identity', stored as '_identity')
        if 'identity' in configs and (services is None or 'identity' in services):
            ident = configs['identity']
            if not isinstance(ident, dict):
                raise ValueError("'identity' section must be a mapping")
            cur = dict(self.configs.get('_identity', {}))
            for k in ('cell_name', 'domain', 'ip_range', 'wireguard_port'):
                if k in ident:
                    cur[k] = ident[k]
            staged['_identity'] = cur

        # Merge service configs (don't replace wholesale — keep existing fields not in import)
        for key, val in configs.items():
            if key == 'identity' or key not in self.service_schemas:
                continue
            if services is not None and key not in services:
                continue
            if not isinstance(val, dict):
                raise ValueError(f"Config for service {key!r} must be a mapping")
            cur_svc = dict(self.configs.get(key, {}))
            cur_svc.update(val)
            staged[key] = cur_svc

        # Everything validated: commit in one step, then persist.
        self.configs.update(staged)
        self._save_all_configs()
        logger.info("Imported configurations successfully")
        return True
    except Exception as e:
        logger.error(f"Error importing config: {e}")
        return False
|
|
|
|
def _backup_service_config(self, service: str):
    """Pre-update backup hook for a single service's config.

    Intentionally does nothing: with the unified config there is no
    per-service backup to take. Kept so existing callers keep working.
    """
    return None
|
|
|
|
def get_identity(self) -> Dict[str, Any]:
    """Return the stored ``_identity`` section, or an empty dict if there is none.

    The stored dict itself is returned (not a copy), so mutations by the
    caller affect the in-memory configuration.
    """
    try:
        return self.configs['_identity']
    except KeyError:
        return {}
|
|
|
|
def get_effective_domain(self) -> str:
    """Return the FQDN that public-facing services should use.

    In ``lan`` mode (the default when ``domain_mode`` is unset) this is
    ``_identity.domain``. In any other mode ``_identity.domain_name`` is
    preferred, falling back to ``domain`` when no domain name is set. When
    neither is set, the ``CELL_DOMAIN`` environment variable is used, with
    ``'cell'`` as the last resort.
    """
    ident = self.get_identity()
    candidates = [ident.get('domain')]
    if ident.get('domain_mode', 'lan') != 'lan':
        candidates.insert(0, ident.get('domain_name'))

    for candidate in candidates:
        if candidate:
            return candidate
    return os.environ.get('CELL_DOMAIN', 'cell')
|
|
|
|
def get_internal_domain(self) -> str:
    """Return the CoreDNS zone name (always ``_identity.domain``).

    Falls back to the ``CELL_DOMAIN`` environment variable, then ``'cell'``,
    when the identity has no domain set.
    """
    zone = self.get_identity().get('domain')
    if zone:
        return zone
    return os.environ.get('CELL_DOMAIN', 'cell')
|
|
|
|
def set_identity_field(self, key: str, value: Any):
    """Set one field of the identity section (creating the section if needed) and persist."""
    identity = self.configs.setdefault('_identity', {})
    identity[key] = value
    self._save_all_configs()
|
|
|
|
def get_installed_services(self) -> dict:
    """Return the ``installed_services`` mapping stored under ``_identity``.

    An empty dict is returned when the identity section or the mapping is
    missing. Nothing is created or persisted.
    """
    identity = self.configs.get('_identity', {})
    return identity.get('installed_services', {})
|
|
|
|
def set_installed_service(self, service_id: str, record: dict):
    """Store ``record`` under ``_identity.installed_services[service_id]`` and persist.

    Missing ``_identity`` / ``installed_services`` containers are created.
    """
    identity = self.configs.setdefault('_identity', {})
    installed = identity.setdefault('installed_services', {})
    installed[service_id] = record
    self._save_all_configs()
|
|
|
|
def remove_installed_service(self, service_id: str):
    """Forget an installed service and persist.

    Removes ``service_id`` from both ``installed_services`` and
    ``service_ips`` under ``_identity``. Unknown ids are ignored; missing
    containers are created empty.
    """
    identity = self.configs.setdefault('_identity', {})
    for section in ('installed_services', 'service_ips'):
        identity.setdefault(section, {}).pop(service_id, None)
    self._save_all_configs()
|
|
|
|
# ── Image signature verification configuration ────────────────────────
|
|
#
|
|
# Controls how a cell treats store-service container images at install:
|
|
# off — skip cosign verification and the digest-pin requirement
|
|
# warn — log a warning on a missing digest / failed signature, proceed
|
|
# enforce — refuse to start a service whose image is undigested,
|
|
# unsigned, or whose signature does not verify
|
|
#
|
|
# Default is "warn" until the publish pipeline signs all store images; a
|
|
# later phase flips the default to "enforce". The section is backed up and
|
|
# restored with the rest of cell_config.json automatically.
|
|
|
|
def get_image_verification(self) -> Dict[str, Any]:
    """Return a copy of the image verification config, e.g. ``{'mode': 'warn'}``.

    If the stored section is not a dict or its ``mode`` is not a recognised
    mode, it is replaced in memory with ``{'mode': 'warn'}``. The replacement
    is not persisted by this method.
    """
    cfg = self.configs.get('image_verification')
    well_formed = isinstance(cfg, dict) and cfg.get('mode') in _IMAGE_VERIFY_MODES
    if not well_formed:
        cfg = {'mode': 'warn'}
        self.configs['image_verification'] = cfg
    return dict(cfg)
|
|
|
|
def get_image_verification_mode(self) -> str:
    """Return only the verification mode string (off|warn|enforce)."""
    verification = self.get_image_verification()
    return verification['mode']
|
|
|
|
def set_image_verification_mode(self, mode: str) -> None:
    """Persist the image verification mode.

    The mode is matched case-insensitively and stored lower-cased.

    Raises:
        ValueError: If ``mode`` is not a string naming one of the supported
            modes. Non-string values are rejected here as well, instead of
            failing with ``AttributeError`` on ``.lower()``.
    """
    if mode is not None and not isinstance(mode, str):
        raise ValueError(
            f"Invalid image verification mode: {mode!r} "
            f"(expected one of {sorted(_IMAGE_VERIFY_MODES)})"
        )
    mode = (mode or '').lower()
    if mode not in _IMAGE_VERIFY_MODES:
        raise ValueError(
            f"Invalid image verification mode: {mode!r} "
            f"(expected one of {sorted(_IMAGE_VERIFY_MODES)})"
        )
    self.configs['image_verification'] = {'mode': mode}
    self._save_all_configs()
|
|
|
|
# ── Logging verbosity configuration ───────────────────────────────────
|
|
def _ensure_logging_config(self) -> None:
    """Ensure a well-formed `logging` section exists, migrating the legacy
    config/api/log_levels.json side-file on first load.

    Missing or malformed parts of the section are repaired in memory:
    non-dict containers are replaced, and any root / service / container
    level that is missing or not a valid level is reset to ``'INFO'``.

    The legacy file held a flat {service: LEVEL} map for the picell.* python
    loggers. It is read once and merged in; the section then becomes the
    single source of truth (the side-file is ignored thereafter).
    """
    cfg = self.configs.get('logging')
    if not isinstance(cfg, dict):
        cfg = _default_logging_config()
        self.configs['logging'] = cfg

    python = cfg.get('python')
    if not isinstance(python, dict):
        python = cfg['python'] = {}
    if not python.get('root') or python['root'] not in _VALID_LOG_LEVELS:
        python['root'] = 'INFO'

    services = python.get('services')
    if not isinstance(services, dict):
        services = python['services'] = {}
    for svc in _LOGGING_PYTHON_SERVICES:
        if services.get(svc) not in _VALID_LOG_LEVELS:
            # Assign rather than setdefault: setdefault would leave an
            # existing-but-invalid level untouched.
            services[svc] = 'INFO'

    containers = cfg.get('containers')
    if not isinstance(containers, dict):
        containers = cfg['containers'] = {}
    for c in _LOGGING_CONTAINERS:
        if containers.get(c) not in _VALID_LOG_LEVELS:
            containers[c] = 'INFO'

    # One-time migration from the legacy side-file.
    if not cfg.get('_migrated_log_levels'):
        legacy = self.config_file.parent / 'api' / 'log_levels.json'
        legacy_flat = self.config_file.parent / 'log_levels.json'
        for path in (legacy, legacy_flat):
            try:
                if path.exists():
                    with open(path) as lf:
                        for svc, lvl in (json.load(lf) or {}).items():
                            # Only known services with a valid level are taken over.
                            if (isinstance(lvl, str)
                                    and lvl.upper() in _VALID_LOG_LEVELS
                                    and svc in services):
                                services[svc] = lvl.upper()
            except Exception as e:
                logger.warning('log_levels.json migration skipped (%s): %s', path, e)
        cfg['_migrated_log_levels'] = True
|
|
|
|
def get_logging_config(self) -> Dict[str, Any]:
    """Return the logging config as ``{'python': {...}, 'containers': {...}}``.

    The section is normalised via ``_ensure_logging_config`` first. The
    ``services`` and ``containers`` maps are copied, so changing the result
    does not change the stored configuration.
    """
    self._ensure_logging_config()
    section = self.configs['logging']
    python_cfg = section['python']

    snapshot: Dict[str, Any] = {}
    snapshot['python'] = {
        'root': python_cfg['root'],
        'services': dict(python_cfg['services']),
    }
    snapshot['containers'] = dict(section['containers'])
    return snapshot
|
|
|
|
def set_python_log_level(self, service: str, level: str) -> None:
    """Persist a python service (or 'root') log level.

    Args:
        service: ``'root'`` for the root logger level, otherwise the service
            name whose entry under ``logging.python.services`` is set.
        level: Level name, matched case-insensitively and stored upper-cased.

    Raises:
        ValueError: If ``level`` is not a string naming a valid level.
            Non-string values are rejected here as well, instead of failing
            with ``AttributeError`` on ``.upper()``.
    """
    if level is not None and not isinstance(level, str):
        raise ValueError(f"Invalid log level: {level!r}")
    level = (level or '').upper()
    if level not in _VALID_LOG_LEVELS:
        raise ValueError(f"Invalid log level: {level!r}")

    self._ensure_logging_config()
    python = self.configs['logging']['python']
    if service == 'root':
        python['root'] = level
    else:
        python.setdefault('services', {})[service] = level
    self._save_all_configs()
|
|
|
|
def set_container_log_level(self, container: str, level: str) -> None:
    """Persist a container log level.

    Args:
        container: Key under ``logging.containers`` to set.
        level: Level name, matched case-insensitively and stored upper-cased.

    Raises:
        ValueError: If ``level`` is not a string naming a valid level.
            Non-string values are rejected here as well, instead of failing
            with ``AttributeError`` on ``.upper()``.
    """
    if level is not None and not isinstance(level, str):
        raise ValueError(f"Invalid log level: {level!r}")
    level = (level or '').upper()
    if level not in _VALID_LOG_LEVELS:
        raise ValueError(f"Invalid log level: {level!r}")

    self._ensure_logging_config()
    self.configs['logging']['containers'][container] = level
    self._save_all_configs()
|
|
|
|
# Phase 5 — Extended connectivity configuration helpers
|
|
def get_connectivity_config(self) -> Dict[str, Any]:
    """Return a shallow copy of the connectivity section (exits + peer_exit_map).

    A missing or non-dict section is replaced in memory by an empty one, and
    the ``exits`` / ``peer_exit_map`` keys are created when absent. Nothing
    is persisted here. Nested dicts in the copy are shared with the stored
    section.
    """
    stored = self.configs.get('connectivity')
    if not isinstance(stored, dict):
        stored = self.configs['connectivity'] = {}
    for legacy_key in ('exits', 'peer_exit_map'):
        stored.setdefault(legacy_key, {})
    return dict(stored)
|
|
|
|
def set_ddns_config(self, ddns_cfg: Dict[str, Any]) -> None:
    """Replace the top-level ``ddns`` section and persist.

    A ``token`` key is stripped first so the bearer token is never written to
    cell_config.json — tokens live in data/. The caller's dict is not
    modified.
    """
    sanitized = dict(ddns_cfg.items())
    sanitized.pop('token', None)
    self.configs['ddns'] = sanitized
    self._save_all_configs()
|
|
|
|
@property
def _ddns_token_path(self) -> Path:
    """Location of the DDNS token file: ``<data_dir>/api/ddns_token``."""
    return self.data_dir.joinpath('api', 'ddns_token')
|
|
|
|
def get_ddns_token(self) -> str:
    """Return the DDNS bearer token from data/api/ddns_token.

    The token file is preferred; surrounding whitespace is stripped. If the
    file is missing, unreadable or empty, a legacy ``token`` stored in the
    ``ddns`` section of the config is used and handed to ``set_ddns_token``
    so existing installs migrate without manual intervention. When no token
    is stored anywhere the legacy lookup's default (an empty string) is
    returned.
    """
    token_file = self._ddns_token_path
    if token_file.exists():
        try:
            stored = token_file.read_text().strip()
        except OSError:  # includes PermissionError
            stored = ''
        if stored:
            return stored

    # Migrate legacy token from cell_config.json
    legacy = self.configs.get('ddns', {}).get('token', '')
    if legacy:
        self.set_ddns_token(legacy)
    return legacy
|
|
|
|
def set_ddns_token(self, token: str) -> None:
    """Write the DDNS bearer token to data/api/ddns_token (not cell_config.json).

    The parent directory is created when needed. If the write fails, the
    error is logged and the config is left untouched. After a successful
    write, a legacy ``token`` key in the ``ddns`` config section is removed
    and the config is persisted.

    NOTE(review): the file is written with the process's default
    permissions — confirm that is acceptable for a bearer token.
    """
    token_file = self._ddns_token_path
    try:
        token_file.parent.mkdir(parents=True, exist_ok=True)
        token_file.write_text(token)
    except OSError as exc:  # includes PermissionError
        logger.error('set_ddns_token: failed to write token file: %s', exc)
        return

    # Remove from cell_config.json if a legacy copy is there
    ddns_section = self.configs.get('ddns', {})
    if not ddns_section.get('token'):
        return
    self.configs['ddns'] = {k: v for k, v in ddns_section.items() if k != 'token'}
    self._save_all_configs()
|
|
|
|
def set_connectivity_field(self, field: str, value: Any) -> bool:
    """Set a single field within the connectivity config and persist.

    The connectivity section is created with empty ``exits`` and
    ``peer_exit_map`` when it does not exist yet.

    Returns:
        True when the config was saved. False when saving raised; the error
        is logged and the in-memory change is kept.
    """
    section = self.configs.setdefault('connectivity', {'exits': {}, 'peer_exit_map': {}})
    section[field] = value
    try:
        self._save_all_configs()
    except Exception as e:
        logger.error(f"set_connectivity_field({field}): {e}")
        return False
    return True
|
|
|
|
# ── Connectivity v2 — named connection instances ──────────────────────
|
|
#
|
|
# The legacy schema stored at most one exit per type under
|
|
# `connectivity.exits` plus a `peer_exit_map`. v2 replaces this with a list
|
|
# of named connection instances under `connectivity.connections`, each with
|
|
# its own allocated routing resources (mark/table/iface/redirect_port) and
|
|
# vault secret references. The legacy keys are kept readable so the one-time
|
|
# migration can consume them; the new code path uses `connections`.
|
|
|
|
def register_connectivity_migrator(self, migrator) -> None:
    """Install the v1→v2 connectivity migration callback (owned by ConnectivityManager).

    The callback has the shape ``migrator(legacy_section) -> list`` of
    connection records: it receives the legacy connectivity section and
    builds the v2 records from it (allocating resources, repointing
    secrets). ``get_connectivity()`` invokes it lazily, and only while the
    stored section is not yet at version 2.
    """
    self._connectivity_migrator = migrator
|
|
|
|
def get_connectivity(self) -> Dict[str, Any]:
    """Return a copy of the connectivity v2 section, upgrading it if needed.

    When the stored section already has ``version == 2`` and a list under
    ``connections`` it is returned (copied) as-is. Otherwise the registered
    migrator, if any, is called with a copy of the legacy section to build
    the connection records; the section is then stamped ``version: 2``,
    stored and persisted. A migrator result that is not a list is ignored.

    NOTE(review): the section is stamped version 2 (with no connections)
    even when no migrator is registered yet — confirm callers always
    register the migrator before the first call.

    Raises:
        Exception: Whatever the migrator raises; it is logged and re-raised,
            and nothing is stored in that case.
    """
    with self._connectivity_lock:
        stored = self.configs.get('connectivity')
        cfg = stored if isinstance(stored, dict) else {}

        already_v2 = cfg.get('version') == 2 and isinstance(cfg.get('connections'), list)
        if already_v2:
            return self._copy_connectivity(cfg)

        connections: List[Dict[str, Any]] = []
        if self._connectivity_migrator is not None:
            try:
                built = self._connectivity_migrator(dict(cfg))
            except Exception as e:
                logger.error(f"connectivity v1→v2 migration failed: {e}")
                raise
            if isinstance(built, list):
                connections = built

        # Legacy keys are carried over next to the new v2 keys.
        new_cfg = {**cfg, 'version': 2, 'connections': connections}
        self.configs['connectivity'] = new_cfg
        self._save_all_configs()
        return self._copy_connectivity(new_cfg)
|
|
|
|
@staticmethod
def _copy_connectivity(cfg: Dict[str, Any]) -> Dict[str, Any]:
    """Copy the connectivity section so callers can't mutate stored state.

    The top-level dict and each connection record are copied one level deep;
    values nested inside a record are still shared with the original.
    """
    return {**cfg, 'connections': list(map(dict, cfg.get('connections', [])))}
|
|
|
|
def list_connections(self) -> List[Dict[str, Any]]:
    """Return the v2 connection records as provided by ``get_connectivity()``."""
    with self._connectivity_lock:
        snapshot = self.get_connectivity()
        return snapshot.get('connections', [])
|
|
|
|
def get_connection(self, conn_id: str) -> Optional[Dict[str, Any]]:
    """Return a copy of the first connection record whose ``id`` equals ``conn_id``, or None."""
    with self._connectivity_lock:
        records = self.get_connectivity().get('connections', [])
        match = next((rec for rec in records if rec.get('id') == conn_id), None)
        return None if match is None else dict(match)
|
|
|
|
def add_connection(self, record: Dict[str, Any]) -> bool:
    """Append a copy of ``record`` to the connection list and persist.

    The read-modify-write runs under the connectivity lock. No check for an
    already-used ``id`` is made. Always returns True.
    """
    with self._connectivity_lock:
        records = self.get_connectivity().get('connections', [])
        records.append(dict(record))

        # Re-read the stored section after get_connectivity(), which may
        # have just replaced it, and keep every other key as it is.
        section = dict(self.configs.get('connectivity', {}))
        section['version'] = 2
        section['connections'] = records
        self.configs['connectivity'] = section
        self._save_all_configs()
        return True
|
|
|
|
def update_connection(self, conn_id: str, fields: Dict[str, Any]) -> bool:
    """Merge ``fields`` into the connection record with id ``conn_id`` and persist.

    Only the first record with a matching id is updated.

    Returns:
        True when a record was updated and saved, False when no record has
        that id (nothing is written in that case).
    """
    with self._connectivity_lock:
        records = self.get_connectivity().get('connections', [])
        target = next((rec for rec in records if rec.get('id') == conn_id), None)
        if target is None:
            return False
        target.update(fields)

        section = dict(self.configs.get('connectivity', {}))
        section['version'] = 2
        section['connections'] = records
        self.configs['connectivity'] = section
        self._save_all_configs()
        return True
|
|
|
|
def delete_connection(self, conn_id: str) -> bool:
    """Remove every connection record whose id equals ``conn_id`` and persist.

    Returns:
        True when at least one record was removed and the config saved,
        False when no record matched (nothing is written in that case).
    """
    with self._connectivity_lock:
        records = self.get_connectivity().get('connections', [])
        kept = list(filter(lambda rec: rec.get('id') != conn_id, records))
        if len(kept) == len(records):
            return False

        section = dict(self.configs.get('connectivity', {}))
        section['version'] = 2
        section['connections'] = kept
        self.configs['connectivity'] = section
        self._save_all_configs()
        return True
|
|
|
|
def set_connection_status(self, conn_id: str, status: Dict[str, Any]) -> bool:
    """Replace the ``status`` sub-dict of one connection record.

    A copy of ``status`` is stored. Returns the result of
    ``update_connection`` for that record.
    """
    status_copy = dict(status)
    return self.update_connection(conn_id, {'status': status_copy})
|
|
|
|
def get_all_configs(self) -> Dict[str, Dict]:
    """Return a shallow copy of the whole config mapping.

    Only the top level is copied; the section dicts are shared with the
    stored configuration.
    """
    return dict(self.configs)
|
|
|
|
def get_config_summary(self) -> Dict[str, Any]:
    """Return a summary of all configurations.

    Returns:
        A dict with:
          - ``total_services``: number of known service schemas
          - ``configured_services``: services whose config is non-empty and
            has no truthy ``error`` entry
          - ``unconfigured_services``: all other known services
          - ``backup_count``: number of listed backups
          - ``last_backup``: ``timestamp`` of the first entry returned by
            ``list_backups()``, or None when there are no backups or that
            entry has no timestamp
    """
    # List backups once: the count and the latest timestamp then come from
    # the same snapshot, and the backup directory is scanned a single time.
    backups = self.list_backups()

    configured: List[str] = []
    unconfigured: List[str] = []
    for service in self.service_schemas:
        config = self.get_service_config(service)
        if config and not config.get("error"):
            configured.append(service)
        else:
            unconfigured.append(service)

    return {
        "total_services": len(self.service_schemas),
        "configured_services": configured,
        "unconfigured_services": unconfigured,
        "backup_count": len(backups),
        # .get(): a manifest without a timestamp must not raise KeyError.
        "last_backup": backups[0].get("timestamp") if backups else None,
    }