Files
pic/api/service_composer.py
T
roof 18b50d08c1
Unit Tests / test (push) Successful in 11m31s
fix: post-Phase-0 corrections — data-dir bind mounts, reserved subdomains, list_active()
Three related fixes discovered during review of Phase 0 and Phase 1 manifests:

1. validate_rendered_compose(): add allowed_data_dir param. After ${PIC_DATA_DIR}
   substitution, compose templates produce absolute paths; without this the
   validator would reject every service install.  ServiceComposer.write_compose()
   now passes its resolved data_dir so only the designated data directory is
   exempt — /etc, /proc, docker.sock etc. still blocked.

2. _RESERVED_SUBDOMAINS: remove service-level subdomains (mail, calendar, files,
   webdav, webmail). The reserved list should protect PIC infrastructure endpoints
   (api, webui, admin) — not service subdomains that official store services
   (calendar, files, webmail) must be allowed to claim.  Aligns with the
   existing _RESERVED_SUBS in service_registry.py.

3. ServiceRegistry.list_active(): new method returning only installed store
   services (no builtins). This is the forward-looking API that Phase 2 will
   make the primary read path once builtins are deleted. Adding it now unblocks
   the QA agent's test_optional_services_feature.py which was already testing
   the expected Phase 2 behaviour.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-29 07:35:43 -04:00

327 lines
14 KiB
Python

"""
ServiceComposer — docker-compose generation and container lifecycle for PIC services.
Responsibilities:
- Render compose-template.yml → per-service docker-compose.yml with PIC_* substitution
- Manage store-service container lifecycle (up / down / restart / status / reconfigure)
- Manage builtin-service restarts and status via the main compose stack
- Generate and persist PIC_SECRET_* variables in a dedicated secrets file
Template variable reference (for compose-template.yml authors):
${PIC_CFG_<KEY>} — value from manifest config_schema, uppercased
${PIC_SECRET_<NAME>} — auto-generated random secret, persisted across reconfigures
${PIC_DOMAIN} — effective domain (e.g. cell.pic.ngo)
${PIC_CELL_NAME} — cell name (e.g. mycell)
${PIC_SERVICE_ID} — service identifier (e.g. nextcloud)
"""
import json
import logging
import os
import re
import secrets as _secrets_lib
import shutil
import subprocess
import threading
from pathlib import Path
from typing import Dict, List, Optional
from manifest_validator import validate_rendered_compose
logger = logging.getLogger('picell')
_SECRET_RE = re.compile(r'\$\{(PIC_SECRET_\w+)\}')
_SAFE_ID_RE = re.compile(r'^[a-z0-9][a-z0-9_-]{0,63}$')
class ServiceComposer:
def __init__(self, config_manager, data_dir: str):
self.cm = config_manager
self.data_dir = data_dir
self._services_dir = os.path.join(data_dir, 'services')
self._secrets_path = os.path.join(data_dir, 'service_secrets.json')
self._lock = threading.Lock()
# ── Path helpers ──────────────────────────────────────────────────────
@staticmethod
def _validate_service_id(service_id: str) -> None:
"""Raise ValueError if service_id could be used for path traversal."""
if not _SAFE_ID_RE.match(service_id):
raise ValueError(
f'Invalid service_id {service_id!r}: '
'must match ^[a-z0-9][a-z0-9_-]{{0,63}}$'
)
def _svc_dir(self, service_id: str) -> str:
self._validate_service_id(service_id)
candidate = os.path.join(self._services_dir, service_id)
# Paranoia: ensure the resolved path stays inside _services_dir
real_base = os.path.realpath(self._services_dir)
real_cand = os.path.realpath(candidate)
if not real_cand.startswith(real_base + os.sep) and real_cand != real_base:
raise ValueError(f'service_id {service_id!r} escapes services directory')
return candidate
def _compose_path(self, service_id: str) -> str:
return os.path.join(self._svc_dir(service_id), 'docker-compose.yml')
def has_compose_file(self, service_id: str) -> bool:
try:
return os.path.exists(self._compose_path(service_id))
except ValueError:
return False
# ── Secrets management ────────────────────────────────────────────────
def _load_secrets(self) -> Dict:
if not os.path.exists(self._secrets_path):
return {}
try:
with open(self._secrets_path) as f:
return json.load(f)
except (OSError, json.JSONDecodeError) as e:
logger.warning('ServiceComposer: failed to load secrets: %s', e)
return {}
def _save_secrets(self, secrets: Dict) -> None:
tmp = self._secrets_path + '.tmp'
# 0o600: readable only by the process owner — secrets must not be world-readable
with open(tmp, 'w',
opener=lambda path, flags: os.open(path, flags, 0o600)) as f:
json.dump(secrets, f, indent=2)
f.flush()
os.fsync(f.fileno())
os.replace(tmp, self._secrets_path)
def _get_or_create_secret(self, service_id: str, var_name: str) -> str:
with self._lock:
secrets = self._load_secrets()
svc_secrets = secrets.setdefault(service_id, {})
if var_name not in svc_secrets:
svc_secrets[var_name] = _secrets_lib.token_urlsafe(24)
self._save_secrets(secrets)
return svc_secrets[var_name]
def _clear_secrets(self, service_id: str) -> None:
with self._lock:
secrets = self._load_secrets()
if service_id in secrets:
del secrets[service_id]
self._save_secrets(secrets)
# ── Template rendering ────────────────────────────────────────────────
def render_template(self, service_id: str, manifest: Dict,
template_content: str) -> str:
"""
Substitute all PIC_* variables in a compose-template.yml string.
Returns the rendered compose YAML.
"""
schema = manifest.get('config_schema') or {}
saved = self.cm.configs.get(service_id, {})
config: Dict = {k: v['default'] for k, v in schema.items() if 'default' in v}
config.update({k: saved[k] for k in schema if k in saved})
identity = self.cm.get_identity()
domain = self.cm.get_effective_domain() or identity.get('domain', 'cell.local')
cell_name = identity.get('cell_name', 'mycell')
result = template_content
for key, value in config.items():
# Strip newlines/tabs to prevent YAML injection (a config string containing
# \n could inject new YAML keys into the compose file)
safe_val = str(value).replace('\n', '').replace('\r', '').replace('\t', ' ')
result = result.replace(f'${{PIC_CFG_{key.upper()}}}', safe_val)
result = result.replace('${PIC_DOMAIN}', domain)
result = result.replace('${PIC_CELL_NAME}', cell_name)
result = result.replace('${PIC_SERVICE_ID}', service_id)
result = result.replace('${PIC_DATA_DIR}', str(Path(self.data_dir).resolve()))
# PIC_SECRET_* — generate on first use, reuse on reconfigure
for match in _SECRET_RE.finditer(template_content):
var_name = match.group(1)
secret = self._get_or_create_secret(service_id, var_name)
result = result.replace(f'${{{var_name}}}', secret)
return result
def write_compose(self, service_id: str, manifest: Dict,
template_content: str) -> str:
"""Render and atomically write the per-service compose file. Returns rendered content."""
os.makedirs(self._svc_dir(service_id), exist_ok=True)
content = self.render_template(service_id, manifest, template_content)
# Validate before any file I/O so a bad template never touches disk.
# Pass the resolved data_dir so that bind mounts created by ${PIC_DATA_DIR}
# substitution are allowed; all other absolute paths are still rejected.
ok, errs = validate_rendered_compose(
content, allowed_data_dir=str(Path(self.data_dir).resolve())
)
if not ok:
raise ValueError(
f'Compose template failed security validation: {"; ".join(errs)}'
)
path = self._compose_path(service_id)
tmp = path + '.tmp'
with open(tmp, 'w') as f:
f.write(content)
f.flush()
os.fsync(f.fileno())
os.replace(tmp, path)
logger.info('ServiceComposer: wrote compose file for %s', service_id)
return content
# ── Subprocess helper ─────────────────────────────────────────────────
def _run(self, cmd: List[str], timeout: int = 120) -> Dict:
try:
r = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
if r.returncode != 0 and r.stderr:
logger.warning('ServiceComposer command failed: %s', r.stderr.strip())
return {
'ok': r.returncode == 0,
'stdout': r.stdout.strip(),
'stderr': r.stderr.strip(),
}
except subprocess.TimeoutExpired:
return {'ok': False, 'error': 'docker compose command timed out'}
except Exception as e:
logger.error('ServiceComposer._run error: %s', e)
return {'ok': False, 'error': str(e)}
@staticmethod
def _parse_ps_json(output: str) -> List[Dict]:
"""Parse `docker compose ps --format json` output (one JSON object per line)."""
containers = []
for line in output.splitlines():
line = line.strip()
if not line:
continue
try:
containers.append(json.loads(line))
except json.JSONDecodeError:
pass
return containers
# ── Store-service lifecycle (per-service compose file) ────────────────
def _store_cmd(self, service_id: str, *args, timeout: int = 120) -> Dict:
compose_file = self._compose_path(service_id)
if not os.path.exists(compose_file):
return {'ok': False, 'error': f'No compose file found for service {service_id!r}'}
cmd = [
'docker', 'compose',
'-f', compose_file,
'--project-name', f'pic-{service_id}',
*args,
]
return self._run(cmd, timeout)
def up(self, service_id: str) -> Dict:
# 600s: image pulls on slow connections can take several minutes
return self._store_cmd(service_id, 'up', '-d', '--remove-orphans', timeout=600)
def down(self, service_id: str, remove_volumes: bool = False) -> Dict:
args = ['down']
if remove_volumes:
args.append('--volumes')
return self._store_cmd(service_id, *args)
def restart(self, service_id: str) -> Dict:
return self._store_cmd(service_id, 'restart')
def status(self, service_id: str) -> Dict:
result = self._store_cmd(service_id, 'ps', '--format', 'json')
result['containers'] = self._parse_ps_json(result.get('stdout', ''))
return result
def reconfigure(self, service_id: str, manifest: Dict,
template_content: str) -> Dict:
"""Re-render the compose file then re-apply with `up -d` (rolling update)."""
self.write_compose(service_id, manifest, template_content)
return self.up(service_id)
def install(self, service_id: str, manifest: Dict,
template_content: str) -> Dict:
"""Write compose file and start containers."""
self.write_compose(service_id, manifest, template_content)
return self.up(service_id)
def remove(self, service_id: str, purge_data: bool = False) -> Dict:
"""Stop containers, optionally delete compose file, secrets, and service data dir."""
result = self.down(service_id, remove_volumes=purge_data)
if purge_data:
self._clear_secrets(service_id)
svc_dir = self._svc_dir(service_id) # already validates service_id + realpath
if os.path.isdir(svc_dir):
# Final realpath check: reject symlinks that escape the services dir
real_svc = os.path.realpath(svc_dir)
real_base = os.path.realpath(self._services_dir)
if not real_svc.startswith(real_base + os.sep):
logger.error('ServiceComposer: refusing rmtree outside services dir: %s', svc_dir)
else:
try:
shutil.rmtree(svc_dir)
except OSError as e:
logger.warning('ServiceComposer: could not remove %s: %s', svc_dir, e)
elif os.path.exists(self._compose_path(service_id)):
# Remove compose file even without purge so stale file doesn't confuse future installs
try:
os.remove(self._compose_path(service_id))
except OSError:
pass
return result
# ── Builtin-service lifecycle (main compose stack) ─────────────────────
@staticmethod
def _main_compose() -> str:
return os.environ.get('COMPOSE_FILE', '/app/docker-compose.yml')
def restart_builtin(self, container_names: List[str]) -> Dict:
"""Restart one or more containers that live in the main docker-compose stack."""
if not container_names:
return {'ok': False, 'error': 'No container names provided'}
cmd = ['docker', 'compose', '-f', self._main_compose(),
'restart', *container_names]
return self._run(cmd)
def status_builtin(self, container_names: List[str]) -> Dict:
"""Return status of containers from the main compose stack."""
if not container_names:
return {'ok': False, 'error': 'No container names provided'}
cmd = ['docker', 'compose', '-f', self._main_compose(),
'ps', '--format', 'json', *container_names]
result = self._run(cmd)
result['containers'] = self._parse_ps_json(result.get('stdout', ''))
return result
# ── Unified lifecycle (dispatches based on service kind) ───────────────
def restart_service(self, service_id: str, manifest: Dict) -> Dict:
"""
Restart any service — builtin or store — using the right compose stack.
Builtin: uses manifest.containers + main docker-compose.yml.
Store: uses per-service compose file.
"""
if manifest.get('kind') == 'builtin':
containers = manifest.get('containers') or []
return self.restart_builtin(containers)
return self.restart(service_id)
def status_service(self, service_id: str, manifest: Dict) -> Dict:
"""
Return container status for any service.
Builtin: queries manifest.containers from main compose stack.
Store: queries per-service compose project.
"""
if manifest.get('kind') == 'builtin':
containers = manifest.get('containers') or []
return self.status_builtin(containers)
return self.status(service_id)