f7bb2cc962
Unit Tests / test (push) Successful in 11m25s
Two manifest validation bugs blocked all store service installs: 1. service_store_manager.RESERVED_SUBDOMAINS included 'mail', which prevented the email service from using its required subdomain. Removed mail/calendar/files/webmail — they belong to official PIC store services and must be claimable by them. 2. manifest_validator required @sha256 digest pins on ALL images, including first-party git.pic.ngo/roof/* images that the PIC team builds and controls. service_store_manager._validate_manifest already only warned for first-party images; the secondary validator was stricter than intended, causing a hard reject on :latest tags. Aligned to warn-not-reject for first-party; malformed digests (when provided) are still a hard error. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
439 lines
18 KiB
Python
439 lines
18 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Service Store Manager for Personal Internet Cell.
|
|
|
|
Manages installation, removal, and lifecycle of third-party services from the
|
|
PIC service store index. Each installed service runs as a Docker container
|
|
declared in a compose override file and has:
|
|
- An allocated IP in the service pool (172.20.0.20–254 by default)
|
|
- Optional iptables FORWARD rules declared in its manifest
|
|
- Optional Caddy reverse-proxy route declared in its manifest
|
|
"""
|
|
|
|
import logging
|
|
import os
|
|
import re
|
|
import threading
|
|
from datetime import datetime
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
import json
|
|
|
|
import requests
|
|
|
|
from base_service_manager import BaseServiceManager
|
|
from manifest_validator import validate_manifest, validate_provision_hook
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Constants
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Default location of the store index and the per-service URL templates.
# The ``{id}`` placeholder is filled in with ``str.format(id=<service id>)``.
INDEX_URL_DEFAULT = (
    'https://git.pic.ngo/roof/pic-services/raw/branch/main/index.json'
)
MANIFEST_URL_TPL = (
    'https://git.pic.ngo/roof/pic-services/raw/branch/main/services/{id}/manifest.json'
)
TEMPLATE_URL_TPL = (
    'https://git.pic.ngo/roof/pic-services/raw/branch/main/services/{id}/compose-template.yml'
)

# NOTE: every validation pattern in this section ends in ``\Z`` rather than
# ``$``.  ``$`` also matches just before a trailing newline, so with
# ``re.match`` a value such as 'mail\n' would be accepted; ``\Z`` matches only
# at the very end of the string.

# Only images hosted under git.pic.ngo/roof/ pass; the tag and the @sha256
# digest are both optional at this level, but a digest, when present, must be
# exactly 64 lowercase hex characters.
IMAGE_ALLOWLIST_RE = re.compile(
    r'^git\.pic\.ngo/roof/[a-z0-9._/-]+(:[a-zA-Z0-9._-]+)?(@sha256:[a-f0-9]{64})?\Z'
)

# Images from well-known vendors that pre-date digest pinning in PIC.
# These are allowed to ship without a @sha256 digest; all others require one
# or must come from git.pic.ngo/roof/*.
TRUSTED_IMAGES_NO_DIGEST = frozenset({
    'mailserver/docker-mailserver',
    'tomsquest/docker-radicale',
    'bytemark/webdav',
    'filegator/filegator',
    'hardware/rainloop',
})

# Volume mount paths a manifest may never declare (compared after path
# normalisation by the validator).
FORBIDDEN_MOUNTS = frozenset([
    '/', '/etc', '/var', '/proc', '/sys', '/dev', '/app', '/run', '/boot',
])

# Subdomains that store services may not claim.
RESERVED_SUBDOMAINS = frozenset([
    'api', 'webui', 'admin', 'www', 'ns1', 'ns2',
    'git', 'registry', 'install',
    # mail, calendar, files, webmail are intentionally absent:
    # they are claimed by official PIC store services.
])

# Characters permitted in a manifest env value (the empty string is allowed).
ENV_VALUE_RE = re.compile(r'^[A-Za-z0-9._@:/+\-= ]*\Z')
# A subdomain label: a lowercase letter followed by up to 30 of [a-z0-9-].
SUBDOMAIN_RE = re.compile(r'^[a-z][a-z0-9-]{0,30}\Z')
# A reverse-proxy backend in host:port form (port is 1-5 digits).
BACKEND_RE = re.compile(r'^[A-Za-z0-9._-]+:\d{1,5}\Z')
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ServiceStoreManager
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class ServiceStoreManager(BaseServiceManager):
|
|
"""Manages service store: install, remove, and list available/installed services."""
|
|
|
|
def __init__(self, config_manager, caddy_manager, container_manager,
             data_dir: str = '', config_dir: str = '',
             service_composer=None, egress_manager=None):
    """Store the injected collaborators and read store settings.

    Args:
        config_manager: Provides the installed-service records.
        caddy_manager: Used to regenerate the reverse-proxy configuration.
        container_manager: Stored on the instance; not referenced by the
            other methods visible in this file.
        data_dir: Passed through to ``BaseServiceManager``.
        config_dir: Passed through to ``BaseServiceManager``.
        service_composer: Optional; when ``None`` the compose/dependency
            steps of install/remove/startup are skipped.
        egress_manager: Optional; when falsy the egress steps are skipped.
    """
    super().__init__('service_store', data_dir, config_dir)

    # Injected collaborators.
    self.config_manager = config_manager
    self.caddy_manager = caddy_manager
    self.container_manager = container_manager
    self.service_composer = service_composer
    self.egress_manager = egress_manager

    # Settings that can be overridden through the environment.
    environ = os.environ
    self.compose_override = environ.get(
        'COMPOSE_SERVICES_PATH', '/app/docker-compose.services.yml'
    )
    self.index_url = environ.get('PIC_STORE_INDEX_URL', INDEX_URL_DEFAULT)

    # Held by install() and remove() for the whole operation.
    self._lock = threading.Lock()

    # Index cache used by fetch_index(): the cached list, the time.time()
    # value at which it was stored, and its lifetime in seconds.
    self._index_cache: Optional[list] = None
    self._index_cache_time: float = 0
    self._cache_ttl: int = 300  # 5 min
|
|
|
|
# ── BaseServiceManager required ───────────────────────────────────────
|
|
|
|
def get_status(self) -> Dict[str, Any]:
    """Report the manager's status.

    Returns:
        A dict with the service name, a constant ``running: True`` flag and
        the number of installed-service records held by the config manager.
    """
    installed_services = self.config_manager.get_installed_services()
    status: Dict[str, Any] = {'service': self.service_name, 'running': True}
    status['installed_count'] = len(installed_services)
    return status
|
|
|
|
def test_connectivity(self) -> Dict[str, Any]:
    """Probe the store index URL with a 5-second timeout.

    Returns:
        ``{'success': True}`` when the response status is exactly 200,
        ``{'success': False}`` for any other status, and
        ``{'success': False, 'error': <message>}`` when the request raises.
    """
    try:
        reachable = requests.get(self.index_url, timeout=5).status_code == 200
        return {'success': reachable}
    except Exception as exc:
        return {'success': False, 'error': str(exc)}
|
|
|
|
# ── Manifest validation ───────────────────────────────────────────────
|
|
|
|
@staticmethod
def _validate_manifest(m: dict) -> Tuple[bool, List[str]]:
    """Validate a service manifest. Returns (ok, [errors]).

    The manifest is parsed from JSON fetched over the network, so every field
    is treated as untrusted: a value of the wrong type is reported as a
    validation error instead of raising.

    Checks, in order: required top-level fields, image allowlist and digest
    pin, volume mounts, iptables rules, subdomains and backends (legacy
    ``caddy_route`` plus the top-level fields), env values, and finally the
    checks delegated to ``manifest_validator.validate_manifest``.

    Args:
        m: The parsed manifest; expected to be a dict.

    Returns:
        ``(ok, errors)`` where ``ok`` is True only when ``errors`` is empty.
    """
    import posixpath

    if not isinstance(m, dict):
        return (False, ['Manifest must be a JSON object'])

    errors: List[str] = []

    def listed(key: str) -> list:
        """Return ``m[key]`` as a list; missing/empty values yield ``[]``."""
        value = m.get(key)
        if not value:
            return []
        if not isinstance(value, (list, tuple)):
            errors.append(f'{key} must be a list')
            return []
        return list(value)

    def check_subdomain(label: str, value: object) -> None:
        """Record an error when *value* is not an allowed subdomain label."""
        # fullmatch: unlike match() with '$', a trailing newline is rejected.
        if not isinstance(value, str) or not SUBDOMAIN_RE.fullmatch(value):
            errors.append(
                f'{label} must match ^[a-z][a-z0-9-]{{0,30}}$, got: {value}'
            )
        elif value in RESERVED_SUBDOMAINS:
            errors.append(f'{label} is reserved: {value}')

    # Required top-level fields
    for field in ('id', 'name', 'version', 'author', 'image', 'container_name'):
        if not m.get(field):
            errors.append(f'Missing required field: {field}')

    # Image allowlist
    image = m.get('image', '')
    if image and not isinstance(image, str):
        errors.append(f'image must be a string, got: {image!r}')
    elif image and not IMAGE_ALLOWLIST_RE.fullmatch(image):
        errors.append(
            f'image must match git.pic.ngo/roof/* pattern, got: {image}'
        )
    elif image and '@sha256:' not in image:
        # Warn when a digest pin is absent so operators know exact-version
        # tracking is not guaranteed. Images in TRUSTED_IMAGES_NO_DIGEST
        # and images from our own git.pic.ngo/roof/* registry (which we
        # build and tag) get warnings rather than hard errors; any other
        # image that somehow passes the allowlist gets a hard error.
        image_base = image.split(':')[0].split('@')[0]
        is_own_registry = image_base.startswith('git.pic.ngo/roof/')
        if image_base in TRUSTED_IMAGES_NO_DIGEST or is_own_registry:
            logger.warning('image %s has no digest pin', image)
        else:
            errors.append(
                f'image {image!r} must include a @sha256:<digest> pin'
            )

    # Volume mount safety
    pic_root = '/home/roof/pic'
    for vol in listed('volumes'):
        mount = vol.get('mount', '') if isinstance(vol, dict) else None
        if not isinstance(mount, str):
            errors.append(
                f'volumes[] entries must be objects with a string mount, got: {vol!r}'
            )
            continue
        # Normalise before comparing so spellings such as '/etc/', '//etc'
        # or '/var/../etc' cannot slip past the exact-match check.
        norm = posixpath.normpath(mount) if mount else ''
        if norm.startswith('/'):
            # normpath() deliberately keeps a leading '//'; collapse it.
            norm = '/' + norm.lstrip('/')
        if norm in FORBIDDEN_MOUNTS:
            errors.append(f'Forbidden volume mount: {mount}')
        elif norm.startswith(pic_root) or (
            norm and pic_root.startswith(norm + '/')
        ):
            # Rejects paths under /home/roof/pic (the original check) and
            # also its ancestors (/home, /home/roof), which is the case the
            # error message describes.
            errors.append(f'Volume mount cannot be a prefix of /home/roof/pic: {mount}')

    # iptables rules
    for rule in listed('iptables_rules'):
        if not isinstance(rule, dict):
            errors.append(f'iptables_rules[] entries must be objects, got: {rule!r}')
            continue
        if rule.get('type') != 'ACCEPT':
            errors.append(
                f'iptables_rules[].type must be ACCEPT, got: {rule.get("type")}'
            )
        if rule.get('dest_ip') != '${SERVICE_IP}':
            errors.append(
                f'iptables_rules[].dest_ip must be exactly ${{SERVICE_IP}}, '
                f'got: {rule.get("dest_ip")}'
            )
        port = rule.get('dest_port')
        # bool is a subclass of int: without the explicit exclusion a JSON
        # ``true`` would be accepted as port 1.
        if (
            isinstance(port, bool)
            or not isinstance(port, int)
            or not (1 <= port <= 65535)
        ):
            errors.append(
                f'iptables_rules[].dest_port must be an integer 1-65535, got: {port}'
            )
        proto = rule.get('proto', 'tcp')
        if proto not in ('tcp', 'udp'):
            errors.append(
                f'iptables_rules[].proto must be tcp or udp, got: {proto}'
            )

    # Legacy caddy_route dict subdomain (for store manifests using the old format)
    caddy_route = m.get('caddy_route') or {}
    legacy_sub = caddy_route.get('subdomain', '') if isinstance(caddy_route, dict) else ''
    if legacy_sub:
        check_subdomain('caddy_route.subdomain', legacy_sub)

    # Top-level subdomain + backend (consumed by ServiceRegistry.get_caddy_routes)
    subdomain = m.get('subdomain', '')
    if subdomain:
        check_subdomain('subdomain', subdomain)

    backend = m.get('backend', '')
    if backend and not (isinstance(backend, str) and BACKEND_RE.fullmatch(backend)):
        errors.append(f'backend must be host:port (e.g. cell-foo:8080), got: {backend}')

    for sub in listed('extra_subdomains'):
        if not isinstance(sub, str):
            errors.append('extra_subdomains entries must be strings')
        else:
            check_subdomain('extra_subdomains entry', sub)

    extra_backends = m.get('extra_backends') or {}
    if not isinstance(extra_backends, dict):
        errors.append('extra_backends must be an object mapping subdomain to host:port')
        extra_backends = {}
    for sub, bknd in extra_backends.items():
        if not isinstance(sub, str) or not SUBDOMAIN_RE.fullmatch(sub):
            errors.append(
                f'extra_backends key must match ^[a-z][a-z0-9-]{{0,30}}$, got: {sub!r}'
            )
        elif sub in RESERVED_SUBDOMAINS:
            errors.append(f'extra_backends key is reserved: {sub}')
        if not isinstance(bknd, str) or not BACKEND_RE.fullmatch(bknd):
            errors.append(
                f'extra_backends[{sub!r}] value must be host:port, got: {bknd!r}'
            )

    # Env value safety
    for env_entry in listed('env'):
        if not isinstance(env_entry, dict):
            errors.append(f'env[] entries must be objects, got: {env_entry!r}')
            continue
        val = str(env_entry.get('value', ''))
        if not ENV_VALUE_RE.fullmatch(val):
            errors.append(
                f'env[].value contains disallowed characters: {val!r}'
            )

    # Security layer: delegate to manifest_validator for cap_add, backend
    # denylist, provision_hook, reserved container names, and kind guard.
    ok, sec_errs = validate_manifest(m)
    if not ok:
        errors.extend(sec_errs)

    return (len(errors) == 0, errors)
|
|
|
|
# ── Index / manifest fetching ─────────────────────────────────────────
|
|
|
|
def fetch_index(self) -> list:
    """Fetch and cache the service index.

    A cached index younger than ``self._cache_ttl`` seconds is returned
    without any network access. Otherwise the index is downloaded from
    ``self.index_url`` (at most 256 KB is read) and parsed as JSON; either a
    bare list or an object with a ``services`` list is accepted.

    Returns:
        The list of index entries. On any failure a warning is logged and
        the previously cached list (or ``[]`` when there is none) is
        returned; this method does not raise.
    """
    import time
    _SIZE_LIMIT = 256 * 1024
    now = time.time()
    if self._index_cache is not None and (now - self._index_cache_time) < self._cache_ttl:
        return self._index_cache
    try:
        resp = requests.get(self.index_url, timeout=10, stream=True)
        try:
            resp.raise_for_status()
            # Read one byte past the limit so an oversized body is detectable
            # without downloading all of it.
            content = resp.raw.read(_SIZE_LIMIT + 1, decode_content=True)
        finally:
            # With stream=True the connection is held until the body is fully
            # consumed or the response is closed; close it explicitly.
            resp.close()
        if len(content) > _SIZE_LIMIT:
            raise ValueError('Index response exceeds 256 KB limit')
        data = json.loads(content)
        services = data if isinstance(data, list) else data.get('services', [])
        if not isinstance(services, list):
            # Never cache (or return) a non-list from a method typed -> list.
            raise ValueError('Index "services" entry is not a list')
        self._index_cache = services
        self._index_cache_time = now
        return self._index_cache
    except Exception as e:
        logger.warning('fetch_index failed: %s', e)
        return self._index_cache or []
|
|
|
|
def _fetch_manifest(self, service_id: str) -> dict:
    """Fetch a service manifest by ID.

    Args:
        service_id: Store identifier; substituted into ``MANIFEST_URL_TPL``.

    Returns:
        The parsed manifest object.

    Raises:
        ValueError: If ``service_id`` is not a single safe path segment, the
            body exceeds 256 KB, the body is not valid JSON, or the JSON is
            not an object.
        Exception: Whatever ``requests.get`` / ``raise_for_status`` raise on
            network or HTTP errors.
    """
    _SIZE_LIMIT = 256 * 1024
    # service_id becomes part of a URL path. Restricting it to one plain
    # segment keeps values such as '../other-repo' from redirecting the
    # request to a different location on the host.
    if not isinstance(service_id, str) or not re.fullmatch(
        r'[A-Za-z0-9][A-Za-z0-9._-]*', service_id
    ):
        raise ValueError(f'Invalid service id: {service_id!r}')
    url = MANIFEST_URL_TPL.format(id=service_id)
    resp = requests.get(url, timeout=10, stream=True)
    try:
        resp.raise_for_status()
        # One byte past the limit is enough to detect an oversized body.
        content = resp.raw.read(_SIZE_LIMIT + 1, decode_content=True)
    finally:
        # A streamed response holds its connection until closed.
        resp.close()
    if len(content) > _SIZE_LIMIT:
        raise ValueError(
            f'Manifest response for {service_id} exceeds 256 KB limit'
        )
    manifest = json.loads(content)
    if not isinstance(manifest, dict):
        raise ValueError(f'Manifest for {service_id} is not a JSON object')
    return manifest
|
|
|
|
def _fetch_template(self, service_id: str, manifest: dict) -> str:
    """Fetch the compose template for a service.

    Args:
        service_id: Store identifier; substituted into ``TEMPLATE_URL_TPL``.
        manifest: Accepted for interface compatibility; not used here.

    Returns:
        The template text, decoded as UTF-8.

    Raises:
        ValueError: If ``service_id`` is not a single safe path segment or
            the body exceeds 256 KB (``UnicodeDecodeError``, raised for a
            non-UTF-8 body, is a ``ValueError`` subclass).
        Exception: Whatever ``requests.get`` / ``raise_for_status`` raise on
            network or HTTP errors.
    """
    _SIZE_LIMIT = 256 * 1024
    # service_id becomes part of a URL path; allow only one plain segment so
    # '../'-style values cannot point the request elsewhere on the host.
    if not isinstance(service_id, str) or not re.fullmatch(
        r'[A-Za-z0-9][A-Za-z0-9._-]*', service_id
    ):
        raise ValueError(f'Invalid service id: {service_id!r}')
    url = TEMPLATE_URL_TPL.format(id=service_id)
    resp = requests.get(url, timeout=10, stream=True)
    try:
        resp.raise_for_status()
        # One byte past the limit is enough to detect an oversized body.
        content = resp.raw.read(_SIZE_LIMIT + 1, decode_content=True)
    finally:
        # A streamed response holds its connection until closed.
        resp.close()
    if len(content) > _SIZE_LIMIT:
        raise ValueError(f'Compose template for {service_id} exceeds 256 KB limit')
    return content.decode('utf-8')
|
|
|
|
# ── Core operations ───────────────────────────────────────────────────
|
|
|
|
def install(self, service_id: str) -> dict:
    """Install a service from the store.

    Everything below runs while holding ``self._lock``:

    1. Return early when ``service_id`` already has an install record.
    2. Fetch the manifest and validate it.
    3. When a service composer is configured, check the manifest's
       dependencies against the installed services.
    4. Fetch the compose template.
    5. When a service composer is configured, let it write the compose file
       and start the containers.
    6. Persist the install record through the config manager.
    7. Best effort (failures are logged, not returned): regenerate the Caddy
       configuration and apply egress rules.

    Args:
        service_id: Store identifier of the service to install.

    Returns:
        ``{'ok': True}`` on success (plus ``'already_installed': True`` for
        step 1). On failure ``{'ok': False, 'error': <str>}``, or
        ``{'ok': False, 'errors': [<str>, ...]}`` for validation failures.
    """
    # Function-scope import: the module only imports ``datetime`` itself.
    from datetime import timezone

    with self._lock:
        installed = self.config_manager.get_installed_services()
        if service_id in installed:
            return {'ok': True, 'already_installed': True}

        # Fetch and validate manifest
        try:
            manifest = self._fetch_manifest(service_id)
        except Exception as e:
            return {'ok': False, 'error': f'Failed to fetch manifest: {e}'}

        ok, errs = self._validate_manifest(manifest)
        if not ok:
            return {'ok': False, 'errors': errs}

        # _validate_manifest already delegates to validate_manifest; this
        # explicit second pass is kept as defence in depth.
        ok2, errs2 = validate_manifest(manifest)
        if not ok2:
            return {'ok': False, 'errors': errs2}

        # Dependency check
        if self.service_composer is not None:
            err = self.service_composer._resolve_requires(manifest, installed)
            if err:
                return {'ok': False, 'error': err}

        # Fetch compose template
        try:
            template_content = self._fetch_template(service_id, manifest)
        except Exception as e:
            return {'ok': False, 'error': f'Failed to fetch compose template: {e}'}

        # Write compose file and start containers (validation inside write_compose)
        if self.service_composer is not None:
            try:
                result = self.service_composer.install(service_id, manifest, template_content)
            except ValueError as e:
                # ValueError messages are returned to the caller verbatim.
                return {'ok': False, 'error': str(e)}
            except Exception as e:
                return {'ok': False, 'error': f'Failed to start service: {e}'}
            if not result.get('ok'):
                return {'ok': False, 'error': result.get('error') or result.get('stderr', 'docker up failed')}

        # Persist minimal install record.
        # datetime.utcnow() is deprecated since Python 3.12; take an aware
        # UTC "now" and drop tzinfo so the stored string keeps the exact
        # naive-ISO format existing records use.
        installed_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        record = {
            'id': service_id,
            'manifest': manifest,
            'installed_at': installed_at,
        }
        self.config_manager.set_installed_service(service_id, record)

        # Regenerate Caddy (registry now drives routes, no caddy_routes list needed)
        try:
            self.caddy_manager.regenerate_with_installed([])
        except Exception as e:
            logger.warning('install: caddy regenerate failed for %s (non-fatal): %s', service_id, e)

        if self.egress_manager:
            try:
                self.egress_manager.apply_service(service_id)
            except Exception as exc:
                logger.warning('Egress apply failed for %s (non-fatal): %s', service_id, exc)

        return {'ok': True}
|
|
|
|
def remove(self, service_id: str, purge_data: bool = False) -> dict:
    """Remove an installed service.

    Runs under ``self._lock``. Refuses when the service has no install
    record or when the composer reports that other installed services depend
    on it. Otherwise clears egress rules, asks the composer to remove the
    service, deletes the install record and regenerates the Caddy
    configuration; only the record deletion is allowed to raise, the other
    steps log a warning on failure.

    Args:
        service_id: Identifier of the service to remove.
        purge_data: Forwarded to ``service_composer.remove``.

    Returns:
        ``{'ok': True}`` on success, else ``{'ok': False, 'error': <str>}``.
    """
    with self._lock:
        current = self.config_manager.get_installed_services()
        if service_id not in current:
            return {'ok': False, 'error': f'Service {service_id} is not installed'}

        composer = self.service_composer

        # Refuse to pull a service out from under its dependents.
        if composer is not None:
            blockers = composer._resolve_dependents(service_id, current)
            if blockers:
                names = ", ".join(sorted(blockers))
                return {
                    'ok': False,
                    'error': f'Cannot remove {service_id}: required by {names}',
                }

        # Egress rules first (best effort).
        if self.egress_manager:
            try:
                self.egress_manager.clear_service(service_id)
            except Exception as egress_err:
                logger.warning('Egress clear failed for %s (non-fatal): %s', service_id, egress_err)

        # Containers next (best effort).
        if composer is not None:
            try:
                composer.remove(service_id, purge_data=purge_data)
            except Exception as compose_err:
                logger.warning('remove: composer.remove failed for %s (non-fatal): %s', service_id, compose_err)

        # Drop the install record.
        self.config_manager.remove_installed_service(service_id)

        # Reverse-proxy configuration last (best effort).
        try:
            self.caddy_manager.regenerate_with_installed([])
        except Exception as caddy_err:
            logger.warning('remove: caddy regenerate failed for %s (non-fatal): %s', service_id, caddy_err)

        return {'ok': True}
|
|
|
|
def list_services(self) -> dict:
    """Return available (from index) and installed services.

    Returns:
        ``{'available': <result of fetch_index()>,
        'installed': <records from the config manager>}``.
    """
    # Dict displays evaluate left to right, so the index is fetched before
    # the installed records are read.
    return {
        'available': self.fetch_index(),
        'installed': self.config_manager.get_installed_services(),
    }
|
|
|
|
def reapply_on_startup(self) -> None:
    """Re-apply firewall and Caddy rules for all installed services on startup.

    Does nothing when there are no installed services. Otherwise runs three
    independent steps, each of which only logs a warning on failure:
    per-service firewall rules, Caddy regeneration, and (when a composer is
    configured) bringing the per-service compose stacks back up.
    """
    from firewall_manager import apply_service_rules

    records = self.config_manager.get_installed_services()
    if not records:
        return

    # Step 1: firewall rules, one service at a time so a single failure does
    # not stop the others.
    # NOTE(review): this reads 'service_ip' / 'iptables_rules' (and
    # 'caddy_route' below) from the top level of each record, while install()
    # in this class persists only 'id', 'manifest' and 'installed_at' --
    # confirm where those keys are written.
    for svc_id, record in records.items():
        service_ip = record.get('service_ip', '')
        fw_rules = record.get('iptables_rules', [])
        try:
            apply_service_rules(svc_id, service_ip, fw_rules)
        except Exception as e:
            logger.warning(f'reapply_on_startup: apply_service_rules({svc_id}) failed: {e}')

    # Step 2: rebuild the Caddyfile from the routes stored on the records.
    try:
        stored_routes = (rec.get('caddy_route') for rec in records.values())
        caddy_routes = [route for route in stored_routes if route]
        self.caddy_manager.regenerate_with_installed(caddy_routes)
    except Exception as e:
        logger.warning(f'reapply_on_startup: caddy regenerate failed: {e}')

    # Step 3: bring up per-service compose stacks.
    composer = self.service_composer
    if composer is None:
        return
    try:
        composer.reapply_active_services()
    except Exception as e:
        logger.warning('reapply_on_startup: reapply_active_services failed: %s', e)
|