feat: Phase 3 — ServiceComposer deps + store install via per-service compose
Unit Tests / test (push) Successful in 11m21s
Unit Tests / test (push) Successful in 11m21s
ServiceStoreManager.install() now delegates container lifecycle to ServiceComposer (per-service docker-compose.yml) instead of appending to a shared compose override. This eliminates IP pool allocation, compose override rendering, and the single-stack docker exec approach. Changes: - service_composer.py: add _resolve_requires(), _resolve_dependents(), reapply_active_services() — dependency graph and startup reapply - service_store_manager.py: rewrite install() and remove() to use ServiceComposer; add _fetch_template(); delete _allocate_service_ip(), _render_compose_override(), _write_compose_override(); remove() now guards against removing services that others depend on - managers.py: pass service_composer= to ServiceStoreManager - Tests: 13 new composer dep tests; TestInstall/TestRemove rewritten for the new composer-driven path; test_optional_services_feature.py updated Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+74
-260
@@ -14,17 +14,14 @@ import logging
|
||||
import os
|
||||
import re
|
||||
import threading
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import json
|
||||
|
||||
import requests
|
||||
import yaml
|
||||
|
||||
from base_service_manager import BaseServiceManager
|
||||
from ip_utils import CONTAINER_OFFSETS
|
||||
from manifest_validator import validate_manifest, validate_provision_hook
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -33,15 +30,15 @@ logger = logging.getLogger(__name__)
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
SERVICE_POOL_START = 20
|
||||
SERVICE_POOL_END = 254
|
||||
|
||||
INDEX_URL_DEFAULT = (
|
||||
'https://git.pic.ngo/roof/pic-services/raw/branch/main/index.json'
|
||||
)
|
||||
MANIFEST_URL_TPL = (
|
||||
'https://git.pic.ngo/roof/pic-services/raw/branch/main/services/{id}/manifest.json'
|
||||
)
|
||||
TEMPLATE_URL_TPL = (
|
||||
'https://git.pic.ngo/roof/pic-services/raw/branch/main/services/{id}/compose-template.yml'
|
||||
)
|
||||
|
||||
IMAGE_ALLOWLIST_RE = re.compile(
|
||||
r'^git\.pic\.ngo/roof/[a-z0-9._/-]+(:[a-zA-Z0-9._-]+)?(@sha256:[a-f0-9]{64})?$'
|
||||
@@ -77,11 +74,13 @@ class ServiceStoreManager(BaseServiceManager):
|
||||
"""Manages service store: install, remove, and list available/installed services."""
|
||||
|
||||
def __init__(self, config_manager, caddy_manager, container_manager,
|
||||
data_dir: str = '', config_dir: str = ''):
|
||||
data_dir: str = '', config_dir: str = '',
|
||||
service_composer=None):
|
||||
super().__init__('service_store', data_dir, config_dir)
|
||||
self.config_manager = config_manager
|
||||
self.caddy_manager = caddy_manager
|
||||
self.container_manager = container_manager
|
||||
self.service_composer = service_composer
|
||||
self.compose_override = os.environ.get(
|
||||
'COMPOSE_SERVICES_PATH', '/app/docker-compose.services.yml'
|
||||
)
|
||||
@@ -239,125 +238,6 @@ class ServiceStoreManager(BaseServiceManager):
|
||||
|
||||
return (len(errors) == 0, errors)
|
||||
|
||||
# ── IP allocation ─────────────────────────────────────────────────────
|
||||
|
||||
def _allocate_service_ip(self, service_id: str) -> str:
|
||||
"""Allocate the next free IP from the service pool."""
|
||||
identity = self.config_manager.get_identity()
|
||||
ip_range = identity.get('ip_range', '172.20.0.0/16')
|
||||
|
||||
import ipaddress
|
||||
network = ipaddress.IPv4Network(ip_range, strict=False)
|
||||
base = int(network.network_address)
|
||||
|
||||
# IPs already assigned to named containers
|
||||
reserved_offsets = set(CONTAINER_OFFSETS.values())
|
||||
|
||||
# IPs already assigned to installed services
|
||||
service_ips: Dict[str, str] = identity.get('service_ips', {})
|
||||
taken_ips = set(service_ips.values())
|
||||
|
||||
for offset in range(SERVICE_POOL_START, SERVICE_POOL_END + 1):
|
||||
if offset in reserved_offsets:
|
||||
continue
|
||||
candidate = str(ipaddress.IPv4Address(base + offset))
|
||||
if candidate not in taken_ips:
|
||||
return candidate
|
||||
|
||||
raise RuntimeError('Service IP pool exhausted (offsets 20-254 all taken)')
|
||||
|
||||
# ── Compose override ──────────────────────────────────────────────────
|
||||
|
||||
def _render_compose_override(self, installed_records: dict) -> str:
|
||||
"""Generate docker-compose YAML override for all installed services."""
|
||||
services: Dict[str, Any] = {}
|
||||
|
||||
for svc_id, record in installed_records.items():
|
||||
manifest = record.get('manifest', {})
|
||||
container_name = record.get('container_name', svc_id)
|
||||
image = manifest.get('image', record.get('image', ''))
|
||||
service_ip = record.get('service_ip', '')
|
||||
|
||||
# Volumes
|
||||
volumes = []
|
||||
for vol in manifest.get('volumes', []):
|
||||
vol_name = vol.get('name', '')
|
||||
mount = vol.get('mount', '')
|
||||
if vol_name and mount:
|
||||
volumes.append(f'{vol_name}:{mount}')
|
||||
|
||||
# Environment
|
||||
environment: Dict[str, str] = {}
|
||||
for env_entry in manifest.get('env', []):
|
||||
k = env_entry.get('key', '')
|
||||
v = str(env_entry.get('value', ''))
|
||||
if k:
|
||||
environment[k] = v
|
||||
|
||||
svc_def: Dict[str, Any] = {
|
||||
'image': image,
|
||||
'container_name': container_name,
|
||||
'restart': 'unless-stopped',
|
||||
'logging': {
|
||||
'driver': 'json-file',
|
||||
'options': {
|
||||
'max-size': '10m',
|
||||
'max-file': '5',
|
||||
},
|
||||
},
|
||||
'networks': {
|
||||
'cell-network': {
|
||||
'ipv4_address': service_ip,
|
||||
}
|
||||
},
|
||||
}
|
||||
if volumes:
|
||||
svc_def['volumes'] = volumes
|
||||
if environment:
|
||||
svc_def['environment'] = environment
|
||||
|
||||
services[container_name] = svc_def
|
||||
|
||||
# Collect named volumes
|
||||
named_volumes: Dict[str, Any] = {}
|
||||
for svc_id, record in installed_records.items():
|
||||
manifest = record.get('manifest', {})
|
||||
for vol in manifest.get('volumes', []):
|
||||
vol_name = vol.get('name', '')
|
||||
if vol_name:
|
||||
named_volumes[vol_name] = None # Docker default driver
|
||||
|
||||
doc: Dict[str, Any] = {
|
||||
'version': '3.8',
|
||||
'services': services,
|
||||
'networks': {
|
||||
'cell-network': {
|
||||
'external': True,
|
||||
}
|
||||
},
|
||||
}
|
||||
if named_volumes:
|
||||
doc['volumes'] = named_volumes
|
||||
|
||||
return yaml.dump(doc, default_flow_style=False, allow_unicode=True)
|
||||
|
||||
def _write_compose_override(self, content: str) -> None:
|
||||
"""Atomic write of the compose override file."""
|
||||
tmp_path = self.compose_override + '.tmp'
|
||||
try:
|
||||
os.makedirs(os.path.dirname(os.path.abspath(self.compose_override)),
|
||||
exist_ok=True)
|
||||
except (PermissionError, OSError):
|
||||
pass
|
||||
with open(tmp_path, 'w') as f:
|
||||
f.write(content)
|
||||
f.flush()
|
||||
try:
|
||||
os.fsync(f.fileno())
|
||||
except OSError:
|
||||
pass
|
||||
os.replace(tmp_path, self.compose_override)
|
||||
|
||||
# ── Index / manifest fetching ─────────────────────────────────────────
|
||||
|
||||
def fetch_index(self) -> list:
|
||||
@@ -394,14 +274,22 @@ class ServiceStoreManager(BaseServiceManager):
|
||||
)
|
||||
return json.loads(content)
|
||||
|
||||
def _fetch_template(self, service_id: str, manifest: dict) -> str:
|
||||
"""Fetch the compose template for a service."""
|
||||
_SIZE_LIMIT = 256 * 1024
|
||||
url = TEMPLATE_URL_TPL.format(id=service_id)
|
||||
resp = requests.get(url, timeout=10, stream=True)
|
||||
resp.raise_for_status()
|
||||
content = resp.raw.read(_SIZE_LIMIT + 1, decode_content=True)
|
||||
if len(content) > _SIZE_LIMIT:
|
||||
raise ValueError(f'Compose template for {service_id} exceeds 256 KB limit')
|
||||
return content.decode('utf-8')
|
||||
|
||||
# ── Core operations ───────────────────────────────────────────────────
|
||||
|
||||
def install(self, service_id: str) -> dict:
|
||||
"""Install a service from the store."""
|
||||
from firewall_manager import apply_service_rules
|
||||
|
||||
with self._lock:
|
||||
# Already installed?
|
||||
installed = self.config_manager.get_installed_services()
|
||||
if service_id in installed:
|
||||
return {'ok': True, 'already_installed': True}
|
||||
@@ -416,154 +304,80 @@ class ServiceStoreManager(BaseServiceManager):
|
||||
if not ok:
|
||||
return {'ok': False, 'errors': errs}
|
||||
|
||||
# Allocate IP
|
||||
try:
|
||||
ip = self._allocate_service_ip(service_id)
|
||||
except RuntimeError as e:
|
||||
return {'ok': False, 'error': str(e)}
|
||||
ok2, errs2 = validate_manifest(manifest)
|
||||
if not ok2:
|
||||
return {'ok': False, 'errors': errs2}
|
||||
|
||||
# Build install record
|
||||
# Dependency check
|
||||
if self.service_composer is not None:
|
||||
err = self.service_composer._resolve_requires(manifest, installed)
|
||||
if err:
|
||||
return {'ok': False, 'error': err}
|
||||
|
||||
# Fetch compose template
|
||||
try:
|
||||
template_content = self._fetch_template(service_id, manifest)
|
||||
except Exception as e:
|
||||
return {'ok': False, 'error': f'Failed to fetch compose template: {e}'}
|
||||
|
||||
# Write compose file and start containers (validation inside write_compose)
|
||||
if self.service_composer is not None:
|
||||
try:
|
||||
result = self.service_composer.install(service_id, manifest, template_content)
|
||||
except ValueError as e:
|
||||
return {'ok': False, 'error': str(e)}
|
||||
except Exception as e:
|
||||
return {'ok': False, 'error': f'Failed to start service: {e}'}
|
||||
if not result.get('ok'):
|
||||
return {'ok': False, 'error': result.get('error') or result.get('stderr', 'docker up failed')}
|
||||
|
||||
# Persist minimal install record
|
||||
record = {
|
||||
'id': service_id,
|
||||
'name': manifest.get('name', service_id),
|
||||
'container_name': manifest['container_name'],
|
||||
'image': manifest.get('image', ''),
|
||||
'service_ip': ip,
|
||||
'caddy_route': manifest.get('caddy_route'),
|
||||
'iptables_rules': manifest.get('iptables_rules', []),
|
||||
'manifest': manifest,
|
||||
'installed_at': datetime.utcnow().isoformat(),
|
||||
}
|
||||
|
||||
# Persist to config
|
||||
self.config_manager.set_installed_service(service_id, record)
|
||||
identity = self.config_manager.get_identity()
|
||||
service_ips = dict(identity.get('service_ips', {}))
|
||||
service_ips[service_id] = ip
|
||||
self.config_manager.set_identity_field('service_ips', service_ips)
|
||||
|
||||
# Write compose override
|
||||
all_installed = self.config_manager.get_installed_services()
|
||||
# Regenerate Caddy (registry now drives routes, no caddy_routes list needed)
|
||||
try:
|
||||
content = self._render_compose_override(all_installed)
|
||||
self._write_compose_override(content)
|
||||
self.caddy_manager.regenerate_with_installed([])
|
||||
except Exception as e:
|
||||
logger.error(f'Failed to write compose override: {e}')
|
||||
logger.warning('install: caddy regenerate failed for %s (non-fatal): %s', service_id, e)
|
||||
|
||||
# Apply iptables rules (best-effort)
|
||||
try:
|
||||
apply_service_rules(service_id, ip, manifest.get('iptables_rules', []))
|
||||
except Exception as e:
|
||||
logger.warning(f'apply_service_rules for {service_id} failed (non-fatal): {e}')
|
||||
|
||||
# Regenerate Caddyfile
|
||||
try:
|
||||
caddy_routes = [
|
||||
r.get('caddy_route')
|
||||
for r in all_installed.values()
|
||||
if r.get('caddy_route')
|
||||
]
|
||||
self.caddy_manager.regenerate_with_installed(caddy_routes)
|
||||
except Exception as e:
|
||||
logger.warning(f'caddy regenerate for {service_id} failed (non-fatal): {e}')
|
||||
|
||||
# Start the container via docker compose
|
||||
base_compose = os.environ.get('COMPOSE_FILE', '/app/docker-compose.yml')
|
||||
try:
|
||||
result = subprocess.run(
|
||||
['docker', 'compose',
|
||||
'-f', base_compose,
|
||||
'-f', self.compose_override,
|
||||
'up', '-d', manifest['container_name']],
|
||||
capture_output=True, text=True, timeout=120,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
logger.warning(
|
||||
f'docker compose up for {service_id} failed: {result.stderr.strip()}'
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f'docker compose up for {service_id} failed (non-fatal): {e}')
|
||||
|
||||
return {
|
||||
'ok': True,
|
||||
'service_ip': ip,
|
||||
'container_name': manifest['container_name'],
|
||||
}
|
||||
return {'ok': True}
|
||||
|
||||
def remove(self, service_id: str, purge_data: bool = False) -> dict:
|
||||
"""Remove an installed service."""
|
||||
from firewall_manager import clear_service_rules
|
||||
|
||||
with self._lock:
|
||||
installed = self.config_manager.get_installed_services()
|
||||
record = installed.get(service_id)
|
||||
if not record:
|
||||
if service_id not in installed:
|
||||
return {'ok': False, 'error': f'Service {service_id} is not installed'}
|
||||
|
||||
container_name = record.get('container_name', service_id)
|
||||
manifest = record.get('manifest', {})
|
||||
base_compose = os.environ.get('COMPOSE_FILE', '/app/docker-compose.yml')
|
||||
# Prevent removing a service that others depend on
|
||||
if self.service_composer is not None:
|
||||
dependents = self.service_composer._resolve_dependents(service_id, installed)
|
||||
if dependents:
|
||||
return {
|
||||
'ok': False,
|
||||
'error': f'Cannot remove {service_id}: required by {", ".join(sorted(dependents))}',
|
||||
}
|
||||
|
||||
# Stop and remove container
|
||||
try:
|
||||
subprocess.run(
|
||||
['docker', 'compose',
|
||||
'-f', base_compose,
|
||||
'-f', self.compose_override,
|
||||
'stop', container_name],
|
||||
capture_output=True, text=True, timeout=60,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f'docker compose stop for {service_id} failed (non-fatal): {e}')
|
||||
# Stop and remove containers (best-effort)
|
||||
if self.service_composer is not None:
|
||||
try:
|
||||
self.service_composer.remove(service_id, purge_data=purge_data)
|
||||
except Exception as e:
|
||||
logger.warning('remove: composer.remove failed for %s (non-fatal): %s', service_id, e)
|
||||
|
||||
try:
|
||||
subprocess.run(
|
||||
['docker', 'rm', '-f', container_name],
|
||||
capture_output=True, text=True, timeout=30,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f'docker rm for {service_id} failed (non-fatal): {e}')
|
||||
|
||||
# Clear iptables rules
|
||||
try:
|
||||
clear_service_rules(service_id)
|
||||
except Exception as e:
|
||||
logger.warning(f'clear_service_rules for {service_id} failed (non-fatal): {e}')
|
||||
|
||||
# Remove from config, regenerate compose + caddy
|
||||
# Remove from config
|
||||
self.config_manager.remove_installed_service(service_id)
|
||||
remaining = self.config_manager.get_installed_services()
|
||||
|
||||
# Regenerate Caddy
|
||||
try:
|
||||
content = self._render_compose_override(remaining)
|
||||
self._write_compose_override(content)
|
||||
self.caddy_manager.regenerate_with_installed([])
|
||||
except Exception as e:
|
||||
logger.error(f'Failed to write compose override after remove: {e}')
|
||||
|
||||
try:
|
||||
caddy_routes = [
|
||||
r.get('caddy_route')
|
||||
for r in remaining.values()
|
||||
if r.get('caddy_route')
|
||||
]
|
||||
self.caddy_manager.regenerate_with_installed(caddy_routes)
|
||||
except Exception as e:
|
||||
logger.warning(f'caddy regenerate after remove failed (non-fatal): {e}')
|
||||
|
||||
# Purge named volumes if requested
|
||||
if purge_data:
|
||||
for vol in manifest.get('volumes', []):
|
||||
vol_name = vol.get('name', '')
|
||||
if vol_name:
|
||||
try:
|
||||
subprocess.run(
|
||||
['docker', 'volume', 'rm', vol_name],
|
||||
capture_output=True, text=True, timeout=30,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f'docker volume rm {vol_name} failed (non-fatal): {e}'
|
||||
)
|
||||
logger.warning('remove: caddy regenerate failed for %s (non-fatal): %s', service_id, e)
|
||||
|
||||
return {'ok': True}
|
||||
|
||||
@@ -581,13 +395,6 @@ class ServiceStoreManager(BaseServiceManager):
|
||||
if not installed:
|
||||
return
|
||||
|
||||
# Regenerate compose override in case it was deleted
|
||||
try:
|
||||
content = self._render_compose_override(installed)
|
||||
self._write_compose_override(content)
|
||||
except Exception as e:
|
||||
logger.warning(f'reapply_on_startup: compose override write failed: {e}')
|
||||
|
||||
# Re-apply iptables rules
|
||||
for svc_id, record in installed.items():
|
||||
ip = record.get('service_ip', '')
|
||||
@@ -607,3 +414,10 @@ class ServiceStoreManager(BaseServiceManager):
|
||||
self.caddy_manager.regenerate_with_installed(caddy_routes)
|
||||
except Exception as e:
|
||||
logger.warning(f'reapply_on_startup: caddy regenerate failed: {e}')
|
||||
|
||||
# Bring up per-service compose stacks
|
||||
if self.service_composer is not None:
|
||||
try:
|
||||
self.service_composer.reapply_active_services()
|
||||
except Exception as e:
|
||||
logger.warning('reapply_on_startup: reapply_active_services failed: %s', e)
|
||||
|
||||
Reference in New Issue
Block a user