feat: Phase 0 — manifest_validator, compose YAML safety check, cap_add allowlist, backend denylist, provision hook enforcement, size cap

Introduces api/manifest_validator.py as a single security chokepoint
imported by both ServiceComposer and ServiceStoreManager:

- validate_manifest(): rejects kind=builtin, reserved container names,
  reserved subdomains, backend denylist (localhost, cell-api, etc.),
  cap_add outside allowlist / in denylist, shell-string provision hooks,
  and env values with shell-special characters
- validate_rendered_compose(): walks the rendered YAML and rejects
  privileged:true, host network/pid/ipc/userns, absolute bind mounts,
  denied capabilities, devices key, apparmor/seccomp unconfined, and
  string-form command/entrypoint (shell-injection vector)
- validate_provision_hook(): requires argv list form, lowercase binary,
  rejects NUL bytes

ServiceStoreManager changes:
- _validate_manifest() delegates to validate_manifest() after existing checks
- _fetch_manifest() and fetch_index() now stream with a 256 KB size cap
  (prevents memory exhaustion from a malicious or compromised index)
- Digest-pin warning for images missing @sha256 (hard error for unknown
  registries, warning for git.pic.ngo/roof/* and TRUSTED_IMAGES_NO_DIGEST)

ServiceComposer changes:
- write_compose() calls validate_rendered_compose() before any disk write
  so no partial file is left if validation fails
- render_template() substitutes ${PIC_DATA_DIR} with the resolved data_dir path

102 new tests in tests/test_manifest_validator.py covering all five P0
security issues.  Existing test mocks updated to use streaming response
pattern (stream=True + raw.read) and valid compose YAML templates.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-29 07:10:12 -04:00
parent 5e438aa991
commit c40919d374
6 changed files with 1412 additions and 38 deletions
+18 -30
View File
@@ -6,6 +6,7 @@ All external I/O (requests, subprocess, docker, config_manager, caddy_manager,
container_manager) is mocked so these tests run without any live infrastructure.
"""
import json
import os
import sys
import time
@@ -24,6 +25,17 @@ from ip_utils import CONTAINER_OFFSETS
# Helpers
# ---------------------------------------------------------------------------
def _make_streaming_mock(data):
"""Return a MagicMock that simulates a requests streaming response for ``data``."""
encoded = json.dumps(data).encode()
raw = MagicMock()
raw.read.return_value = encoded
mock_resp = MagicMock(status_code=200)
mock_resp.raise_for_status = MagicMock()
mock_resp.raw = raw
return mock_resp
def _make_manager(tmp_dir=None, installed=None, identity=None):
"""Build a ServiceStoreManager backed by mock dependencies."""
cm = MagicMock()
@@ -711,11 +723,7 @@ class TestListServices(unittest.TestCase):
def test_returns_available_and_installed_keys(self):
mgr = _make_manager()
with patch('service_store_manager.requests.get') as mock_get:
mock_get.return_value = MagicMock(
status_code=200,
json=lambda: self._fake_index(),
)
mock_get.return_value.raise_for_status = MagicMock()
mock_get.return_value = _make_streaming_mock(self._fake_index())
result = mgr.list_services()
self.assertIn('available', result)
self.assertIn('installed', result)
@@ -723,11 +731,7 @@ class TestListServices(unittest.TestCase):
def test_available_list_comes_from_index(self):
mgr = _make_manager()
with patch('service_store_manager.requests.get') as mock_get:
mock_get.return_value = MagicMock(
status_code=200,
json=lambda: self._fake_index(),
)
mock_get.return_value.raise_for_status = MagicMock()
mock_get.return_value = _make_streaming_mock(self._fake_index())
result = mgr.list_services()
self.assertEqual(len(result['available']), 2)
self.assertEqual(result['available'][0]['id'], 'svc1')
@@ -736,22 +740,14 @@ class TestListServices(unittest.TestCase):
installed = {'svc1': {'id': 'svc1', 'name': 'Service One'}}
mgr = _make_manager(installed=installed)
with patch('service_store_manager.requests.get') as mock_get:
mock_get.return_value = MagicMock(
status_code=200,
json=lambda: self._fake_index(),
)
mock_get.return_value.raise_for_status = MagicMock()
mock_get.return_value = _make_streaming_mock(self._fake_index())
result = mgr.list_services()
self.assertIn('svc1', result['installed'])
def test_cache_prevents_second_http_request_within_ttl(self):
mgr = _make_manager()
with patch('service_store_manager.requests.get') as mock_get:
mock_get.return_value = MagicMock(
status_code=200,
json=lambda: self._fake_index(),
)
mock_get.return_value.raise_for_status = MagicMock()
mock_get.return_value = _make_streaming_mock(self._fake_index())
mgr.fetch_index()
mgr.fetch_index()
# Only one HTTP call despite two fetches
@@ -761,11 +757,7 @@ class TestListServices(unittest.TestCase):
mgr = _make_manager()
mgr._cache_ttl = 1 # 1 second TTL for the test
with patch('service_store_manager.requests.get') as mock_get:
mock_get.return_value = MagicMock(
status_code=200,
json=lambda: self._fake_index(),
)
mock_get.return_value.raise_for_status = MagicMock()
mock_get.return_value = _make_streaming_mock(self._fake_index())
mgr.fetch_index()
# Simulate TTL expiry by winding back the cache timestamp
mgr._index_cache_time -= 2
@@ -776,11 +768,7 @@ class TestListServices(unittest.TestCase):
"""Index JSON wrapped in {'services': [...]} is also handled."""
mgr = _make_manager()
with patch('service_store_manager.requests.get') as mock_get:
mock_get.return_value = MagicMock(
status_code=200,
json=lambda: {'services': self._fake_index()},
)
mock_get.return_value.raise_for_status = MagicMock()
mock_get.return_value = _make_streaming_mock({'services': self._fake_index()})
result = mgr.list_services()
self.assertEqual(len(result['available']), 2)