fix: architecture audit — security, atomicity, broken endpoints, test coverage

Sprint 1 — Security & correctness:
- Restore all 10 commented-out is_local_request() checks (vault, containers, images, volumes)
- Fix XFF spoofing: trust only the LAST X-Forwarded-For entry (the one Caddy appends), not every entry in the header
- Require prefix length in wireguard.address (was accepting bare IPs like 10.0.0.1)
- Validate service_access list in add_peer (valid: calendar/files/mail/webdav)
- Fix dhcp/reservations POST/DELETE: unpack mac/ip/hostname from body (was passing dict as positional arg)
- Fix network/test POST: remove spurious data arg (test_connectivity takes no args)
- Fix remove_peer: clear iptables rules and regenerate DNS ACLs on deletion (was leaving stale rules)
- Fix CoreDNS reload: SIGHUP → SIGUSR1 (SIGHUP kills the process; SIGUSR1 triggers the reload plugin)
- Remove local.{domain} block from Corefile template (local.zone doesn't exist, caused log spam)
- Fix routing_manager._remove_nat_rule: targeted -D instead of flushing entire POSTROUTING chain

Sprint 2 — State consistency:
- Atomic config writes in config_manager, ip_utils, firewall_manager, network_manager
  (write to .tmp → fsync → os.replace, prevents truncated files on kill)
- backup_config: now also backs up Caddyfile, Corefile, .env, DNS zone files
- restore_config: restores all of the above so config stays consistent after restore

Sprint 3 — Dead code / documentation:
- Remove CellManager instantiation from app startup (was never called, double-instantiated all managers)
- Document routing_manager scope (targets host, not cell-wireguard; methods not called by any active route)

Sprint 4 — Test infrastructure:
- Add tests/conftest.py with shared tmp_dir, tmp_config_dir, tmp_data_dir, flask_client fixtures
- Add tests/test_config_validation.py: 400 paths for ip_range, port, wireguard.address validation
- Add tests/test_ip_utils_caddyfile.py: 14 tests for write_caddyfile (was completely untested)
- Expand test_app_misc.py: 7 new is_local_request tests covering XFF spoofing and cell-network IPs
- Add --cov-fail-under=70 to make test-coverage
- Add pre-commit hook that runs pytest before every commit

414 tests pass (was 372).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-24 03:27:52 -04:00
parent 55bec04603
commit d5018c2b34
13 changed files with 801 additions and 633 deletions
+62 -22
View File
@@ -117,11 +117,15 @@ class ConfigManager:
return {}
def _save_all_configs(self):
    """Save all service configurations to the unified config file (atomic write).

    ``self.configs`` is serialized as JSON into a sibling ``.tmp`` file,
    flushed and fsynced, and then moved over ``self.config_file`` with
    ``os.replace``. The live config file is never opened for writing, so an
    interrupted save cannot leave it truncated.

    Filesystem errors (``OSError``, which includes ``PermissionError``) are
    logged and swallowed: saving remains best-effort. Any other exception,
    e.g. a ``TypeError`` for non-serializable values, propagates to the
    caller. In both cases the temporary file is removed.
    """
    tmp = self.config_file.with_suffix('.tmp')
    try:
        self.config_file.parent.mkdir(parents=True, exist_ok=True)
        try:
            with open(tmp, 'w') as f:
                json.dump(self.configs, f, indent=2)
                f.flush()
                # Force the data to disk before the rename makes it visible.
                os.fsync(f.fileno())
            os.replace(tmp, self.config_file)
        except BaseException:
            # Do not leave a half-written temp file next to the real config.
            try:
                os.unlink(tmp)
            except OSError:
                pass
            raise
    except OSError as e:
        logger.warning(f"Could not save configuration to {self.config_file}: {e}")
@@ -208,62 +212,98 @@ class ConfigManager:
}
def backup_config(self) -> str:
    """Create a backup of cell_config.json, secrets, Caddyfile, .env, Corefile, and DNS zones.

    A directory named ``backup_<YYYYmmdd_HHMMSS>`` is created under
    ``self.backup_dir``. Each source file is copied only if it exists:

    * ``self.config_file`` as ``cell_config.json``
    * ``self.secrets_file`` as ``secrets.yaml``
    * ``<CONFIG_DIR>/caddy/Caddyfile``, ``<CONFIG_DIR>/dns/Corefile`` and the
      file named by ``ENV_FILE``
    * every ``*.zone`` file in ``<DATA_DIR>/dns`` into ``dns_zones/``

    ``CONFIG_DIR``, ``DATA_DIR`` and ``ENV_FILE`` are read from the
    environment and default to ``/app/config``, ``/app/data`` and
    ``/app/.env``. A ``manifest.json`` listing the backup contents is
    written last.

    Returns:
        The backup id (the name of the backup directory).

    Raises:
        Exception: any error raised while creating the backup is logged
            and re-raised.
    """
    try:
        # Capture the time once so the directory name and the manifest
        # timestamp always describe the same instant.
        now = datetime.now()
        backup_id = f"backup_{now.strftime('%Y%m%d_%H%M%S')}"
        backup_path = self.backup_dir / backup_id
        backup_path.mkdir(parents=True, exist_ok=True)

        # Runtime-generated files that must match cell_config.json after restore
        config_dir = Path(os.environ.get('CONFIG_DIR', '/app/config'))
        data_dir = Path(os.environ.get('DATA_DIR', '/app/data'))
        env_file = Path(os.environ.get('ENV_FILE', '/app/.env'))

        sources = [
            (self.config_file, 'cell_config.json'),
            (self.secrets_file, 'secrets.yaml'),
            (config_dir / 'caddy' / 'Caddyfile', 'Caddyfile'),
            (config_dir / 'dns' / 'Corefile', 'Corefile'),
            (env_file, '.env'),
        ]
        for src, dest_name in sources:
            if src.exists():
                shutil.copy2(src, backup_path / dest_name)

        # DNS zone files
        dns_data = data_dir / 'dns'
        if dns_data.is_dir():
            zones_dir = backup_path / 'dns_zones'
            zones_dir.mkdir(exist_ok=True)
            for zone_file in dns_data.glob('*.zone'):
                shutil.copy2(zone_file, zones_dir / zone_file.name)

        # The listing is taken before manifest.json is written, so the
        # manifest does not list itself.
        manifest = {
            "backup_id": backup_id,
            "timestamp": now.isoformat(),
            "services": list(self.service_schemas.keys()),
            "files": [f.name for f in backup_path.iterdir()],
        }
        with open(backup_path / 'manifest.json', 'w') as f:
            json.dump(manifest, f, indent=2)

        logger.info(f"Created configuration backup: {backup_id}")
        return backup_id
    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception(f"Error creating backup: {e}")
        raise
def restore_config(self, backup_id: str) -> bool:
"""Restore configuration from backup"""
"""Restore cell_config.json, secrets, Caddyfile, .env, Corefile, and DNS zones from backup."""
try:
backup_path = self.backup_dir / backup_id
if not backup_path.exists():
raise ValueError(f"Backup {backup_id} not found")
# Read manifest
manifest_file = backup_path / 'manifest.json'
if not manifest_file.exists():
raise ValueError(f"Backup manifest not found")
with open(manifest_file, 'r') as f:
manifest = json.load(f)
# Restore config files
# Restore primary config
config_backup = backup_path / 'cell_config.json'
if config_backup.exists():
shutil.copy2(config_backup, self.config_file)
# Restore secrets file if it exists
secrets_backup = backup_path / 'secrets.yaml'
if secrets_backup.exists():
shutil.copy2(secrets_backup, self.secrets_file)
# Reload configurations — restore only what was in the backup
# Restore runtime-generated files so they stay consistent with cell_config.json
config_dir = Path(os.environ.get('CONFIG_DIR', '/app/config'))
data_dir = Path(os.environ.get('DATA_DIR', '/app/data'))
env_file = Path(os.environ.get('ENV_FILE', '/app/.env'))
restore_map = [
(backup_path / 'Caddyfile', config_dir / 'caddy' / 'Caddyfile'),
(backup_path / 'Corefile', config_dir / 'dns' / 'Corefile'),
(backup_path / '.env', env_file),
]
for src, dest in restore_map:
if src.exists():
dest.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(src, dest)
# Restore DNS zone files
zones_backup = backup_path / 'dns_zones'
if zones_backup.is_dir():
dns_data = data_dir / 'dns'
dns_data.mkdir(parents=True, exist_ok=True)
for zone_file in zones_backup.glob('*.zone'):
shutil.copy2(zone_file, dns_data / zone_file.name)
self.configs = self._load_all_configs()
logger.info(f"Restored configuration from backup: {backup_id}")
return True