8b50fb1036
Unit Tests / test (push) Successful in 12m47s
Add AuditManager (api/audit_manager.py): JSONL append-only log at data/api/audit/audit.log with SHA-256 hash chain for tamper detection, verify endpoint, size-based rotation, and automatic redaction of secret fields before any entry is written. Supports structured query (actor, action, date range) and CSV export. Wire an @app.after_request hook in app.py that fires on every mutating /api/* request: captures actor, role, remote IP, and maps the route + method to a human-readable action via ROUTE_ACTION_MAP. Explicit audit entries for password_change and password_reset are added in auth_routes.py so those events record the actor without logging secret values. Expose an admin-only blueprint (api/routes/audit.py): GET /api/audit — paginated query GET /api/audit/export — CSV download GET /api/audit/verify — hash-chain integrity check Register AuditManager in managers.py and add api/audit to config_manager.py critical_data_paths so it is included in backups and restored with other persistent state. Add Activity page (webui/src/pages/Activity.jsx, admin-only) reachable from the nav in App.jsx. New auditAPI helper in api.js covers all three endpoints. Tests: test_audit_manager.py (unit: hash chain, redaction, rotation, query, csv, verify) and test_audit_hook_routes.py (integration: hook fires on mutating routes, skips safe methods, records actor/ip/action, backup-inclusion assertion). Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
331 lines
14 KiB
Python
331 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Audit Manager for Personal Internet Cell.
|
|
|
|
Owner-visible, append-only audit trail of WHO (actor + role + ip) did WHAT
|
|
(action) to WHICH target, WHEN, with a redacted summary. Storage is a JSONL
|
|
file with a per-entry SHA-256 hash chain so tampering is detectable. Request
|
|
bodies and secret values are never written; summaries only ever list changed
|
|
config KEY NAMES, never their values.
|
|
"""
|
|
|
|
import os
|
|
import io
|
|
import re
|
|
import csv
|
|
import json
|
|
import hashlib
|
|
import logging
|
|
import threading
|
|
from datetime import datetime
|
|
from typing import Dict, List, Optional, Any
|
|
|
|
from base_service_manager import BaseServiceManager
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _utcnow_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision).

    Uses a timezone-aware ``datetime.now(timezone.utc)`` rather than the
    deprecated naive ``datetime.utcnow()``; the formatted output is identical.
    """
    # Function-scope import keeps this block self-contained: the module header
    # only imports ``datetime`` from the ``datetime`` package.
    from datetime import timezone

    return datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
|
|
|
|
|
|
# Keys whose values must never be recorded — name-only in summaries.
_SECRET_KEY_RE = re.compile(r'(pass|secret|key|token|private|cred|otp|psk)', re.IGNORECASE)

# Final scrub of anything that looks like base64 key material / encoded blobs:
# 40+ base64-alphabet characters, optionally followed by '=' padding.
_BASE64_BLOCK_RE = re.compile(r'[A-Za-z0-9+/]{40,}={0,2}')

# Secret-looking prefixes: bcrypt hashes ($2a$/$2b$/$2y$), age secret keys,
# age1... recipient strings, and PEM "-----BEGIN ..." header lines.
# The `age1` alternative is guarded by a negative lookbehind so it only fires
# at the start of a token; without it, ordinary words that merely contain
# "age1" (e.g. "image10", "page1x") were mangled into "im[REDACTED]".
_SECRET_PREFIX_RE = re.compile(
    r'(\$2[aby]\$[^\s]+'
    r'|AGE-SECRET-KEY-[^\s]+'
    r'|(?<![A-Za-z0-9])age1[^\s]+'
    r'|-----BEGIN[^\n]+)'
)

# The only values accepted for an entry's `result` field.
_VALID_RESULTS = ('success', 'failure')
|
|
|
|
|
|
class AuditManager(BaseServiceManager):
    """Append-only, hash-chained audit trail.

    Entries are stored one JSON object per line in ``audit.log`` under
    ``<data_dir>/api/audit``. When ``tamper_chain`` is enabled each entry
    carries the SHA-256 of its own canonical JSON (``hash``) and the hash of
    the entry written before it (``prev_hash``), so edits, insertions and
    deletions in the middle of the log are detectable via ``verify_chain``.
    The live file is rotated to ``audit.log.1`` .. ``audit.log.N`` once it
    reaches ``MAX_FILE_SIZE``.
    """

    MAX_FILE_SIZE = 10 * 1024 * 1024  # 10 MB before rotation
    BACKUP_COUNT = 10  # audit.log.1 .. audit.log.10

    def __init__(self, data_dir: str = '/app/data', config_dir: str = '/app/config',
                 tamper_chain: bool = True):
        """Create the audit directory and recover the chain position.

        Args:
            data_dir: Root data directory; the log lives in ``api/audit`` below it.
            config_dir: Passed through to ``BaseServiceManager``.
            tamper_chain: When False, entries are written with empty
                ``hash``/``prev_hash`` and ``verify_chain`` is a no-op.
        """
        super().__init__('audit', data_dir=data_dir, config_dir=config_dir)
        self.tamper_chain = tamper_chain
        # Re-entrant so public methods can call each other while holding it.
        self._lock = threading.RLock()
        self._audit_dir = os.path.join(self.data_dir, 'api', 'audit')
        self._audit_file = os.path.join(self._audit_dir, 'audit.log')
        self._seq = 0          # seq of the last entry successfully written
        self._prev_hash = ''   # hash of the last entry successfully written
        self.safe_makedirs(self._audit_dir)
        self._load_chain_state()

    # ── chain bootstrap ─────────────────────────────────────────────────────
    @staticmethod
    def _last_nonblank_line(path: str) -> str:
        """Return the last non-blank line of *path* (stripped), or ''."""
        last = ''
        with open(path, 'r', encoding='utf-8', errors='ignore') as f:
            for raw in f:
                stripped = raw.strip()
                if stripped:
                    last = stripped
        return last

    def _load_chain_state(self) -> None:
        """Recover seq + prev_hash from the most recently written entry.

        The live file is consulted first. If it is missing or empty (e.g. the
        process stopped right after a rotation, before the next append), the
        newest rotated segment is used instead so the chain and the sequence
        counter continue rather than silently restarting at zero.
        Never raises; on any error the state stays at its defaults.
        """
        try:
            # _segment_files() is ordered newest -> oldest.
            for seg in self._segment_files():
                last = self._last_nonblank_line(seg)
                if not last:
                    continue
                entry = json.loads(last)
                self._seq = int(entry.get('seq', 0))
                self._prev_hash = entry.get('hash', '') or ''
                return
        except Exception as e:
            logger.warning("audit: could not load chain state: %s", e)

    # ── redaction ───────────────────────────────────────────────────────────
    @staticmethod
    def _scrub(text: str) -> str:
        """Strip anything resembling a secret value from a summary string.

        Applies the secret-prefix pattern first, then the generic base64-block
        pattern. Falsy input yields ''.
        """
        if not text:
            return ''
        text = _SECRET_PREFIX_RE.sub('[REDACTED]', text)
        text = _BASE64_BLOCK_RE.sub('[REDACTED]', text)
        return text

    @classmethod
    def _redact(cls, entry: Dict[str, Any]) -> Dict[str, Any]:
        """Enforce the redaction rules on a built entry before write.

        Only the free-text fields ``summary``, ``target_id``, ``action`` and
        ``path`` are scrubbed. Other fields are deliberately left alone:
        ``prev_hash`` in particular is a 64-character hex digest and would
        itself match the base64-block pattern.
        Request bodies are never present — the caller passes only a summary.

        The entry is modified in place and also returned.
        """
        for field in ('summary', 'target_id', 'action', 'path'):
            val = entry.get(field)
            if isinstance(val, str):
                entry[field] = cls._scrub(val)
        return entry

    @classmethod
    def summarize_keys(cls, keys: List[str]) -> str:
        """Build a redacted summary listing changed config KEY NAMES only.

        Secret-looking key names are kept (they are names, not values) but the
        whole string is still scrubbed of any accidental value material.
        Returns ``'no changes'`` when no non-None key is given.
        """
        names = [str(k) for k in keys if k is not None]
        if not names:
            return 'no changes'
        return cls._scrub('changed: ' + ', '.join(names))

    # ── hashing ─────────────────────────────────────────────────────────────
    @staticmethod
    def _canonical(entry: Dict[str, Any]) -> str:
        """Serialize *entry* deterministically (sorted keys, no whitespace)."""
        return json.dumps(entry, sort_keys=True, separators=(',', ':'), ensure_ascii=False)

    def _hash_entry(self, entry_without_hash: Dict[str, Any]) -> str:
        """Return the SHA-256 hex digest of the canonical form of the entry."""
        return hashlib.sha256(self._canonical(entry_without_hash).encode('utf-8')).hexdigest()

    # ── recording ───────────────────────────────────────────────────────────
    def record(self, actor: str, role: str, ip: str, action: str,
               target_type: str = '', target_id: str = '', summary: str = '',
               result: str = 'success', status: int = 200, method: str = '',
               path: str = '', request_id: str = '') -> Optional[Dict[str, Any]]:
        """Append one redacted, hash-chained JSON line. Never raises.

        Args:
            actor: Who performed the action; defaults to ``'anonymous'``.
            role: Actor's role; defaults to ``'system'``.
            ip: Remote address.
            action: Human-readable action name.
            target_type, target_id: What was acted upon.
            summary: Redacted description (key names only, never values).
            result: ``'success'`` or ``'failure'``; any other value is derived
                from ``status`` (< 400 means success).
            status: HTTP status code.
            method, path, request_id: Request metadata.

        Returns:
            The entry dict as written, or ``None`` if anything failed.
        """
        try:
            with self._lock:
                self._maybe_rotate()
                if result not in _VALID_RESULTS:
                    result = 'success' if int(status or 200) < 400 else 'failure'
                # Work on a tentative seq: the counter is only advanced once
                # the line is on disk, so a failed write leaves no gap.
                next_seq = self._seq + 1
                entry: Dict[str, Any] = {
                    'ts': _utcnow_iso(),
                    'actor': actor or 'anonymous',
                    'role': role or 'system',
                    'ip': ip or '',
                    'action': action or '',
                    'target_type': target_type or '',
                    'target_id': target_id or '',
                    'summary': summary or '',
                    'result': result,
                    'status': int(status or 0),
                    'method': method or '',
                    'path': path or '',
                    'request_id': request_id or '',
                    'seq': next_seq,
                    'prev_hash': self._prev_hash if self.tamper_chain else '',
                }
                entry = self._redact(entry)
                # The hash covers every field except 'hash' itself, so it must
                # be computed after redaction and before the key is added.
                entry['hash'] = self._hash_entry(entry) if self.tamper_chain else ''
                self._append_line(json.dumps(entry, ensure_ascii=False))
                # Commit chain state only after the write succeeded.
                self._seq = next_seq
                self._prev_hash = entry['hash']
                return entry
        except Exception as e:
            logger.warning("audit.record failed: %s", e)
            return None

    def _append_line(self, line: str) -> None:
        """Append *line* plus a newline to the live file (mode 0600)."""
        self.safe_makedirs(self._audit_dir)
        fd = os.open(self._audit_file, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o600)
        try:
            os.write(fd, (line + '\n').encode('utf-8'))
        finally:
            os.close(fd)
        # The mode passed to os.open only applies on creation; re-assert it in
        # case the file already existed with wider permissions. Best effort.
        try:
            os.chmod(self._audit_file, 0o600)
        except OSError:
            pass

    # ── rotation ────────────────────────────────────────────────────────────
    def _maybe_rotate(self) -> None:
        """Rotate the live file once it has reached ``MAX_FILE_SIZE``.

        Shifts audit.log.(N-1) -> audit.log.N down to audit.log -> audit.log.1;
        the oldest segment is overwritten. Failures are logged, not raised.
        """
        try:
            if not os.path.exists(self._audit_file):
                return
            if os.path.getsize(self._audit_file) < self.MAX_FILE_SIZE:
                return
        except OSError:
            return
        # Walk from the oldest slot down so no segment is clobbered early.
        for i in range(self.BACKUP_COUNT - 1, 0, -1):
            src = f"{self._audit_file}.{i}"
            dst = f"{self._audit_file}.{i + 1}"
            if os.path.exists(src):
                try:
                    os.replace(src, dst)
                except OSError as e:
                    logger.warning("audit rotate %s->%s: %s", src, dst, e)
        try:
            os.replace(self._audit_file, f"{self._audit_file}.1")
        except OSError as e:
            logger.warning("audit rotate live->.1: %s", e)

    def _segment_files(self) -> List[str]:
        """Live file first (newest), then rotated segments .1 .. .N (older)."""
        files = []
        if os.path.exists(self._audit_file):
            files.append(self._audit_file)
        for i in range(1, self.BACKUP_COUNT + 1):
            seg = f"{self._audit_file}.{i}"
            if os.path.exists(seg):
                files.append(seg)
        return files

    # ── reading / filtering ─────────────────────────────────────────────────
    @staticmethod
    def _matches(entry: Dict[str, Any], filters: Dict[str, Any]) -> bool:
        """Return True when *entry* satisfies every non-empty filter.

        ``actor``, ``action``, ``target_type``, ``target_id`` and ``result``
        are exact string matches. ``since``/``until`` are inclusive bounds
        compared lexicographically against the entry's ``ts`` string.
        """
        for field in ('actor', 'action', 'target_type', 'target_id', 'result'):
            want = filters.get(field)
            if want and str(entry.get(field, '')) != str(want):
                return False
        since = filters.get('since')
        until = filters.get('until')
        ts = entry.get('ts', '')
        if since and ts < since:
            return False
        if until and ts > until:
            return False
        return True

    def _read_all(self, filters: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Return matching entries, newest-first across all segments.

        Unreadable segments and lines that are not a JSON object are skipped.
        """
        results: List[Dict[str, Any]] = []
        with self._lock:
            for seg in self._segment_files():
                try:
                    with open(seg, 'r', encoding='utf-8', errors='ignore') as f:
                        lines = f.readlines()
                except OSError:
                    continue
                # Within a segment the newest entry is the last line.
                for line in reversed(lines):
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    # Valid JSON that is not an object (e.g. a bare number)
                    # cannot be an audit entry; skip it instead of crashing.
                    if not isinstance(entry, dict):
                        continue
                    if self._matches(entry, filters):
                        results.append(entry)
        return results

    def query(self, filters: Optional[Dict[str, Any]] = None,
              limit: int = 100, offset: int = 0) -> Dict[str, Any]:
        """Return one page of matching entries, newest first.

        Args:
            filters: See ``_matches``; ``None`` means no filtering.
            limit: Page size, clamped to 1..1000 (100 if not an integer).
            offset: Number of entries to skip, floored at 0.

        Returns:
            ``{'entries': [...], 'total': int, 'next_offset': int | None}``
            where ``next_offset`` is ``None`` on the last page.
        """
        filters = filters or {}
        try:
            limit = max(1, min(int(limit), 1000))
        except (TypeError, ValueError):
            limit = 100
        try:
            offset = max(0, int(offset))
        except (TypeError, ValueError):
            offset = 0
        entries = self._read_all(filters)
        total = len(entries)
        page = entries[offset:offset + limit]
        next_offset = offset + limit if offset + limit < total else None
        return {'entries': page, 'total': total, 'next_offset': next_offset}

    def export_csv(self, filters: Optional[Dict[str, Any]] = None) -> str:
        """Return matching entries as CSV text (header row first).

        The hash-chain fields are not exported.
        """
        filters = filters or {}
        entries = self._read_all(filters)
        fields = ['ts', 'actor', 'role', 'ip', 'action', 'target_type',
                  'target_id', 'summary', 'result', 'status', 'method', 'path',
                  'request_id', 'seq']
        buf = io.StringIO()
        writer = csv.writer(buf)
        writer.writerow(fields)
        # NOTE(review): cells are written verbatim; if actor/target_id can be
        # attacker-chosen, values starting with '=', '+', '-' or '@' may be
        # interpreted as formulas by spreadsheet software — confirm whether
        # neutralisation is wanted here.
        for e in entries:
            writer.writerow([e.get(f, '') for f in fields])
        return buf.getvalue()

    # ── integrity ───────────────────────────────────────────────────────────
    def verify_chain(self) -> Dict[str, Any]:
        """Walk all segments oldest-first; verify each entry's hash + link.

        Returns:
            ``{'ok': True, 'broken_at_seq': None}`` when intact (plus
            ``'disabled': True`` if the chain is turned off), otherwise
            ``{'ok': False, 'broken_at_seq': <seq or None>}``; the seq is
            ``None`` when the offending line is not a parseable entry.
        """
        if not self.tamper_chain:
            return {'ok': True, 'broken_at_seq': None, 'disabled': True}
        # Hold the lock for the whole walk (as _read_all does) so a concurrent
        # record() cannot rotate segments between listing and reading them.
        with self._lock:
            segs = list(reversed(self._segment_files()))  # oldest -> newest
            prev_hash = ''
            first = True  # oldest available record: its predecessor may be pruned
            for seg in segs:
                try:
                    with open(seg, 'r', encoding='utf-8', errors='ignore') as f:
                        lines = f.readlines()
                except OSError:
                    continue
                for line in lines:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError:
                        return {'ok': False, 'broken_at_seq': None}
                    # A non-object line is as much a corruption as bad JSON.
                    if not isinstance(entry, dict):
                        return {'ok': False, 'broken_at_seq': None}
                    stored_hash = entry.get('hash', '')
                    # Don't fail the prev_hash link on the very first available
                    # record — older segments may have rotated off the end.
                    if not first and entry.get('prev_hash', '') != prev_hash:
                        return {'ok': False, 'broken_at_seq': entry.get('seq')}
                    recomputed = self._hash_entry(
                        {k: v for k, v in entry.items() if k != 'hash'})
                    if recomputed != stored_hash:
                        return {'ok': False, 'broken_at_seq': entry.get('seq')}
                    prev_hash = stored_hash
                    first = False
        return {'ok': True, 'broken_at_seq': None}

    # ── BaseServiceManager interface ────────────────────────────────────────
    def get_status(self) -> Dict[str, Any]:
        """Report chain mode, last seq, and the live file's path and size."""
        size = 0
        try:
            if os.path.exists(self._audit_file):
                size = os.path.getsize(self._audit_file)
        except OSError:
            pass  # size stays 0 if the file vanished or is unreadable
        return {
            'running': True,
            'tamper_chain': self.tamper_chain,
            'seq': self._seq,
            'file': self._audit_file,
            'file_size': size,
        }

    def test_connectivity(self) -> Dict[str, Any]:
        """Always succeeds: the audit log has no external dependency to probe."""
        return {'success': True}
|