feat: audit/change log — owner-visible record of who changed what
Unit Tests / test (push) Successful in 12m47s

Add AuditManager (api/audit_manager.py): JSONL append-only log at
data/api/audit/audit.log with SHA-256 hash chain for tamper detection,
verify endpoint, size-based rotation, and automatic redaction of secret
fields before any entry is written. Supports structured query (actor,
action, date range) and CSV export.

Wire an @app.after_request hook in app.py that fires on every mutating
/api/* request: captures actor, role, remote IP, and maps the route +
method to a human-readable action via ROUTE_ACTION_MAP. Explicit audit
entries for password_change and password_reset are added in
auth_routes.py so those events record the actor without logging secret
values.

Expose an admin-only blueprint (api/routes/audit.py):
  GET /api/audit          — paginated query
  GET /api/audit/export   — CSV download
  GET /api/audit/verify   — hash-chain integrity check

Register AuditManager in managers.py and add api/audit to
config_manager.py critical_data_paths so it is included in backups and
restored with other persistent state.

Add Activity page (webui/src/pages/Activity.jsx, admin-only) reachable
from the nav in App.jsx. New auditAPI helper in api.js covers all three
endpoints.

Tests: test_audit_manager.py (unit: hash chain, redaction, rotation,
query, csv, verify) and test_audit_hook_routes.py (integration: hook
fires on mutating routes, skips safe methods, records actor/ip/action,
backup-inclusion assertion).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 20:19:38 -04:00
parent 13074f56cb
commit 8b50fb1036
12 changed files with 1246 additions and 2 deletions
+330
View File
@@ -0,0 +1,330 @@
#!/usr/bin/env python3
"""
Audit Manager for Personal Internet Cell.
Owner-visible, append-only audit trail of WHO (actor + role + ip) did WHAT
(action) to WHICH target, WHEN, with a redacted summary. Storage is a JSONL
file with a per-entry SHA-256 hash chain so tampering is detectable. Request
bodies and secret values are never written; summaries only ever list changed
config KEY NAMES, never their values.
"""
import csv
import hashlib
import io
import json
import logging
import os
import re
import threading
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

from base_service_manager import BaseServiceManager
logger = logging.getLogger(__name__)
def _utcnow_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    # datetime.utcnow() is deprecated since Python 3.12; an aware "now" in UTC
    # formats to exactly the same string.
    return datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
# Keys whose values must never be recorded — name-only in summaries.
# NOTE(review): not referenced elsewhere in this module; presumably used by
# callers that build summaries — confirm before removing.
_SECRET_KEY_RE = re.compile(r'(pass|secret|key|token|private|cred|otp|psk)', re.IGNORECASE)
# Final scrub of anything that looks like base64 key material / encoded blobs
# (40+ base64-alphabet characters, optionally followed by '=' padding).
_BASE64_BLOCK_RE = re.compile(r'[A-Za-z0-9+/]{40,}={0,2}')
# bcrypt hashes, age keys and PEM "-----BEGIN ..." header lines.
# `age1` is anchored on a word boundary so ordinary words that merely contain
# it ("stage1", "image1", "page10") are not mangled into "...[REDACTED]".
_SECRET_PREFIX_RE = re.compile(
    r'(\$2[aby]\$[^\s]+|AGE-SECRET-KEY-[^\s]+|\bage1[^\s]+|-----BEGIN[^\n]+)'
)
# The only values accepted for an entry's `result` field.
_VALID_RESULTS = ('success', 'failure')
class AuditManager(BaseServiceManager):
    """Append-only, hash-chained audit trail.

    Entries are written one JSON object per line to ``api/audit/audit.log``
    under the manager's data directory. Each entry carries a ``seq`` counter,
    the previous entry's hash (``prev_hash``) and its own SHA-256 ``hash``
    computed over the canonical JSON of all other fields, so edits or removed
    lines can be detected with :meth:`verify_chain`. When the live file
    reaches ``MAX_FILE_SIZE`` it is rotated to ``audit.log.1`` and older
    segments shift up to ``audit.log.<BACKUP_COUNT>``.
    """

    MAX_FILE_SIZE = 10 * 1024 * 1024  # 10 MB before rotation
    BACKUP_COUNT = 10                 # audit.log.1 .. audit.log.10

    def __init__(self, data_dir: str = '/app/data', config_dir: str = '/app/config',
                 tamper_chain: bool = True):
        """Create the audit directory and resume the chain from existing logs.

        Args:
            data_dir: Root data directory passed to the base manager.
            config_dir: Config directory passed to the base manager.
            tamper_chain: When False, entries are written with empty
                ``prev_hash``/``hash`` and :meth:`verify_chain` is a no-op.
        """
        super().__init__('audit', data_dir=data_dir, config_dir=config_dir)
        self.tamper_chain = tamper_chain
        # Re-entrant so record() can call helpers that also take the lock.
        self._lock = threading.RLock()
        self._audit_dir = os.path.join(self.data_dir, 'api', 'audit')
        self._audit_file = os.path.join(self._audit_dir, 'audit.log')
        # In-memory chain head: seq and hash of the last written entry.
        self._seq = 0
        self._prev_hash = ''
        self.safe_makedirs(self._audit_dir)
        self._load_chain_state()

    # ── chain bootstrap ─────────────────────────────────────────────────────
    @staticmethod
    def _last_nonblank_line(path: str) -> Optional[str]:
        """Return the last non-blank line of *path* (stripped), or None if there is none."""
        last = None
        with open(path, 'r', encoding='utf-8', errors='ignore') as f:
            for raw in f:
                raw = raw.strip()
                if raw:
                    last = raw
        return last

    def _load_chain_state(self) -> None:
        """Recover ``seq`` + ``prev_hash`` from the newest stored entry.

        The live file is consulted first; if it is missing or empty (for
        example a rotation happened but the following append failed before a
        restart) the newest rotated segment is used instead, so the chain
        continues rather than restarting with an empty ``prev_hash``.
        Never raises: on any error the state stays at its initial values.
        """
        try:
            for seg in self._segment_files():  # live file first, then .1, .2, ...
                last = self._last_nonblank_line(seg)
                if not last:
                    continue
                entry = json.loads(last)
                self._seq = int(entry.get('seq', 0))
                self._prev_hash = entry.get('hash', '') or ''
                return
        except Exception as e:  # startup must not fail because of a bad log tail
            logger.warning("audit: could not load chain state: %s", e)

    # ── redaction ───────────────────────────────────────────────────────────
    @staticmethod
    def _scrub(text: str) -> str:
        """Replace anything resembling a secret value in *text* with ``[REDACTED]``.

        Returns an empty string for falsy input.
        """
        if not text:
            return ''
        text = _SECRET_PREFIX_RE.sub('[REDACTED]', text)
        text = _BASE64_BLOCK_RE.sub('[REDACTED]', text)
        return text

    @classmethod
    def _redact(cls, entry: Dict[str, Any]) -> Dict[str, Any]:
        """Enforce the redaction rules on a built entry before write.

        The ``summary``, ``target_id``, ``action`` and ``path`` fields are
        scrubbed of base64 / secret-prefixed blobs when they are strings;
        other fields are left untouched. The entry is modified in place and
        returned. Request bodies are never present — the caller passes only
        a summary.
        """
        for field in ('summary', 'target_id', 'action', 'path'):
            val = entry.get(field)
            if isinstance(val, str):
                entry[field] = cls._scrub(val)
        return entry

    @classmethod
    def summarize_keys(cls, keys: List[str]) -> str:
        """Build a redacted summary listing changed config KEY NAMES only.

        Secret-looking key names are kept (they are names, not values) but the
        whole string is still scrubbed of any accidental value material.
        Returns ``'no changes'`` when *keys* is empty, None, or only Nones.
        """
        names = [str(k) for k in (keys or ()) if k is not None]
        if not names:
            return 'no changes'
        return cls._scrub('changed: ' + ', '.join(names))

    # ── hashing ─────────────────────────────────────────────────────────────
    @staticmethod
    def _canonical(entry: Dict[str, Any]) -> str:
        """Serialize *entry* deterministically (sorted keys, no whitespace)."""
        return json.dumps(entry, sort_keys=True, separators=(',', ':'), ensure_ascii=False)

    def _hash_entry(self, entry_without_hash: Dict[str, Any]) -> str:
        """Return the SHA-256 hex digest of the canonical JSON of the entry."""
        return hashlib.sha256(self._canonical(entry_without_hash).encode('utf-8')).hexdigest()

    # ── recording ───────────────────────────────────────────────────────────
    def record(self, actor: str, role: str, ip: str, action: str,
               target_type: str = '', target_id: str = '', summary: str = '',
               result: str = 'success', status: int = 200, method: str = '',
               path: str = '', request_id: str = '') -> Optional[Dict[str, Any]]:
        """Append one redacted, hash-chained JSON line. Never raises.

        Args:
            actor: Who performed the action; stored as ``'anonymous'`` if empty.
            role: Actor's role; stored as ``'system'`` if empty.
            ip: Remote address of the request.
            action: Name of the action performed.
            target_type: Kind of object acted upon.
            target_id: Identifier of the object acted upon.
            summary: Short description; scrubbed before writing.
            result: ``'success'`` or ``'failure'``; any other value is derived
                from *status* (below 400 means success).
            status: HTTP status code of the response.
            method: HTTP method.
            path: Request path; scrubbed before writing.
            request_id: Correlation id of the request.

        Returns:
            The entry as written (including ``seq``, ``prev_hash`` and
            ``hash``), or None if anything went wrong.
        """
        try:
            with self._lock:
                self._maybe_rotate()
                if result not in _VALID_RESULTS:
                    result = 'success' if int(status or 200) < 400 else 'failure'
                seq = self._seq + 1
                entry: Dict[str, Any] = {
                    'ts': _utcnow_iso(),
                    'actor': actor or 'anonymous',
                    'role': role or 'system',
                    'ip': ip or '',
                    'action': action or '',
                    'target_type': target_type or '',
                    'target_id': target_id or '',
                    'summary': summary or '',
                    'result': result,
                    'status': int(status or 0),
                    'method': method or '',
                    'path': path or '',
                    'request_id': request_id or '',
                    'seq': seq,
                    'prev_hash': self._prev_hash if self.tamper_chain else '',
                }
                entry = self._redact(entry)
                # The hash covers every field above, i.e. the entry minus 'hash'.
                entry['hash'] = self._hash_entry(entry) if self.tamper_chain else ''
                self._append_line(json.dumps(entry, ensure_ascii=False))
                # Advance the in-memory chain head only once the append has
                # succeeded, so a failed write leaves no gap in seq.
                self._seq = seq
                self._prev_hash = entry['hash']
                return entry
        except Exception as e:  # auditing must never break the request being audited
            logger.warning("audit.record failed: %s", e)
            return None

    def _append_line(self, line: str) -> None:
        """Append *line* plus a newline to the live file, keeping mode 0600."""
        self.safe_makedirs(self._audit_dir)
        fd = os.open(self._audit_file, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o600)
        try:
            os.write(fd, (line + '\n').encode('utf-8'))
        finally:
            os.close(fd)
        # The mode passed to os.open only applies on creation; re-assert it
        # for files that already existed. Best-effort.
        try:
            os.chmod(self._audit_file, 0o600)
        except OSError:
            pass

    # ── rotation ────────────────────────────────────────────────────────────
    def _maybe_rotate(self) -> None:
        """Rotate the live file once it has reached ``MAX_FILE_SIZE``.

        Segments shift ``audit.log.(N-1) -> audit.log.N`` and the live file
        becomes ``audit.log.1``; the previous ``audit.log.<BACKUP_COUNT>`` is
        overwritten. Individual rename failures are logged and skipped.
        """
        try:
            if not os.path.exists(self._audit_file):
                return
            if os.path.getsize(self._audit_file) < self.MAX_FILE_SIZE:
                return
        except OSError:
            return
        # audit.log.(N-1) -> audit.log.N, ... audit.log -> audit.log.1
        for i in range(self.BACKUP_COUNT - 1, 0, -1):
            src = f"{self._audit_file}.{i}"
            dst = f"{self._audit_file}.{i + 1}"
            if os.path.exists(src):
                try:
                    os.replace(src, dst)
                except OSError as e:
                    logger.warning("audit rotate %s->%s: %s", src, dst, e)
        try:
            os.replace(self._audit_file, f"{self._audit_file}.1")
        except OSError as e:
            logger.warning("audit rotate live->.1: %s", e)

    def _segment_files(self) -> List[str]:
        """Live file first (newest), then rotated segments .1 .. .N (older)."""
        candidates = [self._audit_file] + [
            f"{self._audit_file}.{i}" for i in range(1, self.BACKUP_COUNT + 1)
        ]
        return [p for p in candidates if os.path.exists(p)]

    # ── reading / filtering ─────────────────────────────────────────────────
    @staticmethod
    def _matches(entry: Dict[str, Any], filters: Dict[str, Any]) -> bool:
        """Return True if *entry* satisfies every filter that is set.

        ``actor``, ``action``, ``target_type``, ``target_id`` and ``result``
        are exact string matches; ``since`` / ``until`` are inclusive bounds
        compared as strings against the entry's ``ts``.
        """
        for field in ('actor', 'action', 'target_type', 'target_id', 'result'):
            want = filters.get(field)
            if want and str(entry.get(field, '')) != str(want):
                return False
        since = filters.get('since')
        until = filters.get('until')
        # Coerce to str so a malformed (non-string) ts cannot raise TypeError.
        ts = str(entry.get('ts', ''))
        if since and ts < str(since):
            return False
        if until and ts > str(until):
            return False
        return True

    def _read_all(self, filters: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Return matching entries, newest-first across all segments.

        Unreadable segments and lines that are not JSON objects are skipped.
        """
        results: List[Dict[str, Any]] = []
        with self._lock:
            for seg in self._segment_files():
                try:
                    with open(seg, 'r', encoding='utf-8', errors='ignore') as f:
                        lines = f.readlines()
                except OSError:
                    continue
                for line in reversed(lines):
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    # Valid JSON that is not an object (e.g. a bare number)
                    # cannot be an audit entry.
                    if not isinstance(entry, dict):
                        continue
                    if self._matches(entry, filters):
                        results.append(entry)
        return results

    def query(self, filters: Optional[Dict[str, Any]] = None,
              limit: int = 100, offset: int = 0) -> Dict[str, Any]:
        """Return one page of matching entries, newest first.

        Args:
            filters: Optional filter dict understood by :meth:`_matches`.
            limit: Page size, clamped to 1..1000 (100 if not an integer).
            offset: Number of entries to skip (0 if negative or not an integer).

        Returns:
            ``{'entries': [...], 'total': int, 'next_offset': int | None}``;
            ``next_offset`` is None on the last page.
        """
        filters = filters or {}
        try:
            limit = max(1, min(int(limit), 1000))
        except (TypeError, ValueError):
            limit = 100
        try:
            offset = max(0, int(offset))
        except (TypeError, ValueError):
            offset = 0
        entries = self._read_all(filters)
        total = len(entries)
        page = entries[offset:offset + limit]
        next_offset = offset + limit if offset + limit < total else None
        return {'entries': page, 'total': total, 'next_offset': next_offset}

    def export_csv(self, filters: Optional[Dict[str, Any]] = None) -> str:
        """Return all matching entries as CSV text (header row first).

        The hash-chain fields (``prev_hash`` and ``hash``) are not exported.
        """
        filters = filters or {}
        entries = self._read_all(filters)
        fields = ['ts', 'actor', 'role', 'ip', 'action', 'target_type',
                  'target_id', 'summary', 'result', 'status', 'method', 'path',
                  'request_id', 'seq']
        buf = io.StringIO()
        writer = csv.writer(buf)
        writer.writerow(fields)
        writer.writerows([entry.get(name, '') for name in fields] for entry in entries)
        return buf.getvalue()

    # ── integrity ───────────────────────────────────────────────────────────
    def verify_chain(self) -> Dict[str, Any]:
        """Walk all segments oldest-first; verify each entry's hash + link.

        Returns:
            ``{'ok': True, 'broken_at_seq': None}`` when every entry's stored
            hash matches its recomputed hash and its ``prev_hash`` matches the
            preceding entry; otherwise ``{'ok': False, 'broken_at_seq': seq}``
            for the first bad entry (``seq`` is None when the line is not a
            JSON object). When the chain is disabled the result is ok and
            additionally carries ``'disabled': True``.
        """
        if not self.tamper_chain:
            return {'ok': True, 'broken_at_seq': None, 'disabled': True}
        # Hold the lock for the whole walk (as _read_all does) so a concurrent
        # record() cannot rotate segments in the middle of verification.
        with self._lock:
            prev_hash = ''
            first = True  # oldest available record: its predecessor may be pruned
            for seg in reversed(self._segment_files()):  # oldest -> newest
                try:
                    with open(seg, 'r', encoding='utf-8', errors='ignore') as f:
                        lines = f.readlines()
                except OSError:
                    continue
                for line in lines:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError:
                        return {'ok': False, 'broken_at_seq': None}
                    if not isinstance(entry, dict):
                        # Parses as JSON but is not an entry: treat as corruption.
                        return {'ok': False, 'broken_at_seq': None}
                    stored_hash = entry.get('hash', '')
                    # Don't fail the prev_hash link on the very first available
                    # record — older segments may have rotated off the end.
                    if not first and entry.get('prev_hash', '') != prev_hash:
                        return {'ok': False, 'broken_at_seq': entry.get('seq')}
                    recomputed = self._hash_entry(
                        {k: v for k, v in entry.items() if k != 'hash'}
                    )
                    if recomputed != stored_hash:
                        return {'ok': False, 'broken_at_seq': entry.get('seq')}
                    prev_hash = stored_hash
                    first = False
        return {'ok': True, 'broken_at_seq': None}

    # ── BaseServiceManager interface ────────────────────────────────────────
    def get_status(self) -> Dict[str, Any]:
        """Report chain settings, the current seq and the live file's path and size."""
        size = 0
        try:
            if os.path.exists(self._audit_file):
                size = os.path.getsize(self._audit_file)
        except OSError:
            pass  # size stays 0 if the file vanished or is unreadable
        return {
            'running': True,
            'tamper_chain': self.tamper_chain,
            'seq': self._seq,
            'file': self._audit_file,
            'file_size': size,
        }

    def test_connectivity(self) -> Dict[str, Any]:
        """Always report success; no connectivity check is performed."""
        return {'success': True}