feat: audit/change log — owner-visible record of who changed what

Add AuditManager (api/audit_manager.py): JSONL append-only log at data/api/audit/audit.log with SHA-256 hash chain for tamper detection, verify endpoint, size-based rotation, and automatic redaction of secret fields before any entry is written. Supports structured query (actor, action, date range) and CSV export. Wire an @app.after_request hook in app.py that fires on every mutating /api/* request: captures actor, role, remote IP, and maps the route + method to a human-readable action via ROUTE_ACTION_MAP. Explicit audit entries for password_change and password_reset are added in auth_routes.py so those events record the actor without logging secret values. Expose an admin-only blueprint (api/routes/audit.py): GET /api/audit — paginated query; GET /api/audit/export — CSV download; GET /api/audit/verify — hash-chain integrity check. Register AuditManager in managers.py and add api/audit to config_manager.py critical_data_paths so it is included in backups and restored with other persistent state. Add Activity page (webui/src/pages/Activity.jsx, admin-only) reachable from the nav in App.jsx. New auditAPI helper in api.js covers all three endpoints. Tests: test_audit_manager.py (unit: hash chain, redaction, rotation, query, csv, verify) and test_audit_hook_routes.py (integration: hook fires on mutating routes, skips safe methods, records actor/ip/action, backup-inclusion assertion). Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 20:19:38 -04:00
parent 13074f56cb
commit 8b50fb1036
12 changed files with 1246 additions and 2 deletions
@@ -0,0 +1,330 @@
+#!/usr/bin/env python3
+"""
+Audit Manager for Personal Internet Cell.
+
+Owner-visible, append-only audit trail of WHO (actor + role + ip) did WHAT
+(action) to WHICH target, WHEN, with a redacted summary. Storage is a JSONL
+file with a per-entry SHA-256 hash chain so tampering is detectable. Request
+bodies and secret values are never written; summaries only ever list changed
+config KEY NAMES, never their values.
+"""
+
+import os
+import io
+import re
+import csv
+import json
+import hashlib
+import logging
+import threading
+from datetime import datetime, timezone
+from typing import Dict, List, Optional, Any
+
+from base_service_manager import BaseServiceManager
+
+# Module-level logger; audit failures in this module are reported here as warnings.
+logger = logging.getLogger(__name__)
+
+
+def _utcnow_iso() -> str:
+    return datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
+
+
+# Keys whose values must never be recorded — name-only in summaries.
+_SECRET_KEY_RE = re.compile(r'(pass|secret|key|token|private|cred|otp|psk)', re.IGNORECASE)
+# Final scrub of anything that looks like base64 key material / encoded blobs.
+_BASE64_BLOCK_RE = re.compile(r'[A-Za-z0-9+/]{40,}={0,2}')
+# bcrypt and age secret prefixes.
+_SECRET_PREFIX_RE = re.compile(
+    r'(\$2[aby]\$[^\s]+|AGE-SECRET-KEY-[^\s]+|age1[^\s]+|-----BEGIN[^\n]+)'
+)
+
+_VALID_RESULTS = ('success', 'failure')
+
+
+class AuditManager(BaseServiceManager):
+    """Append-only, hash-chained audit trail."""
+
+    MAX_FILE_SIZE = 10 * 1024 * 1024  # 10 MB before rotation
+    BACKUP_COUNT = 10                 # audit.log.1 .. audit.log.10
+
+    def __init__(self, data_dir: str = '/app/data', config_dir: str = '/app/config',
+                 tamper_chain: bool = True):
+        super().__init__('audit', data_dir=data_dir, config_dir=config_dir)
+        self.tamper_chain = tamper_chain
+        self._lock = threading.RLock()
+        self._audit_dir = os.path.join(self.data_dir, 'api', 'audit')
+        self._audit_file = os.path.join(self._audit_dir, 'audit.log')
+        self._seq = 0
+        self._prev_hash = ''
+        self.safe_makedirs(self._audit_dir)
+        self._load_chain_state()
+
+    # ── chain bootstrap ─────────────────────────────────────────────────────
+    def _load_chain_state(self) -> None:
+        """Recover seq + prev_hash from the last line of the live file."""
+        try:
+            if not os.path.exists(self._audit_file):
+                return
+            last = None
+            with open(self._audit_file, 'r', encoding='utf-8', errors='ignore') as f:
+                for line in f:
+                    line = line.strip()
+                    if line:
+                        last = line
+            if last:
+                entry = json.loads(last)
+                self._seq = int(entry.get('seq', 0))
+                self._prev_hash = entry.get('hash', '') or ''
+        except Exception as e:
+            logger.warning(f"audit: could not load chain state: {e}")
+
+    # ── redaction ───────────────────────────────────────────────────────────
+    @staticmethod
+    def _scrub(text: str) -> str:
+        """Strip anything resembling a secret value from a summary string."""
+        if not text:
+            return ''
+        text = _SECRET_PREFIX_RE.sub('[REDACTED]', text)
+        text = _BASE64_BLOCK_RE.sub('[REDACTED]', text)
+        return text
+
+    @classmethod
+    def _redact(cls, entry: Dict[str, Any]) -> Dict[str, Any]:
+        """Enforce the redaction rules on a built entry before write.
+
+        - summary is scrubbed of base64/secret-prefixed blobs.
+        - any string field is scrubbed too (defence in depth).
+        Request bodies are never present — the caller passes only a summary.
+        """
+        for field in ('summary', 'target_id', 'action', 'path'):
+            val = entry.get(field)
+            if isinstance(val, str):
+                entry[field] = cls._scrub(val)
+        return entry
+
+    @classmethod
+    def summarize_keys(cls, keys: List[str]) -> str:
+        """Build a redacted summary listing changed config KEY NAMES only.
+
+        Secret-looking key names are kept (they are names, not values) but the
+        whole string is still scrubbed of any accidental value material.
+        """
+        names = [str(k) for k in keys if k is not None]
+        return cls._scrub('changed: ' + ', '.join(names)) if names else 'no changes'
+
+    # ── hashing ─────────────────────────────────────────────────────────────
+    @staticmethod
+    def _canonical(entry: Dict[str, Any]) -> str:
+        return json.dumps(entry, sort_keys=True, separators=(',', ':'), ensure_ascii=False)
+
+    def _hash_entry(self, entry_without_hash: Dict[str, Any]) -> str:
+        return hashlib.sha256(self._canonical(entry_without_hash).encode('utf-8')).hexdigest()
+
+    # ── recording ───────────────────────────────────────────────────────────
+    def record(self, actor: str, role: str, ip: str, action: str,
+               target_type: str = '', target_id: str = '', summary: str = '',
+               result: str = 'success', status: int = 200, method: str = '',
+               path: str = '', request_id: str = '') -> Optional[Dict[str, Any]]:
+        """Append one redacted, hash-chained JSON line. Never raises."""
+        try:
+            with self._lock:
+                self._maybe_rotate()
+                self._seq += 1
+                if result not in _VALID_RESULTS:
+                    result = 'success' if int(status or 200) < 400 else 'failure'
+                entry: Dict[str, Any] = {
+                    'ts': _utcnow_iso(),
+                    'actor': actor or 'anonymous',
+                    'role': role or 'system',
+                    'ip': ip or '',
+                    'action': action or '',
+                    'target_type': target_type or '',
+                    'target_id': target_id or '',
+                    'summary': summary or '',
+                    'result': result,
+                    'status': int(status or 0),
+                    'method': method or '',
+                    'path': path or '',
+                    'request_id': request_id or '',
+                    'seq': self._seq,
+                    'prev_hash': self._prev_hash if self.tamper_chain else '',
+                }
+                entry = self._redact(entry)
+                if self.tamper_chain:
+                    entry['hash'] = self._hash_entry(entry)
+                else:
+                    entry['hash'] = ''
+                self._append_line(json.dumps(entry, ensure_ascii=False))
+                self._prev_hash = entry['hash']
+                return entry
+        except Exception as e:
+            logger.warning(f"audit.record failed: {e}")
+            return None
+
+    def _append_line(self, line: str) -> None:
+        self.safe_makedirs(self._audit_dir)
+        fd = os.open(self._audit_file, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o600)
+        try:
+            os.write(fd, (line + '\n').encode('utf-8'))
+        finally:
+            os.close(fd)
+        try:
+            os.chmod(self._audit_file, 0o600)
+        except OSError:
+            pass
+
+    # ── rotation ────────────────────────────────────────────────────────────
+    def _maybe_rotate(self) -> None:
+        try:
+            if not os.path.exists(self._audit_file):
+                return
+            if os.path.getsize(self._audit_file) < self.MAX_FILE_SIZE:
+                return
+        except OSError:
+            return
+        # audit.log.(N-1) -> audit.log.N, ... audit.log -> audit.log.1
+        for i in range(self.BACKUP_COUNT - 1, 0, -1):
+            src = f"{self._audit_file}.{i}"
+            dst = f"{self._audit_file}.{i + 1}"
+            if os.path.exists(src):
+                try:
+                    os.replace(src, dst)
+                except OSError as e:
+                    logger.warning(f"audit rotate {src}->{dst}: {e}")
+        try:
+            os.replace(self._audit_file, f"{self._audit_file}.1")
+        except OSError as e:
+            logger.warning(f"audit rotate live->.1: {e}")
+
+    def _segment_files(self) -> List[str]:
+        """Live file first (newest), then rotated segments .1 .. .N (older)."""
+        files = []
+        if os.path.exists(self._audit_file):
+            files.append(self._audit_file)
+        for i in range(1, self.BACKUP_COUNT + 1):
+            seg = f"{self._audit_file}.{i}"
+            if os.path.exists(seg):
+                files.append(seg)
+        return files
+
+    # ── reading / filtering ─────────────────────────────────────────────────
+    @staticmethod
+    def _matches(entry: Dict[str, Any], filters: Dict[str, Any]) -> bool:
+        for field in ('actor', 'action', 'target_type', 'target_id', 'result'):
+            want = filters.get(field)
+            if want and str(entry.get(field, '')) != str(want):
+                return False
+        since = filters.get('since')
+        until = filters.get('until')
+        ts = entry.get('ts', '')
+        if since and ts < since:
+            return False
+        if until and ts > until:
+            return False
+        return True
+
+    def _read_all(self, filters: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """Return matching entries, newest-first across all segments."""
+        results: List[Dict[str, Any]] = []
+        with self._lock:
+            for seg in self._segment_files():
+                try:
+                    with open(seg, 'r', encoding='utf-8', errors='ignore') as f:
+                        lines = f.readlines()
+                except OSError:
+                    continue
+                for line in reversed(lines):
+                    line = line.strip()
+                    if not line:
+                        continue
+                    try:
+                        entry = json.loads(line)
+                    except json.JSONDecodeError:
+                        continue
+                    if self._matches(entry, filters):
+                        results.append(entry)
+        return results
+
+    def query(self, filters: Optional[Dict[str, Any]] = None,
+              limit: int = 100, offset: int = 0) -> Dict[str, Any]:
+        filters = filters or {}
+        try:
+            limit = max(1, min(int(limit), 1000))
+        except (TypeError, ValueError):
+            limit = 100
+        try:
+            offset = max(0, int(offset))
+        except (TypeError, ValueError):
+            offset = 0
+        entries = self._read_all(filters)
+        total = len(entries)
+        page = entries[offset:offset + limit]
+        next_offset = offset + limit if offset + limit < total else None
+        return {'entries': page, 'total': total, 'next_offset': next_offset}
+
+    def export_csv(self, filters: Optional[Dict[str, Any]] = None) -> str:
+        filters = filters or {}
+        entries = self._read_all(filters)
+        fields = ['ts', 'actor', 'role', 'ip', 'action', 'target_type',
+                  'target_id', 'summary', 'result', 'status', 'method', 'path',
+                  'request_id', 'seq']
+        buf = io.StringIO()
+        writer = csv.writer(buf)
+        writer.writerow(fields)
+        for e in entries:
+            writer.writerow([e.get(f, '') for f in fields])
+        return buf.getvalue()
+
+    # ── integrity ───────────────────────────────────────────────────────────
+    def verify_chain(self) -> Dict[str, Any]:
+        """Walk all segments oldest-first; verify each entry's hash + link."""
+        if not self.tamper_chain:
+            return {'ok': True, 'broken_at_seq': None, 'disabled': True}
+        with self._lock:
+            segs = list(reversed(self._segment_files()))  # oldest -> newest
+            prev_hash = ''
+            first = True  # oldest available record: its predecessor may be pruned
+            for seg in segs:
+                try:
+                    with open(seg, 'r', encoding='utf-8', errors='ignore') as f:
+                        lines = f.readlines()
+                except OSError:
+                    continue
+                for line in lines:
+                    line = line.strip()
+                    if not line:
+                        continue
+                    try:
+                        entry = json.loads(line)
+                    except json.JSONDecodeError:
+                        return {'ok': False, 'broken_at_seq': None}
+                    stored_hash = entry.get('hash', '')
+                    # Don't fail the prev_hash link on the very first available
+                    # record — older segments may have rotated off the end.
+                    if not first and entry.get('prev_hash', '') != prev_hash:
+                        return {'ok': False, 'broken_at_seq': entry.get('seq')}
+                    recomputed = self._hash_entry({k: v for k, v in entry.items() if k != 'hash'})
+                    if recomputed != stored_hash:
+                        return {'ok': False, 'broken_at_seq': entry.get('seq')}
+                    prev_hash = stored_hash
+                    first = False
+        return {'ok': True, 'broken_at_seq': None}
+
+    # ── BaseServiceManager interface ────────────────────────────────────────
+    def get_status(self) -> Dict[str, Any]:
+        size = 0
+        try:
+            if os.path.exists(self._audit_file):
+                size = os.path.getsize(self._audit_file)
+        except OSError:
+            pass
+        return {
+            'running': True,
+            'tamper_chain': self.tamper_chain,
+            'seq': self._seq,
+            'file': self._audit_file,
+            'file_size': size,
+        }
+
+    def test_connectivity(self) -> Dict[str, Any]:
+        return {'success': True}