#!/usr/bin/env python3
"""
Audit Manager for Personal Internet Cell.

Owner-visible, append-only audit trail of WHO (actor + role + ip) did WHAT
(action) to WHICH target, WHEN, with a redacted summary. Storage is a JSONL
file with a per-entry SHA-256 hash chain so tampering is detectable.

Request bodies and secret values are never written; summaries only ever list
changed config KEY NAMES, never their values.
"""

import os
import io
import re
import csv
import json
import hashlib
import logging
import threading
from datetime import datetime, timezone
from typing import Dict, List, Optional, Any, Tuple

from base_service_manager import BaseServiceManager

logger = logging.getLogger(__name__)


def _utcnow_iso() -> str:
    """Return the current UTC time as a fixed-width ``YYYY-MM-DDTHH:MM:SSZ`` string."""
    # datetime.utcnow() is deprecated since Python 3.12; an aware "now" in UTC
    # formats to exactly the same text.
    return datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')


# Keys whose values must never be recorded — name-only in summaries.
_SECRET_KEY_RE = re.compile(r'(pass|secret|key|token|private|cred|otp|psk)', re.IGNORECASE)
# Final scrub of anything that looks like base64 key material / encoded blobs.
_BASE64_BLOCK_RE = re.compile(r'[A-Za-z0-9+/]{40,}={0,2}')
# bcrypt and age secret prefixes.
_SECRET_PREFIX_RE = re.compile(
    r'(\$2[aby]\$[^\s]+|AGE-SECRET-KEY-[^\s]+|age1[^\s]+|-----BEGIN[^\n]+)'
)

_VALID_RESULTS = ('success', 'failure')


class AuditManager(BaseServiceManager):
    """Append-only, hash-chained audit trail."""

    MAX_FILE_SIZE = 10 * 1024 * 1024   # 10 MB before rotation
    BACKUP_COUNT = 10                   # audit.log.1 .. audit.log.10

    def __init__(self, data_dir: str = '/app/data', config_dir: str = '/app/config',
                 tamper_chain: bool = True):
        """Create the audit directory and resume the chain from existing segments.

        Args:
            data_dir: Base data directory; the log lives under ``api/audit``.
            config_dir: Passed through to ``BaseServiceManager``.
            tamper_chain: When False, entries are written with empty
                ``hash`` / ``prev_hash`` and ``verify_chain`` is a no-op.
        """
        super().__init__('audit', data_dir=data_dir, config_dir=config_dir)
        self.tamper_chain = tamper_chain
        self._lock = threading.RLock()
        self._audit_dir = os.path.join(self.data_dir, 'api', 'audit')
        self._audit_file = os.path.join(self._audit_dir, 'audit.log')
        self._seq = 0
        self._prev_hash = ''
        self.safe_makedirs(self._audit_dir)
        self._load_chain_state()

    # ── chain bootstrap ─────────────────────────────────────────────────────

    @staticmethod
    def _last_entry(path: str) -> Optional[Tuple[int, str]]:
        """Return ``(seq, hash)`` of the last parseable entry in *path*, or None.

        Blank lines, malformed JSON, non-object lines and entries with a
        non-integer ``seq`` are skipped so a torn trailing write does not
        reset the sequence counter.
        """
        with open(path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = f.readlines()
        for raw in reversed(lines):
            raw = raw.strip()
            if not raw:
                continue
            try:
                entry = json.loads(raw)
            except ValueError:
                continue
            if not isinstance(entry, dict):
                continue
            try:
                seq = int(entry.get('seq', 0))
            except (TypeError, ValueError):
                continue
            return seq, entry.get('hash', '') or ''
        return None

    def _load_chain_state(self) -> None:
        """Recover seq + prev_hash from the newest entry on disk.

        The live file is tried first, then rotated segments newest-first, so
        a restart while only rotated segments exist (e.g. right after a
        rotation) continues the chain instead of restarting at seq 0.
        Never raises; failures are logged and leave the state at its defaults.
        """
        try:
            for seg in self._segment_files():
                try:
                    tail = self._last_entry(seg)
                except OSError as e:
                    logger.warning("audit: could not read %s: %s", seg, e)
                    continue
                if tail is not None:
                    self._seq, self._prev_hash = tail
                    return
        except Exception as e:
            logger.warning("audit: could not load chain state: %s", e)

    # ── redaction ───────────────────────────────────────────────────────────

    @staticmethod
    def _scrub(text: str) -> str:
        """Strip anything resembling a secret value from a summary string."""
        if not text:
            return ''
        text = _SECRET_PREFIX_RE.sub('[REDACTED]', text)
        text = _BASE64_BLOCK_RE.sub('[REDACTED]', text)
        return text

    @classmethod
    def _redact(cls, entry: Dict[str, Any]) -> Dict[str, Any]:
        """Enforce the redaction rules on a built entry before write.

        The ``summary``, ``target_id``, ``action`` and ``path`` fields are
        scrubbed of base64 / secret-prefixed blobs when they are strings;
        other fields are left untouched. The entry is modified in place and
        also returned. Request bodies are never present — the caller passes
        only a summary.
        """
        for field in ('summary', 'target_id', 'action', 'path'):
            val = entry.get(field)
            if isinstance(val, str):
                entry[field] = cls._scrub(val)
        return entry

    @classmethod
    def summarize_keys(cls, keys: List[str]) -> str:
        """Build a redacted summary listing changed config KEY NAMES only.

        Secret-looking key names are kept (they are names, not values) but the
        whole string is still scrubbed of any accidental value material.
        Returns ``'no changes'`` when no non-None key is supplied.
        """
        names = [str(k) for k in keys if k is not None]
        return cls._scrub('changed: ' + ', '.join(names)) if names else 'no changes'

    # ── hashing ─────────────────────────────────────────────────────────────

    @staticmethod
    def _canonical(entry: Dict[str, Any]) -> str:
        """Serialize *entry* deterministically (sorted keys, compact separators)."""
        return json.dumps(entry, sort_keys=True, separators=(',', ':'), ensure_ascii=False)

    def _hash_entry(self, entry_without_hash: Dict[str, Any]) -> str:
        """Return the SHA-256 hex digest of the canonical form of the entry."""
        return hashlib.sha256(self._canonical(entry_without_hash).encode('utf-8')).hexdigest()

    # ── recording ───────────────────────────────────────────────────────────

    def record(self, actor: str, role: str, ip: str, action: str,
               target_type: str = '', target_id: str = '', summary: str = '',
               result: str = 'success', status: int = 200,
               method: str = '', path: str = '',
               request_id: str = '') -> Optional[Dict[str, Any]]:
        """Append one redacted, hash-chained JSON line. Never raises.

        Returns:
            The entry that was written, or None when anything failed (the
            failure is logged). On failure the in-memory ``seq`` and
            ``prev_hash`` are left unchanged, so no sequence number is
            consumed by an entry that never reached disk.
        """
        try:
            with self._lock:
                self._maybe_rotate()
                # Tentative sequence number; committed only after the append.
                seq = self._seq + 1
                if result not in _VALID_RESULTS:
                    result = 'success' if int(status or 200) < 400 else 'failure'
                entry: Dict[str, Any] = {
                    'ts': _utcnow_iso(),
                    'actor': actor or 'anonymous',
                    'role': role or 'system',
                    'ip': ip or '',
                    'action': action or '',
                    'target_type': target_type or '',
                    'target_id': target_id or '',
                    'summary': summary or '',
                    'result': result,
                    'status': int(status or 0),
                    'method': method or '',
                    'path': path or '',
                    'request_id': request_id or '',
                    'seq': seq,
                    'prev_hash': self._prev_hash if self.tamper_chain else '',
                }
                entry = self._redact(entry)
                # The hash covers every field except 'hash' itself, computed
                # after redaction so it matches what is stored.
                entry['hash'] = self._hash_entry(entry) if self.tamper_chain else ''
                self._append_line(json.dumps(entry, ensure_ascii=False))
                self._seq = seq
                self._prev_hash = entry['hash']
                return entry
        except Exception as e:
            logger.warning("audit.record failed: %s", e)
            return None

    def _append_line(self, line: str) -> None:
        """Append *line* plus a newline to the live file, created with mode 0600."""
        self.safe_makedirs(self._audit_dir)
        fd = os.open(self._audit_file, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o600)
        try:
            os.write(fd, (line + '\n').encode('utf-8'))
        finally:
            os.close(fd)
        # Best effort: re-assert the restrictive mode on a pre-existing file.
        try:
            os.chmod(self._audit_file, 0o600)
        except OSError:
            pass

    # ── rotation ────────────────────────────────────────────────────────────

    def _maybe_rotate(self) -> None:
        """Rotate the live file once it reaches ``MAX_FILE_SIZE``.

        Rename failures are logged and otherwise ignored.
        """
        try:
            if not os.path.exists(self._audit_file):
                return
            if os.path.getsize(self._audit_file) < self.MAX_FILE_SIZE:
                return
        except OSError:
            return
        # audit.log.(N-1) -> audit.log.N, ... audit.log -> audit.log.1
        for i in range(self.BACKUP_COUNT - 1, 0, -1):
            src = f"{self._audit_file}.{i}"
            dst = f"{self._audit_file}.{i + 1}"
            if os.path.exists(src):
                try:
                    os.replace(src, dst)
                except OSError as e:
                    logger.warning("audit rotate %s->%s: %s", src, dst, e)
        try:
            os.replace(self._audit_file, f"{self._audit_file}.1")
        except OSError as e:
            logger.warning("audit rotate live->.1: %s", e)

    def _segment_files(self) -> List[str]:
        """Live file first (newest), then rotated segments .1 .. .N (older)."""
        files = []
        if os.path.exists(self._audit_file):
            files.append(self._audit_file)
        for i in range(1, self.BACKUP_COUNT + 1):
            seg = f"{self._audit_file}.{i}"
            if os.path.exists(seg):
                files.append(seg)
        return files

    # ── reading / filtering ─────────────────────────────────────────────────

    @staticmethod
    def _matches(entry: Dict[str, Any], filters: Dict[str, Any]) -> bool:
        """Return True when *entry* satisfies every non-empty filter.

        Exact-match filters: actor, action, target_type, target_id, result.
        ``since`` / ``until`` are inclusive bounds compared against ``ts``
        with plain ``<`` / ``>``.
        """
        for field in ('actor', 'action', 'target_type', 'target_id', 'result'):
            want = filters.get(field)
            if want and str(entry.get(field, '')) != str(want):
                return False
        since = filters.get('since')
        until = filters.get('until')
        ts = entry.get('ts', '')
        if since and ts < since:
            return False
        if until and ts > until:
            return False
        return True

    def _read_all(self, filters: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Return matching entries, newest-first across all segments.

        Unreadable segments, malformed lines and lines that are not JSON
        objects are skipped.
        """
        results: List[Dict[str, Any]] = []
        with self._lock:
            for seg in self._segment_files():
                try:
                    with open(seg, 'r', encoding='utf-8', errors='ignore') as f:
                        lines = f.readlines()
                except OSError:
                    continue
                for line in reversed(lines):
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    # A valid-JSON line that is not an object cannot be an entry.
                    if not isinstance(entry, dict):
                        continue
                    if self._matches(entry, filters):
                        results.append(entry)
        return results

    def query(self, filters: Optional[Dict[str, Any]] = None,
              limit: int = 100, offset: int = 0) -> Dict[str, Any]:
        """Return one page of matching entries, newest-first.

        ``limit`` is clamped to 1..1000 (default 100 when not an integer) and
        ``offset`` to >= 0 (default 0 when not an integer).

        Returns:
            ``{'entries': [...], 'total': int, 'next_offset': int | None}``
            where ``next_offset`` is None on the last page.
        """
        filters = filters or {}
        try:
            limit = max(1, min(int(limit), 1000))
        except (TypeError, ValueError):
            limit = 100
        try:
            offset = max(0, int(offset))
        except (TypeError, ValueError):
            offset = 0
        entries = self._read_all(filters)
        total = len(entries)
        page = entries[offset:offset + limit]
        next_offset = offset + limit if offset + limit < total else None
        return {'entries': page, 'total': total, 'next_offset': next_offset}

    def export_csv(self, filters: Optional[Dict[str, Any]] = None) -> str:
        """Return all matching entries as CSV text with a header row.

        The chain fields (``hash`` / ``prev_hash``) are not exported.
        """
        filters = filters or {}
        entries = self._read_all(filters)
        fields = ['ts', 'actor', 'role', 'ip', 'action', 'target_type', 'target_id',
                  'summary', 'result', 'status', 'method', 'path', 'request_id', 'seq']
        buf = io.StringIO()
        writer = csv.writer(buf)
        writer.writerow(fields)
        for e in entries:
            writer.writerow([e.get(f, '') for f in fields])
        return buf.getvalue()

    # ── integrity ───────────────────────────────────────────────────────────

    def verify_chain(self) -> Dict[str, Any]:
        """Walk all segments oldest-first; verify each entry's hash + link.

        Returns:
            ``{'ok': True, 'broken_at_seq': None}`` when every entry checks
            out (plus ``'disabled': True`` when the chain is turned off), or
            ``{'ok': False, 'broken_at_seq': <seq or None>}`` at the first
            bad entry. ``broken_at_seq`` is None when the offending line is
            not a parseable JSON object.
        """
        if not self.tamper_chain:
            return {'ok': True, 'broken_at_seq': None, 'disabled': True}
        with self._lock:
            segs = list(reversed(self._segment_files()))  # oldest -> newest
            prev_hash = ''
            first = True  # oldest available record: its predecessor may be pruned
            for seg in segs:
                try:
                    with open(seg, 'r', encoding='utf-8', errors='ignore') as f:
                        lines = f.readlines()
                except OSError:
                    continue
                for line in lines:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError:
                        return {'ok': False, 'broken_at_seq': None}
                    # Valid JSON that is not an object is corruption too.
                    if not isinstance(entry, dict):
                        return {'ok': False, 'broken_at_seq': None}
                    stored_hash = entry.get('hash', '')
                    # Don't fail the prev_hash link on the very first available
                    # record — older segments may have rotated off the end.
                    if not first and entry.get('prev_hash', '') != prev_hash:
                        return {'ok': False, 'broken_at_seq': entry.get('seq')}
                    recomputed = self._hash_entry({k: v for k, v in entry.items() if k != 'hash'})
                    if recomputed != stored_hash:
                        return {'ok': False, 'broken_at_seq': entry.get('seq')}
                    prev_hash = stored_hash
                    first = False
            return {'ok': True, 'broken_at_seq': None}

    # ── BaseServiceManager interface ────────────────────────────────────────

    def get_status(self) -> Dict[str, Any]:
        """Report chain mode, current seq, and the live file's path and size."""
        size = 0
        try:
            if os.path.exists(self._audit_file):
                size = os.path.getsize(self._audit_file)
        except OSError:
            pass
        return {
            'running': True,
            'tamper_chain': self.tamper_chain,
            'seq': self._seq,
            'file': self._audit_file,
            'file_size': size,
        }

    def test_connectivity(self) -> Dict[str, Any]:
        """Always report success; this method performs no checks."""
        return {'success': True}