"""Append-only auth audit log service. Writes one row per auth event into the ``auth_events`` table. The rest of the auth stack calls :meth:`AuditService.record` to persist a structured, queryable audit trail without having to know the SQL or the row schema. Security notes -------------- - NEVER pass raw tokens, raw session IDs, or email bodies into ``detail``. Correlate sessions via the last 6 hex chars of their stored hash, never the full hash and never the raw value (CWE-200). - ``detail`` is persisted as JSON text; the schema column is ``TEXT NOT NULL DEFAULT '{}'`` and the writer always provides a valid JSON object. - All writes go through parameterized SQL with ``sqlalchemy.text`` ``:bind`` parameters; no string interpolation (CWE-89). """ from __future__ import annotations import json from datetime import datetime, timezone from typing import Any, Mapping, Optional import structlog from sqlalchemy import Engine, text _log = structlog.get_logger(__name__) class AuditService: """Persist rows into ``auth_events``. The service is intentionally tiny: one write method plus a helper fetcher used by tests. No caching (this is an append-only audit log and reads are rare). """ def __init__(self, engine: Engine) -> None: """Store the shared SQLAlchemy engine by reference. The service never opens its own engine — it reuses the one wired on ``app.state.engine``. """ self._engine: Engine = engine def record( self, event_type: str, *, email: Optional[str] = None, user_id: Optional[int] = None, ip: str = "", user_agent: str = "", detail: Optional[Mapping[str, Any]] = None, ) -> None: """Insert a single audit row. Parameters ---------- event_type: One of the Phase 3 event types: ``link_requested``, ``link_consumed``, ``consume_failed``, ``session_created``, ``session_revoked``, ``rate_limited``. email: Submitted / target email (nullable when the event doesn't have one, e.g. session_revoked where we key off user_id). user_id: Foreign key into ``users``; nullable for pre-auth events. ip: Client IP at time of event. Always captured when available; empty string is acceptable for events originating outside a request context (which Phase 3 does not currently emit, but the column is NOT NULL and we want the door closed). user_agent: Client UA at time of event. Same NOT-NULL rationale as ``ip``. detail: Event-specific structured context (dict-like). Serialized to a compact JSON string. Defaults to ``{}`` when absent. NEVER put a raw token or session ID here — only hashes (or their last 6 chars) and other non-sensitive metadata. """ # Always serialize to JSON text; the DB column enforces NOT NULL # with an empty-object default, and we honor that contract here # rather than relying on the default. detail_json = json.dumps(dict(detail) if detail is not None else {}) now_iso = datetime.now(timezone.utc).isoformat() with self._engine.begin() as conn: conn.execute( text( "INSERT INTO auth_events" " (event_type, email, user_id, ip, user_agent," " created_at, detail)" " VALUES (:event_type, :email, :user_id, :ip," " :user_agent, :created_at, :detail)" ), { "event_type": event_type, "email": email, "user_id": user_id, "ip": ip or "", "user_agent": user_agent or "", "created_at": now_iso, "detail": detail_json, }, ) # Mirror the audit row to structured logs at INFO. We never log # the raw token / session ID, only the same detail dict (which # the caller already scrubbed) and the non-sensitive envelope. _log.info( "auth_event", event_type=event_type, email=email, user_id=user_id, detail=detail or {}, )