"""SQLAlchemy engine factory, SQLite PRAGMA hookup, and migration runner. Responsibilities in this module: 1. **Engine construction** — :func:`build_engine` produces a ``sqlalchemy.Engine`` from the application's ``DATABASE_URL``, threaded-safe for uvicorn's worker pool. 2. **Per-connection PRAGMAs** — a single ``@event.listens_for(Engine, "connect")`` hook sets ``journal_mode = WAL`` and ``foreign_keys = ON`` on *every* new SQLite connection, not just the first. SQLite applies both pragmas per-connection, so doing this once at startup would silently leave FKs disabled for every worker. 3. **Migration runner** — :func:`run_migrations` applies every ``.sql`` file under :mod:`app.models.migrations` in lexicographic order, tracking applied files in a ``schema_migrations`` table. Migrations are trusted developer-authored SQL loaded via :meth:`sqlite3.Connection.executescript`; they never touch user input. No Python code in this module builds a SQL statement by string interpolation. Queries go through ``sqlalchemy.text(":bind")``. """ from __future__ import annotations import os from datetime import datetime, timezone from pathlib import Path from typing import Final import structlog from sqlalchemy import Engine, create_engine, event, text # Directory containing the ``NNN_description.sql`` migration files. Kept # as a module-level constant so tests can reason about it without # importing the runner internals. _MIGRATIONS_DIR: Final[Path] = Path(__file__).resolve().parent / "models" / "migrations" _log = structlog.get_logger(__name__) def build_engine(database_url: str) -> Engine: """Build a SQLAlchemy :class:`Engine` for the app's SQLite database. Parameters ---------- database_url: A SQLAlchemy URL. In production this is ``sqlite:///data/app.db``; tests pass a tmp-path file URL. Notes ----- - ``check_same_thread=False`` is required because uvicorn services requests from a worker-thread pool; SQLAlchemy's connection pool plus our explicit transactions keep this safe. - For file-backed SQLite URLs we eagerly create the parent directory (SQLite refuses to create missing directories). - ``future=True`` opts into SQLAlchemy 2.x semantics; redundant on 2.0+ but explicit is better than implicit. """ # Ensure the on-disk directory exists for file-backed SQLite URLs. # In-memory databases and ``:memory:`` URLs are left alone. if database_url.startswith("sqlite:///"): db_path_str = database_url[len("sqlite:///"):] if db_path_str and db_path_str != ":memory:": db_path = Path(db_path_str) # Relative paths resolve against the current working # directory. This matches uvicorn's default cwd (the repo # root) and Docker's WORKDIR. parent = db_path.parent if str(parent) and parent != Path("."): os.makedirs(parent, exist_ok=True) engine = create_engine( database_url, future=True, connect_args={"check_same_thread": False}, ) _install_sqlite_pragmas(engine) return engine def _install_sqlite_pragmas(engine: Engine) -> None: """Attach a connect-event listener that enforces our SQLite PRAGMAs. ``journal_mode = WAL`` and ``foreign_keys = ON`` are both per-connection settings in SQLite. Applying them on every new connection — rather than once at startup — is the only way to guarantee foreign-key enforcement across all pool workers. """ @event.listens_for(engine, "connect") def _on_connect(dbapi_connection, connection_record) -> None: # type: ignore[no-untyped-def] """Run per-connection SQLite initialization. Uses the raw DB-API cursor (not SQLAlchemy ``text`` wrappers) because PRAGMA calls are not valid parameterized SQL — they are trusted, developer-authored literals with no external input. """ cursor = dbapi_connection.cursor() try: # WAL improves concurrency (readers don't block the # single writer) and is well-suited to our read-heavy # workload. It persists on the database file, so # re-setting is a cheap no-op after the first call. cursor.execute("PRAGMA journal_mode = WAL") # foreign_keys is per-connection; SQLite defaults to OFF, # so we MUST set it here to have referential integrity. cursor.execute("PRAGMA foreign_keys = ON") finally: cursor.close() def run_migrations(engine: Engine) -> list[str]: """Apply any un-applied SQL files from :mod:`app.models.migrations`. Behavior: - Creates a ``schema_migrations`` tracker table if missing. - Lists ``.sql`` files in :data:`_MIGRATIONS_DIR` in sorted order. - For each file not yet in ``schema_migrations``, runs its content via :meth:`sqlite3.Connection.executescript` (necessary because a migration file may contain multiple statements) inside a single ``BEGIN IMMEDIATE`` transaction, then records the version. Already-applied files are skipped. Returns ------- list[str] The ordered list of versions applied on *this* call. Empty when the DB is already up to date, useful for logs and tests. Security note ------------- Migration SQL is trusted input from the repository; it does not mix with user-origin data and therefore does not need bind parameters. User data still flows exclusively through parameterized queries elsewhere (see ``docs/security.md`` CWE-89). """ files = sorted(p for p in _MIGRATIONS_DIR.glob("*.sql")) applied_now: list[str] = [] # A single "raw connection" over the life of the migration run # lets us mix executescript (DDL) with ordinary parameterized # bookkeeping cleanly. We commit per file so a failure partway # through leaves earlier files recorded. with engine.connect() as conn: # Ensure the tracker table exists. Can't use schema_migrations # itself to gate this since it may not exist yet. conn.execute( text( "CREATE TABLE IF NOT EXISTS schema_migrations (" " version TEXT PRIMARY KEY," " applied_at TEXT NOT NULL" ")" ) ) conn.commit() # Pull the set of already-applied versions once. already_applied = { row[0] for row in conn.execute( text("SELECT version FROM schema_migrations") ).fetchall() } for path in files: version = path.stem if version in already_applied: continue sql_text = path.read_text(encoding="utf-8") # executescript is only exposed on the DB-API connection, # so we reach through the SQLAlchemy connection's raw # cursor. Trust boundary: the file is checked into git, # never user-supplied, so there is no injection vector. raw = conn.connection raw.executescript(sql_text) conn.execute( text( "INSERT INTO schema_migrations (version, applied_at) " "VALUES (:v, :t)" ), { "v": version, "t": datetime.now(timezone.utc).isoformat(), }, ) conn.commit() applied_now.append(version) _log.info("migration_applied", version=version) if not applied_now: _log.info("migrations_up_to_date") return applied_now