Files
chicken_babies_site/app/db.py
Phillip Tarrant 0306f71763 feat: phase 2 content model + cache — SQLite schema, markdown, TTL
Stand up the full SQLite content layer: all 7 tables from the authoritative
schema with WAL + foreign-keys enforced per-connection, entity dataclasses
plus row mappers, hand-rolled versioned migrations tracked in
schema_migrations, and an idempotent Python seed (system user + welcome
post + About page).

Add a Markdown->HTML service using markdown-it-py with a strict bleach
allowlist (tables intentionally omitted on both sides). Add a typed
in-process TTLCache[K,V] and wire it into real DB-backed PostService and
PageService, both exposing invalidate_all() for Phase 4 admin writes.

Rewire / and /about to read from the DB; homepage renders the seeded
welcome post, About renders page.title + sanitized body_html_cached.
Update the Phase 1 route tests accordingly.

Mark Phase 2 complete in docs/ROADMAP.md.
2026-04-21 15:40:35 -05:00

201 lines
7.6 KiB
Python

"""SQLAlchemy engine factory, SQLite PRAGMA hookup, and migration runner.
Responsibilities in this module:
1. **Engine construction** — :func:`build_engine` produces a
``sqlalchemy.Engine`` from the application's ``DATABASE_URL``,
thread-safe for uvicorn's worker pool.
2. **Per-connection PRAGMAs** — a ``"connect"`` event listener attached
to the engine returned by :func:`build_engine` sets ``journal_mode =
WAL`` and ``foreign_keys = ON`` on *every* new SQLite connection, not
just the first. ``foreign_keys`` is a per-connection setting (WAL
persists in the database file, so re-applying it is a cheap no-op),
so doing this once at startup would silently leave FKs disabled for
every worker.
3. **Migration runner** — :func:`run_migrations` applies every
``.sql`` file under :mod:`app.models.migrations` in lexicographic
order, tracking applied files in a ``schema_migrations`` table.
Migrations are trusted developer-authored SQL loaded via
:meth:`sqlite3.Connection.executescript`; they never touch user
input.
No Python code in this module builds a SQL statement by string
interpolation. Queries go through ``sqlalchemy.text(":bind")``.
"""
from __future__ import annotations
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Final
import structlog
from sqlalchemy import Engine, create_engine, event, text
# Directory containing the ``NNN_description.sql`` migration files. Kept
# as a module-level constant so tests can reason about it without
# importing the runner internals.
_MIGRATIONS_DIR: Final[Path] = Path(__file__).resolve().parent / "models" / "migrations"
_log = structlog.get_logger(__name__)
def build_engine(database_url: str) -> Engine:
    """Create the app's SQLAlchemy :class:`Engine` and hook up its PRAGMAs.

    Parameters
    ----------
    database_url:
        A SQLAlchemy URL. In production this is
        ``sqlite:///data/app.db``; tests pass a tmp-path file URL.

    Returns
    -------
    Engine
        An engine created with ``check_same_thread=False`` and with the
        per-connection PRAGMA listener already installed.

    Notes
    -----
    - ``check_same_thread=False`` is needed because uvicorn services
      requests from a worker-thread pool; SQLAlchemy's connection pool
      plus our explicit transactions keep this safe.
    - For file-backed ``sqlite:///`` URLs the parent directory of the
      database file is created up front, since SQLite will not create
      missing directories itself.
    - ``future=True`` opts into SQLAlchemy 2.x semantics; it is
      redundant on 2.0+ but kept for explicitness.
    """
    sqlite_file_prefix = "sqlite:///"
    if database_url.startswith(sqlite_file_prefix):
        location = database_url[len(sqlite_file_prefix):]
        # An empty location or ``:memory:`` means an in-memory database:
        # there is no directory to prepare.
        if location not in ("", ":memory:"):
            # Relative paths resolve against the current working
            # directory, which matches uvicorn's default cwd (the repo
            # root) and Docker's WORKDIR.
            folder = Path(location).parent
            # A bare filename has parent ".", which already exists.
            if folder != Path("."):
                os.makedirs(folder, exist_ok=True)

    engine = create_engine(
        database_url,
        connect_args={"check_same_thread": False},
        future=True,
    )
    _install_sqlite_pragmas(engine)
    return engine
def _install_sqlite_pragmas(engine: Engine) -> None:
    """Register a ``"connect"`` listener on *engine* that issues our PRAGMAs.

    The listener runs for every new DB-API connection the engine opens
    and executes ``PRAGMA journal_mode = WAL`` followed by
    ``PRAGMA foreign_keys = ON``. Doing this per connection — rather
    than once at startup — is what guarantees foreign-key enforcement
    on every pooled connection.
    """
    # Trusted, developer-authored literals; PRAGMA statements cannot be
    # parameterized, so they are sent through the raw DB-API cursor
    # rather than SQLAlchemy ``text`` wrappers.
    pragma_statements = (
        # WAL lets readers proceed without blocking the single writer
        # and suits a read-heavy workload. The mode persists in the
        # database file, so repeating it is a cheap no-op.
        "PRAGMA journal_mode = WAL",
        # foreign_keys is per-connection and SQLite defaults it to OFF,
        # so it MUST be enabled here to get referential integrity.
        "PRAGMA foreign_keys = ON",
    )

    def _on_connect(dbapi_connection, connection_record) -> None:  # type: ignore[no-untyped-def]
        """Apply each PRAGMA on the freshly opened DB-API connection."""
        cur = dbapi_connection.cursor()
        try:
            for statement in pragma_statements:
                cur.execute(statement)
        finally:
            cur.close()

    event.listen(engine, "connect", _on_connect)
def run_migrations(engine: Engine) -> list[str]:
    """Apply any un-applied SQL files from :mod:`app.models.migrations`.

    Behavior:

    - Creates a ``schema_migrations`` tracker table if missing.
    - Lists ``.sql`` files in :data:`_MIGRATIONS_DIR` in sorted order.
    - For each file whose stem is not yet in ``schema_migrations``,
      runs its content via :meth:`sqlite3.Connection.executescript`
      (necessary because a migration file may contain multiple
      statements) inside one explicit ``BEGIN IMMEDIATE`` transaction
      that also covers the bookkeeping ``INSERT``, then commits.
      Already-applied files are skipped.
    - If a file fails, its transaction is rolled back and the
      exception propagates. Files committed earlier in the same call
      stay applied and recorded.

    Parameters
    ----------
    engine:
        Engine connected to the SQLite database to migrate.

    Returns
    -------
    list[str]
        The ordered list of versions (file stems) applied on *this*
        call. Empty when the DB is already up to date, useful for logs
        and tests.

    Raises
    ------
    Exception
        Whatever the driver or SQLAlchemy raises for a failing
        migration is re-raised after the rollback.

    Security note
    -------------
    Migration SQL is trusted input from the repository; it does not
    mix with user-origin data and therefore does not need bind
    parameters. User data still flows exclusively through
    parameterized queries elsewhere (see ``docs/security.md`` CWE-89).
    """
    migration_files = sorted(_MIGRATIONS_DIR.glob("*.sql"))
    applied_now: list[str] = []

    # A single connection over the life of the migration run lets us
    # mix executescript (DDL) with ordinary parameterized bookkeeping
    # cleanly. We commit per file so a failure partway through leaves
    # earlier files recorded.
    with engine.connect() as conn:
        # Ensure the tracker table exists. Can't use schema_migrations
        # itself to gate this since it may not exist yet.
        conn.execute(
            text(
                "CREATE TABLE IF NOT EXISTS schema_migrations ("
                " version TEXT PRIMARY KEY,"
                " applied_at TEXT NOT NULL"
                ")"
            )
        )
        conn.commit()

        # Pull the set of already-applied versions once.
        already_applied = {
            row[0]
            for row in conn.execute(
                text("SELECT version FROM schema_migrations")
            )
        }

        for path in migration_files:
            version = path.stem
            if version in already_applied:
                continue

            sql_text = path.read_text(encoding="utf-8")

            # executescript is only exposed on the DB-API connection,
            # so we reach through to the SQLAlchemy connection's raw
            # DB-API connection. Trust boundary: the file is checked
            # into git, never user-supplied, so there is no injection
            # vector.
            raw = conn.connection
            try:
                # executescript commits any pending transaction and
                # then runs in autocommit mode, so without an explicit
                # BEGIN a script failing halfway would leave its first
                # statements committed but the version unrecorded.
                # Opening the transaction inside the script makes the
                # DDL and the bookkeeping row succeed or fail together.
                # NOTE(review): assumes migration files carry no
                # BEGIN/COMMIT of their own — confirm.
                raw.executescript(f"BEGIN IMMEDIATE;\n{sql_text}\n")
                conn.execute(
                    text(
                        "INSERT INTO schema_migrations (version, applied_at) "
                        "VALUES (:v, :t)"
                    ),
                    {
                        "v": version,
                        "t": datetime.now(timezone.utc).isoformat(),
                    },
                )
                conn.commit()
            except Exception:
                # Undo the partially applied file before surfacing the
                # error, so the next run starts from a clean schema.
                raw.rollback()
                raise

            applied_now.append(version)
            _log.info("migration_applied", version=version)

    if not applied_now:
        _log.info("migrations_up_to_date")
    return applied_now