Stand up the full SQLite content layer: all 7 tables from the authoritative schema with WAL + foreign-keys enforced per-connection, entity dataclasses plus row mappers, hand-rolled versioned migrations tracked in schema_migrations, and an idempotent Python seed (system user + welcome post + About page). Add a Markdown->HTML service using markdown-it-py with a strict bleach allowlist (tables intentionally omitted on both sides). Add a typed in-process TTLCache[K,V] and wire it into real DB-backed PostService and PageService, both exposing invalidate_all() for Phase 4 admin writes. Rewire / and /about to read from the DB; homepage renders the seeded welcome post, About renders page.title + sanitized body_html_cached. Update the Phase 1 route tests accordingly. Mark Phase 2 complete in docs/ROADMAP.md.
201 lines
7.6 KiB
Python
201 lines
7.6 KiB
Python
"""SQLAlchemy engine factory, SQLite PRAGMA hookup, and migration runner.
|
|
|
|
Responsibilities in this module:
|
|
|
|
1. **Engine construction** — :func:`build_engine` produces a
``sqlalchemy.Engine`` from the application's ``DATABASE_URL``,
thread-safe for uvicorn's worker pool.
|
|
2. **Per-connection PRAGMAs** — a ``connect`` event hook registered on
the engine built here sets ``journal_mode = WAL`` and ``foreign_keys =
ON`` on *every* new SQLite connection, not just the first.
``foreign_keys`` is a per-connection setting that defaults to OFF
(WAL, by contrast, persists in the database file), so doing this once
at startup would silently leave FKs disabled on every other pooled
connection.
|
|
3. **Migration runner** — :func:`run_migrations` applies every
|
|
``.sql`` file under :mod:`app.models.migrations` in lexicographic
|
|
order, tracking applied files in a ``schema_migrations`` table.
|
|
Migrations are trusted developer-authored SQL loaded via
|
|
:meth:`sqlite3.Connection.executescript`; they never touch user
|
|
input.
|
|
|
|
No Python code in this module builds a SQL statement by string
|
|
interpolation. Queries go through ``sqlalchemy.text(":bind")``.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Final
|
|
|
|
import structlog
|
|
from sqlalchemy import Engine, create_engine, event, text
|
|
|
|
# Location of the ``NNN_description.sql`` migration scripts, resolved
# relative to this file. Exposed as a module-level constant so tests
# can inspect it without reaching into the runner's internals.
_MIGRATIONS_DIR: Final[Path] = Path(__file__).resolve().parent.joinpath(
    "models", "migrations"
)

# Module-scoped structured logger.
_log = structlog.get_logger(__name__)
|
|
|
|
|
|
def build_engine(database_url: str) -> Engine:
    """Build a SQLAlchemy :class:`Engine` for the app's SQLite database.

    Parameters
    ----------
    database_url:
        A SQLAlchemy URL. In production this is
        ``sqlite:///data/app.db``; tests pass a tmp-path file URL.

    Returns
    -------
    Engine
        An engine with the per-connection SQLite PRAGMA hook installed.

    Notes
    -----
    - ``check_same_thread=False`` is required because uvicorn services
      requests from a worker-thread pool; SQLAlchemy's connection pool
      plus our explicit transactions keep this safe.
    - For file-backed SQLite URLs we eagerly create the parent
      directory (SQLite refuses to create missing directories).
    - The URL is parsed with SQLAlchemy's own parser rather than by
      slicing off a literal ``sqlite:///`` prefix, so driver-qualified
      URLs (``sqlite+pysqlite:///...``) are recognised and a query
      string is never mistaken for part of the file path.
    - ``future=True`` opts into SQLAlchemy 2.x semantics; redundant on
      2.0+ but explicit is better than implicit.
    """
    # Function-scope import keeps this block self-contained; the module
    # already depends on SQLAlchemy.
    from sqlalchemy.engine import make_url

    url = make_url(database_url)
    database = url.database

    # Only real on-disk paths need a directory. In-memory databases
    # (no path, or ``:memory:``) and SQLite ``file:`` URIs are left
    # alone; a ``file:`` URI is not a filesystem path and must not be
    # fed to ``Path``.
    is_disk_path = (
        url.get_backend_name() == "sqlite"
        and bool(database)
        and database != ":memory:"
        and not database.startswith("file:")
    )
    if is_disk_path:
        # Relative paths resolve against the current working directory.
        # This matches uvicorn's default cwd (the repo root) and
        # Docker's WORKDIR.
        Path(database).parent.mkdir(parents=True, exist_ok=True)

    engine = create_engine(
        database_url,
        future=True,
        connect_args={"check_same_thread": False},
    )
    _install_sqlite_pragmas(engine)
    return engine
|
|
|
|
|
|
def _install_sqlite_pragmas(engine: Engine) -> None:
    """Register a ``connect`` listener that applies our SQLite PRAGMAs.

    Every new DB-API connection opened by ``engine`` gets
    ``journal_mode = WAL`` followed by ``foreign_keys = ON``.
    ``foreign_keys`` defaults to OFF on each fresh SQLite connection,
    so it has to be set per connection to get referential integrity
    across the whole pool. WAL persists in the database file, so
    re-issuing it is a cheap no-op after the first connection.
    """
    # Trusted, developer-authored literals with no external input.
    # PRAGMA calls are not valid parameterized SQL, so they go through
    # the raw DB-API cursor instead of SQLAlchemy ``text`` wrappers.
    pragma_statements = (
        # WAL: readers don't block the single writer, which suits a
        # read-heavy workload.
        "PRAGMA journal_mode = WAL",
        # Must be set on every connection to enforce foreign keys.
        "PRAGMA foreign_keys = ON",
    )

    def _apply_pragmas(dbapi_connection, connection_record) -> None:  # type: ignore[no-untyped-def]
        """Run the per-connection SQLite initialization statements."""
        cur = dbapi_connection.cursor()
        try:
            for statement in pragma_statements:
                cur.execute(statement)
        finally:
            cur.close()

    event.listen(engine, "connect", _apply_pragmas)
|
|
|
|
|
|
def run_migrations(engine: Engine) -> list[str]:
    """Apply any un-applied SQL files from :mod:`app.models.migrations`.

    Behavior:

    - Creates a ``schema_migrations`` tracker table if missing.
    - Lists ``.sql`` files in :data:`_MIGRATIONS_DIR` in sorted order.
    - For each file not yet in ``schema_migrations``, runs its content
      via :meth:`sqlite3.Connection.executescript` (necessary because
      a migration file may contain multiple statements) inside a
      single ``BEGIN IMMEDIATE`` transaction that also records the
      version. Already-applied files are skipped.
    - If a file fails, its transaction is rolled back, so a broken
      migration leaves neither partial schema changes nor a tracker
      row behind. Files applied earlier in the same call stay
      committed and the exception propagates.

    Migration files must not contain their own ``BEGIN`` / ``COMMIT``
    statements: the runner owns the transaction. Note also that SQLite
    ignores ``PRAGMA foreign_keys`` inside a transaction.

    Returns
    -------
    list[str]
        The ordered list of versions applied on *this* call. Empty
        when the DB is already up to date, useful for logs and tests.

    Raises
    ------
    Exception
        Whatever the DB-API or SQLAlchemy raises for a failing
        migration, re-raised after the rollback.

    Security note
    -------------
    Migration SQL is trusted input from the repository; it does not
    mix with user-origin data and therefore does not need bind
    parameters. User data still flows exclusively through
    parameterized queries elsewhere (see ``docs/security.md`` CWE-89).
    """
    files = sorted(_MIGRATIONS_DIR.glob("*.sql"))
    applied_now: list[str] = []

    # One connection for the whole run lets us mix executescript (DDL)
    # with ordinary parameterized bookkeeping. We commit per file so a
    # failure partway through leaves earlier files recorded.
    with engine.connect() as conn:
        # Ensure the tracker table exists. Can't use schema_migrations
        # itself to gate this since it may not exist yet.
        conn.execute(
            text(
                "CREATE TABLE IF NOT EXISTS schema_migrations ("
                "  version TEXT PRIMARY KEY,"
                "  applied_at TEXT NOT NULL"
                ")"
            )
        )
        conn.commit()

        # Pull the set of already-applied versions once.
        already_applied = {
            row[0]
            for row in conn.execute(
                text("SELECT version FROM schema_migrations")
            ).fetchall()
        }

        for path in files:
            version = path.stem
            if version in already_applied:
                continue

            sql_text = path.read_text(encoding="utf-8")

            # executescript is only exposed on the DB-API connection,
            # so we reach through to the pooled raw connection. Trust
            # boundary: the file is checked into git, never
            # user-supplied, so there is no injection vector.
            raw = conn.connection
            try:
                # executescript commits any pending transaction and
                # then runs in autocommit mode, so the BEGIN has to be
                # part of the script itself. The prefix is a constant;
                # issuing BEGIN as a separate statement beforehand
                # would simply be committed away by executescript.
                raw.executescript("BEGIN IMMEDIATE;\n" + sql_text)

                # Still inside the transaction opened above, so the
                # tracker row commits or rolls back with the DDL.
                conn.execute(
                    text(
                        "INSERT INTO schema_migrations (version, applied_at) "
                        "VALUES (:v, :t)"
                    ),
                    {
                        "v": version,
                        "t": datetime.now(timezone.utc).isoformat(),
                    },
                )
                conn.commit()
            except Exception:
                # Roll back on the DB-API connection directly:
                # SQLAlchemy may not consider a transaction open at
                # this point, which would make conn.rollback() a no-op.
                raw.rollback()
                _log.error("migration_failed", version=version)
                raise

            applied_now.append(version)
            _log.info("migration_applied", version=version)

    if not applied_now:
        _log.info("migrations_up_to_date")
    return applied_now
|