feat: phase 2 content model + cache — SQLite schema, markdown, TTL

Stand up the full SQLite content layer: all 8 tables from the authoritative
schema with WAL + foreign-keys enforced per-connection, entity dataclasses
plus row mappers, hand-rolled versioned migrations tracked in
schema_migrations, and an idempotent Python seed (system user + welcome
post + About page).

Add a Markdown->HTML service using markdown-it-py with a strict bleach
allowlist (tables intentionally omitted on both sides). Add a typed
in-process TTLCache[K,V] and wire it into real DB-backed PostService and
PageService, both exposing invalidate_all() for Phase 4 admin writes.

Rewire / and /about to read from the DB; homepage renders the seeded
welcome post, About renders page.title + sanitized body_html_cached.
Update the Phase 1 route tests accordingly.

Mark Phase 2 complete in docs/ROADMAP.md.
This commit is contained in:
2026-04-21 15:40:35 -05:00
parent 28168f57b6
commit 0306f71763
21 changed files with 2055 additions and 108 deletions

195
app/models/entities.py Normal file
View File

@@ -0,0 +1,195 @@
"""Canonical persistence-layer dataclasses.
One dataclass per table in the authoritative SQLite schema documented in
``docs/ROADMAP.md`` ("SQLite Schema (authoritative)"). These map 1:1 to
the columns of each table — field names, types, and nullability all
match — so the mapper layer (:mod:`app.models.mappers`) can convert
``sqlalchemy.Row`` objects to dataclass instances with no guesswork.
Design notes
------------
- Dataclasses are *not* frozen. Later phases mutate fields such as
``User.last_login_at`` or ``MagicLinkToken.used_at`` on successful
auth events; freezing would force service code into hand-rolled
copying. Immutability for view-layer projections is still enforced
via ``PostSummary`` in :mod:`app.models.posts`.
- Datetimes are always timezone-aware UTC at the Python boundary. The
SQLite columns are ``TEXT`` holding ISO-8601 strings; conversion
happens only in :mod:`app.models.mappers`, so application code never
sees a naive datetime.
- ``PostStatus`` is a string-valued ``Enum`` to keep JSON/template
rendering trivial while still providing type-level safety.
"""
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Optional
class PostStatus(str, Enum):
    """Lifecycle state of a blog post.

    Every member's value is the exact string stored in
    ``posts.status`` and allowed by that column's ``CHECK``
    constraint. Introducing a new member therefore also needs a schema
    migration, which is why the set is kept deliberately small.
    """

    DRAFT = "draft"
    PUBLISHED = "published"
@dataclass
class User:
    """One row of the ``users`` table (an admin account).

    Phase 2 seeds a single inactive system user with ``id=1`` purely so
    that ``posts.author_user_id`` has a valid foreign-key target. Real
    admin users are provisioned by the Phase 3 magic-link flow.
    """

    id: int  # primary key
    email: str  # UNIQUE in the schema
    display_name: str
    created_at: datetime
    last_login_at: Optional[datetime]  # None when the column is NULL
    active: bool  # stored as INTEGER 0 / 1 in SQLite
@dataclass
class MagicLinkToken:
    """One row of ``magic_link_tokens``: a single-use email-login token.

    Only the SHA-256 of the raw token is kept in ``token_hash``; the
    raw token itself is emailed to the user and never persisted.
    Consuming a token sets ``used_at`` instead of deleting the row, so
    replay attempts can be refused while the audit trail stays intact.
    """

    id: int  # primary key
    email: str
    token_hash: str  # UNIQUE in the schema
    created_at: datetime
    expires_at: datetime
    used_at: Optional[datetime]  # None when the column is NULL
    request_ip: str
@dataclass
class Session:
    """One row of ``sessions``: an authenticated admin session.

    Logging out stamps ``revoked_at`` rather than deleting the row, so
    the audit log stays complete. ``ip`` and ``user_agent`` are
    snapshots taken when the session was created, not live values.
    """

    id: int  # primary key
    user_id: int  # references users(id)
    token_hash: str  # UNIQUE in the schema
    created_at: datetime
    expires_at: datetime
    ip: str
    user_agent: str
    revoked_at: Optional[datetime]  # None when the column is NULL
@dataclass
class Page:
    """One row of ``pages``: a static-ish content page such as About.

    ``body_html_cached`` holds the rendered HTML. The Phase 4 admin
    flow regenerates it on write through the Markdown pipeline, so
    rendering a page costs a SELECT rather than a sanitize pass. See
    "Caching Strategy" in ``docs/ROADMAP.md``.
    """

    id: int  # primary key
    slug: str  # UNIQUE in the schema
    title: str
    body_md: str  # Markdown source
    body_html_cached: str  # rendered HTML stored alongside the source
    updated_at: datetime
    published: bool  # stored as INTEGER 0 / 1 in SQLite
@dataclass
class Post:
    """One row of ``posts``: a blog post.

    Field for field this mirrors the ``posts`` table.
    ``body_html_cached`` is regenerated on write, following the same
    convention as :class:`Page`.
    """

    id: int  # primary key
    slug: str  # UNIQUE in the schema
    title: str
    body_md: str  # Markdown source
    body_html_cached: str  # rendered HTML stored alongside the source
    status: PostStatus
    published_at: Optional[datetime]  # None when the column is NULL
    updated_at: datetime
    author_user_id: int  # references users(id)
@dataclass
class Media:
    """One row of ``media``: metadata for an uploaded image.

    ``filename`` is the random storage name assigned at upload time.
    ``original_filename`` is whatever the client supplied; it is kept
    for display only and must NEVER be used to build a filesystem
    path. ``stored_path`` is relative to the project root.
    """

    id: int  # primary key
    filename: str  # UNIQUE in the schema
    original_filename: str
    content_type: str
    size_bytes: int
    stored_path: str
    alt_text: str  # schema default is the empty string
    uploaded_by: int  # references users(id)
    uploaded_at: datetime
@dataclass
class ContactSubmission:
    """One row of ``contact_submissions`` from the public ``/contact`` form.

    ``handled`` becomes true once Head Hen has actioned the submission.
    Rows are retained indefinitely as part of the contact audit log.
    By design only what the form asks for is captured, so there are no
    sensitive fields.
    """

    id: int  # primary key
    name: str
    email: str
    message: str
    ip: str
    user_agent: str
    submitted_at: datetime
    handled: bool  # stored as INTEGER 0 / 1; schema default is 0
@dataclass
class AuthEvent:
    """One row of ``auth_events``: an append-only auth audit record.

    ``event_type`` is one of ``link_requested``, ``link_consumed``,
    ``session_revoked`` or ``rate_limited`` (see Phase 3). ``detail``
    carries a JSON string so event-specific context can be attached
    without schema churn.
    """

    id: int  # primary key
    event_type: str
    email: Optional[str]  # None when the column is NULL
    user_id: Optional[int]  # nullable reference to users(id)
    ip: str
    user_agent: str
    created_at: datetime
    detail: str  # JSON text; schema default is '{}'

190
app/models/mappers.py Normal file
View File

@@ -0,0 +1,190 @@
"""SQL row to dataclass converters.
One ``row_to_<entity>`` function per table. All functions accept a
mapping-like object that can be indexed by column name
(``sqlalchemy.RowMapping`` as produced by ``Result.mappings()`` or
``Row._mapping``, :class:`sqlite3.Row`, or plain ``dict``) and return
the corresponding dataclass from :mod:`app.models.entities`.
Boundary responsibilities handled here (so service code never has to):
- Parse ISO-8601 ``TEXT`` columns into timezone-aware :class:`datetime`
instances (always UTC).
- Coerce SQLite ``INTEGER`` booleans (``0`` / ``1``) into real ``bool``.
- Translate ``posts.status`` strings into :class:`PostStatus` members.
Anything that isn't safe to assume (e.g. that ``published_at`` might be
NULL) is handled explicitly via :func:`_parse_optional_datetime`.
"""
from __future__ import annotations
from datetime import datetime, timezone
from typing import Any, Mapping, Optional
from app.models.entities import (
AuthEvent,
ContactSubmission,
MagicLinkToken,
Media,
Page,
Post,
PostStatus,
Session,
User,
)
def _parse_datetime(value: str) -> datetime:
    """Parse a stored ISO-8601 string into a timezone-aware UTC datetime.

    All write paths use :func:`datetime.now` with ``tz=timezone.utc``
    and serialize via ``.isoformat()``, so stored strings normally
    carry a ``+00:00`` offset. Three defensive cases are handled on
    top of that:

    - A trailing ``Z`` (the other common spelling of UTC) is rewritten
      to ``+00:00`` first, because :meth:`datetime.fromisoformat` only
      accepts ``Z`` from Python 3.11 onwards.
    - A naive value is assumed to be UTC rather than rejected.
    - A value with any other offset is converted to UTC.

    Parameters
    ----------
    value:
        ISO-8601 timestamp text as stored in a ``TEXT`` column.

    Returns
    -------
    datetime
        Timezone-aware datetime whose ``tzinfo`` is ``timezone.utc``.

    Raises
    ------
    ValueError
        If ``value`` is not a parseable ISO-8601 string.
    """
    if value.endswith("Z"):
        # Normalize the "Zulu" suffix so parsing behaves the same on
        # interpreters older than 3.11.
        value = value[:-1] + "+00:00"
    parsed = datetime.fromisoformat(value)
    if parsed.tzinfo is None:
        # Defensive: legacy rows (none exist yet) or a bad write path.
        # Treat as UTC rather than raising; we never intentionally
        # persist naive datetimes.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)
def _parse_optional_datetime(value: Optional[str]) -> Optional[datetime]:
    """Parse a nullable timestamp column.

    ``None`` (a NULL column) passes straight through; any other value
    is handed to :func:`_parse_datetime`. The wrapper exists only to
    keep call sites for nullable columns readable.
    """
    return None if value is None else _parse_datetime(value)
def _as_bool(value: Any) -> bool:
    """Turn a SQLite ``INTEGER`` flag into a real Python ``bool``.

    SQLite has no boolean type and stores flags as ``0`` / ``1``.
    Plain truthiness already gives the right answer; naming the
    conversion makes the intent explicit at the mapper boundary.
    """
    return True if value else False
def row_to_user(row: Mapping[str, Any]) -> User:
    """Build a :class:`User` from one ``users`` row."""
    # Columns are read in table order; timestamps and the boolean flag
    # go through the shared boundary helpers.
    fields = {
        "id": int(row["id"]),
        "email": row["email"],
        "display_name": row["display_name"],
        "created_at": _parse_datetime(row["created_at"]),
        "last_login_at": _parse_optional_datetime(row["last_login_at"]),
        "active": _as_bool(row["active"]),
    }
    return User(**fields)
def row_to_magic_link_token(row: Mapping[str, Any]) -> MagicLinkToken:
    """Build a :class:`MagicLinkToken` from one ``magic_link_tokens`` row."""
    # ``used_at`` is the only nullable timestamp on this table.
    fields = {
        "id": int(row["id"]),
        "email": row["email"],
        "token_hash": row["token_hash"],
        "created_at": _parse_datetime(row["created_at"]),
        "expires_at": _parse_datetime(row["expires_at"]),
        "used_at": _parse_optional_datetime(row["used_at"]),
        "request_ip": row["request_ip"],
    }
    return MagicLinkToken(**fields)
def row_to_session(row: Mapping[str, Any]) -> Session:
    """Build a :class:`Session` from one ``sessions`` row."""
    # ``revoked_at`` is the only nullable timestamp on this table.
    fields = {
        "id": int(row["id"]),
        "user_id": int(row["user_id"]),
        "token_hash": row["token_hash"],
        "created_at": _parse_datetime(row["created_at"]),
        "expires_at": _parse_datetime(row["expires_at"]),
        "ip": row["ip"],
        "user_agent": row["user_agent"],
        "revoked_at": _parse_optional_datetime(row["revoked_at"]),
    }
    return Session(**fields)
def row_to_page(row: Mapping[str, Any]) -> Page:
    """Build a :class:`Page` from one ``pages`` row."""
    fields = {
        "id": int(row["id"]),
        "slug": row["slug"],
        "title": row["title"],
        "body_md": row["body_md"],
        "body_html_cached": row["body_html_cached"],
        "updated_at": _parse_datetime(row["updated_at"]),
        "published": _as_bool(row["published"]),
    }
    return Page(**fields)
def row_to_post(row: Mapping[str, Any]) -> Post:
    """Build a :class:`Post` from one ``posts`` row.

    The stored ``status`` string is passed to the :class:`PostStatus`
    constructor, which accepts the same set of values as the column's
    ``CHECK`` constraint. A value that somehow slipped past the
    constraint raises ``ValueError`` here instead of leaking into
    business logic.
    """
    fields = {
        "id": int(row["id"]),
        "slug": row["slug"],
        "title": row["title"],
        "body_md": row["body_md"],
        "body_html_cached": row["body_html_cached"],
        "status": PostStatus(row["status"]),
        "published_at": _parse_optional_datetime(row["published_at"]),
        "updated_at": _parse_datetime(row["updated_at"]),
        "author_user_id": int(row["author_user_id"]),
    }
    return Post(**fields)
def row_to_media(row: Mapping[str, Any]) -> Media:
    """Build a :class:`Media` from one ``media`` row."""
    fields = {
        "id": int(row["id"]),
        "filename": row["filename"],
        "original_filename": row["original_filename"],
        "content_type": row["content_type"],
        "size_bytes": int(row["size_bytes"]),
        "stored_path": row["stored_path"],
        "alt_text": row["alt_text"],
        "uploaded_by": int(row["uploaded_by"]),
        "uploaded_at": _parse_datetime(row["uploaded_at"]),
    }
    return Media(**fields)
def row_to_contact_submission(row: Mapping[str, Any]) -> ContactSubmission:
    """Build a :class:`ContactSubmission` from one ``contact_submissions`` row."""
    fields = {
        "id": int(row["id"]),
        "name": row["name"],
        "email": row["email"],
        "message": row["message"],
        "ip": row["ip"],
        "user_agent": row["user_agent"],
        "submitted_at": _parse_datetime(row["submitted_at"]),
        "handled": _as_bool(row["handled"]),
    }
    return ContactSubmission(**fields)
def row_to_auth_event(row: Mapping[str, Any]) -> AuthEvent:
    """Build an :class:`AuthEvent` from one ``auth_events`` row."""
    event_id = int(row["id"])
    event_type = row["event_type"]
    email = row["email"]
    # ``user_id`` is nullable: keep NULL as None, otherwise coerce to int.
    raw_user_id = row["user_id"]
    user_id = None if raw_user_id is None else int(raw_user_id)
    ip = row["ip"]
    user_agent = row["user_agent"]
    created_at = _parse_datetime(row["created_at"])
    detail = row["detail"]
    return AuthEvent(
        id=event_id,
        event_type=event_type,
        email=email,
        user_id=user_id,
        ip=ip,
        user_agent=user_agent,
        created_at=created_at,
        detail=detail,
    )

View File

@@ -0,0 +1,108 @@
-- 001_init.sql
--
-- Initial schema for Chicken Babies R Us. Authoritative copy of the
-- tables + indexes + check constraints documented in
-- ``docs/ROADMAP.md`` (see "SQLite Schema (authoritative)").
--
-- Idempotency: every statement uses IF NOT EXISTS so re-running the
-- file on a partially-migrated database is still safe. The migration
-- runner also gates execution via the schema_migrations tracker, so
-- this belt-and-braces approach is defensive only.
--
-- No PRAGMA statements here: journal_mode = WAL and foreign_keys = ON
-- are applied per-connection via the SQLAlchemy connect-event
-- listener in ``app/db.py``. Setting them inside a migration file
-- would be a no-op on every connection except the one that ran the
-- migration, which is the opposite of what we want.
-- users: admin accounts. posts.author_user_id, media.uploaded_by,
-- sessions.user_id and auth_events.user_id all reference users(id).
CREATE TABLE IF NOT EXISTS users (
id INTEGER PRIMARY KEY,
email TEXT NOT NULL UNIQUE,
display_name TEXT NOT NULL,
created_at TEXT NOT NULL, -- ISO-8601 timestamp stored as TEXT
last_login_at TEXT, -- nullable ISO-8601 timestamp
active INTEGER NOT NULL DEFAULT 1 -- boolean stored as 0 / 1
);
-- magic_link_tokens: single-use email-login tokens. Only a hash of
-- the token is stored (token_hash); used_at stays NULL until the
-- token is consumed.
CREATE TABLE IF NOT EXISTS magic_link_tokens (
id INTEGER PRIMARY KEY,
email TEXT NOT NULL,
token_hash TEXT NOT NULL UNIQUE,
created_at TEXT NOT NULL, -- ISO-8601 timestamp stored as TEXT
expires_at TEXT NOT NULL, -- ISO-8601 timestamp stored as TEXT
used_at TEXT, -- nullable ISO-8601 timestamp
request_ip TEXT NOT NULL
);
-- NOTE(review): presumably serves per-email recency lookups (e.g.
-- rate limiting in Phase 3) -- confirm against the query that uses it.
CREATE INDEX IF NOT EXISTS idx_magic_email_created
ON magic_link_tokens(email, created_at);
-- sessions: authenticated admin sessions. revoked_at is nullable so a
-- logout can be recorded on the row instead of deleting it.
CREATE TABLE IF NOT EXISTS sessions (
id INTEGER PRIMARY KEY,
user_id INTEGER NOT NULL REFERENCES users(id),
token_hash TEXT NOT NULL UNIQUE,
created_at TEXT NOT NULL, -- ISO-8601 timestamp stored as TEXT
expires_at TEXT NOT NULL, -- ISO-8601 timestamp stored as TEXT
ip TEXT NOT NULL,
user_agent TEXT NOT NULL,
revoked_at TEXT -- nullable ISO-8601 timestamp
);
-- pages: static-ish content pages (e.g. About), looked up by unique
-- slug. body_html_cached stores the rendered form of body_md.
CREATE TABLE IF NOT EXISTS pages (
id INTEGER PRIMARY KEY,
slug TEXT NOT NULL UNIQUE,
title TEXT NOT NULL,
body_md TEXT NOT NULL, -- Markdown source
body_html_cached TEXT NOT NULL, -- rendered HTML
updated_at TEXT NOT NULL, -- ISO-8601 timestamp stored as TEXT
published INTEGER NOT NULL DEFAULT 1 -- boolean stored as 0 / 1
);
-- posts: blog posts. status is limited to 'draft' / 'published' by
-- the CHECK constraint; published_at is nullable.
CREATE TABLE IF NOT EXISTS posts (
id INTEGER PRIMARY KEY,
slug TEXT NOT NULL UNIQUE,
title TEXT NOT NULL,
body_md TEXT NOT NULL, -- Markdown source
body_html_cached TEXT NOT NULL, -- rendered HTML
status TEXT NOT NULL CHECK (status IN ('draft','published')),
published_at TEXT, -- nullable ISO-8601 timestamp
updated_at TEXT NOT NULL, -- ISO-8601 timestamp stored as TEXT
author_user_id INTEGER NOT NULL REFERENCES users(id)
);
-- NOTE(review): presumably supports listing posts of one status
-- newest-first -- confirm against the post-listing query.
CREATE INDEX IF NOT EXISTS idx_posts_status_pub
ON posts(status, published_at DESC);
-- media: metadata for uploaded files. filename is unique;
-- original_filename is stored separately from it.
CREATE TABLE IF NOT EXISTS media (
id INTEGER PRIMARY KEY,
filename TEXT NOT NULL UNIQUE,
original_filename TEXT NOT NULL,
content_type TEXT NOT NULL,
size_bytes INTEGER NOT NULL,
stored_path TEXT NOT NULL,
alt_text TEXT NOT NULL DEFAULT '', -- empty string rather than NULL when unset
uploaded_by INTEGER NOT NULL REFERENCES users(id),
uploaded_at TEXT NOT NULL -- ISO-8601 timestamp stored as TEXT
);
-- contact_submissions: messages from the public contact form.
-- handled starts at 0 for every new row.
CREATE TABLE IF NOT EXISTS contact_submissions (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
email TEXT NOT NULL,
message TEXT NOT NULL,
ip TEXT NOT NULL,
user_agent TEXT NOT NULL,
submitted_at TEXT NOT NULL, -- ISO-8601 timestamp stored as TEXT
handled INTEGER NOT NULL DEFAULT 0 -- boolean stored as 0 / 1
);
-- auth_events: audit records for auth-related events. email and
-- user_id are both nullable; detail holds JSON text and defaults to
-- an empty object.
CREATE TABLE IF NOT EXISTS auth_events (
id INTEGER PRIMARY KEY,
event_type TEXT NOT NULL,
email TEXT,
user_id INTEGER REFERENCES users(id),
ip TEXT NOT NULL,
user_agent TEXT NOT NULL,
created_at TEXT NOT NULL, -- ISO-8601 timestamp stored as TEXT
detail TEXT NOT NULL DEFAULT '{}'
);
-- NOTE(review): presumably supports reading the log newest-first --
-- confirm against the query that uses it.
CREATE INDEX IF NOT EXISTS idx_auth_events_created
ON auth_events(created_at DESC);

View File

@@ -0,0 +1,12 @@
"""SQL migration files applied by :mod:`app.db` at startup.
This package holds the authoritative schema history for the
``chicken_babies_site`` database. Each ``.sql`` file is applied exactly
once in lexicographic order; the runner tracks which files have been
applied in a ``schema_migrations`` table.
No Python code lives here — the files are trusted, developer-authored
SQL loaded via ``sqlite3.Connection.executescript`` at boot.
"""
from __future__ import annotations

204
app/models/seed.py Normal file
View File

@@ -0,0 +1,204 @@
"""Idempotent seed data for first-run databases.
Creates the minimum content needed so the public site is not blank
before an admin exists:
- System seed user (``users.id = 1``). Inactive and not on the
``ADMIN_EMAILS`` allowlist — cannot log in. Exists only so
``posts.author_user_id`` has a foreign-key target.
- Welcome blog post (``slug = 'welcome-to-the-farm'``).
- About page (``slug = 'about'``) ported from the Phase 1 static copy.
Idempotency is enforced two ways:
1. A marker row in ``schema_migrations`` (``version = 'seed_001'``)
— if present, the whole seed is a no-op.
2. As a belt-and-braces guard, each INSERT is gated by ``INSERT OR
IGNORE`` on a unique key (``users.email``, ``posts.slug``,
``pages.slug``) so a partially-applied seed never duplicates.
Running this twice is safe and logs ``seed_skipped`` on the second
boot, which the Phase 2 verification run depends on.
"""
from __future__ import annotations
from datetime import datetime, timezone
import structlog
from sqlalchemy import Engine, text
from app.services.markdown import MarkdownService
_log = structlog.get_logger(__name__)
# Marker row used to short-circuit the seed on subsequent boots.
# Namespaced with the ``seed_`` prefix so it cannot collide with a
# real migration file name.
_SEED_MARKER: str = "seed_001"
# --- Content --------------------------------------------------------------
#
# The About body is a Markdown translation of the Phase 1
# ``app/templates/public/about.html`` narrative. Kept close to the
# original wording so returning visitors see familiar copy; Head Hen
# rewrites via the Phase 4 admin.
#
# The welcome post is three short paragraphs: a greeting, a Morrison,
# TN mention (no street address — per CLAUDE.md), and a teaser of what
# future updates will cover.
_WELCOME_POST_TITLE: str = "Welcome to the Farm"
_WELCOME_POST_SLUG: str = "welcome-to-the-farm"
_WELCOME_POST_MD: str = (
"Hi there, and thanks for stopping by Chicken Babies R Us! "
"We're a small family farm and we're glad you found us.\n\n"
"We're based in Morrison, Tennessee, tucked into the rolling "
"hills of the middle part of the state. Our flock is growing, "
"our waterfowl are loud, and our coffee cups are never quite "
"empty.\n\n"
"Check back soon for updates on hatching plans, new chicks and "
"ducklings, fresh-egg availability, and whatever the geese "
"decided to get into this week."
)
_ABOUT_PAGE_TITLE: str = "About the Farm"
_ABOUT_PAGE_SLUG: str = "about"
_ABOUT_PAGE_MD: str = (
"Chicken Babies R Us is a small family farm tucked into the "
"rolling hills of Morrison, Tennessee. What started as a "
"handful of chicks in a backyard brooder has grown into a flock "
"of chickens, ducks, and geese that keep us busy (and "
"entertained) year round.\n\n"
"The operation is run by Head Hen — the chief wrangler, egg "
"gatherer, waterfowl-whisperer, and unofficial chicken "
"photographer. She handles the day-to-day care of the birds "
"and does most of the writing you'll find on this site. Expect "
"updates on hatching plans, new arrivals, the occasional coop "
"mishap, and whatever the geese decided to get into this "
"week.\n\n"
"We're a hobby farm at heart, not a commercial one, which "
"means we can take the time to know our birds and raise them "
"the way we think they ought to be raised. If you're curious "
"about what we've got going on — or just want to say hello — "
"pop over to the contact page."
)
# Seed user constants. ``active=0`` + the local-only email keep this
# user out of any real auth flow. Phase 3's magic-link issuer MUST
# refuse to issue links for non-allowlisted or inactive emails;
# Phase 3 tests assert that behavior directly.
_SEED_USER_ID: int = 1
_SEED_USER_EMAIL: str = "seed@chickenbabies.local"
_SEED_USER_DISPLAY: str = "Head Hen"
def run_seed(engine: Engine) -> bool:
    """Populate the database with first-run content, if not already done.

    The seed marker in ``schema_migrations`` is checked first. When it
    is present nothing else happens: no timestamp is taken and no
    Markdown renderer is built. Otherwise the seed user, welcome post,
    About page and the marker row are written on one connection and
    made durable by a single ``conn.commit()`` at the end.

    Parameters
    ----------
    engine:
        SQLAlchemy engine. Must already have had migrations applied
        (this function does not create tables).

    Returns
    -------
    bool
        ``True`` when seed rows were inserted on this call, ``False``
        when the marker was already present (no-op). Useful for
        verification scripts and tests that need to assert
        ``seed_skipped`` on second boot.
    """
    with engine.connect() as conn:
        # Short-circuit via the migration-tracker marker. Cheaper than
        # counting rows and survives the edge case of a manually
        # wiped posts/pages table that we wouldn't want to reseed
        # automatically.
        marker_row = conn.execute(
            text(
                "SELECT version FROM schema_migrations WHERE version = :v"
            ),
            {"v": _SEED_MARKER},
        ).first()
        if marker_row is not None:
            _log.info("seed_skipped", marker=_SEED_MARKER)
            return False

        # Built only once we know there is work to do, so an
        # already-seeded boot never pays for the Markdown pipeline.
        now_iso = datetime.now(timezone.utc).isoformat()
        markdown = MarkdownService()

        # --- Seed user ------------------------------------------------
        # Seed artifact, not a real admin (see Phase 3 for real
        # users). The id is pinned explicitly so the welcome post
        # below can reference it as ``author_user_id``.
        conn.execute(
            text(
                "INSERT OR IGNORE INTO users"
                " (id, email, display_name, created_at, last_login_at, active)"
                " VALUES (:id, :email, :display_name, :created_at, NULL, 0)"
            ),
            {
                "id": _SEED_USER_ID,
                "email": _SEED_USER_EMAIL,
                "display_name": _SEED_USER_DISPLAY,
                "created_at": now_iso,
            },
        )

        # --- Welcome post --------------------------------------------
        welcome_html = markdown.render(_WELCOME_POST_MD)
        conn.execute(
            text(
                "INSERT OR IGNORE INTO posts"
                " (slug, title, body_md, body_html_cached, status,"
                " published_at, updated_at, author_user_id)"
                " VALUES (:slug, :title, :body_md, :body_html,"
                " 'published', :published_at, :updated_at, :author_id)"
            ),
            {
                "slug": _WELCOME_POST_SLUG,
                "title": _WELCOME_POST_TITLE,
                "body_md": _WELCOME_POST_MD,
                "body_html": welcome_html,
                "published_at": now_iso,
                "updated_at": now_iso,
                "author_id": _SEED_USER_ID,
            },
        )

        # --- About page ----------------------------------------------
        about_html = markdown.render(_ABOUT_PAGE_MD)
        conn.execute(
            text(
                "INSERT OR IGNORE INTO pages"
                " (slug, title, body_md, body_html_cached, updated_at,"
                " published)"
                " VALUES (:slug, :title, :body_md, :body_html,"
                " :updated_at, 1)"
            ),
            {
                "slug": _ABOUT_PAGE_SLUG,
                "title": _ABOUT_PAGE_TITLE,
                "body_md": _ABOUT_PAGE_MD,
                "body_html": about_html,
                "updated_at": now_iso,
            },
        )

        # --- Marker ---------------------------------------------------
        # Written last and committed together with the content rows,
        # so the marker never exists without the rows it vouches for.
        conn.execute(
            text(
                "INSERT INTO schema_migrations (version, applied_at)"
                " VALUES (:v, :t)"
            ),
            {"v": _SEED_MARKER, "t": now_iso},
        )
        conn.commit()
        _log.info("seed_applied", marker=_SEED_MARKER)
        return True