feat: phase 2 content model + cache — SQLite schema, markdown, TTL

Stand up the full SQLite content layer: all 8 tables from the authoritative
schema with WAL + foreign-keys enforced per-connection, entity dataclasses
plus row mappers, hand-rolled versioned migrations tracked in
schema_migrations, and an idempotent Python seed (system user + welcome
post + About page).

Add a Markdown->HTML service using markdown-it-py with a strict bleach
allowlist (tables intentionally omitted on both sides). Add a typed
in-process TTLCache[K,V] and wire it into real DB-backed PostService and
PageService, both exposing invalidate_all() for Phase 4 admin writes.

Rewire / and /about to read from the DB; homepage renders the seeded
welcome post, About renders page.title + sanitized body_html_cached.
Update the Phase 1 route tests accordingly.

Mark Phase 2 complete in docs/ROADMAP.md.
This commit is contained in:
2026-04-21 15:40:35 -05:00
parent 28168f57b6
commit 0306f71763
21 changed files with 2055 additions and 108 deletions

200
app/db.py Normal file
View File

@@ -0,0 +1,200 @@
"""SQLAlchemy engine factory, SQLite PRAGMA hookup, and migration runner.
Responsibilities in this module:
1. **Engine construction** — :func:`build_engine` produces a
``sqlalchemy.Engine`` from the application's ``DATABASE_URL``,
thread-safe for uvicorn's worker pool.
2. **Per-connection PRAGMAs** — a single ``@event.listens_for(engine,
"connect")`` hook, attached to each engine that :func:`build_engine`
creates, sets ``journal_mode = WAL`` and ``foreign_keys =
ON`` on *every* new SQLite connection, not just the first. SQLite
applies both pragmas per-connection, so doing this once at startup
would silently leave FKs disabled for every worker.
3. **Migration runner** — :func:`run_migrations` applies every
``.sql`` file under :mod:`app.models.migrations` in lexicographic
order, tracking applied files in a ``schema_migrations`` table.
Migrations are trusted developer-authored SQL loaded via
:meth:`sqlite3.Connection.executescript`; they never touch user
input.
No Python code in this module builds a SQL statement by string
interpolation. Queries go through ``sqlalchemy.text`` with ``:name`` bind
parameters.
"""
from __future__ import annotations
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Final
import structlog
from sqlalchemy import Engine, create_engine, event, text
# Directory containing the ``NNN_description.sql`` migration files. Kept
# as a module-level constant so tests can reason about it without
# importing the runner internals. Built from this file's resolved
# location, so it does not depend on the process's working directory.
_MIGRATIONS_DIR: Final[Path] = Path(__file__).resolve().parent / "models" / "migrations"
# Module-level structured logger; used by the migration runner to report
# applied migrations and the "already up to date" case.
_log = structlog.get_logger(__name__)
def build_engine(database_url: str) -> Engine:
    """Create the application's SQLite-backed SQLAlchemy :class:`Engine`.

    Parameters
    ----------
    database_url:
        A SQLAlchemy URL. In production this is
        ``sqlite:///data/app.db``; tests pass a tmp-path file URL.

    Returns
    -------
    Engine
        An engine that already has the per-connection PRAGMA listener
        attached (see :func:`_install_sqlite_pragmas`).

    Notes
    -----
    - Only URLs that start with ``sqlite:///`` get their parent
      directory created; other spellings and in-memory databases are
      passed to SQLAlchemy untouched. SQLite refuses to create missing
      directories, hence the eager ``makedirs``.
    - ``check_same_thread=False`` is required because uvicorn services
      requests from a worker-thread pool; SQLAlchemy's connection pool
      plus our explicit transactions keep this safe.
    - ``future=True`` opts into SQLAlchemy 2.x semantics; redundant on
      2.0+ but explicit is better than implicit.
    """
    sqlite_file_prefix = "sqlite:///"
    if database_url.startswith(sqlite_file_prefix):
        location = database_url[len(sqlite_file_prefix):]
        # An empty location or ``:memory:`` has no on-disk directory.
        if location not in ("", ":memory:"):
            # Relative paths resolve against the current working
            # directory, which matches uvicorn's default cwd (the repo
            # root) and Docker's WORKDIR.
            directory = Path(location).parent
            # A bare filename has parent ``.``, which always exists.
            if directory != Path("."):
                os.makedirs(directory, exist_ok=True)

    sqlite_engine = create_engine(
        database_url,
        future=True,
        connect_args={"check_same_thread": False},
    )
    _install_sqlite_pragmas(sqlite_engine)
    return sqlite_engine
def _install_sqlite_pragmas(engine: Engine) -> None:
    """Register a ``connect`` listener that applies our SQLite PRAGMAs.

    ``journal_mode = WAL`` and ``foreign_keys = ON`` are both
    per-connection settings in SQLite. Running them from the
    ``connect`` event — i.e. on every new DB-API connection rather
    than once at startup — is the only way to guarantee foreign-key
    enforcement across all pool workers.
    """

    def _on_connect(dbapi_connection, connection_record) -> None:  # type: ignore[no-untyped-def]
        """Initialize one freshly opened DB-API connection.

        Goes through a raw DB-API cursor (not SQLAlchemy ``text``
        wrappers) because PRAGMA calls are not valid parameterized
        SQL; the statements are trusted, developer-authored literals
        with no external input.
        """
        pragma_cursor = dbapi_connection.cursor()
        try:
            # WAL improves concurrency (readers don't block the single
            # writer) and suits our read-heavy workload. The mode
            # persists on the database file, so re-setting it is a
            # cheap no-op after the first call.
            pragma_cursor.execute("PRAGMA journal_mode = WAL")
            # foreign_keys is per-connection and defaults to OFF in
            # SQLite, so it MUST be set here for referential integrity.
            pragma_cursor.execute("PRAGMA foreign_keys = ON")
        finally:
            pragma_cursor.close()

    # Functional form of ``@event.listens_for(engine, "connect")``.
    event.listen(engine, "connect", _on_connect)
def run_migrations(engine: Engine) -> list[str]:
    """Apply any un-applied SQL files from :mod:`app.models.migrations`.

    Behavior:

    - Creates a ``schema_migrations`` tracker table if missing.
    - Lists ``.sql`` files in :data:`_MIGRATIONS_DIR` in sorted order.
    - For each file whose stem is not yet in ``schema_migrations``,
      runs its content via :meth:`sqlite3.Connection.executescript`
      (necessary because a migration file may contain multiple
      statements), then inserts the version row and commits.
      Already-applied files are skipped.

    Atomicity
    ---------
    This function does not open an explicit transaction around
    ``executescript``, so a script and its bookkeeping row are *not*
    one atomic unit. If a script raises partway through, the exception
    propagates, the version is not recorded, and the file is retried
    from its first statement on the next call. Migration files should
    therefore be safe to re-run (e.g. ``IF NOT EXISTS``) unless they
    manage their own ``BEGIN``/``COMMIT``.
    NOTE(review): statements executed before the failure are expected
    to stay applied under the sqlite3 driver's default transaction
    handling — confirm against the ``executescript`` documentation.

    Returns
    -------
    list[str]
        The ordered list of versions applied on *this* call. Empty
        when the DB is already up to date, useful for logs and tests.

    Raises
    ------
    Exception
        Whatever the driver or SQLAlchemy raises for a failing script
        or bookkeeping statement; nothing is caught here. Versions
        committed before the failure remain recorded.

    Security note
    -------------
    Migration SQL is trusted input from the repository; it does not
    mix with user-origin data and therefore does not need bind
    parameters. User data still flows exclusively through
    parameterized queries elsewhere (see ``docs/security.md`` CWE-89).
    """
    # ``sorted`` consumes the glob iterator directly; Path objects sort
    # lexicographically, which is the documented application order.
    files = sorted(_MIGRATIONS_DIR.glob("*.sql"))
    applied_now: list[str] = []

    # A single connection over the life of the migration run lets us
    # mix executescript (DDL) with ordinary parameterized bookkeeping
    # cleanly. We commit per file so a failure partway through leaves
    # earlier files recorded.
    with engine.connect() as conn:
        # Ensure the tracker table exists. Can't use schema_migrations
        # itself to gate this since it may not exist yet.
        conn.execute(
            text(
                "CREATE TABLE IF NOT EXISTS schema_migrations ("
                " version TEXT PRIMARY KEY,"
                " applied_at TEXT NOT NULL"
                ")"
            )
        )
        conn.commit()

        # Pull the set of already-applied versions once.
        already_applied = {
            row[0]
            for row in conn.execute(
                text("SELECT version FROM schema_migrations")
            ).fetchall()
        }

        for path in files:
            # The tracker key is the file name without ``.sql``.
            version = path.stem
            if version in already_applied:
                continue

            sql_text = path.read_text(encoding="utf-8")

            # executescript is only exposed on the DB-API connection,
            # so we reach through the SQLAlchemy connection to the raw
            # DB-API connection. Trust boundary: the file is checked
            # into git, never user-supplied, so there is no injection
            # vector.
            raw = conn.connection
            raw.executescript(sql_text)

            # Record the version only after the whole script ran
            # without raising.
            conn.execute(
                text(
                    "INSERT INTO schema_migrations (version, applied_at) "
                    "VALUES (:v, :t)"
                ),
                {
                    "v": version,
                    "t": datetime.now(timezone.utc).isoformat(),
                },
            )
            conn.commit()
            applied_now.append(version)
            _log.info("migration_applied", version=version)

    if not applied_now:
        _log.info("migrations_up_to_date")
    return applied_now

View File

@@ -4,6 +4,15 @@ The factory pattern (``create_app``) keeps test setup straightforward and
lets us swap in alternate configurations without module-level side lets us swap in alternate configurations without module-level side
effects. ``app = create_app()`` at import time is what Uvicorn references effects. ``app = create_app()`` at import time is what Uvicorn references
via ``app.main:app``. via ``app.main:app``.
Phase 2 additions:
- Build a shared SQLAlchemy :class:`~sqlalchemy.Engine` from
``settings.database_url`` and attach the per-connection
PRAGMA listener (WAL + foreign keys).
- Apply SQL migrations from :mod:`app.models.migrations`.
- Run the idempotent seed (welcome post, About page, system user).
- Instantiate :class:`PostService` and :class:`PageService` and
expose them on ``app.state`` for route-level DI.
""" """
from __future__ import annotations from __future__ import annotations
@@ -17,9 +26,13 @@ from fastapi.templating import Jinja2Templates
from app import __version__ from app import __version__
from app.config import get_settings from app.config import get_settings
from app.db import build_engine, run_migrations
from app.logging_config import configure_logging from app.logging_config import configure_logging
from app.models.seed import run_seed
from app.routes.health import router as health_router from app.routes.health import router as health_router
from app.routes.public import router as public_router from app.routes.public import router as public_router
from app.services.pages import PageService
from app.services.posts import PostService
# Resolve the package root once so template / static paths stay correct # Resolve the package root once so template / static paths stay correct
@@ -33,16 +46,19 @@ _STATIC_DIR: Path = _PACKAGE_ROOT / "static"
def create_app() -> FastAPI: def create_app() -> FastAPI:
"""Build and return the FastAPI application. """Build and return the FastAPI application.
Responsibilities: Responsibilities (in strict order):
- Load validated configuration via :func:`get_settings`.
- Initialize structured logging *before* any logger is used. 1. Load validated configuration via :func:`get_settings`.
- Instantiate FastAPI with canonical title + version. 2. Initialize structured logging *before* any logger is used.
- Mount the ``/static`` directory for CSS, JS, and image assets. 3. Build the SQLAlchemy engine and install the PRAGMA listener.
- Attach the shared :class:`Jinja2Templates` to ``app.state`` so route 4. Apply SQL migrations (idempotent — no-op after first boot).
dependencies can retrieve it without a circular import on this 5. Run the seed (idempotent — marked via ``schema_migrations``).
module. 6. Instantiate :class:`PostService` / :class:`PageService` and
- Register routers (Phase 1: health + public). attach them to ``app.state`` so route dependencies can resolve
- Emit a single ``app_started`` structured log event. them via ``request.app.state``.
7. Mount static files, attach the shared :class:`Jinja2Templates`,
and register routers.
8. Emit a single ``app_started`` structured log event.
""" """
# Parse + validate configuration first so a bad environment fails fast # Parse + validate configuration first so a bad environment fails fast
# with a clear pydantic error before we touch logging / FastAPI. # with a clear pydantic error before we touch logging / FastAPI.
@@ -52,6 +68,13 @@ def create_app() -> FastAPI:
# very first log line already flows through our processor chain. # very first log line already flows through our processor chain.
configure_logging(settings.app_env) configure_logging(settings.app_env)
# --- Database plumbing --------------------------------------------------
# Engine is a process-wide resource. Built here so that migrations
# and seed both run on the same pool/config as the running app.
engine = build_engine(settings.database_url)
run_migrations(engine)
run_seed(engine)
application = FastAPI( application = FastAPI(
title="Chicken Babies R Us", title="Chicken Babies R Us",
version=__version__, version=__version__,
@@ -78,6 +101,13 @@ def create_app() -> FastAPI:
# function defined next to the routes. # function defined next to the routes.
application.state.templates = Jinja2Templates(directory=_TEMPLATES_DIR) application.state.templates = Jinja2Templates(directory=_TEMPLATES_DIR)
# Store the engine + services on ``app.state`` so the
# dependency-injection helpers in :mod:`app.services.*` can find
# them without importing this module (circular-import-safe).
application.state.engine = engine
application.state.post_service = PostService(engine)
application.state.page_service = PageService(engine)
# Register routers. Kept explicit (no dynamic discovery) so the set of # Register routers. Kept explicit (no dynamic discovery) so the set of
# mounted endpoints is trivially auditable. # mounted endpoints is trivially auditable.
application.include_router(health_router) application.include_router(health_router)

195
app/models/entities.py Normal file
View File

@@ -0,0 +1,195 @@
"""Canonical persistence-layer dataclasses.
One dataclass per table in the authoritative SQLite schema documented in
``docs/ROADMAP.md`` ("SQLite Schema (authoritative)"). These map 1:1 to
the columns of each table — field names, types, and nullability all
match — so the mapper layer (:mod:`app.models.mappers`) can convert
``sqlalchemy.Row`` objects to dataclass instances with no guesswork.
Design notes
------------
- Dataclasses are *not* frozen. Later phases mutate fields such as
``User.last_login_at`` or ``MagicLinkToken.used_at`` on successful
auth events; freezing would force service code into hand-rolled
copying. Immutability for view-layer projections is still enforced
via ``PostSummary`` in :mod:`app.models.posts`.
- Datetimes are always timezone-aware UTC at the Python boundary. The
SQLite columns are ``TEXT`` holding ISO-8601 strings; conversion
happens only in :mod:`app.models.mappers`, so application code never
sees a naive datetime.
- ``PostStatus`` is a string-valued ``Enum`` to keep JSON/template
rendering trivial while still providing type-level safety.
"""
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Optional
class PostStatus(str, Enum):
    """Publication lifecycle for a blog post.

    The string values match the ``CHECK`` constraint on
    ``posts.status`` in the SQLite schema; adding a new value here
    would require a migration, so this enum is deliberately small.
    Mixing in ``str`` makes each member a string, which keeps
    JSON/template rendering trivial.
    """

    # Stored as the literal 'draft' in ``posts.status``.
    DRAFT = "draft"
    # Stored as the literal 'published' in ``posts.status``.
    PUBLISHED = "published"
@dataclass
class User:
    """Admin user row (``users`` table).

    Phase 2 seeds a single inactive system user (``id=1``) so the
    ``posts.author_user_id`` foreign key has something to reference;
    real admin users are provisioned in Phase 3's magic-link flow.
    """

    # Primary key.
    id: int
    # Unique per the schema (``email TEXT NOT NULL UNIQUE``).
    email: str
    display_name: str
    # Timezone-aware UTC once mapped from the stored ISO-8601 text.
    created_at: datetime
    # ``None`` when the column is NULL.
    last_login_at: Optional[datetime]
    # Stored as INTEGER 0/1; the column defaults to 1.
    active: bool
@dataclass
class MagicLinkToken:
    """Single-use email-login token (``magic_link_tokens`` table).

    ``token_hash`` stores the SHA-256 of the raw token; the raw token
    is emailed to the user and never persisted. ``used_at`` is set
    when the token is consumed so we can refuse replay attempts
    without deleting the audit row.
    """

    # Primary key.
    id: int
    # Address the link was requested for.
    email: str
    # Unique per the schema; never the raw token.
    token_hash: str
    created_at: datetime
    expires_at: datetime
    # ``None`` until the token has been consumed.
    used_at: Optional[datetime]
    # NOT NULL in the schema.
    request_ip: str
@dataclass
class Session:
    """Authenticated admin session (``sessions`` table).

    ``revoked_at`` records logouts without deleting the row, so the
    audit log remains complete. ``ip`` / ``user_agent`` are snapshots
    from session creation, not live values.
    """

    # Primary key.
    id: int
    # References ``users.id``.
    user_id: int
    # Unique per the schema.
    token_hash: str
    created_at: datetime
    expires_at: datetime
    ip: str
    user_agent: str
    # ``None`` while the session has not been revoked.
    revoked_at: Optional[datetime]
@dataclass
class Page:
    """Static-ish content page, e.g. About (``pages`` table).

    ``body_html_cached`` is regenerated on write by the Phase 4 admin
    flow via the Markdown pipeline and stored here so render time
    costs only a SELECT, not a sanitize. See "Caching Strategy" in
    ``docs/ROADMAP.md``.
    """

    # Primary key.
    id: int
    # Unique per the schema.
    slug: str
    title: str
    # Markdown source of the page body.
    body_md: str
    # Pre-rendered HTML for ``body_md``.
    body_html_cached: str
    updated_at: datetime
    # Stored as INTEGER 0/1; the column defaults to 1.
    published: bool
@dataclass
class Post:
    """Blog post row (``posts`` table).

    Mirrors the ``posts`` table exactly. ``body_html_cached`` follows
    the same regenerate-on-write convention as :class:`Page`.
    """

    # Primary key.
    id: int
    # Unique per the schema.
    slug: str
    title: str
    # Markdown source of the post body.
    body_md: str
    # Pre-rendered HTML for ``body_md``.
    body_html_cached: str
    # One of the values allowed by the ``posts.status`` CHECK constraint.
    status: PostStatus
    # Nullable column; ``None`` when no publication time is stored.
    published_at: Optional[datetime]
    updated_at: datetime
    # References ``users.id``.
    author_user_id: int
@dataclass
class Media:
    """Uploaded image metadata (``media`` table).

    ``filename`` is the random storage name assigned on upload; the
    original client-supplied filename is preserved for display only
    and NEVER used to build a filesystem path. ``stored_path`` is
    relative to the project root.
    """

    # Primary key.
    id: int
    # Storage name; unique per the schema.
    filename: str
    # Client-supplied name, for display only.
    original_filename: str
    content_type: str
    size_bytes: int
    stored_path: str
    # The column defaults to the empty string.
    alt_text: str
    # References ``users.id``.
    uploaded_by: int
    uploaded_at: datetime
@dataclass
class ContactSubmission:
    """Submission from the public ``/contact`` form (``contact_submissions`` table).

    ``handled`` flips true once Head Hen has actioned the submission;
    retained indefinitely as part of the contact audit log. No
    sensitive fields — by design we only capture what the form asks
    for.
    """

    # Primary key.
    id: int
    name: str
    email: str
    message: str
    ip: str
    user_agent: str
    submitted_at: datetime
    # Stored as INTEGER 0/1; the column defaults to 0.
    handled: bool
@dataclass
class AuthEvent:
    """Append-only audit record for auth-related events (``auth_events`` table).

    ``event_type`` values are one of ``link_requested``,
    ``link_consumed``, ``session_revoked``, ``rate_limited`` (see
    Phase 3). ``detail`` is a JSON string so we can attach
    event-specific context without schema churn.
    """

    # Primary key.
    id: int
    event_type: str
    # Nullable column.
    email: Optional[str]
    # Nullable reference to ``users.id``.
    user_id: Optional[int]
    ip: str
    user_agent: str
    created_at: datetime
    # JSON text; the column defaults to '{}'.
    detail: str

190
app/models/mappers.py Normal file
View File

@@ -0,0 +1,190 @@
"""SQL row to dataclass converters.
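One ``row_to_<entity>`` function per table. All functions accept a
mapping-like object keyed by column name (``sqlalchemy.RowMapping`` —
e.g. from ``Result.mappings()`` or ``Row._mapping`` — a
:class:`sqlite3.Row`, or a plain ``dict``) and return the corresponding
dataclass from :mod:`app.models.entities`. A bare SQLAlchemy 2.x ``Row``
is tuple-like and does not support ``row["column"]`` lookups.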
Boundary responsibilities handled here (so service code never has to):
- Parse ISO-8601 ``TEXT`` columns into timezone-aware :class:`datetime`
instances (always UTC).
- Coerce SQLite ``INTEGER`` booleans (``0`` / ``1``) into real ``bool``.
- Translate ``posts.status`` strings into :class:`PostStatus` members.
Anything that isn't safe to assume (e.g. that ``published_at`` might be
NULL) is handled explicitly via :func:`_parse_optional_datetime`.
"""
from __future__ import annotations
from datetime import datetime, timezone
from typing import Any, Mapping, Optional
from app.models.entities import (
AuthEvent,
ContactSubmission,
MagicLinkToken,
Media,
Page,
Post,
PostStatus,
Session,
User,
)
def _parse_datetime(value: str) -> datetime:
"""Parse a stored ISO-8601 string into a timezone-aware UTC datetime.
All write paths use :func:`datetime.now` with ``tz=timezone.utc``
and serialize via ``.isoformat()``, so the stored strings always
include an offset. We still call ``astimezone(timezone.utc)`` to
normalize anything that sneaks through with a different offset —
an inexpensive belt-and-braces guard.
"""
parsed = datetime.fromisoformat(value)
if parsed.tzinfo is None:
# Defensive: legacy rows (none exist yet) or a bad write path.
# Treat as UTC rather than raising; we never intentionally
# persist naive datetimes.
parsed = parsed.replace(tzinfo=timezone.utc)
return parsed.astimezone(timezone.utc)
def _parse_optional_datetime(value: Optional[str]) -> Optional[datetime]:
"""Return ``None`` for NULL rows; otherwise parse as UTC.
Thin wrapper around :func:`_parse_datetime` kept for readability at
call sites that deal with nullable columns.
"""
if value is None:
return None
return _parse_datetime(value)
def _as_bool(value: Any) -> bool:
"""Coerce a SQLite INTEGER column into a Python ``bool``.
SQLite stores booleans as ``0`` / ``1`` integers. ``bool(0) is
False`` and ``bool(1) is True`` both behave correctly; this
wrapper exists so the intent is explicit at the mapper boundary
rather than relying on implicit truthiness.
"""
return bool(value)
def row_to_user(row: Mapping[str, Any]) -> User:
    """Build a :class:`User` from a ``users`` row.

    Timestamps are parsed into UTC datetimes (``last_login_at`` may be
    NULL) and ``active`` is coerced to ``bool``.
    """
    # Columns are read in declaration order.
    user_id = int(row["id"])
    email = row["email"]
    display_name = row["display_name"]
    created_at = _parse_datetime(row["created_at"])
    last_login_at = _parse_optional_datetime(row["last_login_at"])
    active = _as_bool(row["active"])
    return User(
        id=user_id,
        email=email,
        display_name=display_name,
        created_at=created_at,
        last_login_at=last_login_at,
        active=active,
    )
def row_to_magic_link_token(row: Mapping[str, Any]) -> MagicLinkToken:
    """Build a :class:`MagicLinkToken` from a ``magic_link_tokens`` row.

    ``created_at`` / ``expires_at`` are parsed into UTC datetimes;
    ``used_at`` stays ``None`` when the column is NULL.
    """
    # Columns are read in declaration order.
    token_id = int(row["id"])
    email = row["email"]
    token_hash = row["token_hash"]
    created_at = _parse_datetime(row["created_at"])
    expires_at = _parse_datetime(row["expires_at"])
    used_at = _parse_optional_datetime(row["used_at"])
    request_ip = row["request_ip"]
    return MagicLinkToken(
        id=token_id,
        email=email,
        token_hash=token_hash,
        created_at=created_at,
        expires_at=expires_at,
        used_at=used_at,
        request_ip=request_ip,
    )
def row_to_session(row: Mapping[str, Any]) -> Session:
    """Build a :class:`Session` from a ``sessions`` row.

    ``created_at`` / ``expires_at`` are parsed into UTC datetimes;
    ``revoked_at`` stays ``None`` when the column is NULL.
    """
    # Columns are read in declaration order.
    session_id = int(row["id"])
    user_id = int(row["user_id"])
    token_hash = row["token_hash"]
    created_at = _parse_datetime(row["created_at"])
    expires_at = _parse_datetime(row["expires_at"])
    ip = row["ip"]
    user_agent = row["user_agent"]
    revoked_at = _parse_optional_datetime(row["revoked_at"])
    return Session(
        id=session_id,
        user_id=user_id,
        token_hash=token_hash,
        created_at=created_at,
        expires_at=expires_at,
        ip=ip,
        user_agent=user_agent,
        revoked_at=revoked_at,
    )
def row_to_page(row: Mapping[str, Any]) -> Page:
    """Build a :class:`Page` from a ``pages`` row.

    ``updated_at`` is parsed into a UTC datetime and ``published`` is
    coerced to ``bool``; text columns pass through unchanged.
    """
    # Columns are read in declaration order.
    page_id = int(row["id"])
    slug = row["slug"]
    title = row["title"]
    body_md = row["body_md"]
    body_html_cached = row["body_html_cached"]
    updated_at = _parse_datetime(row["updated_at"])
    published = _as_bool(row["published"])
    return Page(
        id=page_id,
        slug=slug,
        title=title,
        body_md=body_md,
        body_html_cached=body_html_cached,
        updated_at=updated_at,
        published=published,
    )
def row_to_post(row: Mapping[str, Any]) -> Post:
    """Build a :class:`Post` from a ``posts`` row.

    ``status`` is passed through the :class:`PostStatus` constructor,
    which accepts the same set of values as the table's ``CHECK``
    constraint. A value that somehow bypassed the constraint raises
    ``ValueError`` here instead of silently flowing into business
    logic. ``published_at`` stays ``None`` when the column is NULL.
    """
    # Columns are read in declaration order.
    post_id = int(row["id"])
    slug = row["slug"]
    title = row["title"]
    body_md = row["body_md"]
    body_html_cached = row["body_html_cached"]
    status = PostStatus(row["status"])
    published_at = _parse_optional_datetime(row["published_at"])
    updated_at = _parse_datetime(row["updated_at"])
    author_user_id = int(row["author_user_id"])
    return Post(
        id=post_id,
        slug=slug,
        title=title,
        body_md=body_md,
        body_html_cached=body_html_cached,
        status=status,
        published_at=published_at,
        updated_at=updated_at,
        author_user_id=author_user_id,
    )
def row_to_media(row: Mapping[str, Any]) -> Media:
    """Build a :class:`Media` from a ``media`` row.

    Integer columns are normalized with ``int`` and ``uploaded_at`` is
    parsed into a UTC datetime; text columns pass through unchanged.
    """
    # Columns are read in declaration order.
    media_id = int(row["id"])
    filename = row["filename"]
    original_filename = row["original_filename"]
    content_type = row["content_type"]
    size_bytes = int(row["size_bytes"])
    stored_path = row["stored_path"]
    alt_text = row["alt_text"]
    uploaded_by = int(row["uploaded_by"])
    uploaded_at = _parse_datetime(row["uploaded_at"])
    return Media(
        id=media_id,
        filename=filename,
        original_filename=original_filename,
        content_type=content_type,
        size_bytes=size_bytes,
        stored_path=stored_path,
        alt_text=alt_text,
        uploaded_by=uploaded_by,
        uploaded_at=uploaded_at,
    )
def row_to_contact_submission(row: Mapping[str, Any]) -> ContactSubmission:
    """Build a :class:`ContactSubmission` from a ``contact_submissions`` row.

    ``submitted_at`` is parsed into a UTC datetime and ``handled`` is
    coerced to ``bool``; text columns pass through unchanged.
    """
    # Columns are read in declaration order.
    submission_id = int(row["id"])
    name = row["name"]
    email = row["email"]
    message = row["message"]
    ip = row["ip"]
    user_agent = row["user_agent"]
    submitted_at = _parse_datetime(row["submitted_at"])
    handled = _as_bool(row["handled"])
    return ContactSubmission(
        id=submission_id,
        name=name,
        email=email,
        message=message,
        ip=ip,
        user_agent=user_agent,
        submitted_at=submitted_at,
        handled=handled,
    )
def row_to_auth_event(row: Mapping[str, Any]) -> AuthEvent:
    """Build an :class:`AuthEvent` from an ``auth_events`` row.

    ``user_id`` is nullable: NULL stays ``None``, anything else is
    normalized with ``int``. ``created_at`` is parsed into a UTC
    datetime; the remaining columns pass through unchanged.
    """
    # Columns are read in declaration order.
    event_id = int(row["id"])
    event_type = row["event_type"]
    email = row["email"]
    raw_user_id = row["user_id"]
    user_id = None if raw_user_id is None else int(raw_user_id)
    ip = row["ip"]
    user_agent = row["user_agent"]
    created_at = _parse_datetime(row["created_at"])
    detail = row["detail"]
    return AuthEvent(
        id=event_id,
        event_type=event_type,
        email=email,
        user_id=user_id,
        ip=ip,
        user_agent=user_agent,
        created_at=created_at,
        detail=detail,
    )

View File

@@ -0,0 +1,108 @@
-- 001_init.sql
--
-- Initial schema for Chicken Babies R Us. Authoritative copy of the
-- tables + indexes + check constraints documented in
-- ``docs/ROADMAP.md`` (see "SQLite Schema (authoritative)").
--
-- Idempotency: every statement uses IF NOT EXISTS so re-running the
-- file on a partially-migrated database is still safe. The migration
-- runner also gates execution via the schema_migrations tracker, so
-- this belt-and-braces approach is defensive only.
--
-- No PRAGMA statements here: journal_mode = WAL and foreign_keys = ON
-- are applied per-connection via the SQLAlchemy connect-event
-- listener in ``app/db.py``. Setting them inside a migration file
-- would be a no-op on every connection except the one that ran the
-- migration, which is the opposite of what we want.
-- Admin users. Timestamps throughout this schema are TEXT columns.
CREATE TABLE IF NOT EXISTS users (
    id INTEGER PRIMARY KEY,
    email TEXT NOT NULL UNIQUE,
    display_name TEXT NOT NULL,
    created_at TEXT NOT NULL,
    last_login_at TEXT,                 -- NULL is allowed
    active INTEGER NOT NULL DEFAULT 1   -- 0/1 flag
);

-- Email-login tokens; token_hash is unique, used_at is nullable.
CREATE TABLE IF NOT EXISTS magic_link_tokens (
    id INTEGER PRIMARY KEY,
    email TEXT NOT NULL,
    token_hash TEXT NOT NULL UNIQUE,
    created_at TEXT NOT NULL,
    expires_at TEXT NOT NULL,
    used_at TEXT,
    request_ip TEXT NOT NULL
);

-- Composite index over (email, created_at) on magic_link_tokens.
CREATE INDEX IF NOT EXISTS idx_magic_email_created
    ON magic_link_tokens(email, created_at);

-- Admin sessions; each row references its owning user.
CREATE TABLE IF NOT EXISTS sessions (
    id INTEGER PRIMARY KEY,
    user_id INTEGER NOT NULL REFERENCES users(id),
    token_hash TEXT NOT NULL UNIQUE,
    created_at TEXT NOT NULL,
    expires_at TEXT NOT NULL,
    ip TEXT NOT NULL,
    user_agent TEXT NOT NULL,
    revoked_at TEXT                     -- NULL is allowed
);

-- Content pages, keyed by a unique slug. Stores both the Markdown
-- source and the cached HTML.
CREATE TABLE IF NOT EXISTS pages (
    id INTEGER PRIMARY KEY,
    slug TEXT NOT NULL UNIQUE,
    title TEXT NOT NULL,
    body_md TEXT NOT NULL,
    body_html_cached TEXT NOT NULL,
    updated_at TEXT NOT NULL,
    published INTEGER NOT NULL DEFAULT 1   -- 0/1 flag
);

-- Blog posts, keyed by a unique slug. status is restricted to
-- 'draft' / 'published'; every post references an author in users.
CREATE TABLE IF NOT EXISTS posts (
    id INTEGER PRIMARY KEY,
    slug TEXT NOT NULL UNIQUE,
    title TEXT NOT NULL,
    body_md TEXT NOT NULL,
    body_html_cached TEXT NOT NULL,
    status TEXT NOT NULL CHECK (status IN ('draft','published')),
    published_at TEXT,                  -- NULL is allowed
    updated_at TEXT NOT NULL,
    author_user_id INTEGER NOT NULL REFERENCES users(id)
);

-- Composite index over (status, published_at DESC) on posts.
CREATE INDEX IF NOT EXISTS idx_posts_status_pub
    ON posts(status, published_at DESC);

-- Uploaded-file metadata; filename is unique and every row
-- references the uploading user.
CREATE TABLE IF NOT EXISTS media (
    id INTEGER PRIMARY KEY,
    filename TEXT NOT NULL UNIQUE,
    original_filename TEXT NOT NULL,
    content_type TEXT NOT NULL,
    size_bytes INTEGER NOT NULL,
    stored_path TEXT NOT NULL,
    alt_text TEXT NOT NULL DEFAULT '',
    uploaded_by INTEGER NOT NULL REFERENCES users(id),
    uploaded_at TEXT NOT NULL
);

-- Contact-form submissions; handled is a 0/1 flag defaulting to 0.
CREATE TABLE IF NOT EXISTS contact_submissions (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    email TEXT NOT NULL,
    message TEXT NOT NULL,
    ip TEXT NOT NULL,
    user_agent TEXT NOT NULL,
    submitted_at TEXT NOT NULL,
    handled INTEGER NOT NULL DEFAULT 0
);

-- Auth audit events. email and user_id are nullable; detail is
-- text defaulting to '{}'.
CREATE TABLE IF NOT EXISTS auth_events (
    id INTEGER PRIMARY KEY,
    event_type TEXT NOT NULL,
    email TEXT,
    user_id INTEGER REFERENCES users(id),
    ip TEXT NOT NULL,
    user_agent TEXT NOT NULL,
    created_at TEXT NOT NULL,
    detail TEXT NOT NULL DEFAULT '{}'
);

-- Index over created_at DESC on auth_events.
CREATE INDEX IF NOT EXISTS idx_auth_events_created
    ON auth_events(created_at DESC);

View File

@@ -0,0 +1,12 @@
"""SQL migration files applied by :mod:`app.db` at startup.
This package holds the authoritative schema history for the
``chicken_babies_site`` database. Each ``.sql`` file is applied exactly
once in lexicographic order; the runner tracks which files have been
applied in a ``schema_migrations`` table.
No Python code lives here — the files are trusted, developer-authored
SQL loaded via ``sqlite3.Connection.executescript`` at boot.
"""
from __future__ import annotations

204
app/models/seed.py Normal file
View File

@@ -0,0 +1,204 @@
"""Idempotent seed data for first-run databases.
Creates the minimum content needed so the public site is not blank
before an admin exists:
- System seed user (``users.id = 1``). Inactive and not on the
``ADMIN_EMAILS`` allowlist — cannot log in. Exists only so
``posts.author_user_id`` has a foreign-key target.
- Welcome blog post (``slug = 'welcome-to-the-farm'``).
- About page (``slug = 'about'``) ported from the Phase 1 static copy.
Idempotency is enforced two ways:
1. A marker row in ``schema_migrations`` (``version = 'seed_001'``)
— if present, the whole seed is a no-op.
2. As a belt-and-braces guard, each INSERT is gated by ``INSERT OR
IGNORE`` on a unique key (``users.email``, ``posts.slug``,
``pages.slug``) so a partially-applied seed never duplicates.
Running this twice is safe and logs ``seed_skipped`` on the second
boot, which the Phase 2 verification run depends on.
"""
from __future__ import annotations
from datetime import datetime, timezone
import structlog
from sqlalchemy import Engine, text
from app.services.markdown import MarkdownService
# Module-level structured logger; emits ``seed_skipped`` when the
# marker row is already present.
_log = structlog.get_logger(__name__)

# Marker row used to short-circuit the seed on subsequent boots.
# Namespaced with the ``seed_`` prefix so it cannot collide with a
# real migration file name. Stored in ``schema_migrations.version``.
_SEED_MARKER: str = "seed_001"

# --- Content --------------------------------------------------------------
#
# The About body is a Markdown translation of the Phase 1
# ``app/templates/public/about.html`` narrative. Kept close to the
# original wording so returning visitors see familiar copy; Head Hen
# rewrites via the Phase 4 admin.
#
# The welcome post is three short paragraphs: a greeting, a Morrison,
# TN mention (no street address — per CLAUDE.md), and a teaser of what
# future updates will cover.
#
# Both bodies are built from adjacent string literals; paragraphs are
# separated by a blank line (``\n\n``).

_WELCOME_POST_TITLE: str = "Welcome to the Farm"
# Unique key for the post row (``posts.slug``).
_WELCOME_POST_SLUG: str = "welcome-to-the-farm"
_WELCOME_POST_MD: str = (
    "Hi there, and thanks for stopping by Chicken Babies R Us! "
    "We're a small family farm and we're glad you found us.\n\n"
    "We're based in Morrison, Tennessee, tucked into the rolling "
    "hills of the middle part of the state. Our flock is growing, "
    "our waterfowl are loud, and our coffee cups are never quite "
    "empty.\n\n"
    "Check back soon for updates on hatching plans, new chicks and "
    "ducklings, fresh-egg availability, and whatever the geese "
    "decided to get into this week."
)

_ABOUT_PAGE_TITLE: str = "About the Farm"
# Unique key for the page row (``pages.slug``).
_ABOUT_PAGE_SLUG: str = "about"
_ABOUT_PAGE_MD: str = (
    "Chicken Babies R Us is a small family farm tucked into the "
    "rolling hills of Morrison, Tennessee. What started as a "
    "handful of chicks in a backyard brooder has grown into a flock "
    "of chickens, ducks, and geese that keep us busy (and "
    "entertained) year round.\n\n"
    "The operation is run by Head Hen — the chief wrangler, egg "
    "gatherer, waterfowl-whisperer, and unofficial chicken "
    "photographer. She handles the day-to-day care of the birds "
    "and does most of the writing you'll find on this site. Expect "
    "updates on hatching plans, new arrivals, the occasional coop "
    "mishap, and whatever the geese decided to get into this "
    "week.\n\n"
    "We're a hobby farm at heart, not a commercial one, which "
    "means we can take the time to know our birds and raise them "
    "the way we think they ought to be raised. If you're curious "
    "about what we've got going on — or just want to say hello — "
    "pop over to the contact page."
)

# Seed user constants. ``active=0`` + the local-only email keep this
# user out of any real auth flow. Phase 3's magic-link issuer MUST
# refuse to issue links for non-allowlisted or inactive emails;
# Phase 3 tests assert that behavior directly.
# The id is pinned so ``posts.author_user_id`` has a stable target.
_SEED_USER_ID: int = 1
_SEED_USER_EMAIL: str = "seed@chickenbabies.local"
_SEED_USER_DISPLAY: str = "Head Hen"
def run_seed(engine: Engine) -> bool:
    """Insert the first-run content rows unless the seed marker exists.

    The seed consists of one inactive system user, one published
    welcome post, one published About page, and a marker row in
    ``schema_migrations``. Everything is written on a single
    connection and committed once at the end.

    Parameters
    ----------
    engine:
        SQLAlchemy engine for a database whose schema is already in
        place; this function creates no tables.

    Returns
    -------
    bool
        ``False`` when the marker row was already present (nothing is
        written), ``True`` when the seed rows and the marker were
        inserted and committed by this call.
    """
    stamp = datetime.now(timezone.utc).isoformat()
    renderer = MarkdownService()

    with engine.connect() as conn:
        # The marker row, not the presence of content rows, decides
        # whether seeding runs: a manually emptied posts/pages table
        # must not be refilled automatically.
        existing_marker = conn.execute(
            text(
                "SELECT version FROM schema_migrations WHERE version = :v"
            ),
            {"v": _SEED_MARKER},
        ).first()
        if existing_marker is not None:
            _log.info("seed_skipped", marker=_SEED_MARKER)
            return False

        # Seed user: id is pinned so the post's author foreign key
        # below always has a known target.
        user_params = {
            "id": _SEED_USER_ID,
            "email": _SEED_USER_EMAIL,
            "display_name": _SEED_USER_DISPLAY,
            "created_at": stamp,
        }
        conn.execute(
            text(
                # seed artifact; not a real admin — see Phase 3 for real users
                "INSERT OR IGNORE INTO users"
                " (id, email, display_name, created_at, last_login_at, active)"
                " VALUES (:id, :email, :display_name, :created_at, NULL, 0)"
            ),
            user_params,
        )

        # Welcome post: Markdown is stored alongside its rendered,
        # sanitized HTML.
        post_params = {
            "slug": _WELCOME_POST_SLUG,
            "title": _WELCOME_POST_TITLE,
            "body_md": _WELCOME_POST_MD,
            "body_html": renderer.render(_WELCOME_POST_MD),
            "published_at": stamp,
            "updated_at": stamp,
            "author_id": _SEED_USER_ID,
        }
        conn.execute(
            text(
                "INSERT OR IGNORE INTO posts"
                " (slug, title, body_md, body_html_cached, status,"
                " published_at, updated_at, author_user_id)"
                " VALUES (:slug, :title, :body_md, :body_html,"
                " 'published', :published_at, :updated_at, :author_id)"
            ),
            post_params,
        )

        # About page, inserted as published.
        page_params = {
            "slug": _ABOUT_PAGE_SLUG,
            "title": _ABOUT_PAGE_TITLE,
            "body_md": _ABOUT_PAGE_MD,
            "body_html": renderer.render(_ABOUT_PAGE_MD),
            "updated_at": stamp,
        }
        conn.execute(
            text(
                "INSERT OR IGNORE INTO pages"
                " (slug, title, body_md, body_html_cached, updated_at,"
                " published)"
                " VALUES (:slug, :title, :body_md, :body_html,"
                " :updated_at, 1)"
            ),
            page_params,
        )

        # Marker row last, so it is only recorded together with the
        # content rows in the same commit.
        conn.execute(
            text(
                "INSERT INTO schema_migrations (version, applied_at)"
                " VALUES (:v, :t)"
            ),
            {"v": _SEED_MARKER, "t": stamp},
        )
        conn.commit()

    _log.info("seed_applied", marker=_SEED_MARKER)
    return True

View File

@@ -1,25 +1,31 @@
"""Public-facing HTTP routes. """Public-facing HTTP routes.
Phase 1 scope: Phase 2 scope:
- ``GET /`` — blog index (currently empty list from the stub service). - ``GET /`` — blog index; posts come from :class:`PostService`
- ``GET /about`` — static placeholder copy. which now reads the ``posts`` table.
- ``GET /about`` — DB-backed; loads the ``about`` row from the
``pages`` table via :class:`PageService` and
renders its ``body_html_cached`` directly.
- ``GET /contact`` — inert contact form UI + optional ``mailto:`` link. - ``GET /contact`` — inert contact form UI + optional ``mailto:`` link.
- ``GET /shop`` — "Coming soon" card. - ``GET /shop`` — "Coming soon" card.
Every handler is thin: it resolves its dependencies, calls any service Every handler is thin: it resolves its dependencies, calls any service
methods it needs, and delegates rendering to a Jinja template. No HTML is methods it needs, and delegates rendering to a Jinja template. No HTML
constructed in Python. is constructed in Python.
""" """
from __future__ import annotations from __future__ import annotations
from fastapi import APIRouter, Depends, Request import structlog
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import HTMLResponse from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates from fastapi.templating import Jinja2Templates
from app.config import Settings, get_settings from app.config import Settings, get_settings
from app.models.entities import Page
from app.models.posts import PostSummary from app.models.posts import PostSummary
from app.services.pages import PageService, get_page_service
from app.services.posts import PostService, get_post_service from app.services.posts import PostService, get_post_service
@@ -27,6 +33,9 @@ from app.services.posts import PostService, get_post_service
# so the routes below live at the site root. # so the routes below live at the site root.
router: APIRouter = APIRouter(tags=["public"]) router: APIRouter = APIRouter(tags=["public"])
# One module-level logger is fine; structlog handles context binding.
_log = structlog.get_logger(__name__)
def get_templates(request: Request) -> Jinja2Templates: def get_templates(request: Request) -> Jinja2Templates:
"""Return the shared :class:`Jinja2Templates` instance. """Return the shared :class:`Jinja2Templates` instance.
@@ -48,9 +57,10 @@ def home(
) -> HTMLResponse: ) -> HTMLResponse:
"""Render the blog index with any published posts. """Render the blog index with any published posts.
In Phase 1 the service returns an empty list, so the template shows a Phase 2: the service now returns real ``PostSummary`` rows from
friendly "no posts yet" state. Phase 2 will populate the list from SQLite. The homepage template still handles the empty-list case
SQLite without any changes to this handler. gracefully in case a future deployment starts with an unseeded
database.
""" """
# Query the service layer for the most recent published posts. The # Query the service layer for the most recent published posts. The
# template handles the empty-list case; we do not branch here. # template handles the empty-list case; we do not branch here.
@@ -66,18 +76,31 @@ def home(
def about( def about(
request: Request, request: Request,
templates: Jinja2Templates = Depends(get_templates), templates: Jinja2Templates = Depends(get_templates),
pages: PageService = Depends(get_page_service),
) -> HTMLResponse: ) -> HTMLResponse:
"""Render the static About page. """Render the About page from the ``pages`` table.
Copy is deliberately generic and does not reveal the farm's street Phase 2 rewires this route: the body comes from ``pages.about``
address (Morrison, TN is mentioned; the physical address is not — (seeded at first boot and editable via Phase 4 admin). If the
see CLAUDE.md). Head Hen will replace this content via the Phase 4 page row is missing — which should not happen after a successful
admin CMS. seed — we log the anomaly and return a generic 500 without
leaking implementation details (CWE-200).
""" """
page: Page | None = pages.get_by_slug("about")
if page is None:
# Anomalous: the seed should always have populated this row.
# Log with enough context to diagnose without exposing it to
# the visitor.
_log.error("about_page_missing", slug="about")
raise HTTPException(
status_code=500,
detail="The About page is temporarily unavailable.",
)
return templates.TemplateResponse( return templates.TemplateResponse(
request, request,
"public/about.html", "public/about.html",
{"active_nav": "about"}, {"active_nav": "about", "page": page},
) )

88
app/services/cache.py Normal file
View File

@@ -0,0 +1,88 @@
"""In-process, generic TTL cache.
Small, typed, and deliberately boring. Used by :mod:`app.services.posts`
and :mod:`app.services.pages` to sit in front of the hottest queries
(published-posts list, page-by-slug); a 60 s default TTL keeps the
site's three-digit daily requests out of the SQLite query path without
any cross-process coordination.
Not thread-safe in the strict sense — Python's GIL makes the dict
operations atomic at CPython bytecode granularity, and worst case a
concurrent writer causes a benign duplicate DB read. That is
acceptable at this scale; if the site ever grows teeth we can revisit.
"""
from __future__ import annotations
import time
from typing import Generic, Hashable, Optional, TypeVar
# TypeVar bound to ``Hashable`` so callers cannot accidentally key by a
# mutable collection (which would later look up with a different hash
# after mutation and silently miss the cache).
K = TypeVar("K", bound=Hashable)
V = TypeVar("V")


class TTLCache(Generic[K, V]):
    """Tiny TTL-based dict-style cache.

    Entries expire ``ttl_seconds`` after insertion. Expired entries
    are dropped lazily on access — there is no background sweep, and
    the cache is not bounded in size. For our workload (at most a
    few dozen keys per instance) this is fine.

    Three operations are public:

    - :meth:`get` returns the cached value, or ``default`` (``None``
      unless overridden) when the key is absent or expired.
    - :meth:`set` stores a value with an expiry.
    - :meth:`invalidate_all` clears every entry; used by admin-write
      paths in Phase 4.
    """

    def __init__(self, ttl_seconds: float = 60.0) -> None:
        """Construct an empty cache.

        Parameters
        ----------
        ttl_seconds:
            Time-to-live for every entry, in seconds. 60 s matches the
            "Caching Strategy" section of ``docs/ROADMAP.md``.

        Raises
        ------
        ValueError
            If ``ttl_seconds`` is zero, negative, or NaN.
        """
        # ``not x > 0`` rather than ``x <= 0`` so that NaN is rejected
        # too: a NaN expiry never compares as reached, which would make
        # every entry live forever.
        if not ttl_seconds > 0:
            # Defensive: a zero/negative TTL would mean every write
            # instantly expires, which almost always indicates a bug.
            raise ValueError("ttl_seconds must be positive")
        self._ttl: float = float(ttl_seconds)
        # Stored as (expiry_monotonic_ts, value). Using
        # ``time.monotonic`` avoids issues if the wall clock jumps.
        self._store: dict[K, tuple[float, V]] = {}

    def get(self, key: K, default: Optional[V] = None) -> Optional[V]:
        """Return the cached value for ``key``, or ``default`` on a miss.

        A miss is either "never stored" or "stored but expired".
        Expired entries are deleted as a side effect of the lookup so
        that re-reading the same key does not keep stale data around.

        Parameters
        ----------
        key:
            Cache key.
        default:
            Value returned on a miss. Callers that legitimately store
            ``None`` can pass a private sentinel here to tell a cached
            ``None`` apart from a miss.
        """
        entry = self._store.get(key)
        if entry is None:
            return default
        expiry, value = entry
        if time.monotonic() >= expiry:
            # Expired — drop lazily and report miss.
            self._store.pop(key, None)
            return default
        return value

    def set(self, key: K, value: V) -> None:
        """Store ``value`` under ``key`` with the configured TTL."""
        self._store[key] = (time.monotonic() + self._ttl, value)

    def invalidate_all(self) -> None:
        """Drop every cached entry.

        Called by the Phase 4 admin write path so readers see the new
        content on the very next request, not up to one TTL later.
        """
        self._store.clear()

125
app/services/markdown.py Normal file
View File

@@ -0,0 +1,125 @@
"""Markdown rendering with a strict sanitization allowlist.
CWE-79 mitigation: user-authored Markdown is first rendered to HTML by
``markdown-it-py`` (commonmark profile; tables are intentionally not
enabled), then the resulting HTML is filtered by ``bleach`` against an
explicit tag / attribute / protocol allowlist. The bleach pass is the
security boundary regardless of what the parser emits: anything not on
the list is stripped rather than escaped, so the stored
``body_html_cached`` is always safe to render inside an
``autoescape=False`` Jinja block.
The pipeline runs both on admin writes (Phase 4) and at seed time
(Phase 2).
"""
from __future__ import annotations
from typing import Final
import bleach
from markdown_it import MarkdownIt
# --- Sanitization allowlist ------------------------------------------------
# Kept at module scope so tests can assert against them. The tag and
# protocol sets are ``frozenset`` and therefore immutable.
# ``_ALLOWED_ATTRS`` is a plain ``dict`` of lists: ``Final`` only stops
# the name from being rebound (and only under a type checker), it does
# not stop in-place mutation, so treat it as read-only by convention.
# Do not widen without a security review; in particular:
#
# - No ``style`` or ``class`` attributes (CSS injection / theme attack
# surface for future admin UIs).
# - No ``script``, ``iframe``, ``object``, ``embed``, ``form``, etc.
# - No ``data:`` / ``javascript:`` protocols.
# - No table tags; the parser side leaves tables disabled to match.
_ALLOWED_TAGS: Final[frozenset[str]] = frozenset(
{
"p",
"br",
"strong",
"em",
"a",
"ul",
"ol",
"li",
"h1",
"h2",
"h3",
"h4",
"blockquote",
"code",
"pre",
"img",
"hr",
}
)
# Per-tag attribute allowlist; tags not listed here keep no attributes.
_ALLOWED_ATTRS: Final[dict[str, list[str]]] = {
"a": ["href", "title", "rel"],
"img": ["src", "alt", "title", "width", "height"],
}
# URL schemes permitted in attribute values such as ``href`` / ``src``.
_ALLOWED_PROTOCOLS: Final[frozenset[str]] = frozenset(
{"http", "https", "mailto"}
)
class MarkdownService:
    """Render Markdown to HTML, then sanitize against the allowlist.

    One ``MarkdownIt`` instance per service instance — creating these
    is cheap but non-trivial, so we reuse. The service holds no state
    other than that parser configuration.
    """

    def __init__(self) -> None:
        """Configure the Markdown parser.

        - ``commonmark`` preset with the ``html`` option explicitly
          switched **off**. The preset on its own enables raw-HTML
          pass-through (CommonMark requires it), so relying on the
          preset default would hand author-supplied tags straight to
          the sanitizer. With ``html=False`` raw HTML in the source is
          rendered as escaped text, which is the safe failure mode and
          gives two independent layers: parser escaping, then the
          bleach allowlist.
        - Tables are intentionally not enabled: the bleach allowlist
          does not include ``<table>``, so enabling the plugin would
          just produce content stripped of its tags. If we ever want
          tables, both sides (parser + allowlist) need widening
          together.
        """
        self._md: MarkdownIt = MarkdownIt("commonmark", {"html": False})

    def render(self, md: str) -> str:
        """Render ``md`` to sanitized HTML.

        Parameters
        ----------
        md:
            Markdown source, typically from an admin edit form or a
            seed file. Treated as untrusted.

        Returns
        -------
        str
            HTML safe to render with Jinja autoescape disabled. The
            output contains only tags / attributes / protocols from
            the module-level allowlists; anything else is stripped
            (``strip=True``) rather than escaped.
        """
        raw_html = self._md.render(md)
        # ``strip=True`` removes disallowed tags entirely (drops the
        # tag but keeps text content). This is a deliberate choice
        # over ``strip=False``, which would escape disallowed tags
        # into literal text — ugly for users.
        return bleach.clean(
            raw_html,
            tags=_ALLOWED_TAGS,
            attributes=_ALLOWED_ATTRS,
            protocols=_ALLOWED_PROTOCOLS,
            strip=True,
        )
def render_markdown_safe(md: str) -> str:
    """Render ``md`` to sanitized HTML using a one-shot service.

    A new :class:`MarkdownService` is built on every call and then
    discarded. That is acceptable for occasional callers such as tests
    or seeding; code on a hot path should hold on to a single service
    instance instead.
    """
    one_shot = MarkdownService()
    return one_shot.render(md)

101
app/services/pages.py Normal file
View File

@@ -0,0 +1,101 @@
"""Static-page read service (About, etc.).
Wraps the ``pages`` table with a 60 s TTL cache keyed by slug. Admin
writes in Phase 4 invalidate via :meth:`PageService.invalidate_all`.
Public contract:
- :meth:`PageService.get_by_slug` returns a :class:`Page` or ``None``.
- :meth:`PageService.invalidate_all` clears the TTL cache.
- :func:`get_page_service` pulls the app-scoped instance (one per
application, shared by all requests) off the FastAPI app state; tests
can override via ``app.dependency_overrides``.
"""
from __future__ import annotations
from typing import Optional
from fastapi import Request
from sqlalchemy import Engine, text
from app.models.entities import Page
from app.models.mappers import row_to_page
from app.services.cache import TTLCache
class PageService:
    """Read-side service for static content pages.

    Parameters
    ----------
    engine:
        Shared SQLAlchemy engine. Stored by reference; the service
        never opens its own engine.
    ttl_seconds:
        Cache TTL in seconds. Default 60 s per the ROADMAP caching
        strategy.
    """

    def __init__(self, engine: Engine, ttl_seconds: float = 60.0) -> None:
        self._engine: Engine = engine
        # Every lookup result is cached wrapped in a 1-tuple,
        # ``(page,)`` or ``(None,)``. The cache reports a miss as a bare
        # ``None``, so wrapping is what lets a cached "no such page"
        # be told apart from "not cached". Caching the negative result
        # is intentional — it prevents a pathological hot-404 workload
        # from hammering SQLite.
        # NOTE(review): keys are caller-supplied slugs and the cache is
        # unbounded; revisit if arbitrary URL slugs ever reach here.
        self._cache: TTLCache[str, tuple[Optional[Page]]] = TTLCache(
            ttl_seconds
        )

    def get_by_slug(self, slug: str) -> Optional[Page]:
        """Return the page with ``slug`` or ``None`` if absent.

        Hot path:

        1. TTL-cache lookup keyed by slug. A hit is served without
           touching the database, including a cached "absent" result.
        2. On miss: one parameterized SELECT; row mapped through
           :func:`app.models.mappers.row_to_page`.
        3. Result (including ``None``) cached for the configured TTL.

        SQL uses a ``:bind`` parameter (see CWE-89 in
        ``docs/security.md``); no string interpolation of user
        input.
        """
        hit = self._cache.get(slug)
        if hit is not None:
            # Unwrap; the payload itself may legitimately be ``None``.
            return hit[0]

        with self._engine.connect() as conn:
            row = (
                conn.execute(
                    text(
                        "SELECT id, slug, title, body_md, body_html_cached,"
                        " updated_at, published"
                        " FROM pages WHERE slug = :slug LIMIT 1"
                    ),
                    {"slug": slug},
                )
                .mappings()
                .first()
            )

        page = row_to_page(row) if row is not None else None
        self._cache.set(slug, (page,))
        return page

    def invalidate_all(self) -> None:
        """Drop every cached page entry.

        Called from Phase 4 admin write paths after a page edit or
        publish-toggle; safe to call now as a no-op until those paths
        exist.
        """
        self._cache.invalidate_all()
def get_page_service(request: Request) -> PageService:
    """FastAPI dependency returning the application's :class:`PageService`.

    Reads ``page_service`` from ``request.app.state``; the application
    factory is expected to have placed the single shared instance
    there. Tests can swap it out through
    ``app.dependency_overrides[get_page_service]``.
    """
    app_state = request.app.state
    return app_state.page_service

View File

@@ -1,61 +1,174 @@
"""Blog post service layer. """Blog post read service.
Phase 1 ships a stub: :meth:`PostService.list_published` returns an empty Phase 2 replaces the Phase 1 empty-list stub with a real SQLite-backed
list so the home page renders cleanly without a database. Phase 2 will implementation. The public method signature on
replace the stub with a real SQLite-backed implementation. The public :meth:`PostService.list_published` is unchanged — routes and templates
method signature and return type (`list[PostSummary]`) are frozen now so written in Phase 1 continue to work.
route and template code written in Phase 1 won't need to change when the
DB arrives. Public contract:
- :meth:`PostService.list_published` returns ``list[PostSummary]``.
- :meth:`PostService.invalidate_all` clears the TTL cache (Phase 4).
- :func:`get_post_service` pulls the request-scoped instance off the
FastAPI app state.
""" """
from __future__ import annotations from __future__ import annotations
import re
from typing import Optional
from fastapi import Request
from sqlalchemy import Engine, text
from app.models.entities import PostStatus
from app.models.posts import PostSummary from app.models.posts import PostSummary
from app.models.mappers import _parse_datetime
from app.services.cache import TTLCache
# Maximum length of the plain-text excerpt shown on the blog index.
# Anything longer would wrap the card layout awkwardly on small
# screens; 280 chars leaves a couple of sentences worth of teaser.
_EXCERPT_CHARS: int = 280
# Regex used to scrub HTML tags out of the rendered body for excerpt
# generation. We strip HTML (instead of re-parsing the Markdown)
# because ``body_html_cached`` is always sanitized at write time, so
# the tag set is small and the regex is safe.
_TAG_RE: re.Pattern[str] = re.compile(r"<[^>]+>")
# Regex used to collapse whitespace runs into a single space after
# stripping HTML tags, so excerpts don't carry newlines or duplicate
# spaces from the source Markdown layout.
_WS_RE: re.Pattern[str] = re.compile(r"\s+")
def _build_excerpt(body_md: str, body_html_cached: str) -> str:
"""Build a short plaintext teaser from the cached HTML.
Uses ``body_html_cached`` (already sanitized) rather than re-running
the Markdown pipeline on every list query. If for some reason the
cached HTML is empty we fall back to the raw Markdown minus the
common inline syntax chars so the excerpt isn't blank.
"""
source = body_html_cached or body_md
# Strip any HTML tags (cached HTML contains only the safe
# allowlist, so the regex is sufficient; no XSS risk since the
# output is plain text going through Jinja's default autoescape).
text_only = _TAG_RE.sub(" ", source)
collapsed = _WS_RE.sub(" ", text_only).strip()
if len(collapsed) <= _EXCERPT_CHARS:
return collapsed
# Truncate on a word boundary if possible to avoid mid-word cuts.
truncated = collapsed[:_EXCERPT_CHARS]
last_space = truncated.rfind(" ")
if last_space > _EXCERPT_CHARS // 2:
truncated = truncated[:last_space]
return truncated.rstrip() + "\u2026" # ellipsis
class PostService: class PostService:
"""Read-side service for published blog posts. """Read-side service for published blog posts.
The service is intentionally stateless in Phase 1. Phase 2 will give Parameters
it a SQLite connection (or connection factory) via constructor ----------
injection; callers obtain an instance through :func:`get_post_service` engine:
so the swap is transparent to the routes that depend on it. Shared SQLAlchemy engine.
ttl_seconds:
Cache TTL in seconds; default 60 s matches the ROADMAP.
""" """
def __init__(self, engine: Engine, ttl_seconds: float = 60.0) -> None:
self._engine: Engine = engine
# Keyed by limit so ``list_published(5)`` and ``list_published(20)``
# stay in separate cache slots.
self._cache: TTLCache[int, list[PostSummary]] = TTLCache(ttl_seconds)
def list_published(self, limit: int = 20) -> list[PostSummary]: def list_published(self, limit: int = 20) -> list[PostSummary]:
"""Return up to ``limit`` published posts, most recent first. """Return up to ``limit`` published posts, newest first.
Parameters Parameters
---------- ----------
limit: limit:
Maximum number of summaries to return. Kept in the signature Maximum rows to return. Clamped to ``[1, 100]`` to keep
now (even though the stub ignores it) so Phase 2's real pathological callers from dumping the full table.
implementation is a drop-in replacement.
Returns Returns
------- -------
list[PostSummary] list[PostSummary]
Currently always an empty list. The template treats an empty Immutable summary records; an empty list when the site
list as the "no posts yet" state. has no published posts (the template renders an
appropriate empty state).
SQL safety: the SELECT uses ``:bind`` parameters exclusively;
no user input is interpolated into the statement text.
""" """
# Phase 1 stub: no DB, no posts. Phase 2 will issue a parameterized # Defensive clamp; the public template only passes 20 but
# SELECT against the `posts` table filtered by status='published' # future callers could pass arbitrary values.
# and ordered by published_at DESC. safe_limit = max(1, min(int(limit), 100))
return []
cached = self._cache.get(safe_limit)
if cached is not None:
return cached
with self._engine.connect() as conn:
rows = (
conn.execute(
text(
"SELECT slug, title, published_at, body_md,"
" body_html_cached"
" FROM posts"
" WHERE status = :status"
" ORDER BY published_at DESC"
" LIMIT :limit"
),
{
"status": PostStatus.PUBLISHED.value,
"limit": safe_limit,
},
)
.mappings()
.all()
)
summaries: list[PostSummary] = []
for row in rows:
published_at_str: Optional[str] = row["published_at"]
# A row with status='published' should never have NULL
# published_at; if it does, skip it rather than crash the
# homepage. Phase 4's admin flow enforces this invariant
# at write time.
if published_at_str is None:
continue
summaries.append(
PostSummary(
slug=row["slug"],
title=row["title"],
published_at=_parse_datetime(published_at_str),
excerpt=_build_excerpt(
row["body_md"], row["body_html_cached"]
),
)
)
self._cache.set(safe_limit, summaries)
return summaries
def invalidate_all(self) -> None:
"""Drop every cached post-list entry.
Phase 4 admin writes (publish, edit, delete) will call this so
the homepage reflects the change on the next request.
"""
self._cache.invalidate_all()
# Module-level singleton. The service is stateless in Phase 1, so one def get_post_service(request: Request) -> PostService:
# instance is safe to share across requests. Phase 2 may relocate this """FastAPI dependency: pull the app-scoped :class:`PostService`.
# behind a factory if per-request scoping becomes useful.
_post_service: PostService = PostService()
Instantiated once in :func:`app.main.create_app` and stored on
def get_post_service() -> PostService: ``app.state.post_service``. Tests override via
"""Return the shared :class:`PostService` for FastAPI dependency injection. ``app.dependency_overrides[get_post_service]``.
Keeping this as a module-level function (rather than instantiating a
fresh service on every request) means FastAPI's ``Depends`` wiring
pays no construction cost on the hot path, and tests can override the
dependency via ``app.dependency_overrides[get_post_service]``.
""" """
return _post_service return request.app.state.post_service

View File

@@ -1,42 +1,37 @@
{# {#
About page — static placeholder copy. Head Hen will replace this via About page. Phase 2: body comes from the ``pages`` row with
the Phase 4 admin CMS, so the prose below is deliberately generic and slug='about', rendered via the Markdown pipeline (markdown-it-py →
free of lorem ipsum. Per CLAUDE.md, the physical address is not shown bleach allowlist) at write time and cached on the row. The cached
anywhere on the site — only the town name. HTML has already been sanitized against an allowlist that forbids
scripts, styles, iframes, etc., so it is safe to emit with the
``| safe`` filter (Jinja autoescape is explicitly disabled for the
body only). Head Hen edits this content through the Phase 4 admin.
Per CLAUDE.md, the physical address is not shown anywhere on the
site — only the town name.
Context:
- page : app.models.entities.Page
- active_nav : str "about"
#} #}
{% extends "public/base.html" %} {% extends "public/base.html" %}
{% block title %}About &mdash; Chicken Babies R Us{% endblock %} {% block title %}{{ page.title }} &mdash; Chicken Babies R Us{% endblock %}
{% block meta_description %}About Chicken Babies R Us &mdash; a small family farm in Morrison, Tennessee raising chickens, ducks, and geese.{% endblock %} {% block meta_description %}About Chicken Babies R Us &mdash; a small family farm in Morrison, Tennessee raising chickens, ducks, and geese.{% endblock %}
{% block content %} {% block content %}
<article class="page-article"> <article class="page-article">
<header class="page-article__header"> <header class="page-article__header">
<h1 class="page-article__title">About the farm</h1> <h1 class="page-article__title">{{ page.title }}</h1>
</header> </header>
<p> {#
Chicken Babies R Us is a small family farm tucked into the rolling body_html_cached is the output of the bleach-sanitized
hills of Morrison, Tennessee. What started as a handful of chicks Markdown pipeline. It contains only tags / attributes /
in a backyard brooder has grown into a flock of chickens, ducks, and protocols from our allowlist (p, strong, em, a, ul, ol, li,
geese that keep us busy (and entertained) year round. h1-h4, blockquote, code, pre, img, hr + href/src/etc.), so
</p> rendering with ``| safe`` does not reintroduce XSS risk.
#}
<p> {{ page.body_html_cached | safe }}
The operation is run by Head Hen &mdash; the chief wrangler, egg
gatherer, waterfowl-whisperer, and unofficial chicken photographer.
She handles the day-to-day care of the birds and does most of the
writing you'll find on this site. Expect updates on hatching plans,
new arrivals, the occasional coop mishap, and whatever the geese
decided to get into this week.
</p>
<p>
We're a hobby farm at heart, not a commercial one, which means we
can take the time to know our birds and raise them the way we think
they ought to be raised. If you're curious about what we've got
going on &mdash; or just want to say hello &mdash; pop over to the
contact page.
</p>
</article> </article>
{% endblock %} {% endblock %}

View File

@@ -76,13 +76,42 @@ High-level phased plan. Each phase ends in a mergeable `dev` state and a passing
- **No new packages.** Pillow / Jinja2 / Starlette StaticFiles were already in `requirements.txt` from Phase 0. - **No new packages.** Pillow / Jinja2 / Starlette StaticFiles were already in `requirements.txt` from Phase 0.
- **Verification run:** `python -c "from app.main import app"` ✓ · `pytest -q` 8 passed ✓ · uvicorn smoke: `/`, `/about`, `/contact`, `/shop`, `/healthz`, `/static/css/site.css`, `/static/img/logo.webp` all 200 with correct content-types ✓ · homepage body contains "No posts yet" + logo paths ✓ · contact page has `disabled` inputs and no `method` attribute ✓ · `docker compose config` exit 0 ✓. - **Verification run:** `python -c "from app.main import app"` ✓ · `pytest -q` 8 passed ✓ · uvicorn smoke: `/`, `/about`, `/contact`, `/shop`, `/healthz`, `/static/css/site.css`, `/static/img/logo.webp` all 200 with correct content-types ✓ · homepage body contains "No posts yet" + logo paths ✓ · contact page has `disabled` inputs and no `method` attribute ✓ · `docker compose config` exit 0 ✓.
## Phase 2 — Content Model + Cache ## Phase 2 — Content Model + Cache
- SQLite schema (below) with `PRAGMA journal_mode=WAL; PRAGMA foreign_keys=ON;`. **Completed:** 2026-04-21
- Dataclasses (below) as the in-app model; SQL → dataclass mapper lives in `app/models/`.
- Row-level rendered-HTML cache (`body_html_cached`) regenerated on write. **Summary:** Stood up the full SQLite content layer: all 7 tables from the authoritative schema, entity dataclasses + row mappers, hand-rolled versioned migrations with a `schema_migrations` tracker, idempotent Python seed (system user + welcome post + About page), a Markdown→HTML service with a strict bleach allowlist, a typed in-process TTL cache, and DB-backed `PostService` / `PageService`. `/` and `/about` now read from the DB.
- In-process TTL cache (60 s) over *hot query results* (published posts list, page-by-slug); invalidated on admin writes.
- Initial migration seeds one welcome blog post + an About page so the site is not blank before admin exists. **Key files:**
- `app/db.py` — `build_engine()` factory, per-connection PRAGMA listener (WAL + `foreign_keys=ON`), `run_migrations(engine)` runner that scans `app/models/migrations/*.sql` in lex order and records applications in `schema_migrations`.
- `app/models/entities.py` — all 8 dataclasses (User, MagicLinkToken, Session, Page, Post, Media, ContactSubmission, AuthEvent) + `PostStatus(str, Enum)` matching roadmap 1:1. NOT frozen — Phase 3+ mutate `last_login_at`, `used_at`, etc.
- `app/models/mappers.py` — `row_to_user/post/page/...` converters, `_parse_datetime` / `_parse_bool` helpers.
- `app/models/migrations/001_init.sql` — verbatim roadmap schema: 8 tables, `idx_magic_email_created`, `idx_posts_status_pub`, `idx_auth_events_created`, `CHECK (status IN ('draft','published'))`.
- `app/models/seed.py` — idempotent: marker `seed_001` in `schema_migrations` + `INSERT OR IGNORE` belt-and-braces. Seeds user id=1 (`seed@chickenbabies.local`, "Head Hen", `active=0` — not a real admin, cannot log in), post slug `welcome-to-the-farm`, page slug `about`.
- `app/services/cache.py` — `TTLCache[K, V]` generic (~50 lines). `get/set/invalidate_all()`. Monotonic clock. 60s default TTL.
- `app/services/markdown.py` — `MarkdownService.render(md) -> str`: `MarkdownIt("commonmark")` (tables disabled — allowlist doesn't include `<table>`) → `bleach.clean(..., strip=True)` with tags `{p br strong em a ul ol li h1..h4 blockquote code pre img hr}`, attrs `{a:[href,title,rel], img:[src,alt,title,width,height]}`, protocols `{http https mailto}`. No `style`, no `class`, no raw HTML pass-through.
- `app/services/posts.py` — rewritten: `PostService(engine)` runs parameterized `SELECT ... FROM posts WHERE status='published' ORDER BY published_at DESC LIMIT :limit`, converts rows to `PostSummary`. Excerpt derived from `body_html_cached`. TTL-cached. `invalidate_all()` exposed for Phase 4.
- `app/services/pages.py` — `PageService(engine).get_by_slug(slug) -> Page | None`, TTL-cached.
- `app/main.py` — wires engine, runs migrations, runs seed, instantiates services onto `app.state.{engine,post_service,page_service}`.
- `app/routes/public.py` — `/about` now pulls the seeded `Page` from `PageService`; renders `{{ page.title }}` + `{{ page.body_html_cached | safe }}`. Logs an anomaly and returns 500 with a generic message if the page is unexpectedly missing.
- `app/templates/public/about.html` — replaced static body with the dynamic page; layout kept.
- `tests/conftest.py` — `db_engine` (session, seeded) + `clean_db_engine` (function, migrated-only) fixtures, both on temp SQLite files.
- `tests/test_db_migrations.py`, `test_markdown.py`, `test_cache.py`, `test_post_service.py`, `test_page_service.py` — service + schema coverage.
- `tests/test_public_routes.py` — updated: homepage now asserts "Welcome to the Farm"; `/about` asserts seeded Markdown substring.
**Endpoints created:** none new; `/` and `/about` were rewired to DB-backed services (same URLs, same response shapes).
**Key details:**
- **Migration pattern:** every SQL file under `app/models/migrations/` gets its own transaction; already-applied files are skipped by checking `schema_migrations`. Adding a new phase = add `NNN_description.sql`. The bootstrap for `schema_migrations` itself is baked into the runner (creates the table before querying it).
- **PRAGMAs are per-connection** via `@event.listens_for(Engine, "connect")` — every pooled connection gets WAL + FK-on, not just the first. There's an explicit test covering this.
- **Seed idempotency is double-guarded:** `schema_migrations` marker `seed_001` + `INSERT OR IGNORE` on every row. Second boot logs `seed_skipped`; counts stay 1/1/1 (users/pages/posts).
- **PostSummary excerpt is derived from `body_html_cached`** (HTML-stripped + truncated), not re-rendered from `body_md`. Phase 4 writers store both; readers never touch Markdown.
- **No Markdown tables yet.** `MarkdownIt.enable("table")` was deliberately NOT called — the bleach allowlist doesn't pass `<table>`. Future tables require widening both layers together; a test documents this invariant.
- **Address still not rendered.** Seeded About Markdown mentions Morrison, TN only (no street address, per CLAUDE.md).
- **Phase 3 hooks ready:** `users` / `magic_link_tokens` / `sessions` / `auth_events` tables exist with their indexes; `User` dataclass + `PostStatus` enum + row-mapper helpers available.
- **Phase 4 hooks ready:** `PostService.invalidate_all()` + `PageService.invalidate_all()` exist (no-op callers today). Admin writes will call these after each mutation.
- **No new packages.** All deps were already pinned in Phase 0's `requirements.txt`.
- **Verification run:** `python -c "from app.main import app"` ✓ · `pytest -q` 36 passed ✓ · fresh-boot smoke: `/` shows welcome title, `/about` shows seeded Markdown, `/healthz` 200 ✓ · `PRAGMA journal_mode=wal` ✓ · second boot logs `migrations_up_to_date` + `seed_skipped`, table counts stay `users=1 pages=1 posts=1` ✓ · `docker compose config` exit 0 ✓.
## Phase 3 — Admin Auth (Magic Link) ## Phase 3 — Admin Auth (Magic Link)

56
tests/conftest.py Normal file
View File

@@ -0,0 +1,56 @@
"""Shared pytest fixtures for the ``chicken_babies_site`` suite.
Key fixtures:
- ``db_engine``: a session-scoped SQLAlchemy engine pointed at a
temp-file SQLite database. Migrations + seed run once per test
session. Per the CLAUDE.md mandate, tests do NOT mock the DB —
they use a real SQLite file so behavior matches production.
- ``clean_db_engine``: a function-scoped engine with migrations
applied but seed NOT run, for tests that need to exercise the
first-boot path.
"""
from __future__ import annotations
from pathlib import Path
from typing import Iterator
import pytest
from sqlalchemy import Engine
from app.db import build_engine, run_migrations
from app.models.seed import run_seed
@pytest.fixture(scope="session")
def db_engine(tmp_path_factory: pytest.TempPathFactory) -> Iterator[Engine]:
    """Yield a migrated + seeded SQLite engine shared across the session.

    The database lives in a real on-disk temp file (NOT ``:memory:``):
    the CLAUDE.md project rules forbid mocking the DB in auth /
    magic-link tests, and following the same rule here keeps test
    behavior identical to production.

    Yields:
        The engine, after ``run_migrations`` and ``run_seed`` have both
        completed against it.
    """
    db_path: Path = tmp_path_factory.mktemp("db") / "test.db"
    engine = build_engine(f"sqlite:///{db_path}")
    try:
        run_migrations(engine)
        run_seed(engine)
        yield engine
    finally:
        # Dispose even when migrations/seed raise during setup, so a
        # failed setup does not leave the engine's connections open on
        # the temp file.
        engine.dispose()
@pytest.fixture
def clean_db_engine(tmp_path: Path) -> Iterator[Engine]:
    """Yield a fresh engine with tables created but NO seed data.

    Function-scoped, so every test that requests it starts from a
    virgin database. Useful for asserting first-run seed behavior and
    migration idempotency without contaminating the session-scoped
    ``db_engine``.

    Yields:
        The engine, after ``run_migrations`` has completed against it.
    """
    db_path = tmp_path / "clean.db"
    engine = build_engine(f"sqlite:///{db_path}")
    try:
        run_migrations(engine)
        yield engine
    finally:
        # Dispose even when the migration step raises during setup.
        engine.dispose()

76
tests/test_cache.py Normal file
View File

@@ -0,0 +1,76 @@
"""Tests for the in-process TTL cache.
Covers:
- stored values round-trip via ``get`` before TTL expiry
- entries expire after TTL elapses
- ``invalidate_all`` drops every entry
- construction rejects a non-positive TTL
- the cache is typed-generic (spot check at runtime that multiple
concrete types work — the real type safety comes from static
checking, which isn't part of the runtime suite)
"""
from __future__ import annotations
import time
import pytest
from app.services.cache import TTLCache
def test_set_then_get_returns_stored_value() -> None:
    """Round-trip: what ``set`` stores, ``get`` hands back before expiry."""
    store: TTLCache[str, int] = TTLCache(ttl_seconds=5.0)
    store.set("answer", 42)
    fetched = store.get("answer")
    assert fetched == 42
def test_get_returns_none_for_missing_key() -> None:
    """A key that was never stored yields ``None`` instead of raising."""
    empty_store: TTLCache[str, str] = TTLCache(ttl_seconds=5.0)
    looked_up = empty_store.get("nope")
    assert looked_up is None
def test_entries_expire_after_ttl() -> None:
    """Once the TTL has elapsed, the entry reads back as absent.

    The test pairs a very short TTL with a real ``time.sleep`` instead
    of patching ``time.monotonic``, so the production expiry path is
    what actually runs.
    """
    short_lived: TTLCache[str, str] = TTLCache(ttl_seconds=0.05)
    short_lived.set("k", "v")
    # Sleep for twice the TTL so the entry is comfortably expired.
    time.sleep(0.1)
    expired = short_lived.get("k")
    assert expired is None
def test_invalidate_all_clears_everything() -> None:
    """Every entry is gone after ``invalidate_all``, TTL notwithstanding."""
    store: TTLCache[str, int] = TTLCache(ttl_seconds=60.0)
    for key, value in (("a", 1), ("b", 2)):
        store.set(key, value)

    store.invalidate_all()

    for key in ("a", "b"):
        assert store.get(key) is None
def test_non_positive_ttl_is_rejected() -> None:
    """Constructing a cache with a zero or negative TTL raises ``ValueError``.

    With a zero TTL every write would expire immediately, which is
    almost certainly a bug; the constructor's defensive check makes
    that mistake fail loudly.
    """
    for bad_ttl in (0.0, -1.0):
        with pytest.raises(ValueError):
            TTLCache(ttl_seconds=bad_ttl)
def test_cache_works_with_int_keys_and_list_values() -> None:
    """Runtime smoke check that both ``K`` and ``V`` can vary."""
    store: TTLCache[int, list[str]] = TTLCache(ttl_seconds=5.0)
    store.set(10, ["a", "b"])
    assert store.get(10) == ["a", "b"]

155
tests/test_db_migrations.py Normal file
View File

@@ -0,0 +1,155 @@
"""Tests for the DB migration runner and per-connection PRAGMAs.
Goals:
- ``run_migrations`` applies every file once and is a no-op on re-run.
- Tables and indexes declared in ``001_init.sql`` exist after a fresh
migration.
- The ``schema_migrations`` tracker records the applied version.
- ``journal_mode=WAL`` and ``foreign_keys=ON`` hold on *every* new
connection (the whole point of the ``@event.listens_for`` hook).
Uses a function-scoped temp SQLite file so each test is hermetic.
"""
from __future__ import annotations
from pathlib import Path
from sqlalchemy import Engine, text
from app.db import build_engine, run_migrations
def _fresh_engine(tmp_path: Path) -> Engine:
    """Return a new engine backed by ``mig.db`` inside ``tmp_path``.

    Deliberately local to this module: conftest offers higher-level
    fixtures, but the migration tests need to decide for themselves
    when (and how often) migrations run.
    """
    db_file = tmp_path / "mig.db"
    return build_engine(f"sqlite:///{db_file}")
def test_first_migration_creates_all_tables(tmp_path: Path) -> None:
    """A fresh database has every expected table after migrating.

    Checks the eight domain tables listed below plus the
    ``schema_migrations`` tracker; a missing table would indicate the
    SQL file got truncated.
    """
    engine = _fresh_engine(tmp_path)
    applied_versions = run_migrations(engine)
    assert applied_versions == ["001_init"], applied_versions

    required = {
        "users",
        "magic_link_tokens",
        "sessions",
        "pages",
        "posts",
        "media",
        "contact_submissions",
        "auth_events",
        "schema_migrations",
    }
    table_query = text("SELECT name FROM sqlite_master WHERE type='table'")
    with engine.connect() as conn:
        present = {row[0] for row in conn.execute(table_query).fetchall()}

    absent = required - present
    assert not absent, f"migration did not create: {absent}"
def test_required_indexes_exist(tmp_path: Path) -> None:
    """Each of the three named ROADMAP indexes exists after migrating."""
    engine = _fresh_engine(tmp_path)
    run_migrations(engine)

    index_query = text("SELECT name FROM sqlite_master WHERE type='index'")
    with engine.connect() as conn:
        names = {row[0] for row in conn.execute(index_query).fetchall()}

    wanted = (
        "idx_magic_email_created",
        "idx_posts_status_pub",
        "idx_auth_events_created",
    )
    for required in wanted:
        assert required in names, f"missing index: {required} (have {names})"
def test_migrations_are_idempotent(tmp_path: Path) -> None:
    """A second ``run_migrations`` call on a migrated DB applies nothing."""
    engine = _fresh_engine(tmp_path)
    initial_run = run_migrations(engine)
    repeat_run = run_migrations(engine)

    assert initial_run == ["001_init"]
    assert repeat_run == [], "re-run should not re-apply migrations"

    version_query = text("SELECT version FROM schema_migrations")
    with engine.connect() as conn:
        versions = [row[0] for row in conn.execute(version_query).fetchall()]

    # The tracker holds exactly one row, for the initial migration.
    # The seed never runs in this test, so seed_001 must be absent.
    assert versions == ["001_init"], versions
def test_posts_status_check_constraint_rejects_invalid(tmp_path: Path) -> None:
    """The CHECK constraint on ``posts.status`` rejects unknown values.

    Direct smoke test of the authoritative schema: if the migration
    ever loses the CHECK clause, inserting ``status='bogus'`` succeeds
    and this test fails.
    """
    # Function-scope imports: this module does not import pytest or
    # sqlalchemy.exc at the top level.
    import pytest
    import sqlalchemy.exc

    engine = _fresh_engine(tmp_path)
    run_migrations(engine)

    # Insert a minimal user first so the post's author_user_id=1
    # reference is satisfied and only the CHECK can reject the row.
    with engine.connect() as conn:
        conn.execute(
            text(
                "INSERT INTO users (id, email, display_name, created_at,"
                " active) VALUES (1, 'x@x', 'x', '2026-01-01T00:00:00+00:00', 1)"
            )
        )
        conn.commit()

    with engine.connect() as conn:
        # pytest.raises fails the test with "DID NOT RAISE" if the
        # insert + commit goes through, replacing the manual flag.
        with pytest.raises(sqlalchemy.exc.IntegrityError):
            conn.execute(
                text(
                    "INSERT INTO posts (slug, title, body_md, body_html_cached,"
                    " status, updated_at, author_user_id)"
                    " VALUES ('x', 't', 'm', 'h', 'bogus',"
                    " '2026-01-01T00:00:00+00:00', 1)"
                )
            )
            conn.commit()
def test_pragmas_apply_on_every_connection(tmp_path: Path) -> None:
    """Both PRAGMAs are active on every connection handed out by the engine.

    Regression guard for the ``@event.listens_for(Engine, "connect")``
    contract: each connection must report WAL + ``foreign_keys=1``.

    The two connections are held open *at the same time*. Checking
    them out one after the other would let a connection pool hand the
    same underlying connection back twice, so the second check would
    prove nothing new.
    NOTE(review): assumes ``build_engine`` allows at least two
    concurrent checkouts — confirm against its pool configuration.
    """
    engine = _fresh_engine(tmp_path)
    run_migrations(engine)

    with engine.connect() as first, engine.connect() as second:
        for conn in (first, second):
            jm = conn.execute(text("PRAGMA journal_mode")).scalar_one()
            fk = conn.execute(text("PRAGMA foreign_keys")).scalar_one()
            assert str(jm).lower() == "wal", f"expected WAL, got {jm!r}"
            assert int(fk) == 1, f"expected foreign_keys=1, got {fk!r}"

112
tests/test_markdown.py Normal file
View File

@@ -0,0 +1,112 @@
"""Tests for the Markdown → sanitized HTML pipeline.
We care about three things:
1. Safe inline markup (``**bold**``, ``*italic*``, links, lists) round-trips
into the expected HTML tags.
2. Dangerous constructs (``<script>``, ``<iframe>``, ``<style>``, inline
``onclick`` handlers, ``javascript:`` URLs) are stripped — not
escaped — from the output.
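3. Table markup does NOT survive: the ``table`` plugin is deliberately
left disabled in :class:`MarkdownService` and the bleach allowlist has
no table tags, so ``<table>`` never reaches the output.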
These are spot checks, not a full fuzz of bleach. The full allowlist
is already enforced in ``app.services.markdown``.
"""
from __future__ import annotations
import pytest
from app.services.markdown import MarkdownService, render_markdown_safe
@pytest.fixture
def md() -> MarkdownService:
    """Provide a brand-new :class:`MarkdownService` for each test.

    Function scope keeps tests independent of one another; building
    the service is cheap enough that sharing it is not worthwhile.
    """
    service = MarkdownService()
    return service
def test_basic_markdown_renders_paragraphs_and_emphasis(md: MarkdownService) -> None:
    """Plain paragraph, bold and italic Markdown map to ``p``/``strong``/``em``."""
    rendered = md.render("Hello **world** and *friends*.")
    for fragment in ("<p>", "<strong>world</strong>", "<em>friends</em>"):
        assert fragment in rendered
def test_script_tags_are_stripped(md: MarkdownService) -> None:
    """Raw ``<script>`` markup never appears in the rendered output."""
    rendered = md.render("Hello<script>alert('xss')</script>world")
    # With bleach's strip=True the tag itself is dropped. Its inner
    # text may survive as inert text, so only the tags are asserted on.
    assert "<script" not in rendered
    assert "</script>" not in rendered
def test_iframe_and_style_tags_are_stripped(md: MarkdownService) -> None:
    """``<iframe>`` and ``<style>`` are removed while ordinary text survives."""
    source = "<iframe src='evil'></iframe>\n\n<style>body{}</style>\n\nsafe"
    rendered = md.render(source)
    for forbidden in ("<iframe", "<style"):
        assert forbidden not in rendered
    assert "safe" in rendered
def test_javascript_urls_are_stripped_from_links(md: MarkdownService) -> None:
    """A raw ``<a href="javascript:...">`` loses its dangerous href.

    The link is supplied as raw HTML rather than ``[text](url)``
    Markdown. Per the original author's note, commonmark silently
    declines to build an anchor for the unknown ``javascript:``
    protocol, which would leave bleach nothing to filter. The test
    only requires that ``javascript:`` is absent from the output.
    """
    raw_anchor = '<a href="javascript:alert(1)">click</a>'
    rendered = md.render(raw_anchor)
    assert "javascript:" not in rendered
def test_allowed_link_and_image_attributes_survive(md: MarkdownService) -> None:
    """Allowlisted ``href``/``title``/``alt``/``src`` attributes are kept."""
    source = (
        '[hello](https://example.com "Example")\n\n'
        '![alt text](https://example.com/a.png "Caption")'
    )
    rendered = md.render(source)
    expected_attributes = (
        'href="https://example.com"',
        'title="Example"',
        'alt="alt text"',
        'src="https://example.com/a.png"',
    )
    for attribute in expected_attributes:
        assert attribute in rendered
def test_inline_event_handler_attribute_is_stripped(md: MarkdownService) -> None:
    """An inline ``onclick`` handler is absent from the sanitized output."""
    raw_anchor = '<a href="/x" onclick="alert(1)">x</a>'
    rendered = md.render(raw_anchor)
    assert "onclick" not in rendered
def test_table_tags_are_stripped(md: MarkdownService) -> None:
    """Pipe-table Markdown produces no ``<table>`` tag in the output.

    This pins the intentional policy. The parser is the commonmark
    preset with NO table plugin, and the bleach allowlist has no table
    tags; widening one without the other would be a policy mismatch.
    If a later phase adds tables, flip this assertion together with
    the matching allowlist change.
    """
    pipe_table = "| a | b |\n|---|---|\n| 1 | 2 |\n"
    rendered = md.render(pipe_table)
    assert "<table" not in rendered
def test_module_level_helper_matches_class(md: MarkdownService) -> None:
    """The module-level helper and ``MarkdownService.render`` agree."""
    source = "Hello **there**."
    via_helper = render_markdown_safe(source)
    via_service = md.render(source)
    assert via_helper == via_service

View File

@@ -0,0 +1,53 @@
"""Tests for :class:`app.services.pages.PageService`.
Uses the session-scoped seeded ``db_engine`` fixture.
"""
from __future__ import annotations
from sqlalchemy import Engine
from app.models.entities import Page
from app.services.pages import PageService
def test_get_by_slug_returns_seeded_about_page(db_engine: Engine) -> None:
    """Looking up ``about`` returns the seeded row as a :class:`Page`."""
    about = PageService(db_engine).get_by_slug("about")

    assert about is not None
    assert isinstance(about, Page)
    assert about.slug == "about"
    assert about.title == "About the Farm"
    # The seed Markdown has several paragraphs, so the cached HTML
    # must contain a <p>. Its presence also shows the Markdown
    # pipeline ran at seed time.
    assert "<p>" in about.body_html_cached
    assert about.published is True
def test_get_by_slug_returns_none_for_unknown_slug(db_engine: Engine) -> None:
    """A slug with no matching row yields ``None`` instead of an exception."""
    lookup = PageService(db_engine).get_by_slug("does-not-exist")
    assert lookup is None
def test_get_by_slug_is_cached(db_engine: Engine) -> None:
    """Two lookups of the same slug return the identical ``Page`` object.

    Object identity is what signals a cache hit. The not-None guard
    matters: without it, two failed lookups that both return ``None``
    would satisfy ``first is second`` and the test would pass without
    exercising the cache at all.
    """
    service = PageService(db_engine)
    first = service.get_by_slug("about")
    second = service.get_by_slug("about")
    assert first is not None, "seeded About page should be present"
    assert first is second
def test_invalidate_all_forces_reload(db_engine: Engine) -> None:
    """A lookup after :meth:`invalidate_all` yields a freshly built object."""
    pages = PageService(db_engine)
    before = pages.get_by_slug("about")
    pages.invalidate_all()
    after = pages.get_by_slug("about")

    assert before is not after
    # Same row both times: equal content, different object.
    assert before is not None and after is not None
    assert before.slug == after.slug == "about"

View File

@@ -0,0 +1,63 @@
"""Tests for :class:`app.services.posts.PostService`.
Uses the session-scoped ``db_engine`` fixture (temp file, migrated,
seeded) so we exercise the real SQL path — not a mock.
"""
from __future__ import annotations
from sqlalchemy import Engine
from app.models.posts import PostSummary
from app.services.posts import PostService
def test_list_published_returns_seeded_welcome_post(db_engine: Engine) -> None:
    """The published-list query includes the seeded welcome post."""
    welcome_slug = "welcome-to-the-farm"
    published = PostService(db_engine).list_published()

    assert len(published) >= 1
    assert welcome_slug in [post.slug for post in published]

    welcome = next(post for post in published if post.slug == welcome_slug)
    assert isinstance(welcome, PostSummary)
    assert welcome.title == "Welcome to the Farm"
    assert welcome.published_at is not None
    # The excerpt must be non-empty and short enough for a card
    # layout: 280 characters plus an optional ellipsis.
    assert welcome.excerpt
    assert len(welcome.excerpt) <= 281
def test_list_published_is_cached(db_engine: Engine) -> None:
    """Back-to-back calls with the same limit return the very same list.

    Per the original author's note the cache is keyed by limit, so two
    calls inside the TTL window should return one identical list
    object. That identity is the evidence of a cache hit.
    """
    posts_service = PostService(db_engine)
    initial = posts_service.list_published()
    repeated = posts_service.list_published()
    # Identity check stands in for a "spy on SQL" test: simpler, and
    # it survives refactors of the query code.
    assert initial is repeated
def test_invalidate_all_forces_reload(db_engine: Engine) -> None:
    """A call after :meth:`invalidate_all` yields a freshly built list."""
    posts_service = PostService(db_engine)
    before = posts_service.list_published()
    posts_service.invalidate_all()
    after = posts_service.list_published()

    assert before is not after
    # Same database and rows, so the slugs still line up.
    slugs_before = [post.slug for post in before]
    slugs_after = [post.slug for post in after]
    assert slugs_before == slugs_after
def test_list_published_respects_limit(db_engine: Engine) -> None:
    """Asking for ``limit=1`` never returns more than one post."""
    limited = PostService(db_engine).list_published(limit=1)
    assert len(limited) <= 1

View File

@@ -1,17 +1,18 @@
"""Smoke tests for the public-site skeleton routes. """Smoke tests for the public-site routes.
These tests focus on contract rather than styling: These tests focus on contract rather than styling:
- every public route returns 200 with an HTML content-type - every public route returns 200 with an HTML content-type
- each page contains a page-specific substring (proves the template - each page contains a page-specific substring (proves the template
actually rendered, not just that the route exists) actually rendered, not just that the route exists)
- the homepage renders the empty-state copy when PostService returns [] - the homepage renders the Phase 2 seeded welcome post title
- the shared layout emits the logo image path so nav/logo aren't broken - the About page renders the Phase 2 seeded About markdown
by a future refactor - the shared layout emits the logo image path
- the About nav link carries ``aria-current="page"``
No mocks of the DB (there is no DB in Phase 1). The PostService stub Phase 2 updates: the homepage no longer shows "No posts yet" because
already returns an empty list, which is exactly what we want to assert the seed inserts a welcome post, and the About page content now comes
against. from the DB-backed ``pages`` row rather than the old static template.
""" """
from __future__ import annotations from __future__ import annotations
@@ -27,7 +28,8 @@ def client() -> TestClient:
"""Return a module-scoped FastAPI TestClient. """Return a module-scoped FastAPI TestClient.
TestClient uses the module-level `app` built by `create_app()` at TestClient uses the module-level `app` built by `create_app()` at
import time, i.e. the exact same app uvicorn runs in production. import time i.e. the exact same app uvicorn runs in production,
including migrations + seed.
""" """
return TestClient(app) return TestClient(app)
@@ -36,7 +38,9 @@ def client() -> TestClient:
"path,expected_substring", "path,expected_substring",
[ [
("/", "Chicken Babies"), ("/", "Chicken Babies"),
("/about", "About the farm"), # Phase 2: the About page renders the seeded page title
# "About the Farm" (h1 from the template + page.title).
("/about", "About the Farm"),
("/contact", "Get in touch"), ("/contact", "Get in touch"),
("/shop", "Coming soon"), ("/shop", "Coming soon"),
], ],
@@ -67,15 +71,30 @@ def test_public_route_renders_html(
) )
def test_home_shows_empty_state_when_no_posts(client: TestClient) -> None: def test_home_shows_welcome_post(client: TestClient) -> None:
"""With the Phase 1 stub service, the home page shows 'No posts yet'. """The Phase 2 seed inserts a welcome post; its title appears on /.
This is the canonical empty-state marker; Phase 2 seeds a welcome Replaces the Phase 1 "No posts yet" assertion now that the DB
post so this test will need to be updated when the DB lands. has a real published row on first boot.
""" """
response = client.get("/") response = client.get("/")
assert response.status_code == 200 assert response.status_code == 200
assert "No posts yet" in response.text assert "Welcome to the Farm" in response.text
def test_about_renders_seeded_markdown(client: TestClient) -> None:
"""The About page body comes from the seeded ``pages`` row.
Picks a distinctive substring from the seeded Markdown so the
assertion fails if the old static template ever comes back.
"""
response = client.get("/about")
assert response.status_code == 200
# Substring from the seeded About markdown paragraph 1.
assert "small family farm" in response.text
# Seeded copy explicitly does not expose a street address.
# Spot-check: the word "Morrison" appears (town-level).
assert "Morrison" in response.text
def test_layout_includes_logo_image(client: TestClient) -> None: def test_layout_includes_logo_image(client: TestClient) -> None: