Stand up the full SQLite content layer: all 7 tables from the authoritative schema with WAL + foreign-keys enforced per-connection, entity dataclasses plus row mappers, hand-rolled versioned migrations tracked in schema_migrations, and an idempotent Python seed (system user + welcome post + About page). Add a Markdown->HTML service using markdown-it-py with a strict bleach allowlist (tables intentionally omitted on both sides). Add a typed in-process TTLCache[K,V] and wire it into real DB-backed PostService and PageService, both exposing invalidate_all() for Phase 4 admin writes. Rewire / and /about to read from the DB; homepage renders the seeded welcome post, About renders page.title + sanitized body_html_cached. Update the Phase 1 route tests accordingly. Mark Phase 2 complete in docs/ROADMAP.md.
175 lines
6.2 KiB
Python
175 lines
6.2 KiB
Python
"""Blog post read service.
|
|
|
|
Phase 2 replaces the Phase 1 empty-list stub with a real SQLite-backed
|
|
implementation. The public method signature on
|
|
:meth:`PostService.list_published` is unchanged — routes and templates
|
|
written in Phase 1 continue to work.
|
|
|
|
Public contract:
|
|
|
|
- :meth:`PostService.list_published` returns ``list[PostSummary]``.
|
|
- :meth:`PostService.invalidate_all` clears the TTL cache (Phase 4).
|
|
- :func:`get_post_service` pulls the app-scoped instance off the
  FastAPI app state.
|
|
"""
|
|
|
|
from __future__ import annotations

import html
import re
from typing import Optional

from fastapi import Request
from sqlalchemy import Engine, text

from app.models.entities import PostStatus
from app.models.mappers import _parse_datetime
from app.models.posts import PostSummary
from app.services.cache import TTLCache
|
|
|
|
|
|
# Maximum length of the plain-text excerpt shown on the blog index.
# Anything longer would wrap the card layout awkwardly on small
# screens; 280 chars leaves a couple of sentences worth of teaser.
_EXCERPT_CHARS: int = 280

# Regex used to scrub HTML tags out of the rendered body for excerpt
# generation. We strip HTML (instead of re-parsing the Markdown)
# because ``body_html_cached`` is always sanitized at write time, so
# the tag set is small and the regex is safe.
_TAG_RE: re.Pattern[str] = re.compile(r"<[^>]+>")

# Regex used to collapse whitespace runs into a single space after
# stripping HTML tags, so excerpts don't carry newlines or duplicate
# spaces from the source Markdown layout.
_WS_RE: re.Pattern[str] = re.compile(r"\s+")


def _build_excerpt(body_md: str, body_html_cached: str) -> str:
    """Build a short plain-text teaser from the cached HTML.

    Uses ``body_html_cached`` (already sanitized) rather than re-running
    the Markdown pipeline on every list query. If the cached HTML is
    empty, the raw Markdown is used as the source instead so the excerpt
    isn't blank; Markdown syntax characters are *not* removed in that
    fallback.

    Processing order matters:

    1. Tags are replaced by spaces first, while entity-escaped angle
       brackets (``&lt;b&gt;``) are still encoded and therefore cannot
       be mistaken for markup.
    2. HTML entities are then decoded, because the result is plain text.
       Leaving ``&amp;`` encoded would make an autoescaping template
       render a literal ``&amp;`` to the reader.
    3. Whitespace is collapsed and the text is truncated, so the length
       limit is measured in displayed characters and an entity can
       never be cut in half.

    Parameters
    ----------
    body_md:
        Raw Markdown source; only consulted when ``body_html_cached``
        is empty. ``None`` (e.g. a NULL column) is tolerated.
    body_html_cached:
        Sanitized HTML rendering of the post. ``None`` is tolerated.

    Returns
    -------
    str
        Plain text of at most ``_EXCERPT_CHARS`` characters, plus a
        trailing ellipsis when truncated. The text is unescaped, so it
        may contain literal ``<`` / ``&`` characters and must be
        rendered with autoescaping, never marked as safe HTML.
    """
    # ``or ""`` guards against NULL columns reaching ``re.sub``.
    source = body_html_cached or body_md or ""
    without_tags = _TAG_RE.sub(" ", source)
    # Decode entities only after tag removal (see docstring, step 1/2).
    decoded = html.unescape(without_tags)
    # ``\s`` also matches the non-breaking space produced by ``&nbsp;``.
    collapsed = _WS_RE.sub(" ", decoded).strip()
    if len(collapsed) <= _EXCERPT_CHARS:
        return collapsed
    # Truncate on a word boundary if possible to avoid mid-word cuts,
    # but only when that keeps more than half of the budget; otherwise
    # a single very long token would shrink the excerpt to almost nothing.
    truncated = collapsed[:_EXCERPT_CHARS]
    last_space = truncated.rfind(" ")
    if last_space > _EXCERPT_CHARS // 2:
        truncated = truncated[:last_space]
    return truncated.rstrip() + "\u2026"  # ellipsis
|
|
|
|
|
|
class PostService:
    """Read-side service for published blog posts.

    Results of :meth:`list_published` are memoised in an in-process TTL
    cache keyed by the (clamped) ``limit``. Callers always receive their
    own list object, so mutating a returned list never affects what
    other callers see.

    Parameters
    ----------
    engine:
        Shared SQLAlchemy engine.
    ttl_seconds:
        Cache TTL in seconds; default 60 s matches the ROADMAP.
    """

    def __init__(self, engine: Engine, ttl_seconds: float = 60.0) -> None:
        self._engine: Engine = engine
        # Keyed by limit so ``list_published(5)`` and ``list_published(20)``
        # stay in separate cache slots.
        self._cache: TTLCache[int, list[PostSummary]] = TTLCache(ttl_seconds)

    def list_published(self, limit: int = 20) -> list[PostSummary]:
        """Return up to ``limit`` published posts, newest first.

        Parameters
        ----------
        limit:
            Maximum rows to return. Clamped to ``[1, 100]`` to keep
            pathological callers from dumping the full table.

        Returns
        -------
        list[PostSummary]
            Immutable summary records; an empty list when the site
            has no published posts (the template renders an
            appropriate empty state). The list itself is a fresh
            shallow copy on every call, so callers may sort or extend
            it without corrupting the cached entry.

        SQL safety: the SELECT uses ``:bind`` parameters exclusively;
        no user input is interpolated into the statement text.
        """
        # Defensive clamp; the public template only passes 20 but
        # future callers could pass arbitrary values.
        safe_limit = max(1, min(int(limit), 100))

        # An empty list is a valid cached result, hence the explicit
        # ``is not None`` test rather than truthiness.
        cached = self._cache.get(safe_limit)
        if cached is not None:
            # Copy so the caller cannot mutate the shared cached list.
            return list(cached)

        with self._engine.connect() as conn:
            rows = (
                conn.execute(
                    text(
                        "SELECT slug, title, published_at, body_md,"
                        " body_html_cached"
                        " FROM posts"
                        " WHERE status = :status"
                        " ORDER BY published_at DESC"
                        " LIMIT :limit"
                    ),
                    {
                        "status": PostStatus.PUBLISHED.value,
                        "limit": safe_limit,
                    },
                )
                .mappings()
                .all()
            )

        summaries: list[PostSummary] = []
        for row in rows:
            published_at_str: Optional[str] = row["published_at"]
            # A row with status='published' should never have NULL
            # published_at; if it does, skip it rather than crash the
            # homepage. Phase 4's admin flow enforces this invariant
            # at write time.
            if published_at_str is None:
                continue
            summaries.append(
                PostSummary(
                    slug=row["slug"],
                    title=row["title"],
                    published_at=_parse_datetime(published_at_str),
                    excerpt=_build_excerpt(
                        row["body_md"], row["body_html_cached"]
                    ),
                )
            )

        self._cache.set(safe_limit, summaries)
        # Same aliasing protection as the cache-hit path: the cache keeps
        # ``summaries``; the caller gets an independent copy.
        return list(summaries)

    def invalidate_all(self) -> None:
        """Drop every cached post-list entry.

        Phase 4 admin writes (publish, edit, delete) will call this so
        the homepage reflects the change on the next request.
        """
        self._cache.invalidate_all()
|
|
|
|
|
|
def get_post_service(request: Request) -> PostService:
    """Resolve the shared :class:`PostService` for a FastAPI route.

    The service is not built here; it is read from
    ``request.app.state.post_service``, where
    :func:`app.main.create_app` is documented to store the single
    app-wide instance. Tests can substitute their own service through
    ``app.dependency_overrides[get_post_service]``.

    Raises ``AttributeError`` if no service was attached to the app
    state.
    """
    app_state = request.app.state
    return app_state.post_service
|