"""Blog post read service. Phase 2 replaces the Phase 1 empty-list stub with a real SQLite-backed implementation. The public method signature on :meth:`PostService.list_published` is unchanged — routes and templates written in Phase 1 continue to work. Public contract: - :meth:`PostService.list_published` returns ``list[PostSummary]``. - :meth:`PostService.invalidate_all` clears the TTL cache (Phase 4). - :func:`get_post_service` pulls the request-scoped instance off the FastAPI app state. """ from __future__ import annotations import re from typing import Optional from fastapi import Request from sqlalchemy import Engine, text from app.models.entities import PostStatus from app.models.posts import PostSummary from app.models.mappers import _parse_datetime from app.services.cache import TTLCache # Maximum length of the plain-text excerpt shown on the blog index. # Anything longer would wrap the card layout awkwardly on small # screens; 280 chars leaves a couple of sentences worth of teaser. _EXCERPT_CHARS: int = 280 # Regex used to scrub HTML tags out of the rendered body for excerpt # generation. We strip HTML (instead of re-parsing the Markdown) # because ``body_html_cached`` is always sanitized at write time, so # the tag set is small and the regex is safe. _TAG_RE: re.Pattern[str] = re.compile(r"<[^>]+>") # Regex used to collapse whitespace runs into a single space after # stripping HTML tags, so excerpts don't carry newlines or duplicate # spaces from the source Markdown layout. _WS_RE: re.Pattern[str] = re.compile(r"\s+") def _build_excerpt(body_md: str, body_html_cached: str) -> str: """Build a short plaintext teaser from the cached HTML. Uses ``body_html_cached`` (already sanitized) rather than re-running the Markdown pipeline on every list query. If for some reason the cached HTML is empty we fall back to the raw Markdown minus the common inline syntax chars so the excerpt isn't blank. """ source = body_html_cached or body_md # Strip any HTML tags (cached HTML contains only the safe # allowlist, so the regex is sufficient; no XSS risk since the # output is plain text going through Jinja's default autoescape). text_only = _TAG_RE.sub(" ", source) collapsed = _WS_RE.sub(" ", text_only).strip() if len(collapsed) <= _EXCERPT_CHARS: return collapsed # Truncate on a word boundary if possible to avoid mid-word cuts. truncated = collapsed[:_EXCERPT_CHARS] last_space = truncated.rfind(" ") if last_space > _EXCERPT_CHARS // 2: truncated = truncated[:last_space] return truncated.rstrip() + "\u2026" # ellipsis class PostService: """Read-side service for published blog posts. Parameters ---------- engine: Shared SQLAlchemy engine. ttl_seconds: Cache TTL in seconds; default 60 s matches the ROADMAP. """ def __init__(self, engine: Engine, ttl_seconds: float = 60.0) -> None: self._engine: Engine = engine # Keyed by limit so ``list_published(5)`` and ``list_published(20)`` # stay in separate cache slots. self._cache: TTLCache[int, list[PostSummary]] = TTLCache(ttl_seconds) def list_published(self, limit: int = 20) -> list[PostSummary]: """Return up to ``limit`` published posts, newest first. Parameters ---------- limit: Maximum rows to return. Clamped to ``[1, 100]`` to keep pathological callers from dumping the full table. Returns ------- list[PostSummary] Immutable summary records; an empty list when the site has no published posts (the template renders an appropriate empty state). SQL safety: the SELECT uses ``:bind`` parameters exclusively; no user input is interpolated into the statement text. """ # Defensive clamp; the public template only passes 20 but # future callers could pass arbitrary values. safe_limit = max(1, min(int(limit), 100)) cached = self._cache.get(safe_limit) if cached is not None: return cached with self._engine.connect() as conn: rows = ( conn.execute( text( "SELECT slug, title, published_at, body_md," " body_html_cached" " FROM posts" " WHERE status = :status" " ORDER BY published_at DESC" " LIMIT :limit" ), { "status": PostStatus.PUBLISHED.value, "limit": safe_limit, }, ) .mappings() .all() ) summaries: list[PostSummary] = [] for row in rows: published_at_str: Optional[str] = row["published_at"] # A row with status='published' should never have NULL # published_at; if it does, skip it rather than crash the # homepage. Phase 4's admin flow enforces this invariant # at write time. if published_at_str is None: continue summaries.append( PostSummary( slug=row["slug"], title=row["title"], published_at=_parse_datetime(published_at_str), excerpt=_build_excerpt( row["body_md"], row["body_html_cached"] ), ) ) self._cache.set(safe_limit, summaries) return summaries def invalidate_all(self) -> None: """Drop every cached post-list entry. Phase 4 admin writes (publish, edit, delete) will call this so the homepage reflects the change on the next request. """ self._cache.invalidate_all() def get_post_service(request: Request) -> PostService: """FastAPI dependency: pull the app-scoped :class:`PostService`. Instantiated once in :func:`app.main.create_app` and stored on ``app.state.post_service``. Tests override via ``app.dependency_overrides[get_post_service]``. """ return request.app.state.post_service