Files
chicken_babies_site/app/services/media.py
Phillip Tarrant 9a8506970c feat: phase 4 admin CMS — dashboard, editor, media, CSRF
Head Hen CMS end-to-end: dashboard lists all posts (drafts + published),
Markdown editor with live preview + drag-drop image upload, Pillow media
pipeline re-encoding every upload to JPEG, post CRUD + publish toggle +
hard delete, About page edit, and double-submit CSRF cookie enforced on
every admin mutating endpoint (Phase 3's TODO markers resolved).

Slug auto-generated on create and server-locked once a post has been
published. Unpublish preserves `published_at` so re-publish keeps
original date ordering. Every admin write invalidates the read-side
Post/Page TTL caches and records an `auth_events` audit row.

CSRF middleware is narrow by design — issues/refreshes the `cb_csrf`
cookie only on `GET /admin*`, and mutating endpoints opt in via
`require_csrf_form` or `require_csrf_header` Depends. Public routes,
healthz, and pre-auth login stay untouched.

64 new tests cover slugs, CSRF, media, admin posts/pages services, and
end-to-end CMS routes. Tests never mock the DB — real temp SQLite files
per the CLAUDE.md mandate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-21 20:42:01 -05:00

324 lines
12 KiB
Python

"""Image upload pipeline: validate → re-encode → store → record.
Every admin image upload passes through this service. The contract is
strict on purpose — the site serves user-editable HTML (via the
sanitizer) plus the bytes that flow through here, so anything we miss
becomes XSS / RCE surface area.
Steps in :meth:`MediaService.save_upload`:
1. **Size cap** — reject anything over 8 MB at the bytes level
(before decoding). We read the full buffer so we can hash and
re-encode it; streaming would complicate Pillow's decode path and
upload volumes are tiny.
2. **Magic-byte check** — :mod:`python-magic` inspects the first
2048 bytes and yields a MIME type. Anything not in our allowlist
(``image/jpeg``, ``image/png``, ``image/webp``) is rejected.
Notably, ``image/gif`` is NOT allowed — animated GIFs have a long
history of ambiguous / abuse-friendly encodings.
3. **Pillow decode** — open via :func:`PIL.Image.open` on a
:class:`io.BytesIO` wrapper. Call ``.verify()`` on a dedicated copy
(it consumes the stream), then re-open for the actual encode path.
Reject anything larger than 10000 px per side as a defense against
decompression bombs.
4. **Re-encode to JPEG** — always JPEG. Strip metadata by reopening
into a clean :class:`PIL.Image.Image`; flatten alpha on a white
background so transparent PNG / WebP images don't render as black.
5. **Store** — write to ``<media_root>/<yyyy>/<mm>/<random>.jpg`` where
the random component is :func:`secrets.token_urlsafe(16)`. The
client-supplied filename is kept only in the DB row's
``original_filename`` for display; it is NEVER used to build a
filesystem path.
6. **DB row** — insert a :class:`Media` row. Return the loaded
dataclass.
"""
from __future__ import annotations
import io
import secrets
from datetime import datetime, timezone
from pathlib import Path
from typing import Final, Optional
import structlog
from PIL import Image, UnidentifiedImageError
from sqlalchemy import Engine, text
from app.models.entities import Media
from app.models.mappers import row_to_media
from app.services.audit import AuditService
# Module-level structlog logger, named after this module.
_log = structlog.get_logger(__name__)
# Hard ceiling on the raw upload size, in bytes (8 MB). The figure comes
# from the project security constraint; a brochure-site blog has no
# legitimate need for anything bigger.
MAX_UPLOAD_BYTES: Final[int] = 8 * 1024**2

# Largest width or height (in pixels) an image may have. Anything beyond
# this on either side is refused as a lightweight guard against
# decompression bombs.
MAX_PIXEL_DIMENSION: Final[int] = 10000

# Allowlist of MIME types the magic-byte sniff may report. Whatever the
# input format, the stored file is always re-encoded to JPEG.
_ACCEPTED_MIME: Final[frozenset[str]] = frozenset(
    ("image/jpeg", "image/png", "image/webp")
)

# Quality setting passed to Pillow's JPEG encoder; 85 is a common
# compromise for photograph-like content.
_JPEG_QUALITY: Final[int] = 85
class MediaRejectedError(Exception):
    """Signal that an upload was refused by one of the validation steps.

    The exception message is displayed to the admin in the editor, so it
    must stay generic and must not reveal implementation detail.
    """
class MediaService:
    """Validate and store admin-uploaded images.

    Parameters
    ----------
    engine:
        Shared SQLAlchemy engine.
    media_root:
        Filesystem directory under which uploads live (the
        ``<yyyy>/<mm>/`` partition is appended). Relative paths are
        resolved against the process cwd, matching how the FastAPI
        StaticFiles mount is configured.
    public_prefix:
        URL prefix where the media root is mounted for public serving.
        Defaults to ``/media`` so the Markdown that the admin inserts
        after a drag-drop upload uses a path the public site can
        actually reach. Leading/trailing slashes are normalized; a
        root mount (``"/"`` or ``""``) is supported.
    audit:
        :class:`AuditService` for the ``media_uploaded`` event.
    """

    def __init__(
        self,
        engine: Engine,
        media_root: str,
        audit: AuditService,
        *,
        public_prefix: str = "/media",
    ) -> None:
        self._engine: Engine = engine
        self._media_root: Path = Path(media_root)
        # Normalize to "/<prefix>" with no trailing slash — URLs are always
        # built as "<prefix>/<yyyy>/...". A root mount must collapse to ""
        # rather than "/": otherwise the result would start with "//",
        # which browsers treat as a protocol-relative URL to another host.
        bare_prefix = public_prefix.strip("/")
        self._public_prefix: str = f"/{bare_prefix}" if bare_prefix else ""
        self._audit: AuditService = audit

    # ------------------------------------------------------------------
    # save_upload
    # ------------------------------------------------------------------
    def save_upload(
        self,
        *,
        original_filename: str,
        data: bytes,
        uploaded_by: int,
        alt_text: str = "",
    ) -> Media:
        """Validate + re-encode + persist a new media upload.

        Parameters
        ----------
        original_filename:
            The filename the client submitted. Stored in the DB row
            for display only; NEVER used to build a filesystem path.
        data:
            Raw request body. Must be non-empty and at most
            :data:`MAX_UPLOAD_BYTES`.
        uploaded_by:
            :class:`User` id of the authenticated admin performing the
            upload.
        alt_text:
            Optional alt text. Empty is allowed — admin can set it
            later by hand-editing the Markdown.

        Returns
        -------
        Media
            Fully-populated :class:`Media` dataclass.

        Raises
        ------
        MediaRejectedError
            When any validation step fails (size, MIME, decode,
            dimensions).
        """
        # 1. Size cap — cheap, do first.
        if len(data) == 0:
            raise MediaRejectedError("Empty upload.")
        if len(data) > MAX_UPLOAD_BYTES:
            raise MediaRejectedError(
                "Upload exceeds the 8 MB limit."
            )

        # 2. Magic-byte sniff.
        sniffed_mime = _sniff_mime(data)
        if sniffed_mime not in _ACCEPTED_MIME:
            raise MediaRejectedError(
                f"Unsupported image type ({sniffed_mime})."
            )

        # 3. Verify, bound the dimensions, then fully decode.
        image = self._open_checked(data)

        # 4. JPEG has no alpha channel: always hand the encoder plain RGB.
        image_out = self._flatten_to_rgb(image)

        # 5. Randomize the storage name and partition by month. The
        # client-supplied filename never participates in the path.
        now = datetime.now(timezone.utc)
        partition = f"{now:%Y}/{now:%m}"
        random_name = f"{secrets.token_urlsafe(16)}.jpg"
        target_dir = self._media_root / partition
        target_dir.mkdir(parents=True, exist_ok=True)
        target_path = target_dir / random_name

        try:
            # Re-encode to JPEG with the metadata stripped (a fresh
            # re-save removes any EXIF / color profile the source had).
            image_out.save(
                target_path,
                format="JPEG",
                quality=_JPEG_QUALITY,
                optimize=True,
            )
            final_bytes = target_path.stat().st_size

            # 6. DB row.
            media = self._insert_row(
                filename=random_name,
                original_filename=original_filename,
                size_bytes=final_bytes,
                stored_path=str(target_path),
                alt_text=alt_text,
                uploaded_by=uploaded_by,
                uploaded_at=now.isoformat(),
            )
        except Exception:
            # The encode or the insert failed: without a DB row nothing
            # references the file, so remove it instead of leaving an
            # orphaned (possibly half-written) JPEG under the media root.
            try:
                target_path.unlink(missing_ok=True)
            except OSError:
                # Best effort only — never mask the original failure.
                _log.warning(
                    "media_orphan_cleanup_failed", path=str(target_path)
                )
            raise

        # The audit event is recorded after the row is committed and is
        # deliberately outside the cleanup guard above: once the row
        # exists the file must stay on disk.
        self._audit.record(
            "media_uploaded",
            user_id=uploaded_by,
            detail={
                "media_id": media.id,
                "filename": media.filename,
                "size_bytes": media.size_bytes,
                "original_mime": sniffed_mime,
            },
        )
        return media

    @staticmethod
    def _open_checked(data: bytes) -> Image.Image:
        """Return a fully-decoded Pillow image for ``data`` or reject it.

        Any Pillow failure is translated into a generic
        :class:`MediaRejectedError` so the Pillow error string is never
        echoed back to the admin UI (the cause stays attached via
        ``__cause__`` for server-side debugging).
        """
        try:
            # verify() consumes its stream, so it gets a throwaway
            # BytesIO; the image used for encoding is opened separately.
            Image.open(io.BytesIO(data)).verify()
            image = Image.open(io.BytesIO(data))
        except Exception as exc:  # noqa: BLE001
            raise MediaRejectedError("Image could not be decoded.") from exc

        # Check the header-reported dimensions BEFORE load(): the decode
        # is lazy, so rejecting here means an oversized image never gets
        # its pixel data decompressed into memory.
        width, height = image.size
        if width <= 0 or height <= 0:
            raise MediaRejectedError("Image has zero dimension.")
        if width > MAX_PIXEL_DIMENSION or height > MAX_PIXEL_DIMENSION:
            raise MediaRejectedError(
                "Image dimensions exceed the maximum allowed."
            )

        try:
            # Load here so we catch truncated / corrupt payloads that
            # verify() misses. Without load() the decode is lazy.
            image.load()
        except Exception as exc:  # noqa: BLE001
            raise MediaRejectedError("Image could not be decoded.") from exc
        return image

    @staticmethod
    def _flatten_to_rgb(image: Image.Image) -> Image.Image:
        """Return ``image`` as mode ``"RGB"``, compositing alpha onto white.

        Pillow uses "RGBA", "LA", and "P" (palette, possibly with
        transparency) as modes that carry alpha-like semantics.
        """
        has_alpha = image.mode in ("RGBA", "LA") or (
            image.mode == "P" and "transparency" in image.info
        )
        if has_alpha:
            # Convert through RGBA so alpha-compositing is well-defined,
            # then flatten onto a white RGB background so transparent
            # areas don't render as black.
            rgba = image.convert("RGBA")
            background = Image.new("RGB", rgba.size, (255, 255, 255))
            background.paste(rgba, mask=rgba.split()[-1])
            return background
        if image.mode != "RGB":
            return image.convert("RGB")
        return image

    def _insert_row(
        self,
        *,
        filename: str,
        original_filename: str,
        size_bytes: int,
        stored_path: str,
        alt_text: str,
        uploaded_by: int,
        uploaded_at: str,
    ) -> Media:
        """Insert the ``media`` row and return it re-read as a :class:`Media`.

        Insert and re-select run inside one ``engine.begin()`` block.
        """
        with self._engine.begin() as conn:
            result = conn.execute(
                text(
                    "INSERT INTO media"
                    " (filename, original_filename, content_type,"
                    " size_bytes, stored_path, alt_text, uploaded_by,"
                    " uploaded_at)"
                    " VALUES (:filename, :original_filename, :content_type,"
                    " :size_bytes, :stored_path, :alt_text, :uploaded_by,"
                    " :uploaded_at)"
                ),
                {
                    "filename": filename,
                    # Fall back to the generated name when the client
                    # sent no filename.
                    "original_filename": original_filename or filename,
                    "content_type": "image/jpeg",
                    "size_bytes": int(size_bytes),
                    "stored_path": stored_path,
                    "alt_text": alt_text or "",
                    "uploaded_by": int(uploaded_by),
                    "uploaded_at": uploaded_at,
                },
            )
            new_id = int(result.lastrowid)  # type: ignore[arg-type]
            row = conn.execute(
                text(
                    "SELECT id, filename, original_filename, content_type,"
                    " size_bytes, stored_path, alt_text, uploaded_by,"
                    " uploaded_at"
                    " FROM media WHERE id = :id"
                ),
                {"id": new_id},
            ).mappings().first()
            if row is None:  # pragma: no cover — just inserted
                raise RuntimeError("failed to reload just-inserted media row")
        return row_to_media(row)

    # ------------------------------------------------------------------
    # URL helpers
    # ------------------------------------------------------------------
    def public_url(self, media: Media) -> str:
        """Return the URL the public site uses to fetch ``media``.

        Built from the configured ``public_prefix`` + the partition
        under ``media_root``. A stored path outside the media root
        (should never happen — we always write under it) falls back
        to the partition-less prefix to avoid leaking filesystem
        paths.
        """
        try:
            rel = Path(media.stored_path).resolve().relative_to(
                self._media_root.resolve()
            )
        except (ValueError, OSError):
            return f"{self._public_prefix}/{media.filename}"
        return f"{self._public_prefix}/{rel.as_posix()}"
def _sniff_mime(data: bytes) -> str:
    """Ask python-magic which MIME type the bytes in ``data`` look like.

    This lives in its own function so tests have one seam to
    monkeypatch. The :mod:`magic` import is function-local on purpose:
    the module depends on libmagic being present on the filesystem,
    and a missing libmagic should not prevent the app from importing.
    """
    import magic

    # Only the leading 2 KB is inspected — far more than any image
    # header needs, and reading further adds nothing to a MIME sniff.
    sniff_window = 2048
    return magic.from_buffer(data[:sniff_window], mime=True)