Head Hen CMS end-to-end: dashboard lists all posts (drafts + published), Markdown editor with live preview + drag-drop image upload, Pillow media pipeline re-encoding every upload to JPEG, post CRUD + publish toggle + hard delete, About page edit, and double-submit CSRF cookie enforced on every admin mutating endpoint (Phase 3's TODO markers resolved). Slug auto-generated on create and server-locked once a post has been published. Unpublish preserves `published_at` so re-publish keeps original date ordering. Every admin write invalidates the read-side Post/Page TTL caches and records an `auth_events` audit row. CSRF middleware is narrow by design — issues/refreshes the `cb_csrf` cookie only on `GET /admin*`, and mutating endpoints opt in via `require_csrf_form` or `require_csrf_header` Depends. Public routes, healthz, and pre-auth login stay untouched. 64 new tests cover slugs, CSRF, media, admin posts/pages services, and end-to-end CMS routes. Tests never mock the DB — real temp SQLite files per the CLAUDE.md mandate. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
324 lines
12 KiB
Python
324 lines
12 KiB
Python
"""Image upload pipeline: validate → re-encode → store → record.

Every admin image upload passes through this service. The contract is
strict on purpose — the site serves user-editable HTML (via the
sanitizer) plus the bytes that flow through here, so anything we miss
becomes XSS / RCE surface area.

Steps in :meth:`MediaService.save_upload`:

1. **Size cap** — reject anything over 8 MB at the bytes level
   (before decoding). We read the full buffer so we can hash and
   re-encode it; streaming would complicate Pillow's decode path and
   upload volumes are tiny.
2. **Magic-byte check** — :mod:`python-magic` inspects the first
   2048 bytes and yields a MIME type. Anything not in our allowlist
   (``image/jpeg``, ``image/png``, ``image/webp``) is rejected.
   Notably, ``image/gif`` is NOT allowed — animated GIFs have a long
   history of ambiguous / abuse-friendly encodings.
3. **Pillow decode** — open via :func:`PIL.Image.open` on a
   :class:`io.BytesIO` wrapper. Call ``.verify()`` on a dedicated copy
   (it consumes the stream), then re-open for the actual encode path.
   Reject anything larger than 10000 px per side as a defense against
   decompression bombs.
4. **Re-encode to JPEG** — always JPEG. Strip metadata by reopening
   into a clean :class:`PIL.Image.Image`; flatten alpha on a white
   background so transparent PNG / WebP images don't render as black.
5. **Store** — write to ``<media_root>/<yyyy>/<mm>/<random>.jpg`` where
   the random component is :func:`secrets.token_urlsafe(16)`. The
   client-supplied filename is kept only in the DB row's
   ``original_filename`` for display; it is NEVER used to build a
   filesystem path.
6. **DB row** — insert a :class:`Media` row. Return the loaded
   dataclass.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import io
|
|
import secrets
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Final, Optional
|
|
|
|
import structlog
|
|
from PIL import Image, UnidentifiedImageError
|
|
from sqlalchemy import Engine, text
|
|
|
|
from app.models.entities import Media
|
|
from app.models.mappers import row_to_media
|
|
from app.services.audit import AuditService
|
|
|
|
|
|
# Module-level structlog logger, named after this module.
_log = structlog.get_logger(__name__)
|
|
|
|
|
|
# Upper bound on the raw upload bytes. 8 MB matches the project
# security constraint; larger images are almost certainly a mistake
# for a brochure-site blog.
MAX_UPLOAD_BYTES: Final[int] = 8 * 1024 * 1024

# Maximum decoded dimension — reject any image wider or taller than
# this as a lightweight defense against decompression bombs.
MAX_PIXEL_DIMENSION: Final[int] = 10_000

# MIME types accepted from the magic-byte sniff. We always re-encode
# to JPEG regardless of input.
_ACCEPTED_MIME: Final[frozenset[str]] = frozenset(
    {"image/jpeg", "image/png", "image/webp"}
)

# Output quality for Pillow's JPEG encoder. 85 is a widely-used
# sweet spot for photograph-like content.
_JPEG_QUALITY: Final[int] = 85
|
|
|
|
|
|
class MediaRejectedError(Exception):
    """Raised when an upload fails any validation step.

    The message is user-facing (shown in the admin editor) — keep it
    generic and free of implementation detail.
    """
|
|
|
|
|
|
class MediaService:
    """Validate and store admin-uploaded images.

    Parameters
    ----------
    engine:
        Shared SQLAlchemy engine.
    media_root:
        Filesystem directory under which uploads live (the
        ``<yyyy>/<mm>/`` partition is appended). Relative paths are
        resolved against the process cwd, matching how the FastAPI
        StaticFiles mount is configured.
    public_prefix:
        URL prefix where the media root is mounted for public serving.
        Defaults to ``/media`` so the Markdown that the admin inserts
        after a drag-drop upload uses a path the public site can
        actually reach. ``""`` or ``"/"`` means the media root is
        mounted at the site root.
    audit:
        :class:`AuditService` for the ``media_uploaded`` event.
    """

    def __init__(
        self,
        engine: Engine,
        media_root: str,
        audit: AuditService,
        *,
        public_prefix: str = "/media",
    ) -> None:
        self._engine: Engine = engine
        self._media_root: Path = Path(media_root)
        # Normalize to "/<prefix>" with no trailing slash — we always join
        # with "/<yyyy>/...". An empty or "/"-only prefix normalizes to ""
        # so the joined URL starts with exactly one "/"; "//2024/..." would
        # be interpreted by browsers as a protocol-relative URL.
        trimmed = public_prefix.strip("/")
        self._public_prefix: str = f"/{trimmed}" if trimmed else ""
        self._audit: AuditService = audit

    # ------------------------------------------------------------------
    # save_upload
    # ------------------------------------------------------------------
    def save_upload(
        self,
        *,
        original_filename: str,
        data: bytes,
        uploaded_by: int,
        alt_text: str = "",
    ) -> Media:
        """Validate + re-encode + persist a new media upload.

        Parameters
        ----------
        original_filename:
            The filename the client submitted. Stored in the DB row
            for display only; NEVER used to build a filesystem path.
        data:
            Raw request body. Must be at most :data:`MAX_UPLOAD_BYTES`.
        uploaded_by:
            :class:`User` id of the authenticated admin performing the
            upload.
        alt_text:
            Optional alt text. Empty is allowed — admin can set it
            later by hand-editing the Markdown.

        Returns
        -------
        Media
            Fully-populated :class:`Media` dataclass.

        Raises
        ------
        MediaRejectedError
            When any validation step fails (size, MIME, decode,
            dimensions).
        """
        # 1. Size cap — cheap, do first.
        if not data:
            raise MediaRejectedError("Empty upload.")
        if len(data) > MAX_UPLOAD_BYTES:
            raise MediaRejectedError("Upload exceeds the 8 MB limit.")

        # 2. Magic-byte sniff.
        sniffed_mime = _sniff_mime(data)
        if sniffed_mime not in _ACCEPTED_MIME:
            raise MediaRejectedError(
                f"Unsupported image type ({sniffed_mime})."
            )

        # 3 + 4. Verify, bound-check, decode, then normalize to RGB.
        image_out = self._flatten_to_rgb(self._decode_image(data))

        # 5. Randomize the storage name and partition by month.
        now = datetime.now(timezone.utc)
        partition = f"{now:%Y}/{now:%m}"
        random_name = f"{secrets.token_urlsafe(16)}.jpg"
        target_dir = self._media_root / partition
        target_dir.mkdir(parents=True, exist_ok=True)
        target_path = target_dir / random_name

        try:
            # Re-encode to JPEG with the metadata stripped (a fresh
            # re-save removes any EXIF / color profile the source had).
            # NOTE(review): the EXIF orientation tag is dropped with the
            # rest of the metadata, so rotated phone photos may render
            # sideways — confirm whether orientation should be applied
            # before encoding.
            image_out.save(
                target_path,
                format="JPEG",
                quality=_JPEG_QUALITY,
                optimize=True,
            )
            final_bytes = target_path.stat().st_size

            # 6. DB row.
            media = self._insert_media_row(
                {
                    "filename": random_name,
                    "original_filename": original_filename or random_name,
                    "content_type": "image/jpeg",
                    "size_bytes": int(final_bytes),
                    "stored_path": str(target_path),
                    "alt_text": alt_text or "",
                    "uploaded_by": int(uploaded_by),
                    "uploaded_at": now.isoformat(),
                }
            )
        except Exception:
            # The file only has value together with its DB row. If the
            # encode or the insert failed, remove whatever was written so
            # the media root does not accumulate orphans. Cleanup is
            # best-effort and must not mask the original error.
            try:
                target_path.unlink()
            except OSError:
                pass
            raise

        self._audit.record(
            "media_uploaded",
            user_id=uploaded_by,
            detail={
                "media_id": media.id,
                "filename": media.filename,
                "size_bytes": media.size_bytes,
                "original_mime": sniffed_mime,
            },
        )
        return media

    @staticmethod
    def _decode_image(data: bytes) -> Image.Image:
        """Return a fully-loaded Pillow image for ``data`` or reject it.

        Any Pillow failure is translated to a generic
        :class:`MediaRejectedError` so the Pillow error string is never
        echoed back to the admin UI.
        """
        decode_error = "Image could not be decoded."

        # verify() consumes the stream, so it gets its own BytesIO.
        try:
            Image.open(io.BytesIO(data)).verify()
        except Exception as exc:  # noqa: BLE001
            raise MediaRejectedError(decode_error) from exc

        # Re-open for the actual encode. open() only parses the header,
        # so the size is known before any pixel data is decoded.
        try:
            image = Image.open(io.BytesIO(data))
        except Exception as exc:  # noqa: BLE001
            raise MediaRejectedError(decode_error) from exc

        # Enforce the dimension limits BEFORE load(): checking afterwards
        # would already have paid the full decode cost the limit exists
        # to prevent.
        width, height = image.size
        if width <= 0 or height <= 0:
            raise MediaRejectedError("Image has zero dimension.")
        if width > MAX_PIXEL_DIMENSION or height > MAX_PIXEL_DIMENSION:
            raise MediaRejectedError(
                "Image dimensions exceed the maximum allowed."
            )

        # Load here so we catch truncated / corrupt payloads that
        # verify() misses. Without load() the decode is lazy.
        try:
            image.load()
        except Exception as exc:  # noqa: BLE001
            raise MediaRejectedError(decode_error) from exc
        return image

    @staticmethod
    def _flatten_to_rgb(image: Image.Image) -> Image.Image:
        """Return ``image`` as mode "RGB", compositing alpha onto white.

        Pillow uses "RGBA", "LA", and "P" (palette, possibly with
        transparency) as modes that carry alpha-like semantics. JPEG
        has no alpha channel, so those are flattened onto a white
        background; every other non-RGB mode is converted directly.
        """
        carries_alpha = image.mode in ("RGBA", "LA") or (
            image.mode == "P" and "transparency" in image.info
        )
        if carries_alpha:
            # Convert through RGBA so alpha-compositing is well-defined,
            # then flatten onto a white RGB background.
            rgba = image.convert("RGBA")
            background = Image.new("RGB", rgba.size, (255, 255, 255))
            background.paste(rgba, mask=rgba.split()[-1])
            return background
        if image.mode != "RGB":
            return image.convert("RGB")
        return image

    def _insert_media_row(self, params: dict[str, object]) -> Media:
        """Insert one ``media`` row and return it as a :class:`Media`.

        The insert, the reload, and the row mapping all happen inside a
        single transaction, so a failure at any point rolls the insert
        back.
        """
        with self._engine.begin() as conn:
            result = conn.execute(
                text(
                    "INSERT INTO media"
                    " (filename, original_filename, content_type,"
                    " size_bytes, stored_path, alt_text, uploaded_by,"
                    " uploaded_at)"
                    " VALUES (:filename, :original_filename, :content_type,"
                    " :size_bytes, :stored_path, :alt_text, :uploaded_by,"
                    " :uploaded_at)"
                ),
                params,
            )
            new_id = int(result.lastrowid)  # type: ignore[arg-type]
            row = conn.execute(
                text(
                    "SELECT id, filename, original_filename, content_type,"
                    " size_bytes, stored_path, alt_text, uploaded_by,"
                    " uploaded_at"
                    " FROM media WHERE id = :id"
                ),
                {"id": new_id},
            ).mappings().first()

            if row is None:  # pragma: no cover — just inserted
                raise RuntimeError("failed to reload just-inserted media row")

            return row_to_media(row)

    # ------------------------------------------------------------------
    # URL helpers
    # ------------------------------------------------------------------
    def public_url(self, media: Media) -> str:
        """Return the URL the public site uses to fetch ``media``.

        Built from the configured ``public_prefix`` + the partition
        under ``media_root``. A stored path outside the media root
        (should never happen — we always write under it) falls back
        to the partition-less prefix to avoid leaking filesystem
        paths.
        """
        try:
            rel = Path(media.stored_path).resolve().relative_to(
                self._media_root.resolve()
            )
        except (ValueError, OSError):
            return f"{self._public_prefix}/{media.filename}"
        return f"{self._public_prefix}/{rel.as_posix()}"
|
|
|
|
|
|
def _sniff_mime(data: bytes) -> str:
    """Return the MIME type of ``data`` according to python-magic.

    Wrapped so tests that monkeypatch can reach a single seam, and so
    the import of :mod:`magic` stays local (the module has a
    filesystem dependency on libmagic that should not block app
    import).

    Only the first 2048 bytes of ``data`` are inspected.
    """
    # Deliberately imported inside the function rather than at module
    # level: a missing libmagic then fails the upload that needs it
    # instead of failing the import of this module.
    import magic

    # First 2 KB is well beyond what any image header uses, and
    # streaming beyond that buys nothing for MIME sniffing.
    head = data[:2048]
    return magic.from_buffer(head, mime=True)
|