Files
chicken_babies_site/tests/test_markdown.py
Phillip Tarrant 0306f71763 feat: phase 2 content model + cache — SQLite schema, markdown, TTL
Stand up the full SQLite content layer: all 7 tables from the authoritative
schema with WAL + foreign-keys enforced per-connection, entity dataclasses
plus row mappers, hand-rolled versioned migrations tracked in
schema_migrations, and an idempotent Python seed (system user + welcome
post + About page).

Add a Markdown->HTML service using markdown-it-py with a strict bleach
allowlist (tables intentionally omitted on both sides). Add a typed
in-process TTLCache[K,V] and wire it into real DB-backed PostService and
PageService, both exposing invalidate_all() for Phase 4 admin writes.

Rewire / and /about to read from the DB; homepage renders the seeded
welcome post, About renders page.title + sanitized body_html_cached.
Update the Phase 1 route tests accordingly.

Mark Phase 2 complete in docs/ROADMAP.md.
2026-04-21 15:40:35 -05:00

113 lines
4.0 KiB
Python

"""Tests for the Markdown → sanitized HTML pipeline.
We care about three things:
1. Safe inline markup (``**bold**``, ``*italic*``, links, lists) round-trips
into the expected HTML tags.
2. Dangerous constructs (``<script>``, ``<iframe>``, ``<style>``, inline
``onclick`` handlers, ``javascript:`` URLs) are stripped — not
escaped — from the output.
3. Tables do NOT render: :class:`MarkdownService` uses no ``table``
plugin and the bleach allowlist has no table tags, so no ``<table>``
markup reaches the output.
These are spot checks, not a full fuzz of bleach. The full allowlist
is already enforced in ``app.services.markdown``.
"""
from __future__ import annotations
import pytest
from app.services.markdown import MarkdownService, render_markdown_safe
@pytest.fixture
def md() -> MarkdownService:
    """Provide a brand-new :class:`MarkdownService` to each test.

    The fixture declares no explicit scope, so pytest's default
    (per-function) scope applies and no instance is shared between
    tests. Building one per test is assumed to be inexpensive.
    """
    service = MarkdownService()
    return service
def test_basic_markdown_renders_paragraphs_and_emphasis(md: MarkdownService) -> None:
    """Paragraph, bold and italic Markdown become ``<p>``, ``<strong>``, ``<em>``."""
    rendered = md.render("Hello **world** and *friends*.")
    expected_fragments = ("<p>", "<strong>world</strong>", "<em>friends</em>")
    # One assert per fragment so a failure names the missing piece.
    for fragment in expected_fragments:
        assert fragment in rendered
def test_script_tags_are_stripped(md: MarkdownService) -> None:
    """Neither the opening nor the closing ``<script>`` tag survives rendering."""
    rendered = md.render("Hello<script>alert('xss')</script>world")
    # Only the tags themselves are checked: with bleach's strip=True the
    # tag is dropped, while its former contents may linger as inert text.
    for forbidden in ("<script", "</script>"):
        assert forbidden not in rendered
def test_iframe_and_style_tags_are_stripped(md: MarkdownService) -> None:
    """``<iframe>`` and ``<style>`` tags are dropped while plain text is kept."""
    source = "<iframe src='evil'></iframe>\n\n<style>body{}</style>\n\nsafe"
    rendered = md.render(source)
    for forbidden in ("<iframe", "<style"):
        assert forbidden not in rendered
    # The harmless trailing paragraph must still come through.
    assert "safe" in rendered
def test_javascript_urls_are_stripped_from_links(md: MarkdownService) -> None:
    """Raw ``<a href="javascript:...">`` links lose the dangerous href.

    The link is written as raw HTML rather than ``[text](url)``
    Markdown syntax, because commonmark silently refuses to turn the
    unknown ``javascript:`` protocol into an anchor; raw HTML ensures
    the bleach allowlist actually has an anchor to filter.

    Two things are asserted:

    * the ``javascript:`` URL does not reach the sanitized output, and
    * the visible link text is still present, so the first assertion
      cannot pass vacuously on an empty or content-dropping render.
    """
    html = md.render('<a href="javascript:alert(1)">click</a>')
    assert "javascript:" not in html
    # Guard against a vacuous pass: sanitizing should remove the URL,
    # not the surrounding content.
    assert "click" in html
def test_allowed_link_and_image_attributes_survive(md: MarkdownService) -> None:
    """``href``/``title`` on links and ``alt``/``src`` on images are kept."""
    source = (
        '[hello](https://example.com "Example")\n\n'
        '![alt text](https://example.com/a.png "Caption")'
    )
    rendered = md.render(source)
    expected_attributes = (
        'href="https://example.com"',
        'title="Example"',
        'alt="alt text"',
        'src="https://example.com/a.png"',
    )
    for attribute in expected_attributes:
        assert attribute in rendered
def test_inline_event_handler_attribute_is_stripped(md: MarkdownService) -> None:
    """An ``onclick`` attribute on a raw ``<a>`` tag is absent from the output."""
    anchor_with_handler = '<a href="/x" onclick="alert(1)">x</a>'
    rendered = md.render(anchor_with_handler)
    assert "onclick" not in rendered
def test_table_tags_are_stripped(md: MarkdownService) -> None:
    """No table markup of any kind appears in the rendered output.

    Documents the intentional policy: the Markdown parser is the
    commonmark preset with NO table plugin, and the bleach allowlist
    has no table tags — widening either without the other would be
    a policy mismatch. If a future phase wants tables, this test
    should flip to assert the opposite along with the matching
    allowlist change.
    """
    src = "| a | b |\n|---|---|\n| 1 | 2 |\n"
    html = md.render(src)
    # Check every table-related tag, not just the outer wrapper, so a
    # partial widening of the policy (e.g. only ``td``) is still caught.
    for tag in ("<table", "<thead", "<tbody", "<tr", "<th", "<td"):
        assert tag not in html, f"unexpected {tag!r} in rendered output"
def test_module_level_helper_matches_class(md: MarkdownService) -> None:
    """The module-level helper and ``MarkdownService.render`` agree on output."""
    sample = "Hello **there**."
    via_helper = render_markdown_safe(sample)
    via_service = md.render(sample)
    assert via_helper == via_service