Stand up the full SQLite content layer: all 7 tables from the authoritative schema with WAL + foreign-keys enforced per-connection, entity dataclasses plus row mappers, hand-rolled versioned migrations tracked in schema_migrations, and an idempotent Python seed (system user + welcome post + About page). Add a Markdown->HTML service using markdown-it-py with a strict bleach allowlist (tables intentionally omitted on both sides). Add a typed in-process TTLCache[K,V] and wire it into real DB-backed PostService and PageService, both exposing invalidate_all() for Phase 4 admin writes. Rewire / and /about to read from the DB; homepage renders the seeded welcome post, About renders page.title + sanitized body_html_cached. Update the Phase 1 route tests accordingly. Mark Phase 2 complete in docs/ROADMAP.md.
113 lines
4.0 KiB
Python
113 lines
4.0 KiB
Python
"""Tests for the Markdown → sanitized HTML pipeline.
|
|
|
|
We care about three things:
|
|
|
|
1. Safe inline markup (``**bold**``, ``*italic*``, links, lists) round-trips
|
|
into the expected HTML tags.
|
|
2. Dangerous constructs (``<script>``, ``<iframe>``, ``<style>``, inline
|
|
``onclick`` handlers, ``javascript:`` URLs) are stripped — not
|
|
escaped — from the output.
|
|
3. Tables do NOT render: the ``table`` plugin is intentionally not enabled
   in :class:`MarkdownService`, and the sanitizer allowlist has no table tags.
|
|
|
|
These are spot checks, not a full fuzz of bleach. The full allowlist
|
|
is already enforced in ``app.services.markdown``.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from app.services.markdown import MarkdownService, render_markdown_safe
|
|
|
|
|
|
@pytest.fixture
def md() -> MarkdownService:
    """Provide a newly constructed :class:`MarkdownService` to a test.

    The fixture uses pytest's default (function) scope, so every test
    receives its own instance and no state is shared between tests.
    """
    service = MarkdownService()
    return service
|
|
|
|
|
|
def test_basic_markdown_renders_paragraphs_and_emphasis(md: MarkdownService) -> None:
    """Paragraph, bold and italic Markdown map to ``<p>``/``<strong>``/``<em>``."""
    rendered = md.render("Hello **world** and *friends*.")
    # Each fragment must appear somewhere in the rendered HTML.
    for fragment in ("<p>", "<strong>world</strong>", "<em>friends</em>"):
        assert fragment in rendered
|
|
|
|
|
|
def test_script_tags_are_stripped(md: MarkdownService) -> None:
    """Raw ``<script>`` markup never reaches the rendered output."""
    rendered = md.render("Hello<script>alert('xss')</script>world")
    # Only the tags themselves are checked: the sanitizer is expected to
    # drop them, and any leftover inner text is inert once the tags are gone.
    for tag in ("<script", "</script>"):
        assert tag not in rendered
|
|
|
|
|
|
def test_iframe_and_style_tags_are_stripped(md: MarkdownService) -> None:
    """``<iframe>`` and ``<style>`` are removed while ordinary text survives."""
    source = "<iframe src='evil'></iframe>\n\n<style>body{}</style>\n\nsafe"
    rendered = md.render(source)
    for forbidden in ("<iframe", "<style"):
        assert forbidden not in rendered
    # The trailing plain paragraph must not be lost along with the bad tags.
    assert "safe" in rendered
|
|
|
|
|
|
def test_javascript_urls_are_stripped_from_links(md: MarkdownService) -> None:
    """A raw anchor with a ``javascript:`` href loses that URL.

    The anchor is written as raw HTML on purpose. Per the original
    author's note, the ``[text](url)`` Markdown form would not become
    an anchor at all for the ``javascript:`` protocol, so the sanitizer
    would have nothing to filter. The only requirement checked here is
    that ``javascript:`` is absent from the sanitized output.
    """
    raw_anchor = '<a href="javascript:alert(1)">click</a>'
    assert "javascript:" not in md.render(raw_anchor)
|
|
|
|
|
|
def test_allowed_link_and_image_attributes_survive(md: MarkdownService) -> None:
    """Safe link/image attributes are preserved.

    The source contains two paragraphs: a link with a title and an
    image with alt text. After rendering and sanitizing, the link's
    ``href``/``title`` and the image's ``alt``/``src`` must all still
    be present.
    """
    html = md.render(
        '[hello](https://example.com "Example")\n\n'
        # The image must be part of the input: the ``alt``/``src``
        # assertions below can only hold if an image is actually rendered.
        '![alt text](https://example.com/a.png)'
    )
    assert 'href="https://example.com"' in html
    assert 'title="Example"' in html
    assert 'alt="alt text"' in html
    assert 'src="https://example.com/a.png"' in html
|
|
|
|
|
|
def test_inline_event_handler_attribute_is_stripped(md: MarkdownService) -> None:
    """An inline ``onclick`` attribute on a raw anchor is not emitted."""
    raw_anchor = '<a href="/x" onclick="alert(1)">x</a>'
    rendered = md.render(raw_anchor)
    assert "onclick" not in rendered
|
|
|
|
|
|
def test_table_tags_are_stripped(md: MarkdownService) -> None:
    """Pipe-table Markdown must not produce a ``<table>`` element.

    This pins the intentional policy: the parser runs without a table
    plugin and the sanitizer allowlist contains no table tags. Widening
    only one of the two would be a policy mismatch. If a later phase
    adds table support, flip this assertion together with the matching
    allowlist change.
    """
    pipe_table = "| a | b |\n|---|---|\n| 1 | 2 |\n"
    assert "<table" not in md.render(pipe_table)
|
|
|
|
|
|
def test_module_level_helper_matches_class(md: MarkdownService) -> None:
    """The module-level helper and the class method render identically."""
    source = "Hello **there**."
    via_helper = render_markdown_safe(source)
    via_class = md.render(source)
    assert via_helper == via_class
|