From acd2d88d908427e86c91fd980c4a75643ac9ba92 Mon Sep 17 00:00:00 2001
From: John Franey <1728528+johnfraney@users.noreply.github.com>
Date: Tue, 31 Dec 2024 11:37:57 -0400
Subject: [PATCH] feat: headings and url_path Jinja filters

Adds headings and url_path Jinja filter plugins and updates Markdown
heading handling to add an ID to the headings.

These filters enable creating a table of contents and simplify showing
whether a link is active for the current page.
---
 blurry/markdown/__init__.py             |  7 ++
 blurry/plugins/jinja_plugins/filters.py | 43 ++++++++++++
 pyproject.toml                          |  4 ++
 tests/test_jinja_filter_plugins.py      | 87 +++++++++++++++++++++++++
 tests/test_markdown_renderer.py         | 32 +++++++++
 5 files changed, 173 insertions(+)
 create mode 100644 blurry/plugins/jinja_plugins/filters.py
 create mode 100644 tests/test_jinja_filter_plugins.py
 create mode 100644 tests/test_markdown_renderer.py
diff --git a/blurry/markdown/__init__.py b/blurry/markdown/__init__.py
index 9212110..d1d5e98 100644
--- a/blurry/markdown/__init__.py
+++ b/blurry/markdown/__init__.py
@@ -23,6 +23,7 @@
 from blurry.images import generate_srcset_string
 from blurry.images import get_widths_for_image_width
 from blurry.plugins import discovered_markdown_plugins
+from blurry.plugins.jinja_plugins.filters import slugify
 from blurry.settings import get_content_directory
 from blurry.settings import SETTINGS
 from blurry.types import is_str
@@ -135,6 +136,12 @@ def link(self, text, url, title: str | None = None) -> str:
 
         return f"<a {attrs_string}>{text}</a>"
 
+    def heading(self, text: str, level: int, **attrs: Any) -> str:
+        """Render a heading whose id is ``attrs["id"]`` or else ``slugify(text)``."""
+        heading_id = attrs.get("id") or slugify(text)
+        id_attr = f' id="{heading_id}"' if heading_id else ""  # never emit id=""
+        return f"<h{level}{id_attr}>{text}</h{level}>\n"
+
 
 def is_blurry_renderer(
     renderer: mistune.BaseRenderer,
diff --git a/blurry/plugins/jinja_plugins/filters.py b/blurry/plugins/jinja_plugins/filters.py
new file mode 100644
index 0000000..f2a8a82
--- /dev/null
+++ b/blurry/plugins/jinja_plugins/filters.py
@@ -0,0 +1,43 @@
+import re
+import unicodedata
+from urllib.parse import urlparse
+
+from selectolax.lexbor import LexborHTMLParser
+
+
+def url_path(url: str) -> str:
+    """Return just the path component of ``url``, as parsed by ``urlparse``."""
+    return urlparse(url).path
+
+
+def slugify(value):
+    """Return a lowercase, hyphen-separated slug of ``value``.
+
+    After NFKC normalisation, every character that is not a word character,
+    whitespace, or a hyphen is dropped; the result is stripped and lowercased,
+    and each run of whitespace/hyphens collapses to one hyphen.
+    Adapted from: https://github.com/django/django/blob/92053acbb9160862c3e743a99ed8ccff8d4f8fd6/django/utils/text.py
+    """
+    cleaned = re.sub(r"[^\w\s-]", "", unicodedata.normalize("NFKC", value), flags=re.U)
+    return re.sub(r"[-\s]+", "-", cleaned.strip().lower(), flags=re.U)
+
+
+def headings(html: str, max_level: int = 2) -> list[dict]:
+    """List the ``h2``-``h6`` headings of ``html`` down to ``max_level``.
+
+    Returns dicts with ``level``, ``text`` and ``id`` keys in document order.
+    A heading's own non-empty ``id`` attribute is used as-is so links hit the
+    rendered anchor; headings without one fall back to ``slugify(text)``.
+    """
+    tree = LexborHTMLParser(html)
+    heading_list: list = []
+    for node in tree.css("body *"):
+        if node.tag not in {"h2", "h3", "h4", "h5", "h6"}:
+            continue
+        level = int(node.tag[-1])
+        if level > max_level:
+            continue
+        text = node.text()
+        heading_id = node.attributes.get("id") or slugify(text)
+        heading_list.append({"level": level, "text": text, "id": heading_id})
+    return heading_list
diff --git a/pyproject.toml b/pyproject.toml
index d2e5bc5..5eb0062 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,3 +74,7 @@ container = 'blurry.plugins.markdown_plugins.container_plugin:container'
 
 [tool.poetry.plugins."blurry.jinja_extensions"]
 blurry_image = "blurry.plugins.jinja_plugins.blurry_image_extension:BlurryImage"
+
+[tool.poetry.plugins."blurry.jinja_filter_plugins"]
+url_path = "blurry.plugins.jinja_plugins.filters:url_path"
+headings = "blurry.plugins.jinja_plugins.filters:headings"
diff --git a/tests/test_jinja_filter_plugins.py b/tests/test_jinja_filter_plugins.py
new file mode 100644
index 0000000..fb4d89d
--- /dev/null
+++ b/tests/test_jinja_filter_plugins.py
@@ -0,0 +1,87 @@
+import pytest
+
+from blurry.plugins.jinja_plugins.filters import headings
+from blurry.plugins.jinja_plugins.filters import slugify
+from blurry.plugins.jinja_plugins.filters import url_path
+
+html = """
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <title>Getting started: quick start | Blurry</title>
+</head>
+
+<body>
+<main>
+        
+<h1>Blurry: A Python-powered static site generator</h1>
+<h2>What is Blurry?</h2>
+<p>Blurry is a static site generator with a terrible pun of a name: if you're generating static sight, you're making things Blurry.</p>
+<p>Blurry brings the concept of schema-first development to static site generators.
+Specifically, Blurry uses <a href="https://schema.org/" target="_blank" rel="noopener">Schema.org</a> schema type names as the names for its template files, and schema type properties as Markdown front matter to populate those templates.</p>
+<h2>Goals</h2>
+<h3>SEO performance</h3>
+<p>Blurry supports <a href="https://schema.org/" target="_blank" rel="noopener">Schema.org</a> and <a href="https://ogp.me/" target="_blank" rel="noopener">Open Graph</a> with zero configuration.
+This enables <a href="https://developers.google.com/search/docs/appearance/structured-data/search-gallery" target="_blank" rel="noopener">rich Google results</a> and <a href="https://www.opengraph.xyz/" target="_blank" rel="noopener">link previews</a> out-of-the-box.</p>
+<h3>Page speed</h3>
+<p>While using Blurry doesn't guarantee good page speed, it does solve a number of pain points that tend to slow down page loads.</p>
+<p><a href="/content/images/" rel="noreferrer">Blurry's image handling</a> and HTML minification, for instance, can help get you a 100/100 <a href="https://pagespeed.web.dev/" target="_blank" rel="noopener">PageSpeed</a> score if the rest of your site is fast.</p>
+<h3>Minimal configuration</h3>
+<p>Blurry seeks to use sensible defaults so you can spend less time configuring and more time writing.
+A viable Blurry configuration file (<a href="/../configuration/blurry.toml/" rel="noreferrer"><code>blurry.toml</code></a>) can be as simple as:</p>
+<pre><code class="language-toml hljs language-ini"><span class="hljs-section">[blurry]</span>
+<span class="hljs-attr">domain</span> = <span class="hljs-string">"johnfraney.ca"</span>
+</code></pre>
+<h3>Semantic HTML</h3>
+<p>Where applicable, Blurry tries to use semantic HTML elements like <code>&lt;aside&gt;</code> over more generic elements like <code>&lt;div&gt;</code>.
+Using semantic HTML elements also facilities classless CSS styling, which can be useful when styling some Markdown-generated HTML elements, and it can be <a href="https://developer.mozilla.org/en-US/docs/Learn/Accessibility/HTML" target="_blank" rel="noopener">good for accessibility</a>, too.</p>
+<h2>Non-goals</h2>
+<h3>"Gotta go fast!"</h3>
+<p>While Blurry aims to be performant, build performance is not its top priority.
+It's written in Python, so it may not be able to compete on speed with other static site generators like <a href="https://gohugo.io/" target="_blank" rel="noopener">Hugo</a>.
+Instead, it aims to be <em>fast enough</em> while taking advantage of the Python ecosystem.</p>
+    </main>
+</body>
+</html>"""
+
+
+def test_headings_filter_defaults():
+    """By default only the fixture's ``<h2>`` headings are returned, in order."""
+    assert headings(html) == [
+        {"level": 2, "text": "What is Blurry?", "id": "what-is-blurry"},
+        {"level": 2, "text": "Goals", "id": "goals"},
+        {"level": 2, "text": "Non-goals", "id": "non-goals"},
+    ]
+
+
+def test_headings_filter_max_level():
+    """``max_level=3`` adds the ``<h3>`` headings, interleaved in document order."""
+    assert headings(html, max_level=3) == [
+        {"level": 2, "text": "What is Blurry?", "id": "what-is-blurry"},
+        {"level": 2, "text": "Goals", "id": "goals"},
+        {"level": 3, "text": "SEO performance", "id": "seo-performance"},
+        {"level": 3, "text": "Page speed", "id": "page-speed"},
+        {"level": 3, "text": "Minimal configuration", "id": "minimal-configuration"},
+        {"level": 3, "text": "Semantic HTML", "id": "semantic-html"},
+        {"level": 2, "text": "Non-goals", "id": "non-goals"},
+        {"level": 3, "text": '"Gotta go fast!"', "id": "gotta-go-fast"},
+    ]
+
+
+@pytest.mark.parametrize(
+    ("title", "slug"),
+    [
+        ("Non-goals", "non-goals"),
+        ('"Gotta go fast!"', "gotta-go-fast"),
+        ("That's blasé", "thats-blasé"),  # non-ASCII letters are kept
+        ("Sub-subsection 1.2.1", "sub-subsection-121"),
+    ],
+)
+def test_slugify(title, slug):
+    assert slugify(title) == slug
+
+
+def test_url_path():
+    full_url = "http://127.0.0.1:8000/getting-started/quickstart/"
+    assert url_path(full_url) == "/getting-started/quickstart/"  # path only
diff --git a/tests/test_markdown_renderer.py b/tests/test_markdown_renderer.py
new file mode 100644
index 0000000..ae00474
--- /dev/null
+++ b/tests/test_markdown_renderer.py
@@ -0,0 +1,32 @@
+from blurry.markdown import markdown
+
+MARKDOWN_WITH_HEADINGS = """
+# Home
+
+This is the homepage with some sections.
+
+## Section 1
+
+### Subsection 1.1
+
+It's a subsection.
+
+### Subsection 1.2
+
+It's another subsection.
+
+#### Sub-subsection 1.2.1
+
+Now we're nesting.
+
+## Section 2
+
+Look! A section!
+"""
+
+
+def test_renderer_headings():
+    rendered, _state = markdown.parse(MARKDOWN_WITH_HEADINGS)
+    assert '<h1 id="home">Home</h1>' in rendered
+    assert '<h2 id="section-1">Section 1</h2>' in rendered
+    assert '<h4 id="sub-subsection-121">Sub-subsection 1.2.1</h4>' in rendered