From acd2d88d908427e86c91fd980c4a75643ac9ba92 Mon Sep 17 00:00:00 2001
From: John Franey <1728528+johnfraney@users.noreply.github.com>
Date: Tue, 31 Dec 2024 11:37:57 -0400
Subject: [PATCH] feat: headings and url_path Jinja filters
Adds headings and url_path Jinja filter plugins and updates Markdown
heading handling to add an ID to the headings.
These filters enable creating a table of contents and simplify showing
whether a link is active for the current page.
---
blurry/markdown/__init__.py | 7 ++
blurry/plugins/jinja_plugins/filters.py | 43 ++++++++++++
pyproject.toml | 4 ++
tests/test_jinja_filter_plugins.py | 87 +++++++++++++++++++++++++
tests/test_markdown_renderer.py | 32 +++++++++
5 files changed, 173 insertions(+)
create mode 100644 blurry/plugins/jinja_plugins/filters.py
create mode 100644 tests/test_jinja_filter_plugins.py
create mode 100644 tests/test_markdown_renderer.py
diff --git a/blurry/markdown/__init__.py b/blurry/markdown/__init__.py
index 9212110..d1d5e98 100644
--- a/blurry/markdown/__init__.py
+++ b/blurry/markdown/__init__.py
@@ -23,6 +23,7 @@
from blurry.images import generate_srcset_string
from blurry.images import get_widths_for_image_width
from blurry.plugins import discovered_markdown_plugins
+from blurry.plugins.jinja_plugins.filters import slugify
from blurry.settings import get_content_directory
from blurry.settings import SETTINGS
from blurry.types import is_str
@@ -135,6 +136,12 @@ def link(self, text, url, title: str | None = None) -> str:
return f"{text}"
+    def heading(self, text: str, level: int, **attrs: Any) -> str:
+        """Render a heading element that carries a slug-based ``id``.
+
+        Returns ``<hN id="slug">text</hN>`` followed by a newline, where ``N``
+        is ``level`` and the slug is ``slugify(text)``. Extra ``attrs`` are
+        accepted but not used.
+        """
+        tag = f"h{level}"
+        slug = slugify(text)
+        # An empty ``id`` attribute is not a usable anchor, so leave it off
+        # when the heading text slugifies to nothing (e.g. punctuation only).
+        id_attr = f' id="{slug}"' if slug else ""
+        # The closing tag needs its ``</`` prefix; without it the element is
+        # never closed and the following content nests inside the heading.
+        return f"<{tag}{id_attr}>{text}</{tag}>\n"
+
def is_blurry_renderer(
renderer: mistune.BaseRenderer,
diff --git a/blurry/plugins/jinja_plugins/filters.py b/blurry/plugins/jinja_plugins/filters.py
new file mode 100644
index 0000000..f2a8a82
--- /dev/null
+++ b/blurry/plugins/jinja_plugins/filters.py
@@ -0,0 +1,43 @@
+import re
+import unicodedata
+from urllib.parse import urlparse
+
+from selectolax.lexbor import LexborHTMLParser
+
+
+def url_path(url: str) -> str:
+    """Return only the path component of ``url``.
+
+    For example ``http://127.0.0.1:8000/getting-started/`` becomes
+    ``/getting-started/``; the other URL components are discarded.
+    """
+    return urlparse(url).path
+
+
+def slugify(value):
+    """
+    Turn ``value`` into a lowercase, hyphen-separated slug.
+
+    The text is NFKC-normalized, every character that is not a word
+    character, whitespace, or a hyphen is dropped, surrounding whitespace is
+    stripped, and each run of whitespace and/or hyphens becomes one hyphen.
+    Adapted from: https://github.com/django/django/blob/92053acbb9160862c3e743a99ed8ccff8d4f8fd6/django/utils/text.py
+    """
+    normalized = unicodedata.normalize("NFKC", value)
+    cleaned = re.sub(r"[^\w\s-]", "", normalized, flags=re.U)
+    trimmed = cleaned.strip().lower()
+    return re.sub(r"[-\s]+", "-", trimmed, flags=re.U)
+
+
+def headings(html: str, max_level: int = 2) -> list[dict]:
+    """List the headings found in ``html``, e.g. to build a table of contents.
+
+    Only ``h2`` through ``h6`` elements inside ``<body>`` are considered
+    (``h1`` is never included), and headings deeper than ``max_level`` are
+    skipped. Results are in the order the elements are matched.
+
+    Returns a list of dicts with the keys ``level`` (int), ``text`` (the
+    heading's text content), and ``id`` (the anchor to link to).
+    """
+    tree = LexborHTMLParser(html)
+    heading_list: list[dict] = []
+
+    for node in tree.css("body *"):
+        if node.tag not in {"h2", "h3", "h4", "h5", "h6"}:
+            continue
+        # The tag's last character is the heading level: "h3" -> 3.
+        level = int(node.tag[-1])
+        if level > max_level:
+            continue
+        text = node.text()
+        heading_list.append(
+            {
+                "level": level,
+                "text": text,
+                # Link to the anchor that actually exists in the page; only
+                # fall back to a generated slug when the heading has no id.
+                "id": node.attributes.get("id") or slugify(text),
+            }
+        )
+
+    return heading_list
diff --git a/pyproject.toml b/pyproject.toml
index d2e5bc5..5eb0062 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,3 +74,7 @@ container = 'blurry.plugins.markdown_plugins.container_plugin:container'
[tool.poetry.plugins."blurry.jinja_extensions"]
blurry_image = "blurry.plugins.jinja_plugins.blurry_image_extension:BlurryImage"
+
+[tool.poetry.plugins."blurry.jinja_filter_plugins"]
+url_path = "blurry.plugins.jinja_plugins.filters:url_path"
+headings = "blurry.plugins.jinja_plugins.filters:headings"
diff --git a/tests/test_jinja_filter_plugins.py b/tests/test_jinja_filter_plugins.py
new file mode 100644
index 0000000..fb4d89d
--- /dev/null
+++ b/tests/test_jinja_filter_plugins.py
@@ -0,0 +1,87 @@
+import pytest
+
+from blurry.plugins.jinja_plugins.filters import headings
+from blurry.plugins.jinja_plugins.filters import slugify
+from blurry.plugins.jinja_plugins.filters import url_path
+
+# NOTE(review): the element tags in this fixture were reconstructed from the
+# heading levels the tests below expect; confirm the wrapper elements against
+# the page markup this fixture is meant to mirror.
+html = """
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+ <title>Getting started: quick start | Blurry</title>
+</head>
+<body>
+<main>
+<h1>Blurry: A Python-powered static site generator</h1>
+<h2>What is Blurry?</h2>
+<p>Blurry is a static site generator with a terrible pun of a name: if you're generating static sight, you're making things Blurry.
+Blurry brings the concept of schema-first development to static site generators.
+Specifically, Blurry uses Schema.org schema type names as the names for its template files, and schema type properties as Markdown front matter to populate those templates.</p>
+<h2>Goals</h2>
+<h3>SEO performance</h3>
+<p>Blurry supports Schema.org and Open Graph with zero configuration.
+This enables rich Google results and link previews out-of-the-box.</p>
+<h3>Page speed</h3>
+<p>While using Blurry doesn't guarantee good page speed, it does solve a number of pain points that tend to slow down page loads.
+Blurry's image handling and HTML minification, for instance, can help get you a 100/100 PageSpeed score if the rest of your site is fast.</p>
+<h3>Minimal configuration</h3>
+<p>Blurry seeks to use sensible defaults so you can spend less time configuring and more time writing.
+A viable Blurry configuration file (<code>blurry.toml</code>) can be as simple as:</p>
+<pre><code>[blurry]
+domain = "johnfraney.ca"
+</code></pre>
+<h3>Semantic HTML</h3>
+<p>Where applicable, Blurry tries to use semantic HTML elements like <code>&lt;aside&gt;</code> over more generic elements like <code>&lt;div&gt;</code>.
+Using semantic HTML elements also facilities classless CSS styling, which can be useful when styling some Markdown-generated HTML elements, and it can be good for accessibility, too.</p>
+<h2>Non-goals</h2>
+<h3>"Gotta go fast!"</h3>
+<p>While Blurry aims to be performant, build performance is not its top priority.
+It's written in Python, so it may not be able to compete on speed with other static site generators like Hugo.
+Instead, it aims to be fast enough while taking advantage of the Python ecosystem.</p>
+</main>
+</body>
+</html>
+"""
+
+
+def test_headings_filter_defaults():
+    """With the default ``max_level`` only the ``h2`` headings are listed."""
+    expected = [
+        {"level": 2, "text": "What is Blurry?", "id": "what-is-blurry"},
+        {"level": 2, "text": "Goals", "id": "goals"},
+        {"level": 2, "text": "Non-goals", "id": "non-goals"},
+    ]
+    assert headings(html) == expected
+
+
+def test_headings_filter_max_level():
+    """Raising ``max_level`` to 3 adds the ``h3`` headings, in page order."""
+    expected = [
+        {"level": 2, "text": "What is Blurry?", "id": "what-is-blurry"},
+        {"level": 2, "text": "Goals", "id": "goals"},
+        {"level": 3, "text": "SEO performance", "id": "seo-performance"},
+        {"level": 3, "text": "Page speed", "id": "page-speed"},
+        {"level": 3, "text": "Minimal configuration", "id": "minimal-configuration"},
+        {"level": 3, "text": "Semantic HTML", "id": "semantic-html"},
+        {"level": 2, "text": "Non-goals", "id": "non-goals"},
+        {"level": 3, "text": '"Gotta go fast!"', "id": "gotta-go-fast"},
+    ]
+    assert headings(html, max_level=3) == expected
+
+
+@pytest.mark.parametrize(
+    ("title", "slug"),
+    [
+        ("Non-goals", "non-goals"),
+        ('"Gotta go fast!"', "gotta-go-fast"),
+        ("That's blasé", "thats-blasé"),
+        ("Sub-subsection 1.2.1", "sub-subsection-121"),
+    ],
+)
+def test_slugify(title, slug):
+    """Each heading title slugifies to the expected anchor."""
+    result = slugify(title)
+    assert result == slug
+
+
+def test_url_path():
+    """Scheme, host, and port are dropped, leaving only the path."""
+    path = url_path("http://127.0.0.1:8000/getting-started/quickstart/")
+    assert path == "/getting-started/quickstart/"
diff --git a/tests/test_markdown_renderer.py b/tests/test_markdown_renderer.py
new file mode 100644
index 0000000..ae00474
--- /dev/null
+++ b/tests/test_markdown_renderer.py
@@ -0,0 +1,32 @@
+from blurry.markdown import markdown
+
+MARKDOWN_WITH_HEADINGS = """
+# Home
+
+This is the homepage with some sections.
+
+## Section 1
+
+### Subsection 1.1
+
+It's a subsection.
+
+### Subsection 1.2
+
+It's another subsection.
+
+#### Sub-subsection 1.2.1
+
+Now we're nesting.
+
+## Section 2
+
+Look! A section!
+"""
+
+
+def test_renderer_headings():
+    """Rendered headings carry an ``id`` that is the slug of their text."""
+    html, _ = markdown.parse(MARKDOWN_WITH_HEADINGS)
+    # Heading levels follow the number of leading ``#`` in the Markdown above.
+    assert '<h1 id="home">Home</h1>' in html
+    assert '<h2 id="section-1">Section 1</h2>' in html
+    assert '<h4 id="sub-subsection-121">Sub-subsection 1.2.1</h4>' in html