From acd2d88d908427e86c91fd980c4a75643ac9ba92 Mon Sep 17 00:00:00 2001 From: John Franey <1728528+johnfraney@users.noreply.github.com> Date: Tue, 31 Dec 2024 11:37:57 -0400 Subject: [PATCH] feat: headings and url_path Jinja filters Adds headings and url_path Jinja filter plugins and updates Markdown heading handling to add an ID to the headings. These filters enable creating a table of contents and simplify showing whether a link is active for the current page. --- blurry/markdown/__init__.py | 7 ++ blurry/plugins/jinja_plugins/filters.py | 43 ++++++++++++ pyproject.toml | 4 ++ tests/test_jinja_filter_plugins.py | 87 +++++++++++++++++++++++++ tests/test_markdown_renderer.py | 32 +++++++++ 5 files changed, 173 insertions(+) create mode 100644 blurry/plugins/jinja_plugins/filters.py create mode 100644 tests/test_jinja_filter_plugins.py create mode 100644 tests/test_markdown_renderer.py diff --git a/blurry/markdown/__init__.py b/blurry/markdown/__init__.py index 9212110..d1d5e98 100644 --- a/blurry/markdown/__init__.py +++ b/blurry/markdown/__init__.py @@ -23,6 +23,7 @@ from blurry.images import generate_srcset_string from blurry.images import get_widths_for_image_width from blurry.plugins import discovered_markdown_plugins +from blurry.plugins.jinja_plugins.filters import slugify from blurry.settings import get_content_directory from blurry.settings import SETTINGS from blurry.types import is_str @@ -135,6 +136,12 @@ def link(self, text, url, title: str | None = None) -> str: return f"{text}" + def heading(self, text: str, level: int, **attrs: Any) -> str: + tag = f"h{level}" + html = f"<{tag}" + html += f' id="{slugify(text)}"' + return f"{html}>{text}\n" + def is_blurry_renderer( renderer: mistune.BaseRenderer, diff --git a/blurry/plugins/jinja_plugins/filters.py b/blurry/plugins/jinja_plugins/filters.py new file mode 100644 index 0000000..f2a8a82 --- /dev/null +++ b/blurry/plugins/jinja_plugins/filters.py @@ -0,0 +1,43 @@ +import re +import unicodedata +from 
urllib.parse import urlparse + +from selectolax.lexbor import LexborHTMLParser + + +def url_path(url: str) -> str: + url_instance = urlparse(url) + return url_instance.path + + +def slugify(value): + """ + Convert spaces to hyphens. + Remove characters that aren't alphanumerics, underscores, or hyphens. + Convert to lowercase. Also strip leading and trailing whitespace. + Adapted from: https://github.com/django/django/blob/92053acbb9160862c3e743a99ed8ccff8d4f8fd6/django/utils/text.py + """ + value = unicodedata.normalize("NFKC", value) + value = re.sub(r"[^\w\s-]", "", value, flags=re.U).strip().lower() + return re.sub(r"[-\s]+", "-", value, flags=re.U) + + +def headings(html: str, max_level: int = 2): + tree = LexborHTMLParser(html) + heading_list: list = [] + + for node in tree.css("body *"): + if node.tag in {"h2", "h3", "h4", "h5", "h6"}: + level = int(node.tag[-1]) + if level > max_level: + continue + text = node.text() + heading_list.append( + { + "level": level, + "text": text, + "id": slugify(text), + } + ) + + return heading_list diff --git a/pyproject.toml b/pyproject.toml index d2e5bc5..5eb0062 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,3 +74,7 @@ container = 'blurry.plugins.markdown_plugins.container_plugin:container' [tool.poetry.plugins."blurry.jinja_extensions"] blurry_image = "blurry.plugins.jinja_plugins.blurry_image_extension:BlurryImage" + +[tool.poetry.plugins."blurry.jinja_filter_plugins"] +url_path = "blurry.plugins.jinja_plugins.filters:url_path" +headings = "blurry.plugins.jinja_plugins.filters:headings" diff --git a/tests/test_jinja_filter_plugins.py b/tests/test_jinja_filter_plugins.py new file mode 100644 index 0000000..fb4d89d --- /dev/null +++ b/tests/test_jinja_filter_plugins.py @@ -0,0 +1,87 @@ +import pytest + +from blurry.plugins.jinja_plugins.filters import headings +from blurry.plugins.jinja_plugins.filters import slugify +from blurry.plugins.jinja_plugins.filters import url_path + +html = """ + + + + + Getting 
started: quick start | Blurry + + + +
+ +

Blurry: A Python-powered static site generator

+

What is Blurry?

+

Blurry is a static site generator with a terrible pun of a name: if you're generating static sight, you're making things Blurry.

+

Blurry brings the concept of schema-first development to static site generators. +Specifically, Blurry uses Schema.org schema type names as the names for its template files, and schema type properties as Markdown front matter to populate those templates.

+

Goals

+

SEO performance

+

Blurry supports Schema.org and Open Graph with zero configuration. +This enables rich Google results and link previews out-of-the-box.

+

Page speed

+

While using Blurry doesn't guarantee good page speed, it does solve a number of pain points that tend to slow down page loads.

+

Blurry's image handling and HTML minification, for instance, can help get you a 100/100 PageSpeed score if the rest of your site is fast.

+

Minimal configuration

+

Blurry seeks to use sensible defaults so you can spend less time configuring and more time writing. +A viable Blurry configuration file (blurry.toml) can be as simple as:

+
[blurry]
+domain = "johnfraney.ca"
+
+

Semantic HTML

+

Where applicable, Blurry tries to use semantic HTML elements like <aside> over more generic elements like <div>. +Using semantic HTML elements also facilitates classless CSS styling, which can be useful when styling some Markdown-generated HTML elements, and it can be good for accessibility, too.

+

Non-goals

+

"Gotta go fast!"

+

While Blurry aims to be performant, build performance is not its top priority. +It's written in Python, so it may not be able to compete on speed with other static site generators like Hugo. +Instead, it aims to be fast enough while taking advantage of the Python ecosystem.

+
+ +""" + + +def test_headings_filter_defaults(): + heading_list = headings(html) + assert heading_list == [ + {"level": 2, "text": "What is Blurry?", "id": "what-is-blurry"}, + {"level": 2, "text": "Goals", "id": "goals"}, + {"level": 2, "text": "Non-goals", "id": "non-goals"}, + ] + + +def test_headings_filter_max_level(): + heading_list = headings(html, max_level=3) + assert heading_list == [ + {"level": 2, "text": "What is Blurry?", "id": "what-is-blurry"}, + {"level": 2, "text": "Goals", "id": "goals"}, + {"level": 3, "text": "SEO performance", "id": "seo-performance"}, + {"level": 3, "text": "Page speed", "id": "page-speed"}, + {"level": 3, "text": "Minimal configuration", "id": "minimal-configuration"}, + {"level": 3, "text": "Semantic HTML", "id": "semantic-html"}, + {"level": 2, "text": "Non-goals", "id": "non-goals"}, + {"level": 3, "text": '"Gotta go fast!"', "id": "gotta-go-fast"}, + ] + + +@pytest.mark.parametrize( + "title, slug", + [ + ["Non-goals", "non-goals"], + ['"Gotta go fast!"', "gotta-go-fast"], + ["That's blasé", "thats-blasé"], + ["Sub-subsection 1.2.1", "sub-subsection-121"], + ], +) +def test_slugify(title, slug): + assert slugify(title) == slug + + +def test_url_path(): + url = "http://127.0.0.1:8000/getting-started/quickstart/" + assert url_path(url) == "/getting-started/quickstart/" diff --git a/tests/test_markdown_renderer.py b/tests/test_markdown_renderer.py new file mode 100644 index 0000000..ae00474 --- /dev/null +++ b/tests/test_markdown_renderer.py @@ -0,0 +1,32 @@ +from blurry.markdown import markdown + +MARKDOWN_WITH_HEADINGS = """ +# Home + +This is the homepage with some sections. + +## Section 1 + +### Subsection 1.1 + +It's a subsection. + +### Subsection 1.2 + +It's another subsection. + +#### Sub-subsection 1.2.1 + +Now we're nesting. + +## Section 2 + +Look! A section! +""" + + +def test_renderer_headings(): + html, _ = markdown.parse(MARKDOWN_WITH_HEADINGS) + assert '

Home

' in html + assert '

Section 1

' in html + assert '

Sub-subsection 1.2.1

' in html