Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: headings and url_path Jinja filters #106

Merged
merged 1 commit into from
Dec 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions blurry/markdown/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from blurry.images import generate_srcset_string
from blurry.images import get_widths_for_image_width
from blurry.plugins import discovered_markdown_plugins
from blurry.plugins.jinja_plugins.filters import slugify
from blurry.settings import get_content_directory
from blurry.settings import SETTINGS
from blurry.types import is_str
Expand Down Expand Up @@ -135,6 +136,12 @@ def link(self, text, url, title: str | None = None) -> str:

return f"<a {attrs_string}>{text}</a>"

def heading(self, text: str, level: int, **attrs: Any) -> str:
    """Render an ``<hN>`` element whose ``id`` is a slug of its visible text.

    ``text`` is interpolated into the output unescaped, so it is presumably
    already-rendered inline HTML. The slug is therefore computed from the
    plain text only: inline tags are dropped and character references are
    decoded first, so that ``<code>a &amp; b</code>`` yields ``a-b`` rather
    than ``codea-amp-bcode``.

    :param text: Inner HTML of the heading.
    :param level: Heading level, used verbatim to build the tag name.
    :param attrs: Extra renderer attributes; accepted for interface
        compatibility and not used.
    :return: The heading markup followed by a newline.
    """
    # Local imports keep this method self-contained within its module.
    import re
    from html import unescape

    tag = f"h{level}"
    # Strip tags before decoding entities so an encoded "&lt;" in the text is
    # not mistaken for the start of a tag.
    plain_text = unescape(re.sub(r"<[^>]*>", "", text))
    slug = slugify(plain_text)
    # An empty id attribute is invalid HTML; omit it when the heading has no
    # sluggable characters.
    id_attr = f' id="{slug}"' if slug else ""
    return f"<{tag}{id_attr}>{text}</{tag}>\n"


def is_blurry_renderer(
renderer: mistune.BaseRenderer,
Expand Down
43 changes: 43 additions & 0 deletions blurry/plugins/jinja_plugins/filters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import re
import unicodedata
from urllib.parse import urlparse

from selectolax.lexbor import LexborHTMLParser


def url_path(url: str) -> str:
    """Return the path component of ``url`` as parsed by ``urlparse``.

    Everything else in the URL (scheme, network location, query string,
    fragment) is left out of the result.
    """
    return urlparse(url).path


def slugify(value):
    """
    Turn arbitrary text into a lowercase, hyphen-separated slug.

    The text is NFKC-normalized, every character that is not a word
    character, whitespace, or a hyphen is dropped, surrounding whitespace is
    trimmed, and the result is lowercased. Finally each run of whitespace
    and/or hyphens collapses into a single hyphen.
    Adapted from: https://github.com/django/django/blob/92053acbb9160862c3e743a99ed8ccff8d4f8fd6/django/utils/text.py
    """
    normalized = unicodedata.normalize("NFKC", value)
    # Keep only word characters, whitespace and hyphens.
    cleaned = re.sub(r"[^\w\s-]", "", normalized, flags=re.U)
    lowered = cleaned.strip().lower()
    # Collapse separators last so removed punctuation cannot leave doubles.
    return re.sub(r"[-\s]+", "-", lowered, flags=re.U)


def headings(html: str, max_level: int = 2) -> list[dict]:
    """Collect the ``<h2>``-``<h6>`` headings found in the body of ``html``.

    ``<h1>`` is never included. Headings are returned in the order the
    parser yields the body's descendants.

    :param html: HTML document or fragment to scan.
    :param max_level: Deepest heading level to include; headings whose
        level number is greater than this are skipped.
    :return: One dict per heading with the keys ``level`` (int), ``text``
        (the node's text content) and ``id``. ``id`` is the element's own
        ``id`` attribute when it has a non-empty one, otherwise a slug of
        the heading text.
    """
    tree = LexborHTMLParser(html)
    heading_list: list[dict] = []

    for node in tree.css("body *"):
        if node.tag not in {"h2", "h3", "h4", "h5", "h6"}:
            continue
        # The tag name's last character is the heading level digit.
        level = int(node.tag[-1])
        if level > max_level:
            continue
        text = node.text()
        heading_list.append(
            {
                "level": level,
                "text": text,
                # Prefer the id actually present in the markup so anchor
                # links built from this list resolve; recompute a slug only
                # when the element has no usable id.
                "id": node.attributes.get("id") or slugify(text),
            }
        )

    return heading_list
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,7 @@ container = 'blurry.plugins.markdown_plugins.container_plugin:container'

[tool.poetry.plugins."blurry.jinja_extensions"]
blurry_image = "blurry.plugins.jinja_plugins.blurry_image_extension:BlurryImage"

[tool.poetry.plugins."blurry.jinja_filter_plugins"]
url_path = "blurry.plugins.jinja_plugins.filters:url_path"
headings = "blurry.plugins.jinja_plugins.filters:headings"
87 changes: 87 additions & 0 deletions tests/test_jinja_filter_plugins.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import pytest

from blurry.plugins.jinja_plugins.filters import headings
from blurry.plugins.jinja_plugins.filters import slugify
from blurry.plugins.jinja_plugins.filters import url_path

# Sample page used as input by the headings filter tests below. It contains
# one <h1>, three <h2> and five <h3> headings, none of which carries an id.
html = """
<!DOCTYPE html>
<html lang="en">

<head>
<title>Getting started: quick start | Blurry</title>
</head>

<body>
<main>

<h1>Blurry: A Python-powered static site generator</h1>
<h2>What is Blurry?</h2>
<p>Blurry is a static site generator with a terrible pun of a name: if you're generating static sight, you're making things Blurry.</p>
<p>Blurry brings the concept of schema-first development to static site generators.
Specifically, Blurry uses <a href="https://schema.org/" target="_blank" rel="noopener">Schema.org</a> schema type names as the names for its template files, and schema type properties as Markdown front matter to populate those templates.</p>
<h2>Goals</h2>
<h3>SEO performance</h3>
<p>Blurry supports <a href="https://schema.org/" target="_blank" rel="noopener">Schema.org</a> and <a href="https://ogp.me/" target="_blank" rel="noopener">Open Graph</a> with zero configuration.
This enables <a href="https://developers.google.com/search/docs/appearance/structured-data/search-gallery" target="_blank" rel="noopener">rich Google results</a> and <a href="https://www.opengraph.xyz/" target="_blank" rel="noopener">link previews</a> out-of-the-box.</p>
<h3>Page speed</h3>
<p>While using Blurry doesn't guarantee good page speed, it does solve a number of pain points that tend to slow down page loads.</p>
<p><a href="/content/images/" rel="noreferrer">Blurry's image handling</a> and HTML minification, for instance, can help get you a 100/100 <a href="https://pagespeed.web.dev/" target="_blank" rel="noopener">PageSpeed</a> score if the rest of your site is fast.</p>
<h3>Minimal configuration</h3>
<p>Blurry seeks to use sensible defaults so you can spend less time configuring and more time writing.
A viable Blurry configuration file (<a href="/../configuration/blurry.toml/" rel="noreferrer"><code>blurry.toml</code></a>) can be as simple as:</p>
<pre><code class="language-toml hljs language-ini"><span class="hljs-section">[blurry]</span>
<span class="hljs-attr">domain</span> = <span class="hljs-string">"johnfraney.ca"</span>
</code></pre>
<h3>Semantic HTML</h3>
<p>Where applicable, Blurry tries to use semantic HTML elements like <code>&lt;aside&gt;</code> over more generic elements like <code>&lt;div&gt;</code>.
Using semantic HTML elements also facilities classless CSS styling, which can be useful when styling some Markdown-generated HTML elements, and it can be <a href="https://developer.mozilla.org/en-US/docs/Learn/Accessibility/HTML" target="_blank" rel="noopener">good for accessibility</a>, too.</p>
<h2>Non-goals</h2>
<h3>"Gotta go fast!"</h3>
<p>While Blurry aims to be performant, build performance is not its top priority.
It's written in Python, so it may not be able to compete on speed with other static site generators like <a href="https://gohugo.io/" target="_blank" rel="noopener">Hugo</a>.
Instead, it aims to be <em>fast enough</em> while taking advantage of the Python ecosystem.</p>
</main>
</body>
</html>"""


def test_headings_filter_defaults():
    """With the default ``max_level`` only the ``<h2>`` headings come back."""
    expected = [
        {"level": 2, "text": "What is Blurry?", "id": "what-is-blurry"},
        {"level": 2, "text": "Goals", "id": "goals"},
        {"level": 2, "text": "Non-goals", "id": "non-goals"},
    ]
    assert headings(html) == expected


def test_headings_filter_max_level():
    """``max_level=3`` adds the ``<h3>`` headings, interleaved in page order."""
    expected = [
        {"level": 2, "text": "What is Blurry?", "id": "what-is-blurry"},
        {"level": 2, "text": "Goals", "id": "goals"},
        {"level": 3, "text": "SEO performance", "id": "seo-performance"},
        {"level": 3, "text": "Page speed", "id": "page-speed"},
        {"level": 3, "text": "Minimal configuration", "id": "minimal-configuration"},
        {"level": 3, "text": "Semantic HTML", "id": "semantic-html"},
        {"level": 2, "text": "Non-goals", "id": "non-goals"},
        {"level": 3, "text": '"Gotta go fast!"', "id": "gotta-go-fast"},
    ]
    found = headings(html, max_level=3)
    assert found == expected


@pytest.mark.parametrize(
    "title, slug",
    (
        ("Non-goals", "non-goals"),
        ('"Gotta go fast!"', "gotta-go-fast"),
        ("That's blasé", "thats-blasé"),
        ("Sub-subsection 1.2.1", "sub-subsection-121"),
    ),
)
def test_slugify(title, slug):
    """Each heading title slugifies to the expected anchor id."""
    produced = slugify(title)
    assert produced == slug


def test_url_path():
    """Only the path part of an absolute URL is returned."""
    path = url_path("http://127.0.0.1:8000/getting-started/quickstart/")
    assert path == "/getting-started/quickstart/"
32 changes: 32 additions & 0 deletions tests/test_markdown_renderer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from blurry.markdown import markdown

# Markdown sample with headings at levels 1 through 4; parsed by the renderer
# test below to check the id attributes generated for headings.
MARKDOWN_WITH_HEADINGS = """
# Home

This is the homepage with some sections.

## Section 1

### Subsection 1.1

It's a subsection.

### Subsection 1.2

It's another subsection.

#### Sub-subsection 1.2.1

Now we're nesting.

## Section 2

Look! A section!
"""


def test_renderer_headings():
    """Rendered headings carry an ``id`` slugified from their text."""
    rendered, _ = markdown.parse(MARKDOWN_WITH_HEADINGS)
    expected_fragments = (
        '<h1 id="home">Home</h1>',
        '<h2 id="section-1">Section 1</h2>',
        '<h4 id="sub-subsection-121">Sub-subsection 1.2.1</h4>',
    )
    for fragment in expected_fragments:
        assert fragment in rendered
Loading