From 533b0f984854f45e399518a92881d4d5018a80fd Mon Sep 17 00:00:00 2001
From: Oliver Kandler <oliver@martpersonalfinance.info>
Date: Mon, 12 Feb 2024 17:31:43 +0100
Subject: [PATCH 1/3] bump to private version 0.6.2.1

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 60e2f2f..151a3be 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "blurry-cli"
-version = "0.6.2"
+version = "0.6.2.1"
 description = "A Mistune-based static site generator for Python"
 authors = ["John Franey <franey@duck.com>"]
 license = "MIT"

From 3d8e1f5e4ffea4ff909cb4c1e8c056ce1c6ae491 Mon Sep 17 00:00:00 2001
From: Oliver Kandler <oliver@martpersonalfinance.info>
Date: Mon, 12 Feb 2024 21:57:33 +0100
Subject: [PATCH 2/3] new setting for global/local schema merge

---
 blurry/settings.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/blurry/settings.py b/blurry/settings.py
index fa812fb..843b9e4 100644
--- a/blurry/settings.py
+++ b/blurry/settings.py
@@ -26,6 +26,7 @@ class Settings(TypedDict):
     USE_HTTP: bool
     RUNSERVER: bool
     FRONTMATTER_NON_SCHEMA_VARIABLE_PREFIX: str
+    FRONT_MATTER_RESOLUTION: str
 
 
 SETTINGS: Settings = {
@@ -46,6 +47,7 @@ class Settings(TypedDict):
     "RUNSERVER": False,
     "FRONTMATTER_NON_SCHEMA_VARIABLE_PREFIX": "~",
     "TEMPLATE_SCHEMA_TYPES": {},
+    "FRONT_MATTER_RESOLUTION": "overwrite", # or "merge"
 }
 
 

From 9911221a2cb2aec8aa90799724933903676cafc8 Mon Sep 17 00:00:00 2001
From: Oliver Kandler <oliver@martpersonalfinance.info>
Date: Mon, 12 Feb 2024 22:01:20 +0100
Subject: [PATCH 3/3] new option to merge schema and necessary refactorings

---
 blurry/__init__.py          |   5 +-
 blurry/markdown/__init__.py | 111 ++++++++++++++++++++++++++++++------
 blurry/types.py             |   1 +
 tests/test_sitemap.py       |   4 ++
 tests/test_utils.py         |   4 ++
 5 files changed, 106 insertions(+), 19 deletions(-)

diff --git a/blurry/__init__.py b/blurry/__init__.py
index 2bebb0f..aba261e 100644
--- a/blurry/__init__.py
+++ b/blurry/__init__.py
@@ -118,7 +118,7 @@ async def write_html_file(
         extra_context["sibling_pages"] = sibling_pages
     folder_in_build = convert_content_path_to_directory_in_build(file_data.path)
 
-    schema_type = file_data.front_matter.get("@type")
+    schema_type = file_data.top_level_type
     if not schema_type:
         raise ValueError(
             f"Required @type value missing in file or TOML front matter invalid: "
@@ -207,9 +207,10 @@ async def build(release=True):
             file_data_by_directory[directory] = []
 
         # Convert Markdown file to HTML
-        body, front_matter = convert_markdown_file_to_html(filepath)
+        body, front_matter, top_level_type = convert_markdown_file_to_html(filepath)
         file_data = MarkdownFileData(
             body=body,
+            top_level_type=top_level_type,
             front_matter=front_matter,
             path=relative_filepath,
         )
diff --git a/blurry/markdown/__init__.py b/blurry/markdown/__init__.py
index 0e75656..d06426d 100644
--- a/blurry/markdown/__init__.py
+++ b/blurry/markdown/__init__.py
@@ -4,6 +4,9 @@
 from typing import TypeAlias
 from typing import TypeGuard
 
+import json
+from pyld import jsonld
+
 import mistune
 from mistune import BlockState
 from mistune.plugins.abbr import abbr
@@ -149,9 +152,96 @@ def is_blurry_renderer(
     + [plugin.load() for plugin in discovered_markdown_plugins],
 )
 
+SCHEMA_ORG = {"@vocab": "https://schema.org/"}
+
+
+def jsonld_document_loader(secure=False, fragments=None, **kwargs):
+    """
+    Create a pyld document loader that serves in-memory schema fragments.
+
+    The loader never touches the network: whatever URL pyld asks for, it
+    answers with a JSON array built from ``fragments``.
+
+    :param secure: unused; mirrors pyld's requests_document_loader signature.
+    :param fragments: the schema fragments (dicts) to serve; any fragment
+        without an ``@context`` is served with the schema.org vocabulary.
+    :param **kwargs: unused; accepted for the same signature compatibility.
+
+    :return: the RemoteDocument loader function.
+    """
+    # Snapshot the input; None replaces the shared mutable ``[]`` default.
+    fragments = list(fragments or [])
+
+    def loader(ignored, options=None):
+        """
+        Retrieve JSON-LD from the dicts provided as fragments.
+
+        :param ignored: the requested URL; ignored because the TOML
+            fragments are side-loaded.
+        :param options: loader options passed by pyld; unused.
+
+        :return: the RemoteDocument.
+        """
+        # Default the context on copies so the caller's dicts stay untouched.
+        documents = [
+            fragment if fragment.get("@context")
+            else {**fragment, "@context": SCHEMA_ORG}
+            for fragment in fragments
+        ]
+        return {
+            "contentType": "application/ld+json",
+            "contextUrl": None,
+            "documentUrl": None,
+            "document": json.dumps(documents),
+        }
 
-def convert_markdown_file_to_html(filepath: Path) -> tuple[str, dict[str, Any]]:
+    return loader
+
+def add_inferred_schema(local_front_matter: dict, filepath: Path) -> dict:
+    """Add the page URL and resolved image/thumbnail URLs to the front matter.
+
+    Updates ``local_front_matter`` in place and returns the same dict.
+    """
     CONTENT_DIR = get_content_directory()
+    local_front_matter["url"] = content_path_to_url(filepath.relative_to(CONTENT_DIR))
+
+    # https://schema.org/image
+    # https://schema.org/thumbnailUrl
+    if image := local_front_matter.get("image"):
+        image_copy = deepcopy(image)
+        relative_image_path = get_relative_image_path_from_image_property(image_copy)
+        image_path = resolve_relative_path_in_markdown(relative_image_path, filepath)
+        local_front_matter["image"] = update_image_with_url(image_copy, image_path)
+        local_front_matter["thumbnailUrl"] = image_path_to_thumbnailUrl(image_path)
+    return local_front_matter
+
+def resolve_front_matter(state: BlockState, filepath: Path) -> tuple[dict[str, Any], str | None]:
+    """Combine the global SCHEMA_DATA with a file's front matter.
+
+    With FRONT_MATTER_RESOLUTION set to "merge" both are fed to pyld and
+    compacted against schema.org; otherwise file values overwrite global ones.
+    Returns the front matter and the top-level "@type" (None if unset).
+    """
+    front_matter: dict[str, Any] = dict(SETTINGS.get("SCHEMA_DATA", {}))
+    # Copy so the parser state is not mutated by the additions below.
+    local_schema = dict(state.env.get("front_matter", {}))
+    if SETTINGS.get("FRONT_MATTER_RESOLUTION") != "merge":
+        front_matter.update(local_schema)
+        front_matter = add_inferred_schema(front_matter, filepath)
+        # write_html_file requires a truthy type, so report the resolved @type.
+        return front_matter, front_matter.get("@type")
+
+    top_level_type = local_schema.get("@type")
+    local_schema = add_inferred_schema(local_schema, filepath)
+    try:
+        jsonld.set_document_loader(jsonld_document_loader(fragments=[front_matter, local_schema]))
+        front_matter = jsonld.compact("ignore", SCHEMA_ORG)
+    except Exception as e:
+        print("merging front matter failed:", e)
+        raise
+    return front_matter, top_level_type
+
+def convert_markdown_file_to_html(filepath: Path) -> tuple[str, dict[str, Any], str]:
     if not markdown.renderer:
         raise Exception("Blurry markdown renderer not set on Mistune Markdown instance")
 
@@ -167,26 +257,13 @@ def convert_markdown_file_to_html(filepath: Path) -> tuple[str, dict[str, Any]]:
     html, state = markdown.parse(markdown_text, state=state)
 
     if not is_str(html):
-        raise Exception(f"Expected html to be a string but got: {type(html)}")
+        raise Exception(f"Expected html to be a string but got: {type(html)}")
 
     # Post-process HTML
     html = remove_lazy_loading_from_first_image(html)
 
-    # Seed front_matter with schema_data from config file
-    front_matter: dict[str, Any] = dict(SETTINGS.get("SCHEMA_DATA", {}))
-    front_matter.update(state.env.get("front_matter", {}))
-
-    # Add inferred/computed/relative values
-    # https://schema.org/image
-    # https://schema.org/thumbnailUrl
-    front_matter.update({"url": content_path_to_url(filepath.relative_to(CONTENT_DIR))})
-    if image := front_matter.get("image"):
-        image_copy = deepcopy(image)
-        relative_image_path = get_relative_image_path_from_image_property(image_copy)
-        image_path = resolve_relative_path_in_markdown(relative_image_path, filepath)
-        front_matter["image"] = update_image_with_url(image_copy, image_path)
-        front_matter["thumbnailUrl"] = image_path_to_thumbnailUrl(image_path)
-    return html, front_matter
+    front_matter, top_level_type = resolve_front_matter(state, filepath)
+    return html, front_matter, top_level_type
 
 
 def image_path_to_thumbnailUrl(image_path: Path):
diff --git a/blurry/types.py b/blurry/types.py
index 93338c1..1fe8c8b 100644
--- a/blurry/types.py
+++ b/blurry/types.py
@@ -8,6 +8,7 @@
 @dataclass
 class MarkdownFileData:
     body: str
+    top_level_type: str
     front_matter: dict[str, Any]
     path: Path
 
diff --git a/tests/test_sitemap.py b/tests/test_sitemap.py
index f51cb9d..7576905 100644
--- a/tests/test_sitemap.py
+++ b/tests/test_sitemap.py
@@ -8,16 +8,19 @@
 directory_file_data = [
     MarkdownFileData(
         front_matter=dict(datePublished=date(2021, 1, 1), url="/blog/a-post-1/"),
+        top_level_type = "WebPage",
         body="",
         path=blog_path / "a-post-1",
     ),
     MarkdownFileData(
         front_matter=dict(datePublished=date(2021, 3, 1), url="/blog/b-post-3/"),
+        top_level_type = "WebPage",
         body="",
         path=blog_path / "b-post-3",
     ),
     MarkdownFileData(
         front_matter=dict(dateCreated=date(2021, 2, 1), url="/blog/c-post-2/"),
+        top_level_type = "WebPage",
         body="",
         path=blog_path / "c-post-2",
     ),
@@ -27,6 +30,7 @@
             dateModified=date(2022, 1, 13),
             url="/blog/c-post-4/",
         ),
+        top_level_type = "WebPage",
         body="",
         path=blog_path / "c-post-4",
     ),
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 2737351..02c2d02 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -67,21 +67,25 @@ def test_sort_directory_file_data_by_date():
         blog_path: [
             MarkdownFileData(
                 front_matter=dict(datePublished=date(2021, 1, 1)),
+                top_level_type = "WebPage",
                 body="",
                 path=Path("a-post-1"),
             ),
             MarkdownFileData(
                 front_matter=dict(datePublished=date(2021, 3, 1)),
+                top_level_type = "WebPage",
                 body="",
                 path=Path("b-post-3"),
             ),
             MarkdownFileData(
                 front_matter=dict(dateCreated=date(2021, 2, 1)),
+                top_level_type = "WebPage",
                 body="",
                 path=Path("c-post-2"),
             ),
             MarkdownFileData(
                 front_matter=dict(),
+                top_level_type = "WebPage",
                 body="",
                 path=Path("c-post-4"),
             ),