From e5d2da654127f1811b6bae3c9224217a94ad48e5 Mon Sep 17 00:00:00 2001
From: HaudinFlorence <haudin.florence@gmail.com>
Date: Wed, 6 Nov 2024 17:56:20 +0100
Subject: [PATCH] Change the logic for extracting headings: render the
 markdown cell content to HTML and select the headings with BeautifulSoup.select.

---
 nbconvert/filters/markdown_mistune.py | 57 ++++++---------------------
 share/templates/lab/index.html.j2     | 18 ++++-----
 2 files changed, 20 insertions(+), 55 deletions(-)
diff --git a/nbconvert/filters/markdown_mistune.py b/nbconvert/filters/markdown_mistune.py
index 8c5eb4715..d1d3226c9 100644
--- a/nbconvert/filters/markdown_mistune.py
+++ b/nbconvert/filters/markdown_mistune.py
@@ -13,7 +13,6 @@
 
 import bs4
 import mistune
-from mistune.renderers.markdown import MarkdownRenderer
 from pygments import highlight
 from pygments.formatters import HtmlFormatter
 from pygments.lexer import Lexer
@@ -492,61 +491,27 @@ def markdown2html_mistune(source: str) -> str:
     return MarkdownWithMath(renderer=IPythonRenderer(escape=False)).render(source)
 
 
-class HeadingExtractor(MarkdownRenderer):
-    """A renderer to capture headings"""
-
-    def __init__(self):
-        """Initialize the class."""
-        super().__init__()
-        self.headings = []
-
-    def heading(self, text, level):
-        """Return an empty string for the headings to avoid outputting them."""
-        self.headings.append((level, text))
-        return ""
-
-
 def extract_titles_from_notebook_node(nb: NotebookNode):
     """Create a Markdown parser with the HeadingExtractor renderer to collect all the headings of a notebook
     The input argument is the notebooknode from which a single string with all the markdown content concatenated
     The output is an array containing information about the headings such as their level, their text content, an identifier and a href that can be used in case of html converter.s"""
 
-    markdown_collection = ""
+    cells_html_collection = ""
     for cell in nb.cells:
         if cell.cell_type == "markdown":
-            lines = cell.source.splitlines()
-            for line in lines:
-                newline = line.replace("<h1>", "# ")
-                newline = newline.replace("<h2>", "## ")
-                newline = newline.replace("<h3>", "### ")
-                newline = newline.replace("<h4>", "#### ")
-                newline = newline.replace("<h5>", "##### ")
-                newline = newline.replace("<h6>", "###### ")
-                newline = newline.replace("</h1>", "")
-                newline = newline.replace("</h2>", "")
-                newline = newline.replace("</h3>", "")
-                newline = newline.replace("</h4>", "")
-                newline = newline.replace("</h5>", "")
-                newline = newline.replace("</h6>", "")
-            if newline.startswith('#'):
-                markdown_collection = markdown_collection + newline.strip() + "\n"
+            markdown_source = cell.source
+            html_source = mistune.html(markdown_source)  # convert all the markdown sources to html
+            cells_html_collection = cells_html_collection + html_source + "\n"
 
     titles_array = []
-    renderer = HeadingExtractor()
-    extract_titles = mistune.create_markdown(renderer=renderer)
-    print(markdown_collection)
-    extract_titles(markdown_collection)
-    headings = renderer.headings
+    html_collection = bs4.BeautifulSoup(cells_html_collection, "html.parser")
+    headings = html_collection.select("h1, h2, h3, h4, h5, h6")
 
     # Iterate on all headings to get the necessary information on the various titles
-    for __, title in headings:
-        children = title["children"]
-        attrs = title["attrs"]
-        raw_text = children[0]["raw"]
-        header_level = attrs["level"]
-        id = raw_text.replace(" ", "-")
+    for heading in headings:
+        text = heading.get_text().lstrip().rstrip()
+        level = int(heading.name[1])
+        id = text.replace(" ", "-")
         href = "#" + id
-        titles_array.append([header_level, raw_text, id, href])
-        # print("header_level:", header_level)
-        # print("raw_text:", raw_text)
+        titles_array.append([str(heading), level, href])
     return titles_array
diff --git a/share/templates/lab/index.html.j2 b/share/templates/lab/index.html.j2
index 7b756fb50..f08466728 100644
--- a/share/templates/lab/index.html.j2
+++ b/share/templates/lab/index.html.j2
@@ -103,7 +103,7 @@ a.anchor-link {
 /* Table of Contents for the html exporter */
 .jp-RenderedHTMLTOC-Title {
   font-family: var(--jp-content-font-family);
-  font-size: 24px;
+  font-size: 14px;
   margin: 16px 0;
   padding-left: 64px;
   font-weight: bold;
@@ -111,42 +111,42 @@ a.anchor-link {
 
 .jp-RenderedHTMLTOC-Item-h1 {
   font-family: var(--jp-content-font-family);
-  font-size: 20px;
+  font-size: 14px;
   margin: 0;
   padding-left: 88px;
 }
 
 .jp-RenderedHTMLTOC-Item-h2 {
   font-family: var(--jp-content-font-family);
-  font-size: 18px;
+  font-size: 12px;
   margin: 4px;
   padding-left: 112px;
 }
 
 .jp-RenderedHTMLTOC-Item-h3 {
   font-family: var(--jp-content-font-family);
-  font-size:16px;
+  font-size:10px;
   margin: 4px;
   padding-left: 136px;
 }
 
 .jp-RenderedHTMLTOC-Item-h4 {
   font-family: var(--jp-content-font-family);
-  font-size: 14px;
+  font-size: 8px;
   margin: 4px;
   padding-left: 160px;
 }
 
 .jp-RenderedHTMLTOC-Item-h5 {
   font-family: var(--jp-content-font-family);
-  font-size: 12px;
+  font-size: 7px;
   margin: 4px;
   padding-left: 184px;
 }
 
 .jp-RenderedHTMLTOC-Item-h6 {
   font-family: var(--jp-content-font-family);
-  font-size: 10px;
+  font-size: 6px;
   margin: 2px;
   padding-left: 208px;
 }
@@ -181,7 +181,7 @@ a.anchor-link {
 {%- set tableofcontents= resources.extract_titles_from_nodebook_node(nb) -%}
 <div class="jp-RenderedHTMLTOC-Title">Table of contents</div>
 {%- for item in tableofcontents -%}
-{%- set (level, text, id, href) = item -%}
+{%- set (header, level, href) = item -%}
 <div class="
 {%- if level==1 -%}
 jp-RenderedHTMLCommon jp-RenderedHTMLTOC-Item-h1
@@ -198,7 +198,7 @@ jp-RenderedHTMLCommon jp-RenderedHTMLTOC-Item-h6
 {%- endif -%}"
 >
 <a href={{href}}>
-{{text}}
+{{header}}
 </a>
 </div>
 {%- endfor -%}