From e5d2da654127f1811b6bae3c9224217a94ad48e5 Mon Sep 17 00:00:00 2001 From: HaudinFlorence Date: Wed, 6 Nov 2024 17:56:20 +0100 Subject: [PATCH] Change the logics to get the headers using parsing of the markdown cell content to html and using BeautifulSoup.select. --- nbconvert/filters/markdown_mistune.py | 57 ++++++--------------------- share/templates/lab/index.html.j2 | 18 ++++----- 2 files changed, 20 insertions(+), 55 deletions(-) diff --git a/nbconvert/filters/markdown_mistune.py b/nbconvert/filters/markdown_mistune.py index 8c5eb4715..d1d3226c9 100644 --- a/nbconvert/filters/markdown_mistune.py +++ b/nbconvert/filters/markdown_mistune.py @@ -13,7 +13,6 @@ import bs4 import mistune -from mistune.renderers.markdown import MarkdownRenderer from pygments import highlight from pygments.formatters import HtmlFormatter from pygments.lexer import Lexer @@ -492,61 +491,27 @@ def markdown2html_mistune(source: str) -> str: return MarkdownWithMath(renderer=IPythonRenderer(escape=False)).render(source) -class HeadingExtractor(MarkdownRenderer): - """A renderer to capture headings""" - - def __init__(self): - """Initialize the class.""" - super().__init__() - self.headings = [] - - def heading(self, text, level): - """Return an empty string for the headings to avoid outputting them.""" - self.headings.append((level, text)) - return "" - - def extract_titles_from_notebook_node(nb: NotebookNode): """Create a Markdown parser with the HeadingExtractor renderer to collect all the headings of a notebook The input argument is the notebooknode from which a single string with all the markdown content concatenated The output is an array containing information about the headings such as their level, their text content, an identifier and a href that can be used in case of html converter.s""" - markdown_collection = "" + cells_html_collection = "" for cell in nb.cells: if cell.cell_type == "markdown": - lines = cell.source.splitlines() - for line in lines: - newline = line.replace("

", "# ") - newline = newline.replace("

", "## ") - newline = newline.replace("

", "### ") - newline = newline.replace("

", "#### ") - newline = newline.replace("

", "##### ") - newline = newline.replace("
", "###### ") - newline = newline.replace("
", "") - newline = newline.replace("", "") - newline = newline.replace("", "") - newline = newline.replace("", "") - newline = newline.replace("", "") - newline = newline.replace("", "") - if newline.startswith('#'): - markdown_collection = markdown_collection + newline.strip() + "\n" + markdown_source = cell.source + html_source = mistune.html(markdown_source) # convert all the markdown sources to html + cells_html_collection = cells_html_collection + html_source + "\n" titles_array = [] - renderer = HeadingExtractor() - extract_titles = mistune.create_markdown(renderer=renderer) - print(markdown_collection) - extract_titles(markdown_collection) - headings = renderer.headings + html_collection = bs4.BeautifulSoup(cells_html_collection, "html.parser") + headings = html_collection.select("h1, h2, h3, h4, h5, h6") # Iterate on all headings to get the necessary information on the various titles - for __, title in headings: - children = title["children"] - attrs = title["attrs"] - raw_text = children[0]["raw"] - header_level = attrs["level"] - id = raw_text.replace(" ", "-") + for heading in headings: + text = heading.get_text().lstrip().rstrip() + level = int(heading.name[1]) + id = text.replace(" ", "-") href = "#" + id - titles_array.append([header_level, raw_text, id, href]) - # print("header_level:", header_level) - # print("raw_text:", raw_text) + titles_array.append([str(heading), level, href]) return titles_array diff --git a/share/templates/lab/index.html.j2 b/share/templates/lab/index.html.j2 index 7b756fb50..f08466728 100644 --- a/share/templates/lab/index.html.j2 +++ b/share/templates/lab/index.html.j2 @@ -103,7 +103,7 @@ a.anchor-link { /* Table of Contents for the html exporter */ .jp-RenderedHTMLTOC-Title { font-family: var(--jp-content-font-family); - font-size: 24px; + font-size: 14px; margin: 16px 0; padding-left: 64px; font-weight: bold; @@ -111,42 +111,42 @@ a.anchor-link { .jp-RenderedHTMLTOC-Item-h1 { font-family: var(--jp-content-font-family); - font-size: 20px; + font-size: 14px; margin: 0; padding-left: 88px; } .jp-RenderedHTMLTOC-Item-h2 { font-family: var(--jp-content-font-family); - font-size: 18px; + font-size: 12px; margin: 4px; padding-left: 112px; } .jp-RenderedHTMLTOC-Item-h3 { font-family: var(--jp-content-font-family); - font-size:16px; + font-size:10px; margin: 4px; padding-left: 136px; } .jp-RenderedHTMLTOC-Item-h4 { font-family: var(--jp-content-font-family); - font-size: 14px; + font-size: 8px; margin: 4px; padding-left: 160px; } .jp-RenderedHTMLTOC-Item-h5 { font-family: var(--jp-content-font-family); - font-size: 12px; + font-size: 7px; margin: 4px; padding-left: 184px; } .jp-RenderedHTMLTOC-Item-h6 { font-family: var(--jp-content-font-family); - font-size: 10px; + font-size: 6px; margin: 2px; padding-left: 208px; } @@ -181,7 +181,7 @@ a.anchor-link { {%- set tableofcontents= resources.extract_titles_from_nodebook_node(nb) -%}
Table of contents
{%- for item in tableofcontents -%} -{%- set (level, text, id, href) = item -%} +{%- set (header, level, href) = item -%}
-{{text}} +{{header}}
{%- endfor -%}