Skip to content

Commit

Permalink
Change the logic to get the headers by parsing the markdown cell content to HTML and using BeautifulSoup.select.
Browse files Browse the repository at this point in the history
  • Loading branch information
HaudinFlorence committed Nov 6, 2024
1 parent e833379 commit e5d2da6
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 55 deletions.
57 changes: 11 additions & 46 deletions nbconvert/filters/markdown_mistune.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

import bs4
import mistune
from mistune.renderers.markdown import MarkdownRenderer
from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexer import Lexer
Expand Down Expand Up @@ -492,61 +491,27 @@ def markdown2html_mistune(source: str) -> str:
return MarkdownWithMath(renderer=IPythonRenderer(escape=False)).render(source)


class HeadingExtractor(MarkdownRenderer):
    """Markdown renderer that records headings instead of rendering them.

    Every call to :meth:`heading` stores its two arguments in
    ``self.headings`` as a ``(level, text)`` tuple and contributes an empty
    string to the rendered output.
    """

    def __init__(self):
        """Initialize the base renderer and start with no recorded headings."""
        super().__init__()
        self.headings = list()

    def heading(self, text, level):
        """Record one heading and emit nothing for it.

        NOTE(review): the parameter names suggest a plain string and an
        integer level; confirm what the installed mistune version actually
        passes to renderer methods before relying on that.
        """
        entry = (level, text)
        self.headings.append(entry)
        # An empty string keeps the heading out of the rendered result.
        return ""


def extract_titles_from_notebook_node(nb: NotebookNode):
    """Collect every heading found in the markdown cells of a notebook.

    Each markdown cell is rendered to HTML with ``mistune.html``; the combined
    HTML is then parsed with BeautifulSoup and queried for ``h1``-``h6``
    elements. Headings written as raw HTML inside a markdown cell are found
    too, provided mistune passes them through to its output as tags.

    Parameters
    ----------
    nb : NotebookNode
        Notebook whose ``cells`` expose ``cell_type`` and ``source``. Only
        cells with ``cell_type == "markdown"`` are inspected.

    Returns
    -------
    list
        One ``[header, level, href]`` list per heading, in document order:

        * ``header`` - the heading element serialized back to an HTML string,
        * ``level`` - the heading level as an ``int`` (1 to 6),
        * ``href`` - ``"#"`` followed by the stripped heading text with spaces
          replaced by ``"-"``, usable as an in-page link by HTML exporters.
    """
    # Render every markdown cell, then join once rather than growing a string
    # inside the loop. Each fragment keeps its trailing newline separator.
    html_fragments = [
        mistune.html(cell.source) for cell in nb.cells if cell.cell_type == "markdown"
    ]
    cells_html_collection = "".join(f"{fragment}\n" for fragment in html_fragments)

    html_collection = bs4.BeautifulSoup(cells_html_collection, "html.parser")

    titles_array = []
    for heading in html_collection.select("h1, h2, h3, h4, h5, h6"):
        text = heading.get_text().strip()
        # The tag name is "h1".."h6"; its second character is the level.
        level = int(heading.name[1])
        # Named `anchor` rather than `id` to avoid shadowing the builtin.
        # NOTE(review): this must match the id the HTML renderer puts on the
        # heading for the link to resolve - confirm against the renderer.
        anchor = text.replace(" ", "-")
        titles_array.append([str(heading), level, "#" + anchor])
    return titles_array
18 changes: 9 additions & 9 deletions share/templates/lab/index.html.j2
Original file line number Diff line number Diff line change
Expand Up @@ -103,50 +103,50 @@ a.anchor-link {
/* Table of Contents for the html exporter */
/*
 * One rule for the TOC title and one per heading level (h1-h6).
 * Each level is indented 24px further than the previous one
 * (88px for h1 up to 208px for h6) and uses a smaller font.
 * Every rule carries exactly one font-size declaration.
 */
.jp-RenderedHTMLTOC-Title {
  font-family: var(--jp-content-font-family);
  font-size: 14px;
  margin: 16px 0;
  padding-left: 64px;
  font-weight: bold;
}
.jp-RenderedHTMLTOC-Item-h1 {
  font-family: var(--jp-content-font-family);
  font-size: 14px;
  margin: 0;
  padding-left: 88px;
}
.jp-RenderedHTMLTOC-Item-h2 {
  font-family: var(--jp-content-font-family);
  font-size: 12px;
  margin: 4px;
  padding-left: 112px;
}
.jp-RenderedHTMLTOC-Item-h3 {
  font-family: var(--jp-content-font-family);
  font-size: 10px;
  margin: 4px;
  padding-left: 136px;
}
.jp-RenderedHTMLTOC-Item-h4 {
  font-family: var(--jp-content-font-family);
  font-size: 8px;
  margin: 4px;
  padding-left: 160px;
}
.jp-RenderedHTMLTOC-Item-h5 {
  font-family: var(--jp-content-font-family);
  font-size: 7px;
  margin: 4px;
  padding-left: 184px;
}
.jp-RenderedHTMLTOC-Item-h6 {
  font-family: var(--jp-content-font-family);
  font-size: 6px;
  margin: 2px;
  padding-left: 208px;
}
Expand Down Expand Up @@ -181,7 +181,7 @@ a.anchor-link {
{%- set tableofcontents= resources.extract_titles_from_nodebook_node(nb) -%}
<div class="jp-RenderedHTMLTOC-Title">Table of contents</div>
{%- for item in tableofcontents -%}
{%- set (level, text, id, href) = item -%}
{%- set (header, level, href) = item -%}
<div class="
{%- if level==1 -%}
jp-RenderedHTMLCommon jp-RenderedHTMLTOC-Item-h1
Expand All @@ -198,7 +198,7 @@ jp-RenderedHTMLCommon jp-RenderedHTMLTOC-Item-h6
{%- endif -%}"
>
<a href={{href}}>
{{text}}
{{header}}
</a>
</div>
{%- endfor -%}
Expand Down

0 comments on commit e5d2da6

Please sign in to comment.