Skip to content

Commit

Permalink
Add main functions and testing
Browse files Browse the repository at this point in the history
  • Loading branch information
emarquezz committed Oct 15, 2024
1 parent f1f4f04 commit 43b1548
Show file tree
Hide file tree
Showing 12 changed files with 437 additions and 6 deletions.
Binary file added .DS_Store
Binary file not shown.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,9 @@ Customize your title colors and create an automatic table of contents.
## Features

- TODO


## Examples
To see `cutiepynb` in action, run the following script to process example notebooks:
```bash
python run_examples.py
```
3 changes: 3 additions & 0 deletions cutiepynb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@
# Package metadata.
__author__ = """Elisa Márquez-Zavala"""
__email__ = "[email protected]"
__version__ = "0.0.1"


# Re-export the entry point so it is importable as `from cutiepynb import cutipy_nb`.
from .core import cutipy_nb
197 changes: 197 additions & 0 deletions cutiepynb/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
import json
import re
import seaborn as sns # Make it conditional

from .utils import generate_corpus_id, save_doc_enchulado
from .styles import maquillate, update_heading_colors_in_document

def create_new_document(document, colors):
    """
    Prepend a table of contents to a notebook and anchor its headings.

    Args:
        document (Dict[str, Any]): Parsed notebook; its 'cells' entry is read
            and then replaced.
        colors (Optional[List[str]]): Colors forwarded to the heading rewrite.
    Returns:
        Dict[str, Any]: The same ``document`` object, whose 'cells' list now
        starts with the table-of-contents cell followed by the rewritten cells.
    """
    headings, rewritten_cells = generate_new_cells(document['cells'], colors)
    toc_cell = generate_contents(headings)
    document['cells'] = [toc_cell, *rewritten_cells]
    return document

def generate_new_cells(cells, colors):
    """
    Rewrite markdown heading cells so every heading gets an anchor.

    Only markdown cells whose first source line starts with '#' are rewritten;
    every other cell is passed through untouched. Inside a rewritten cell each
    line starting with '#' is registered as its own heading and receives its
    own anchor line, so a cell holding several headings stays consistent with
    the table of contents.

    Args:
        cells (List[Dict[str, Any]]): Notebook cells. A cell's 'source' may be
            a list of lines or a single string.
        colors (Optional[List[str]]): Colors forwarded to the heading rewrite.
    Returns:
        Tuple[Dict[int, Dict[str, Any]], List[Dict[str, Any]]]: The heading
        information keyed by running index, and the list of output cells.
        Rewritten cells are shallow copies, so the input cells are not mutated.
    """
    info_to_add = {}
    new_cells = []
    for cell in cells:
        if cell['cell_type'] != 'markdown':
            new_cells.append(cell)
            continue

        source = cell['source']
        if isinstance(source, str):
            # A string source would otherwise be iterated character by character.
            source = source.splitlines(keepends=True)

        if not source or not source[0].startswith('#'):
            new_cells.append(cell)
            continue

        new_source = []
        for line in source:
            if not line.startswith('#'):
                new_source.append(line)
                continue
            # Register one heading at a time so the entry just added is
            # exactly the one that describes this line.
            info_to_add = extract_info([line], info_to_add)
            values = info_to_add[len(info_to_add) - 1]
            new_source.extend(create_source_anchor([line], values, colors))

        new_cell = dict(cell)  # shallow copy: leave the caller's cell untouched
        new_cell['source'] = new_source
        new_cells.append(new_cell)
    return info_to_add, new_cells

def generate_contents(info_to_add):
    """
    Build the markdown cell that holds the table of contents.

    Args:
        info_to_add (Dict[int, Dict[str, Any]]): Heading information keyed by
            integer; entries are listed in ascending key order.
    Returns:
        Dict[str, Any]: A markdown cell dictionary whose source starts with
        the table-of-contents title followed by one line per heading.
    """
    toc_cell = {
        'cell_type': 'markdown',
        'id': str(generate_corpus_id()),
        'metadata': {},
        'source': [' # Table of Contents\n'],
    }
    toc_cell['source'].extend(
        format_title_index(key, info_to_add) for key in sorted(info_to_add)
    )
    return toc_cell


def create_source_anchor(source, values, colors):
    """
    Prepend an HTML anchor to a heading cell and wrap its title in a <span>.

    Args:
        source (List[str]): Source lines of the markdown cell; the first line
            is the heading that gets replaced.
        values (Dict[str, Any]): Heading data with the keys 'anchor', 'title'
            and 'level' (0 for a single '#').
        colors (Optional[List[str]]): Colors for the heading; when falsy the
            span carries only its class.
    Returns:
        List[str]: A new list: the anchor line, the rewritten heading line,
        then the remaining source lines unchanged.
    """
    anchor = values['anchor']
    level = values['level']
    # The line ending must not end up inside the span: a newline there would
    # terminate the markdown heading before the closing tag.
    title = values['title'].rstrip('\r\n')

    anchor_line = f'<a class="anchor" id="{anchor}"></a>\n'

    if colors:
        # maquillate(span=False) returns ' style="...">title', which also
        # closes the opening tag.
        attrs_and_title = maquillate(title, level, colors, span=False)
    else:
        # Without colors the opening tag still has to be closed explicitly.
        attrs_and_title = f'>{title}'

    span_class = f"title_{level}"
    title_html = f'<span class={span_class}{attrs_and_title}</span>'

    # Keep the replaced line's own line ending so following lines stay separate.
    line_end = '\n' if source[0].endswith('\n') else ''
    new_source = [anchor_line, *source]
    new_source[1] = f"#{'#' * level} {title_html}{line_end}"
    return new_source

def format_title_index(title_numb, titles):
    """
    Format one heading as a table-of-contents line.

    Args:
        title_numb (int): Key of the heading inside ``titles``.
        titles (Dict[int, Dict[str, Any]]): Heading data; each entry provides
            'title', 'level' and 'anchor'.
    Returns:
        str: A markdown list item linking to the heading's anchor, indented
        with one tab per heading level and terminated by a newline.
    """
    entry = titles[title_numb]
    indent = '\t' * entry['level']
    # Titles may still carry the source line's newline; keep it out of the label.
    label = entry['title'].strip()
    return f"{indent}+ [{label}](#{entry['anchor']})\n"

def extract_info(source, titles):
    """
    Collect heading information from the lines of a markdown cell.

    Every line that starts with '#' adds one entry to ``titles`` under the
    next free integer key.

    Args:
        source (List[str]): Source lines of the markdown cell.
        titles (Dict[int, Dict[str, Any]]): Accumulator, updated in place.
    Returns:
        Dict[int, Dict[str, Any]]: The same ``titles`` dictionary. Each entry
        holds 'title' (text after the hashes, line ending preserved), 'level'
        (number of leading hashes minus one) and 'anchor' (title with spaces
        replaced by underscores plus the entry index).
    """
    for line in source:
        leading = re.match(r'#+', line)
        if leading is None:
            continue
        hashes = leading.group(0)
        # Only the leading run of '#' defines the level; a '#' inside the
        # title text (e.g. "C#") must not change it.
        level = len(hashes) - 1
        title = line[len(hashes):].lstrip(' \t')
        index = len(titles)
        anchor = f"{title.rstrip().replace(' ', '_')}_{index}"
        titles[index] = {'title': title, 'level': level, 'anchor': anchor}
    return titles


def enchular_ipynb(file, sns_palette=None, colors=None, update_colors=None):
    """
    Load a notebook, add a table of contents and anchored headings to it.

    Args:
        file (str): Path to the Jupyter notebook file.
        sns_palette (Optional[str]): Seaborn palette name; when given it
            takes precedence over ``colors``.
        colors (Optional[List[str]]): Colors to be used for the headings.
        update_colors (Optional[List[str]]): When given, heading colors are
            rewritten with this list after the anchors are added.
    Returns:
        Dict[str, Any]: The modified notebook document.
    """
    if sns_palette:
        # NOTE(review): the palette size is the length of the palette *name*,
        # kept from the original code; confirm the intended number of colors.
        palette = sns.color_palette(sns_palette, len(sns_palette))
        # Seaborn yields RGB float tuples; CSS needs hex strings.
        colors = list(palette.as_hex())

    # Notebook files are JSON encoded as UTF-8.
    with open(file, 'r', encoding='utf-8') as f:
        document = json.load(f)

    # Add the table of contents and anchor elements to the headings
    document = create_new_document(document, colors)

    # If update_colors is provided, update the heading colors directly in the notebook
    if update_colors:
        document = update_heading_colors_in_document(document, update_colors)

    return document



def cutipy_nb(file, sns_palette=None, colors=None, save=True, update_colors=None):
    """
    Process a notebook and optionally save the result next to the original.

    Args:
        file (str): Path to the Jupyter notebook file.
        sns_palette (Optional[str]): Seaborn palette name for the headings.
        colors (Optional[List[str]]): Colors to be used for the headings.
        save (bool): When true, the processed notebook is written to disk.
        update_colors (Optional[List[str]]): Replacement colors for headings.
    Returns:
        Dict[str, Any]: The processed notebook, so that ``save=False`` still
        gives the caller access to the result.
    """
    doc_chulo = enchular_ipynb(file, sns_palette, colors, update_colors)
    if save:
        save_doc_enchulado(doc_chulo, file)
    return doc_chulo

78 changes: 78 additions & 0 deletions cutiepynb/styles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import re

def maquillate(title, level, colors, span=True):
    """
    Build the colored HTML fragment for a heading title.

    Args:
        title (str): The title text.
        level (int): Heading level; selects the color, wrapping around the list.
        colors (List[str]): Non-empty list of colors.
        span (bool): When true, return a complete <span> element. When false,
            return only the style attribute, the closing '>' and the title,
            for callers that write the surrounding tag themselves.
    Returns:
        str: The formatted fragment.
    """
    picked = colors[level % len(colors)]
    if not span:
        return f' style="color: {picked}">{title}'
    return f'<span style="color: {picked}">{title}</span>'


def update_heading_colors_in_document(document, new_colors):
    """
    Replace the inline color of heading <span> tags in a notebook.

    For every markdown line that starts with '#' and contains a <span>, the
    color value of its ``style="color: ..."`` attribute is replaced. The color
    is chosen from the zero-based heading level (one '#' is level 0), matching
    the level used when the headings were first colored.

    Args:
        document (Dict[str, Any]): Parsed notebook; modified in place.
        new_colors (List[str]): New colors for the headings. When empty, the
            document is returned unchanged.
    Returns:
        Dict[str, Any]: The same ``document`` object. Each markdown cell's
        source is stored back as one newline-joined string.
    """
    if not new_colors:
        return document

    span_pattern = re.compile(r'<span[^>]*>(.*?)</span>')
    # Match any color value (hex, named, rgb(...)), not only 6-digit hex,
    # and stop at ';' or '"' so other style properties are left alone.
    color_pattern = re.compile(r'style="color:\s*[^;"]*')

    def update_existing_color_span(text, new_color):
        """Return ``text`` with the color of its style attribute replaced."""
        # A callable replacement keeps backslashes in new_color literal.
        return color_pattern.sub(lambda _m: f'style="color: {new_color}', text)

    for cell in document['cells']:
        if cell['cell_type'] != 'markdown':
            continue

        source = cell['source']
        if isinstance(source, list):
            source = ''.join(source)

        updated_source_lines = []
        for line in source.splitlines():
            hashes = re.match(r'#+', line)
            span = span_pattern.search(line) if hashes else None
            if span is None:
                # Not a heading, or a heading without a span: keep as is.
                updated_source_lines.append(line)
                continue
            hash_count = len(hashes.group(0))
            new_color = new_colors[(hash_count - 1) % len(new_colors)]
            updated_span = update_existing_color_span(span.group(0), new_color)
            updated_source_lines.append(f"{'#' * hash_count} {updated_span}")

        cell['source'] = '\n'.join(updated_source_lines)

    return document
13 changes: 13 additions & 0 deletions cutiepynb/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import json
import uuid

def generate_corpus_id():
    """Return the first 8 hexadecimal characters of a freshly generated UUID4."""
    full_hex = uuid.uuid4().hex
    return full_hex[:8]

def save_doc_enchulado(doc_chulo, file):
    """
    Write a processed notebook next to the original with a '_chulo' suffix.

    Args:
        doc_chulo (Dict[str, Any]): The notebook document to serialize as JSON.
        file (str): Path of the original notebook. A trailing '.ipynb' is
            replaced by '_chulo.ipynb'; any other path gets '_chulo.ipynb'
            appended.
    """
    file = str(file)
    extension = '.ipynb'
    # Strip only a trailing extension, so '.ipynb' inside a directory name
    # does not truncate the path.
    base = file[:-len(extension)] if file.endswith(extension) else file
    name_chulo = base + '_chulo.ipynb'
    with open(name_chulo, "w", encoding="utf-8") as outfile:
        json.dump(doc_chulo, outfile)

    print('Saved as ', name_chulo)
57 changes: 57 additions & 0 deletions docs/examples/Test.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "8a04f659",
"metadata": {},
"source": [
"# Titulo 1"
]
},
{
"cell_type": "markdown",
"id": "fa779a58",
"metadata": {},
"source": [
"## Titulo 2"
]
},
{
"cell_type": "markdown",
"id": "5debffe4",
"metadata": {},
"source": [
"### Titulo 3"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0ece2fc9",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
11 changes: 11 additions & 0 deletions docs/examples/example_notebooks.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Example Notebooks

Here are some example notebooks that demonstrate how to use the `cutiepynb` package:

- [Test.ipynb](Test.ipynb): Demonstrates how to use `cutipy_nb` to apply colors and create a table of contents in a Jupyter Notebook.
- [Another_example.ipynb](Another_example.ipynb): Shows additional customization options for modifying notebook headings.

To run the examples, use the following command in your terminal:
```bash
python run_examples.py
```
Loading

0 comments on commit 43b1548

Please sign in to comment.