-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathutils.py
266 lines (227 loc) · 9.72 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
import matplotlib.pyplot as plt
import os
import re
import streamlit as st
from code_editor import code_editor
import io
import sys
import json
def generate_header_id(header_text):
    """
    Generate a valid HTML id from the header text by removing special characters.

    1. Trimming: Strips leading and trailing whitespace so that surrounding
       whitespace (for example a trailing newline) does not turn into a
       leading or trailing hyphen.
    2. Lowercase Conversion: Converts the entire header text to lowercase.
    3. Whitespace Replacement: Replaces each run of whitespace characters with a single hyphen ('-').
    4. Special Character Removal: Removes any characters that are not ASCII
       lowercase letters, digits, hyphens, or underscores. Non-ASCII letters
       are removed as well.

    Parameters:
        header_text (str): The original header text that needs to be converted into a valid HTML id.

    Returns:
        str: A sanitized, lowercase string with hyphens replacing inner whitespace and all other
        special characters removed, suitable for use as an HTML id.
    """
    # Trim first: otherwise "Title\n" would become "title-".
    header_id = header_text.strip().lower()
    # Collapse every whitespace run into one hyphen
    header_id = re.sub(r'\s+', '-', header_id)
    # Remove any characters that are not alphanumeric, hyphens, or underscores
    header_id = re.sub(r'[^a-z0-9\-_]', '', header_id)
    return header_id
def get_first_level_headers(language, folder, filenames):
    """
    Extract the first-level markdown header (# ) from each file.

    Reads files from "docs/{language}/{folder}/", retrieves the first line starting with '# ',
    and returns a list of the header texts. Prints an error if a file is not found.

    Only the leading '# ' marker and an optional closing run of '#'
    (as in "# Title ##") are removed, so a header that legitimately ends
    with '#', such as "# Learning C#", is returned intact.

    Parameters:
        language (str): Language code (used in lowercase for the path).
        folder (str): Folder name containing the files.
        filenames (list): List of file names.

    Returns:
        list: First-level headers, in the order of `filenames`. Files that are
        missing or contain no '# ' line contribute no entry, so the result can
        be shorter than `filenames`.
    """
    headers = []
    for filename in filenames:
        base_path = f"docs/{language.lower()}/{folder}/{filename}"
        try:
            with open(base_path, 'r', encoding='utf-8') as file:
                for line in file:
                    if line.startswith('# '):
                        # Drop exactly the two-character marker; str.strip('# ')
                        # would also eat '#' characters at the end of the title.
                        header = line[2:].strip()
                        # Remove an optional closing sequence (" ##"); it must
                        # be preceded by whitespace, so "C#" is left alone.
                        header = re.sub(r'\s+#+$', '', header)
                        headers.append(header)
                        break  # Stop after the first header is found
        except FileNotFoundError:
            print(f"File not found: {base_path}")
    return headers
def run_code_editor(default_code, global_namespace, height=None, key=None):
    """
    Launch a Streamlit code editor, execute submitted Python code, and display output.

    Loads custom buttons from 'custom/buttons_code_cells.json' and opens a code editor
    with the given default code. When the user submits non-empty code, it is executed in
    the provided global namespace. Captured standard output is shown with st.code and
    every matplotlib figure left open by the code is rendered, after which all figures
    are closed.

    Parameters:
        default_code (str): The initial code shown in the editor.
        global_namespace (dict): The namespace in which the submitted code is executed.
        height (list, optional): A two-element list defining the editor's height.
            Defaults to [2, 30] when omitted.
        key (str, optional): A unique key for the editor widget.

    Returns:
        None
    """
    # Build the default per call instead of sharing one mutable list
    # object across every invocation.
    if height is None:
        height = [2, 30]

    with open('custom/buttons_code_cells.json', encoding='utf-8') as json_button_file:
        custom_buttons = json.load(json_button_file)

    response_dict = code_editor(
        default_code,
        lang="python",
        props={"style": {"pointerEvents": "none"}},
        height=height,
        theme="monokai",
        buttons=custom_buttons,
        key=key  # Add a unique key here
    )

    # Nothing to do until the user submits non-empty code.
    if response_dict['type'] != "submit" or not response_dict['text']:
        return

    code = response_dict['text']
    old_stdout = sys.stdout
    sys.stdout = buffer = io.StringIO()
    try:
        # SECURITY NOTE(review): exec runs whatever the user typed with the
        # full privileges of the app process. Only acceptable when the app is
        # served to trusted users or runs in a sandbox.
        exec(code, global_namespace)
    except IndentationError as e:
        st.error(f"Indentation Error: {e}")
    except Exception as e:
        st.error(f"Error: {e}")
    finally:
        # Always restore stdout, even if the submitted code raises something
        # outside Exception (e.g. SystemExit), so later prints are not
        # swallowed by the buffer.
        sys.stdout = old_stdout

    output = buffer.getvalue()
    if output:
        st.code(output, language="python")

    # Render every figure the code left open, not only the current one,
    # because all of them are closed right afterwards.
    for figure_number in plt.get_fignums():
        st.pyplot(plt.figure(figure_number))
    plt.close('all')
def load_markdown_preview(filename, folder, language, lines=3):
    """
    Load a markdown file and return a preview of its first few lines.

    The file is read from "docs/{language}/{folder}/{filename}" as UTF-8, the
    same encoding the other loaders in this module use, so the preview does
    not depend on the platform's default encoding.

    Parameters:
        filename (str): Name of the markdown file.
        folder (str): Folder containing the file.
        language (str): Language folder name (converted to lowercase).
        lines (int, optional): Number of lines to include in the preview (default is 3).

    Returns:
        str: The first `lines` lines joined together, with leading and trailing
        whitespace removed.

    Raises:
        FileNotFoundError: If the file does not exist (not handled here).
    """
    # Load the markdown file
    full_path = f"docs/{language.lower()}/{folder}/{filename}"
    with open(full_path, "r", encoding="utf-8") as file:
        content = file.readlines()
    # Get the first few lines for the preview
    preview = "".join(content[:lines]).strip()
    return preview
def _flush_markdown(markdown_buffer):
    """Render the buffered markdown lines with one st.markdown call and empty the buffer in place."""
    if markdown_buffer:
        st.markdown('\n'.join(markdown_buffer), unsafe_allow_html=True)
        markdown_buffer.clear()


def _render_alert(alert_type, alert_lines):
    """Render the collected alert lines with the Streamlit element mapped to the alert type."""
    renderers = {
        "NOTE": st.info,
        "TIP": st.success,
        "IMPORTANT": st.warning,
        "WARNING": st.error,
        "CAUTION": st.warning,
    }
    renderer = renderers.get(alert_type)
    if renderer is not None:
        renderer('\n'.join(alert_lines).strip())


def load_markdown_file_combined(filename, folder, language, global_namespace=None, **placeholders):
    """
    Load markdown content from a file and process dynamic content, images, code blocks,
    alerts, and dataframe blocks based on the file's content.

    The file "docs/{language}/{folder}/{filename}" is read as UTF-8 and walked line by
    line. Plain lines are buffered and rendered with st.markdown; the buffer is flushed
    whenever a special element has to be rendered so that the page order is preserved.

    Recognized constructs (all markers must start at the beginning of the line):
    - ``` fences: the enclosed lines are passed to run_code_editor when
      `global_namespace` is given. When it is None the code is not rendered at all.
      NOTE(review): confirm that hiding code blocks in that case is intended.
    - "> [!NOTE|TIP|IMPORTANT|WARNING|CAUTION]" ... "> [!END]": alert block, rendered
      with st.info / st.success / st.warning / st.error / st.warning respectively.
    - "> [!dataframe]" ... "> [!end]" (lowercase): the last line inside the block is
      taken as a key into `placeholders`; if present, that value is shown with
      st.dataframe. Unknown keys render nothing.
    - "![caption](path)": rendered with st.image at width 650; any text after the
      image on the same line is discarded.

    An alert or code block that is still open at the end of the file is rendered
    as if it had been closed on the last line instead of being dropped. An
    unterminated dataframe block renders nothing.

    Parameters:
    - filename: The markdown file name.
    - folder: The folder in which the file is stored.
    - language: The language sub-folder (converted to lowercase).
    - global_namespace: Optional. If provided, code blocks will be executed via run_code_editor.
    - placeholders: Optional keyword arguments for dynamic placeholder replacement. Every
      "{name}" occurrence in the file is replaced by str(value) before parsing.

    Returns:
        None. If the file does not exist, an st.error message is shown instead.
    """
    base_path = f"docs/{language.lower()}/{folder}/{filename}"
    try:
        with open(base_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        st.error(f"File not found: {base_path}")
        return

    # Replace dynamic placeholders if any
    for key, value in placeholders.items():
        content = content.replace(f"{{{key}}}", str(value))

    # Initialize buffers and flags
    markdown_buffer = []
    in_code_block = False
    code_buffer = []
    in_alert_block = False
    alert_type = None
    alert_buffer = []
    in_dataframe_block = False
    dataframe_var = None  # The key for the dataframe in placeholders
    line_number = 0  # For generating unique keys for code blocks

    # Regular expressions for different markers
    alert_start_re = re.compile(r'> \[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION)\]')
    alert_end_re = re.compile(r'> \[!END\]')
    dataframe_start_re = re.compile(r'> \[!dataframe\]')
    dataframe_end_re = re.compile(r'> \[!end\]')
    image_re = re.compile(r'!\[(.*?)\]\((.*?)\)')

    # Process the file line by line. The order of the checks matters: code
    # fences win over everything, then dataframe blocks, then alerts, images.
    for line_number, line in enumerate(content.splitlines(), start=1):
        # --- Code Block Handling ---
        if line.startswith("```"):
            if in_code_block:
                in_code_block = False
                if global_namespace is not None:
                    run_code_editor(
                        '\n'.join(code_buffer),
                        global_namespace,
                        key=f"{filename}_line_{line_number}",
                    )
                code_buffer = []
            else:
                in_code_block = True
                # Render any pending markdown before starting the code block
                _flush_markdown(markdown_buffer)
            continue  # The fence line itself is never rendered

        if in_code_block:
            code_buffer.append(line)
            continue

        # --- Dataframe Block Handling ---
        if in_dataframe_block:
            if dataframe_end_re.match(line):
                in_dataframe_block = False
                if dataframe_var and dataframe_var in placeholders:
                    _flush_markdown(markdown_buffer)
                    st.dataframe(placeholders[dataframe_var])
                # Reset so a later block without a key line cannot reuse it.
                dataframe_var = None
            else:
                # Assume the line inside a dataframe block is the key for the dataframe
                dataframe_var = line.strip()
            continue

        if dataframe_start_re.match(line):
            in_dataframe_block = True
            continue

        # --- Alert Block Handling ---
        if in_alert_block:
            if alert_end_re.match(line):
                in_alert_block = False
                _render_alert(alert_type, alert_buffer)
                alert_buffer = []
            else:
                alert_buffer.append(line)
            continue

        alert_match = alert_start_re.match(line)
        if alert_match:
            _flush_markdown(markdown_buffer)
            alert_type = alert_match.group(1)
            in_alert_block = True
            continue

        # --- Image Handling ---
        image_match = image_re.match(line)
        if image_match:
            _flush_markdown(markdown_buffer)
            caption, img_path = image_match.groups()
            st.image(img_path, caption=caption, width=650)
            continue

        # --- Accumulate Markdown ---
        markdown_buffer.append(line)

    # Blocks left open at end of file: render what was collected rather than
    # silently discarding the author's content.
    if in_alert_block and alert_buffer:
        _render_alert(alert_type, alert_buffer)
    if in_code_block and code_buffer and global_namespace is not None:
        run_code_editor(
            '\n'.join(code_buffer),
            global_namespace,
            key=f"{filename}_line_{line_number}",
        )

    # Render any remaining markdown content
    _flush_markdown(markdown_buffer)