-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathformat-md.py
151 lines (111 loc) · 4.92 KB
/
format-md.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# Convert *.md files to *.qmd files and pre-process them
# Randomize footnote identifiers in multiple Quarto files to avoid conflicts
# Convert reference-style links to inline links
# Remove line breaks within a straight Chinese quote
# Reformat display math equations in Ulysses
# Copyright: © 2024 Tom Ben
# License: MIT License
import re
import glob
import os
import random
import string
def get_md_files():
    """Return the numbered Markdown sources found under ``contents/``.

    A file qualifies when its name starts with a digit and ends in ``.md``.
    The returned paths are relative to the current working directory and
    come back in whatever order ``glob`` yields them.
    """
    numbered_sources = glob.glob("contents/[0-9]*.md")
    return numbered_sources
def randomize_footnote_identifiers(qmd_content):
    """Replace every numeric footnote identifier with a random 5-character one.

    Both references (``[^1]``) and definitions (``[^1]:``) are rewritten, and
    all occurrences of the same number receive the same new identifier, so
    each footnote stays attached to its definition. Footnotes whose
    identifier is not purely numeric are left untouched.

    Args:
        qmd_content: Full text of a Quarto/Markdown document.

    Returns:
        The text with numeric footnote identifiers replaced.
    """
    footnote_pattern = re.compile(r'\[\^(\d+)\]')
    alphabet = string.ascii_letters + string.digits

    # Map old ids in order of first appearance so the result is reproducible
    # when the caller seeds ``random``.
    replacements = {}
    taken = set()
    for old_id in dict.fromkeys(footnote_pattern.findall(qmd_content)):
        new_id = ''.join(random.choices(alphabet, k=5))
        # Re-draw if the id was already handed out or already names a
        # footnote in the document; either would merge two footnotes.
        while new_id in taken or f'[^{new_id}]' in qmd_content:
            new_id = ''.join(random.choices(alphabet, k=5))
        taken.add(new_id)
        replacements[old_id] = new_id

    # A single pass covers references and definitions alike (the definition
    # form is just the reference followed by a colon) and guarantees that a
    # freshly inserted all-digit id is never rewritten a second time.
    return footnote_pattern.sub(
        lambda m: f'[^{replacements[m.group(1)]}]', qmd_content)
def convert_reference_to_inline(qmd_content):
    """Convert numbered reference-style links to inline links.

    Definitions of the form ``[1]: https://example.com`` are collected and
    removed from the text, then every usage ``[text][1]`` is rewritten as
    ``[text](https://example.com)``.

    A usage whose number has no definition is left exactly as written
    instead of being turned into an empty ``[text]()`` link.

    Args:
        qmd_content: Full text of a Quarto/Markdown document.

    Returns:
        The text with definitions removed and resolvable usages inlined.
    """
    # ``\A`` lets a definition on the very first line be recognised too;
    # everywhere else the leading newline is consumed together with the
    # definition so that removing it leaves no blank line behind.
    definition_pattern = re.compile(r'(?:\A|\n)\[(\d+)\]:\s*(.*)')
    usage_pattern = re.compile(r'\[(.*?)\]\[(\d+)\]')

    # If a number is defined more than once, the last definition wins.
    reference_links = dict(definition_pattern.findall(qmd_content))

    # Remove the reference link definitions from the content.
    qmd_content = definition_pattern.sub('', qmd_content)

    def replace_link(match):
        url = reference_links.get(match.group(2))
        if url is None:
            # Unknown reference (or text that merely looks like one, such
            # as ``x[i][0]``): keep the original text.
            return match.group(0)
        return f'[{match.group(1)}]({url})'

    return usage_pattern.sub(replace_link, qmd_content)
def remove_linebreaks_in_quotes(text):
    """Join lines that were broken inside a 「…」 quotation.

    Every span from an opening ``「`` to the next ``」`` has its newline
    characters removed; text outside such spans is returned unchanged.
    """
    quoted_span = re.compile(r'「[^」]*?」')

    def joined(found):
        # Splitting on the newline and re-joining drops every line break
        # inside the quoted span.
        return ''.join(found.group(0).split('\n'))

    return quoted_span.sub(joined, text)
def reformat_math_equations(content):
    """Rewrite math spans as ``$$`` blocks with the delimiters on their own lines.

    Two passes run in order:

    1. ``$...$`` followed (optionally after spaces) by a ``{#label}`` becomes
       a block whose closing ``$$`` is followed by a space and the label.
    2. ``$$...$$`` not immediately followed by ``{#`` becomes a block
       without a label.

    In both cases the equation body is stripped of surrounding whitespace.
    """
    labeled_math = re.compile(r"\$(.+?)\$ *(\{#.+?\})")
    unlabeled_display = re.compile(r"(?<!\$)\$\$([^\$]+?)\$\$(?!\{#)")

    def as_block(body, trailer=""):
        # Opening and closing delimiters each sit on their own line.
        return "$$\n" + body.strip() + "\n$$" + trailer

    def labeled_block(found):
        return as_block(found.group(1), " " + found.group(2).strip())

    def plain_block(found):
        return as_block(found.group(1))

    with_labels_done = labeled_math.sub(labeled_block, content)
    return unlabeled_display.sub(plain_block, with_labels_done)
def process_file(input_file, output_file):
    """Read ``input_file``, apply all clean-up steps, write ``output_file``.

    Both files are handled as UTF-8 text. The regex steps run first, in the
    order listed, followed by the footnote, link, quote and math transforms.
    ``input_file`` and ``output_file`` may be the same path, because the
    input is read completely before the output is opened.
    """
    with open(input_file, "r", encoding="utf-8") as source:
        text = source.read()

    # (pattern, replacement, flags) triples, applied in order.
    regex_steps = (
        # Remove links with `[@]` and a space before it
        (r"\s*\[@\].*?[\]\)]", "", 0),
        # Remove square brackets enclosing the caption
        (r"^\[(.*)\}\]$", r"\n :\1}", re.MULTILINE),
        # Merge multiple adjacent citations into one
        (r"\][\(\[].*?;\s*\[", "; ", 0),
        # Replace '{{\<...\>}}' with '{{<...>}}'
        (r"\{\{\\<(.*)\\>}}", r"{{<\1>}}", 0),
        # Remove comment blocks to avoid errors of Python filter
        (r"^```{=comment}.*?^```$", "", re.DOTALL | re.MULTILINE),
    )
    for pattern, replacement, flags in regex_steps:
        text = re.sub(pattern, replacement, text, flags=flags)

    # Whole-document transforms, applied in order.
    transforms = (
        randomize_footnote_identifiers,
        convert_reference_to_inline,
        remove_linebreaks_in_quotes,
        reformat_math_equations,
    )
    for transform in transforms:
        text = transform(text)

    with open(output_file, "w", encoding="utf-8") as target:
        target.write(text)
def main():
    """Convert the numbered ``contents/*.md`` files and clean up other ``.qmd`` files.

    First, every Markdown source returned by ``get_md_files()`` is processed
    into a ``.qmd`` file next to it. Then the working directory is changed
    to ``contents`` and every remaining ``*.qmd`` file there is processed in
    place.

    Files generated in the first step are skipped in the second one, so no
    document is run through the transforms twice.
    """
    generated = set()
    for md_file in get_md_files():
        # Swap only the extension; ``str.replace`` would also rewrite any
        # other ".md" occurring earlier in the path.
        qmd_file = os.path.splitext(md_file)[0] + ".qmd"
        process_file(md_file, qmd_file)
        generated.add(os.path.basename(qmd_file))

    os.chdir('contents')
    # After the chdir, glob yields bare file names, which is what
    # ``generated`` holds.
    for qmd_file in glob.glob('*.qmd'):
        if qmd_file in generated:
            continue
        process_file(qmd_file, qmd_file)


if __name__ == "__main__":
    main()