-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscrape_to_table.py
51 lines (44 loc) · 1.3 KB
/
scrape_to_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import html

import requests
from bs4 import BeautifulSoup
# Fetch the URL.
url = "https://qwik.builder.io/docs/guides/react-cheat-sheet/"
# requests applies no timeout by default; without one a stalled connection
# would block the script indefinitely.
response = requests.get(url, timeout=30)
# Fail loudly on a 4xx/5xx answer instead of scraping an error page.
response.raise_for_status()
html_content = response.text

# Parse HTML content
soup = BeautifulSoup(html_content, 'html.parser')


def _code_after(heading):
    """Return the <code> tag inside the first <div> sibling after *heading*.

    Returns None when *heading* is None or when any link of the expected
    heading -> div -> pre -> code chain is missing, so callers can skip
    sections that do not follow that layout.
    """
    if heading is None:
        return None
    container = heading.find_next_sibling('div')
    if container is None:
        return None
    pre = container.find('pre')
    if pre is None:
        return None
    return pre.find('code')


# Find all sections represented by <h2> tags. For each one, the first following
# <h3> sibling is taken as the Qwik example and the <h3> after that as the
# React example; the code sample is looked up below each of them.
section_names = []
sections = soup.find_all('h2')
for tag in sections:
    qwik_h3 = tag.find_next_sibling('h3')
    react_h3 = qwik_h3.find_next_sibling('h3') if qwik_h3 is not None else None
    qwik = _code_after(qwik_h3)
    react = _code_after(react_h3)
    if qwik is None or react is None:
        # This <h2> is not followed by a complete pair of code samples; skip it
        # rather than crash with AttributeError on a missing element.
        continue
    section_names.append({
        "name": tag.text.strip(),
        "qwik": qwik,
        "react": react,
    })
# NOTE: This is futile with the standard Markdown dialects, so generate an
# HTML table instead.
#
# The section name is plain text taken from the heading, so it is escaped
# before being placed in a cell. The qwik/react values are interpolated as-is,
# exactly as collected in section_names.
table_rows = "".join(
    f"""<tr>
<td>{html.escape(row['name'])}</td>
<td><pre>{row['qwik']}</pre></td>
<td><pre>{row['react']}</pre></td>
</tr>"""
    for row in section_names
)

# Header cells are wrapped in a <tr>: <th> is not allowed directly inside
# <thead>.
html_table = f"""
<table>
<thead>
<tr>
<th>Aspect</th>
<th>Qwik</th>
<th>React</th>
</tr>
</thead>
<tbody>{table_rows}</tbody>
</table>
"""
# Placeholder action: swap the print below for whatever should consume the
# generated table.
if __name__ == "__main__":
    # Write the generated HTML table to standard output.
    print(html_table)