-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
39 lines (29 loc) · 832 Bytes
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import pandas as pd
from bs4 import BeautifulSoup
from bs4.element import Tag
# Script: convert the first <table> in 'JoSAA.html' into 'josaa-cutoff.xlsx'.
# The first <tr> supplies the column headings (its <th> cells); every
# following <tr> becomes one spreadsheet row built from its <td> cells.

# Read the saved page. The context manager closes the handle even when
# read() raises, and the explicit encoding keeps decoding independent of
# the platform's locale default.
with open('JoSAA.html', 'r', encoding='utf-8') as f:
    lines = f.read()

soup = BeautifulSoup(lines, 'html.parser')

# find() returns None when the document has no <table>; stop with a clear
# message instead of failing with an AttributeError on the next call.
table = soup.find('table')
if table is None:
    raise SystemExit("No <table> element found in 'JoSAA.html'.")

tableRows = table.find_all('tr')
if not tableRows:
    raise SystemExit("The table in 'JoSAA.html' contains no rows.")

# Separate the heading row from the data rows.
headingRow: Tag
headingRow, *tableRows = tableRows

# Heading text is taken verbatim (no whitespace clean-up is applied here).
headings = [heading.get_text() for heading in headingRow.find_all('th')]

# Body cells: drop embedded newlines, then trim surrounding whitespace.
data = [
    [cell.get_text().replace('\n', '').strip() for cell in row.find_all('td')]
    for row in tableRows
]

dataframe = pd.DataFrame(data, columns=headings)
dataframe.to_excel('josaa-cutoff.xlsx', index=False)

print('Exported to excel file!')
print('Open the exported sheet and format it as per your need.')