forked from ArturoAmaya/ExploratoryCurricularAnalytics
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathscrape_prereqs.py
38 lines (34 loc) · 1.43 KB
/
scrape_prereqs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import re
from typing import List
from urllib.parse import urlencode
from urllib.request import Request, urlopen
from parse_defs import CourseCode, Prerequisite, TermCode
def get_prereqs(term: TermCode, course: CourseCode) -> List[List[Prerequisite]]:
    """Scrape the prerequisites listed for `course` in `term`.

    Requests UCSD's Schedule of Classes prerequisite page, passing `term` as
    `termCode` and the joined parts of `course` as `courseId`, then scans the
    raw response bytes with a regular expression.

    Returns:
        A list of OR-lists in page order. A table cell ending in "." opens a
        new OR-list, and every course code found after it is appended to that
        list. A course followed by the `***` marker is stored with its second
        `Prerequisite` argument set to True (may be taken concurrently)
        instead of False.

    Raises:
        IndexError: If a course code shows up before any OR-list was opened,
            or a `***` marker shows up before any course in the current list.

    Errors raised by `urlopen` are not caught here.
    """
    query = urlencode({"termCode": term, "courseId": "".join(course)})
    url = "https://act.ucsd.edu/scheduleOfClasses/scheduleOfClassesPreReq.htm?" + query
    with urlopen(Request(url)) as response:
        page = response.read()

    # Three alternatives, in order: a cell ending in "." (row marker), a bold
    # course code split into subject letters and number, and the "***" marker.
    pattern = rb'(\.)</td>|<span class="bold_text">([A-Z]+)(\d+[A-Z]*)|<span class="ertext">\*\*\*</span>\s'

    or_lists: List[List[Prerequisite]] = []
    for found in re.finditer(pattern, page):
        row_marker, subject, number = found.groups()
        if subject is not None and number is not None:
            # Prerequisite course; not concurrent unless a *** marker follows
            code = CourseCode(subject.decode(), number.decode())
            or_lists[-1].append(Prerequisite(code, False))
        elif row_marker is not None:
            # New OR-list
            or_lists.append([])
        else:
            # *** course may be taken concurrently (eg FA12 ANAR 144):
            # rebuild the most recent prerequisite with the flag set.
            current = or_lists[-1]
            current[-1] = Prerequisite(current[-1].course_code, True)
    return or_lists
# Example lookups: each one performs a live request and prints the parsed
# result. They run at module top level, so they also run on import.
for term_name, subject, number in (
    ("FA98", "ECE", "108"),
    ("FA12", "ANAR", "144"),
):
    print(get_prereqs(TermCode(term_name), CourseCode(subject, number)))