forked from ArturoAmaya/ExploratoryCurricularAnalytics
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathparse.py
287 lines (239 loc) · 9.73 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
"""
Parses the prerequisite and academic plan CSV files into objects for easier
manipulation.
Exports:
`prereqs`, a function that takes a term code and returns a dictionary mapping
from a course code (subject code and number) to a list of prerequisites, which
are each lists of possible courses to satisfy the requirement.
`major_plans`, a function that takes a start year (and optionally a plan
length) and returns a dictionary mapping from ISIS major codes to `MajorPlans`
objects, which store the raw courses of the academic plan for each college
code and can return the processed list of courses for a given college.
`major_codes`, a function that returns a dictionary mapping from ISIS major
codes to `MajorInfo` objects, which contain data from the ISIS major codes
spreadsheet.
python3 parse.py <year> # Get a list of major codes to upload with upload.sh
"""
import csv
from functools import cached_property
import os
from typing import Dict, Iterable, List, NamedTuple, Optional, Set, Tuple
from parse_defs import CourseCode, ProcessedCourse, Prerequisite, RawCourse, TermCode
from university import university
__all__ = ["prereqs", "major_plans", "major_codes"]
def prereq_rows_to_dict(
    rows: Iterable[List[str]],
) -> Dict[CourseCode, List[List[Prerequisite]]]:
    """
    Converts prerequisite rows from a CSV to a dictionary mapping from a course
    code to its prerequisites.

    The dictionary values are lists of lists. The outer list is a list of
    requirements, like an AND, while each inner list is a list of possible
    courses to satisfy the requirement, like an OR.

    Every row must have exactly 12 columns (see the unpacking below);
    otherwise unpacking raises a `ValueError`. A course that only appears on
    rows with an empty prereq sequence ID is still present in the result, with
    an empty list of requirements.
    """
    courses: Dict[CourseCode, List[List[Prerequisite]]] = {}
    for (
        _,  # Term Code
        _,  # Term ID
        _,  # Course ID
        subject,  # Course Subject Code
        number,  # Course Number
        req_id,  # Prereq Sequence ID
        _,  # Prereq Course ID
        req_subj,  # Prereq Subject Code
        req_num,  # Prereq Course Number
        _,  # Prereq Minimum Grade Priority
        _,  # Prereq Minimum Grade
        allow_concurrent,  # Allow concurrent registration
    ) in rows:
        course = CourseCode(subject.strip(), number.strip())
        # Register the course even when this row carries no prerequisite.
        requirements = courses.setdefault(course, [])
        if req_id == "":
            continue
        # The sequence ID is turned into a 0-based index into the outer (AND)
        # list; pad with empty alternatives lists up to that index.
        index = int(req_id) - 1
        while len(requirements) <= index:
            requirements.append([])
        requirements[index].append(
            Prerequisite(
                CourseCode(req_subj.strip(), req_num.strip()),
                allow_concurrent == "Y",
            )
        )
    return courses
def terms() -> List[TermCode]:
    """
    Returns the list of term codes exposed by the module-level parse cache
    (`_cache.terms`).
    """
    available_terms = _cache.terms
    return available_terms
_prereq_cache: Dict[TermCode, Dict[CourseCode, List[List[Prerequisite]]]] = {}


def prereqs(term: str) -> Dict[CourseCode, List[List[Prerequisite]]]:
    """
    Returns the prerequisites for the given term, loading and caching them from
    `./files/prereqs/prereqs_<term>.csv` on first use.

    A term before the first available term is clamped to the first one, and a
    term after the last available term is clamped to the last one. If there
    are no available terms at all, or the CSV for the (clamped) term does not
    exist, an empty dictionary is cached and returned.

    The returned dictionary is the cached object itself, so callers share it.
    """
    code = TermCode(term)
    available = terms()
    if not available:
        # No prerequisite files: mirror the missing-file fallback below rather
        # than failing with an IndexError on `available[0]`.
        return _prereq_cache.setdefault(code, {})

    earliest, latest = available[0], available[-1]
    if code < earliest:
        code = earliest
    elif code > latest:
        code = latest

    if code not in _prereq_cache:
        try:
            with open(f"./files/prereqs/prereqs_{code}.csv", newline="") as file:
                _prereq_cache[code] = prereq_rows_to_dict(csv.reader(file))
            # Called for its effect on the cached dictionary (the return value
            # is unused).
            university.fix_prereqs(_prereq_cache[code], code)
        except FileNotFoundError:
            _prereq_cache[code] = {}
    return _prereq_cache[code]
class MajorPlans:
    """
    Represents a major's set of academic plans. Contains plans for each college.

    To get the plan for a specific college, use the two-letter college code. For
    example, `plan("FI")` contains the academic plan for ERC (Fifth College).
    """

    year: int
    # TODO: MajorPlans.department vs MajorInfo.department
    department: str
    major_code: str
    # College codes accepted by `university.keep_plan`.
    colleges: Set[str]
    # Raw courses per college code, in the order they were added.
    raw_plans: Dict[str, List[RawCourse]]
    # Lazily filled cache of `university.process_plan` results per college.
    _parsed_plans: Dict[str, List[ProcessedCourse]]

    def __init__(self, year: int, department: str, major_code: str) -> None:
        self.year = year
        self.department = department
        self.major_code = major_code
        self.colleges = set()
        self.raw_plans = {}
        self._parsed_plans = {}

    def add_raw_course(self, college_code: str, course: RawCourse) -> None:
        """
        Appends a raw course to the given college's plan.

        The first time a college code is seen, `university.keep_plan` decides
        whether it is added to `colleges`. Courses are stored in `raw_plans`
        either way, so a college that is not kept never causes a `KeyError`
        here and never has its course list reset.
        """
        if college_code not in self.raw_plans:
            if university.keep_plan(self.year, college_code):
                self.colleges.add(college_code)
            self.raw_plans[college_code] = []
        self.raw_plans[college_code].append(course)

    def plan(self, college: str) -> List[ProcessedCourse]:
        """
        Returns the processed plan for the given college code, computing it
        with `university.process_plan` on first access and caching the result.

        Raises `KeyError` if no course was ever added for `college`.
        """
        if college not in self._parsed_plans:
            self._parsed_plans[college] = university.process_plan(
                self.raw_plans[college]
            )
        return self._parsed_plans[college]

    def curriculum(self, college: Optional[str] = None) -> List[ProcessedCourse]:
        """
        Returns a list of courses based on the specified college's degree plan
        with college-specific courses removed. Can be used to create a
        curriculum for Curricular Analytics.

        Two curricula are equivalent if they have the same number of each
        course, regardless of the order. However, there can be multiple
        identical courses (eg "ELECTIVE"), so this method does not return a set.

        The `overlaps_ge` attribute for these courses should be ignored (because
        there is no college whose GEs the course overlaps with).

        If no college is specified, the colleges in
        `university.curriculum_priority` are tried in order and the first one
        this major has a plan for is used. (The original note says Marshall
        (Third College) is preferred because it appears to be a generally good
        college to base curricula off of; see #14.) If none of them is
        available, an arbitrary college of this major is used instead.

        Raises `StopIteration` if no college is given and `colleges` is empty.
        """
        if college is None:
            for college_code in university.curriculum_priority:
                if college_code in self.colleges:
                    college = college_code
                    break
        if college is None:
            # Use an arbitrary college as the base if there is one (for
            # non-UCSD plans)
            college = next(iter(self.colleges))
        return [course for course in self.plan(college) if course.for_major]
def plan_rows_to_dict(rows: Iterable[List[str]]) -> Dict[str, MajorPlans]:
    """
    Groups the rows of the academic plans CSV by major code, returning a
    dictionary of major codes to `MajorPlans` objects.

    Each row contributes one `RawCourse` to the plan of its major and college.
    The year and quarter taken are stored 0-based (one less than the CSV
    value). Raises `TypeError` if a row's course type is neither "COLLEGE" nor
    "DEPARTMENT".
    """
    allowed_types = ("COLLEGE", "DEPARTMENT")
    by_major: Dict[str, MajorPlans] = {}
    for row in rows:
        (
            department,  # Department
            major_code,  # Major
            college_code,  # College
            course_title,  # Course
            units,  # Units
            course_type,  # Course Type
            overlap,  # GE/Major Overlap
            year,  # Start Year
            plan_yr,  # Year Taken
            plan_qtr,  # Quarter Taken
            *_,  # Term Taken, Plan Length
        ) = row
        start_year = int(year)
        major = by_major.get(major_code)
        if major is None:
            major = MajorPlans(start_year, department, major_code)
            by_major[major_code] = major
        if course_type not in allowed_types:
            raise TypeError('Course type is neither "COLLEGE" nor "DEPARTMENT"')
        unit_count = float(units)
        overlaps = overlap == "Y"
        year_index = int(plan_yr) - 1
        quarter_index = int(plan_qtr) - 1
        raw_course = RawCourse(
            course_title,
            unit_count,
            course_type,
            overlaps,
            year_index,
            quarter_index,
        )
        major.add_raw_course(college_code, raw_course)
    return by_major
_plan_cache: Dict[Tuple[int, int], Dict[str, MajorPlans]] = {}


def major_plans(year: int, length: int = 4) -> Dict[str, MajorPlans]:
    """
    Returns the academic plans for the given start year and plan length (in
    years), keyed by major code.

    Plans are read from `./files/plans/plans_<year>_<length>yr.csv` the first
    time a (year, length) pair is requested and cached afterwards. If the file
    does not exist, an empty dictionary is cached and returned.
    """
    key = (year, length)
    cached = _plan_cache.get(key)
    if cached is not None:
        return cached

    try:
        with open(f"./files/plans/plans_{year}_{length}yr.csv", newline="") as file:
            loaded = plan_rows_to_dict(csv.reader(file))
    except FileNotFoundError:
        loaded = {}
    _plan_cache[key] = loaded
    return loaded
class MajorInfo(NamedTuple):
    """
    A single major's entry from the ISIS major code list.

    The major code list can be found by Googling "isis major codes," but the
    published spreadsheet is not in the format that this program expects:
    https://blink.ucsd.edu/_files/instructors-tab/major-codes/isis_major_code_list.xlsx
    """

    # ISIS major code
    isis_code: str
    # Name of the major
    name: str
    # TODO: MajorPlans.department vs MajorInfo.department
    department: str
    # CIP code of the major
    cip_code: str
    # Award types offered for the major
    award_types: List[str]
def major_rows_to_dict(rows: Iterable[List[str]]) -> Dict[str, MajorInfo]:
    """
    Builds a dictionary of ISIS major codes to `MajorInfo` objects from the
    rows of the major codes CSV.

    The ISIS code and diploma title are stripped of surrounding whitespace. A
    period is inserted after the first two characters of the CIP code. The
    award types are split on single spaces; an empty value or "NONE" yields an
    empty list. If a code appears more than once, the last row wins.
    """
    by_code: Dict[str, MajorInfo] = {}
    for (
        _,  # Previous Local Code
        _,  # UCOP Major Code (CSS)
        isis_code,  # ISIS Major Code
        _,  # Major Abbreviation
        _,  # Major Description
        title,  # Diploma Title
        _,  # Start Term
        _,  # End Term
        _,  # Student Level
        department,  # Department
        award_types,  # Award Type
        _,  # Program Length (in years)
        _,  # College
        cip_code,  # CIP Code
        *_,  # et cetera...
    ) in rows:
        code = isis_code.strip()
        if award_types and award_types != "NONE":
            awards = award_types.split(" ")
        else:
            awards = []
        dotted_cip = f"{cip_code[:2]}.{cip_code[2:]}"
        by_code[code] = MajorInfo(code, title.strip(), department, dotted_cip, awards)
    return by_code
class _ParseCache:
    """
    Holds values that are computed from the file system on first access and
    then reused (via `cached_property`).
    """

    @cached_property
    def terms(self) -> List[TermCode]:
        """
        The sorted term codes for which a `prereqs_<term>.csv` file exists in
        `./files/prereqs/`.
        """
        prefix, suffix = "prereqs_", ".csv"
        # Slice off only the leading prefix and trailing extension so that the
        # term always maps back to the file name it came from; `str.replace`
        # would also remove any other occurrence inside the name.
        return sorted(
            TermCode(name[len(prefix) : -len(suffix)])
            for name in os.listdir("./files/prereqs/")
            if name.startswith(prefix) and name.endswith(suffix)
        )

    @cached_property
    def major_codes(self) -> Dict[str, MajorInfo]:
        """
        The majors parsed from `university.majors_file`, keyed by ISIS major
        code. The first row of the file is treated as a header and skipped.
        """
        with open(university.majors_file, newline="") as file:
            reader = csv.reader(file)
            # Skip header; the default keeps an empty file from leaking
            # StopIteration out of the property.
            next(reader, None)
            return major_rows_to_dict(reader)
_cache = _ParseCache()


def major_codes() -> Dict[str, MajorInfo]:
    """
    Returns the dictionary of ISIS major codes to `MajorInfo` objects held by
    the module-level parse cache (`_cache.major_codes`).
    """
    codes = _cache.major_codes
    return codes
if __name__ == "__main__":
    # Prints the major codes that have a plan for the given start year,
    # separated by spaces.
    import sys

    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit("Usage: python3 parse.py <year>")
    print(" ".join(major_plans(int(sys.argv[1]))))