Skip to content
This repository has been archived by the owner on Oct 18, 2024. It is now read-only.

Commit

Permalink
feat(registrar-scraper): ✨ implement course history scraper
Browse files: browse the repository at this point in the history
fix #113
  • Loading branch information
cokwong committed Feb 29, 2024
1 parent d25925a commit 676c37a
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 23 deletions.
33 changes: 17 additions & 16 deletions tools/registrar-scraper/src/course-scraper/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import fetch from "cross-fetch";
const CATALOGUE_BASE_URL = "https://catalogue.uci.edu";
const URL_TO_ALL_COURSES = `${CATALOGUE_BASE_URL}/allcourses/`;
const URL_TO_ALL_SCHOOLS = `${CATALOGUE_BASE_URL}/schoolsandprograms/`;
const ENROLL_HIST_URL = 'https://www.reg.uci.edu/perl/EnrollHist.pl'
const ENROLL_HIST_URL = "https://www.reg.uci.edu/perl/EnrollHist.pl";

const YEAR_THRESHOLD = 9; // Number of years to look back when grabbing course history

Expand Down Expand Up @@ -246,6 +246,7 @@ export async function getCourses() {
courses.forEach((v, k) =>
allCourses.set(k, { ...v, school: schoolMapping.get(v.department) ?? "" }),
);
await sleep(1000);
}
if (deptsWithoutSchools.size > 0) {
throw new Error(
Expand All @@ -259,29 +260,27 @@ export async function getCourses() {

export async function getCourseHistory(
department: string,
year_threshold: number
year_threshold: number,
): Promise<{ [key: string]: Set<string> }> {
const courseTerms: { [key: string]: Set<string> } = {};
let page: string;
let continueParsing: boolean;
var ptr = -6;
let ptr = -6;
const params = {
dept_name: department,
action: "Submit",
ptr: "",
}
};
try {
do {
page = await (
await fetch(ENROLL_HIST_URL + "?" + new URLSearchParams(params))
).text();
page = await (await fetch(ENROLL_HIST_URL + "?" + new URLSearchParams(params))).text();
const $ = load(page);
const warning = $("tr td.lcRegWeb_red_message");
if (warning.length && warning.text().startsWith("No results found")) {
return courseTerms;
}
return courseTerms;
}
continueParsing = await parseCourseHistoryPage(page, year_threshold, courseTerms);
ptr += 6
ptr += 6;
params["action"] = "Prev";
params["ptr"] = ptr.toString();
} while (continueParsing);
Expand All @@ -294,32 +293,34 @@ export async function getCourseHistory(
async function parseCourseHistoryPage(
courseHistoryPage: string,
year_threshold: number,
courseTerms: { [key: string]: Set<string> }
courseTerms: { [key: string]: Set<string> },
): Promise<boolean> {
const fieldLabels = {
term: 0,
courseNo: 4,
}
};
const currentYear = new Date().getFullYear() % 100;
let entryFound = false;
try {
const $ = load(courseHistoryPage);
let term = "";
$("table tbody tr").each(function (this) {
const entry = $(this).find("td");
if ($(entry).length == 15) {
const term = $(entry[fieldLabels.term]).text().trim();
if (term === "Term") {
const termValue = $(entry[fieldLabels.term]).text().trim();
if (termValue === "Term") {
return true;
}
if (term.length === 3) {
if (termValue.length === 3) {
term = termValue;
entryFound = true;
const termYear = parseInt(term.replace(/\D/g, ""));
if (currentYear - termYear > year_threshold) {
entryFound = false;
return false;
}
}
if (term.length) {
if (term && termValue.length === 0) {
const courseNo = $(entry[fieldLabels.courseNo]).text().trim();
if (!courseTerms[courseNo]) {
courseTerms[courseNo] = new Set();
Expand Down
16 changes: 9 additions & 7 deletions tools/registrar-scraper/src/lib.ts
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ export const createCourses =
corequisite,
ge_list,
ge_text,
terms,
},
]: [string, ScrapedCourse]): Prisma.CourseCreateManyInput => {
const courseId = `${department} ${number}`;
Expand Down Expand Up @@ -195,12 +196,13 @@ export const createCourses =
}
}),
geText: ge_text,
// terms: Array.from(
// new Set(
// Object.values(instructorInfo)
// .filter((x) => Object.keys(x.courseHistory ?? {}).includes(courseId))
// .flatMap((x) => x.courseHistory[courseId]),
// ),
// ).sort(sortTerms),
terms: Array.from(
new Set([
...terms.map(transformTerm).filter((x) => x.length),
...Object.values(instructorInfo)
.filter((x) => Object.keys(x.courseHistory ?? {}).includes(courseId))
.flatMap((x) => x.courseHistory[courseId]),
]),
).sort(sortTerms),
};
};

0 comments on commit 676c37a

Please sign in to comment.