Skip to content
This repository has been archived by the owner on Oct 18, 2024. It is now read-only.

Commit

Permalink
feat(registrar-scraper): implement course term scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
cokwong committed Feb 29, 2024
1 parent 2c69eb7 commit d25925a
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 8 deletions.
89 changes: 88 additions & 1 deletion tools/registrar-scraper/src/course-scraper/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ import fetch from "cross-fetch";
/** Origin of the UCI catalogue site; the catalogue URLs below are built from it. */
const CATALOGUE_BASE_URL = "https://catalogue.uci.edu";
/** The catalogue's "/allcourses/" page. */
const URL_TO_ALL_COURSES = `${CATALOGUE_BASE_URL}/allcourses/`;
/** The catalogue's "/schoolsandprograms/" page. */
const URL_TO_ALL_SCHOOLS = `${CATALOGUE_BASE_URL}/schoolsandprograms/`;
/** Registrar enrollment-history endpoint queried by getCourseHistory. */
const ENROLL_HIST_URL = "https://www.reg.uci.edu/perl/EnrollHist.pl";

/** Number of years to look back when grabbing course history. */
const YEAR_THRESHOLD = 9;

const Ia = "GE Ia: Lower Division Writing";
const Ib = "GE Ib: Upper Division Writing";
Expand Down Expand Up @@ -167,6 +170,8 @@ async function getCoursesOfDepartment(deptURL: string) {
const $ = load(await res.text());
const courses: [string, Course][] = [];
const deptName = normalized($(".page-title").text()).split("(")[0].trim();
const deptCode = normalized($(".page-title").text()).split("(")[1].slice(0, -1).trim();
const courseTerms = await getCourseHistory(deptCode, YEAR_THRESHOLD);
$("#courseinventorycontainer > .courses > .courseblock").each((_, courseBlock) => {
const header: string[] = normalized($(courseBlock).find(".courseblocktitle").text())
.split(" ")
Expand Down Expand Up @@ -219,7 +224,7 @@ async function getCoursesOfDepartment(deptURL: string) {
.map((x) => x.filter((y) => y)[1])
.map((x) => GE_DICTIONARY[x]),
ge_text: courseBody.filter((x) => x.match(/^\({1,2}[IV]/))[0] ?? "",
terms: [],
terms: [...(courseTerms[courseNumber] ?? [])],
},
]);
});
Expand Down Expand Up @@ -251,3 +256,85 @@ export async function getCourses() {
}
return Object.fromEntries(allCourses.entries());
}

/**
 * Collects, per course number, the set of terms in which a department's
 * courses appear in the registrar's enrollment-history pages.
 *
 * The first request is sent with action "Submit" and an empty `ptr`; every
 * following request uses action "Prev" with `ptr` advanced in steps of 6
 * (0, 6, 12, ...). Paging continues for as long as parseCourseHistoryPage
 * reports that there is more to read.
 *
 * Errors raised while fetching or parsing are logged and swallowed, so the
 * result may be partial.
 *
 * @param department Value sent as the `dept_name` query parameter.
 * @param year_threshold Look-back limit forwarded to parseCourseHistoryPage.
 * @returns Map of course number to the set of terms found for it; empty when
 *          the site reports "No results found".
 */
export async function getCourseHistory(
  department: string,
  year_threshold: number
): Promise<{ [key: string]: Set<string> }> {
  const termsByCourse: { [key: string]: Set<string> } = {};
  const query = {
    dept_name: department,
    action: "Submit",
    ptr: "",
  };
  // Starts one step below zero so the first "Prev" request carries ptr=0.
  let offset = -6;
  try {
    let morePages = true;
    while (morePages) {
      const response = await fetch(ENROLL_HIST_URL + "?" + new URLSearchParams(query).toString());
      const html = await response.text();
      const notice = load(html)("tr td.lcRegWeb_red_message");
      if (notice.length && notice.text().startsWith("No results found")) {
        return termsByCourse;
      }
      morePages = await parseCourseHistoryPage(html, year_threshold, termsByCourse);
      offset += 6;
      query.action = "Prev";
      query.ptr = offset.toString();
    }
  } catch (error) {
    console.log(error);
  }
  return termsByCourse;
}

/**
 * Reads one enrollment-history results page and records every
 * (course number, term) pair it contains into `courseTerms`.
 *
 * Only table rows with exactly 15 cells are considered, and rows whose first
 * cell is the literal header text "Term" are skipped. The term is taken from
 * cell 0 and the course number from cell 4.
 *
 * Three-character terms are assumed to be a letter plus a two-digit year
 * (e.g. "F23") -- NOTE(review): confirm against the live page. Scanning stops
 * at the first such term older than `year_threshold` years.
 *
 * @param courseHistoryPage HTML of a single results page.
 * @param year_threshold Maximum age, in years, of a term that is still recorded.
 * @param courseTerms Accumulator mutated in place: course number -> set of terms.
 * @returns `true` when the caller should request the next (older) page: at
 *          least one three-character term was seen, no term exceeded the
 *          threshold, and the page contains a "Prev" link. `false` otherwise,
 *          including when parsing throws (the error is logged).
 */
async function parseCourseHistoryPage(
  courseHistoryPage: string,
  year_threshold: number,
  courseTerms: { [key: string]: Set<string> }
): Promise<boolean> {
  const fieldLabels = {
    term: 0,
    courseNo: 4,
  };
  const currentYear = new Date().getFullYear() % 100;
  let entryFound = false;
  try {
    const $ = load(courseHistoryPage);
    $("table tbody tr").each((_, row) => {
      const entry = $(row).find("td");
      if (entry.length !== 15) {
        return true;
      }
      const term = entry.eq(fieldLabels.term).text().trim();
      if (term === "Term") {
        return true;
      }
      if (term.length === 3) {
        entryFound = true;
        const termYear = parseInt(term.replace(/\D/g, ""), 10);
        let age = currentYear - termYear;
        // Two-digit years wrap at the century: a term year more than one year
        // ahead of the current year (e.g. "99" seen in 2024) must belong to
        // the previous century, otherwise its negative age would never trip
        // the threshold. One year of slack keeps upcoming terms untouched.
        if (age < -1) {
          age += 100;
        }
        if (age > year_threshold) {
          entryFound = false;
          // Returning false from the callback ends the .each() iteration.
          return false;
        }
      }
      if (term.length) {
        const courseNo = entry.eq(fieldLabels.courseNo).text().trim();
        if (!courseTerms[courseNo]) {
          courseTerms[courseNo] = new Set();
        }
        courseTerms[courseNo].add(term);
      }
      return true;
    });
    // Without a "Prev" link there is no older page to request.
    if ($('a:contains("Prev")').length === 0) {
      entryFound = false;
    }
  } catch (error) {
    console.log(error);
  }
  return entryFound;
}
14 changes: 7 additions & 7 deletions tools/registrar-scraper/src/lib.ts
Original file line number Diff line number Diff line change
Expand Up @@ -195,12 +195,12 @@ export const createCourses =
}
}),
geText: ge_text,
terms: Array.from(
new Set(
Object.values(instructorInfo)
.filter((x) => Object.keys(x.courseHistory ?? {}).includes(courseId))
.flatMap((x) => x.courseHistory[courseId]),
),
).sort(sortTerms),
// terms: Array.from(
// new Set(
// Object.values(instructorInfo)
// .filter((x) => Object.keys(x.courseHistory ?? {}).includes(courseId))
// .flatMap((x) => x.courseHistory[courseId]),
// ),
// ).sort(sortTerms),
};
};

0 comments on commit d25925a

Please sign in to comment.