Skip to content
This repository has been archived by the owner on Oct 18, 2024. It is now read-only.

feat: ✨ implement larc caching #148

Merged
merged 1 commit into from
May 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 8 additions & 52 deletions apps/api/src/routes/v1/rest/larc/+endpoint.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import { PrismaClient } from "@libs/db";
import { createHandler } from "@libs/lambda";
import { load } from "cheerio";
import { fetch } from "cross-fetch";
import { ZodError } from "zod";

import { fmtBldg, fmtDays, fmtTime, quarterToLarcSuffix } from "./lib";
import { QuerySchema } from "./schema";

const prisma = new PrismaClient();

export const GET = createHandler(async (event, context, res) => {
const headers = event.headers;
const requestId = context.awsRequestId;
Expand All @@ -14,55 +14,11 @@ export const GET = createHandler(async (event, context, res) => {
try {
const { year, quarter } = QuerySchema.parse(query);

// SS10wk does not have LARC sessions apparently
if (quarter === "Summer10wk") return res.createOKResult([], headers, requestId);

// TODO: move this code to its own scraper, and rewrite this route to fetch
// data from the DB.

const html = await fetch(
`https://enroll.larc.uci.edu/${year}${quarterToLarcSuffix(quarter)}`,
).then((response) => response.text());

const $ = load(html);

const larcSections = $(".tutorial-group")
.toArray()
.map((card) => {
const match = $(card)
.find(".card-header")
.text()
.trim()
.match(
/(?<courseNumber>[^()]*)( \(same as (?<sameAs>.*)\))? - (.*) \((?<courseName>.*)\)/,
);

const sections = $(card)
.find(".list-group")
.toArray()
.map((group) => {
const rows = $(group).find(".col-lg-4");

const [days, time] = $(rows[0])
.find(".col")
.map((_, col) => $(col).text().trim());

const [instructor, building] = $(rows[1])
.find(".col")
.map((_, col) => $(col).text().trim());

return {
days: fmtDays(days),
time: fmtTime(time),
instructor,
bldg: fmtBldg(building),
};
});

return { courseInfo: { ...match?.groups }, sections };
});

return res.createOKResult(larcSections, headers, requestId);
return res.createOKResult(
(await prisma.larcTerm.findFirst({ where: { year, quarter } }))?.courses ?? [],
headers,
requestId,
);
} catch (e) {
if (e instanceof ZodError) {
const messages = e.issues.map((issue) => issue.message);
Expand Down
9 changes: 9 additions & 0 deletions libs/db/prisma/schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,15 @@ model Instructor {
courses Json @default("[]")
}

/// LARC data for a single academic term, one row per (year, quarter).
/// Rows are written by services/larc-scraper and read back by the
/// /v1/rest/larc endpoint, which returns `courses` for the requested term.
model LarcTerm {
/// Calendar year of the term, stored as a string (the scraper writes `year.toString()`).
year String
/// Quarter of the term.
quarter Quarter
/// Scraped course list for the term, stored as a JSON document.
courses Json

@@id([year, quarter])
// NOTE(review): this covers the same columns as the composite @@id above, so it
// looks redundant; confirm nothing references the "idx" name before removing it.
@@unique([year, quarter], name: "idx")
}

model Major {
id String @id
degreeId String
Expand Down
25 changes: 25 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

72 changes: 72 additions & 0 deletions services/larc-scraper/build.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import { chmod, copyFile, mkdir, rm } from "node:fs/promises";
import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url";

import { build } from "esbuild";

// Directory containing this build script; every path below is resolved against it.
const __dirname = dirname(fileURLToPath(import.meta.url));

/**
 * Source text handed to esbuild as the `banner.js` option in `buildApp`.
 *
 * It defines `require`, `__filename` and `__dirname` from `import.meta.url`
 * in the emitted ESM bundle. Presumably this is needed because bundled
 * CommonJS dependencies still reference those names; see the linked issue.
 *
 * @see https://github.com/evanw/esbuild/issues/1921#issuecomment-1623640043
 */
// language=JavaScript
const js = `
import topLevelModule from "node:module";
import topLevelUrl from "node:url";
import topLevelPath from "node:path";

const require = topLevelModule.createRequire(import.meta.url);
const __filename = topLevelUrl.fileURLToPath(import.meta.url);
const __dirname = topLevelPath.dirname(__filename);
`;

/**
 * Bundles `src/index.ts` into a single minified ESM file (`dist/index.mjs`)
 * targeting Node 20, then places the Prisma query engine binary and the
 * Prisma schema next to the bundle.
 *
 * @returns a promise that settles once esbuild and both plugins have finished
 */
async function buildApp() {
  const distDir = join(__dirname, "dist/");
  const engineSource = join(
    __dirname,
    "../../libs/db/node_modules/prisma/libquery_engine-linux-arm64-openssl-3.0.x.so.node",
  );
  const engineTarget = join(__dirname, "dist/libquery_engine-linux-arm64-openssl-3.0.x.so.node");
  const schemaSource = join(__dirname, "../../libs/db/prisma/schema.prisma");
  const schemaTarget = join(__dirname, "dist/schema.prisma");

  // Start every build from an empty dist/ directory.
  const cleanPlugin = {
    name: "clean",
    setup(pluginBuild) {
      pluginBuild.onStart(async () => {
        await rm(distDir, { recursive: true, force: true });
        await mkdir(distDir);
      });
    },
  };

  // After bundling, ship the query engine and schema alongside the bundle and
  // set the engine binary's mode to 0o755.
  const copyPlugin = {
    name: "copy",
    setup(pluginBuild) {
      pluginBuild.onEnd(async () => {
        await copyFile(engineSource, engineTarget);
        await copyFile(schemaSource, schemaTarget);
        await chmod(engineTarget, 0o755);
      });
    },
  };

  await build({
    entryPoints: { index: "src/index.ts" },
    outdir: "dist",
    outExtension: { ".js": ".mjs" },
    bundle: true,
    minify: true,
    format: "esm",
    platform: "node",
    target: "node20",
    logLevel: "info",
    banner: { js },
    plugins: [cleanPlugin, copyPlugin],
  });
}

// Run the build when this script is executed (package.json's "build" script runs `node build.mjs`).
buildApp().then();
24 changes: 24 additions & 0 deletions services/larc-scraper/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"name": "@services/larc-scraper",
"version": "0.0.0",
"private": true,
"description": "Automated scraper for LARC sections",
"license": "MIT",
"type": "module",
"main": "src/index.ts",
"types": "src/index.ts",
"scripts": {
"build": "node build.mjs"
},
"dependencies": {
"@libs/db": "workspace:^",
"@libs/uc-irvine-lib": "workspace:^",
"@libs/utils": "workspace:^",
"cheerio": "1.0.0-rc.12",
"cross-fetch": "4.0.0"
},
"devDependencies": {
"@peterportal-api/types": "workspace:^",
"esbuild": "0.20.1"
}
}
69 changes: 69 additions & 0 deletions services/larc-scraper/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import { PrismaClient } from "@libs/db";
import { LarcResponse, Quarter } from "@peterportal-api/types";
import { load } from "cheerio";
import { fetch } from "cross-fetch";

import { fmtBldg, fmtDays, fmtTime, quarterToLarcSuffix } from "./lib";

// First year whose LARC term pages are scraped; the handler's year loop starts here.
const EARLIEST_YEAR = 2019;

// Database client the handler uses to replace the stored LARC terms.
const prisma = new PrismaClient();

/**
 * Pauses the caller for the given amount of time.
 *
 * @param duration delay in milliseconds before the returned promise resolves
 * @returns a promise that resolves (with no value) once the delay has elapsed
 */
export const sleep = async (duration: number) => {
  return new Promise((wake) => {
    setTimeout(() => wake(undefined), duration);
  });
};

/**
 * Scrapes the LARC enrollment site for every term from `EARLIEST_YEAR`
 * through next calendar year, then replaces the contents of the `LarcTerm`
 * table with the freshly scraped data.
 *
 * Terms are fetched one at a time with a one-second pause after each request.
 * The delete and the insert are submitted together through
 * `prisma.$transaction`.
 *
 * NOTE(review): the HTTP status of each response is not inspected, so an
 * error page presumably parses to an empty course list for that term.
 * Confirm whether that is acceptable given the table is fully replaced.
 *
 * @returns a promise that resolves once the table has been rewritten
 */
export const handler = async () => {
  // Card headers appear to read "<number>[ (same as <other>)] - <text> (<name>)" — TODO confirm.
  const headerPattern =
    /(?<courseNumber>[^()]*)( \(same as (?<sameAs>.*)\))? - (.*) \((?<courseName>.*)\)/;
  const scrapedTerms: Array<{ year: string; quarter: Quarter; courses: LarcResponse }> = [];

  for (let year = EARLIEST_YEAR; year < new Date().getFullYear() + 2; ++year) {
    for (const quarter of ["Fall", "Winter", "Spring", "Summer1", "Summer2"] as const) {
      console.log(`Scraping ${year} ${quarter}`);

      const response = await fetch(
        `https://enroll.larc.uci.edu/${year}${quarterToLarcSuffix(quarter)}`,
      );
      const $ = load(await response.text());

      // One ".tutorial-group" card per course.
      const cards = $(".tutorial-group").toArray();
      const courses = cards.map((card) => {
        const headerText = $(card).find(".card-header").text().trim();
        // Spreading an undefined `groups` yields an empty object when the header does not match.
        const courseInfo = { ...headerText.match(headerPattern)?.groups };

        // One ".list-group" per section of the course.
        const groups = $(card).find(".list-group").toArray();
        const sections = groups.map((group) => {
          const rows = $(group).find(".col-lg-4");

          // First row holds days and time; second row holds instructor and building.
          const dayTimeCols = $(rows[0])
            .find(".col")
            .map((_, col) => $(col).text().trim());
          const [days, time] = dayTimeCols;

          const staffRoomCols = $(rows[1])
            .find(".col")
            .map((_, col) => $(col).text().trim());
          const [instructor, building] = staffRoomCols;

          return {
            days: fmtDays(days),
            time: fmtTime(time),
            instructor,
            bldg: fmtBldg(building),
          };
        });

        return { courseInfo, sections };
      });

      scrapedTerms.push({ year: String(year), quarter, courses: courses as LarcResponse });

      // Pause between requests.
      await sleep(1000);
    }
  }

  // Replace the whole table: delete every existing row, then insert the new snapshot.
  const wipeAll = prisma.larcTerm.deleteMany({});
  const insertAll = prisma.larcTerm.createMany({ data: scrapedTerms });
  await prisma.$transaction([wipeAll, insertAll]);
};

// Runs the scraper once as soon as this module is loaded.
// NOTE(review): tools/cdk's LarcScraper registers `index.handler` as the Lambda entry
// point, so this presumably triggers an extra scrape when the Lambda loads the module,
// on top of the scheduled invocation. Confirm whether this call is only meant for local runs.
handler().then();
42 changes: 42 additions & 0 deletions tools/cdk/src/constructs/LarcScraper.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url";

import { Duration } from "aws-cdk-lib";
import { Rule, RuleTargetInput, Schedule } from "aws-cdk-lib/aws-events";
import { LambdaFunction } from "aws-cdk-lib/aws-events-targets";
import { Architecture, Code, Function, Runtime } from "aws-cdk-lib/aws-lambda";
import { Construct } from "constructs";

/**
 * CDK construct that deploys the LARC scraper as a Lambda function and
 * schedules it to run once a day via an EventBridge rule.
 *
 * The function code is taken from the pre-built bundle in
 * `services/larc-scraper/dist`, so that service must be built before
 * synthesizing this construct.
 */
export class LarcScraper extends Construct {
  /**
   * @param scope parent construct
   * @param id construct id; also used as the prefix for the rule and function names
   */
  constructor(scope: Construct, id: string) {
    super(scope, id);

    const ruleName = `${id}-rule`;

    const rule = new Rule(this, ruleName, {
      ruleName,
      schedule: Schedule.rate(Duration.days(1)),
    });

    const functionName = `${id}-function`;

    rule.addTarget(
      new LambdaFunction(
        new Function(this, functionName, {
          architecture: Architecture.ARM_64,
          code: Code.fromAsset(
            join(dirname(fileURLToPath(import.meta.url)), "../../../../services/larc-scraper/dist"),
          ),
          functionName,
          handler: "index.handler",
          // The scraper sleeps one second after each of five quarters for every
          // year from 2019 through next year, so a run spends well over 30
          // seconds waiting before any network or database time. The previous
          // 15-second limit could never be met; use Lambda's maximum instead.
          timeout: Duration.minutes(15),
          runtime: Runtime.NODEJS_20_X,
          memorySize: 512,
        }),
        {
          // The scraper handler takes no arguments; a fixed placeholder event is sent.
          event: RuleTargetInput.fromObject({ body: "{}" }),
        },
      ),
    );
  }
}
3 changes: 3 additions & 0 deletions tools/cdk/src/stacks/services.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { SubnetType, Vpc } from "aws-cdk-lib/aws-ec2";
import type { Construct } from "constructs";

import { CalendarScraper } from "../constructs/CalendarScraper";
import { LarcScraper } from "../constructs/LarcScraper";
import { WebsocProxy } from "../constructs/WebsocProxy";
import { WebsocScraperV2 } from "../constructs/WebsocScraperV2";

Expand All @@ -30,6 +31,8 @@ export class ServicesStack extends Stack {

new CalendarScraper(this, `${id}-calendar-scraper`);

new LarcScraper(this, `${id}-larc-scraper`);

new WebsocProxy(this, `${id}-websoc-proxy`);

new WebsocScraperV2(this, `${id}-websoc-scraper-v2`, vpc);
Expand Down
Loading