Skip to content
This repository has been archived by the owner on Oct 18, 2024. It is now read-only.

Commit

Permalink
feat: ✨ implement larc scraper, change route to get from cache
Browse files Browse the repository at this point in the history
  • Loading branch information
ecxyzzy committed May 20, 2024
1 parent cf565b3 commit 7d5b44b
Show file tree
Hide file tree
Showing 9 changed files with 252 additions and 52 deletions.
60 changes: 8 additions & 52 deletions apps/api/src/routes/v1/rest/larc/+endpoint.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import { PrismaClient } from "@libs/db";
import { createHandler } from "@libs/lambda";
import { load } from "cheerio";
import { fetch } from "cross-fetch";
import { ZodError } from "zod";

import { fmtBldg, fmtDays, fmtTime, quarterToLarcSuffix } from "./lib";
import { QuerySchema } from "./schema";

const prisma = new PrismaClient();

export const GET = createHandler(async (event, context, res) => {
const headers = event.headers;
const requestId = context.awsRequestId;
Expand All @@ -14,55 +14,11 @@ export const GET = createHandler(async (event, context, res) => {
try {
const { year, quarter } = QuerySchema.parse(query);

// SS10wk does not have LARC sessions apparently
if (quarter === "Summer10wk") return res.createOKResult([], headers, requestId);

// TODO: move this code to its own scraper, and rewrite this route to fetch
// data from the DB.

const html = await fetch(
`https://enroll.larc.uci.edu/${year}${quarterToLarcSuffix(quarter)}`,
).then((response) => response.text());

const $ = load(html);

const larcSections = $(".tutorial-group")
.toArray()
.map((card) => {
const match = $(card)
.find(".card-header")
.text()
.trim()
.match(
/(?<courseNumber>[^()]*)( \(same as (?<sameAs>.*)\))? - (.*) \((?<courseName>.*)\)/,
);

const sections = $(card)
.find(".list-group")
.toArray()
.map((group) => {
const rows = $(group).find(".col-lg-4");

const [days, time] = $(rows[0])
.find(".col")
.map((_, col) => $(col).text().trim());

const [instructor, building] = $(rows[1])
.find(".col")
.map((_, col) => $(col).text().trim());

return {
days: fmtDays(days),
time: fmtTime(time),
instructor,
bldg: fmtBldg(building),
};
});

return { courseInfo: { ...match?.groups }, sections };
});

return res.createOKResult(larcSections, headers, requestId);
return res.createOKResult(
(await prisma.larcTerm.findFirst({ where: { year, quarter } }))?.courses ?? [],
headers,
requestId,
);
} catch (e) {
if (e instanceof ZodError) {
const messages = e.issues.map((issue) => issue.message);
Expand Down
9 changes: 9 additions & 0 deletions libs/db/prisma/schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,15 @@ model Instructor {
courses Json @default("[]")
}

// One row per academic term, identified by the (year, quarter) pair.
model LarcTerm {
// Term year, stored as a string.
year String
// Term quarter; `Quarter` is declared outside this excerpt (presumably an enum — confirm).
quarter Quarter
// JSON payload for the term. No default is declared, so every insert must supply it.
courses Json
// Composite primary key: at most one row per (year, quarter).
@@id([year, quarter])
// The same column pair exposed again as a unique constraint named "idx".
// NOTE(review): looks redundant with the @@id above — confirm whether any caller relies on the "idx" name before removing.
@@unique([year, quarter], name: "idx")
}

model Major {
id String @id
degreeId String
Expand Down
25 changes: 25 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

72 changes: 72 additions & 0 deletions services/larc-scraper/build.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import { chmod, copyFile, mkdir, rm } from "node:fs/promises";
import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url";

import { build } from "esbuild";

// Directory containing this build script. ES modules have no built-in `__dirname`,
// so it is derived from `import.meta.url`.
const __dirname = dirname(fileURLToPath(import.meta.url));

/**
 * Source text handed to esbuild as the `banner.js` option of the build below.
 * It defines `require`, `__filename` and `__dirname` from `import.meta.url`,
 * presumably so bundled CommonJS-style dependencies keep working in the ESM output
 * (see the linked esbuild issue).
 *
 * @see https://github.com/evanw/esbuild/issues/1921#issuecomment-1623640043
 */
// language=JavaScript
const js = `
import topLevelModule from "node:module";
import topLevelUrl from "node:url";
import topLevelPath from "node:path";
const require = topLevelModule.createRequire(import.meta.url);
const __filename = topLevelUrl.fileURLToPath(import.meta.url);
const __dirname = topLevelPath.dirname(__filename);
`;

/**
 * Bundles `src/index.ts` with esbuild into a minified ESM bundle under `dist/`
 * (entry name `index`, `.mjs` extension, Node 20 target).
 *
 * Two inline plugins wrap the build:
 *  - "clean" deletes and recreates this package's `dist/` directory when the build starts;
 *  - "copy" runs when the build ends and places the Prisma query engine binary and
 *    `schema.prisma` next to the bundle, then marks the engine as executable.
 */
async function buildApp() {
  // Where the Prisma query engine comes from and where it is copied to.
  const engineSource = join(
    __dirname,
    "../../libs/db/node_modules/prisma/libquery_engine-linux-arm64-openssl-3.0.x.so.node",
  );
  const engineTarget = join(__dirname, "dist/libquery_engine-linux-arm64-openssl-3.0.x.so.node");

  const cleanPlugin = {
    name: "clean",
    setup(pluginBuild) {
      pluginBuild.onStart(async () => {
        const distDir = join(__dirname, "dist/");
        // `force` makes the removal a no-op when dist/ does not exist yet.
        await rm(distDir, { recursive: true, force: true });
        await mkdir(distDir);
      });
    },
  };

  const copyPlugin = {
    name: "copy",
    setup(pluginBuild) {
      pluginBuild.onEnd(async () => {
        await copyFile(engineSource, engineTarget);
        await copyFile(
          join(__dirname, "../../libs/db/prisma/schema.prisma"),
          join(__dirname, "dist/schema.prisma"),
        );
        // rwxr-xr-x on the copied engine binary.
        await chmod(engineTarget, 0o755);
      });
    },
  };

  await build({
    entryPoints: { index: "src/index.ts" },
    outdir: "dist",
    outExtension: { ".js": ".mjs" },
    bundle: true,
    minify: true,
    format: "esm",
    platform: "node",
    target: "node20",
    logLevel: "info",
    banner: { js },
    plugins: [cleanPlugin, copyPlugin],
  });
}

// Run the build as soon as the script is executed.
buildApp().then();
24 changes: 24 additions & 0 deletions services/larc-scraper/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"name": "@services/larc-scraper",
"version": "0.0.0",
"private": true,
"description": "Automated scraper for LARC sections",
"license": "MIT",
"type": "module",
"main": "src/index.ts",
"types": "src/index.ts",
"scripts": {
"build": "node build.mjs"
},
"dependencies": {
"@libs/db": "workspace:^",
"@libs/uc-irvine-lib": "workspace:^",
"@libs/utils": "workspace:^",
"cheerio": "1.0.0-rc.12",
"cross-fetch": "4.0.0"
},
"devDependencies": {
"@peterportal-api/types": "workspace:^",
"esbuild": "0.20.1"
}
}
69 changes: 69 additions & 0 deletions services/larc-scraper/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import { PrismaClient } from "@libs/db";
import { LarcResponse, Quarter } from "@peterportal-api/types";
import { load } from "cheerio";
import { fetch } from "cross-fetch";

import { fmtBldg, fmtDays, fmtTime, quarterToLarcSuffix } from "./lib";

// First year requested from LARC; the handler's year loop starts here.
const EARLIEST_YEAR = 2019;

// Prisma client created once at module load; the handler uses it for its database writes.
const prisma = new PrismaClient();

/**
 * Returns a promise that settles after roughly `duration` milliseconds.
 *
 * @param duration Delay in milliseconds, passed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
export const sleep = async (duration: number) => {
  const timer = new Promise((wake) => {
    setTimeout(wake, duration);
  });
  return timer;
};

/**
 * Parses one LARC enrollment page into the list of courses it shows.
 *
 * Every `.tutorial-group` card yields one entry: the named groups captured from its
 * header text (`courseNumber`, `sameAs`, `courseName`) and one section object per
 * `.list-group` inside the card. A header that does not match the pattern produces
 * an empty `courseInfo` object.
 *
 * @param html Raw HTML of an enrollment page.
 * @returns One `{ courseInfo, sections }` object per card; empty when the page has no cards.
 */
const parseLarcPage = (html: string) => {
  const $ = load(html);

  return $(".tutorial-group")
    .toArray()
    .map((card) => {
      const $card = $(card);

      const headerText = $card.find(".card-header").text().trim();
      const headerMatch = headerText.match(
        /(?<courseNumber>[^()]*)( \(same as (?<sameAs>.*)\))? - (.*) \((?<courseName>.*)\)/,
      );

      const sections = $card
        .find(".list-group")
        .toArray()
        .map((group) => {
          // The first `.col-lg-4` holds days/time, the second instructor/building.
          const rows = $(group).find(".col-lg-4");

          const [days, time] = $(rows[0])
            .find(".col")
            .map((_, cell) => $(cell).text().trim());

          const [instructor, building] = $(rows[1])
            .find(".col")
            .map((_, cell) => $(cell).text().trim());

          return {
            days: fmtDays(days),
            time: fmtTime(time),
            instructor,
            bldg: fmtBldg(building),
          };
        });

      return { courseInfo: { ...headerMatch?.groups }, sections };
    });
};

/**
 * Scrapes every LARC term from `EARLIEST_YEAR` through next calendar year and
 * replaces the contents of the `larcTerm` table with the result.
 *
 * Terms are fetched one at a time with a one-second pause after each. The
 * delete-all and insert-all are submitted together in a single Prisma
 * transaction after every term has been scraped.
 */
export const handler = async () => {
  const quarters = ["Fall", "Winter", "Spring", "Summer1", "Summer2"] as const;
  const data: Array<{ year: string; quarter: Quarter; courses: LarcResponse }> = [];

  for (let year = EARLIEST_YEAR; year < new Date().getFullYear() + 2; ++year) {
    for (const quarter of quarters) {
      console.log(`Scraping ${year} ${quarter}`);

      const response = await fetch(
        `https://enroll.larc.uci.edu/${year}${quarterToLarcSuffix(quarter)}`,
      );
      const html = await response.text();

      const courses = parseLarcPage(html) as LarcResponse;
      data.push({ year: String(year), quarter, courses });

      // Pause between requests.
      await sleep(1000);
    }
  }

  const wipeExisting = prisma.larcTerm.deleteMany({});
  const insertScraped = prisma.larcTerm.createMany({ data });
  await prisma.$transaction([wipeExisting, insertScraped]);
};

// Starts a scrape as soon as this module is loaded. `.then()` with no callbacks
// attaches no rejection handler, so a failed scrape surfaces as an unhandled rejection.
// NOTE(review): if the hosting runtime also calls the exported `handler` itself,
// the scrape would run twice per load — confirm this module-level call is intended.
handler().then();
File renamed without changes.
42 changes: 42 additions & 0 deletions tools/cdk/src/constructs/LarcScraper.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url";

import { Duration } from "aws-cdk-lib";
import { Rule, RuleTargetInput, Schedule } from "aws-cdk-lib/aws-events";
import { LambdaFunction } from "aws-cdk-lib/aws-events-targets";
import { Architecture, Code, Function, Runtime } from "aws-cdk-lib/aws-lambda";
import { Construct } from "constructs";

/**
 * CDK construct that deploys the LARC scraper bundle as a Lambda function and
 * triggers it once a day through an EventBridge rule.
 */
export class LarcScraper extends Construct {
  /**
   * @param scope Parent construct.
   * @param id Construct ID; also used as the prefix of the rule and function names.
   */
  constructor(scope: Construct, id: string) {
    super(scope, id);

    const ruleName = `${id}-rule`;

    const rule = new Rule(this, ruleName, {
      ruleName,
      schedule: Schedule.rate(Duration.days(1)),
    });

    const functionName = `${id}-function`;

    rule.addTarget(
      new LambdaFunction(
        new Function(this, functionName, {
          architecture: Architecture.ARM_64,
          // Built output of services/larc-scraper (produced by its build script).
          code: Code.fromAsset(
            join(dirname(fileURLToPath(import.meta.url)), "../../../../services/larc-scraper/dist"),
          ),
          functionName,
          handler: "index.handler",
          // The scraper fetches every term sequentially and sleeps one second after
          // each one (five quarters per year since 2019), so a run takes well over
          // 15 seconds. Use the Lambda maximum so the run can reach its DB write.
          timeout: Duration.minutes(15),
          runtime: Runtime.NODEJS_20_X,
          memorySize: 512,
        }),
        {
          // The scheduled invocation carries a fixed payload with an empty JSON body.
          event: RuleTargetInput.fromObject({ body: "{}" }),
        },
      ),
    );
  }
}
3 changes: 3 additions & 0 deletions tools/cdk/src/stacks/services.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { SubnetType, Vpc } from "aws-cdk-lib/aws-ec2";
import type { Construct } from "constructs";

import { CalendarScraper } from "../constructs/CalendarScraper";
import { LarcScraper } from "../constructs/LarcScraper";
import { WebsocProxy } from "../constructs/WebsocProxy";
import { WebsocScraperV2 } from "../constructs/WebsocScraperV2";

Expand All @@ -30,6 +31,8 @@ export class ServicesStack extends Stack {

new CalendarScraper(this, `${id}-calendar-scraper`);

new LarcScraper(this, `${id}-larc-scraper`);

new WebsocProxy(this, `${id}-websoc-proxy`);

new WebsocScraperV2(this, `${id}-websoc-scraper-v2`, vpc);
Expand Down

0 comments on commit 7d5b44b

Please sign in to comment.