diff --git a/apps/api/src/routes/v1/graphql/resolvers.ts b/apps/api/src/routes/v1/graphql/resolvers.ts index 52e56058..e67dcf8e 100644 --- a/apps/api/src/routes/v1/graphql/resolvers.ts +++ b/apps/api/src/routes/v1/graphql/resolvers.ts @@ -9,6 +9,14 @@ export const resolvers: ApolloServerOptions["resolvers"] = { course: proxyRestApi("/v1/rest/courses", { pathArg: "courseId" }), courses: proxyRestApi("/v1/rest/courses", { argsTransform: geTransform }), allCourses: proxyRestApi("/v1/rest/courses/all"), + major: proxyRestApi("/v1/rest/degrees/majors"), + majors: proxyRestApi("/v1/rest/degrees/majors"), + minor: proxyRestApi("/v1/rest/degrees/minors"), + minors: proxyRestApi("/v1/rest/degrees/minors"), + specialization: proxyRestApi("/v1/rest/degrees/specializations"), + specializations: proxyRestApi("/v1/rest/degrees/specializations"), + specializationsByMajorId: proxyRestApi("/v1/rest/degrees/specializations"), + allDegrees: proxyRestApi("/v1/rest/degrees/all"), enrollmentHistory: proxyRestApi("/v1/rest/enrollmentHistory"), rawGrades: proxyRestApi("/v1/rest/grades/raw"), aggregateGrades: proxyRestApi("/v1/rest/grades/aggregate"), diff --git a/apps/api/src/routes/v1/graphql/schema/degrees.graphql b/apps/api/src/routes/v1/graphql/schema/degrees.graphql new file mode 100644 index 00000000..609dbb03 --- /dev/null +++ b/apps/api/src/routes/v1/graphql/schema/degrees.graphql @@ -0,0 +1,39 @@ +type Specialization { + id: String! + majorId: String! + name: String! + requirements: JSON! +} + +type Major { + id: String! + degreeId: String! + code: String! + name: String! + requirements: JSON! + specializations: [Specialization!]! +} + +type Minor { + id: String! + name: String! + requirements: JSON! +} + +type Degree { + id: String! + name: String! + division: DegreeDivision! + majors: [Major!]! +} + +extend type Query { + major(id: String!): Major! + majors(degreeId: String, nameContains: String): [Major!]! + minor(id: String!): Minor! + minors(nameContains: String): [Minor!]! 
+ specialization(id: String!): Specialization! + specializations(nameContains: String): [Specialization!]! + specializationsByMajorId(majorId: String!): [Specialization!]! + allDegrees: [Degree!]! +} diff --git a/apps/api/src/routes/v1/graphql/schema/enum.graphql b/apps/api/src/routes/v1/graphql/schema/enum.graphql index 38820ffc..b33d4cf6 100644 --- a/apps/api/src/routes/v1/graphql/schema/enum.graphql +++ b/apps/api/src/routes/v1/graphql/schema/enum.graphql @@ -69,3 +69,8 @@ enum WebsocSectionFinalExamStatus { TBA_FINAL SCHEDULED_FINAL } +"The set of valid degree divisions." +enum DegreeDivision { + Undergraduate + Graduate +} diff --git a/apps/api/src/routes/v1/rest/degrees/{id}/+endpoint.ts b/apps/api/src/routes/v1/rest/degrees/{id}/+endpoint.ts new file mode 100644 index 00000000..a1cc3655 --- /dev/null +++ b/apps/api/src/routes/v1/rest/degrees/{id}/+endpoint.ts @@ -0,0 +1,157 @@ +import { PrismaClient } from "@libs/db"; +import { createHandler } from "@libs/lambda"; + +import { ProgramSchema, SpecializationSchema } from "./schema"; + +const prisma = new PrismaClient(); + +async function onWarm() { + await prisma.$connect(); +} + +// Prisma queries for majors and minors, keyed by the route segment that selects them. +const degreeRepository = { + majors: { + findMany: async () => { + return await prisma.major.findMany({ include: { specializations: true } }); + }, + findFirstById: async (id: string) => { + return await prisma.major.findFirst({ where: { id }, include: { specializations: true } }); + }, + // Prisma ignores `undefined` filter values, so either argument may be omitted. + findManyNameContains: async (degreeId?: string, contains?: string) => { + return await prisma.major.findMany({ + where: { + degreeId, + name: { contains, mode: "insensitive" }, + }, + include: { specializations: true }, + }); + }, + }, + minors: { + findMany: async () => { + return await prisma.minor.findMany({}); + }, + findFirstById: async (id: string) => { + return await prisma.minor.findFirst({ where: { id } }); + }, + findManyNameContains: async (contains?: string) => { + return await prisma.minor.findMany({ + where: { name: { contains, mode: "insensitive" } }, + }); + }, + }, +}; + +export const GET = createHandler(async (event, context, res) => { + const headers = event.headers; + const params =
event.pathParameters ?? {}; + const query = event.queryStringParameters ?? {}; + const requestId = context.awsRequestId; + + switch (params?.id) { + case "all": + return res.createOKResult( + await prisma.degree.findMany({ + include: { majors: { include: { specializations: true } } }, + }), + headers, + requestId, + ); + + case "majors": // falls through + case "minors": { + const parsedQuery = ProgramSchema.safeParse(query); + + if (!parsedQuery.success) { + return res.createErrorResult( + 400, + parsedQuery.error.issues.map((issue) => issue.message).join("; "), + requestId, + ); + } + + switch (parsedQuery.data.type) { + case "id": { + const result = await degreeRepository[params.id].findFirstById(parsedQuery.data.id); + return result + ? res.createOKResult(result, headers, requestId) + : res.createErrorResult( + 404, + `${params.id === "majors" ? "Major" : "Minor"} with ID ${parsedQuery.data.id} not found`, + requestId, + ); + } + + case "degreeOrName": { + const { degreeId, nameContains } = parsedQuery.data; + + // The Minor model has no degreeId column, so a degreeId filter is rejected for minors. + if (params.id === "minors" && degreeId != null) { + return res.createErrorResult(400, "Invalid input", requestId); + } + + const result = + params.id === "minors" + ? await degreeRepository.minors.findManyNameContains(nameContains) + : await degreeRepository.majors.findManyNameContains(degreeId, nameContains); + return res.createOKResult(result, headers, requestId); + } + + case "empty": { + const result = await degreeRepository[params.id].findMany(); + return res.createOKResult(result, headers, requestId); + } + } + break; + } + + case "specializations": { + const parsedQuery = SpecializationSchema.safeParse(query); + + if (!parsedQuery.success) { + return res.createErrorResult( + 400, + parsedQuery.error.issues.map((issue) => issue.message).join("; "), + requestId, + ); + } + + switch (parsedQuery.data.type) { + case "id": { + const row = await prisma.specialization.findFirst({ where: { id: parsedQuery.data.id } }); + + return row + ?
res.createOKResult(row, headers, requestId) + : res.createErrorResult( + 404, + `Specialization with ID ${parsedQuery.data.id} not found`, + requestId, + ); + } + + case "major": { + const result = await prisma.specialization.findMany({ + where: { majorId: parsedQuery.data.majorId }, + }); + return res.createOKResult(result, headers, requestId); + } + + case "name": { + const result = await prisma.specialization.findMany({ + where: { name: { contains: parsedQuery.data.nameContains, mode: "insensitive" } }, + }); + return res.createOKResult(result, headers, requestId); + } + + case "empty": { + const result = await prisma.specialization.findMany(); + return res.createOKResult(result, headers, requestId); + } + } + } + } + + return res.createErrorResult(400, "Invalid endpoint", requestId); +}, onWarm); diff --git a/apps/api/src/routes/v1/rest/degrees/{id}/schema.ts b/apps/api/src/routes/v1/rest/degrees/{id}/schema.ts new file mode 100644 index 00000000..04dc5e69 --- /dev/null +++ b/apps/api/src/routes/v1/rest/degrees/{id}/schema.ts @@ -0,0 +1,47 @@ +import { z } from "zod"; + +// Query parameters accepted by the majors and minors routes. The transform tags the +// parsed value with a `type` that the handler switches on. +export const ProgramSchema = z + .union([ + z.object({ id: z.string() }), + z.object({ degreeId: z.string().optional(), nameContains: z.string().optional() }), + z.object({}), + ]) + .transform((data) => { + if ("id" in data) { + return { type: "id" as const, ...data }; + } + + // A request filtered by name alone must not be treated as "empty" (unfiltered). + const degreeId = "degreeId" in data ? data.degreeId : undefined; + const nameContains = "nameContains" in data ? data.nameContains : undefined; + if (degreeId != null || nameContains != null) { + return { type: "degreeOrName" as const, degreeId, nameContains }; + } + + return { type: "empty" as const, ...data }; + }); + +export const SpecializationSchema = z + .union([ + z.object({ id: z.string() }), + z.object({ majorId: z.string() }), + z.object({ nameContains: z.string() }), + z.object({}), + ]) + .transform((data) => { + if ("id" in data) { + return { type: "id" as const, ...data }; + } + + if ("majorId" in data) { + return { type: "major" as const, ...data }; + } + + if ("nameContains" in data) { + return { type: "name" as const, ...data }; + } +
+ return { type: "empty" as const, ...data }; + }); diff --git a/libs/db/prisma/schema.prisma b/libs/db/prisma/schema.prisma index d648c063..109e7414 100644 --- a/libs/db/prisma/schema.prisma +++ b/libs/db/prisma/schema.prisma @@ -20,6 +20,11 @@ enum CourseLevel { Graduate } +enum Division { + Undergraduate + Graduate +} + enum Quarter { Fall Winter @@ -91,17 +96,11 @@ model Course { terms String[] } -model Instructor { - ucinetid String @id - name String - shortenedName String - title String - email String - department String - schools String[] - relatedDepartments String[] - courseHistory Json - courses Json @default("[]") +model Degree { + id String @id + name String + division Division + majors Major[] } model GradesInstructor { @@ -148,6 +147,43 @@ model GradesSection { @@unique([year, quarter, sectionCode], name: "idx") } +model Instructor { + ucinetid String @id + name String + shortenedName String + title String + email String + department String + schools String[] + relatedDepartments String[] + courseHistory Json + courses Json @default("[]") +} + +model Major { + id String @id + degreeId String + degree Degree @relation(fields: [degreeId], references: [id]) + code String + name String + requirements Json + specializations Specialization[] +} + +model Minor { + id String @id + name String + requirements Json +} + +model Specialization { + id String @id + majorId String + major Major @relation(fields: [majorId], references: [id]) + name String + requirements Json +} + model WebsocEnrollmentHistoryEntry { year String quarter Quarter diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 346fc602..a4363a11 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -397,6 +397,28 @@ importers: specifier: 4.7.1 version: 4.7.1 + tools/degreeworks-scraper: + dependencies: + '@libs/db': + specifier: workspace:^ + version: link:../../libs/db + '@peterportal-api/types': + specifier: workspace:^ + version: link:../../packages/types + cross-fetch: + specifier: 4.0.0 + version: 
4.0.0 + dotenv: + specifier: 16.4.1 + version: 16.4.1 + jwt-decode: + specifier: 4.0.0 + version: 4.0.0 + devDependencies: + tsx: + specifier: 4.7.0 + version: 4.7.0 + tools/grades-updater: dependencies: '@libs/db': @@ -2876,16 +2898,6 @@ packages: conventional-changelog-conventionalcommits: 7.0.2 dev: false - /@commitlint/config-validator@18.6.0: - resolution: {integrity: sha512-Ptfa865arNozlkjxrYG3qt6wT9AlhNUHeuDyKEZiTL/l0ftncFhK/KN0t/EAMV2tec+0Mwxo0FmhbESj/bI+1g==} - engines: {node: '>=v18'} - requiresBuild: true - dependencies: - '@commitlint/types': 18.6.0 - ajv: 8.12.0 - dev: false - optional: true - /@commitlint/config-validator@19.0.3: resolution: {integrity: sha512-2D3r4PKjoo59zBc2auodrSCaUnCSALCx54yveOFwwP/i2kfEAQrygwOleFWswLqK0UL/F9r07MFi5ev2ohyM4Q==} engines: {node: '>=v18'} @@ -2906,13 +2918,6 @@ packages: lodash.upperfirst: 4.3.1 dev: false - /@commitlint/execute-rule@18.4.4: - resolution: {integrity: sha512-a37Nd3bDQydtg9PCLLWM9ZC+GO7X5i4zJvrggJv5jBhaHsXeQ9ZWdO6ODYR+f0LxBXXNYK3geYXJrCWUCP8JEg==} - engines: {node: '>=v18'} - requiresBuild: true - dev: false - optional: true - /@commitlint/execute-rule@19.0.0: resolution: {integrity: sha512-mtsdpY1qyWgAO/iOK0L6gSGeR7GFcdW7tIjcNFxcWkfLDF5qVbPHKuGATFqRMsxcO8OUKNj0+3WOHB7EHm4Jdw==} engines: {node: '>=v18'} @@ -2944,28 +2949,6 @@ packages: '@commitlint/types': 19.0.3 dev: false - /@commitlint/load@18.6.0(@types/node@20.11.24)(typescript@5.3.3): - resolution: {integrity: sha512-RRssj7TmzT0bowoEKlgwg8uQ7ORXWkw7lYLsZZBMi9aInsJuGNLNWcMxJxRZbwxG3jkCidGUg85WmqJvRjsaDA==} - engines: {node: '>=v18'} - requiresBuild: true - dependencies: - '@commitlint/config-validator': 18.6.0 - '@commitlint/execute-rule': 18.4.4 - '@commitlint/resolve-extends': 18.6.0 - '@commitlint/types': 18.6.0 - chalk: 4.1.2 - cosmiconfig: 8.3.6(typescript@5.3.3) - cosmiconfig-typescript-loader: 5.0.0(@types/node@20.11.24)(cosmiconfig@8.3.6)(typescript@5.3.3) - lodash.isplainobject: 4.0.6 - lodash.merge: 4.6.2 - lodash.uniq: 4.5.0 - 
resolve-from: 5.0.0 - transitivePeerDependencies: - - '@types/node' - - typescript - dev: false - optional: true - /@commitlint/load@19.0.3(@types/node@20.11.24)(typescript@5.3.3): resolution: {integrity: sha512-18Tk/ZcDFRKIoKfEcl7kC+bYkEQ055iyKmGsYDoYWpKf6FUvBrP9bIWapuy/MB+kYiltmP9ITiUx6UXtqC9IRw==} engines: {node: '>=v18'} @@ -3009,20 +2992,6 @@ packages: minimist: 1.2.8 dev: false - /@commitlint/resolve-extends@18.6.0: - resolution: {integrity: sha512-k2Xp+Fxeggki2i90vGrbiLDMefPius3zGSTFFlRAPKce/SWLbZtI+uqE9Mne23mHO5lmcSV8z5m6ziiJwGpOcg==} - engines: {node: '>=v18'} - requiresBuild: true - dependencies: - '@commitlint/config-validator': 18.6.0 - '@commitlint/types': 18.6.0 - import-fresh: 3.3.0 - lodash.mergewith: 4.6.2 - resolve-from: 5.0.0 - resolve-global: 1.0.0 - dev: false - optional: true - /@commitlint/resolve-extends@19.0.3: resolution: {integrity: sha512-18BKmta8OC8+Ub+Q3QGM9l27VjQaXobloVXOrMvu8CpEwJYv62vC/t7Ka5kJnsW0tU9q1eMqJFZ/nN9T/cOaIA==} engines: {node: '>=v18'} @@ -3058,15 +3027,6 @@ packages: find-up: 7.0.0 dev: false - /@commitlint/types@18.6.0: - resolution: {integrity: sha512-oavoKLML/eJa2rJeyYSbyGAYzTxQ6voG5oeX3OrxpfrkRWhJfm4ACnhoRf5tgiybx2MZ+EVFqC1Lw3W8/uwpZA==} - engines: {node: '>=v18'} - requiresBuild: true - dependencies: - chalk: 4.1.2 - dev: false - optional: true - /@commitlint/types@19.0.3: resolution: {integrity: sha512-tpyc+7i6bPG9mvaBbtKUeghfyZSDgWquIDfMgqYtTbmZ9Y9VzEm2je9EYcQ0aoz5o7NvGS+rcDec93yO08MHYA==} engines: {node: '>=v18'} @@ -7373,7 +7333,7 @@ packages: longest: 2.0.1 word-wrap: 1.2.3 optionalDependencies: - '@commitlint/load': 18.6.0(@types/node@20.11.24)(typescript@5.3.3) + '@commitlint/load': 19.0.3(@types/node@20.11.24)(typescript@5.3.3) transitivePeerDependencies: - '@types/node' - typescript @@ -7718,6 +7678,11 @@ packages: webpack: 5.84.1 dev: false + /dotenv@16.4.1: + resolution: {integrity: sha512-CjA3y+Dr3FyFDOAMnxZEGtnW9KBR2M0JvvUtXNW+dYJL5ROWxP9DUHCwgFqpMk0OXCc0ljhaNTr2w/kutYIcHQ==} + engines: {node: '>=12'} + 
dev: false + /dotenv@16.4.5: resolution: {integrity: sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==} engines: {node: '>=12'} @@ -8781,15 +8746,6 @@ packages: ini: 4.1.1 dev: false - /global-dirs@0.1.1: - resolution: {integrity: sha512-NknMLn7F2J7aflwFOlGdNIuCDpN3VGoSoB+aap3KABFWbHVn1TCgFC+np23J8W2BiZbjfEw3BFBycSMv1AFblg==} - engines: {node: '>=4'} - requiresBuild: true - dependencies: - ini: 1.3.8 - dev: false - optional: true - /global-dirs@3.0.1: resolution: {integrity: sha512-NBcGGFbBA9s1VzD41QXDG+3++t9Mn5t1FpLdhESY6oKY4gYTFpX4wO3sqGUa0Srjtbfj3szX0RnemmrVRUdULA==} engines: {node: '>=10'} @@ -9830,6 +9786,11 @@ packages: hasBin: true dev: false + /jwt-decode@4.0.0: + resolution: {integrity: sha512-+KJGIyHgkGuIq3IEBNftfhW/LfWhXUIY6OmyVWjliu5KH1y0fw7VQ8YndE2O4qZdMSd9SqbnC8GOcZEy0Om7sA==} + engines: {node: '>=18'} + dev: false + /keyv@3.1.0: resolution: {integrity: sha512-9ykJ/46SN/9KPM/sichzQ7OvXyGDYKGTaDlKMGCAlg2UK8KRy4jb0d8sFc+0Tt0YYnThq8X2RZgCg74RPxgcVA==} dependencies: @@ -11911,15 +11872,6 @@ packages: resolution: {integrity: sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==} engines: {node: '>=8'} - /resolve-global@1.0.0: - resolution: {integrity: sha512-zFa12V4OLtT5XUX/Q4VLvTfBf+Ok0SPc1FNGM/z9ctUdiU618qwKpWnd0CHs3+RqROfyEg/DhuHbMWYqcgljEw==} - engines: {node: '>=8'} - requiresBuild: true - dependencies: - global-dirs: 0.1.1 - dev: false - optional: true - /resolve-pathname@3.0.0: resolution: {integrity: sha512-C7rARubxI8bXFNB/hqcp/4iUeIXJhJZvFPFPiSPRnhU5UPxzMFIl+2E6yY6c4k9giDJAhtV+enfA+G89N6Csng==} dev: false @@ -12962,6 +12914,17 @@ packages: - ts-node dev: true + /tsx@4.7.0: + resolution: {integrity: sha512-I+t79RYPlEYlHn9a+KzwrvEwhJg35h/1zHsLC2JXvhC2mdynMv6Zxzvhv5EMV6VF5qJlLlkSnMVvdZV3PSIGcg==} + engines: {node: '>=18.0.0'} + hasBin: true + dependencies: + esbuild: 0.19.11 + get-tsconfig: 4.7.2 + optionalDependencies: + fsevents: 2.3.3 + dev: true + /tsx@4.7.1: 
resolution: {integrity: sha512-8d6VuibXHtlN5E3zFkgY8u4DX7Y3Z27zvvPKVmLon/D4AjuKzarkUBTLDBgj9iTQ0hg5xM7c/mYiRVM+HETf0g==} engines: {node: '>=18.0.0'} diff --git a/tools/degreeworks-scraper/.gitignore b/tools/degreeworks-scraper/.gitignore new file mode 100644 index 00000000..2132813f --- /dev/null +++ b/tools/degreeworks-scraper/.gitignore @@ -0,0 +1,2 @@ +output/ +.env diff --git a/tools/degreeworks-scraper/package.json b/tools/degreeworks-scraper/package.json new file mode 100644 index 00000000..b370177b --- /dev/null +++ b/tools/degreeworks-scraper/package.json @@ -0,0 +1,20 @@ +{ + "name": "@tools/degreeworks-scraper", + "version": "0.0.0", + "private": true, + "type": "module", + "main": "src/index.ts", + "scripts": { + "start": "tsx src/index.ts" + }, + "dependencies": { + "@libs/db": "workspace:^", + "@peterportal-api/types": "workspace:^", + "cross-fetch": "4.0.0", + "dotenv": "16.4.1", + "jwt-decode": "4.0.0" + }, + "devDependencies": { + "tsx": "4.7.0" + } +} diff --git a/tools/degreeworks-scraper/src/components/AuditParser.ts b/tools/degreeworks-scraper/src/components/AuditParser.ts new file mode 100644 index 00000000..47316c8f --- /dev/null +++ b/tools/degreeworks-scraper/src/components/AuditParser.ts @@ -0,0 +1,170 @@ +import type { Course } from "@peterportal-api/types"; + +import type { Block, Program, ProgramId, Requirement, Rule } from "../types"; + +import { PPAPIOfflineClient } from "."; + +export class AuditParser { + private static readonly specOrOtherMatcher = /"type":"(?:SPEC|OTHER)","value":"\w+"/g; + private static readonly electiveMatcher = /ELECTIVE @+/; + private static readonly wildcardMatcher = /\w@/; + private static readonly rangeMatcher = /-\w+/; + + private ppapi!: PPAPIOfflineClient; + + private constructor() {} + + static async new(): Promise { + const ap = new AuditParser(); + ap.ppapi = await PPAPIOfflineClient.new(); + console.log("[AuditParser.new] AuditParser initialized"); + return ap; + } + + parseBlock = (blockId: string, 
block: Block): Program => ({ + ...this.parseBlockId(blockId), + name: block.title, + requirements: this.ruleArrayToRequirements(block.ruleArray), + specs: this.parseSpecs(block), + }); + + lexOrd = new Intl.Collator().compare; + + parseSpecs = (block: Block): string[] => + Array.from(JSON.stringify(block).matchAll(AuditParser.specOrOtherMatcher)) + .map((x) => JSON.parse(`{${x[0]}}`).value) + .sort(); + + flattenIfStmt(ruleArray: Rule[]): Rule[] { + const ret = []; + for (const rule of ruleArray) { + switch (rule.ruleType) { + case "IfStmt": + ret.push( + ...this.flattenIfStmt(rule.requirement.ifPart.ruleArray), + ...this.flattenIfStmt(rule.requirement.elsePart?.ruleArray ?? []), + ); + break; + default: + ret.push(rule); + } + } + return ret; + } + + normalizeCourseId(courseIdLike: string): Course[] { + // "ELECTIVE @" is typically used as a pseudo-course and can be safely ignored. + if (courseIdLike.match(AuditParser.electiveMatcher)) return []; + const [department, courseNumber] = courseIdLike.split(" "); + if (courseNumber.match(AuditParser.wildcardMatcher)) { + // Wildcard course numbers. + return this.ppapi.getCoursesByDepartment( + department, + (x) => + !!x.courseNumber.match( + new RegExp( + "^" + + courseNumber.replace( + /@+/g, + `.{${[...courseNumber].filter((y) => y === "@").length},}`, + ), + ), + ), + ); + } + if (courseNumber.match(AuditParser.rangeMatcher)) { + // Course number ranges. + const [minCourseNumber, maxCourseNumber] = courseNumber.split("-"); + return this.ppapi.getCoursesByDepartment( + department, + (x) => + x.courseNumeric >= Number.parseInt(minCourseNumber, 10) && + x.courseNumeric <= Number.parseInt(maxCourseNumber, 10), + ); + } + // Probably a normal course, just make sure that it exists. + const course = this.ppapi.getCourse(`${department}${courseNumber}`); + return course ? 
[course] : []; + } + + ruleArrayToRequirements(ruleArray: Rule[]) { + const ret: Record = {}; + for (const rule of ruleArray) { + switch (rule.ruleType) { + case "Block": + case "Noncourse": + break; + case "Course": { + const includedCourses = rule.requirement.courseArray.map( + (x) => `${x.discipline} ${x.number}${x.numberEnd ? `-${x.numberEnd}` : ""}`, + ); + const toInclude = new Map( + includedCourses.flatMap(this.normalizeCourseId.bind(this)).map((x) => [x.id, x]), + ); + const excludedCourses = + rule.requirement.except?.courseArray.map( + (x) => `${x.discipline} ${x.number}${x.numberEnd ? `-${x.numberEnd}` : ""}`, + ) ?? []; + const toExclude = new Set( + excludedCourses.flatMap(this.normalizeCourseId.bind(this)).map((x) => x.id), + ); + const courses = Array.from(toInclude) + .filter(([x]) => !toExclude.has(x)) + .sort(([, a], [, b]) => + a.department === b.department + ? a.courseNumeric - b.courseNumeric || this.lexOrd(a.courseNumber, b.courseNumber) + : this.lexOrd(a.department, b.department), + ) + .map(([x]) => x); + if (rule.requirement.classesBegin) { + ret[rule.label] = { + requirementType: "Course", + courseCount: Number.parseInt(rule.requirement.classesBegin, 10), + courses, + }; + } else if (rule.requirement.creditsBegin) { + ret[rule.label] = { + requirementType: "Unit", + unitCount: Number.parseInt(rule.requirement.creditsBegin, 10), + courses, + }; + } + break; + } + case "Group": { + ret[rule.label] = { + requirementType: "Group", + requirementCount: Number.parseInt(rule.requirement.numberOfGroups), + requirements: this.ruleArrayToRequirements(rule.ruleArray), + }; + break; + } + case "IfStmt": { + const rules = this.flattenIfStmt([rule]); + if (rules.length > 1 && !rules.some((x) => x.ruleType === "Block")) { + ret["Select 1 of the following"] = { + requirementType: "Group", + requirementCount: 1, + requirements: this.ruleArrayToRequirements(rules), + }; + } + break; + } + case "Subset": { + const requirements = 
this.ruleArrayToRequirements(rule.ruleArray); + ret[rule.label] = { + requirementType: "Group", + requirementCount: Object.keys(requirements).length, + requirements, + }; + } + } + } + return ret; + } + + parseBlockId(blockId: string) { + const [school, programType, code, degreeType] = blockId.split("-"); + return { school, programType, code, degreeType } as ProgramId; + } +} diff --git a/tools/degreeworks-scraper/src/components/DegreeworksClient.ts b/tools/degreeworks-scraper/src/components/DegreeworksClient.ts new file mode 100644 index 00000000..6f475885 --- /dev/null +++ b/tools/degreeworks-scraper/src/components/DegreeworksClient.ts @@ -0,0 +1,131 @@ +import fetch from "cross-fetch"; + +import type { Block, DWAuditResponse, DWMappingResponse } from "../types"; + +export class DegreeworksClient { + private static readonly API_URL = "https://reg.uci.edu/RespDashboard/api"; + private static readonly AUDIT_URL = `${DegreeworksClient.API_URL}/audit`; + private catalogYear: string = ""; + + private constructor( + private readonly studentId: string, + private readonly headers: HeadersInit, + private readonly delay: number, + ) {} + + static async new( + studentId: string, + headers: HeadersInit, + delay: number = 1000, + ): Promise { + const dw = new DegreeworksClient(studentId, headers, delay); + /** + * Depending on when we are scraping, the catalog year may be the academic year that + * started the previous calendar year, or the one that will start this calendar year. + * + * We determine the catalog year by seeing if we can fetch the major data for the + * B.S. in Computer Science for the latter. If it is available, then we use that + * as the catalog year. Otherwise, we use the former. + */ + const currentYear = new Date().getUTCFullYear(); + const dataThisYear = await dw.getMajorAudit("BS", "U", "201"); + dw.catalogYear = dataThisYear + ? 
`${currentYear}${currentYear + 1}` + : `${currentYear - 1}${currentYear}`; + console.log(`[DegreeworksClient.new] Set catalogYear to ${dw.catalogYear}`); + return dw; + } + + sleep = (ms: number = this.delay) => new Promise((r) => setTimeout(r, ms)); + + async getMajorAudit( + degree: string, + school: string, + majorCode: string, + ): Promise { + const res = await fetch(DegreeworksClient.AUDIT_URL, { + method: "POST", + body: JSON.stringify({ + catalogYear: this.catalogYear, + degree, + school, + studentId: this.studentId, + classes: [], + goals: [{ code: "MAJOR", value: majorCode }], + }), + headers: this.headers, + }); + await this.sleep(); + const json: DWAuditResponse = await res.json().catch(() => ({ error: "" })); + return "error" in json + ? undefined + : json.blockArray.find( + (x) => x.requirementType === "MAJOR" && x.requirementValue === majorCode, + ); + } + + async getMinorAudit(minorCode: string): Promise { + const res = await fetch(DegreeworksClient.AUDIT_URL, { + method: "POST", + body: JSON.stringify({ + catalogYear: this.catalogYear, + studentId: this.studentId, + degree: "BA", + school: "U", + classes: [], + goals: [ + { code: "MAJOR", value: "000" }, + { code: "MINOR", value: minorCode }, + ], + }), + headers: this.headers, + }); + await this.sleep(); + const json: DWAuditResponse = await res.json().catch(() => ({ error: "" })); + return "error" in json + ? 
undefined + : json.blockArray.find( + (x) => x.requirementType === "MINOR" && x.requirementValue === minorCode, + ); + } + + async getSpecAudit( + degree: string, + school: string, + majorCode: string, + specCode: string, + ): Promise { + const res = await fetch(DegreeworksClient.AUDIT_URL, { + method: "POST", + body: JSON.stringify({ + catalogYear: this.catalogYear, + degree, + school, + studentId: this.studentId, + classes: [], + goals: [ + { code: "MAJOR", value: majorCode }, + { code: "SPEC", value: specCode }, + { code: "OTHER", value: specCode }, + ], + }), + headers: this.headers, + }); + await this.sleep(); + const json: DWAuditResponse = await res.json().catch(() => ({ error: "" })); + return "error" in json + ? undefined + : json.blockArray.find( + (x) => + (x.requirementType === "SPEC" || x.requirementType === "OTHER") && + x.requirementValue === specCode, + ); + } + + async getMapping(path: T): Promise> { + const res = await fetch(`${DegreeworksClient.API_URL}/${path}`, { headers: this.headers }); + await this.sleep(); + const json: DWMappingResponse = await res.json(); + return new Map(json._embedded[path].map((x) => [x.key, x.description])); + } +} diff --git a/tools/degreeworks-scraper/src/components/PPAPIOfflineClient.ts b/tools/degreeworks-scraper/src/components/PPAPIOfflineClient.ts new file mode 100644 index 00000000..a1c9729a --- /dev/null +++ b/tools/degreeworks-scraper/src/components/PPAPIOfflineClient.ts @@ -0,0 +1,37 @@ +import { isErrorResponse } from "@peterportal-api/types"; +import type { Course, RawResponse } from "@peterportal-api/types"; +import fetch from "cross-fetch"; + +const ENDPOINT = "https://api-next.peterportal.org/v1/rest/courses/all"; + +export class PPAPIOfflineClient { + private cache = new Map(); + + private constructor() {} + + static async new(): Promise { + const ppapi = new PPAPIOfflineClient(); + const res = await fetch(ENDPOINT, { headers: { "accept-encoding": "gzip" } }); + const json: RawResponse = await 
res.json(); + if (isErrorResponse(json)) + throw new Error("Could not fetch courses cache from PeterPortal API"); + json.payload.forEach((y) => ppapi.cache.set(y.id, y)); + console.log( + `[PPAPIOfflineClient.new] Fetched and stored ${json.payload.length} courses from PeterPortal API`, + ); + return ppapi; + } + + getCourse(courseNumber: string): Course | undefined { + return this.cache.get(courseNumber); + } + + getCoursesByDepartment( + department: string, + predicate: (x: Course) => boolean = () => true, + ): Course[] { + return Array.from(this.cache.values()) + .filter((x) => x.id.startsWith(department)) + .filter(predicate); + } +} diff --git a/tools/degreeworks-scraper/src/components/Scraper.ts b/tools/degreeworks-scraper/src/components/Scraper.ts new file mode 100644 index 00000000..bef2a812 --- /dev/null +++ b/tools/degreeworks-scraper/src/components/Scraper.ts @@ -0,0 +1,191 @@ +import { jwtDecode } from "jwt-decode"; +import type { JwtPayload } from "jwt-decode"; + +import type { Program } from "../types"; + +import { AuditParser, DegreeworksClient } from "."; + +const JWT_HEADER_PREFIX_LENGTH = 7; + +export class Scraper { + private ap!: AuditParser; + private dw!: DegreeworksClient; + + private degrees: Map | undefined = undefined; + private majorPrograms: Set | undefined = undefined; + private minorPrograms: Set | undefined = undefined; + + private done = false; + private parsedMinorPrograms: Map | undefined = undefined; + private parsedUgradPrograms: Map | undefined = undefined; + private parsedGradPrograms: Map | undefined = undefined; + private parsedSpecializations: Map | undefined = undefined; + private degreesAwarded: Map | undefined = undefined; + + private constructor() {} + + private async scrapePrograms(school: string, degrees: Set) { + if (!this.majorPrograms) throw new Error("majorPrograms has not yet been initialized."); + const ret = new Map(); + for (const degree of degrees) { + for (const majorCode of this.majorPrograms) { + const audit 
= await this.dw.getMajorAudit(degree, school, majorCode); + if (!audit) { + console.log( + `Requirements block not found (majorCode = ${majorCode}, degree = ${degree})`, + ); + continue; + } + if (ret.has(audit.title)) { + console.log( + `Requirements block already exists for "${audit.title}" (majorCode = ${majorCode}, degree = ${degree})`, + ); + continue; + } + ret.set(audit.title, this.ap.parseBlock(`${school}-MAJOR-${majorCode}-${degree}`, audit)); + console.log( + `Requirements block found and parsed for "${audit.title}" (majorCode = ${majorCode}, degree = ${degree})`, + ); + } + } + return ret; + } + /** + * Folds specialization requirements into programs whose own requirements block is empty: + * a single specialization is inlined, several become a "Select 1 of the following" group. + * Specializations whose audit was not scraped are skipped instead of being dereferenced. + */ + private cleanUpPrograms(programs: Map) { + const ret = new Map(); + for (const [name, program] of programs) { + if (!Object.keys(program.requirements).length) { + const found = program.specs + .map((x) => this.parsedSpecializations?.get(x)) + .filter((x): x is Program => x != null); + // With nothing to fold in, keep the program (and its spec codes) exactly as scraped. + if (found.length) { + if (program.specs.length === 1) { + program.requirements = found[0].requirements; + } else { + program.requirements = { + "Select 1 of the following": { + requirementType: "Group", + requirementCount: 1, + requirements: Object.fromEntries(found.map((x) => [x.name, x.requirements])), + }, + }; + } + program.specs = []; + } + } + ret.set(name, program); + } + return ret; + } + async run() { + if (this.done) throw new Error("This scraper instance has already finished its run."); + console.log("[Scraper] degreeworks-scraper starting"); + this.degrees = await this.dw.getMapping("degrees"); + console.log(`Fetched ${this.degrees.size} degrees`); + this.majorPrograms = new Set((await this.dw.getMapping("majors")).keys()); + console.log(`Fetched ${this.majorPrograms.size} major programs`); + this.minorPrograms = new Set((await this.dw.getMapping("minors")).keys()); + console.log(`Fetched ${this.minorPrograms.size} minor programs`); + const ugradDegrees = new Set(); + const gradDegrees = new Set(); + for (const degree of this.degrees.keys()) + (degree.startsWith("B") ?
ugradDegrees : gradDegrees).add(degree); + this.parsedMinorPrograms = new Map(); + console.log("Scraping minor program requirements"); + for (const minorCode of this.minorPrograms) { + const audit = await this.dw.getMinorAudit(minorCode); + if (!audit) { + console.log(`Requirements block not found (minorCode = ${minorCode})`); + continue; + } + this.parsedMinorPrograms.set(audit.title, this.ap.parseBlock(`U-MINOR-${minorCode}`, audit)); + console.log( + `Requirements block found and parsed for "${audit.title}" (minorCode = ${minorCode})`, + ); + } + console.log("Scraping undergraduate program requirements"); + this.parsedUgradPrograms = await this.scrapePrograms("U", ugradDegrees); + console.log("Scraping graduate program requirements"); + this.parsedGradPrograms = await this.scrapePrograms("G", gradDegrees); + this.parsedSpecializations = new Map(); + console.log("Scraping all specialization requirements"); + for (const [, { specs, school, code: majorCode, degreeType: degree }] of [ + ...this.parsedUgradPrograms, + ...this.parsedGradPrograms, + ]) { + if (!degree) throw new Error("Degree type is undefined"); + for (const specCode of specs) { + const audit = await this.dw.getSpecAudit(degree, school, majorCode, specCode); + if (!audit) { + console.log( + `Requirements block not found (school = ${school}, majorCode = ${majorCode}, specCode = ${specCode}, degree = ${degree})`, + ); + continue; + } + this.parsedSpecializations.set( + specCode, + this.ap.parseBlock(`${school}-SPEC-${specCode}-${degree}`, audit), + ); + console.log( + `Requirements block found and parsed for "${audit.title}" (specCode = ${specCode})`, + ); + } + } + this.degreesAwarded = new Map( + Array.from( + new Set( + [...this.parsedUgradPrograms, ...this.parsedGradPrograms].map(([, x]) => x.degreeType!), + ), + ).map((x) => [x, this.degrees!.get(x)!]), + ); + + // Post-processing steps. + + // As of this commit, the only program which seems to require both of + // its "specializations" is the B.A. 
in Art History. There's probably a + // cleaner way to address this, but this is such an insanely niche case + // that it's probably not worth the effort to write a general solution. + + let x, y, z; + if ( + (x = this.parsedUgradPrograms.get("Major in Art History")!) && + (y = this.parsedSpecializations.get("AHGEO")!) && + (z = this.parsedSpecializations.get("AHPER")!) + ) { + x.specs = []; + x.requirements = { ...x.requirements, ...y.requirements, ...z.requirements }; + this.parsedSpecializations.delete("AHGEO"); + this.parsedSpecializations.delete("AHPER"); + this.parsedUgradPrograms.set("Major in Art History", x); + } + + // Some programs have an empty requirements block and more than one specialization. + // They can be simplified into a "Select 1 of the following" group requirement. + this.parsedUgradPrograms = this.cleanUpPrograms(this.parsedUgradPrograms); + this.parsedGradPrograms = this.cleanUpPrograms(this.parsedGradPrograms); + + this.done = true; + } + get() { + if (!this.done) throw new Error("This scraper instance has not yet finished its run."); + return { + parsedMinorPrograms: this.parsedMinorPrograms!, + parsedUgradPrograms: this.parsedUgradPrograms!, + parsedGradPrograms: this.parsedGradPrograms!, + parsedSpecializations: this.parsedSpecializations!, + degreesAwarded: this.degreesAwarded!, + }; + } + static async new(authCookie: string): Promise<Scraper> { + const studentId = jwtDecode(authCookie.slice(JWT_HEADER_PREFIX_LENGTH))?.sub; + if (studentId?.length !== 8) throw new Error("Could not parse student ID from auth cookie."); + const headers = { + "Content-Type": "application/json", + Cookie: `X-AUTH-TOKEN=${authCookie}`, + Origin: "https://reg.uci.edu", + }; + const scraper = new Scraper(); + scraper.ap = await AuditParser.new(); + scraper.dw = await DegreeworksClient.new(studentId, headers); + return scraper; + } +} diff --git a/tools/degreeworks-scraper/src/components/index.ts b/tools/degreeworks-scraper/src/components/index.ts new file mode 100644
index 00000000..e8ce9349 --- /dev/null +++ b/tools/degreeworks-scraper/src/components/index.ts @@ -0,0 +1,4 @@ +export { AuditParser } from "./AuditParser"; +export { DegreeworksClient } from "./DegreeworksClient"; +export { PPAPIOfflineClient } from "./PPAPIOfflineClient"; +export { Scraper } from "./Scraper"; diff --git a/tools/degreeworks-scraper/src/index.ts b/tools/degreeworks-scraper/src/index.ts new file mode 100644 index 00000000..ecc46a90 --- /dev/null +++ b/tools/degreeworks-scraper/src/index.ts @@ -0,0 +1,56 @@ +import { PrismaClient } from "@libs/db"; + +import { Scraper } from "./components"; + +import "dotenv/config"; + +const prisma = new PrismaClient(); + +type Division = "Undergraduate" | "Graduate"; + +async function main() { + if (!process.env["X_AUTH_TOKEN"]) throw new Error("Auth cookie not set."); + const scraper = await Scraper.new(process.env["X_AUTH_TOKEN"]); + await scraper.run(); + const { + degreesAwarded, + parsedSpecializations, + parsedGradPrograms, + parsedMinorPrograms, + parsedUgradPrograms, + } = scraper.get(); + const degreeData = Array.from(degreesAwarded.entries()).map(([id, name]) => ({ + id, + name, + division: (id.startsWith("B") ? 
"Undergraduate" : "Graduate") as Division, + })); + const majorData = [ + ...Array.from(parsedUgradPrograms.values()), + ...Array.from(parsedGradPrograms.values()), + ].map(({ name, degreeType, code, requirements }) => ({ + id: `${degreeType}-${code}`, + degreeId: degreeType!, + code, + name, + requirements, + })); + const minorData = Array.from(parsedMinorPrograms.values()).map( + ({ name, code: id, requirements }) => ({ id, name, requirements }), + ); + const specData = Array.from(parsedSpecializations.values()).map( + ({ name, degreeType, code, requirements }) => ({ + id: `${degreeType}-${code}`, + majorId: `${degreeType}-${code.slice(0, code.length - 1)}`, + name, + requirements, + }), + ); + await prisma.$transaction([ + prisma.degree.createMany({ data: degreeData, skipDuplicates: true }), + prisma.major.createMany({ data: majorData, skipDuplicates: true }), + prisma.minor.createMany({ data: minorData, skipDuplicates: true }), + prisma.specialization.createMany({ data: specData, skipDuplicates: true }), + ]); +} + +main().then(); diff --git a/tools/degreeworks-scraper/src/types.ts b/tools/degreeworks-scraper/src/types.ts new file mode 100644 index 00000000..c96c9a75 --- /dev/null +++ b/tools/degreeworks-scraper/src/types.ts @@ -0,0 +1,148 @@ +// region DegreeWorks response types +/** + * The base type for all `Rule` objects. + */ +export type RuleBase = { label: string }; +/** + * A group of `numberOfRules` rules, + * of which `numberOfGroups` must be satisfied + * in order to fulfill this rule. + */ +export type RuleGroup = { + ruleType: "Group"; + requirement: { numberOfGroups: string; numberOfRules: string }; + ruleArray: Rule[]; +}; +/** + * An object that represents a (range of) course(s). + */ +export type Course = { discipline: string; number: string; numberEnd?: string }; +/** + * A rule that is fulfilled by taking `creditsBegin` units + * and/or `classesBegin` courses from the `courseArray`. 
+ */ +export type RuleCourse = { + ruleType: "Course"; + requirement: { + creditsBegin?: string; + classesBegin?: string; + courseArray: Course[]; + except?: { courseArray: Course[] }; + }; +}; +/** + * A rule that has different requirements depending on some boolean condition. + * This seems to be used to denote all specializations that can be applied to a major. + */ +export type RuleIfStmt = { + ruleType: "IfStmt"; + requirement: { ifPart: { ruleArray: Rule[] }; elsePart?: { ruleArray: Rule[] } }; +}; +/** + * A rule that refers to another block (typically a specialization). + */ +export type RuleBlock = { + ruleType: "Block"; + requirement: { numBlocks: string; type: string; value: string }; +}; +/** + * A rule that is not a course. + * This seems to be only used by Engineering majors + * that have a design unit requirement. + */ +export type RuleNoncourse = { + ruleType: "Noncourse"; + requirement: { numNoncourses: string; code: string }; +}; +export type RuleSubset = { + ruleType: "Subset"; + ruleArray: Rule[]; +}; +export type Rule = RuleBase & + (RuleGroup | RuleCourse | RuleIfStmt | RuleBlock | RuleNoncourse | RuleSubset); +export type Block = { + requirementType: string; + requirementValue: string; + title: string; + ruleArray: Rule[]; +}; +export type DWAuditOKResponse = { blockArray: Block[] }; +export type DWAuditErrorResponse = { error: never }; +/** + * The type of the DegreeWorks audit response. + */ +export type DWAuditResponse = DWAuditOKResponse | DWAuditErrorResponse; + +export type DWMappingResponse<T extends string> = { + _embedded: { [P in T]: { key: string; description: string }[] }; +}; +// endregion + +// region Processed types + +export type ProgramId = { + school: "U" | "G"; + programType: "MAJOR" | "MINOR" | "SPEC"; + code: string; + degreeType?: string; +}; + +export type Program = ProgramId & { + /** + * The display name of the program.
+ * @example "Major in Computer Science" + * @example "Minor in Mathematics" + * @example "Specialization in Digital Signal Processing" + */ + name: string; + /** + * The mapping of requirement names to requirement nodes. + */ + requirements: Record<string, Requirement>; + /** + * The set of specializations (if any) that this program has. + * If this array is not empty, then exactly one specialization must be selected + * to fulfill the requirements of the program. + */ + specs: string[]; +}; + +export type CourseRequirement = { + requirementType: "Course"; + /** + * The number of courses required to fulfill this requirement. + */ + courseCount: number; + /** + * The set of courses that can be taken to fulfill this requirement. + */ + courses: string[]; +}; + +export type UnitRequirement = { + requirementType: "Unit"; + /** + * The number of units earned from the following list of courses + * that are required to fulfill this requirement. + */ + unitCount: number; + /** + * The set of courses whose units count towards this requirement. + */ + courses: string[]; +}; + +export type GroupRequirement = { + requirementType: "Group"; + /** + * The number of requirements from the mapping below + * that must be fulfilled to fulfill this requirement. + */ + requirementCount: number; + /** + * The mapping of requirement names to requirement nodes. + */ + requirements: Record<string, Requirement>; +}; + +export type Requirement = CourseRequirement | UnitRequirement | GroupRequirement;