Skip to content

Commit

Permalink
fix: importation des jpos scrappées
Browse files Browse the repository at this point in the history
  • Loading branch information
K4ST0R committed Feb 13, 2025
1 parent 3f4bb6e commit 26cd1e2
Show file tree
Hide file tree
Showing 11 changed files with 5,860 additions and 11 deletions.
5,737 changes: 5,737 additions & 0 deletions server/data/jpo.json

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions server/src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { computeBCNMEFContinuum } from "./jobs/bcn/computeBCNMEFContinuum";
import { importLibelle } from "./jobs/bcn/importLibelle";
import { importACCEEtablissements } from "./jobs/etablissements/importACCEEtablissements";
import { importEtablissements } from "./jobs/etablissements/importEtablissements";
import { importEtablissementJPOScrapTmp } from "./jobs/etablissements/importEtablissementJPOScrapTmp";
import { importFormation } from "./jobs/formations/importFormation";
import { importFormations as importCAFormations } from "./jobs/catalogueApprentissage/importFormations";
import {
Expand Down Expand Up @@ -77,6 +78,7 @@ const formationEtablissementJobs = [
// Job entries for the établissement imports: each entry pairs a job name
// with the imported function registered under that name.
const etablissementJobs = [
{ name: "etablissementACCE", job: importACCEEtablissements },
{ name: "etablissementEtablissement", job: importEtablissements },
// Import of open-day (JPO) dates through importEtablissementJPOScrapTmp.
{ name: "etablissementJPOScrapTmp", job: importEtablissementJPOScrapTmp },
];

// Job entries for the "catalogue apprentissage" imports (same { name, job } shape).
const catalogueApprentissageJobs = [{ name: "caFormations", job: importCAFormations }];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,21 @@ import { DateTime } from "luxon";
import moment from "moment-timezone";
moment.tz.setDefault("Europe/Paris");

/**
 * French month names, lowercase, in calendar order:
 * index 0 is "janvier" and index 11 is "décembre", so the month number
 * is always `index + 1`.
 */
export const MONTHS_STR = [
  // Q1
  "janvier", "février", "mars",
  // Q2
  "avril", "mai", "juin",
  // Q3
  "juillet", "août", "septembre",
  // Q4
  "octobre", "novembre", "décembre",
];

export function parseAsUTCDate(string) {
if (!string) {
return null;
Expand Down
2 changes: 1 addition & 1 deletion server/src/jobs/bcn/importBCN.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { compose, mergeStreams, oleoduc, transformData, writeData } from "oleodu
import { getBCNTable, getDiplome, getNiveauxDiplome } from "#src/services/bcn";
import { omitNil } from "#src/common/utils/objectUtils.js";
import { getLoggerWithContext } from "#src/common/logger.js";
import { parseAsUTCDate } from "#src/common/utils/dateUtils.js";
import { parseAsUTCDate } from "#src/common/utils/dateUtils";
import RawDataRepository, { RawDataType } from "#src/common/repositories/rawData";

const logger = getLoggerWithContext("import");
Expand Down
2 changes: 1 addition & 1 deletion server/src/jobs/bcn/importBCNMEF.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { oleoduc, transformData, writeData } from "oleoduc";
import { getBCNTable } from "#src/services/bcn";
import { omitNil } from "#src/common/utils/objectUtils.js";
import { getLoggerWithContext } from "#src/common/logger.js";
import { parseAsUTCDate } from "#src/common/utils/dateUtils.js";
import { parseAsUTCDate } from "#src/common/utils/dateUtils";
import { fromPairs } from "lodash-es";
import RawDataRepository, { RawDataType } from "#src/common/repositories/rawData";

Expand Down
2 changes: 1 addition & 1 deletion server/src/jobs/etablissements/importACCEEtablissements.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { filterData, oleoduc, writeData } from "oleoduc";
import moment from "#src/common/utils/dateUtils.js";
import moment from "#src/common/utils/dateUtils";
import { getLoggerWithContext } from "#src/common/logger.js";
import { omitNil } from "#src/common/utils/objectUtils.js";
import * as ACCE from "#src/services/acce.js";
Expand Down
95 changes: 95 additions & 0 deletions server/src/jobs/etablissements/importEtablissementJPOScrapTmp.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import fs from "fs";
import { Readable } from "stream";
import { oleoduc, writeData, transformData, filterData, flattenArray } from "oleoduc";
import { getLoggerWithContext } from "#src/common/logger.js";
import moment, { MONTHS_STR, parseAsUTCDate } from "#src/common/utils/dateUtils";
import { kdb } from "#src/common/db/db";
import { parseJourneesPortesOuvertes } from "#src/services/onisep/etablissement.js";
import EtablissementRepository from "#src/common/repositories/etablissement";

const logger = getLoggerWithContext("import");

// Prefix carried by scraped entries that describe previous open days.
const JPO_PREVIOUS_DATES_PREFIX = "Date(s) indicative(s) des Journées portes ouvertes précédentes : ";

// Removes the "previous open days" prefix from a scraped JPO entry.
const stripPreviousDatesPrefix = (str) => str.replace(JPO_PREVIOUS_DATES_PREFIX, "");

// Rewrites every " <french month> " into "/MM/" (e.g. "le 8 mars 2025" -> "le 8/03/2025").
// split/join replaces ALL occurrences: a period such as "du 3 mars 2025 au 5 mars 2025"
// names the same month twice, and String.prototype.replace with a string pattern
// would only convert the first one.
const replaceFrenchMonths = (str) =>
  MONTHS_STR.reduce(
    (acc, monthName, index) => acc.split(` ${monthName} `).join(`/${String(index + 1).padStart(2, "0")}/`),
    str
  );

/**
 * Imports open-day (JPO) dates from the scraped "data/jpo.json" file.
 *
 * The JSON file is read as an object keyed by Onisep establishment code; each value
 * is expected to expose a `jpo` array of strings. For every matching établissement
 * the parsed details are stored, the existing rows of "etablissementJPODate" are
 * deleted and the freshly parsed dates are inserted.
 *
 * The file path is relative to the current working directory of the process.
 *
 * @returns {Promise<{ total: number, created: number, updated: number, failed: number }>}
 *   Counters for the run. `total` counts every établissement handled, `updated` the
 *   successful ones and `failed` the ones that raised an error. `created` is never
 *   incremented by this job.
 */
export async function importEtablissementJPOScrapTmp() {
  logger.info(`Importation des dates des JPO des fiches établissements scrapper`);
  const stats = { total: 0, created: 0, updated: 0, failed: 0 };

  const scrapedJpo = JSON.parse(await fs.promises.readFile("data/jpo.json", "utf8"));

  await oleoduc(
    Readable.from(Object.entries(scrapedJpo)),
    filterData(([_, data]) => {
      // Skip entries without any JPO line (missing or empty array).
      if (!data?.jpo?.length) {
        return false;
      }

      // When the first line only describes a previous open day, keep the entry only
      // if that date falls after October 1st of the previous calendar year.
      // NOTE(review): the captured text is passed to parseAsUTCDate before the month
      // names are converted to numbers — confirm that helper accepts this format.
      const match = data.jpo[0].match(/^Date\(s\) indicative\(s\) des Journées portes ouvertes précédentes : le (.*)/);
      if (!match) {
        return true;
      }

      const date = moment(parseAsUTCDate(match[1]));
      return date.isAfter(`${moment().year() - 1}-10-01`);
    }),
    transformData(([ensCode, data]) => {
      // Normalise each line, then join them with the " | " separator.
      const normalized = data.jpo.map((jpo) => replaceFrenchMonths(stripPreviousDatesPrefix(jpo))).join(" | ");
      return [ensCode, normalized];
    }),
    transformData(async ([ensCode, data]) => {
      // One Onisep code may match several établissements: emit one item per match.
      const etablissements = await EtablissementRepository.find({ onisepId: ensCode }, false);
      return etablissements.map((e) => ({
        etablissement: e,
        jpo: data,
      }));
    }),
    flattenArray(),
    writeData(
      async ({ etablissement, jpo }) => {
        stats.total++;
        try {
          const jPOParsed = parseJourneesPortesOuvertes(jpo);
          // The parser is treated as nullable elsewhere in this code base; fail with an
          // explicit message instead of a TypeError on property access.
          if (!jPOParsed) {
            throw new Error(`Format de JPO non reconnu : ${jpo}`);
          }

          if (jPOParsed.details) {
            await EtablissementRepository.updateBy({ JPODetails: jPOParsed.details }, { id: etablissement.id });
          }

          // Remove old JPO
          await kdb
            .deleteFrom("etablissementJPODate")
            .where("etablissementId", "=", etablissement.id)
            .executeTakeFirst();

          if (jPOParsed.dates) {
            for (const date of jPOParsed.dates) {
              await kdb
                .insertInto("etablissementJPODate")
                .values({ etablissementId: etablissement.id, ...date })
                .returning(["id"])
                .executeTakeFirst();
            }
          }

          logger.info(`Etablissement ${etablissement.uai} mise à jour`);
          stats.updated++;
        } catch (e) {
          logger.error(e, `Impossible d'ajouter les informations pour l'établissement ${etablissement.uai}`);
          stats.failed++;
        }
      },
      { parallel: 1 }
    )
  );

  return stats;
}
2 changes: 1 addition & 1 deletion server/src/jobs/etablissements/importEtablissements.ts
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ export async function importEtablissements() {
{ fn: "ST_SetSRID", args: [sql`4326`] },
])
: null,
JPOdetails: jPO ? jPO.details : null,
JPODetails: jPO ? jPO.details : null,
});

const result = await upsert(kdb, "etablissement", ["uai"], queryData, queryData, ["id"]);
Expand Down
2 changes: 1 addition & 1 deletion server/src/jobs/formations/importFormationEtablissement.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { oleoduc, writeData, transformData, concatStreams, compose, filterData, mergeStreams } from "oleoduc";
import { omit, pick, uniq } from "lodash-es";
import { getLoggerWithContext } from "#src/common/logger.js";
import moment from "#src/common/utils/dateUtils.js";
import moment from "#src/common/utils/dateUtils";
import FormationRepository from "#src/common/repositories/formation";
import { streamOnisepFormations } from "./streamOnisepFormations";
import { kdb, upsert } from "#src/common/db/db";
Expand Down
2 changes: 1 addition & 1 deletion server/src/queries/getFormations.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { GraphHopperApi } from "#src/services/graphHopper/graphHopper.js";
import * as Cache from "#src/common/cache.js";
import moment from "#src/common/utils/dateUtils.js";
import moment from "#src/common/utils/dateUtils";
import { FormationTag } from "shared";
import { getLoggerWithContext } from "#src/common/logger.js";
import { kdb, kyselyChainFn } from "#src/common/db/db.js";
Expand Down
10 changes: 5 additions & 5 deletions server/src/services/onisep/etablissement.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import moment from "#src/common/utils/dateUtils.js";
import moment from "#src/common/utils/dateUtils";

export function parseJourneesPortesOuvertes(journeesPortesOuvertes) {
const dateFormat = "DD/MM/YYYY";
Expand All @@ -9,7 +9,7 @@ export function parseJourneesPortesOuvertes(journeesPortesOuvertes) {
const optionalRegex = "(?: en virtuel)?( \\([^\\)]+\\))?( voir [^\\s]+)?";
const rules = {
date: {
regex: `^le ([0-9]{2}/[0-9]{2}/[0-9]{4})${optionalRegex}$`,
regex: `^[lL]e ([0-9]{1,2}/[0-9]{2}/[0-9]{4})${optionalRegex}$`,
match: defaultMatch,
transform: (rule, str) => {
const match = str.match(rule.regex);
Expand All @@ -26,7 +26,7 @@ export function parseJourneesPortesOuvertes(journeesPortesOuvertes) {
},

dateHour: {
regex: `^le ([0-9]{2}/[0-9]{2}/[0-9]{4}) de ([0-9]{2}h[0-9]{2}) à ([0-9]{2}h[0-9]{2})${optionalRegex}$`,
regex: `^[lL]e ([0-9]{1,2}/[0-9]{2}/[0-9]{4}) de ([0-9]{1,2}h[0-9]{2}) à ([0-9]{1,2}h[0-9]{2})${optionalRegex}$`,
match: defaultMatch,
transform: (rule, str) => {
const match = str.match(rule.regex);
Expand All @@ -44,7 +44,7 @@ export function parseJourneesPortesOuvertes(journeesPortesOuvertes) {
},

period: {
regex: `^du ([0-9]{2}/[0-9]{2}/[0-9]{4}) au ([0-9]{2}/[0-9]{2}/[0-9]{4})${optionalRegex}$`,
regex: `^[dD]u ([0-9]{1,2}/[0-9]{2}/[0-9]{4}) au ([0-9]{1,2}/[0-9]{2}/[0-9]{4})${optionalRegex}$`,
match: defaultMatch,
transform: (rule, str) => {
const match = str.match(rule.regex);
Expand All @@ -62,7 +62,7 @@ export function parseJourneesPortesOuvertes(journeesPortesOuvertes) {
},

periodHour: {
regex: `^du ([0-9]{2}/[0-9]{2}/[0-9]{4}) au ([0-9]{2}/[0-9]{2}/[0-9]{4}) de ([0-9]{2}h[0-9]{2}) à ([0-9]{2}h[0-9]{2})${optionalRegex}$`,
regex: `^[dD]u ([0-9]{1,2}/[0-9]{2}/[0-9]{4}) au ([0-9]{1,2}/[0-9]{2}/[0-9]{4}) de ([0-9]{1,2}h[0-9]{2}) à ([0-9]{1,2}h[0-9]{2})${optionalRegex}$`,
match: defaultMatch,
transform: (rule, str) => {
const match = str.match(rule.regex);
Expand Down

0 comments on commit 26cd1e2

Please sign in to comment.