Skip to content

Commit

Permalink
CDPT-2295 Correct paths (#63)
Browse files Browse the repository at this point in the history
  • Loading branch information
EarthlingDavey authored Jan 23, 2025
1 parent 32e45af commit 27e9275
Show file tree
Hide file tree
Showing 14 changed files with 168 additions and 95 deletions.
15 changes: 8 additions & 7 deletions conf/node/controllers/generate-indexes.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { intranetUrls, s3BucketName, indexCss } from "../constants.js";
import { getAgenciesFromS3, getSnapshotsFromS3 } from "./s3.js";
import { getAgencyPath } from "./paths.js";

/**
* Generate the root index html
Expand All @@ -16,8 +17,6 @@ export const generateRootIndex = async (bucket = s3BucketName, env) => {
throw new Error("Env is required");
}

const agencyPrefix = "production" === env ? '' : `${env}-`;

const agencies = await getAgenciesFromS3(bucket, env);

const hostname = new URL(intranetUrls[env]).hostname;
Expand All @@ -34,7 +33,10 @@ export const generateRootIndex = async (bucket = s3BucketName, env) => {
.map(
(agency) => `
<li class="list-group-item">
<a href="/${agencyPrefix}${agency}/index.html" target="_blank">${agency}</a>
<a href="/${getAgencyPath(
env,
agency,
)}/index.html" target="_blank">${agency}</a>
</li>`,
)
.join("\n")}
Expand Down Expand Up @@ -75,9 +77,6 @@ export const generateAgencyIndex = async (

const snapshots = await getSnapshotsFromS3(bucket, env, agency);

const snapshotParent =
"production" === env ? `${agency}` : `${env}-${agency}`;

const html = `<!doctype html><html lang="en">
<head><title>Intranet Archive Index</title><style>${indexCss}</style></head>
<body>
Expand All @@ -90,7 +89,9 @@ export const generateAgencyIndex = async (
.map(
(snapshot) => `
<li class="list-group-item">
<a href="/${snapshotParent}/${snapshot}/${url.hostname}/index.html" target="_blank">${snapshot}</a>
<a href="/${getAgencyPath(env, agency)}/${snapshot}/${
url.hostname
}/index.html" target="_blank">${snapshot}</a>
</li>`,
)
.join("\n")}
Expand Down
1 change: 0 additions & 1 deletion conf/node/controllers/generate-indexes.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import {

import { s3BucketName } from "../constants.js";
import { generateRootIndex, generateAgencyIndex } from "./generate-indexes.js";

import { s3Options } from "./s3.js";

let client;
Expand Down
30 changes: 1 addition & 29 deletions conf/node/controllers/httrack.js
Original file line number Diff line number Diff line change
@@ -1,35 +1,7 @@
import { spawn, execSync } from "node:child_process";
import fs from "node:fs";

import { intranetUrls, intranetJwts } from "../constants.js";

/**
 * Work out where today's snapshot for an agency lives.
 *
 * The S3 path is `<agency>/<YYYY-MM-DD>` for production and
 * `<env>-<agency>/<YYYY-MM-DD>` for every other environment. The
 * filesystem path is the same value rooted at `/tmp/snapshots/`.
 *
 * @param {Object} props
 * @param {string} props.env - must be one of the keys of `intranetJwts`
 * @param {string} props.agency
 * @returns {{s3: string, fs: string}} the s3 and fs paths
 * @throws {Error} if `env` is not a key of `intranetJwts`
 */

export const getSnapshotPaths = ({ env, agency }) => {
  const envIsKnown = Object.keys(intranetJwts).includes(env);
  if (!envIsKnown) {
    throw new Error(`Invalid environment: ${env}`);
  }

  // First ten characters of the ISO timestamp, e.g. 2023-01-17
  const today = new Date().toISOString().substring(0, 10);

  // Only non-production folders carry the environment prefix.
  const agencyFolder = env === "production" ? agency : `${env}-${agency}`;
  const s3 = `${agencyFolder}/${today}`;

  return { s3, fs: `/tmp/snapshots/${s3}` };
};
import { intranetJwts } from "../constants.js";

/**
* Get arguments for httrack cli.
Expand Down
38 changes: 0 additions & 38 deletions conf/node/controllers/httrack.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ import http from "node:http";
import { afterAll, it, jest } from "@jest/globals";

import {
getSnapshotPaths,
getHttrackArgs,
runHttrack,
getHttrackProgress,
Expand All @@ -14,43 +13,6 @@ import { intranetUrls, intranetJwts } from "../constants.js";
// Skip long tests when running in watch mode.
const skipLongTests = process.env.npm_lifecycle_event === "test:watch";

describe("getSnapshotPaths", () => {
  // Date portion (YYYY-MM-DD) of the current ISO timestamp.
  const isoDate = () => new Date().toISOString().slice(0, 10);

  it("should return the s3 and fs paths - production", () => {
    const result = getSnapshotPaths({ env: "production", agency: "hq" });

    const expected = {
      s3: `hq/${isoDate()}`,
      fs: `/tmp/snapshots/hq/${isoDate()}`,
    };
    expect(result).toStrictEqual(expected);
  });

  it("should return the s3 and fs paths - non-production", () => {
    const result = getSnapshotPaths({ env: "dev", agency: "hq" });

    const expected = {
      s3: `dev-hq/${isoDate()}`,
      fs: `/tmp/snapshots/dev-hq/${isoDate()}`,
    };
    expect(result).toStrictEqual(expected);
  });

  it("should return the s3 and fs paths - invalid env", () => {
    // An unknown environment is rejected with an error rather than a path.
    const attempt = () => getSnapshotPaths({ env: "invalid", agency: "hq" });

    expect(attempt).toThrowError("Invalid environment: invalid");
  });
});

describe("getHttrackArgs", () => {
it("should return an array of arguments", async () => {
const env = "production";
Expand Down
6 changes: 2 additions & 4 deletions conf/node/controllers/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ import {
sensitiveFiles,
} from "../constants.js";
import {
getSnapshotPaths,
getHttrackArgs,
runHttrack,
getHttrackProgress,
waitForHttrackComplete,
} from "./httrack.js";
import { getAgencyPath, getSnapshotPaths } from "./paths.js";
import { createHeartbeat, sync, writeToS3 } from "./s3.js";
import { generateRootIndex, generateAgencyIndex } from "./generate-indexes.js";

Expand Down Expand Up @@ -69,9 +69,7 @@ export const main = async ({ env, agency, depth }) => {
const agencyIndexHtml = await generateAgencyIndex(s3BucketName, env, agency);
await writeToS3(
s3BucketName,
"production" === env
? `${agency}/index.html`
: `${env}-${agency}/index.html`,
`${getAgencyPath(env, agency)}/index.html`,
agencyIndexHtml,
{ cacheMaxAge: 600 },
);
Expand Down
2 changes: 1 addition & 1 deletion conf/node/controllers/main.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { afterAll, beforeEach, expect, it, jest } from "@jest/globals";
import { S3Client, ListObjectsV2Command, GetObjectCommand } from "@aws-sdk/client-s3";

import { main } from "./main.js";
import { getSnapshotPaths } from "./httrack.js";
import { getSnapshotPaths } from "./paths.js";
import { s3Options, s3EmptyDir } from "./s3.js";
import { intranetUrls, s3BucketName } from "../constants.js";

Expand Down
45 changes: 45 additions & 0 deletions conf/node/controllers/paths.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import { intranetUrls, allowedTargetAgencies } from "../constants.js";

/**
 * Build the folder name under which an agency's content is stored.
 *
 * Production folders are named after the agency alone; every other
 * environment prefixes the agency with `<env>-`.
 *
 * @param {string} env - must be one of the keys of `intranetUrls`
 * @param {string} agency - must be listed in `allowedTargetAgencies`
 * @returns {string} the folder name
 * @throws {Error} if `env` or `agency` is not recognised
 */

export const getAgencyPath = (env, agency) => {
  const knownEnvs = Object.keys(intranetUrls);
  if (!knownEnvs.includes(env)) {
    throw new Error(`Invalid environment: ${env}`);
  }

  const agencyIsAllowed = allowedTargetAgencies.includes(agency);
  if (!agencyIsAllowed) {
    throw new Error(`Invalid agency: ${agency}`);
  }

  if (env === "production") {
    return `${agency}`;
  }

  return `${env}-${agency}`;
};

/**
 * Work out where today's snapshot for an agency lives.
 *
 * The S3 path is the agency folder (from `getAgencyPath`) followed by
 * today's date as YYYY-MM-DD. The filesystem path is the same value
 * rooted at `/tmp/snapshots/`.
 *
 * @param {Object} props
 * @param {string} props.env
 * @param {string} props.agency
 * @returns {{s3: string, fs: string}} the s3 and fs paths
 */

export const getSnapshotPaths = ({ env, agency }) => {
  // First ten characters of the ISO timestamp, e.g. 2023-01-17
  const today = new Date().toISOString().substring(0, 10);

  // getAgencyPath validates env and agency, so bad input throws here.
  const agencyFolder = getAgencyPath(env, agency);
  const s3 = `${agencyFolder}/${today}`;

  return { s3, fs: `/tmp/snapshots/${s3}` };
};
78 changes: 78 additions & 0 deletions conf/node/controllers/paths.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import { it } from "@jest/globals";

import { getAgencyPath, getSnapshotPaths } from "./paths.js";

describe("getAgencyPath", () => {
  it("should return the folder name - production", () => {
    // Production folders carry no environment prefix.
    expect(getAgencyPath("production", "hq")).toBe("hq");
  });

  it("should return the folder name - non-production", () => {
    expect(getAgencyPath("dev", "hq")).toBe("dev-hq");
  });

  it("should throw an error - invalid env", () => {
    const attempt = () => getAgencyPath("invalid", "hq");

    expect(attempt).toThrowError("Invalid environment: invalid");
  });

  it("should throw an error - invalid agency", () => {
    const attempt = () => getAgencyPath("production", "invalid");

    expect(attempt).toThrowError("Invalid agency: invalid");
  });
});

describe("getSnapshotPaths", () => {
  // Today's date as YYYY-MM-DD, the format used in snapshot paths.
  const today = () => new Date().toISOString().slice(0, 10);

  it("should return the s3 and fs paths - production", () => {
    const result = getSnapshotPaths({ env: "production", agency: "hq" });

    const expected = {
      s3: `hq/${today()}`,
      fs: `/tmp/snapshots/hq/${today()}`,
    };
    expect(result).toStrictEqual(expected);
  });

  it("should return the s3 and fs paths - non-production", () => {
    const result = getSnapshotPaths({ env: "dev", agency: "hq" });

    const expected = {
      s3: `dev-hq/${today()}`,
      fs: `/tmp/snapshots/dev-hq/${today()}`,
    };
    expect(result).toStrictEqual(expected);
  });

  it("should return the s3 and fs paths - invalid env", () => {
    // The environment check inside getAgencyPath surfaces through getSnapshotPaths.
    const attempt = () => getSnapshotPaths({ env: "invalid", agency: "hq" });

    expect(attempt).toThrowError("Invalid environment: invalid");
  });
});
9 changes: 5 additions & 4 deletions conf/node/controllers/s3.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ import {
s3Credentials as credentials,
s3Region,
heartbeatEndpoint,
allowedTargetAgencies
allowedTargetAgencies,
} from "../constants.js";
import { getAgencyPath } from "./paths.js";

/**
* S3 client options
Expand Down Expand Up @@ -208,8 +209,8 @@ export const getAgenciesFromS3 = async (bucket = s3BucketName, env) => {
folder.Prefix.replace("/", "").replace("/", ""),
);

if(env === 'production') {
return folders.filter((folder) => allowedTargetAgencies.includes(folder) )
if (env === "production") {
return folders.filter((folder) => allowedTargetAgencies.includes(folder));
}

return folders
Expand Down Expand Up @@ -241,7 +242,7 @@ export const getSnapshotsFromS3 = async (
throw new Error("Agency is required");
}

const prefix = "production" === env ? `${agency}/` : `${env}-${agency}/`;
const prefix = `${getAgencyPath(env, agency)}/`;

const command = new ListObjectsV2Command({
Bucket: bucket,
Expand Down
2 changes: 1 addition & 1 deletion conf/node/controllers/s3.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import {
writeToS3,
getAgenciesFromS3,
getSnapshotsFromS3,
} from "./s3";
} from "./s3.js";

describe("checkAccess", () => {
let client;
Expand Down
2 changes: 1 addition & 1 deletion conf/node/controllers/schedule.test.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { expect, it, jest } from "@jest/globals";

import { parseScheduleString, scheduleFunction } from "./schedule";
import { parseScheduleString, scheduleFunction } from "./schedule.js";

jest.useFakeTimers();

Expand Down
14 changes: 9 additions & 5 deletions conf/node/middleware.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import {

/**
* @typedef {import('express').Request & { mirror: { env: string, agency: string, depth: number } }} SpiderRequest
* @typedef {import('express').Request & { isValid: ?boolean, agency: string, _hostname: string }} AccessRequest
* @typedef {import('express').Request & { isValid: ?boolean, agency: string, env: string }} AccessRequest
*/

/**
Expand Down Expand Up @@ -106,10 +106,14 @@ export const checkSignature = (req, res, next) => {
}

// Make sure the request is for an allowed hostname
req._hostname = payloadObject.hostname;

if (!allowedTargetHosts.includes(req._hostname)) {
console.error("Error: Hostname not allowed");
req.env = Object.keys(intranetUrls).find(
(env) =>
intranetUrls[env] &&
new URL(intranetUrls[env]).hostname === payloadObject.hostname,
);

if (!req.env) {
console.error("Error: Env not found");
res.status(403).send({ status: 403 });
return;
}
Expand Down
10 changes: 10 additions & 0 deletions conf/node/middleware.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,16 @@ describe("checkSignature middleware", () => {
expect(res.send).toHaveBeenCalledWith({ status: 400 });
});

it("should return 403 if hostname is not found", () => {
  // Unexpired payload; "invalid" is expected to match no configured hostname.
  const payloadJson = JSON.stringify({
    expiry: Date.now() / 1000 + 10,
    hostname: "invalid",
  });
  const payload = Buffer.from(payloadJson).toString("base64");
  req.body = { sig: "signature", payload };

  checkSignature(req, res, next);

  expect(res.status).toHaveBeenCalledWith(403);
  expect(res.send).toHaveBeenCalledWith({ status: 403 });
});

it("should return 403 if request has expired", () => {
const payload = Buffer.from(
JSON.stringify({ expiry: Date.now() / 1000 - 10 }),
Expand Down
Loading

0 comments on commit 27e9275

Please sign in to comment.