Skip to content

Commit

Permalink
Allow retry of failed S3 sync.
Browse files Browse the repository at this point in the history
  • Loading branch information
EarthlingDavey committed Dec 5, 2024
1 parent 16aead8 commit 39e4242
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 18 deletions.
17 changes: 11 additions & 6 deletions conf/node/controllers/httrack.js
Original file line number Diff line number Diff line change
Expand Up @@ -141,14 +141,15 @@ export const getHttrackProgress = async (dest) => {
}

const files = {
index: `${dest}/index.html`,
log: `${dest}/hts-cache/new.txt`,
lock: `${dest}/hts-in_progress.lock`,
};

const response = {
requestCount: 0,
rate: 0,
complete: !fs.existsSync(files.lock),
complete: fs.existsSync(files.index) && !fs.existsSync(files.lock),
};

if (!fs.existsSync(files.log)) {
Expand All @@ -167,7 +168,11 @@ export const getHttrackProgress = async (dest) => {
// The first line is a header, so we subtract 1.
response.requestCount = lineCount - 1;

// If the time is 23:59:55 - 00:00:05, await 5 seconds.
// If the current local time is within the first 5 seconds after midnight (00:00:00 to 00:00:04), wait for 5 seconds.
// The intent is to keep the recent requests read from `/hts-cache/new.txt` from spanning 2 days.
if (new Date().toTimeString().split(" ")[0] < "00:00:05") {
await new Promise((resolve) => setTimeout(resolve, 5000));
}

// Get the last 20 lines from the file. Or all of them if there are less than 20.
const lastLines = execSync(
Expand Down Expand Up @@ -229,10 +234,10 @@ export const waitForHttrackComplete = async (
fs.existsSync(`${dest}/hts-in_progress.lock`)
) {
if (iterations < 10 || iterations % logFrequency === 0) {
const elapsedTime = new Date(iterations * intervalSeconds * 1000).toISOString().substring(11, 19);
console.log(
`Waiting for httrack to complete ... ${elapsedTime} elapsed`,
);
const elapsedTime = new Date(iterations * intervalSeconds * 1000)
.toISOString()
.substring(11, 19);
console.log(`Waiting for httrack to complete ... ${elapsedTime} elapsed`);
}
await new Promise((resolve) => setTimeout(resolve, intervalSeconds * 1000));
}
Expand Down
30 changes: 18 additions & 12 deletions conf/node/controllers/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
getSnapshotPaths,
getHttrackArgs,
runHttrack,
getHttrackProgress,
waitForHttrackComplete,
} from "./httrack.js";
import { sync } from "./s3.js";
Expand All @@ -20,21 +21,26 @@ import { sync } from "./s3.js";
export const main = async ({ url, agency, depth }) => {
const paths = getSnapshotPaths({ host: url.host, agency });

const httrackArgs = getHttrackArgs({
url,
dest: paths.fs,
agency,
jwt,
depth,
});
const { complete } = getHttrackProgress(paths.fs);

runHttrack(httrackArgs);
// If the snapshot is already complete, skip httrack
if (!complete) {
const httrackArgs = getHttrackArgs({
url,
dest: paths.fs,
agency,
jwt,
depth,
});

const { timedOut } = await waitForHttrackComplete(paths.fs);
runHttrack(httrackArgs);

if (timedOut) {
console.error("Httrack timed out", { url: url.href, agency, depth });
return;
const { timedOut } = await waitForHttrackComplete(paths.fs);

if (timedOut) {
console.error("Httrack timed out", { url: url.href, agency, depth });
return;
}
}

// Remove sensitive files - before syncing to S3
Expand Down

0 comments on commit 39e4242

Please sign in to comment.