Skip to content

Commit

Permalink
Updated migration script: fixed download via mongofiles to use file._id as it is possible to have duplicate records, used the source db parameter, added elapsed time info, improved logging.
Browse files Browse the repository at this point in the history
  • Loading branch information
shilob committed Sep 3, 2024
1 parent 2757e56 commit 5561bf6
Showing 1 changed file with 15 additions and 6 deletions.
21 changes: 15 additions & 6 deletions support/migration/migrate.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,17 @@ const rcloneToS3 = async (rcloneConfigPath, configName, bucketName, key, fullTem
await execAsync(rclone_cmd);
}

const downloadSrcToTemp = async (srcConnStr, srcFileName, fullTempPath) => {
const mongofiles_cmd = `mongofiles --uri="${srcConnStr}" -d redbox-storage get "${srcFileName}" -l="${fullTempPath}"`;
console.log(`Error in upload, downloading via mongofiles: ${mongofiles_cmd}`);
/**
 * Fetches one stored file from the source database into a local temp file
 * by shelling out to the `mongofiles` CLI, then returns the file's MD5.
 *
 * The file is looked up with `get_id` (by its id) rather than by filename.
 *
 * @param srcConnStr   Connection URI passed to `mongofiles --uri`.
 * @param srcDb        Database name passed to `mongofiles -d`.
 * @param srcFileId    Id of the file; interpolated into an `{"$oid": ...}` document.
 * @param fullTempPath Local path the file is written to (`-l`).
 * @returns Promise resolving to the hex-encoded MD5 digest of the downloaded file.
 */
const downloadSrcToTemp = async (srcConnStr, srcDb, srcFileId, fullTempPath) => {
  // Build the full CLI invocation up front so the exact command can be logged.
  const downloadCommand = `mongofiles --uri="${srcConnStr}" -d ${srcDb} get_id '{"$oid": "${srcFileId}"}' -l="${fullTempPath}"`;
  console.log(`Downloading via mongofiles: ${downloadCommand}`);

  // NOTE(review): execAsync is defined outside this block — presumably a
  // promisified child_process exec that rejects on non-zero exit; confirm.
  await execAsync(downloadCommand);
  console.log(`Mongofiles downloaded okay: ${fullTempPath}, uploading...`);

  // Hash the local copy so the caller has an MD5 for the downloaded data.
  return hasha.fromFile(fullTempPath, {algorithm: 'md5', encoding: 'hex'});
};

const migrate = async () => {
const startTime = new Date();
const configFilePath = process.argv[2];
if (!configFilePath) {
console.error(`Please specify config file path.`);
Expand Down Expand Up @@ -77,6 +78,7 @@ const migrate = async () => {
errored: []
};
try {
console.log(`Start time: ${startTime.toLocaleString()}`);
const clientConfig = config.s3.clientConfig;
s3Client = new S3Client(clientConfig);
console.log(`Connecting to source db: ${srcConnStr}`);
Expand All @@ -96,7 +98,7 @@ const migrate = async () => {
const cursor = bucket.find(config.mongodb.source.query);
while (await cursor.hasNext()) {
const attachment = await cursor.next();
const oid = attachment.metadata.redboxOid;
const oid = attachment.metadata.redboxOid || attachment.metadata.oid;
let fileId = attachment.metadata.fileId;
if (_.isEmpty(fileId)) {
// try to guess the fileId
Expand Down Expand Up @@ -138,7 +140,8 @@ const migrate = async () => {
}
// if the md5 is empty, most likely it's too big to stream out, using mongofiles to dump the data
if (_.isEmpty(sourceMd5)) {
sourceMd5 = await downloadSrcToTemp(srcConnStr, attachment.filename, fullTempPath);
console.info(`File has no md5 or could be too big to stream out, using mongofiles to download: ${fileId} of ${oid}`);
sourceMd5 = await downloadSrcToTemp(srcConnStr, srcDbName, attachment._id, fullTempPath);
fd = await fs.open(fullTempPath);
srcStream = fd.createReadStream();
} else {
Expand Down Expand Up @@ -174,9 +177,10 @@ const migrate = async () => {
throw new Error(`Failed to upload ${key} of ${oid}`);
}
} catch (error) {
console.error(`Error in streamed upload: ${fileId} of ${oid}`);
console.error(error);
if (fd == null) {
sourceMd5 = await downloadSrcToTemp(srcConnStr, attachment.filename, fullTempPath);
sourceMd5 = await downloadSrcToTemp(srcConnStr, srcDbName, attachment._id, fullTempPath);
fd = await fs.open(fullTempPath);
srcStream = fd.createReadStream();
try {
Expand Down Expand Up @@ -246,6 +250,11 @@ const migrate = async () => {
console.log(`Errored: ${_.size(stats.errored)}`);
console.log(JSON.stringify(stats.errored));
// done
const endTime = new Date();
const elapsedTime = (endTime - startTime) / 60000; // Convert milliseconds to minutes
const elapsedTimeHours = elapsedTime / 60; // Convert minutes to hours
console.log(`Elapsed time: ${elapsedTime} minutes`);
console.log(`Elapsed time hours: ${elapsedTimeHours} hours`);
return 'Migration done!'
};

Expand Down

0 comments on commit 5561bf6

Please sign in to comment.