Skip to content

Commit

Permalink
Merge pull request #278 from tnc-ca-geo/refactor-bulk-image-deletion
Browse files Browse the repository at this point in the history
Optimize bulk image deletion
  • Loading branch information
jue-henry authored Dec 10, 2024
2 parents 6353c55 + 6749a7b commit d0da6f8
Show file tree
Hide file tree
Showing 7 changed files with 186 additions and 128 deletions.
4 changes: 3 additions & 1 deletion .prettierignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
node_modules/
.build/
.prettierrc
.eslintrc.json
.eslintrc.json
package.json
package-lock.json
2 changes: 1 addition & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"description": "Lambda-based graphQL API for camera trap data management platform",
"main": "src/handler.js",
"scripts": {
"start": "AWS_PROFILE=animl serverless offline --noAuth",
"start": "npm run build && AWS_PROFILE=animl serverless offline --noAuth",
"build": "tsc",
"deploy-dev": "sls deploy --stage dev",
"deploy-prod": "sls deploy --stage prod",
Expand Down
110 changes: 88 additions & 22 deletions src/api/db/models/Image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ import { TaskSchema } from '../schemas/Task.js';
const ObjectId = mongoose.Types.ObjectId;

export class ImageModel {
static readonly DELETE_IMAGES_BATCH_SIZE = 300;

static async countImages(
input: gql.QueryImagesCountInput,
context: Pick<Context, 'user'>,
Expand Down Expand Up @@ -144,23 +146,86 @@ export class ImageModel {
context: Pick<Context, 'user'>,
): Promise<gql.StandardErrorPayload> {
try {
const res = await Promise.allSettled(
input.imageIds!.map((imageId) => {
return this.deleteImage({ imageId }, context);
// Cap the batch size: S3 DeleteObjects accepts at most 1000 keys per request, and each image has 3 objects (medium, original, small)
if (input.imageIds!.length > this.DELETE_IMAGES_BATCH_SIZE) {
throw new Error('Cannot delete more than 300 images at a time');
}
const s3 = new S3.S3Client({ region: process.env.AWS_DEFAULT_REGION });

console.time('delete-images total');
console.time('delete-images mongo records');
const images = await Image.find({ _id: { $in: input.imageIds! } });

if (images.length !== 0) {
const session = await mongoose.startSession();
session.startTransaction();

try {
const imageRes = await Image.deleteMany(
{
_id: { $in: input.imageIds! },
projectId: context.user['curr_project'],
},
{ session: session },
);
const imageAttemptRes = await ImageAttempt.deleteMany(
{
_id: { $in: input.imageIds! },
projectId: context.user['curr_project'],
},
{ session: session },
);
const imageErrorRes = await ImageError.deleteMany(
{
image: { $in: input.imageIds! },
},
{ session: session },
);
if (imageRes.acknowledged && imageAttemptRes.acknowledged && imageErrorRes.acknowledged) {
// Commit the changes
await session.commitTransaction();
} else {
throw new Error(
'There was an issue deleting the images. Some or all images may not have been deleted.',
);
}
} catch (error) {
// Rollback any changes made in the database
await session.abortTransaction();
throw new InternalServerError(error as string);
} finally {
// Ending the session
await session.endSession();
console.timeEnd('delete-images mongo records');
}
}

const keys: { Key: string }[] = [];
console.time('delete-images s3 records');
input.imageIds!.forEach((id) => {
const image = images.find((i) => idMatch(i._id, id));
['medium', 'original', 'small'].forEach((size) => {
keys.push({ Key: `${size}/${id}-${size}.${image?.fileTypeExtension || 'jpg'}` });
});
});
const s3Res = await s3.send(
new S3.DeleteObjectsCommand({
Bucket: `animl-images-serving-${process.env.STAGE}`,
Delete: { Objects: keys },
}),
);

const errors = res
.filter((r): r is PromiseRejectedResult => r.status === 'rejected')
.map((r) => r.reason); // Will always be a GraphQLError
console.timeEnd('delete-images s3 records');
console.timeEnd('delete-images total');

return {
isOk: !errors.length,
errors,
isOk: s3Res.Errors === undefined || s3Res.Errors.length === 0,
errors: s3Res.Errors?.map((e) => e.toString()) ?? [],
};
} catch (err) {
if (err instanceof GraphQLError) throw err;
throw new InternalServerError(err as string);
return {
isOk: false,
errors: [String(err)],
};
}
}

Expand Down Expand Up @@ -507,9 +572,7 @@ export class ImageModel {
const tag = image.tags?.filter((t) => idMatch(t, input.tagId))[0];
if (!tag) throw new NotFoundError('Tag not found on image');

image.tags = image.tags.filter(
(t) => !idMatch(t, input.tagId),
) as mongoose.Types.ObjectId[];
image.tags = image.tags.filter((t) => !idMatch(t, input.tagId)) as mongoose.Types.ObjectId[];

await image.save();

Expand All @@ -523,12 +586,12 @@ export class ImageModel {
static async countProjectTag(
input: { tagId: string },
context: Pick<Context, 'user'>,
): Promise<number> {
): Promise<number> {
try {
const projectId = context.user['curr_project']!;
const count = await Image.countDocuments({
projectId: projectId,
tags: new ObjectId(input.tagId)
tags: new ObjectId(input.tagId),
});

return count;
Expand All @@ -541,14 +604,17 @@ export class ImageModel {
static async deleteProjectTag(
input: { tagId: string },
context: Pick<Context, 'user'>,
): Promise<UpdateWriteOpResult> {
): Promise<UpdateWriteOpResult> {
try {
const projectId = context.user['curr_project']!;
const res = await Image.updateMany({
projectId: projectId
}, {
$pull: { tags: new mongoose.Types.ObjectId(input.tagId) }
});
const res = await Image.updateMany(
{
projectId: projectId,
},
{
$pull: { tags: new mongoose.Types.ObjectId(input.tagId) },
},
);
return res;
} catch (err) {
if (err instanceof GraphQLError) throw err;
Expand Down
6 changes: 3 additions & 3 deletions src/api/db/models/Project.ts
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ export class ProjectModel {
{ retries: 2 },
);
// TODO: we need to reverse the above operation if reMapImagesToDeps fails!
await ProjectModel.reMapImagesToDeps({ projId: project._id, camConfig });
await ProjectModel.reMapImagesToDeps({ projId: project._id, camConfig: camConfig });
return camConfig;
} catch (err) {
if (err instanceof GraphQLError) throw err;
Expand Down Expand Up @@ -530,7 +530,7 @@ export class ProjectModel {
);
// TODO: we need to reverse the above operation if reMapImagesToDeps fails!
if (Object.keys(input.diffs).includes('startDate')) {
await ProjectModel.reMapImagesToDeps({ projId: project._id, camConfig });
await ProjectModel.reMapImagesToDeps({ projId: project._id, camConfig: camConfig });
}
return camConfig;
} catch (err) {
Expand Down Expand Up @@ -585,7 +585,7 @@ export class ProjectModel {
{ retries: 2 },
);
// TODO: we need to reverse the above operation if reMapImagesToDeps fails!
await ProjectModel.reMapImagesToDeps({ projId: project._id, camConfig });
await ProjectModel.reMapImagesToDeps({ projId: project._id, camConfig: camConfig });
return camConfig;
} catch (err) {
if (err instanceof GraphQLError) throw err;
Expand Down
16 changes: 10 additions & 6 deletions src/task/image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,26 @@ import type * as gql from '../@types/graphql.js';

export async function DeleteImagesByFilter(task: TaskInput<gql.DeleteImagesByFilterTaskInput>) {
/**
* Deletes images that match the inputted filters in batches of 100.
* Deletes images that match the inputted filters in batches of 300.
* This is used by the frontend to delete all images currently shown.
* * @param {Object} input
* * @param {gql.FiltersInput} input.config.filters
*/
const context = { user: { is_superuser: true, curr_project: task.projectId } as User };
let images = await ImageModel.queryByFilter(
{ filters: task.config.filters, limit: 100 },
{ filters: task.config.filters, limit: ImageModel.DELETE_IMAGES_BATCH_SIZE },
context,
);
while (images.results.length > 0) {
const batch = images.results.map((image) => image._id);
await ImageModel.deleteImages({ imageIds: batch }, context);
if (images.hasNext) {
images = await ImageModel.queryByFilter(
{ filters: task.config.filters, limit: 100, next: images.next },
{
filters: task.config.filters,
limit: ImageModel.DELETE_IMAGES_BATCH_SIZE,
next: images.next,
},
context,
);
} else {
Expand All @@ -33,15 +37,15 @@ export async function DeleteImagesByFilter(task: TaskInput<gql.DeleteImagesByFil

export async function DeleteImages(task: TaskInput<gql.DeleteImagesInput>) {
/**
* Deletes a list of images by their IDs in batches of 100.
* This is used by the frontend when the user is selecting more than 100 images to delete to delete at once.
* Deletes a list of images by their IDs in batches of 300.
* This is used by the frontend when the user selects more than 300 images to delete at once.
* * @param {Object} input
* * @param {String[]} input.config.imageIds
*/
const context = { user: { is_superuser: true, curr_project: task.projectId } as User };
const imagesToDelete = task.config.imageIds?.slice() ?? [];
while (imagesToDelete.length > 0) {
const batch = imagesToDelete.splice(0, 100);
const batch = imagesToDelete.splice(0, ImageModel.DELETE_IMAGES_BATCH_SIZE);
await ImageModel.deleteImages({ imageIds: batch }, context);
}
return { imageIds: task.config.imageIds };
Expand Down
Loading

0 comments on commit d0da6f8

Please sign in to comment.