From 82f3d350c215093a2f74dc5f29d9966f7c809740 Mon Sep 17 00:00:00 2001 From: Henry Fontanier Date: Fri, 3 Nov 2023 13:10:31 +0100 Subject: [PATCH] feat: add CLI to skip gdrive files (#2363) --- connectors/src/admin/cli.ts | 59 ++++++++++++++++++- .../google_drive/temporal/activities.ts | 20 +++++++ connectors/src/lib/models.ts | 5 ++ 3 files changed, 81 insertions(+), 3 deletions(-) diff --git a/connectors/src/admin/cli.ts b/connectors/src/admin/cli.ts index 4bb66581597e..446e373c497a 100644 --- a/connectors/src/admin/cli.ts +++ b/connectors/src/admin/cli.ts @@ -7,10 +7,16 @@ import { STOP_CONNECTOR_BY_TYPE, SYNC_CONNECTOR_BY_TYPE, } from "@connectors/connectors"; +import { getDocumentId } from "@connectors/connectors/google_drive/temporal/activities"; import { launchGoogleDriveRenewWebhooksWorkflow } from "@connectors/connectors/google_drive/temporal/client"; import { toggleSlackbot } from "@connectors/connectors/slack/bot"; import { launchSlackSyncOneThreadWorkflow } from "@connectors/connectors/slack/temporal/client"; -import { Connector, NotionDatabase, NotionPage } from "@connectors/lib/models"; +import { + Connector, + GoogleDriveFiles, + NotionDatabase, + NotionPage, +} from "@connectors/lib/models"; import { Result } from "@connectors/lib/result"; const connectors = async (command: string, args: parseArgs.ParsedArgs) => { @@ -249,10 +255,57 @@ const notion = async (command: string, args: parseArgs.ParsedArgs) => { } }; -const google = async (command: string) => { +const google = async (command: string, args: parseArgs.ParsedArgs) => { switch (command) { case "restart-google-webhooks": { await throwOnError(launchGoogleDriveRenewWebhooksWorkflow()); + return; + } + case "skip-file": { + if (!args.wId) { + throw new Error("Missing --wId argument"); + } + if (!args.dataSourceName) { + throw new Error("Missing --dataSourceName argument"); + } + if (!args.fileId) { + throw new Error("Missing --fileId argument"); + } + + const connector = await Connector.findOne({ + where: { + workspaceId: args.wId, + dataSourceName: args.dataSourceName, + }, + }); + if (!connector) { + throw new Error( + `Could not find connector for workspace ${args.wId} and data source ${args.dataSourceName}` + ); + } + + const existingFile = await GoogleDriveFiles.findOne({ + where: { + driveFileId: args.fileId, + connectorId: connector.id, + }, + }); + if (existingFile) { + await existingFile.update({ + skipReason: args.reason || "blacklisted", + }); + } else { + await GoogleDriveFiles.create({ + driveFileId: args.fileId, + dustFileId: getDocumentId(args.fileId), + name: "unknown", + mimeType: "unknown", + connectorId: connector.id, + skipReason: args.reason || "blacklisted", + }); + } + + return; } } }; @@ -367,7 +420,7 @@ const main = async () => { await notion(command, argv); return; case "google": - await google(command); + await google(command, argv); return; case "slack": await slack(command, argv); diff --git a/connectors/src/connectors/google_drive/temporal/activities.ts b/connectors/src/connectors/google_drive/temporal/activities.ts index b7e818537f52..c644a6f1b7b9 100644 --- a/connectors/src/connectors/google_drive/temporal/activities.ts +++ b/connectors/src/connectors/google_drive/temporal/activities.ts @@ -299,6 +299,26 @@ async function syncOneFile( const documentId = getDocumentId(file.id); let documentContent: string | undefined = undefined; + const fileInDb = await GoogleDriveFiles.findOne({ + where: { + connectorId: connectorId, + driveFileId: file.id, + }, + }); + + if (fileInDb?.skipReason) { + logger.info( + { + documentId, + dataSourceConfig, + fileId: file.id, + title: file.name, + }, + `Google Drive document skipped with skip reason ${fileInDb.skipReason}` + ); + return false; + } + if (FILES_IGNORE_LIST.includes(file.id)) { logger.info( { diff --git a/connectors/src/lib/models.ts b/connectors/src/lib/models.ts index 813059ffa236..c4455d3b7f67 100644 --- a/connectors/src/lib/models.ts +++ b/connectors/src/lib/models.ts @@ -1102,6 +1102,7 @@ export class GoogleDriveFiles extends Model< declare updatedAt: CreationOptional; declare lastSeenTs: Date | null; declare lastUpsertedTs: Date | null; + declare skipReason: string | null; declare connectorId: ForeignKey; declare dustFileId: string; declare driveFileId: string; @@ -1135,6 +1136,10 @@ GoogleDriveFiles.init( type: DataTypes.DATE, allowNull: true, }, + skipReason: { + type: DataTypes.STRING, + allowNull: true, + }, connectorId: { type: DataTypes.INTEGER, allowNull: false,