From af02ecb6bd7d817a72a066fdfc3e1f8ea0806367 Mon Sep 17 00:00:00 2001 From: philipperolet Date: Wed, 6 Sep 2023 17:41:52 +0200 Subject: [PATCH] leaner getParents + fixes on potential inconsistencies --- .../src/connectors/notion/lib/parents.ts | 49 +++++++------------ .../connectors/notion/temporal/activities.ts | 13 ++--- .../connectors/notion/temporal/workflows.ts | 2 +- 3 files changed, 27 insertions(+), 37 deletions(-) diff --git a/connectors/src/connectors/notion/lib/parents.ts b/connectors/src/connectors/notion/lib/parents.ts index 0eaa7e8ace41..5b1190461a0c 100644 --- a/connectors/src/connectors/notion/lib/parents.ts +++ b/connectors/src/connectors/notion/lib/parents.ts @@ -2,6 +2,7 @@ import memoize from "lodash.memoize"; import { getDatabaseChildrenOf, + getNotionDatabaseFromConnectorsDb, getNotionPageFromConnectorsDb, getPageChildrenOf, } from "@connectors/connectors/notion/lib/connectors_db_helpers"; @@ -23,15 +24,19 @@ import { */ async function _getParents( dataSourceInfo: DataSourceInfo, - pageOrDb: { - notionId: string; - parentType: string | null | undefined; - parentId: string | null | undefined; - }, + pageOrDbId: string, // eslint-disable-next-line @typescript-eslint/no-unused-vars -- used for memoization memoizationKey?: string ): Promise { - const parents: string[] = [pageOrDb.notionId]; + const parents: string[] = [pageOrDbId]; + const pageOrDb = + (await getNotionPageFromConnectorsDb(dataSourceInfo, pageOrDbId)) || + (await getNotionDatabaseFromConnectorsDb(dataSourceInfo, pageOrDbId)); + if (!pageOrDb) { + // pageOrDb is either not synced yet (not an issue, see design doc) or + // is not in Dust's scope, in both cases we can just return the page id + return parents; + } switch (pageOrDb.parentType) { case "workspace": return parents; @@ -41,23 +46,10 @@ async function _getParents( return parents; case "page": case "database": { - // retrieve the parent from notion connectors db - // and add it to the parents array - const parent = await getNotionPageFromConnectorsDb( - dataSourceInfo, - pageOrDb.parentId as string // (cannot be null here) - ); - if (!parent) { - // The parent is either not synced yet (not an issue, see design doc) or - // is not in Dust's scope, in both cases we can just return the page id - return parents; - } return parents.concat( - await getParents(dataSourceInfo, { - notionId: parent.notionPageId, - parentType: parent.parentType, - parentId: parent.parentId, - }) + // parentId cannot be undefined here + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + await getParents(dataSourceInfo, pageOrDb.parentId!, memoizationKey) ); } default: @@ -67,8 +59,8 @@ async function _getParents( export const getParents = memoize( _getParents, - (dataSourceInfo, pageOrDb, memoizationKey) => { - return `${dataSourceInfo.dataSourceName}:${pageOrDb.notionId}:${memoizationKey}`; + (dataSourceInfo, pageOrDbId, memoizationKey) => { + return `${dataSourceInfo.dataSourceName}:${pageOrDbId}:${memoizationKey}`; } ); @@ -76,7 +68,7 @@ export async function updateAllParentsFields( dataSourceConfig: DataSourceConfig, pageOrDbs: (NotionPage | NotionDatabase)[], memoizationKey?: string -) { +): Promise { /* Computing all descendants, then updating, ensures the field is updated only once per page, limiting the load on the Datasource */ const pagesToUpdate = await getPagesToUpdate(pageOrDbs, dataSourceConfig); @@ -87,11 +79,7 @@ export async function updateAllParentsFields( for (const page of pagesToUpdate) { const parents = await getParents( dataSourceConfig, - { - notionId: page.notionPageId, - parentType: page.parentType, - parentId: page.parentId, - }, + page.notionPageId, memoizationKey ); @@ -101,6 +89,7 @@ export async function updateAllParentsFields( parents ); } + return pagesToUpdate.length; } /** Get ids of all pages whose parents field should be updated: initial pages in diff --git a/connectors/src/connectors/notion/temporal/activities.ts b/connectors/src/connectors/notion/temporal/activities.ts index 9c9ccf3a2008..6dffa2b01c09 100644 --- a/connectors/src/connectors/notion/temporal/activities.ts +++ b/connectors/src/connectors/notion/temporal/activities.ts @@ -274,11 +274,7 @@ export async function notionUpsertPageActivity( const documentId = `notion-${parsedPage.id}`; const parents = await getParents( dataSourceConfig, - { - notionId: pageId, - parentType: parsedPage.parentType, - parentId: parsedPage.parentId, - }, + pageId, runTimestamp.toString() // memoize at notionSyncWorkflow main inner loop level ); await upsertToDatasource({ @@ -843,14 +839,19 @@ export async function updateParentsFieldsActivity( activitiesResults: UpsertActivityResult[], activityExecutionTimestamp: number ) { + const localLogger = logger.child({ + workspaceId: dataSourceConfig.workspaceId, + dataSourceName: dataSourceConfig.dataSourceName, + }); // Get documents whose path changed (created or moved) If there is // createdOrMoved, then the document cannot be null thus the cast is safe const documents = activitiesResults .filter((aRes) => aRes.createdOrMoved) .map((aRes) => aRes.pageOrDb) as (NotionPage | NotionDatabase)[]; - await updateAllParentsFields( + const nbUpdated = await updateAllParentsFields( dataSourceConfig, documents, activityExecutionTimestamp.toString() ); + localLogger.info({ nbUpdated }, "Updated parents fields."); } diff --git a/connectors/src/connectors/notion/temporal/workflows.ts b/connectors/src/connectors/notion/temporal/workflows.ts index 42b932ccd363..8f4382b658a5 100644 --- a/connectors/src/connectors/notion/temporal/workflows.ts +++ b/connectors/src/connectors/notion/temporal/workflows.ts @@ -221,7 +221,7 @@ export async function notionSyncWorkflow( await updateParentsFieldsActivity( dataSourceConfig, syncWorkflowResults.flat().filter((r) => r) as UpsertActivityResult[], - runTimestamp + new Date().getTime() ); if (!isGargageCollectionRun) {