Skip to content

Commit

Permalink
leaner getParents + fixes on potential inconsistencies
Browse files Browse the repository at this point in the history
  • Loading branch information
philipperolet committed Sep 6, 2023
1 parent 853d7ea commit af02ecb
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 37 deletions.
49 changes: 19 additions & 30 deletions connectors/src/connectors/notion/lib/parents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import memoize from "lodash.memoize";

import {
getDatabaseChildrenOf,
getNotionDatabaseFromConnectorsDb,
getNotionPageFromConnectorsDb,
getPageChildrenOf,
} from "@connectors/connectors/notion/lib/connectors_db_helpers";
Expand All @@ -23,15 +24,19 @@ import {
*/
async function _getParents(
dataSourceInfo: DataSourceInfo,
pageOrDb: {
notionId: string;
parentType: string | null | undefined;
parentId: string | null | undefined;
},
pageOrDbId: string,
// eslint-disable-next-line @typescript-eslint/no-unused-vars -- used for memoization
memoizationKey?: string
): Promise<string[]> {
const parents: string[] = [pageOrDb.notionId];
const parents: string[] = [pageOrDbId];
const pageOrDb =
(await getNotionPageFromConnectorsDb(dataSourceInfo, pageOrDbId)) ||
(await getNotionDatabaseFromConnectorsDb(dataSourceInfo, pageOrDbId));
if (!pageOrDb) {
// pageOrDb is either not synced yet (not an issue, see design doc) or
// is not in Dust's scope, in both cases we can just return the page id
return parents;
}
switch (pageOrDb.parentType) {
case "workspace":
return parents;
Expand All @@ -41,23 +46,10 @@ async function _getParents(
return parents;
case "page":
case "database": {
// retrieve the parent from notion connectors db
// and add it to the parents array
const parent = await getNotionPageFromConnectorsDb(
dataSourceInfo,
pageOrDb.parentId as string // (cannot be null here)
);
if (!parent) {
// The parent is either not synced yet (not an issue, see design doc) or
// is not in Dust's scope, in both cases we can just return the page id
return parents;
}
return parents.concat(
await getParents(dataSourceInfo, {
notionId: parent.notionPageId,
parentType: parent.parentType,
parentId: parent.parentId,
})
// parentId cannot be undefined here
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
await getParents(dataSourceInfo, pageOrDb.parentId!, memoizationKey)
);
}
default:
Expand All @@ -67,16 +59,16 @@ async function _getParents(

export const getParents = memoize(
_getParents,
(dataSourceInfo, pageOrDb, memoizationKey) => {
return `${dataSourceInfo.dataSourceName}:${pageOrDb.notionId}:${memoizationKey}`;
(dataSourceInfo, pageOrDbId, memoizationKey) => {
return `${dataSourceInfo.dataSourceName}:${pageOrDbId}:${memoizationKey}`;
}
);

export async function updateAllParentsFields(
dataSourceConfig: DataSourceConfig,
pageOrDbs: (NotionPage | NotionDatabase)[],
memoizationKey?: string
) {
): Promise<number> {
/* Computing all descendants, then updating, ensures the field is updated only
once per page, limiting the load on the Datasource */
const pagesToUpdate = await getPagesToUpdate(pageOrDbs, dataSourceConfig);
Expand All @@ -87,11 +79,7 @@ export async function updateAllParentsFields(
for (const page of pagesToUpdate) {
const parents = await getParents(
dataSourceConfig,
{
notionId: page.notionPageId,
parentType: page.parentType,
parentId: page.parentId,
},
page.notionPageId,
memoizationKey
);

Expand All @@ -101,6 +89,7 @@ export async function updateAllParentsFields(
parents
);
}
return pagesToUpdate.length;
}

/** Get ids of all pages whose parents field should be updated: initial pages in
Expand Down
13 changes: 7 additions & 6 deletions connectors/src/connectors/notion/temporal/activities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -274,11 +274,7 @@ export async function notionUpsertPageActivity(
const documentId = `notion-${parsedPage.id}`;
const parents = await getParents(
dataSourceConfig,
{
notionId: pageId,
parentType: parsedPage.parentType,
parentId: parsedPage.parentId,
},
pageId,
runTimestamp.toString() // memoize at notionSyncWorkflow main inner loop level
);
await upsertToDatasource({
Expand Down Expand Up @@ -843,14 +839,19 @@ export async function updateParentsFieldsActivity(
activitiesResults: UpsertActivityResult[],
activityExecutionTimestamp: number
) {
const localLogger = logger.child({
workspaceId: dataSourceConfig.workspaceId,
dataSourceName: dataSourceConfig.dataSourceName,
});
// Get documents whose path changed (created or moved) If there is
// createdOrMoved, then the document cannot be null thus the cast is safe
const documents = activitiesResults
.filter((aRes) => aRes.createdOrMoved)
.map((aRes) => aRes.pageOrDb) as (NotionPage | NotionDatabase)[];
await updateAllParentsFields(
const nbUpdated = await updateAllParentsFields(
dataSourceConfig,
documents,
activityExecutionTimestamp.toString()
);
localLogger.info({ nbUpdated }, "Updated parents fields.");
}
2 changes: 1 addition & 1 deletion connectors/src/connectors/notion/temporal/workflows.ts
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ export async function notionSyncWorkflow(
await updateParentsFieldsActivity(
dataSourceConfig,
syncWorkflowResults.flat().filter((r) => r) as UpsertActivityResult[],
runTimestamp
new Date().getTime()
);

if (!isGargageCollectionRun) {
Expand Down

0 comments on commit af02ecb

Please sign in to comment.