Skip to content

Commit

Permalink
[KWSearch] Double Confluence parent IDs (#9274)
Browse files Browse the repository at this point in the history
* upsert both the old and new parents for confluence pages

* add migration script

* fix borrow errors

* fix borrow errors

* clone the qdrant_clients for concurrent use

* pass store as a ref

* clone instead of passing refs for the store

* remove migration script for confluence

* fix: make the upsert consistent with the updateDocumentParents
  • Loading branch information
aubin-tchoi authored Dec 12, 2024
1 parent 7498117 commit 7c0707a
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 3 deletions.
15 changes: 15 additions & 0 deletions connectors/src/connectors/confluence/lib/internal_ids.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
import {
makeConfluencePageId,
makeConfluenceSpaceId,
} from "@connectors/connectors/confluence/temporal/utils";

enum ConfluenceInternalIdPrefix {
Space = "cspace_",
Page = "cpage_",
Expand Down Expand Up @@ -27,3 +32,13 @@ export function isConfluenceInternalPageId(
): internalId is `${ConfluenceInternalIdPrefix.Page}${string}` {
return internalId.startsWith(ConfluenceInternalIdPrefix.Page);
}

/**
 * Maps a Confluence internal ID (page or space) to the matching document ID.
 *
 * Page internal IDs are routed through `makeConfluencePageId` and space
 * internal IDs through `makeConfluenceSpaceId`; in both cases the raw
 * Confluence ID is first extracted with `getIdFromConfluenceInternalId`.
 *
 * @param internalId - Internal ID to convert.
 * @returns The document ID built from the extracted Confluence ID.
 * @throws Error when `internalId` is neither a page nor a space internal ID.
 */
export function convertInternalIdToDocumentId(internalId: string): string {
  if (isConfluenceInternalPageId(internalId)) {
    const rawPageId = getIdFromConfluenceInternalId(internalId);
    return makeConfluencePageId(rawPageId);
  } else if (isConfluenceInternalSpaceId(internalId)) {
    const rawSpaceId = getIdFromConfluenceInternalId(internalId);
    return makeConfluenceSpaceId(rawSpaceId);
  }

  // Neither known prefix matched: surface the offending value to the caller.
  throw new Error(`Invalid internal ID: ${internalId}`);
}
14 changes: 11 additions & 3 deletions connectors/src/connectors/confluence/temporal/activities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ import {
getConfluencePageParentIds,
getSpaceHierarchy,
} from "@connectors/connectors/confluence/lib/hierarchy";
import { makeConfluenceInternalPageId } from "@connectors/connectors/confluence/lib/internal_ids";
import {
convertInternalIdToDocumentId,
makeConfluenceInternalPageId,
} from "@connectors/connectors/confluence/lib/internal_ids";
import {
makeConfluenceDocumentUrl,
makeConfluencePageId,
Expand Down Expand Up @@ -384,7 +387,8 @@ export async function confluenceCheckAndUpsertPageActivity({
documentUrl,
loggerArgs,
// Parent Ids will be computed after all page imports within the space have been completed.
parents: [makeConfluenceInternalPageId(documentId)],
// TODO(2024-12-11 aubin): we upsert both the old and the new parent IDs; this is the first step of the backfill plan.
parents: [documentId, makeConfluenceInternalPageId(pageId)],
tags,
timestampMs: lastPageVersionCreatedAt.getTime(),
upsertContext: {
Expand Down Expand Up @@ -572,7 +576,11 @@ export async function confluenceUpdatePagesParentIdsActivity(
await updateDocumentParentsField({
dataSourceConfig: dataSourceConfigFromConnector(connector),
documentId: makeConfluencePageId(page.pageId),
parents: parentIds,
// TODO(2024-12-11 aubin): we upsert both the old and the new parent IDs; this is the first step of the backfill plan.
parents: [
...parentIds,
...parentIds.map(convertInternalIdToDocumentId),
],
});
},
{ concurrency: 10 }
Expand Down
4 changes: 4 additions & 0 deletions connectors/src/connectors/confluence/temporal/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ export function makeConfluencePageId(pageId: string) {
return `confluence-page-${pageId}`;
}

/**
 * Builds the identifier string for a Confluence space by prepending the
 * `confluence-space-` prefix to the given space ID.
 *
 * @param spaceId - Confluence space ID to embed in the identifier.
 * @returns The string `confluence-space-<spaceId>`.
 */
export function makeConfluenceSpaceId(spaceId: string) {
  const spacePrefix = "confluence-space-";
  return spacePrefix.concat(spaceId);
}

export function makeConfluenceDocumentUrl({
baseUrl,
suffix,
Expand Down

0 comments on commit 7c0707a

Please sign in to comment.