Skip to content

Commit

Permalink
Optimize Confluence API Usage with V1 Search Endpoint (#10906)
Browse files Browse the repository at this point in the history
* Reduce number of Confluence API queries

* ✨
  • Loading branch information
flvndvd authored Feb 18, 2025
1 parent cb34d1d commit 2470e52
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 36 deletions.
26 changes: 18 additions & 8 deletions connectors/src/connectors/confluence/lib/confluence_api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,11 @@ export async function pageHasReadRestrictions(
}

export interface ConfluencePageRef {
hasChildren: boolean;
hasReadRestrictions: boolean;
id: string;
version: number;
parentId: string | null;
version: number;
}

const PAGE_FETCH_LIMIT = 100;
Expand Down Expand Up @@ -172,19 +174,27 @@ export async function bulkFetchConfluencePageRefs(
spaceId: string;
}
) {
// Fetch the details of the pages (version and parentId).
// Fetch page metadata (version, parent, permissions, etc.) for the given page IDs
const pagesWithDetails = await client.getPagesByIdsInSpace({
spaceId,
sort: "id",
pageIds,
limit,
});

const pageRefs: ConfluencePageRef[] = pagesWithDetails.pages.map((p) => ({
id: p.id,
version: p.version.number,
parentId: p.parentId,
}));
const pageRefs: ConfluencePageRef[] = pagesWithDetails.results.map((p) => {
const hasReadRestrictions =
p.restrictions.read.restrictions.group.results.length > 0 ||
p.restrictions.read.restrictions.user.results.length > 0;

return {
hasChildren: p.childTypes.page.value,
hasReadRestrictions,
id: p.id,
// Ancestors is an array of the page's ancestors, starting with the root page.
parentId: p.ancestors[p.ancestors.length - 1]?.id ?? null,
version: p.version.number,
};
});

return pageRefs;
}
89 changes: 64 additions & 25 deletions connectors/src/connectors/confluence/lib/confluence_client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const ConfluenceAccessibleResourcesCodec = t.array(
const ConfluenceSpaceCodec = t.intersection([
t.type({
id: t.string,
key: t.string,
name: t.string,
_links: t.type({
webui: t.string,
Expand Down Expand Up @@ -57,6 +58,49 @@ const ConfluencePageCodec = t.intersection([
CatchAllCodec,
]);

// Wrapper of the form `{ results: [...] }`; individual entries are not validated.
const SearchConfluenceRestrictionResultsCodec = t.type({
  results: t.array(t.unknown),
});

// Read restrictions of a page, listed separately for users and for groups.
const SearchConfluenceReadRestrictionsCodec = t.type({
  read: t.type({
    restrictions: t.type({
      user: SearchConfluenceRestrictionResultsCodec,
      group: SearchConfluenceRestrictionResultsCodec,
    }),
  }),
});

// Flag telling whether the page has child pages.
const SearchConfluenceChildTypesCodec = t.type({
  page: t.type({
    value: t.boolean,
  }),
});

// Parent chain of the page; only the id of each ancestor is required.
const SearchConfluenceAncestorsCodec = t.array(
  t.type({
    id: t.string,
  })
);

// Page shape decoded from the `content/search` responses requested by
// `getPagesByIdsInSpace`. Beyond the basic identity fields it requires the
// version number, read restrictions, child-page flag and ancestors.
// NOTE(review): `CatchAllCodec` presumably lets additional fields through — confirm.
const SearchConfluencePageCodec = t.intersection([
  t.type({
    id: t.string,
    type: t.string,
    status: t.string,
    title: t.string,
    version: t.type({
      number: t.number,
    }),
    restrictions: SearchConfluenceReadRestrictionsCodec,
    childTypes: SearchConfluenceChildTypesCodec,
    ancestors: SearchConfluenceAncestorsCodec,
  }),
  CatchAllCodec,
]);

const ConfluencePageWithBodyCodec = t.intersection([
ConfluencePageCodec,
t.type({
Expand Down Expand Up @@ -547,42 +591,37 @@ export class ConfluenceClient {

async getPagesByIdsInSpace({
spaceId,
sort,
pageCursor,
pageIds,
limit,
}: {
spaceId: string;
sort?: "id" | "-modified-date";
pageCursor?: string | null;
pageIds?: string[];
pageIds: string[];
limit?: number;
}) {
// First get space info to get the key.
// TODO(2025-02-18 flav) Save the key in the DB.
const space = await this.getSpaceById(spaceId);

// Build CQL query to get pages with specific IDs.
const idClause = pageIds?.length ? ` AND id in (${pageIds.join(",")})` : "";
const cqlQuery = `type=page AND space="${space.key}"${idClause}`;

const params = new URLSearchParams({
sort: sort ?? "id",
cql: cqlQuery,
limit: limit?.toString() ?? "25",
status: "current",
"space-id": spaceId,
expand: [
"version", // to check if page changed.
"restrictions.read.restrictions.user", // to check user permissions.
"restrictions.read.restrictions.group", // to check group permissions.
"childTypes.page", // to know if it has children.
"ancestors", // to get parent info.
].join(","),
});

if (pageCursor) {
params.append("cursor", pageCursor);
}

if (pageIds && pageIds.length > 0) {
params.append("id", pageIds.join(","));
}

const pages = await this.request(
`${this.restApiBaseUrl}/pages?${params.toString()}`,
ConfluencePaginatedResults(ConfluencePageCodec)
return this.request(
`${this.legacyRestApiBaseUrl}/content/search?${params.toString()}`,
ConfluencePaginatedResults(SearchConfluencePageCodec)
);
const nextPageCursor = extractCursorFromLinks(pages._links);

return {
pages: pages.results,
nextPageCursor,
};
}

async getPageById(pageId: string) {
Expand Down
4 changes: 1 addition & 3 deletions connectors/src/connectors/confluence/temporal/activities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ export async function confluenceCheckAndUpsertPageActivity({
);

// Check restrictions.
const hasReadRestrictions = await pageHasReadRestrictions(client, pageId);
const { hasReadRestrictions } = pageRef;
if (hasReadRestrictions) {
localLogger.info("Skipping restricted Confluence page.");
return false;
Expand Down Expand Up @@ -713,8 +713,6 @@ export async function fetchAndUpsertRootPagesActivity(params: {
}
}

console.log(">> allowedRootPageIds", allowedRootPageIds);

return allowedRootPageIds;
}

Expand Down
5 changes: 5 additions & 0 deletions connectors/src/connectors/confluence/temporal/workflows.ts
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,11 @@ export async function confluenceSyncTopLevelChildPagesWorkflow(
}
}

// Only attempt to fetch children if the page has known children.
if (isPageRef && !current.hasChildren) {
continue;
}

// Get child pages using either initial empty cursor or saved cursor.
const { childPageRefs, nextPageCursor } =
await confluenceGetActiveChildPageRefsActivity({
Expand Down

0 comments on commit 2470e52

Please sign in to comment.