From 319f946fc6e08f82eb36ea7df0e0cee6838de2d5 Mon Sep 17 00:00:00 2001
From: philipperolet
Date: Wed, 6 Sep 2023 19:34:05 +0200
Subject: [PATCH] migration script

---
 .../20230906_3_github_fill_parents_field.ts  | 101 ++++++++++++++++++
 .../connectors/github/temporal/activities.ts |   7 +-
 2 files changed, 106 insertions(+), 2 deletions(-)
 create mode 100644 connectors/migrations/20230906_3_github_fill_parents_field.ts

diff --git a/connectors/migrations/20230906_3_github_fill_parents_field.ts b/connectors/migrations/20230906_3_github_fill_parents_field.ts
new file mode 100644
index 000000000000..ebe2e7aa52d7
--- /dev/null
+++ b/connectors/migrations/20230906_3_github_fill_parents_field.ts
@@ -0,0 +1,101 @@
+import {
+  getDiscussionDocumentId,
+  getIssueDocumentId,
+} from "@connectors/connectors/github/temporal/activities";
+import { updateDocumentParentsField } from "@connectors/lib/data_sources";
+import {
+  Connector,
+  GithubDiscussion,
+  GithubIssue,
+} from "@connectors/lib/models";
+
+async function main() {
+  // if first arg is "all", update all github connectors, else update only the
+  // github connector(s) whose workspace id equals the first arg
+  const connectors =
+    process.argv[2] === "all"
+      ? await Connector.findAll({
+          where: {
+            type: "github",
+          },
+        })
+      : await Connector.findAll({
+          where: {
+            type: "github",
+            workspaceId: process.argv[2],
+          },
+        });
+
+  for (const connector of connectors) {
+    console.log(`Updating parents field for connector ${connector.id}`);
+    await updateDiscussionsParentsFieldForConnector(connector);
+    await updateIssuesParentsFieldForConnector(connector);
+  }
+}
+
+async function updateDiscussionsParentsFieldForConnector(connector: Connector) {
+  // fetch the repo id and discussion number of every GithubDiscussion row
+  // recorded for this connector
+  const documentData = await GithubDiscussion.findAll({
+    where: {
+      connectorId: connector.id,
+    },
+    attributes: ["repoId", "discussionNumber"],
+  });
+  // update the parents field of all discussion documents by chunks of 128
+  const chunkSize = 128;
+  for (let i = 0; i < documentData.length; i += chunkSize) {
+    const chunk = documentData.slice(i, i + chunkSize);
+    console.log(`Updating ${chunk.length} documents`);
+    // update parents field for each document of the chunk, in parallel
+    await Promise.all(
+      chunk.map(async (document) => {
+        const docId = getDiscussionDocumentId(
+          document.repoId,
+          document.discussionNumber
+        );
+        await updateDocumentParentsField(connector, docId, [
+          document.discussionNumber.toString(),
+          document.repoId,
+        ]);
+      })
+    );
+  }
+}
+
+async function updateIssuesParentsFieldForConnector(connector: Connector) {
+  // fetch the repo id and issue number of every GithubIssue row recorded for
+  // this connector
+  const documentData = await GithubIssue.findAll({
+    where: {
+      connectorId: connector.id,
+    },
+    attributes: ["repoId", "issueNumber"],
+  });
+  // update the parents field of all issue documents by chunks of 128
+  const chunkSize = 128;
+  for (let i = 0; i < documentData.length; i += chunkSize) {
+    const chunk = documentData.slice(i, i + chunkSize);
+    console.log(`Updating ${chunk.length} documents`);
+    // update parents field for each document of the chunk, in parallel
+    await Promise.all(
+      chunk.map(async (document) => {
+        const docId = getIssueDocumentId(document.repoId, document.issueNumber);
+        await updateDocumentParentsField(connector, docId, [
+          document.issueNumber.toString(),
+          document.repoId,
+        ]);
+      })
+    );
+  }
+}
+
+main()
+  .then(() => {
+    console.log("Done");
+    process.exit(0);
+  })
+  .catch((err) => {
+    console.error(err);
+    process.exit(1);
+  });
diff --git a/connectors/src/connectors/github/temporal/activities.ts b/connectors/src/connectors/github/temporal/activities.ts
index e6f1ad8a6f92..668d631f1dbc 100644
--- a/connectors/src/connectors/github/temporal/activities.ts
+++ b/connectors/src/connectors/github/temporal/activities.ts
@@ -585,11 +585,14 @@ function renderGithubUser(user: GithubUser | null): string {
   return `@${user.id}`;
 }
 
-function getIssueDocumentId(repoId: string, issueNumber: number): string {
+export function getIssueDocumentId(
+  repoId: string,
+  issueNumber: number
+): string {
   return `github-issue-${repoId}-${issueNumber}`;
 }
 
-function getDiscussionDocumentId(
+export function getDiscussionDocumentId(
   repoId: string,
   discussionNumber: number
 ): string {