-
Notifications
You must be signed in to change notification settings - Fork 112
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Github Sync updates github docs parents Field (#1296)
* doc update of parents field * core api to update parents (+ refactor of update tags) * spolu review * logging * review 2 * fixed qdrant payload update via document hash * WIP * wip2 * wip 3 * wip 4 * add parents endpoint to front * cleaning * removed js Set in parents (semantics don't work) * cleaning * remove SyncWorkflowResult * controlled memoization of getParents * cleaning * handle void returns from pqueue execution in notion workflow * fix: document is of class object at runtime, not notionpage or notiondb * cleaning * bump notion workflow version * pass timestamp to activity execution for better memoization * henry's suggestions * migration script * cleaning * leaner getParents + fixes on potential inconsistencies * renaming * Update parents during sync * migration script * comments from pr + adaptations from discussions * last PR comments * doc on convention on connector resource
- Loading branch information
1 parent
ae30922
commit 7a20e7e
Showing
4 changed files
with
163 additions
and
23 deletions.
There are no files selected for viewing
104 changes: 104 additions & 0 deletions
104
connectors/migrations/20230906_3_github_fill_parents_field.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
import { | ||
getDiscussionDocumentId, | ||
getIssueDocumentId, | ||
} from "@connectors/connectors/github/temporal/activities"; | ||
import { updateDocumentParentsField } from "@connectors/lib/data_sources"; | ||
import { | ||
Connector, | ||
GithubDiscussion, | ||
GithubIssue, | ||
} from "@connectors/lib/models"; | ||
|
||
/**
 * Entry point of the migration.
 *
 * Reads the first CLI argument: "all" selects every github connector, any
 * other value is used as a workspace id to filter github connectors. For each
 * selected connector, the parents field is updated for its discussions first,
 * then for its issues. Exits with code 1 when the argument is missing.
 */
async function main() {
  const target = process.argv[2];
  if (!target) {
    console.error("Missing workspace id or 'all' as first argument");
    process.exit(1);
  }

  // "all" means no workspace filter; otherwise restrict to the given workspace.
  const githubConnectors = await Connector.findAll({
    where:
      target === "all"
        ? { type: "github" }
        : { type: "github", workspaceId: target },
  });

  // Connectors are processed one after the other.
  for (const githubConnector of githubConnectors) {
    console.log(`Updating parents field for connector ${githubConnector.id}`);
    await updateDiscussionsParentsFieldForConnector(githubConnector);
    await updateIssuesParentsFieldForConnector(githubConnector);
  }
}
|
||
/**
 * Updates the parents field of every GitHub discussion document belonging to
 * the given connector.
 *
 * For each GithubDiscussion row of the connector, the parents are set to
 * `[discussionDocumentId, repoId]`.
 *
 * @param connector The github connector whose discussions are updated.
 */
async function updateDiscussionsParentsFieldForConnector(connector: Connector) {
  // Fetch the repo id and discussion number of every discussion of this
  // connector; these are the only fields needed to build the document id.
  const discussions = await GithubDiscussion.findAll({
    where: {
      connectorId: connector.id,
    },
    attributes: ["repoId", "discussionNumber"],
  });

  // Update the parents fields chunk by chunk: documents within a chunk are
  // updated in parallel, chunks are processed sequentially.
  const chunkSize = 32;
  for (let i = 0; i < discussions.length; i += chunkSize) {
    const chunk = discussions.slice(i, i + chunkSize);
    console.log(`Updating ${chunk.length} documents`);
    await Promise.all(
      chunk.map(async (discussion) => {
        // Compute the document id once; it is both the document to update and
        // the first entry of its parents.
        const docId = getDiscussionDocumentId(
          discussion.repoId,
          discussion.discussionNumber
        );
        await updateDocumentParentsField(connector, docId, [
          docId,
          discussion.repoId,
        ]);
      })
    );
  }
}
|
||
/**
 * Updates the parents field of every GitHub issue document belonging to the
 * given connector.
 *
 * For each GithubIssue row of the connector, the parents are set to
 * `[issueDocumentId, repoId]`.
 *
 * @param connector The github connector whose issues are updated.
 */
async function updateIssuesParentsFieldForConnector(connector: Connector) {
  // Fetch the repo id and issue number of every issue of this connector;
  // these are the only fields needed to build the document id.
  const issues = await GithubIssue.findAll({
    where: {
      connectorId: connector.id,
    },
    attributes: ["repoId", "issueNumber"],
  });

  // Update the parents fields chunk by chunk: documents within a chunk are
  // updated in parallel, chunks are processed sequentially.
  const chunkSize = 32;
  for (let i = 0; i < issues.length; i += chunkSize) {
    const chunk = issues.slice(i, i + chunkSize);
    console.log(`Updating ${chunk.length} documents`);
    await Promise.all(
      chunk.map(async (issue) => {
        // Compute the document id once; it is both the document to update and
        // the first entry of its parents.
        const docId = getIssueDocumentId(issue.repoId, issue.issueNumber);
        await updateDocumentParentsField(connector, docId, [
          docId,
          issue.repoId,
        ]);
      })
    );
  }
}
|
||
// Run the migration, then terminate the process explicitly: exit code 0 after
// logging "Done" on success, exit code 1 after logging the error on failure.
void (async () => {
  try {
    await main();
    console.log("Done");
    process.exit(0);
  } catch (err) {
    console.error(err);
    process.exit(1);
  }
})();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters