-
Notifications
You must be signed in to change notification settings - Fork 123
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(Gong) - Add transcripts sync #11231
Changes from all commits
f200516
eba06fb
22e499d
4b47f55
0e87e71
c88eed7
a4ddebe
7c11ea7
3b64095
fdfefa9
8e02302
a7cad16
8fa2e93
7dc7b29
7c54fe7
d8b5f76
38c5709
36e025f
6af1b4e
73753b8
bb9a7f9
6cfb19c
bd65280
35551e2
8cccc34
6055163
f3b2630
87c1df6
56eb911
25aeced
6546107
6a62ad9
84589da
3d31fea
1175fce
4facc2f
c6392a9
a3ceb8a
203ba83
7402be7
fa2e424
f643f33
18a2e9d
b061f9e
e5abd3c
c2fcece
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
-- Migration created on Mar 05, 2025
-- Creates the "gong_transcripts" table: one row per Gong call transcript,
-- attached to the connector it was synced through.
CREATE TABLE IF NOT EXISTS "gong_transcripts"
(
    "createdAt"   TIMESTAMP WITH TIME ZONE NOT NULL,
    "updatedAt"   TIMESTAMP WITH TIME ZONE NOT NULL,
    "callId"      TEXT                     NOT NULL,
    "title"       TEXT                     NOT NULL,
    "url"         TEXT                     NOT NULL,
    "connectorId" BIGINT                   NOT NULL REFERENCES "connectors" ("id") ON DELETE RESTRICT ON UPDATE CASCADE,
    "id"          BIGSERIAL,
    PRIMARY KEY ("id")
);
-- A call can be stored at most once per connector.
-- IF NOT EXISTS keeps the migration re-runnable, like the guarded CREATE TABLE above.
CREATE UNIQUE INDEX IF NOT EXISTS "gong_transcripts_connector_id_call_id" ON "gong_transcripts" ("connectorId", "callId");
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -49,6 +49,52 @@ const GongCallTranscriptCodec = t.type({ | |
transcript: t.array(GongTranscriptMonologueCodec), | ||
}); | ||
|
||
export type GongCallTranscript = t.TypeOf<typeof GongCallTranscriptCodec>; | ||
|
||
/**
 * Codec for one party of a Gong call.
 *
 * `speakerId` may be null; `userId` and `emailAddress` may be undefined.
 * Intersected with `CatchAllCodec` (defined elsewhere in this file;
 * presumably lets additional, undeclared fields through — confirm).
 */
export const GongParticipantCodec = t.intersection([
  t.type({
    speakerId: t.union([t.string, t.null]),
    userId: t.union([t.string, t.undefined]),
    emailAddress: t.union([t.string, t.undefined]),
  }),
  CatchAllCodec,
]);
|
||
/**
 * Codec for one call entry returned by the Gong "extensive calls" endpoint:
 * the call's `metaData` plus the list of `parties`.
 *
 * NOTE(review): a reviewer suggested removing some fields from `metaData` and
 * only keeping the ones actually used, to avoid validation errors. The fields
 * are left untouched here because the exported type may be consumed elsewhere.
 */
const GongTranscriptMetadataCodec = t.intersection([
  t.type({
    metaData: t.intersection([
      t.type({
        id: t.string,
        url: t.string,
        primaryUserId: t.string,
        direction: t.union([
          t.literal("Inbound"),
          t.literal("Outbound"),
          t.literal("Conference"),
          t.literal("Unknown"),
        ]),
        scope: t.union([
          t.literal("Internal"),
          t.literal("External"),
          t.literal("Unknown"),
        ]),
        started: t.string, // ISO-8601 date (e.g., '2018-02-18T02:30:00-07:00').
        duration: t.number, // The duration of the call, in seconds.
        title: t.string,
        media: t.union([t.literal("Video"), t.literal("Audio")]),
        language: t.string, // The language codes (as defined by ISO-639-2B): eng, fre, spa, ger, and ita.
      }),
      CatchAllCodec,
    ]),
    parties: t.array(GongParticipantCodec),
  }),
  CatchAllCodec,
]);

/** Static type of a decoded call-metadata entry. */
export type GongTranscriptMetadata = t.TypeOf<
  typeof GongTranscriptMetadataCodec
>;
|
||
// Generic codec for paginated results from Gong API. | ||
const GongPaginatedResults = <C extends t.Mixed, F extends string>( | ||
fieldName: F, | ||
|
@@ -60,7 +106,7 @@ const GongPaginatedResults = <C extends t.Mixed, F extends string>( | |
records: t.type({ | ||
currentPageNumber: t.number, | ||
currentPageSize: t.number, | ||
// Cursor only exists if there are more results. | ||
// The cursor only exists if there are more results. | ||
cursor: t.union([t.string, t.undefined]), | ||
totalRecords: t.number, | ||
}), | ||
|
@@ -190,6 +236,7 @@ export class GongClient { | |
return this.handleResponse(response, endpoint, codec); | ||
} | ||
|
||
// https://gong.app.gong.io/settings/api/documentation#post-/v2/calls/transcript | ||
async getTranscripts({ | ||
startTimestamp, | ||
pageCursor, | ||
|
@@ -217,14 +264,15 @@ export class GongClient { | |
} catch (err) { | ||
if (err instanceof GongAPIError && err.status === 404) { | ||
return { | ||
pages: [], | ||
transcripts: [], | ||
nextPageCursor: null, | ||
}; | ||
} | ||
throw err; | ||
} | ||
} | ||
|
||
// https://gong.app.gong.io/settings/api/documentation#get-/v2/users | ||
async getUsers({ pageCursor }: { pageCursor: string | null }) { | ||
try { | ||
const users = await this.getRequest( | ||
|
@@ -260,4 +308,43 @@ export class GongClient { | |
throw err; | ||
} | ||
} | ||
|
||
/**
 * Fetches metadata (including parties) for the given calls.
 *
 * @see https://gong.app.gong.io/settings/api/documentation#post-/v2/calls/extensive
 *
 * @param callIds - Ids of the calls to fetch metadata for.
 * @param pageCursor - Cursor returned by a previous page, or null for the first page.
 * @returns The decoded calls of this page and the cursor of the next page
 *   (null when there is none). A 404 from the API is reported as an empty page.
 * @throws Rethrows any error that is not a `GongAPIError` with status 404.
 */
async getCallsMetadata({
  callIds,
  pageCursor = null,
}: {
  callIds: string[];
  pageCursor?: string | null;
}) {
  try {
    const callsMetadata = await this.postRequest(
      `/calls/extensive`,
      {
        cursor: pageCursor,
        filter: {
          callIds,
        },
        contentSelector: {
          exposedFields: {
            parties: true,
          },
        },
      },
      GongPaginatedResults("calls", GongTranscriptMetadataCodec)
    );
    return {
      callsMetadata: callsMetadata.calls,
      // The codec types a missing cursor as undefined; normalise it to null so
      // both return paths expose the same `string | null` shape.
      nextPageCursor: callsMetadata.records.cursor ?? null,
    };
  } catch (err) {
    // NOTE(review): a reviewer suggested using a different construct for this
    // check (its name was lost from the review comment) and noted it was
    // addressed in a separate PR.
    if (err instanceof GongAPIError && err.status === 404) {
      return {
        callsMetadata: [],
        nextPageCursor: null,
      };
    }
    throw err;
  }
}
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
import type { ConnectorResource } from "@connectors/resources/connector_resource"; | ||
|
||
/**
 * Builds the internal id of the folder that holds a connector's Gong transcripts.
 *
 * @param connector - Connector whose `id` is embedded in the folder id.
 * @returns `gong-transcript-folder-<connector.id>`.
 */
export function makeGongTranscriptFolderInternalId(
  connector: ConnectorResource
): string {
  return `gong-transcript-folder-${connector.id}`;
}
|
||
/**
 * Builds the internal id of a single Gong transcript document.
 *
 * @param connector - Connector whose `id` is embedded in the document id.
 * @param callId - Gong id of the call the transcript belongs to.
 * @returns `gong-transcript-<connector.id>-<callId>`.
 */
export function makeGongTranscriptInternalId(
  connector: ConnectorResource,
  callId: string
): string {
  return `gong-transcript-${connector.id}-${callId}`;
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
import { MIME_TYPES } from "@dust-tt/types"; | ||
|
||
import type { | ||
GongCallTranscript, | ||
GongTranscriptMetadata, | ||
} from "@connectors/connectors/gong/lib/gong_api"; | ||
import { | ||
makeGongTranscriptFolderInternalId, | ||
makeGongTranscriptInternalId, | ||
} from "@connectors/connectors/gong/lib/internal_ids"; | ||
import { | ||
renderDocumentTitleAndContent, | ||
renderMarkdownSection, | ||
upsertDataSourceDocument, | ||
} from "@connectors/lib/data_sources"; | ||
import logger from "@connectors/logger/logger"; | ||
import type { ConnectorResource } from "@connectors/resources/connector_resource"; | ||
import type { GongUserResource } from "@connectors/resources/gong_resources"; | ||
import { GongTranscriptResource } from "@connectors/resources/gong_resources"; | ||
import type { DataSourceConfig } from "@connectors/types/data_source_config"; | ||
|
||
/**
 * Syncs a transcript in the db and upserts it to the data sources.
 *
 * If the transcript already exists in the db and `forceResync` is false, the
 * call is skipped. Otherwise a db row is created when missing, the transcript
 * is rendered as markdown (one heading per speaker turn followed by their
 * sentences), and the document is upserted.
 *
 * @param transcript - Call transcript (call id and monologues) to sync.
 * @param transcriptMetadata - Metadata of the same call (title, url, start, duration, ...).
 * @param participants - Users whose emails are listed as participants and added as tags.
 * @param speakerToEmailMap - Maps a speaker id to the label used as the speaker heading.
 * @param connector - Connector the transcript belongs to.
 * @param dataSourceConfig - Data source to upsert the document into.
 * @param loggerArgs - Extra fields attached to every log line.
 * @param forceResync - When true, re-upserts even if the transcript is already in the db.
 */
export async function syncGongTranscript({
  transcript,
  transcriptMetadata,
  participants,
  speakerToEmailMap,
  connector,
  dataSourceConfig,
  loggerArgs,
  forceResync,
}: {
  transcript: GongCallTranscript;
  transcriptMetadata: GongTranscriptMetadata;
  participants: GongUserResource[];
  speakerToEmailMap: Record<string, string>;
  connector: ConnectorResource;
  dataSourceConfig: DataSourceConfig;
  loggerArgs: Record<string, string | number | null>;
  forceResync: boolean;
}): Promise<void> {
  const { callId } = transcript;
  const { metaData } = transcriptMetadata;
  const createdAtDate = new Date(metaData.started);
  // `||` (not `??`) on purpose: an empty title also falls back to the default.
  const title = metaData.title || "Untitled transcript";
  const documentUrl = metaData.url;

  const transcriptInDb = await GongTranscriptResource.fetchByCallId(
    callId,
    connector
  );

  if (!forceResync && transcriptInDb) {
    logger.info(
      {
        ...loggerArgs,
        callId,
      },
      "[Gong] Transcript already up to date, skipping sync."
    );
    return;
  }

  if (!transcriptInDb) {
    await GongTranscriptResource.makeNew({
      blob: {
        connectorId: connector.id,
        callId,
        title,
        url: documentUrl,
      },
    });
  }

  logger.info(
    {
      ...loggerArgs,
      callId,
      createdAtDate,
    },
    "[Gong] Upserting transcript."
  );

  // `duration` is expressed in seconds; render it as "<h> hours <mm> minutes".
  const hours = Math.floor(metaData.duration / 3600);
  const minutes = Math.floor((metaData.duration % 3600) / 60);
  const callDuration = `${hours} hours ${String(minutes).padStart(2, "0")} minutes`;

  let markdownContent = `Meeting title: ${title}\n\nDate: ${createdAtDate.toISOString()}\n\nDuration: ${callDuration}\n\n`;

  // Rebuild the transcript content with [User]: [sentence].
  // The last speaker is tracked across monologues (not reset per monologue) so
  // that consecutive monologues from the same speaker share a single heading.
  let lastSpeakerId: string | null = null;
  for (const monologue of transcript.transcript) {
    for (const sentence of monologue.sentences) {
      if (monologue.speakerId !== lastSpeakerId) {
        markdownContent += `# ${speakerToEmailMap[monologue.speakerId] || "Unknown speaker"}\n`;
        lastSpeakerId = monologue.speakerId;
      }
      markdownContent += `${sentence.text}\n`;
    }
  }

  const renderedContent = await renderMarkdownSection(
    dataSourceConfig,
    markdownContent
  );
  const documentContent = await renderDocumentTitleAndContent({
    dataSourceConfig,
    title,
    content: renderedContent,
    createdAt: createdAtDate,
    additionalPrefixes: {
      language: metaData.language,
      media: metaData.media,
      scope: metaData.scope,
      direction: metaData.direction,
      participants: participants.map((p) => p.email).join(", ") || "none",
    },
  });

  const documentId = makeGongTranscriptInternalId(connector, callId);
  const folderId = makeGongTranscriptFolderInternalId(connector);

  await upsertDataSourceDocument({
    dataSourceConfig,
    documentId,
    documentContent,
    documentUrl,
    timestampMs: createdAtDate.getTime(),
    tags: [
      `title:${title}`,
      `createdAt:${createdAtDate.getTime()}`,
      `language:${metaData.language}`, // The language codes (as defined by ISO-639-2B): eng, fre, spa, ger, and ita.
      `media:${metaData.media}`,
      `scope:${metaData.scope}`,
      `direction:${metaData.direction}`,
      // NOTE(review): a reviewer pointed out that this leaves out participants
      // who are not internal to the connected Gong workspace, and asked whether
      // they can still be included.
      ...participants.map((p) => p.email),
    ],
    parents: [documentId, folderId],
    parentId: folderId,
    loggerArgs: { ...loggerArgs, callId },
    upsertContext: { sync_type: "batch" },
    title,
    mimeType: MIME_TYPES.GONG.TRANSCRIPT,
    async: true,
  });
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can title be bigger than 255 characters?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we don't have occurrences in our gong but I would assume it's not really bounded
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's truncate in the resource then.