From c1771c3c0f56722902cff8e267e9a19c77a562ab Mon Sep 17 00:00:00 2001 From: Ben Scholer Date: Fri, 9 Feb 2024 18:48:22 -0500 Subject: [PATCH 1/4] auto timestamp format --- src/settings.ts | 5 +++-- src/transcribe.ts | 24 +++++++++++++++++------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/settings.ts b/src/settings.ts index 7dca8ed..7d9b777 100644 --- a/src/settings.ts +++ b/src/settings.ts @@ -33,7 +33,7 @@ const IS_SWIFTINK = "swiftink"; const DEFAULT_SETTINGS: TranscriptionSettings = { timestamps: false, - timestampFormat: "HH:mm:ss", + timestampFormat: "auto", translate: false, language: "auto", verbosity: 1, @@ -286,11 +286,12 @@ class TranscriptionSettingTab extends PluginSettingTab { new Setting(containerEl) .setName("Timestamp format") .setDesc( - "Your choice of hours, minutes, and/or seconds in the timestamp", + "Your choice of hours, minutes, and/or seconds in the timestamp. Auto uses the shortest possible format.", ) .setClass("depends-on-timestamps") .addDropdown((dropdown) => dropdown + .addOption("auto", "Auto") .addOption("HH:mm:ss", "HH:mm:ss") .addOption("mm:ss", "mm:ss") .addOption("ss", "ss") diff --git a/src/transcribe.ts b/src/transcribe.ts index c54c7c3..be39fd8 100644 --- a/src/transcribe.ts +++ b/src/transcribe.ts @@ -44,22 +44,32 @@ export class TranscriptionEngine { segments: components["schemas"]["TimestampedTextSegment"][], timestampFormat: string, ): string { + let maxDuration = 0; + + // Find the largest timestamp in the segments + segments.forEach(segment => { + maxDuration = Math.max(maxDuration, segment.end); + }); + + // Decide format based on maxDuration + const autoFormat = maxDuration < 3600 ? 
"mm:ss" : "HH:mm:ss"; + let transcription = ""; - for (const segment of segments) { + segments.forEach(segment => { let start = new Date(segment.start * 1000); let end = new Date(segment.end * 1000); - start = new Date( - start.getTime() + start.getTimezoneOffset() * 60000, - ); + start = new Date(start.getTime() + start.getTimezoneOffset() * 60000); end = new Date(end.getTime() + end.getTimezoneOffset() * 60000); - const start_formatted = format(start, timestampFormat); - const end_formatted = format(end, timestampFormat); + // Use autoFormat if timestampFormat is 'auto' + const formatToUse = timestampFormat === 'auto' ? autoFormat : timestampFormat; + const start_formatted = format(start, formatToUse); + const end_formatted = format(end, formatToUse); const segment_string = `${start_formatted} - ${end_formatted}: ${segment.text}\n`; transcription += segment_string; - } + }); return transcription; } From 035b20783e501a5ddb413cc82efb018ab965a228 Mon Sep 17 00:00:00 2001 From: Ben Scholer Date: Fri, 9 Feb 2024 19:41:39 -0500 Subject: [PATCH 2/4] add option to group text by timestamp interval --- src/settings.ts | 24 +++++++++++- src/transcribe.ts | 96 ++++++++++++++++++++++++++++++++--------------- 2 files changed, 89 insertions(+), 31 deletions(-) diff --git a/src/settings.ts b/src/settings.ts index 7d9b777..5c04b72 100644 --- a/src/settings.ts +++ b/src/settings.ts @@ -4,6 +4,7 @@ import { Transcription } from "./main"; interface TranscriptionSettings { timestamps: boolean; timestampFormat: string; + timestampInterval: string; // easier to store as a string and convert to number when needed translate: boolean; language: string; verbosity: number; @@ -34,6 +35,7 @@ const IS_SWIFTINK = "swiftink"; const DEFAULT_SETTINGS: TranscriptionSettings = { timestamps: false, timestampFormat: "auto", + timestampInterval: "0", translate: false, language: "auto", verbosity: 1, @@ -302,6 +304,26 @@ class TranscriptionSettingTab extends PluginSettingTab { }), ); + new 
Setting(containerEl) + .setName("Timestamp interval") + .setDesc("The interval at which to add timestamps, in seconds.") + .setClass("depends-on-timestamps") + .addDropdown((dropdown) => + dropdown + .addOption("0", "Off") + .addOption("5", "5") + .addOption("10", "10") + .addOption("15", "15") + .addOption("20", "20") + .addOption("30", "30") + .addOption("60", "60") + .setValue(this.plugin.settings.timestampInterval) + .onChange(async (value) => { + this.plugin.settings.timestampInterval = value; + await this.plugin.saveSettings(); + }), + ); + new Setting(containerEl) .setName("Swiftink Settings") .setClass("swiftink-settings") @@ -488,7 +510,7 @@ class TranscriptionSettingTab extends PluginSettingTab { new Setting(containerEl) .setName("Word timestamps") - .setDesc("Include timestamps for each word, can get very verbose! Only works if timestamps are enabled.") + .setDesc("Include timestamps for each word, can get very verbose! Only works if timestamps are enabled. Overrides the timestamp interval.") .setClass("whisper-asr-settings") .setClass("depends-on-timestamps") .addToggle((toggle) => diff --git a/src/transcribe.ts b/src/transcribe.ts index be39fd8..735078c 100644 --- a/src/transcribe.ts +++ b/src/transcribe.ts @@ -43,6 +43,7 @@ export class TranscriptionEngine { segmentsToTimestampedString( segments: components["schemas"]["TimestampedTextSegment"][], timestampFormat: string, + interval: number = 0 // in seconds, default is 0 which means no interval adjustment ): string { let maxDuration = 0; @@ -55,21 +56,52 @@ export class TranscriptionEngine { const autoFormat = maxDuration < 3600 ? 
"mm:ss" : "HH:mm:ss"; let transcription = ""; - segments.forEach(segment => { - let start = new Date(segment.start * 1000); - let end = new Date(segment.end * 1000); - start = new Date(start.getTime() + start.getTimezoneOffset() * 60000); - end = new Date(end.getTime() + end.getTimezoneOffset() * 60000); + if (interval > 0) { + // Group segments based on interval + const groupedSegments: Record<number, { start: number; end: number; texts: string[] }> = {}; + segments.forEach(segment => { + // Determine which interval the segment's start time falls into + const intervalStart = Math.floor(segment.start / interval) * interval; + if (!groupedSegments[intervalStart]) { + groupedSegments[intervalStart] = { + start: segment.start, + end: segment.end, + texts: [segment.text] + }; + } else { + groupedSegments[intervalStart].end = Math.max(groupedSegments[intervalStart].end, segment.end); + groupedSegments[intervalStart].texts.push(segment.text); + } + }); - // Use autoFormat if timestampFormat is 'auto' - const formatToUse = timestampFormat === 'auto' ? autoFormat : timestampFormat; - const start_formatted = format(start, formatToUse); - const end_formatted = format(end, formatToUse); + // Format and append grouped segments + Object.values(groupedSegments).forEach(group => { + let start = new Date(group.start * 1000); + let end = new Date(group.end * 1000); + start = new Date(start.getTime() + start.getTimezoneOffset() * 60000); + end = new Date(end.getTime() + end.getTimezoneOffset() * 60000); + const formatToUse = timestampFormat === 'auto' ? 
autoFormat : timestampFormat; + const start_formatted = format(start, formatToUse); + const end_formatted = format(end, formatToUse); + const text = group.texts.join("").trim(); // spaces are already included in the segments + transcription += `${start_formatted} - ${end_formatted}: ${text}\n`; + }); + } else { + // Default behavior: timestamp each segment individually + segments.forEach(segment => { + let start = new Date(segment.start * 1000); + let end = new Date(segment.end * 1000); + start = new Date(start.getTime() + start.getTimezoneOffset() * 60000); + end = new Date(end.getTime() + end.getTimezoneOffset() * 60000); + const formatToUse = timestampFormat === 'auto' ? autoFormat : timestampFormat; + const start_formatted = format(start, formatToUse); + const end_formatted = format(end, formatToUse); + const segment_string = `${start_formatted} - ${end_formatted}: ${segment.text.trim()}\n`; + transcription += segment_string; + }); + } - const segment_string = `${start_formatted} - ${end_formatted}: ${segment.text}\n`; - transcription += segment_string; - }); return transcription; } @@ -102,11 +134,11 @@ export class TranscriptionEngine { await payloadGenerator(payload_data); let args = "output=json"; // always output json, so we can have the timestamps if we need them + args += `&word_timestamps=true`; // always output word timestamps, so we can have the timestamps if we need them const { translate, encode, vadFilter, timestamps, wordTimestamps, language, initialPrompt } = this.settings; if (translate) args += `&task=translate`; if (encode !== DEFAULT_SETTINGS.encode) args += `&encode=${encode}`; if (vadFilter !== DEFAULT_SETTINGS.vadFilter) args += `&vad_filter=${vadFilter}`; - if (timestamps && wordTimestamps !== DEFAULT_SETTINGS.wordTimestamps) args += `&word_timestamps=${wordTimestamps}`; if (language !== DEFAULT_SETTINGS.language) args += `&language=${language}`; if (initialPrompt) args += `&initial_prompt=${initialPrompt}`; @@ -134,23 +166,24 @@ export 
class TranscriptionEngine { const preprocessed = preprocessWhisperASRResponse(response.json); if (this.settings.debug) console.log("Preprocessed response:", preprocessed); - if ( - this.settings.wordTimestamps - && preprocessed.segments.some((segment: WhisperASRSegment) => segment.wordTimestamps) - ) { - // Create segments for each word timestamp if word timestamps are available and enabled - const wordSegments = preprocessed.segments - .reduce((acc: components["schemas"]["TimestampedTextSegment"][], segment: WhisperASRSegment) => { - if (segment.wordTimestamps) { - acc.push(...segment.wordTimestamps.map(wordTimestamp => ({ - start: wordTimestamp.start, - end: wordTimestamp.end, - text: wordTimestamp.word - } as components["schemas"]["TimestampedTextSegment"]))); - } - return acc; - }, []); + // Create segments for each word timestamp if word timestamps are available + const wordSegments = preprocessed.segments + .reduce((acc: components["schemas"]["TimestampedTextSegment"][], segment: WhisperASRSegment) => { + if (segment.wordTimestamps) { + acc.push(...segment.wordTimestamps.map(wordTimestamp => ({ + start: wordTimestamp.start, + end: wordTimestamp.end, + text: wordTimestamp.word + } as components["schemas"]["TimestampedTextSegment"]))); + } + return acc; + }, []); + + if (this.settings.wordTimestamps) { return this.segmentsToTimestampedString(wordSegments, this.settings.timestampFormat); + } else if (parseInt(this.settings.timestampInterval)) { + // Feed the function word segments with the interval + return this.segmentsToTimestampedString(wordSegments, this.settings.timestampFormat, parseInt(this.settings.timestampInterval)); } else if (this.settings.timestamps) { // Use existing segment-to-string functionality if only segment timestamps are needed const segments = preprocessed.segments.map((segment: WhisperASRSegment) => ({ @@ -161,7 +194,10 @@ export class TranscriptionEngine { return this.segmentsToTimestampedString(segments, this.settings.timestampFormat); 
} else if (preprocessed.segments) { // Concatenate all segments into a single string if no timestamps are required - return preprocessed.segments.map((segment: WhisperASRSegment) => segment.text).join("\n"); + return preprocessed.segments + .map((segment: WhisperASRSegment) => segment.text) + .map(s => s.trim()) + .join("\n"); } else { // Fallback to full text if no segments are there return preprocessed.text; From 5074d6a60b4b57120fe23d03f452f2194fd8d70f Mon Sep 17 00:00:00 2001 From: Ben Scholer Date: Mon, 12 Feb 2024 13:47:25 -0500 Subject: [PATCH 3/4] clean up segmentsToTimestampedString --- src/transcribe.ts | 46 ++++++++++++++++++++-------------------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/src/transcribe.ts b/src/transcribe.ts index 735078c..050c83e 100644 --- a/src/transcribe.ts +++ b/src/transcribe.ts @@ -55,7 +55,19 @@ export class TranscriptionEngine { // Decide format based on maxDuration const autoFormat = maxDuration < 3600 ? "mm:ss" : "HH:mm:ss"; - let transcription = ""; + const renderSegments = (segments: components["schemas"]["TimestampedTextSegment"][]) => ( + segments.reduce((transcription: string, segment ) => { + let start = new Date(segment.start * 1000); + let end = new Date(segment.end * 1000); + start = new Date(start.getTime() + start.getTimezoneOffset() * 60000); + end = new Date(end.getTime() + end.getTimezoneOffset() * 60000); + const formatToUse = timestampFormat === 'auto' ? 
autoFormat : timestampFormat; + const start_formatted = format(start, formatToUse); + const end_formatted = format(end, formatToUse); + const segment_string = `${start_formatted} - ${end_formatted}: ${segment.text.trim()}\n`; + transcription += segment_string; + return transcription; + }, "")); if (interval > 0) { // Group segments based on interval @@ -75,34 +87,16 @@ export class TranscriptionEngine { } }); - // Format and append grouped segments - Object.values(groupedSegments).forEach(group => { - let start = new Date(group.start * 1000); - let end = new Date(group.end * 1000); - start = new Date(start.getTime() + start.getTimezoneOffset() * 60000); - end = new Date(end.getTime() + end.getTimezoneOffset() * 60000); - const formatToUse = timestampFormat === 'auto' ? autoFormat : timestampFormat; - const start_formatted = format(start, formatToUse); - const end_formatted = format(end, formatToUse); - const text = group.texts.join("").trim(); // spaces are already included in the segments - transcription += `${start_formatted} - ${end_formatted}: ${text}\n`; - }); + const bucketedSegments = Object.values(groupedSegments).map(group => ({ + start: group.start, + end: group.end, + text: group.texts.join("").trim() + })); + return renderSegments(bucketedSegments); } else { // Default behavior: timestamp each segment individually - segments.forEach(segment => { - let start = new Date(segment.start * 1000); - let end = new Date(segment.end * 1000); - start = new Date(start.getTime() + start.getTimezoneOffset() * 60000); - end = new Date(end.getTime() + end.getTimezoneOffset() * 60000); - const formatToUse = timestampFormat === 'auto' ? 
autoFormat : timestampFormat; - const start_formatted = format(start, formatToUse); - const end_formatted = format(end, formatToUse); - const segment_string = `${start_formatted} - ${end_formatted}: ${segment.text.trim()}\n`; - transcription += segment_string; - }); + return renderSegments(segments); } - - return transcription; } async getTranscription(file: TFile): Promise { From 3ac95325a31c4599cc8ecc846e3bc54be6e5379e Mon Sep 17 00:00:00 2001 From: Ben Scholer Date: Mon, 12 Feb 2024 13:50:55 -0500 Subject: [PATCH 4/4] remove unused vars --- src/transcribe.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transcribe.ts b/src/transcribe.ts index 050c83e..0ca5bb8 100644 --- a/src/transcribe.ts +++ b/src/transcribe.ts @@ -129,7 +129,7 @@ export class TranscriptionEngine { let args = "output=json"; // always output json, so we can have the timestamps if we need them args += `&word_timestamps=true`; // always output word timestamps, so we can have the timestamps if we need them - const { translate, encode, vadFilter, timestamps, wordTimestamps, language, initialPrompt } = this.settings; + const { translate, encode, vadFilter, language, initialPrompt } = this.settings; if (translate) args += `&task=translate`; if (encode !== DEFAULT_SETTINGS.encode) args += `&encode=${encode}`; if (vadFilter !== DEFAULT_SETTINGS.vadFilter) args += `&vad_filter=${vadFilter}`;