Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhanced Timestamp Formatting and Custom Interval Grouping #50

Merged
merged 6 commits into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions src/settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { Transcription } from "./main";
interface TranscriptionSettings {
timestamps: boolean;
timestampFormat: string;
timestampInterval: string; // easier to store as a string and convert to number when needed
translate: boolean;
language: string;
verbosity: number;
Expand Down Expand Up @@ -33,7 +34,8 @@ const IS_SWIFTINK = "swiftink";

const DEFAULT_SETTINGS: TranscriptionSettings = {
timestamps: false,
timestampFormat: "HH:mm:ss",
timestampFormat: "auto",
timestampInterval: "0",
translate: false,
language: "auto",
verbosity: 1,
Expand Down Expand Up @@ -253,11 +255,12 @@ class TranscriptionSettingTab extends PluginSettingTab {
new Setting(containerEl)
.setName("Timestamp format")
.setDesc(
"Your choice of hours, minutes, and/or seconds in the timestamp",
"Your choice of hours, minutes, and/or seconds in the timestamp. Auto uses the shortest possible format.",
)
.setClass("depends-on-timestamps")
.addDropdown((dropdown) =>
dropdown
.addOption("auto", "Auto")
.addOption("HH:mm:ss", "HH:mm:ss")
.addOption("mm:ss", "mm:ss")
.addOption("ss", "ss")
Expand All @@ -268,6 +271,26 @@ class TranscriptionSettingTab extends PluginSettingTab {
}),
);

new Setting(containerEl)
.setName("Timestamp interval")
.setDesc("The interval at which to add timestamps, in seconds.")
.setClass("depends-on-timestamps")
.addDropdown((dropdown) =>
dropdown
.addOption("0", "Off")
.addOption("5", "5")
.addOption("10", "10")
.addOption("15", "15")
.addOption("20", "20")
.addOption("30", "30")
.addOption("60", "60")
.setValue(this.plugin.settings.timestampInterval)
.onChange(async (value) => {
this.plugin.settings.timestampInterval = value;
await this.plugin.saveSettings();
}),
);

new Setting(containerEl)
.setName("Swiftink Settings")
.setClass("swiftink-settings")
Expand Down
104 changes: 72 additions & 32 deletions src/transcribe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import * as tus from "tus-js-client";
import { WhisperASRSegment } from "./types/whisper-asr";

/** Function type for a transcription backend: accepts a `TFile` and resolves to a string. */
type TranscriptionBackend = (file: TFile) => Promise<string>;

Check warning on line 11 in src/transcribe.ts

View workflow job for this annotation

GitHub Actions / build

'file' is defined but never used

const MAX_TRIES = 100

Expand Down Expand Up @@ -43,24 +43,60 @@
/**
 * Renders timestamped segments as text, one `start - end: text` line per entry.
 *
 * NOTE(review): this span comes from a rendered diff hunk. Lines of the pre-change
 * per-segment loop (`for (const segment of segments)` … `return transcription;`) appear
 * interleaved with the replacement implementation (`maxDuration`, `autoFormat`,
 * `renderSegments`, interval grouping), so the text below is not one compilable method
 * body — confirm against the merged file before relying on it.
 *
 * @param segments - Entries with numeric `start`/`end` (multiplied by 1000 before being
 *   handed to `Date`) and a `text` string.
 * @param timestampFormat - Format string passed to `format`. The literal `'auto'` is
 *   replaced by "mm:ss" when the largest `end` is below 3600 and by "HH:mm:ss" otherwise.
 * @param interval - When greater than 0, segments are bucketed by
 *   `Math.floor(start / interval) * interval` and each bucket is rendered as one entry.
 * @returns The rendered lines concatenated into a single string.
 */
segmentsToTimestampedString(
segments: components["schemas"]["TimestampedTextSegment"][],
timestampFormat: string,
interval: number = 0 // in seconds, default is 0 which means no interval adjustment
): string {
// NOTE(review): the accumulator and `for` loop opened here look like the pre-change
// implementation shown by the diff; no matching closing brace exists in this span.
let transcription = "";
for (const segment of segments) {
let start = new Date(segment.start * 1000);
let end = new Date(segment.end * 1000);
let maxDuration = 0;

start = new Date(
start.getTime() + start.getTimezoneOffset() * 60000,
);
end = new Date(end.getTime() + end.getTimezoneOffset() * 60000);
// Find the largest timestamp in the segments
segments.forEach(segment => {
maxDuration = Math.max(maxDuration, segment.end);
});

const start_formatted = format(start, timestampFormat);
const end_formatted = format(end, timestampFormat);
// Decide format based on maxDuration
const autoFormat = maxDuration < 3600 ? "mm:ss" : "HH:mm:ss";

// Folds a list of segments into the final string, one line per segment.
const renderSegments = (segments: components["schemas"]["TimestampedTextSegment"][]) => (
segments.reduce((transcription: string, segment ) => {
let start = new Date(segment.start * 1000);
let end = new Date(segment.end * 1000);
// Shift each Date by the local timezone offset (minutes converted to ms).
// Presumably this makes `format` print the elapsed time rather than a local
// wall-clock time — TODO confirm against the `format` implementation in use.
start = new Date(start.getTime() + start.getTimezoneOffset() * 60000);
end = new Date(end.getTime() + end.getTimezoneOffset() * 60000);
// 'auto' defers to the duration-based choice made above.
const formatToUse = timestampFormat === 'auto' ? autoFormat : timestampFormat;
const start_formatted = format(start, formatToUse);
const end_formatted = format(end, formatToUse);
const segment_string = `${start_formatted} - ${end_formatted}: ${segment.text.trim()}\n`;
transcription += segment_string;
return transcription;
}, ""));

if (interval > 0) {
// Group segments based on interval
const groupedSegments: Record<string, { start: number, end: number, texts: string[] }> = {};
segments.forEach(segment => {
// Determine which interval the segment's start time falls into
const intervalStart = Math.floor(segment.start / interval) * interval;
if (!groupedSegments[intervalStart]) {
// The first segment seen for a bucket supplies the bucket's displayed start time.
groupedSegments[intervalStart] = {
start: segment.start,
end: segment.end,
texts: [segment.text]
};
} else {
// Later segments only extend the bucket's end and append their text.
groupedSegments[intervalStart].end = Math.max(groupedSegments[intervalStart].end, segment.end);
groupedSegments[intervalStart].texts.push(segment.text);
}
});

const segment_string = `${start_formatted} - ${end_formatted}: ${segment.text}\n`;
transcription += segment_string;
// Collapse each bucket into a single segment; texts are joined without a separator
// and the result is trimmed.
const bucketedSegments = Object.values(groupedSegments).map(group => ({
start: group.start,
end: group.end,
text: group.texts.join("").trim()
}));
return renderSegments(bucketedSegments);
} else {
// Default behavior: timestamp each segment individually
return renderSegments(segments);
}
return transcription;
}

async getTranscription(file: TFile): Promise<string> {
Expand Down Expand Up @@ -92,11 +128,11 @@
await payloadGenerator(payload_data);

let args = "output=json"; // always output json, so we can have the timestamps if we need them
const { translate, encode, vadFilter, timestamps, wordTimestamps, language, initialPrompt } = this.settings;
args += `&word_timestamps=true`; // always output word timestamps, so we can have the timestamps if we need them
const { translate, encode, vadFilter, language, initialPrompt } = this.settings;
if (translate) args += `&task=translate`;
if (encode !== DEFAULT_SETTINGS.encode) args += `&encode=${encode}`;
if (vadFilter !== DEFAULT_SETTINGS.vadFilter) args += `&vad_filter=${vadFilter}`;
if (timestamps && wordTimestamps !== DEFAULT_SETTINGS.wordTimestamps) args += `&word_timestamps=${wordTimestamps}`;
if (language !== DEFAULT_SETTINGS.language) args += `&language=${language}`;
if (initialPrompt) args += `&initial_prompt=${initialPrompt}`;

Expand Down Expand Up @@ -124,23 +160,24 @@
const preprocessed = preprocessWhisperASRResponse(response.json);
if (this.settings.debug) console.log("Preprocessed response:", preprocessed);

if (
this.settings.wordTimestamps
&& preprocessed.segments.some((segment: WhisperASRSegment) => segment.wordTimestamps)
) {
// Create segments for each word timestamp if word timestamps are available and enabled
const wordSegments = preprocessed.segments
.reduce((acc: components["schemas"]["TimestampedTextSegment"][], segment: WhisperASRSegment) => {
if (segment.wordTimestamps) {
acc.push(...segment.wordTimestamps.map(wordTimestamp => ({
start: wordTimestamp.start,
end: wordTimestamp.end,
text: wordTimestamp.word
} as components["schemas"]["TimestampedTextSegment"])));
}
return acc;
}, []);
// Create segments for each word timestamp if word timestamps are available
const wordSegments = preprocessed.segments
.reduce((acc: components["schemas"]["TimestampedTextSegment"][], segment: WhisperASRSegment) => {
if (segment.wordTimestamps) {
acc.push(...segment.wordTimestamps.map(wordTimestamp => ({
start: wordTimestamp.start,
end: wordTimestamp.end,
text: wordTimestamp.word
} as components["schemas"]["TimestampedTextSegment"])));
}
return acc;
}, []);

if (this.settings.wordTimestamps) {
return this.segmentsToTimestampedString(wordSegments, this.settings.timestampFormat);
} else if (parseInt(this.settings.timestampInterval)) {
// Feed the function word segments with the interval
return this.segmentsToTimestampedString(wordSegments, this.settings.timestampFormat, parseInt(this.settings.timestampInterval));
} else if (this.settings.timestamps) {
// Use existing segment-to-string functionality if only segment timestamps are needed
const segments = preprocessed.segments.map((segment: WhisperASRSegment) => ({
Expand All @@ -151,7 +188,10 @@
return this.segmentsToTimestampedString(segments, this.settings.timestampFormat);
} else if (preprocessed.segments) {
// Concatenate all segments into a single string if no timestamps are required
return preprocessed.segments.map((segment: WhisperASRSegment) => segment.text).join("\n");
return preprocessed.segments
.map((segment: WhisperASRSegment) => segment.text)
.map(s => s.trim())
.join("\n");
} else {
// Fallback to full text if no segments are there
return preprocessed.text;
Expand Down
Loading