From e28a9b976e17764466cd5464b5ba2e6f7f8037e7 Mon Sep 17 00:00:00 2001 From: Mike Dallas Date: Fri, 10 Nov 2023 01:04:07 +0000 Subject: [PATCH] run transcribe tasks in parallel --- Cargo.toml | 2 ++ src/main.rs | 37 ++++++++++++++++++++++++++++++++----- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6c33293..9926a57 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,8 @@ edition = "2021" [dependencies] anyhow = "1.0.69" clap = { version = "4.1.8", features = ["derive", "cargo"] } +futures = "0.3.29" +log = "0.4.20" reqwest = { version = "0.11.14", features = ["json", "multipart", "stream"] } serde = { version = "1.0.153", features = ["derive"] } serde_json = "1.0.94" diff --git a/src/main.rs b/src/main.rs index 2827cca..8557ad7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,9 @@ use clap::{Parser, crate_authors, crate_version, crate_description}; use std::path::PathBuf; -use tokio::{self, fs::File, io::AsyncWriteExt}; +use tokio::{self, task}; use tempdir::TempDir; +use futures::future::join_all; +use log::debug; pub mod gpt; pub mod whisper; @@ -26,16 +28,41 @@ async fn main() { let tmp_dir = TempDir::new("audio").expect("Could not create temporary directory"); let segments = util::split_file(args.file.clone(), &tmp_dir).await.expect("Could not split file"); - let mut transcribed = Vec::new(); + + let mut transcribe_tasks = vec![]; - let client = whisper::WhisperClient::new(api_key.clone()); for segment in segments { - let segment = client.transcribe(segment).await.expect("Could not transcribe audio"); - transcribed.push(segment); + let client = whisper::WhisperClient::new(api_key.clone()); + let task = task::spawn(async move { + debug!("Transcribing segment {:?}", segment.index); + let res = client.transcribe(segment).await; + + match res { + Ok(segment) => { + debug!("End segment {:?}", segment.index); + return segment + }, + Err(e) => panic!("Error transcribing segment: {}", e), + } + }); + transcribe_tasks.push(task); } + let results: Vec<_> = join_all(transcribe_tasks).await.into_iter().collect(); + + tmp_dir.close().expect("Could not delete temporary directory"); + let mut transcribed = Vec::new(); + + for result in results { + match result { + Ok(segment) => transcribed.push(segment), + + Err(e) => eprintln!("Task failed: {}", e), + } + } + let transcript = transcribed.iter().map(|segment| { segment.transcript.clone() }).collect::>().join("\n");