diff --git a/examples/azure/src/batch-create-transcription.rs b/examples/azure/src/batch-create-transcription.rs index 77416b6..1f33029 100644 --- a/examples/azure/src/batch-create-transcription.rs +++ b/examples/azure/src/batch-create-transcription.rs @@ -18,7 +18,11 @@ async fn main() -> Result<(), Box> { let trans = trans.status().await?; if let Some(Status::Succeeded) = trans.status { let results = trans.files().await?; - println!("{:#?}", results.values); + let files = results.values; + + if files.len() > 0 { + println!("{:#?}", files.get(0).unwrap().file().await?); + } } Ok(()) diff --git a/rust-ai/src/azure/apis/speech.rs b/rust-ai/src/azure/apis/speech.rs index 235a8e6..96abf8b 100644 --- a/rust-ai/src/azure/apis/speech.rs +++ b/rust-ai/src/azure/apis/speech.rs @@ -43,7 +43,7 @@ use crate::azure::{ common::{MicrosoftOutputFormat, ResponseExpectation, ResponseType}, speech::{ entity::EntityReference, - file::PaginatedFiles, + file::{File, PaginatedFiles}, filter::FilterOperator, health::ServiceHealth, transcription::{Status, Transcription}, @@ -285,10 +285,10 @@ impl Transcription { } /// Create a new audio transcription job. - pub async fn create(self) -> Result> { + pub async fn create(&self) -> Result> { return if let ResponseType::Text(text) = request_post_endpoint( &SpeechServiceEndpoint::Post_Create_Transcription_v3_1, - self, + self.clone(), ResponseExpectation::Text, None, ) @@ -320,7 +320,7 @@ impl Transcription { /// /// This will only succeed when you've submitted the initial batch create /// request to Azure endpoint. - pub async fn status(self) -> Result> { + pub async fn status(&self) -> Result> { let text = request_get_endpoint( &SpeechServiceEndpoint::Get_Transcription_v3_1, None, @@ -348,7 +348,7 @@ impl Transcription { } /// Get batch transcription result from Azure endpoint - pub async fn files(self) -> Result> { + pub async fn files(&self) -> Result> { if let None = self.status.clone() { return Err("You should submit the create request first.".into()); } else { @@ -386,7 +386,7 @@ impl Transcription { return match serde_json::from_str::(&text) { Ok(files) => Ok(files), Err(e) => { - warn!(target: "azure", "Unable to parse transcription status result: `{:#?}`", e); + warn!(target: "azure", "Unable to parse transcription files list result: `{:#?}`", e); match serde_json::from_str::(&text) { Ok(error) => { println!("{:#?}", error); @@ -415,3 +415,40 @@ impl PaginatedFiles { } } } + +impl File { + /// Get file info from Azure transcription endpoint. + pub async fn file(&self) -> Result> { + let (trans_id, file_id) = self.file_id()?; + + let mut params = HashMap::::new(); + if let Some(sas) = self.sas_validity_in_seconds.clone() { + params.insert("sasValidityInSeconds".into(), sas.to_string()); + } + + let text = request_get_endpoint( + &SpeechServiceEndpoint::Get_Transcription_File_v3_1, + Some(params), + Some(format!("{}/files/{}", trans_id, file_id)), + ) + .await?; + + return match serde_json::from_str::(&text) { + Ok(file) => Ok(file), + Err(e) => { + warn!(target: "azure", "Unable to parse transcription result file: `{:#?}`", e); + match serde_json::from_str::(&text) { + Ok(error) => { + println!("{:#?}", error); + error!(target: "azure", "Error from Azure: `{:?}`", e); + Err(Box::new(e)) + } + Err(e) => { + error!(target: "azure", "Unable to parse error response: `{:?}`", e); + Err(Box::new(e)) + } + } + } + }; + } +} diff --git a/rust-ai/src/azure/endpoint.rs b/rust-ai/src/azure/endpoint.rs index 871ac86..e42b47c 100644 --- a/rust-ai/src/azure/endpoint.rs +++ b/rust-ai/src/azure/endpoint.rs @@ -21,6 +21,7 @@ pub enum SpeechServiceEndpoint { Post_Create_Transcription_v3_1, Get_Transcription_v3_1, Get_Transcription_Files_v3_1, + Get_Transcription_File_v3_1, } impl SpeechServiceEndpoint { @@ -60,6 +61,11 @@ impl SpeechServiceEndpoint { "https://{}.api.cognitive.microsoft.com/speechtotext/v3.1/transcriptions/", region ), + + Self::Get_Transcription_File_v3_1 => format!( + "https://{}.api.cognitive.microsoft.com/speechtotext/v3.1/transcriptions/", + region + ), } } } diff --git a/rust-ai/src/azure/types/speech/file.rs b/rust-ai/src/azure/types/speech/file.rs index a33eba1..84f6721 100644 --- a/rust-ai/src/azure/types/speech/file.rs +++ b/rust-ai/src/azure/types/speech/file.rs @@ -1,3 +1,5 @@ +use lazy_static::lazy_static; +use regex::Regex; use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, Deserialize, Serialize)] @@ -38,6 +40,37 @@ pub struct File { /// Must be a valid URI string. #[serde(rename = "self")] pub _self: String, + + /// Format - int32. The duration in seconds that an SAS url should be valid. + /// The default duration is 12 hours. When using BYOS (https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-encryption-of-data-at-rest#bring-your-own-storage-byos-for-customization-and-logging): + /// A value of 0 means that a plain blob URI without SAS token will be + /// generated. + #[serde(skip)] + pub sas_validity_in_seconds: Option, +} + +lazy_static! { + static ref RE_ID_EXTRACT: Regex = + Regex::new(r"/(?P[\da-z-]+)/files/(?P[\da-z-]+)$").unwrap(); +} + +impl File { + /// Get file ID from a File instance. + pub fn file_id(&self) -> Result<(String, String), Box> { + if let Some(captures) = RE_ID_EXTRACT.captures(&self._self) { + let error_message = match (captures.name("trans_id"), captures.name("file_id")) { + (None, None) => "Neither transcription ID nor file ID found in `self`", + (None, Some(_)) => "Transcription ID not found in `self`", + (Some(_), None) => "File ID not found in `self`", + (Some(trans_id), Some(file_id)) => { + return Ok((trans_id.as_str().into(), file_id.as_str().into())); + } + }; + Err(format!("{}: `{}`", error_message, self._self).into()) + } else { + Err(format!("Incorrect format: `{}`", self._self).into()) + } + } } /// Type of data. diff --git a/rust-ai/src/azure/types/speech/transcription.rs b/rust-ai/src/azure/types/speech/transcription.rs index 9006de2..e61ef59 100644 --- a/rust-ai/src/azure/types/speech/transcription.rs +++ b/rust-ai/src/azure/types/speech/transcription.rs @@ -180,8 +180,8 @@ lazy_static! { impl Transcription { /// Get transcription ID from a batch creation job. - pub fn transcription_id(self) -> Result> { - if let Some(_self) = self._self { + pub fn transcription_id(&self) -> Result> { + if let Some(_self) = self._self.clone() { if let Some(captures) = RE_TRANS_ID.captures(&_self) { if let Some(trans_id) = captures.name("trans_id") { Ok(trans_id.as_str().into())