Add: Azure speech cognitive service - retrieve transcription report and result content (v3.1)
dongsxyz · Mar 28, 2023 · 5aba965 · 5aba965
1 parent e5dfb30
commit 5aba965
Show file tree

Hide file tree

Showing 4 changed files with 372 additions and 23 deletions.
diff --git a/examples/azure/src/batch-transcription.rs b/examples/azure/src/batch-transcription.rs
@@ -1,36 +1,67 @@
 use std::{error::Error, time::Duration};
 
-use rust_ai::azure::{types::speech::Status, Speech};
+use rust_ai::azure::{
+    types::speech::{file::FileType, transcription::TranscriptionProperties, Status},
+    Speech,
+};
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn Error>> {
     std::env::set_var("RUST_LOG", "debug");
     std::env::set_var("RUST_BACKTRACE", "1");
     log4rs::init_file("log4rs.yml", Default::default()).unwrap();
 
-    let trans = Speech::new_transcription("Test".into())
+    let mut trans = Speech::new_transcription("Test".into())
         .content_urls(vec![String::from(
             "https://crbn.us/whatstheweatherlike.wav",
         )])
+        .properties(|props| {
+            if let Some(props) = props {
+                props
+            } else {
+                TranscriptionProperties::default()
+            }
+            .display_form_word_level_timestamps_enabled(true)
+        })
         .create()
         .await?;
 
-    std::thread::sleep(Duration::from_secs(5));
-
     // Check transcription job status.
-    let trans = trans.status().await?;
-    if let Some(Status::Succeeded) = trans.status {
-        // Get transcription result files.
-        let results = trans.files().await?;
-        let files = results.values.clone();
+    while [Status::Running, Status::NotStarted]
+        .contains(&trans.status().await?.status.clone().unwrap())
+    {
+        std::thread::sleep(Duration::from_secs(2));
+    }
+
+    // Get transcription result files.
+    let results = trans.files().await?;
+    let files = results.values.clone();
 
-        if files.len() > 0 {
-            // Get transcription report.
-            let report = results.report().await?;
-            println!("{:#?}", report);
+    if files.len() > 0 {
+        // Get transcription report.
+        let _report = results.report().await?;
+        // println!("{:#?}", _report);
 
-            // Get transcription result file via individual API endpoint.
-            let file = files.get(0).unwrap().file().await?;
-            println!("{:#?}", file);
+        // Get transcription result file via individual API endpoint.
+        for file in files.iter() {
+            match file.details().await? {
+                FileType::TranscriptionReport(report) => {
+                    println!(
+                        "Completed transcription: {} of {}",
+                        report.successful_transcriptions_count,
+                        report.successful_transcriptions_count + report.failed_transcriptions_count
+                    );
+                }
+                FileType::Transcription(transcription) => {
+                    println!(
+                        "Transcription output: {}",
+                        transcription
+                            .combined_recognized_phrases
+                            .get(0)
+                            .unwrap()
+                            .display
+                    );
+                }
+            };
         }
     }
 

diff --git a/rust-ai/src/azure/apis/speech.rs b/rust-ai/src/azure/apis/speech.rs
@@ -43,10 +43,13 @@ use crate::azure::{
         common::{MicrosoftOutputFormat, ResponseExpectation, ResponseType},
         speech::{
             entity::EntityReference,
-            file::{File, FileKind, PaginatedFiles},
+            file::{File, FileKind, FileType, PaginatedFiles},
             filter::FilterOperator,
             health::ServiceHealth,
-            transcription::{Status, Transcription, TranscriptionReport},
+            transcription::{
+                Status, Transcription, TranscriptionProperties, TranscriptionReport,
+                TranscriptionResult,
+            },
             ErrorResponse,
         },
         tts::Voice,
@@ -247,6 +250,17 @@ impl Transcription {
         Self { locale, ..self }
     }
 
+    /// Sets the transcription properties from the value returned by `f`.
+    ///
+    /// `f` receives the properties currently stored on this instance (`None`
+    /// when none have been set) and returns the properties to store; the
+    /// returned value is always stored as `Some(..)`.
+    ///
+    /// The closure is called exactly once, so any `FnOnce` is accepted. This
+    /// includes every closure that satisfied the earlier `FnMut` bound, as
+    /// well as closures that move captured values into the result.
+    pub fn properties<F>(self, f: F) -> Self
+    where
+        F: FnOnce(Option<TranscriptionProperties>) -> TranscriptionProperties,
+    {
+        Self {
+            properties: Some(f(self.properties)),
+            ..self
+        }
+    }
+
     /// [Custom Speech]
     /// Gets the list of custom models for the authenticated subscription.
     ///
@@ -320,7 +334,28 @@ impl Transcription {
     ///
     /// This will only succeed when you've submitted the initial batch create
     /// request to Azure endpoint.
-    pub async fn status(&self) -> Result<Transcription, Box<dyn std::error::Error>> {
+    ///
+    /// It requires a mutable Transcription instance because each [`status()`]
+    /// call copies the freshly fetched `status` and `last_action_date_time`
+    /// back into `self`. This lets you poll for status updates periodically
+    /// in a `while` loop.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use std::time::Duration;
+    /// use rust_ai::azure::{
+    ///     types::speech::Status,
+    ///     Speech,
+    /// };
+    /// let mut trans = Speech::new_transcription("Test".into()).create().await?;
+    ///
+    /// // Check transcription job status.
+    /// while [Status::Running, Status::NotStarted].contains(&trans.status().await?.status.clone().unwrap())
+    /// {
+    ///     std::thread::sleep(Duration::from_secs(2));
+    /// }
+    /// ```
+    pub async fn status(&mut self) -> Result<Transcription, Box<dyn std::error::Error>> {
         let text = request_get_endpoint(
             &SpeechServiceEndpoint::Get_Transcription_v3_1,
             None,
@@ -329,7 +364,11 @@ impl Transcription {
         .await?;
 
         return match serde_json::from_str::<Transcription>(&text) {
-            Ok(trans) => Ok(trans),
+            Ok(trans) => {
+                self.last_action_date_time = trans.last_action_date_time.clone();
+                self.status = trans.status.clone();
+                Ok(trans)
+            }
             Err(e) => {
                 warn!(target: "azure", "Unable to parse transcription status result: `{:#?}`", e);
                 match serde_json::from_str::<ErrorResponse>(&text) {
@@ -415,7 +454,8 @@ impl PaginatedFiles {
         }
     }
 
-    pub async fn report(self) -> Result<Option<TranscriptionReport>, Box<dyn std::error::Error>> {
+    /// Only obtain the report file from Azure container.
+    pub async fn report(&self) -> Result<Option<TranscriptionReport>, Box<dyn std::error::Error>> {
         if self.values.len() == 0 {
             return Ok(None);
         }
@@ -491,4 +531,62 @@ impl File {
             }
         };
     }
+
+    /// Obtain the content of this file from the Azure container, parsed
+    /// according to the file's kind.
+    ///
+    /// The body is requested from `self.links.content_url` and deserialized as
+    /// a [`TranscriptionReport`] or a [`TranscriptionResult`] depending on
+    /// `self.kind`. The parsed value is returned wrapped in the matching
+    /// [`FileType`] variant.
+    ///
+    /// # Errors
+    ///
+    /// * Any error raised while requesting the content URL.
+    /// * The JSON parse error when the body does not match the expected type.
+    ///   The body is then parsed a second time as an [`ErrorResponse`] so the
+    ///   service error can be logged; if that second parse fails as well, its
+    ///   error is the one returned.
+    /// * A message-only error when `self.kind` is a file kind that is not
+    ///   supported yet (returned as an error rather than panicking).
+    pub async fn details(&self) -> Result<FileType, Box<dyn std::error::Error>> {
+        let text = request_get_endpoint(
+            &SpeechServiceEndpoint::None,
+            None,
+            Some(self.links.content_url.clone()),
+        )
+        .await?;
+
+        // Shared failure path for both supported kinds: log the parse error,
+        // then try to interpret the body as an Azure error response so the
+        // actual service error ends up in the log.
+        let parse_failure = |e: serde_json::Error| -> Box<dyn std::error::Error> {
+            warn!(target: "azure", "Unable to parse transcription result file: `{:#?}`", e);
+            match serde_json::from_str::<ErrorResponse>(&text) {
+                Ok(error) => {
+                    // Log the Azure error itself, not the parse error that
+                    // was already reported by the warning above.
+                    error!(target: "azure", "Error from Azure: `{:?}`", error);
+                    Box::new(e)
+                }
+                Err(e) => {
+                    error!(target: "azure", "Unable to parse error response: `{:?}`", e);
+                    Box::new(e)
+                }
+            }
+        };
+
+        match self.kind {
+            FileKind::TranscriptionReport => serde_json::from_str::<TranscriptionReport>(&text)
+                .map(FileType::TranscriptionReport)
+                .map_err(parse_failure),
+            FileKind::Transcription => serde_json::from_str::<TranscriptionResult>(&text)
+                .map(FileType::Transcription)
+                .map_err(parse_failure),
+            // Remaining kinds have no `FileType` variant yet; report that to
+            // the caller through the `Result` instead of aborting the program.
+            _ => Err(format!(
+                "Not yet supported file type `{}`",
+                Into::<String>::into(self.kind.clone())
+            )
+            .into()),
+        }
+    }
 }
diff --git a/rust-ai/src/azure/types/speech/file.rs b/rust-ai/src/azure/types/speech/file.rs
@@ -2,6 +2,8 @@ use lazy_static::lazy_static;
 use regex::Regex;
 use serde::{Deserialize, Serialize};
 
+use super::transcription::{TranscriptionResult, TranscriptionReport};
+
 #[derive(Clone, Debug, Deserialize, Serialize)]
 pub struct PaginatedFiles {
     pub values: Vec<File>,
@@ -144,3 +146,38 @@ pub struct FileProperties {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub duration: Option<String>,
 }
+
+/// Unofficial type representation of a transcription result file. Each
+/// variant holds the data of the corresponding file kind.
+pub enum FileType {
+
+    // NOTE(review): the commented-out variants below are not part of the enum
+    // and carry no payload; presumably they are placeholders for file kinds
+    // that are not modelled yet. Confirm before enabling any of them.
+
+    // /// Type of data is dataset report.
+    // DatasetReport,
+
+    // /// Type of data is audio.
+    // Audio,
+
+    // /// Type of data is language data.
+    // LanguageData,
+
+    // /// Type of data is pronunciation data.
+    // PronunciationData,
+
+    // /// Type of data is acoustic data archive.
+    // AcousticDataArchive,
+
+    // /// Type of data is acoustic data transcription v2.
+    // AcousticDataTranscriptionV2,
+
+    /// Type of data is transcription.
+    Transcription(TranscriptionResult),
+
+    /// Type of data is transcription report.
+    TranscriptionReport(TranscriptionReport),
+
+    // /// Type of data is evaluation details.
+    // EvaluationDetails,
+
+    // /// Type of data is model report.
+    // ModelReport,
+}