Skip to content

Commit

Permalink
merge PA codes for C# (#2144)
Browse files Browse the repository at this point in the history
Co-authored-by: v-jizh23 <[email protected]>
  • Loading branch information
jinshan1979 and v-jizh23 authored Nov 23, 2023
1 parent b650226 commit 8d2f204
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 7 deletions.
4 changes: 4 additions & 0 deletions samples/csharp/dotnet-windows/console/samples/samples.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@
<None Include="App.config" />
</ItemGroup>
<ItemGroup>
<Content Include="..\..\..\sharedcontent\console\pronunciation_assessment_fall.wav">
<Link>pronunciation_assessment_fall.wav</Link>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="..\..\..\sharedcontent\console\whatstheweatherlike.wav">
<Link>whatstheweatherlike.wav</Link>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
Expand Down
5 changes: 5 additions & 0 deletions samples/csharp/sharedcontent/console/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,7 @@ private static void PronunciationAssessment()
Console.WriteLine(" 2. Pronunciation assessment with stream input.");
Console.WriteLine(" 3. Pronunciation assessment configured with json.");
Console.WriteLine(" 4. Pronunciation assessment continuous with file.");
Console.WriteLine(" 5. Pronunciation assessment with content assessment.");
Console.WriteLine("");
Console.Write(prompt);

Expand All @@ -821,6 +822,10 @@ private static void PronunciationAssessment()
case ConsoleKey.NumPad4:
SpeechRecognitionSamples.PronunciationAssessmentContinuousWithFile().Wait();
break;
case ConsoleKey.D5:
case ConsoleKey.NumPad5:
SpeechRecognitionSamples.PronunciationAssessmentWithContentAssessment().Wait();
break;
case ConsoleKey.D0:
case ConsoleKey.NumPad0:
Console.WriteLine(back);
Expand Down
Binary file not shown.
108 changes: 101 additions & 7 deletions samples/csharp/sharedcontent/console/speech_recognition_samples.cs
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,8 @@ public static async Task PronunciationAssessmentWithMicrophoneAsync()
var pronunciationConfig = new PronunciationAssessmentConfig(referenceText,
GradingSystem.HundredMark, Granularity.Phoneme, true);

pronunciationConfig.EnableProsodyAssessment();

// Creates a speech recognizer for the specified language, using microphone as audio input.
using (var recognizer = new SpeechRecognizer(config, language))
{
Expand Down Expand Up @@ -1023,7 +1025,7 @@ public static async Task PronunciationAssessmentWithMicrophoneAsync()

var pronunciationResult = PronunciationAssessmentResult.FromResult(result);
Console.WriteLine(
$" Accuracy score: {pronunciationResult.AccuracyScore}, Pronunciation score: {pronunciationResult.PronunciationScore}, Completeness score : {pronunciationResult.CompletenessScore}, FluencyScore: {pronunciationResult.FluencyScore}");
$" Accuracy score: {pronunciationResult.AccuracyScore}, Prosody Score: {pronunciationResult.ProsodyScore}, Pronunciation score: {pronunciationResult.PronunciationScore}, Completeness score : {pronunciationResult.CompletenessScore}, FluencyScore: {pronunciationResult.FluencyScore}");

Console.WriteLine(" Word-level details:");

Expand Down Expand Up @@ -1091,6 +1093,9 @@ private static async Task PronunciationAssessmentWithStreamInternalAsync(SpeechC
{
// create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement.
var pronAssessmentConfig = new PronunciationAssessmentConfig(referenceText, GradingSystem.HundredMark, Granularity.Phoneme, false);

pronAssessmentConfig.EnableProsodyAssessment();

pronAssessmentConfig.ApplyTo(speechRecognizer);

audioInputStream.Write(audioData);
Expand Down Expand Up @@ -1134,12 +1139,15 @@ public static async Task PronunciationAssessmentContinuousWithFile()

var pronConfig = new PronunciationAssessmentConfig(referenceText, GradingSystem.HundredMark, Granularity.Phoneme, enableMiscue);

pronConfig.EnableProsodyAssessment();

pronConfig.ApplyTo(recognizer);

var recognizedWords = new List<string>();
var pronWords = new List<Word>();
var finalWords = new List<Word>();
var fluency_scores = new List<double>();
var prosody_scores = new List<double>();
var durations = new List<int>();
var done = false;

Expand All @@ -1156,9 +1164,10 @@ public static async Task PronunciationAssessmentContinuousWithFile()
recognizer.Recognized += (s, e) => {
Console.WriteLine($"RECOGNIZED: Text={e.Result.Text}");
var pronResult = PronunciationAssessmentResult.FromResult(e.Result);
Console.WriteLine($" Accuracy score: {pronResult.AccuracyScore}, pronunciation score: {pronResult.PronunciationScore}, completeness score: {pronResult.CompletenessScore}, fluency score: {pronResult.FluencyScore}");
Console.WriteLine($" Accuracy score: {pronResult.AccuracyScore}, prosody score:{pronResult.ProsodyScore}, pronunciation score: {pronResult.PronunciationScore}, completeness score: {pronResult.CompletenessScore}, fluency score: {pronResult.FluencyScore}");

fluency_scores.Add(pronResult.FluencyScore);
prosody_scores.Add(pronResult.ProsodyScore);

foreach(var word in pronResult.Words)
{
Expand Down Expand Up @@ -1236,18 +1245,21 @@ public static async Task PronunciationAssessmentContinuousWithFile()
finalWords = pronWords;
}

//We can calculate whole accuracy by averaging
// We can calculate whole accuracy by averaging
var filteredWords = finalWords.Where(item => item.ErrorType != "Insertion");
var accuracyScore = filteredWords.Sum(item => item.AccuracyScore) / filteredWords.Count();

//Re-calculate fluency score
// Recalculate the prosody score by averaging
var prosodyScore = prosody_scores.Average();

// Recalculate fluency score
var fluencyScore = fluency_scores.Zip(durations, (x, y) => x * y).Sum() / durations.Sum();

//Calculate whole completeness score
// Calculate whole completeness score
var completenessScore = (double)pronWords.Count(item => item.ErrorType == "None") / referenceWords.Length * 100;
completenessScore = completenessScore <= 100 ? completenessScore : 100;

Console.WriteLine("Paragraph accuracy score: {0}, completeness score: {1}, fluency score: {2}", accuracyScore, completenessScore, fluencyScore);
Console.WriteLine("Paragraph accuracy score: {0}, prosody score: {1} completeness score: {2}, fluency score: {3}", accuracyScore, prosodyScore, completenessScore, fluencyScore);

for (int idx = 0; idx < finalWords.Count(); idx++)
{
Expand All @@ -1259,6 +1271,86 @@ public static async Task PronunciationAssessmentContinuousWithFile()
}
}

// Pronunciation assessment with content score.
// See more information at https://aka.ms/csspeech/pa
//
// Runs continuous recognition over "pronunciation_assessment_fall.wav" with prosody
// assessment and topic-based content assessment enabled, then prints the grammar,
// vocabulary and topic scores from the last content assessment result received.
public static async Task PronunciationAssessmentWithContentAssessment()
{
    // Creates an instance of a speech config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");

    // Creates a speech recognizer using file as audio input.
    using (var audioInput = AudioConfig.FromWavFileInput(@"pronunciation_assessment_fall.wav"))
    {
        // Switch to other languages for example Spanish, change language "en-US" to "es-ES". Language name is not case sensitive.
        var language = "en-US";

        using (var recognizer = new SpeechRecognizer(config, language, audioInput))
        {
            var theTopic = "the season of the fall";

            bool enableMiscue = false;

            // No reference text is supplied (empty string); the content is assessed against the topic instead.
            var pronConfig = new PronunciationAssessmentConfig("", GradingSystem.HundredMark, Granularity.Phoneme, enableMiscue);

            pronConfig.EnableProsodyAssessment();
            pronConfig.EnableContentAssessmentWithTopic(theTopic);

            pronConfig.ApplyTo(recognizer);

            var recognizedTexts = new List<string>();
            var contentResults = new List<ContentAssessmentResult>();

            // Completed by whichever of SessionStopped / Canceled fires first. Continuations are
            // forced to run asynchronously so the code after the await does not execute inline
            // on the thread that raised the event.
            var stopRecognition = new TaskCompletionSource<int>(TaskCreationOptions.RunContinuationsAsynchronously);

            recognizer.SessionStopped += (s, e) => {
                Console.WriteLine("CLOSING on {0}", e);
                stopRecognition.TrySetResult(0);
            };

            recognizer.Canceled += (s, e) => {
                Console.WriteLine("CLOSING on {0}", e);
                stopRecognition.TrySetResult(0);
            };

            recognizer.Recognized += (s, e) => {
                // Ignore results whose text is empty or consists only of periods.
                if (!string.IsNullOrEmpty(e.Result.Text?.TrimEnd('.')))
                {
                    recognizedTexts.Add(e.Result.Text);
                }

                // Keep only results that actually carry a content assessment, so the last
                // list entry is always safe to dereference when printing the scores.
                var contentResult = PronunciationAssessmentResult.FromResult(e.Result)?.ContentAssessmentResult;
                if (contentResult != null)
                {
                    contentResults.Add(contentResult);
                }
            };

            // Starts continuous recognition.
            await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

            // Waits until the session stops or recognition is canceled.
            await stopRecognition.Task.ConfigureAwait(false);

            // Stops recognition.
            await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);

            // Content assessment result is in the contentResults
            Console.WriteLine("Content assessment for: {0}", string.Join(" ", recognizedTexts));

            if (contentResults.Count > 0)
            {
                var content = contentResults[contentResults.Count - 1];
                Console.WriteLine("Content Assessment results:\n\tGrammar score: {0}, Vocabulary score: {1}, Topic score: {2}", content.GrammarScore, content.VocabularyScore, content.TopicScore);
            }
            else
            {
                Console.WriteLine("The contentResult list is empty!");
            }
        }
    }
}

private static async Task<RecognitionResult> RecognizeOnceAsyncInternal(string key, string region)
{
RecognitionResult recognitionResult = null;
Expand Down Expand Up @@ -1391,6 +1483,8 @@ public static async Task PronunciationAssessmentConfiguredWithJson()
var pronunciationConfig = PronunciationAssessmentConfig.FromJson(json_config);
pronunciationConfig.ReferenceText = referenceText;

pronunciationConfig.EnableProsodyAssessment();

// Creates a speech recognizer for the specified language
using (var recognizer = new SpeechRecognizer(config, language, audioConfig))
{
Expand All @@ -1409,7 +1503,7 @@ public static async Task PronunciationAssessmentConfiguredWithJson()

var pronunciationResult = PronunciationAssessmentResult.FromResult(result);
Console.WriteLine(
$" Accuracy score: {pronunciationResult.AccuracyScore}, Pronunciation score: {pronunciationResult.PronunciationScore}, Completeness score : {pronunciationResult.CompletenessScore}, FluencyScore: {pronunciationResult.FluencyScore}");
$" Accuracy score: {pronunciationResult.AccuracyScore}, Prosody Score: {pronunciationResult.ProsodyScore}, Pronunciation score: {pronunciationResult.PronunciationScore}, Completeness score : {pronunciationResult.CompletenessScore}, FluencyScore: {pronunciationResult.FluencyScore}");

Console.WriteLine(" Word-level details:");

Expand Down

0 comments on commit 8d2f204

Please sign in to comment.