Skip to content

Commit

Permalink
fix(iOS): Event listener for volume changes (#121)
Browse files Browse the repository at this point in the history
Voice.onSpeechVolumeChanged(event) can work with iOS,
Here I am normalising the dB value to a 0–10 scale, similar to the values Android provides.
Please suggest the best approach: should I normalise it, or provide the raw dB value as obtained (raw dB scales from 0 to -80 dB)?

Co-authored-by: Jason Safaiyeh <[email protected]>
  • Loading branch information
tejachundru and safaiyeh committed Aug 21, 2020
1 parent 601d241 commit 177f1a8
Showing 1 changed file with 56 additions and 11 deletions.
67 changes: 56 additions & 11 deletions ios/Voice/Voice.m
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#import <React/RCTUtils.h>
#import <React/RCTEventEmitter.h>
#import <Speech/Speech.h>
#import <Accelerate/Accelerate.h>


@interface Voice () <SFSpeechRecognizerDelegate>

Expand All @@ -19,7 +21,9 @@ @interface Voice () <SFSpeechRecognizerDelegate>
@property (nonatomic) NSString *sessionId;
/** Previous category the user was on prior to starting speech recognition */
@property (nonatomic) NSString* priorAudioCategory;

/** Volume level Metering*/
@property float averagePowerForChannel0;
@property float averagePowerForChannel1;

@end

Expand Down Expand Up @@ -218,6 +222,35 @@ - (void) setupAndStartRecognizing:(NSString*)localeStr {
// Start recording and append recording buffer to speech recognizer
@try {
[mixer installTapOnBus:0 bufferSize:1024 format:recordingFormat block:^(AVAudioPCMBuffer * _Nonnull buffer, AVAudioTime * _Nonnull when) {
//Volume Level Metering
//Buffer frame can be reduced, if you need more output values
[buffer setFrameLength:4096];
UInt32 inNumberFrames = buffer.frameLength;
float LEVEL_LOWPASS_TRIG = 0.5;
if(buffer.format.channelCount>0)
{
Float32* samples = (Float32*)buffer.floatChannelData[0];
Float32 avgValue = 0;

vDSP_maxmgv((Float32*)samples, 1, &avgValue, inNumberFrames);
self.averagePowerForChannel0 = (LEVEL_LOWPASS_TRIG*((avgValue==0)?-100:20.0*log10f(avgValue))) + ((1-LEVEL_LOWPASS_TRIG)*self.averagePowerForChannel0) ;
self.averagePowerForChannel1 = self.averagePowerForChannel0;
}

if(buffer.format.channelCount>1)
{
Float32* samples = (Float32*)buffer.floatChannelData[1];
Float32 avgValue = 0;

vDSP_maxmgv((Float32*)samples, 1, &avgValue, inNumberFrames);
self.averagePowerForChannel1 = (LEVEL_LOWPASS_TRIG*((avgValue==0)?-100:20.0*log10f(avgValue))) + ((1-LEVEL_LOWPASS_TRIG)*self.averagePowerForChannel1) ;

}
// Normalizing the Volume Value on scale of (0-10)
self.averagePowerForChannel1 = [self _normalizedPowerLevelFromDecibels:self.averagePowerForChannel1]*10;
NSNumber *value = [NSNumber numberWithFloat:self.averagePowerForChannel1];
[self sendEventWithName:@"onSpeechVolumeChanged" body:@{@"value": value}];

// Todo: write recording buffer to file (if user opts in)
if (self.recognitionRequest != nil) {
[self.recognitionRequest appendAudioPCMBuffer:buffer];
Expand All @@ -241,17 +274,29 @@ - (void) setupAndStartRecognizing:(NSString*)localeStr {
}
}

/// Maps an average-power reading in decibels (dBFS, roughly -80 ... 0) onto a
/// normalized linear loudness value in [0, 1].
///
/// @param decibels Average power in dB full scale. Values at or below -80 dB,
///        or an exact 0.0 (the uninitialized/sentinel reading used by the
///        metering tap), are treated as silence.
/// @return A value in [0.0, 1.0]; the caller scales it (e.g. *10) before
///         emitting the onSpeechVolumeChanged event.
- (CGFloat)_normalizedPowerLevelFromDecibels:(CGFloat)decibels {
    // -80 dB is the noise floor; an exact 0.0 is the "no signal yet" sentinel
    // (a real 0 dBFS reading never survives the low-pass smoothing upstream).
    if (decibels < -80.0f || decibels == 0.0f) {
        return 0.0f;
    }
    // Convert dB to linear amplitude (10^(dB/20)), rescale so that
    // -80 dB -> 0 and 0 dB -> 1, then take the square root to flatten the
    // curve toward perceived loudness. Hoist the constant floor amplitude
    // instead of recomputing powf(10, 0.05 * -80) twice per call.
    const float floorAmplitude = powf(10.0f, 0.05f * -80.0f);
    float amplitude = powf(10.0f, 0.05f * (float)decibels);
    float power = sqrtf((amplitude - floorAmplitude) / (1.0f - floorAmplitude));
    // Clamp: smoothing can momentarily push the reading above full scale.
    return (power < 1.0f) ? power : 1.0f;
}

/// RCTEventEmitter override: the full set of event names this native module
/// may emit to JavaScript. Any name passed to -sendEventWithName:body: must
/// appear here, or React Native raises an assertion at runtime.
- (NSArray<NSString *> *)supportedEvents
{
    return @[
        @"onSpeechResults",
        @"onSpeechStart",
        @"onSpeechPartialResults",
        @"onSpeechError",
        @"onSpeechEnd",
        @"onSpeechRecognized",
        @"onSpeechVolumeChanged"
    ];
}

- (void) sendResult:(NSDictionary*)error :(NSString*)bestTranscription :(NSArray*)transcriptions :(NSNumber*)isFinal {
Expand All @@ -262,7 +307,7 @@ - (void) sendResult:(NSDictionary*)error :(NSString*)bestTranscription :(NSArray
[self sendEventWithName:@"onSpeechResults" body:@{@"value":@[bestTranscription]} ];
}
if (transcriptions != nil) {
[self sendEventWithName:@"onSpeechPartialResults" body:@{@"value":transcriptions} ];
[self sendEventWithName:@"onSpeechPartialResults" body:@{@"value":transcriptions}];
}
if (isFinal != nil) {
[self sendEventWithName:@"onSpeechRecognized" body: @{@"isFinal": isFinal}];
Expand Down Expand Up @@ -324,7 +369,7 @@ - (void)speechRecognizer:(SFSpeechRecognizer *)speechRecognizer availabilityDidC
[self sendResult:RCTMakeError(@"Speech recognition already started!", nil, nil) :nil :nil :nil];
return;
}

[SFSpeechRecognizer requestAuthorization:^(SFSpeechRecognizerAuthorizationStatus status) {
switch (status) {
case SFSpeechRecognizerAuthorizationStatusNotDetermined:
Expand Down

0 comments on commit 177f1a8

Please sign in to comment.