Skip to content

Commit

Permalink
1.0-beta3 - Move TTS API settings to Advanced settings, add voice preview, add support for IBM Watson TTS
Browse files Browse the repository at this point in the history
  • Loading branch information
千代田桃 committed Mar 16, 2022
1 parent 1b94e93 commit 706ad36
Show file tree
Hide file tree
Showing 24 changed files with 618 additions and 126 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,5 @@ KaniManabu.xcodeproj/project.xcworkspace/xcuserdata/*.*
/.DS_Store
/Dependencies/MicrosoftCognitiveServicesSpeech.xcframework
/Common/Backend/MicrosoftSpeechConstants.m
/Common/Backend/IBMSpeechConstants.m
IBMSpeechConstants.m
17 changes: 17 additions & 0 deletions Common/Backend/IBMSpeechConstants.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
//
// IBMSpeechConstants.h
// KaniManabu
//
// Created by 千代田桃 on 3/16/22.
//

#import <Foundation/Foundation.h>

NS_ASSUME_NONNULL_BEGIN

// Built-in IBM Watson Text to Speech credentials, used as a fallback when
// the user has not stored their own API key (donated users only — see
// SpeechSynthesis.m). The definitions live in IBMSpeechConstants.m, which
// is gitignored; copy IBMSpeechConstantsSample.m to create it.
//
// Declared at file scope (idiomatically) rather than inside the
// @interface; the exported symbols are unchanged.

/// IBM Watson TTS API key (empty in the checked-in sample).
FOUNDATION_EXPORT NSString *const IBMAPIKey;
/// Base URL of the provisioned IBM Watson TTS service instance.
FOUNDATION_EXPORT NSString *const IBMInstanceURL;

/// Empty marker class retained for compatibility; the constants above are
/// the real API surface of this header.
@interface IBMSpeechConstants : NSObject
@end

NS_ASSUME_NONNULL_END
13 changes: 13 additions & 0 deletions Common/Backend/IBMSpeechConstantsSample.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
//
// IBMSpeechConstants.m
// KaniManabu
//
// Created by 千代田桃 on 3/16/22.
//
// NOTE(review): this is the checked-in sample (IBMSpeechConstantsSample.m).
// Copy it to Common/Backend/IBMSpeechConstants.m — which is gitignored —
// and fill in real credentials to enable the built-in IBM Watson TTS
// fallback.

#import "IBMSpeechConstants.h"

@implementation IBMSpeechConstants
// Fallback IBM Watson TTS credentials; intentionally empty in the sample
// so no secrets land in source control.
NSString *const IBMAPIKey = @"";
// Base URL of the IBM Watson TTS service instance (from the service's
// IBM Cloud credentials page).
NSString *const IBMInstanceURL = @"";
@end
16 changes: 14 additions & 2 deletions Common/Backend/SpeechSynthesis.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,31 @@
//
// Created by 千代田桃 on 2/10/22.
//

#import <Foundation/Foundation.h>
#import <TargetConditionals.h>
#if (TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR)
#import <UIKit/UIKit.h>
#elif TARGET_OS_MAC
#import <AppKit/AppKit.h>
#endif
#import <CoreData/CoreData.h>

NS_ASSUME_NONNULL_BEGIN

/// Singleton that speaks Japanese text using either the macOS system
/// voices, Microsoft Cognitive Services, or IBM Watson TTS (selected via
/// the "ttsvoice" user default), caching synthesized audio in Core Data.
@interface SpeechSynthesis : NSObject
/// Managed object context backing the "Speech" audio cache entity.
@property (strong) NSManagedObjectContext *moc;
/// Identifies which cloud service produced a cached audio clip; persisted
/// in the "serviceid" attribute of the Speech entity.
/// NOTE(review): declared inside the @interface block — unusual placement,
/// kept byte-identical here.
typedef NS_ENUM(int,TTSService) {
TTSMicrosoft = 0,
TTSIBM = 1
};
/// Returns the shared synthesizer instance.
+ (instancetype)sharedInstance;
/// Speaks |text|, using cached audio when available and falling back to
/// the macOS voices when a cloud service fails.
- (void)sayText:(NSString *)text;
// Keychain storage for the Microsoft Azure subscription key.
- (void)storeSubscriptionKey:(NSString *)key;
- (NSString *)getSubscriptionKey;
- (void)removeSubscriptionKey;
// Keychain storage for the IBM Watson credentials dictionary
// (expects keys "apikey" and "url").
- (void)storeIBMAPIKey:(NSDictionary *)data;
- (NSDictionary *)getIBMAPIKey;
- (void)removeIBMAPIKey;
/// Plays a short preview clip for the currently selected voice.
- (void)playSample;
@end

NS_ASSUME_NONNULL_END
131 changes: 123 additions & 8 deletions Common/Backend/SpeechSynthesis.m
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@
#import <MicrosoftCognitiveServicesSpeech/SPXSpeechApi.h>
#import <SAMKeychain/SAMKeychain.h>
#import <AudioToolbox/AudioToolbox.h>
#import <AFNetworking/AFNetworking.h>
#import "IBMSpeechConstants.h"

@interface SpeechSynthesis ()
@property (strong) AFHTTPSessionManager *ibmmanager;
@property (strong) AVSpeechSynthesizer *synthesizer;
@property (strong, nonatomic) dispatch_queue_t privateQueue;
@property (strong) AVAudioPlayer *player;
Expand All @@ -33,6 +36,9 @@ - (instancetype)init {
if (self = [super init]) {
_synthesizer = [AVSpeechSynthesizer new];
_privateQueue = dispatch_queue_create("moe.ateliershiori.KaniManabu.speechsynthesis", DISPATCH_QUEUE_CONCURRENT);
self.ibmmanager = [AFHTTPSessionManager manager];
self.ibmmanager.requestSerializer = [AFJSONRequestSerializer serializer];
self.ibmmanager.responseSerializer = [AFHTTPResponseSerializer serializer];
return self;
}
return nil;
Expand All @@ -44,12 +50,37 @@ - (void)sayText:(NSString *)text {
if ([NSUserDefaults.standardUserDefaults integerForKey:@"ttsvoice"] == 2) {
dispatch_async(self.privateQueue, ^{
// Use Microsoft Speech Synthesis. Get the audio synthesis from Microsoft Speech Services and then save it to Core Data for later use.
NSData *speechData = [self getStoredAudio:text];
NSData *speechData = [self getStoredAudio:text withService:TTSMicrosoft];
if (!speechData) {
// Get the speech audio and save it
[self getSpeechDataFromText:text completionHandler:^(bool success, NSData *audiodata) {
[self getMSSpeechDataFromText:text completionHandler:^(bool success, NSData *audiodata) {
if (success) {
[self saveAudioWithWord:text withAudioData:audiodata];
[self saveAudioWithWord:text withAudioData:audiodata withService:TTSMicrosoft];
self.playing = NO;
}
else {
// Fallback to TTS
[self macOSSayText:text];
self.playing = NO;
}
}];
}
else {
// Play back stored audio
[self playAudioWithData:speechData];
self.playing = NO;
}
});
}
else if ([NSUserDefaults.standardUserDefaults integerForKey:@"ttsvoice"] == 3) {
dispatch_async(self.privateQueue, ^{

NSData *speechData = [self getStoredAudio:text withService:TTSIBM];
if (!speechData) {
// Get the speech audio and save it
[self getIBMSpeechDataFromText:text completionHandler:^(bool success, NSData *audiodata) {
if (success) {
[self saveAudioWithWord:text withAudioData:audiodata withService:TTSIBM];
self.playing = NO;
}
else {
Expand Down Expand Up @@ -77,15 +108,15 @@ - (void)sayText:(NSString *)text {
- (void)macOSSayText:(NSString *)text {
// Use macOS Speech Synthesis
AVSpeechUtterance *utterance = [AVSpeechUtterance speechUtteranceWithString:text];
utterance.voice = [AVSpeechSynthesisVoice voiceWithIdentifier: [NSUserDefaults.standardUserDefaults integerForKey:@"ttsvoice"] == 0 || [NSUserDefaults.standardUserDefaults integerForKey:@"ttsvoice"] == 2 ? @"com.apple.speech.synthesis.voice.kyoko.premium" : @"com.apple.speech.synthesis.voice.otoya.premium"];
utterance.voice = [AVSpeechSynthesisVoice voiceWithIdentifier:[NSUserDefaults.standardUserDefaults integerForKey:@"ttsvoice"] != 1 ? @"com.apple.speech.synthesis.voice.kyoko.premium" : @"com.apple.speech.synthesis.voice.otoya.premium"];
[_synthesizer speakUtterance:utterance];
}

- (NSData *)getStoredAudio:(NSString *)text {
- (NSData *)getStoredAudio:(NSString *)text withService:(TTSService)service {
// Gets the stored audio for the word from the AudioContainer Core Data container
NSFetchRequest *fetchRequest = [NSFetchRequest new];
fetchRequest.entity = [NSEntityDescription entityForName:@"Speech" inManagedObjectContext:_moc];
NSPredicate *predicate = [NSPredicate predicateWithFormat:@"word == %@",text];
NSPredicate *predicate = [NSPredicate predicateWithFormat:@"word == %@ AND serviceid == %i",text, service];
fetchRequest.predicate = predicate;
NSError *error = nil;
NSArray *results = [_moc executeFetchRequest:fetchRequest error:&error];
Expand All @@ -107,7 +138,73 @@ - (void)removeSubscriptionKey {
[SAMKeychain deletePasswordForService:@"KaniManabu" account:@"Azure Subscription Key"];
}

- (void)getSpeechDataFromText:(NSString *)text completionHandler:(void (^)(bool success, NSData *audiodata)) completionHandler {
/// Archives the IBM Watson credential dictionary (keys @"apikey" and
/// @"url" — see -getIBMSpeechDataFromText:) and stores it in the keychain
/// under the "IBM API Key" account.
- (void)storeIBMAPIKey:(NSDictionary *)data {
    // archivedDataWithRootObject: is deprecated since macOS 10.13; use the
    // secure-coding variant (plist types all conform to NSSecureCoding).
    NSError *archiveError = nil;
    NSData *myData = [NSKeyedArchiver archivedDataWithRootObject:data
                                           requiringSecureCoding:YES
                                                           error:&archiveError];
    if (!myData) {
        // Nothing sensible to store; keep the previous keychain entry intact.
        NSLog(@"Failed to archive IBM API credentials: %@", archiveError.localizedDescription);
        return;
    }
    [SAMKeychain setPasswordData:myData forService:@"KaniManabu" account:@"IBM API Key" error:nil];
}

/// Returns the IBM Watson credential dictionary previously stored with
/// -storeIBMAPIKey:, or nil when no credentials are in the keychain.
- (NSDictionary *)getIBMAPIKey {
    NSData *myData = [SAMKeychain passwordDataForService:@"KaniManabu" account:@"IBM API Key"];
    if (myData) {
        // unarchiveObjectWithData: is deprecated since macOS 10.13;
        // unarchive with an explicit class set and secure coding. The
        // stored archive contains only NSDictionary/NSString, so secure
        // unarchiving also reads archives written by the old API.
        NSSet *allowedClasses = [NSSet setWithArray:@[NSDictionary.class, NSString.class]];
        return (NSDictionary *)[NSKeyedUnarchiver unarchivedObjectOfClasses:allowedClasses
                                                                   fromData:myData
                                                                      error:nil];
    }
    return nil;
}

/// Deletes the stored IBM Watson credentials from the keychain; the next
/// IBM synthesis request will fall back to the built-in key (donated
/// users) or fail.
- (void)removeIBMAPIKey {
[SAMKeychain deletePasswordForService:@"KaniManabu" account:@"IBM API Key"];
}

/// Fetches synthesized speech for |text| from the IBM Watson Text to
/// Speech service (voice ja-JP_EmiV3Voice), plays the returned WAV
/// immediately, converts it to AAC, and passes the AAC data to
/// |completionHandler| for caching. Uses the user's stored credentials
/// when present; otherwise falls back to the built-in key, but only for
/// donated users.
- (void)getIBMSpeechDataFromText:(NSString *)text completionHandler:(void (^)(bool success, NSData *audiodata)) completionHandler {
    NSDictionary *parameters = @{@"text" : text};
    NSDictionary *apicred = [self getIBMAPIKey];
    NSString *APIKey = @"";
    if (apicred) {
        APIKey = apicred[@"apikey"];
        if (!APIKey) {
            // Stored credential dictionary is malformed (no "apikey");
            // fail early rather than sending an unauthenticated request.
            completionHandler(false, nil);
            return;
        }
    }
    else {
        if ([NSUserDefaults.standardUserDefaults boolForKey:@"donated"]) {
            APIKey = IBMAPIKey;
        }
        else {
            completionHandler(false, nil);
            return;
        }
    }
    // IBM Watson uses HTTP basic auth with the literal username "apikey".
    [self.ibmmanager.requestSerializer setAuthorizationHeaderFieldWithUsername:@"apikey" password:APIKey];
    NSString *url = [NSString stringWithFormat:@"%@/v1/synthesize?voice=ja-JP_EmiV3Voice", apicred ? apicred[@"url"] : IBMInstanceURL];
    [_ibmmanager POST:url parameters:parameters headers:@{@"Accept" : @"audio/wav"} progress:nil success:^(NSURLSessionDataTask * _Nonnull task, id _Nullable responseObject) {
        // Write the WAV response to a fixed scratch file under
        // Application Support so the converter can read it.
        NSString *filePath = [NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDomainMask, YES) objectAtIndex:0];
        NSString *fileName = @"/KaniManabu/pullStream.wav";
        NSString *fileAtPath = [filePath stringByAppendingString:fileName];
        if (![[NSFileManager defaultManager] fileExistsAtPath:fileAtPath]) {
            [[NSFileManager defaultManager] createFileAtPath:fileAtPath contents:nil attributes:nil];
        }
        NSData *dataresponseObject = responseObject;
        [dataresponseObject writeToFile:fileAtPath atomically:YES];
        // Play the audio right away while the AAC conversion runs.
        [self playAudioWithData:[NSData dataWithContentsOfURL:[NSURL fileURLWithPath:fileAtPath]]];
        // Convert to AAC for compact storage in the Core Data cache.
        [self convertWithFileName:fileAtPath completionHandler:^(NSData *audiodata) {
            if (audiodata) {
                // Clean up the temporary WAV and intermediate M4A files.
                [NSFileManager.defaultManager removeItemAtPath:fileAtPath error:nil];
                [NSFileManager.defaultManager removeItemAtPath:[fileAtPath stringByReplacingOccurrencesOfString:@".wav" withString:@".m4a"] error:nil];
                completionHandler(true, audiodata);
            }
            else {
                completionHandler(false, nil);
            }
        }];

    } failure:^(NSURLSessionDataTask * _Nullable task, NSError * _Nonnull error) {
        // Typo fixed: "Errpr" -> "Error".
        NSLog(@"IBM Speech Error: %@", error.localizedDescription);
        NSString* errResponse = [[NSString alloc] initWithData:(NSData *)error.userInfo[AFNetworkingOperationFailingURLResponseDataErrorKey] encoding:NSUTF8StringEncoding];
        NSLog(@"%@",errResponse);
        completionHandler(false, nil);
    }];
}

- (void)getMSSpeechDataFromText:(NSString *)text completionHandler:(void (^)(bool success, NSData *audiodata)) completionHandler {
NSString *skey = [self getSubscriptionKey];
if (!skey) {
if ([NSUserDefaults.standardUserDefaults boolForKey:@"donated"]) {
Expand Down Expand Up @@ -168,11 +265,12 @@ - (void)getSpeechDataFromText:(NSString *)text completionHandler:(void (^)(bool
}];
}

- (void)saveAudioWithWord:(NSString *)word withAudioData:(NSData *)data {
- (void)saveAudioWithWord:(NSString *)word withAudioData:(NSData *)data withService:(TTSService)service {
// Stores the audio file with the kana word for later use
NSManagedObject *newAudio = [NSEntityDescription insertNewObjectForEntityForName:@"Speech" inManagedObjectContext:_moc];
[newAudio setValue:word forKey:@"word"];
[newAudio setValue:data forKey:@"audio"];
[newAudio setValue:@(service) forKey:@"serviceid"];
[_moc performBlockAndWait:^{
[_moc save:nil];
}];
Expand Down Expand Up @@ -206,4 +304,21 @@ - (void)convertWithFileName:(NSString *)filenamepath completionHandler:(void (^)
}
}];
}

/// Plays a short preview clip for the currently selected TTS voice
/// ("ttsvoice" user default): 0/1 = macOS system voices (live synthesis),
/// 2 = bundled Microsoft sample asset, 3 = bundled IBM Watson sample
/// asset. Any other value does nothing.
- (void)playSample {
    NSInteger selectedVoice = [NSUserDefaults.standardUserDefaults integerForKey:@"ttsvoice"];
    if (selectedVoice == 0 || selectedVoice == 1) {
        // System voices synthesize the sample phrase on the fly.
        [self macOSSayText:@"これで勝ったと思うなよ"];
    }
    else if (selectedVoice == 2) {
        // Pre-recorded Microsoft Speech sample from the asset catalog.
        NSDataAsset *sampleAsset = [[NSDataAsset alloc] initWithName:@"mssample"];
        [self playAudioWithData:sampleAsset.data];
    }
    else if (selectedVoice == 3) {
        // Pre-recorded IBM Watson sample from the asset catalog.
        NSDataAsset *sampleAsset = [[NSDataAsset alloc] initWithName:@"ibmsample"];
        [self playAudioWithData:sampleAsset.data];
    }
}
@end
6 changes: 3 additions & 3 deletions KaniManabu-macOS/AppDelegate.m
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

#if defined(AppStore)
#import "SubscriptionManager.h"
#import "SubscriptionPref.h"
#else
#import "LicenseManager.h"
#endif
Expand Down Expand Up @@ -129,12 +128,13 @@ - (NSWindowController *)preferencesWindowController {
{
GeneralPreferencesViewController *genview = [[GeneralPreferencesViewController alloc] init];
WaniKaniPreferences *wpref = [WaniKaniPreferences new];
AdvancedPref *apref = [AdvancedPref new];
#if defined(AppStore)
SubscriptionPref *subpref = [SubscriptionPref new];
NSArray *controllers = @[genview,wpref,subpref];
NSArray *controllers = @[genview,wpref,subpref, apref];
#else
SoftwareUpdatesPref *supref = [SoftwareUpdatesPref new];
NSArray *controllers = @[genview, wpref, supref];
NSArray *controllers = @[genview, wpref, supref, apref];
#endif
__preferencesWindowController = [[MASPreferencesWindowController alloc] initWithViewControllers:controllers];
}
Expand Down
22 changes: 22 additions & 0 deletions KaniManabu-macOS/Assets.xcassets/advanced.imageset/Contents.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"images" : [
{
"filename" : "icons8-iOS-12640-25-171718.png",
"idiom" : "universal",
"scale" : "1x"
},
{
"filename" : "icons8-iOS-12640-50-171718.png",
"idiom" : "universal",
"scale" : "2x"
},
{
"idiom" : "universal",
"scale" : "3x"
}
],
"info" : {
"author" : "xcode",
"version" : 1
}
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
13 changes: 13 additions & 0 deletions KaniManabu-macOS/Assets.xcassets/ibmsample.dataset/Contents.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"data" : [
{
"filename" : "ibmsample.m4a",
"idiom" : "universal",
"universal-type-identifier" : "com.apple.m4a-audio"
}
],
"info" : {
"author" : "xcode",
"version" : 1
}
}
Binary file not shown.
13 changes: 13 additions & 0 deletions KaniManabu-macOS/Assets.xcassets/mssample.dataset/Contents.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"data" : [
{
"filename" : "mssample.m4a",
"idiom" : "universal",
"universal-type-identifier" : "com.apple.m4a-audio"
}
],
"info" : {
"author" : "xcode",
"version" : 1
}
}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>_XCCurrentVersionName</key>
<string>texttospeech 2.xcdatamodel</string>
</dict>
</plist>
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<model type="com.apple.IDECoreDataModeler.DataModel" documentVersion="1.0" lastSavedToolsVersion="19461" systemVersion="21E230" minimumToolsVersion="Automatic" sourceLanguage="Objective-C" userDefinedModelVersionIdentifier="">
<entity name="Speech" representedClassName="Speech" syncable="YES" codeGenerationType="class">
<attribute name="audio" optional="YES" attributeType="Binary"/>
<attribute name="serviceid" optional="YES" attributeType="Integer 16" defaultValueString="0" usesScalarValueType="YES"/>
<attribute name="word" optional="YES" attributeType="String"/>
</entity>
<elements>
<element name="Speech" positionX="-63" positionY="-18" width="128" height="74"/>
</elements>
</model>
16 changes: 16 additions & 0 deletions KaniManabu-macOS/Preferences/AdvancedPref.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
//
// AdvancedPref.h
// KaniManabu
//
// Created by 千代田桃 on 3/16/22.
//

#import <Cocoa/Cocoa.h>

NS_ASSUME_NONNULL_BEGIN

/// View controller for the Advanced preferences pane (hosts the TTS API
/// settings; registered with MASPreferencesWindowController in
/// AppDelegate alongside the General, WaniKani, and update/subscription
/// panes).
@interface AdvancedPref : NSViewController

@end

NS_ASSUME_NONNULL_END
Loading

0 comments on commit 706ad36

Please sign in to comment.