-
Notifications
You must be signed in to change notification settings - Fork 2
/
myapp.py
311 lines (252 loc) · 11.4 KB
/
myapp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
import streamlit as st
import time
import glob
import os
from os import path
# libraries for audio transcription
from gtts import gTTS
from googletrans import Translator
import speech_recognition as sr
# For visualizing and data manipulations
import matplotlib.pyplot as plt
import numpy as np
import wave
# for NLP tasks
import spacy
import spacy_streamlit as ss
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')
from PIL import Image
# disable warnings
st.set_option('deprecation.showPyplotGlobalUse', False)
st.set_option('deprecation.showfileUploaderEncoding', False)
img = Image.open('images.jpeg')
st.title('Text to Speech/AudioSpeech to Text Analytic Web App')
st.image(img, width=650)
st.subheader("Navigate to side bar to see more options")
# re-configuring page layout to restrict users from overwriting the app configuraion
hide_streamlit_style = '''
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
'''
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
st.sidebar.title("Menu")
analyze = st.sidebar.selectbox(
'', ["Home", "Text2Speech_Analytics", "Audio2Text_Analytics"], index=0)
def main():
#----- Text to Audio------------------------------------------------------
if analyze == "Text2Speech_Analytics":
# converting a text to audio would require a directory to save this file
# hence, play this audio
try:
os.mkdir("temp")
except:
pass
translator = Translator()
text = st.text_input("Enter your text")
in_lang = st.selectbox(
"Select your input Language",
("English", "Chinese", "Hindi", "korea", "Japanese", "Spanish"),
)
if in_lang == "English":
input_language = "en"
elif in_lang == "Chinese":
input_language = "zh-cn"
elif in_lang == "korea":
input_language = "ko"
elif in_lang == "Hindi":
input_language = "hi"
elif in_lang == "Japanese":
input_language = "ja"
# elif in_lang == "Hausa":
# input_language = "ha"
elif in_lang == "Spanish":
input_language = "es"
# elif in_lang == "Yoruba":
# input_language = "yo"
# generate output language options
out_lang = st.selectbox(
"Select your output language",
("English", "Chinese", "Hindi", "korea", "Japanese", "Spanish"),
)
if out_lang == "English":
output_language = "en"
elif out_lang == "Chinese":
output_language = "zh-cn"
elif out_lang == "korea":
output_language = "ko"
elif out_lang == "Hindi":
output_language = "hi"
# elif out_lang == "Hausa": # It turned out google API does not support Nigerian Languages just yet.
# output_language = "ha"
elif out_lang == "Japanese":
output_language = "ja"
elif out_lang == "Spanish":
output_language = "es"
# elif out_lang == "Yoruba":
# output_language = "yo"
# select english accent options for the audio output
english_accent = st.selectbox(
"Select your english accent",
(
"Default",
"India",
"United Kingdom",
"United States",
"Nigeria",
"South Africa",
),
)
if english_accent == "Default":
tld = "com"
elif english_accent == "India":
tld = "co.in"
elif english_accent == "United Kingdom":
tld = "co.uk"
elif english_accent == "Nigeria":
tld = "ng"
elif english_accent == "South Africa":
tld = "co.za"
elif english_accent == "United States":
tld = "com"
def text_to_speech(input_language, output_language, text, tld):
translation = translator.translate(text, src=input_language, dest=output_language)
trans_text = translation.text
tts = gTTS(trans_text, lang=output_language, tld=tld, slow=False)
try:
filename = text[0:30]
except:
filename = "audio"
tts.save(f"temp/{filename}.mp3")
return filename, trans_text
display_output_text = st.checkbox("Display output text")
if st.button("convert"):
result, output_text = text_to_speech(input_language, output_language, text, tld)
audio_file = open(f"temp/{result}.mp3", "rb")
audio_bytes = audio_file.read()
st.markdown(f"## Your audio:")
st.audio(audio_bytes, format="audio/mp3", start_time=0)
if display_output_text:
st.markdown(f"## Output text:")
st.write(f" {output_text}")
# ------- audio to text ----------------------------------------------------
if analyze == "Audio2Text_Analytics":
try:
os.mkdir("temp2") # create directory to save our audio file to work on
except:
pass
st.markdown('## Upload a wav Audio File')
audio_file = st.file_uploader("Choose an audio file to upload", type=["wav"])
if audio_file is not None:
st.audio(audio_file) # enabling users to play their audio file
with open(os.path.join("temp2", audio_file.name), "wb") as f: # saving file to a directory
f.write(audio_file.getbuffer())
st.success("File Saved : {} in temp2".format(audio_file.name))
r = sr.Recognizer()
AUDIO_DIR = path.join(path.dirname(path.realpath(__file__)), "temp2") # obtain the directory to saved file
AUDIO_PATH = path.join(AUDIO_DIR, f'{audio_file.name}') # merge path to file to directory
with sr.AudioFile(AUDIO_PATH) as source:
audio_text = r.record(source) # listening to audio to match language
# recognize_method will throw up request error if the google API is unreachable, we capture this error using try and except handling
try:
# using google speech recognizer
text_file = r.recognize_google(audio_text)
if st.checkbox("Look up your transcribed audio to text here..."):
st.write(text_file)
except:
st.write('Sorry...Try Again!')
st.markdown('## Explore Different Categories of your Audio Speech')
fe = st.radio(label="Feature Extraction", options=(' ', 'NER of SpeechText', 'Display AudioSpeech Signal', 'Analyze Speech Sentiment'))
if fe == "Display AudioSpeech Signal":
wav = wave.open(AUDIO_PATH, 'rb') # open wave file using the wave library
raw = wav.readframes(-1) # reading the entire wave audio frame which returns the frame as a byte object
raw = np.frombuffer(raw, "int16") # we use numpy to convert audio bytes into an array
sample_rate = wav.getframerate()
Time = np.linspace(0, len(raw)/sample_rate, num=len(raw))
fig, ax = plt.subplots()
plt.plot(Time, raw, color='blue')
ax.set_xlabel('Time (seconds)')
ax.set_ylabel('Amplitude')
ax.set_title('Input Audio Signal')
plt.tight_layout()
st.pyplot(fig)
if fe == "NER of SpeechText":
text_file = text_file
nlp = spacy.load('en_core_web_sm')
docx = nlp(text_file)
if st.button("SpeechText Attributes"):
ss.visualize_tokens(docx)
ss.visualize_ner(docx, labels=nlp.get_pipe('ner').labels)
if fe == "Analyze Speech Sentiment":
sia = SentimentIntensityAnalyzer()
t = sia.polarity_scores(text_file)
if st.button("Predict"):
st.write("Neutral, Positive and Negative value of your speech is:")
st.write(t['neu'], t['pos'], t['neg'])
if t['neu'] > t['pos'] and t['neu'] > t['neg'] and t['neu'] > 0.85:
st.markdown("Speech Text is classified as **Neutral**. :confused:")
st.balloons()
elif t['pos'] > t['neg']:
st.markdown("Speech Text is Classified as **Positive**. :smiley:")
st.balloons()
elif t['neg'] > t['pos']:
st.markdown("Speech Text is Classified as **Negative**. :disappointed:")
st.sidebar.markdown(
"""
----------
## Project Overview
This is an AI web app that transcribes text to speech in 6 different languages and speech to text, to extract specific features in the
speech like NER, Sentiment and the Audio signal time frame.
""")
st.sidebar.header("") # initialize empty space
st.sidebar.markdown(
"""
----------
## Instructions
1. For your text2Speech, select your input text language and your output speech language. Then use the convert button.
2. Upload your wav audio file to begin your speech2text analyses
3. If your file does not upload, make sure it's a mono-channel file, i.e having only one audio source.
4. If your file isn't wav formated, do not worry, [click here](https://www.movavi.com/support/how-to/how-to-convert-music-to-wav.html)
and head over to "Online Converter" to upload your mp3 file.
5. if you are getting "Sorry...Run Again" message when you upload file, try running again when you have a stable
network
""")
# preview app demo
demo = st.sidebar.checkbox('App Demo')
if demo == 1:
st.sidebar.video('https://res.cloudinary.com/dfgg73dvr/video/upload/v1624127072/ezgif.com-gif-maker_k56lry.mp4', format='mp4')
st.sidebar.header("")
st.sidebar.markdown(
"""
-----------
# Let's connect
[![Jude Leonard Ndu](https://img.shields.io/badge/[email protected]?colorA=gray&colorB=dodgergreen&logo=github)](https://www.github.com/judeleonard/)
[![Jude Ndu](https://img.shields.io/badge/LinkedIn-0077B5?style=for-the-badge&logoColor=white)](https://www.linkedin.com/in/jude-ndu-78ab38175/)
[![Jude Leonard](https://img.shields.io/badge/Twitter-1DA1F2?style=for-the-badge&logo=twitter&logoColor=gray)](https://www.twitter.judeleonard13/)
""")
#----- deleting files from directories so we don't overload the app------
def remove_wav_files(n):
wav_files = glob.glob("temp2/*wav")
if len(wav_files) != 0:
now = time.time()
n_days = n * 86400
for f in wav_files:
if os.stat(f).st_mtime < now - n_days:
os.remove(f)
print("Deleted", f)
def remove_mp3_files(n):
mp3_files = glob.glob("temp/*mp3")
if len(mp3_files) != 0:
now = time.time()
n_days = n * 86400
for f in mp3_files:
if os.stat(f).st_mtime < now - n_days:
os.remove(f)
print("Deleted", f)
remove_mp3_files(7) # remove mp3 files from directory
remove_wav_files(7) # remove wav files from directory
if __name__ == '__main__':
main()