-
Notifications
You must be signed in to change notification settings - Fork 2
/
utils.py
executable file
·42 lines (35 loc) · 1.36 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import datetime
import os
import subprocess
from typing import List, Tuple

import numpy as np
import soundfile
def convert_to_wav(in_filename: str) -> np.ndarray:
    """Convert an audio file to a 16 kHz WAV file and return its samples.

    The WAV file is written next to the input, at the same path with the
    extension replaced by ``.wav``. If that file already exists it is read
    as-is and ffmpeg is not run. Inputs with an ``.mp3`` extension (any
    letter case) are additionally down-mixed to mono 16-bit PCM.

    Args:
        in_filename: Path of the audio file to convert.

    Returns:
        The audio data read from the WAV file by ``soundfile.read``; the
        sample rate that soundfile reports is discarded.

    Raises:
        RuntimeError: If ffmpeg cannot be started or exits with a non-zero
            status.
    """
    file_root, extension = os.path.splitext(in_filename)
    out_filename = file_root + ".wav"

    # Reuse an earlier conversion instead of running ffmpeg again.
    if os.path.exists(out_filename):
        speech, _ = soundfile.read(out_filename)
        return speech

    # Pass the arguments as a list (no shell) so that quotes, spaces or shell
    # metacharacters in the file names cannot break or hijack the command.
    if extension.lower() == ".mp3":
        command = [
            "ffmpeg", "-y", "-i", in_filename,
            "-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000",
            out_filename,
        ]
    else:
        command = [
            "ffmpeg", "-hide_banner", "-y", "-i", in_filename,
            "-ar", "16000",
            out_filename,
        ]

    try:
        completed = subprocess.run(command, check=False)
    except OSError as err:
        raise RuntimeError(f"could not run ffmpeg to convert {in_filename!r}") from err

    if completed.returncode != 0:
        # The output did not exist before this call, so anything at that path
        # now is a partial result; remove it so that a later call does not
        # mistake it for a finished conversion.
        if os.path.exists(out_filename):
            os.remove(out_filename)
        raise RuntimeError(
            f"ffmpeg exited with status {completed.returncode} "
            f"while converting {in_filename!r}"
        )

    speech, _ = soundfile.read(out_filename)
    print(f"load speech shape {speech.shape}")
    return speech
def chunk_strings(
    input_list: List[str], output_chunk_length: int
) -> Tuple[List[str], List[int]]:
    """Greedily join consecutive strings into space-separated chunks.

    Strings are appended to the current chunk, separated by a single space,
    for as long as the chunk stays within ``output_chunk_length`` characters.
    A string that would push the chunk past the limit starts a new chunk. A
    single string longer than the limit is never split; it becomes a chunk of
    its own.

    Args:
        input_list: Strings to group, in order.
        output_chunk_length: Maximum number of characters per chunk,
            counting the joining spaces.

    Returns:
        A tuple ``(chunks, chunk_idx)``. ``chunks`` holds the joined,
        non-empty chunk strings. ``chunk_idx`` starts with ``0`` and gains
        the ``input_list`` index of the string that opens each later chunk.
        For an empty ``input_list`` the result is ``([], [0])``.
    """
    output_list: List[str] = []
    chunk_idx: List[int] = [0]
    current_chunk = ""
    for idx, string in enumerate(input_list):
        if not current_chunk:
            # Nothing accumulated yet: take the string as the chunk text even
            # if it is over the limit. There is no text to flush, so no empty
            # chunk (and no duplicate index) is emitted.
            current_chunk = string
        elif len(current_chunk) + 1 + len(string) <= output_chunk_length:
            # The +1 accounts for the joining space.
            current_chunk += " " + string
        else:
            output_list.append(current_chunk)
            current_chunk = string
            chunk_idx.append(idx)
    if current_chunk:
        output_list.append(current_chunk)
    return output_list, chunk_idx