-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvoice_caption_helper.py
140 lines (120 loc) · 4.53 KB
/
voice_caption_helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import os.path
import re
from time import localtime, strftime
import asyncio
import aiofiles
import edge_tts
from utils import remove_punctuations
from edge_tts.submaker import mktimestamp
from xml.sax.saxutils import escape
# Not referenced anywhere in the visible part of this file.
# NOTE(review): purpose unknown -- confirm before removing.
limit = "18"
# Default for the `voice_actor` parameter of create_caption_and_voice_files,
# which forwards it to edge_tts.Communicate as `voice`.
role = "zh-CN-YunxiNeural"
# Default for the `rate` parameter of create_caption_and_voice_files,
# forwarded unchanged to edge_tts.Communicate.
rate = "+30%"
# Default for the `volume` parameter of create_caption_and_voice_files,
# forwarded unchanged to edge_tts.Communicate.
volume = "+100%"
class CustomSubMaker(edge_tts.SubMaker):
    """SubMaker variant intended to give better subtitle support for Chinese.

    Instead of one cue per word boundary, ``generate_cn_subs`` emits one cue
    per punctuation-delimited clause of the source text.
    """

    # Characters that terminate a clause.
    # NOTE(review): "," and "!" each appear twice; one of each pair was
    # presumably meant to be the full-width form -- confirm.
    _PUNCTUATION = (",", "。", "!", "?", ";", ":", "”", ",", "!")

    # One or more characters that are neither in U+4E00..U+9FFF nor ASCII
    # digits. Compiled once instead of on every call.
    _NON_CHINESE_OR_DIGIT = re.compile(r"[^\u4e00-\u9fff0-9]+")

    def formatter(self, start_time: float, end_time: float, subdata: str) -> str:
        """Return one cue: a timing line, the XML-escaped text, a blank line.

        Every line ends with a bare LF. The string is later written to and
        re-read from a text-mode file, where newline translation is applied;
        bare LFs give the same lines (including the blank separator between
        cues) on every platform. The previous LF / CR LF mix only produced
        the blank separator where LF is expanded to CR LF on write.
        """
        return (
            f"{mktimestamp(start_time)} --> {mktimestamp(end_time)}\n"
            f"{escape(subdata)}\n\n"
        )

    @classmethod
    def _split_clauses(cls, text: str) -> list:
        """Split *text* into clauses, each ending with its punctuation run."""
        clauses = []
        start = 0
        i = 0
        length = len(text)
        while i < length:
            if text[i] in cls._PUNCTUATION:
                # Consume the whole run of consecutive punctuation so that
                # e.g. a period followed by a closing quote stays together.
                while i < length and text[i] in cls._PUNCTUATION:
                    i += 1
                clauses.append(text[start:i])
                start = i
            i += 1
        # Keep trailing text that is not followed by punctuation; it used to
        # be dropped, leaving the end of the narration without a subtitle.
        # Whitespace-only remainders (e.g. a final newline) are ignored.
        tail = text[start:]
        if tail.strip():
            clauses.append(tail)
        return clauses

    async def generate_cn_subs(self, text: str) -> str:
        """Build a WebVTT document with one cue per clause of *text*.

        Each clause starts at the next unconsumed word boundary and is
        extended over every following boundary whose word occurs in the
        clause. The cue text is the clause passed through
        ``remove_punctuations``.

        Side effect: the clause list is stored on ``self.text_list``.

        Returns:
            The WebVTT text. Generation stops early, returning what has been
            built so far, once the recorded boundaries are exhausted.

        Raises:
            ValueError: if ``self.subs`` and ``self.offset`` differ in length.
        """
        self.text_list = self._split_clauses(text)
        if len(self.subs) != len(self.offset):
            raise ValueError("subs and offset are not of the same length")

        parts = ["WEBVTT\n\n"]
        j = 0  # index of the next unconsumed word boundary
        boundary_count = len(self.offset)
        for clause in self.text_list:
            if j >= boundary_count:
                break
            start_time = self.offset[j][0]
            # Greedy match: absorb following words while they appear in the
            # clause; the last absorbed boundary supplies the end time.
            while j + 1 < boundary_count and self.subs[j + 1] in clause:
                j += 1
            parts.append(
                self.formatter(
                    start_time, self.offset[j][1], remove_punctuations(clause)
                )
            )
            j += 1
        return "".join(parts)

    async def remove_non_chinese_chars(self, text: str) -> str:
        """Return *text* with everything except CJK ideographs
        (U+4E00..U+9FFF) and ASCII digits removed.

        Not called by the code visible in this file.
        """
        return self._NON_CHINESE_OR_DIGIT.sub("", text)
async def create_caption_and_voice_files(
    content,
    audio_file_path,
    srt_file_path,
    voice_actor=role,
    rate=rate,
    volume=volume,
) -> None:
    """Synthesize speech for *content* and write the audio and an SRT file.

    The audio chunks streamed by ``edge_tts.Communicate`` are written to
    *audio_file_path*; the word boundaries from the same stream are collected
    in a ``CustomSubMaker``. Its WebVTT output is written to ``temp.vtt`` and
    then converted line by line into *srt_file_path*.

    Args:
        content: text to speak and to caption.
        audio_file_path: destination of the audio data (opened in ``"wb"``).
        srt_file_path: destination of the SRT captions (UTF-8).
        voice_actor: passed to ``edge_tts.Communicate`` as ``voice``.
        rate: passed to ``edge_tts.Communicate`` as ``rate``.
        volume: passed to ``edge_tts.Communicate`` as ``volume``.

    Note:
        ``temp.vtt`` is a fixed path relative to the current working
        directory and is left on disk afterwards, so concurrent calls
        overwrite each other's intermediate file.
    """
    vtt_file_path = "temp.vtt"
    communicate = edge_tts.Communicate(
        text=content, voice=voice_actor, rate=rate, volume=volume
    )
    sub_maker = CustomSubMaker()

    async with aiofiles.open(audio_file_path, "wb") as audio_file:
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                await audio_file.write(chunk["data"])
            elif chunk["type"] == "WordBoundary":
                sub_maker.create_sub(
                    (chunk["offset"], chunk["duration"]),
                    chunk["text"],
                )

    async with aiofiles.open(vtt_file_path, "w", encoding="utf-8") as vtt_file:
        content_to_write = await sub_maker.generate_cn_subs(content)
        await vtt_file.write(content_to_write)

    # VTT -> SRT: number every cue and drop the header that precedes the
    # first timing line. Both files are managed by the `with` statement; the
    # VTT input used to be opened without ever being closed.
    idx = 1  # sequence number of the next cue
    with open(srt_file_path, "w", encoding="utf-8") as f_out, open(
        vtt_file_path, encoding="utf-8"
    ) as f_in:
        for line in f_in:
            if "-->" in line:
                f_out.write("%d\n" % idx)
                idx += 1
                # SRT timestamps use ',' before the milliseconds. Original
                # author's note: not strictly required, SRT readers also
                # accept '.'.
                line = line.replace(".", ",")
            if idx > 1:  # still 1 while inside the VTT header
                f_out.write(line)
if __name__ == "__main__":
    # Manual run: read a text file and produce test.mp3 / test.srt for it.
    content_file_path = r"E:\小說推文\大道至簡\short.txt"
    # The file is only read, so open it read-only; the previous "+r" mode
    # also requested write access and failed on read-only files.
    # "utf_8_sig" strips a leading byte-order mark if one is present.
    with open(content_file_path, "r", encoding="utf_8_sig") as content_file:
        # readlines() keeps each line's own newline, so joining with "\n"
        # yields a blank line between source lines; kept as in the original.
        # NOTE(review): confirm the doubled newlines are intended.
        content = "\n".join(content_file.readlines())
    folder_path = r"E:\voice_and_srt"
    name = "test"
    asyncio.run(
        create_caption_and_voice_files(
            content=content,
            audio_file_path=os.path.join(folder_path, f"{name}.mp3"),
            srt_file_path=os.path.join(folder_path, f"{name}.srt"),
        )
    )