def merge_cues(self, words: int) -> None:
    """
    Merge consecutive cues until each one holds at least ``words`` words.

    The cues in ``self.cues`` are walked in order. While the cue being
    built has fewer than ``words`` whitespace-separated words, the next
    cue is folded into it: the contents are joined with a single space,
    the end time is taken from the folded cue, and the index and start
    time of the first cue are kept. Once the threshold is reached the cue
    is emitted and the next cue starts a new one. The last emitted cue may
    hold fewer than ``words`` words. ``self.cues`` is replaced by the
    merged list.

    Args:
        words (int): Minimum number of words a cue must reach before a
            new cue is started. Must be greater than zero.

    Returns:
        None

    Raises:
        ValueError: If ``words`` is not greater than zero.
    """
    if words <= 0:
        raise ValueError("Invalid number of words to merge, expected > 0")

    if not self.cues:
        return

    new_cues: List[srt.Subtitle] = []  # type: ignore
    current_cue: srt.Subtitle = self.cues[0]  # type: ignore
    # Keep a running word count instead of re-splitting the accumulated
    # content on every iteration. Joining with a single space means the
    # merged count is exactly the sum of the two parts' counts.
    current_words = len(current_cue.content.split())
    for cue in self.cues[1:]:
        cue_words = len(cue.content.split())
        if current_words < words:
            current_cue = srt.Subtitle(
                index=current_cue.index,
                start=current_cue.start,
                end=cue.end,
                content=current_cue.content + " " + cue.content,
            )
            current_words += cue_words
        else:
            new_cues.append(current_cue)
            current_cue = cue
            current_words = cue_words
    # The cue still being built is always emitted, even if it is short.
    new_cues.append(current_cue)
    self.cues = new_cues
Default +0%%.", default="+0%") parser.add_argument("--pitch", help="set TTS pitch. Default +0Hz.", default="+0Hz") + parser.add_argument( + "--words-in-cue", + help="number of words in a subtitle cue. Default: 10.", + default=10, + type=int, + ) parser.add_argument( "--write-media", help="send media output to file instead of stdout" )