diff --git a/src/pipecat/services/ai_services.py b/src/pipecat/services/ai_services.py index 06e4b3ebe..46bba673e 100644 --- a/src/pipecat/services/ai_services.py +++ b/src/pipecat/services/ai_services.py @@ -28,6 +28,24 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.utils.audio import calculate_audio_volume from pipecat.utils.utils import exp_smoothing +import re + + +ENDOFSENTENCE_PATTERN_STR = r""" + (? bool: + return ENDOFSENTENCE_PATTERN.search(text.rstrip()) is not None class AIService(FrameProcessor): @@ -137,9 +155,7 @@ async def _process_text_frame(self, frame: TextFrame): text = frame.text else: self._current_sentence += frame.text - if self._current_sentence.strip().endswith( - (".", "?", "!")) and not self._current_sentence.strip().endswith( - ("Mr,", "Mrs.", "Ms.", "Dr.")): + if match_endofsentence(self._current_sentence): text = self._current_sentence self._current_sentence = "" diff --git a/tests/test_ai_services.py b/tests/test_ai_services.py index ec44d5625..fb00fc893 100644 --- a/tests/test_ai_services.py +++ b/tests/test_ai_services.py @@ -2,8 +2,8 @@ from typing import AsyncGenerator -from pipecat.services.ai_services import AIService -from pipecat.pipeline.frames import EndFrame, Frame, TextFrame +from pipecat.services.ai_services import AIService, match_endofsentence +from pipecat.frames.frames import EndFrame, Frame, TextFrame class SimpleAIService(AIService): @@ -27,6 +27,22 @@ async def test_simple_processing(self): self.assertEqual(input_frames, output_frames) + async def test_endofsentence(self): + assert match_endofsentence("This is a sentence.") + assert match_endofsentence("This is a sentence! ") + assert match_endofsentence("This is a sentence?") + assert match_endofsentence("This is a sentence:") + assert not match_endofsentence("This is not a sentence") + assert not match_endofsentence("This is not a sentence,") + assert not match_endofsentence("This is not a sentence, ") + assert not match_endofsentence("Ok, Mr. Smith let's ") + assert not match_endofsentence("Dr. Walker, I presume ") + assert not match_endofsentence("Prof. Walker, I presume ") + assert not match_endofsentence("zweitens, und 3.") + assert not match_endofsentence("Heute ist Dienstag, der 3.") # 3. Juli 2024 + assert not match_endofsentence("America, or the U.") # U.S.A. + assert not match_endofsentence("It still early, it's 3:00 a.") # 3:00 a.m. + if __name__ == "__main__": unittest.main()