From a9c85efceb00de557ab96b0c7dfaf2ea8777c007 Mon Sep 17 00:00:00 2001 From: Thomas Chaigneau Date: Mon, 9 Oct 2023 19:15:22 +0200 Subject: [PATCH] fix empty utterances (#273) --- .../services/post_processing_service.py | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/wordcab_transcribe/services/post_processing_service.py b/src/wordcab_transcribe/services/post_processing_service.py index 420e8c2..8eed8ba 100644 --- a/src/wordcab_transcribe/services/post_processing_service.py +++ b/src/wordcab_transcribe/services/post_processing_service.py @@ -346,21 +346,26 @@ def final_processing_before_returning( Returns: List[Utterance]: - List of utterances with final processing. + List of utterances after final processing. """ if offset_start is not None: offset_start = float(offset_start) else: offset_start = 0.0 + final_utterances = [] for utterance in utterances: - utterance.text = format_punct(utterance.text) - utterance.start = convert_timestamp( - (utterance.start + offset_start), timestamps_format - ) - utterance.end = convert_timestamp( - (utterance.end + offset_start), timestamps_format - ) - utterance.words = utterance.words if word_timestamps else None + # Check if the utterance is not empty + if utterance.text.strip(): + utterance.text = format_punct(utterance.text) + utterance.start = convert_timestamp( + (utterance.start + offset_start), timestamps_format + ) + utterance.end = convert_timestamp( + (utterance.end + offset_start), timestamps_format + ) + utterance.words = utterance.words if word_timestamps else None - return utterances + final_utterances.append(utterance) + + return final_utterances