Skip to content

Commit

Permalink
Merge pull request #91 from Wordcab/90-fix-word_timestamps
Browse files: browse the repository at this point in the history
Implement word_timestamps for batching
  • Loading branch information
Thomas Chaigneau authored Jun 7, 2023
2 parents c99a717 + 6e2a59a commit c231039
Show file tree
Hide file tree
Showing 4 changed files with 298 additions and 29 deletions.
8 changes: 4 additions & 4 deletions wordcab_transcribe/services/align_service.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -419,9 +419,9 @@ def align(
start, end, score = None, None, None
if cdx in segment["clean_cdx"]:
char_seg = char_segments[segment["clean_cdx"].index(cdx)]
- start = round(char_seg.start * ratio + t1, 3)
- end = round(char_seg.end * ratio + t1, 3)
- score = round(char_seg.score, 3)
+ start = round(char_seg.start * ratio + t1, 2)
+ end = round(char_seg.end * ratio + t1, 2)
+ score = round(char_seg.score, 2)

char_segments_arr.append(
{
Expand Down Expand Up @@ -468,7 +468,7 @@ def align(
word_chars = word_chars[word_chars["char"] != " "]
word_start = word_chars["start"].min()
word_end = word_chars["end"].max()
- word_score = round(word_chars["score"].mean(), 3)
+ word_score = round(word_chars["score"].mean(), 2)

# -1 indicates unalignable
word_segment = {"word": word_text}
Expand Down
2 changes: 1 addition & 1 deletion wordcab_transcribe/services/asr_service.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -201,7 +201,7 @@ async def process_input(
"dual_channel": dual_channel,
"source_lang": source_lang,
"timestamps_format": timestamps_format,
- "word_timestamps": False, # TODO: Implement word timestamps, False for now
+ "word_timestamps": word_timestamps,
"post_processed": False,
"transcription_result": None,
"transcription_done": asyncio.Event(),
Expand Down
Loading

0 comments on commit c231039

Please sign in to comment.