Skip to content

Commit

Permalink
disable %wor line by default
Browse files Browse the repository at this point in the history
  • Loading branch information
Jemoka committed Apr 5, 2024
1 parent 1d9cf1c commit 3dc3b74
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 10 deletions.
5 changes: 4 additions & 1 deletion batchalign/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ def writer(doc, output):
default=False, help="Use WhisperX instead of Rev.AI (default). Superceeds --whisper.")
@click.option("--diarize/--nodiarize",
default=False, help="Perform speaker diarization (this flag is ignored with Rev.AI)")
@click.option("--wor/--nowor",
default=False, help="Should we write word level alignment line? Default to no.")
@click.pass_context
def transcribe(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
"""Create a transcript from audio files."""
Expand All @@ -151,7 +153,8 @@ def writer(doc, output):
CHATFile(doc=doc, special_mor_=True).write(output
.replace(".wav", ".cha")
.replace(".mp4", ".cha")
.replace(".mp3", ".cha"))
.replace(".mp3", ".cha"),
write_wor=kwargs.get("wor", False))

asr = "rev"
if kwargs["whisper"]:
Expand Down
9 changes: 5 additions & 4 deletions batchalign/formats/chat/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def __init__(self, path=None, doc=None, lines=None, special_mor_=False):
self.__doc = doc


def write(self, path):
def write(self, path, write_wor=True):
"""Write the CHATFile to file.
Parameters
Expand All @@ -102,13 +102,13 @@ def write(self, path):
Path of where the CHAT file should get written.
"""

str_doc = self.__generate(self.__doc, self.__special_mor)
str_doc = self.__generate(self.__doc, self.__special_mor, write_wor=write_wor)

with open(path, 'w') as df:
df.write(str_doc)

@staticmethod
def __generate(doc:Document, special=False):
def __generate(doc:Document, special=False, write_wor=True):
utterances = doc.content

def __get_birthdays(line):
Expand All @@ -125,7 +125,8 @@ def __get_birthdays(line):
extra += ":\t"+i.content
main.append(extra.strip())
else:
main.append(generate_chat_utterance(i, special and doc.langs[0] == "eng"))
main.append(generate_chat_utterance(i, special and doc.langs[0] == "eng",
write_wor=write_wor))
main.append("@End\n")

return "\n".join(main)
Expand Down
4 changes: 2 additions & 2 deletions batchalign/formats/chat/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# document[3].text = None
# document[3].model_dump()

def generate_chat_utterance(utterance: Utterance, special_mor=False):
def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=True):
"""Converts an Utterance to a CHAT string.
Parameters
Expand Down Expand Up @@ -73,7 +73,7 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False):
result.append(f"%{'u' if special_mor else ''}gra:\t"+" ".join([f"{i.id}|{i.dep_id}|{i.dep_type}" for i in gras]))

#### WOR LINE GENERATION ####
if has_wor:
if has_wor and write_wor:
result.append("%wor:\t"+" ".join(wor_elems))


Expand Down
6 changes: 3 additions & 3 deletions batchalign/version
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
0.7.0-beta.1
Mar 25rd, 2024
Minor bug fixes
0.7.0-beta.2
Apr 05, 2024
Disables %wor line by default in transcribe

0 comments on commit 3dc3b74

Please sign in to comment.