Skip to content

Commit

Permalink
fix ca mark
Browse files Browse the repository at this point in the history
  • Loading branch information
Jemoka committed Jan 19, 2025
1 parent 7bfb9d4 commit 37c6722
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 12 deletions.
2 changes: 1 addition & 1 deletion batchalign/formats/chat/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def annotation_clean(content, special=False):
cleaned_word = cleaned_word.replace("‡","").replace("ạ","")
cleaned_word = cleaned_word.replace("ʰ","").replace("ā","")
cleaned_word = cleaned_word.replace("ʔ","").replace("ʕ","")
cleaned_word = cleaned_word.replace("š","").replace("ˈ","")
cleaned_word = cleaned_word.replace("ˈ","")
cleaned_word = cleaned_word.replace("ˌ","").replace("‹","")
cleaned_word = cleaned_word.replace("›","").replace("〔","")
cleaned_word = cleaned_word.replace("~","").replace("&~","")
Expand Down
4 changes: 2 additions & 2 deletions batchalign/version
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
0.7.9.post.2
0.7.9.post.3
January 14th, 2025
more align fixes
fix croatian
18 changes: 9 additions & 9 deletions scratchpad.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,33 +154,33 @@
# # forms
# utterance = Utterance(content=forms, delim=delim, text=ut)

sec = "une famille die@s:deu vacances"
# sec = "vidiš (š)to sam lepo@d našalala [: našarala] ."

forms, delim = chat_parse_utterance(sec, None, None, None, None)
utterance = Utterance(content=forms, delim=delim, text=sec)
# utterance.punct
# forms, delim = chat_parse_utterance(sec, None, None, None, None)
# utterance = Utterance(content=forms, delim=delim, text=sec)

# utterance[3].time=(1500,1600)
# ut = Document(content=[utterance], langs=["eng"])

# print(str(CHATFile(doc=ut)))

utterance[0]

# # # # # =======
# ut = Document(content=[utterance], langs=["jpn"])

# pipeline = BatchalignPipeline.new("morphosyntax", lang="jpn")
# res = pipeline(ut, retokenize=True)

# print(str(CHATFile(doc=res)))

# >>>>>>> theirs

utterance[3].time=(1500,1600)
ut = Document(content=[utterance], langs=["eng"])

# pipeline = BatchalignPipeline.new("morphosyntax", lang="jpn")
# res = pipeline(ut, retokenize=True)
# >>>>>>> Stashed changes

# # print(str(CHA
print(str(CHATFile(doc=ut)))
# print(str(CHATFile(doc=ut)))
# ut = Document(content=[utterance], langs=[lang])

# pipeline = BatchalignPipeline.new("morphosyntax", lang=lang)
Expand Down

0 comments on commit 37c6722

Please sign in to comment.