Skip to content

Commit

Permalink
Fix tests
Browse files Browse the repository at this point in the history
  • Loading branch information
sjrl committed Dec 16, 2024
1 parent f521935 commit f219075
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions test/components/preprocessors/test_nltk_document_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def test_document_splitter_split_into_units_sentence(self) -> None:
document_splitter = NLTKDocumentSplitter(
split_by="sentence", split_length=2, split_overlap=0, split_threshold=0, language="en"
)
document_splitter.warm_up()

text = "Moonlight shimmered softly, wolves howled nearby, night enveloped everything. It was a dark night."
units = document_splitter._split_into_units(text=text, split_by="sentence")
Expand Down Expand Up @@ -121,11 +122,13 @@ class TestNLTKDocumentSplitterRun:
def test_run_type_error(self) -> None:
    """Check that ``run`` raises ``TypeError`` when ``documents`` is a bare ``Document`` instead of a list."""
    document_splitter = NLTKDocumentSplitter()
    # Warm up outside the ``raises`` block: only ``run`` is expected to raise,
    # and an unrelated TypeError from ``warm_up`` must not make this test pass.
    document_splitter.warm_up()
    with pytest.raises(TypeError):
        document_splitter.run(documents=Document(content="Moonlight shimmered softly."))  # type: ignore

def test_run_value_error(self) -> None:
    """Check that ``run`` raises ``ValueError`` for a document whose ``content`` is ``None``."""
    document_splitter = NLTKDocumentSplitter()
    # Warm up outside the ``raises`` block: only ``run`` is expected to raise,
    # and an unrelated ValueError from ``warm_up`` must not make this test pass.
    document_splitter.warm_up()
    with pytest.raises(ValueError):
        document_splitter.run(documents=[Document(content=None)])

def test_run_split_by_sentence_1(self) -> None:
Expand All @@ -138,6 +141,7 @@ def test_run_split_by_sentence_1(self) -> None:
use_split_rules=True,
extend_abbreviations=True,
)
document_splitter.warm_up()

text = (
"Moonlight shimmered softly, wolves howled nearby, night enveloped everything. It was a dark night ... "
Expand Down Expand Up @@ -168,6 +172,7 @@ def test_run_split_by_sentence_2(self) -> None:
"This is another test sentence. (This is a third test sentence.) "
"This is the last test sentence."
)
document_splitter.warm_up()
documents = document_splitter.run(documents=[Document(content=text)])["documents"]

assert len(documents) == 4
Expand Down Expand Up @@ -201,6 +206,7 @@ def test_run_split_by_sentence_3(self) -> None:
use_split_rules=True,
extend_abbreviations=True,
)
document_splitter.warm_up()

text = "Sentence on page 1.\fSentence on page 2. \fSentence on page 3. \f\f Sentence on page 5."
documents = document_splitter.run(documents=[Document(content=text)])["documents"]
Expand Down Expand Up @@ -233,6 +239,7 @@ def test_run_split_by_sentence_4(self) -> None:
use_split_rules=True,
extend_abbreviations=True,
)
document_splitter.warm_up()

text = "Sentence on page 1.\fSentence on page 2. \fSentence on page 3. \f\f Sentence on page 5."
documents = document_splitter.run(documents=[Document(content=text)])["documents"]
Expand Down Expand Up @@ -262,6 +269,7 @@ def test_run_split_by_word_respect_sentence_boundary(self) -> None:
language="en",
respect_sentence_boundary=True,
)
document_splitter.warm_up()

text = (
"Moonlight shimmered softly, wolves howled nearby, night enveloped everything. It was a dark night.\f"
Expand Down Expand Up @@ -294,6 +302,7 @@ def test_run_split_by_word_respect_sentence_boundary_no_repeats(self) -> None:
use_split_rules=False,
extend_abbreviations=False,
)
document_splitter.warm_up()
text = (
"This is a test sentence with many many words that exceeds the split length and should not be repeated. "
"This is another test sentence. (This is a third test sentence.) "
Expand All @@ -319,6 +328,7 @@ def test_run_split_by_word_respect_sentence_boundary_with_split_overlap_and_page
extend_abbreviations=True,
respect_sentence_boundary=True,
)
document_splitter.warm_up()

text = (
"Sentence on page 1. Another on page 1.\fSentence on page 2. Another on page 2.\f"
Expand Down

0 comments on commit f219075

Please sign in to comment.