Skip to content

Commit

Permalink
fix: CombinatorialSynonymGenerator should only check for original str…
Browse files Browse the repository at this point in the history
…ings
  • Loading branch information
paluchasz committed Oct 16, 2024
1 parent b04ee19 commit 989f772
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 4 deletions.
6 changes: 2 additions & 4 deletions kazu/ontology_preprocessing/synonym_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def __call__(
)
final_results: defaultdict[OntologyStringResource, set[Synonym]] = defaultdict(set)
original_strings = {
syn.text for resource in ontology_resources for syn in resource.active_ner_synonyms()
syn.text for resource in ontology_resources for syn in resource.original_synonyms
}
for i, permutation_list in enumerate(synonym_gen_permutations):
logger.info(
Expand All @@ -80,9 +80,7 @@ def __call__(
desc=f"generating synonyms for {generator.__class__.__name__}",
):

for syn in list(
generated_results.get(resource, resource.active_ner_synonyms())
):
for syn in list(generated_results.get(resource, resource.original_synonyms)):
new_strings = generator(syn.text)
for new_syn_text in new_strings:
if new_syn_text in original_strings:
Expand Down
51 changes: 51 additions & 0 deletions kazu/tests/test_synonym_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,57 @@ def test_greek_substitution_dict_uncode_variants():
StringReplacement(replacement_dict={"-": [" ", "_"]}, include_greek=False),
],
),
(
OntologyStringResource(
original_synonyms=frozenset(
[
Synonym(
text="estimate",
mention_confidence=MentionConfidence.PROBABLE,
case_sensitive=False,
),
Synonym(
text="estimate of",
mention_confidence=MentionConfidence.PROBABLE,
case_sensitive=False,
),
]
),
behaviour=OntologyStringBehaviour.ADD_FOR_NER_AND_LINKING,
),
set(),
[
StopWordRemover(), # StopWordRemover would normally strip 'of' and produce "estimate", but "estimate" is already
# an original synonym, so CombinatorialSynonymGenerator skips it rather than adding it as an alternative synonym
],
),
(
OntologyStringResource(
original_synonyms=frozenset(
[
Synonym(
text="estimate of",
mention_confidence=MentionConfidence.PROBABLE,
case_sensitive=False,
),
]
),
behaviour=OntologyStringBehaviour.ADD_FOR_NER_AND_LINKING,
alternative_synonyms=frozenset(
[
Synonym(
text="estimate",
mention_confidence=MentionConfidence.PROBABLE,
case_sensitive=False,
)
]
),
),
{"estimate"},
[
StopWordRemover(),
],
),
),
)
def test_CombinatorialSynonymGenerator(resource, expected_syns, generators):
Expand Down

0 comments on commit 989f772

Please sign in to comment.