From 7cecbbf7080e8fcb8120ab203cf4a703e1c9c8cb Mon Sep 17 00:00:00 2001 From: Richard Jackson Date: Wed, 3 Jul 2024 13:10:53 +0100 Subject: [PATCH] strict mode on conflict analyser: if True, then the function will return True if there are multiple mention confidences for a given string, regardless of case sensitivity --- kazu/ontology_preprocessing/curation_utils.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/kazu/ontology_preprocessing/curation_utils.py b/kazu/ontology_preprocessing/curation_utils.py index 06dcb05c..b33ebb3b 100644 --- a/kazu/ontology_preprocessing/curation_utils.py +++ b/kazu/ontology_preprocessing/curation_utils.py @@ -432,7 +432,8 @@ def check_for_case_conflicts_across_resources( of equal or higher rank than a case-sensitive one. :param resources: - :param strict: if True, then the function will return True if there are any conflicts in case sensitivity, regardless of the mention confidence + :param strict: if True, then the function will return True if there + are multiple mention confidences for a given string, regardless of case sensitivity :return: a set of conflicted subsets, and a set of clean resources. """ @@ -604,7 +605,8 @@ def _resource_set_has_case_conflicts( "Egfr" -> ci and POSSIBLE :param resources: - :param strict: if True, then the function will return True if there are any conflicts in case sensitivity, regardless of the mention confidence + :param strict: if True, then the function will return True if there + are multiple mention confidences for a given string, regardless of case sensitivity :return: """ cs_conf_lookup = defaultdict(set) @@ -620,15 +622,10 @@ def _resource_set_has_case_conflicts( ci_confidences: set[MentionConfidence] = ci_conf_lookup.get( cased_syn_string.lower(), set() ) - if ( - len(ci_confidences) > 1 - or len(cs_confidences) > 1 - or ( - len(ci_confidences) == 1 - and len(cs_confidences) > 0 - and min(cs_confidences) <= min(ci_confidences) - ) - or ((len(ci_confidences) + len(cs_confidences)) > 1 and strict) + if len(ci_confidences) > 1 or ( + len(ci_confidences) == 1 + and len(cs_confidences) > 0 + and (min(cs_confidences) <= min(ci_confidences) or strict) ): return True for ci_confidences in ci_conf_lookup.values():