From 5be593153aad68646747d63f20ab0722aa8f0325 Mon Sep 17 00:00:00 2001 From: kialj876 Date: Thu, 7 Feb 2019 13:52:10 -0800 Subject: [PATCH] search no longer filters names with synonyms from multiple synonym lists (#518) Signed-off-by: Kial Jinnah --- api/namex/analytics/solr.py | 13 ++++++++++--- .../python/end_points/test_synonym_match.py | 17 +++++++++++++++-- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/api/namex/analytics/solr.py b/api/namex/analytics/solr.py index b81f2fe1d..0cb462795 100644 --- a/api/namex/analytics/solr.py +++ b/api/namex/analytics/solr.py @@ -206,8 +206,9 @@ def get_conflict_results(cls, name, bucket, start=0, rows=100): for item in result['response']['docs']: if item['name'] not in seen_ordered_names: ordered_names.append({'name_info': item, 'stems': []}) - for missed in missed_names: - current_app.logger.error('MISSED results: ', missed) + + if len(missed_names) > 0: + current_app.logger.error('MISSED results: {}'.format(missed_names)) final_names_list = [] # order based on alphabetization of swapped in synonyms @@ -635,7 +636,13 @@ def _get_synonym_list(cls, token): # Not sure what it is, pass it up. raise http_error - return json.load(connection)[1][0].split(',') + results = json.load(connection) + synonym_list = [] + # in case a token is part of multiple synonym lists + for synonyms in results[1]: + synonym_list += synonyms.split(',') + + return synonym_list # Look up each token in name, and if it is in the synonyms then we need to search for it separately. @classmethod diff --git a/api/tests/python/end_points/test_synonym_match.py b/api/tests/python/end_points/test_synonym_match.py index 6c5c0042d..e7730e115 100644 --- a/api/tests/python/end_points/test_synonym_match.py +++ b/api/tests/python/end_points/test_synonym_match.py @@ -486,11 +486,12 @@ def test_strips_stop_words(client, jwt, app, criteria, seed): query=criteria, expected_list=[seed] ) + @integration_postgres_solr @integration_synonym_api @integration_solr @pytest.mark.parametrize("query, ordered_list", [ - ('TESTING ORDER DEVELOPMENTS SYNONYMS', ['----TESTING ORDER DEVELOPMENTS SYNONYMS - PROXIMITY SEARCH', + ('TESTING ORDER DEVELOPMENT SYNONYMS', ['----TESTING ORDER DEVELOPMENT SYNONYMS - PROXIMITY SEARCH', 'TESTING ORDER DEVELOPMENT SYNONYMS', 'TESTING ORDER CONSTRUCTION SYNONYMS', 'TESTING ORDER STRUCTURE SYNONYMS', @@ -498,7 +499,7 @@ def test_strips_stop_words(client, jwt, app, criteria, seed): ]) def test_order(client, jwt, app, query, ordered_list): # for loop didn't work for seeding so manual - seed_database_with(client, jwt, 'TESTING ORDER CONSTRUCTION SYNONYMS', id='1', source='2', clear=False) + seed_database_with(client, jwt, 'TESTING ORDER CONSTRUCTION SYNONYMS', id='1', source='2') seed_database_with(client, jwt, 'TESTING ORDER DEVELOPMENT SYNONYMS', id='2', source='4', clear=False) seed_database_with(client, jwt, 'TESTING ORDER STRUCTURE SYNONYMS', id='3', source='3', clear=False) verify_order(client, jwt, query=query, expected_order=ordered_list) @@ -518,3 +519,15 @@ def test_order(client, jwt, app, query, ordered_list): def test_stems(client, jwt, app, query, stems): verify_stems(client, jwt, query=query, stems=stems) +@integration_postgres_solr +@integration_synonym_api +@integration_solr +@pytest.mark.parametrize("query, expected_list", [ + ('PACIFIC FASTFOOD', ['PACIFIC TAKEOUT', 'PACIFIC CONCESSION']), +]) +def test_synonyms_match_on_all_synonym_lists_(client, jwt, app, query, expected_list): + # some synonyms are part of multiple lists so check that they return matches on both + seed_database_with(client, jwt, 'PACIFIC TAKEOUT', id='1', source='2') + seed_database_with(client, jwt, 'PACIFIC CONCESSION', id='2', source='2', clear=False) + verify_synonym_match(client, jwt, query=query, expected_list=expected_list) +