From f108dc5e869d6d12bbbf9048da40d44a6bb12eec Mon Sep 17 00:00:00 2001 From: Simon <6615834+simon-20@users.noreply.github.com> Date: Thu, 25 Jan 2024 15:02:48 +0000 Subject: [PATCH] Updated index part of hash to be activity based --- src/library/solrize.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/library/solrize.py b/src/library/solrize.py index 1ec261e..4c3ca06 100644 --- a/src/library/solrize.py +++ b/src/library/solrize.py @@ -140,7 +140,9 @@ def process_hash_list(document_datasets): logger.info('Adding docs for hash: ' + file_hash + ' and id: ' + file_id) - for index, fa in enumerate(flattened_activities[0]): + identifier_indices = {} + + for fa in flattened_activities[0]: hashed_iati_identifier = utils.get_hash_for_identifier(fa['iati_identifier']) blob_name = '{}/{}.xml'.format(file_id, hashed_iati_identifier) @@ -211,7 +213,11 @@ def process_hash_list(document_datasets): sub_list_data[element_name] = fa['@'+element_name] del fa['@'+element_name] - fa['id'] = "{}--{}--{}".format(file_id, hashed_iati_identifier, index) + identifier_indices[hashed_iati_identifier] = identifier_indices.get(hashed_iati_identifier, 0) + 1 + + fa['id'] = "{}--{}--{}".format(file_id, + hashed_iati_identifier, + identifier_indices[hashed_iati_identifier]) addToSolr('activity', [fa], file_hash, file_id)