Skip to content

Commit

Permalink
Updated the index part of the hash to be activity-based
Browse files: browse the repository at this point in the history
  • Loading branch information
simon-20 committed Jan 25, 2024
1 parent b23aa28 commit f108dc5
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions src/library/solrize.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -140,7 +140,9 @@ def process_hash_list(document_datasets):

logger.info('Adding docs for hash: ' + file_hash + ' and id: ' + file_id)

for index, fa in enumerate(flattened_activities[0]):
identifier_indices = {}

for fa in flattened_activities[0]:
hashed_iati_identifier = utils.get_hash_for_identifier(fa['iati_identifier'])
blob_name = '{}/{}.xml'.format(file_id, hashed_iati_identifier)

Expand Down Expand Up @@ -211,7 +213,11 @@ def process_hash_list(document_datasets):
sub_list_data[element_name] = fa['@'+element_name]
del fa['@'+element_name]

fa['id'] = "{}--{}--{}".format(file_id, hashed_iati_identifier, index)
identifier_indices[hashed_iati_identifier] = identifier_indices.get(hashed_iati_identifier, 0) + 1

fa['id'] = "{}--{}--{}".format(file_id,
hashed_iati_identifier,
identifier_indices[hashed_iati_identifier])

addToSolr('activity', [fa], file_hash, file_id)

Expand Down

0 comments on commit f108dc5

Please sign in to comment.