Skip to content

Commit

Permalink
Added the code to clean up duplicated external identifiers on entity u…
Browse files Browse the repository at this point in the history
…pdate
  • Loading branch information
Camelia-Orcid committed Oct 19, 2023
1 parent b0016f3 commit 116b656
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,7 @@ public interface OrgDisambiguatedManager {
void createOrgDisambiguatedExternalIdentifier(OrgDisambiguatedExternalIdentifierEntity identifier);

public List<OrgDisambiguated> findOrgDisambiguatedIdsForSameExternalIdentifier(String identifier, String type);

/**
 * Cleans up duplicated external identifiers attached to the given
 * disambiguated organization.
 *
 * NOTE(review): the implementation in OrgDisambiguatedManagerImpl keys
 * identifiers by "identifierType::identifier" and deletes the selected
 * duplicates through the external-identifier DAO; confirm that this is the
 * contract every implementation is expected to follow.
 *
 * @param orgDisambiguatedEntity the organization whose external identifiers
 *        are inspected
 */
public void cleanDuplicatedExternalIdentifiersForOrgDisambiguated(OrgDisambiguatedEntity orgDisambiguatedEntity);

}
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ public class OrgDisambiguatedManagerImpl implements OrgDisambiguatedManager {

@Value("${org.orcid.persistence.messaging.updated.disambiguated_org.solr:indexDisambiguatedOrgs}")
private String updateSolrQueueName;

// Toggle injected from the "org.orcid.core.cleanExtIdsOnOrgUpdate" property
// (the placeholder supplies "true" when the property is not set). When true,
// updateOrgDisambiguated() runs the duplicate external identifier cleanup
// before merging the entity.
@Value("${org.orcid.core.cleanExtIdsOnOrgUpdate:true}")
private boolean cleanDuplicateExtIdOnOrgUpdate;

@Resource(name = "jmsMessageSender")
private JmsMessageSender messaging;
Expand Down Expand Up @@ -219,6 +222,9 @@ public List<OrgDisambiguated> searchOrgsFromSolrForSelfService(String searchTerm
/**
 * Updates a disambiguated organization.
 *
 * The entity's external identifiers are normalized first; if the
 * {@code cleanDuplicateExtIdOnOrgUpdate} flag is set, duplicated external
 * identifiers are cleaned up next; finally the entity is merged through
 * {@code orgDisambiguatedDao}.
 *
 * @param orgDisambiguatedEntity the organization to update
 * @return the entity returned by {@code orgDisambiguatedDao.merge}
 */
@Override
public OrgDisambiguatedEntity updateOrgDisambiguated(OrgDisambiguatedEntity orgDisambiguatedEntity) {
    normalizeExternalIdentifiers(orgDisambiguatedEntity);

    // The cleanup step is optional and driven by configuration.
    if (cleanDuplicateExtIdOnOrgUpdate) {
        cleanDuplicatedExternalIdentifiersForOrgDisambiguated(orgDisambiguatedEntity);
    }

    OrgDisambiguatedEntity merged = orgDisambiguatedDao.merge(orgDisambiguatedEntity);
    return merged;
}

Expand Down Expand Up @@ -346,5 +352,41 @@ private void normalizeExternalIdentifiers(OrgDisambiguatedEntity orgDisambiguate
}
}
}

/**
 * Removes duplicated external identifiers of the given disambiguated org.
 *
 * Two identifiers are duplicates when they share the same
 * "identifierType::identifier" key. For each key exactly one identifier is
 * kept: the first one seen, unless a later duplicate is flagged as preferred,
 * in which case the later one replaces it. Every other identifier with that
 * key is deleted through {@code orgDisambiguatedExternalIdentifierDao}.
 *
 * Deletion is best-effort: an exception raised while removing is logged and
 * not propagated, so the caller's update can still proceed.
 *
 * NOTE(review): the deleted entities are not detached from the collection
 * returned by {@code getExternalIdentifiers()}; confirm how the subsequent
 * merge in the caller treats them.
 *
 * @param orgDisambiguatedEntity the organization whose external identifiers
 *        are de-duplicated; nothing happens when its identifier collection is
 *        {@code null}
 */
public void cleanDuplicatedExternalIdentifiersForOrgDisambiguated(OrgDisambiguatedEntity orgDisambiguatedEntity) {
    if (orgDisambiguatedEntity.getExternalIdentifiers() == null) {
        return;
    }

    // Identifier currently kept for each "type::value" key.
    HashMap<String, OrgDisambiguatedExternalIdentifierEntity> extIdsMapping = new HashMap<>();
    List<OrgDisambiguatedExternalIdentifierEntity> duplicatedExtIdentifiersToBeRemoved = new ArrayList<>();

    for (OrgDisambiguatedExternalIdentifierEntity identifier : orgDisambiguatedEntity.getExternalIdentifiers()) {
        String extIdentifierKey = identifier.getIdentifierType() + "::" + identifier.getIdentifier();
        OrgDisambiguatedExternalIdentifierEntity mappedExtIdentifier = extIdsMapping.get(extIdentifierKey);

        if (mappedExtIdentifier == null) {
            // First occurrence of this key: remember it so that later
            // occurrences can be recognised as duplicates.
            extIdsMapping.put(extIdentifierKey, identifier);
        } else if (Boolean.TRUE.equals(identifier.getPreferred())) {
            // A preferred duplicate wins over the one kept so far.
            // Boolean.TRUE.equals(...) also tolerates a null preferred flag.
            duplicatedExtIdentifiersToBeRemoved.add(mappedExtIdentifier);
            extIdsMapping.put(extIdentifierKey, identifier);
        } else {
            duplicatedExtIdentifiersToBeRemoved.add(identifier);
        }
    }

    if (duplicatedExtIdentifiersToBeRemoved.isEmpty()) {
        // Nothing to delete; skip the DAO round trip and the info-level log.
        return;
    }

    // Remove the duplicates from DB.
    try {
        LOGGER.info("About to remove " + duplicatedExtIdentifiersToBeRemoved.size() + " duplicate external Ids for Disambiguated Org " + orgDisambiguatedEntity.getId());
        for (OrgDisambiguatedExternalIdentifierEntity duplicate : duplicatedExtIdentifiersToBeRemoved) {
            orgDisambiguatedExternalIdentifierDao.remove(duplicate);
            LOGGER.debug("Removed ext id " + duplicate.getIdentifierType() + "::" + duplicate.getIdentifier() + "::" + duplicate.getId());
        }
    } catch (Exception ex) {
        // Deliberately best-effort: a failed cleanup must not abort the org update.
        LOGGER.error("Exception when removing duplicate external ids for Disambiguated Org " + orgDisambiguatedEntity.getId(), ex);
    }
}

}

0 comments on commit 116b656

Please sign in to comment.