diff --git a/orcid-core/src/main/java/org/orcid/core/manager/OrgDisambiguatedManager.java b/orcid-core/src/main/java/org/orcid/core/manager/OrgDisambiguatedManager.java index 3dd8c72cad0..76851b41b23 100644 --- a/orcid-core/src/main/java/org/orcid/core/manager/OrgDisambiguatedManager.java +++ b/orcid-core/src/main/java/org/orcid/core/manager/OrgDisambiguatedManager.java @@ -36,5 +36,7 @@ public interface OrgDisambiguatedManager { void createOrgDisambiguatedExternalIdentifier(OrgDisambiguatedExternalIdentifierEntity identifier); public List findOrgDisambiguatedIdsForSameExternalIdentifier(String identifier, String type); + + public void cleanDuplicatedExternalIdentifiersForOrgDisambiguated(OrgDisambiguatedEntity orgDisambiguatedEntity); } diff --git a/orcid-core/src/main/java/org/orcid/core/manager/impl/OrgDisambiguatedManagerImpl.java b/orcid-core/src/main/java/org/orcid/core/manager/impl/OrgDisambiguatedManagerImpl.java index 2a074b34149..03faaeb9208 100644 --- a/orcid-core/src/main/java/org/orcid/core/manager/impl/OrgDisambiguatedManagerImpl.java +++ b/orcid-core/src/main/java/org/orcid/core/manager/impl/OrgDisambiguatedManagerImpl.java @@ -11,6 +11,7 @@ import javax.annotation.Resource; import javax.transaction.Transactional; +import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.orcid.core.manager.OrgDisambiguatedManager; import org.orcid.core.messaging.JmsMessageSender; @@ -52,7 +53,7 @@ public class OrgDisambiguatedManagerImpl implements OrgDisambiguatedManager { @Resource private OrgDao orgDao; - + @Resource private OrgDisambiguatedExternalIdentifierDao orgDisambiguatedExternalIdentifierDao; @@ -68,6 +69,9 @@ public class OrgDisambiguatedManagerImpl implements OrgDisambiguatedManager { @Value("${org.orcid.persistence.messaging.updated.disambiguated_org.solr:indexDisambiguatedOrgs}") private String updateSolrQueueName; + @Value("${org.orcid.core.cleanExtIdsForOrg:false}") + private boolean cleanDuplicateExtIdForOrg; + @Resource(name = "jmsMessageSender") private JmsMessageSender messaging; @@ -76,7 +80,7 @@ public class OrgDisambiguatedManagerImpl implements OrgDisambiguatedManager { @Value("${org.orcid.persistence.messaging.updated.disambiguated_org.indexing.batchSize:1000}") private int indexingBatchSize; - + @Override synchronized public void processOrgsForIndexing() { LOGGER.info("About to process disambiguated orgs for indexing"); @@ -103,7 +107,7 @@ synchronized public void markOrgsForIndexingAsGroup() { entities = orgDisambiguatedDaoReadOnly.findOrgsToGroup(startIndex, indexingBatchSize); LOGGER.info("GROUP: Found chunk of {} disambiguated orgs for indexing as group", entities.size()); for (OrgDisambiguatedEntity entity : entities) { - + new OrgGrouping(entity, this).markGroupForIndexing(orgDisambiguatedDao); } startIndex = startIndex + indexingBatchSize; @@ -147,7 +151,7 @@ private OrgDisambiguatedSolrDocument convertEntityToDocument(OrgDisambiguatedEnt document.setOrgDisambiguatedPopularity(entity.getPopularity()); Set orgNames = new HashSet<>(); orgNames.add(entity.getName()); - + List orgs = orgDao.findByOrgDisambiguatedId(entity.getId()); if (orgs != null) { for (OrgEntity org : orgs) { @@ -219,6 +223,9 @@ public List searchOrgsFromSolrForSelfService(String searchTerm @Override public OrgDisambiguatedEntity updateOrgDisambiguated(OrgDisambiguatedEntity orgDisambiguatedEntity) { normalizeExternalIdentifiers(orgDisambiguatedEntity); + if (cleanDuplicateExtIdForOrg) { + cleanDuplicatedExternalIdentifiersForOrgDisambiguated(orgDisambiguatedEntity); + } return orgDisambiguatedDao.merge(orgDisambiguatedEntity); } @@ -262,7 +269,27 @@ public OrgDisambiguated findInDB(String idValue, String idType) { @Override public void createOrgDisambiguatedExternalIdentifier(OrgDisambiguatedExternalIdentifierEntity identifier) { normalizeExternalIdentifier(identifier); - orgDisambiguatedExternalIdentifierDao.persist(identifier); + boolean toPersist = true; + OrgDisambiguatedEntity orgDisambiguatedEntity = identifier.getOrgDisambiguated(); + if (orgDisambiguatedEntity != null && orgDisambiguatedEntity.getExternalIdentifiers() != null) { + String extIdentifierKeyToAdd = identifier.getIdentifierType() + "::" + identifier.getIdentifier(); + String extIdentifierKey; + for (OrgDisambiguatedExternalIdentifierEntity identifier1 : orgDisambiguatedEntity.getExternalIdentifiers()) { + extIdentifierKey = identifier1.getIdentifierType() + "::" + identifier1.getIdentifier(); + if (StringUtils.equals(extIdentifierKeyToAdd, extIdentifierKey)) { + toPersist = false; + break; + } + } + } + if (cleanDuplicateExtIdForOrg) { + cleanDuplicatedExternalIdentifiersForOrgDisambiguated(orgDisambiguatedEntity); + } + // check if in the current external id list the identifier already + if (toPersist) { + orgDisambiguatedExternalIdentifierDao.persist(identifier); + } + } @Override @@ -271,17 +298,16 @@ public void updateOrgDisambiguatedExternalIdentifier(OrgDisambiguatedExternalIde orgDisambiguatedExternalIdentifierDao.merge(identifier); } - public List findOrgDisambiguatedIdsForSameExternalIdentifier( String identifier, String type ) { + public List findOrgDisambiguatedIdsForSameExternalIdentifier(String identifier, String type) { List orgDisambiguatedIds = new ArrayList(); List extIds = orgDisambiguatedExternalIdentifierDao.findByIdentifierIdAndType(identifier, type); - extIds.stream().forEach((e) -> - { - OrgDisambiguatedEntity de = e.getOrgDisambiguated(); - // Group only if it is not a RINGGOLD org - if(de != null && !OrgDisambiguatedSourceType.RINGGOLD.name().equals(de.getSourceType())) { - orgDisambiguatedIds.add(convertEntity(de)); - } - }); + extIds.stream().forEach((e) -> { + OrgDisambiguatedEntity de = e.getOrgDisambiguated(); + // Group only if it is not a RINGGOLD org + if (de != null && !OrgDisambiguatedSourceType.RINGGOLD.name().equals(de.getSourceType())) { + orgDisambiguatedIds.add(convertEntity(de)); + } + }); return orgDisambiguatedIds; } @@ -347,4 +373,39 @@ private void normalizeExternalIdentifiers(OrgDisambiguatedEntity orgDisambiguate } } + public void cleanDuplicatedExternalIdentifiersForOrgDisambiguated(OrgDisambiguatedEntity orgDisambiguatedEntity) { + if (orgDisambiguatedEntity.getExternalIdentifiers() != null) { + HashMap extIdsMapping = new HashMap(); + String extIdentifierKey; + OrgDisambiguatedExternalIdentifierEntity mappedExtIdentifier; + List duplicatedExtIdentifiersToBeRemoved = new ArrayList(); + for (OrgDisambiguatedExternalIdentifierEntity identifier : orgDisambiguatedEntity.getExternalIdentifiers()) { + extIdentifierKey = identifier.getIdentifierType() + "::" + identifier.getIdentifier(); + if (extIdsMapping.containsKey(extIdentifierKey)) { + + if (!identifier.getPreferred()) { + duplicatedExtIdentifiersToBeRemoved.add(identifier); + } else { + mappedExtIdentifier = extIdsMapping.get(extIdentifierKey); + duplicatedExtIdentifiersToBeRemoved.add(mappedExtIdentifier); + extIdsMapping.put(extIdentifierKey, identifier); + } + + } + } + // remove the duplicates from DB + + LOGGER.info( + "About to remove " + duplicatedExtIdentifiersToBeRemoved.size() + " duplicate external Ids for Disambiguated Org " + orgDisambiguatedEntity.getId()); + duplicatedExtIdentifiersToBeRemoved.stream().forEach((e) -> { + try { + orgDisambiguatedExternalIdentifierDao.remove(e); + LOGGER.debug("Removed ext id " + e.getIdentifierType() + "::" + e.getIdentifier() + "::" + e.getId()); + } catch (Exception ex) { + LOGGER.error("Exception when removing duplicate external ids for Disambiguated Org " + orgDisambiguatedEntity.getId(), ex); + } + }); + + } + } }