Skip to content

Commit

Permalink
Merge branch 'EmailDomainToRorMap' of https://github.com/ORCID/ORCID-…
Browse files Browse the repository at this point in the history
…Source into EmailDomainToRorMap
  • Loading branch information
amontenegro committed Oct 20, 2023
2 parents afcb0c8 + d6b2a31 commit 9b9a1b6
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,7 @@ public interface OrgDisambiguatedManager {
/**
 * Stores a new external identifier for a disambiguated org.
 *
 * @param identifier
 *            the external identifier entity to create
 */
void createOrgDisambiguatedExternalIdentifier(OrgDisambiguatedExternalIdentifierEntity identifier);

/**
 * Finds the disambiguated orgs that carry the given external identifier.
 *
 * @param identifier
 *            the external identifier value to look up
 * @param type
 *            the external identifier type to look up
 * @return the matching disambiguated orgs
 */
public List<OrgDisambiguated> findOrgDisambiguatedIdsForSameExternalIdentifier(String identifier, String type);

/**
 * Removes duplicated external identifiers (entries sharing the same
 * identifier type and identifier value) from the given disambiguated org.
 *
 * @param orgDisambiguatedEntity
 *            the disambiguated org whose external identifiers are checked
 */
public void cleanDuplicatedExternalIdentifiersForOrgDisambiguated(OrgDisambiguatedEntity orgDisambiguatedEntity);

}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import javax.annotation.Resource;
import javax.transaction.Transactional;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.orcid.core.manager.OrgDisambiguatedManager;
import org.orcid.core.messaging.JmsMessageSender;
Expand Down Expand Up @@ -52,7 +53,7 @@ public class OrgDisambiguatedManagerImpl implements OrgDisambiguatedManager {

// DAO for org rows; used in this class to look up the orgs linked to a disambiguated org.
@Resource
private OrgDao orgDao;

// DAO used to persist, merge, remove and query the external identifiers of disambiguated orgs.
@Resource
private OrgDisambiguatedExternalIdentifierDao orgDisambiguatedExternalIdentifierDao;

Expand All @@ -68,6 +69,9 @@ public class OrgDisambiguatedManagerImpl implements OrgDisambiguatedManager {
// Injected from the property below; falls back to "indexDisambiguatedOrgs" when the property is not set.
// NOTE(review): presumably the queue that receives disambiguated orgs for Solr indexing - confirm at the call site.
@Value("${org.orcid.persistence.messaging.updated.disambiguated_org.solr:indexDisambiguatedOrgs}")
private String updateSolrQueueName;

// Feature flag (default false): when true, the duplicated external identifier cleanup is invoked
// whenever a disambiguated org is updated or one of its external identifiers is created.
@Value("${org.orcid.core.cleanExtIdsForOrg:false}")
private boolean cleanDuplicateExtIdForOrg;

// JMS message sender, injected by bean name.
@Resource(name = "jmsMessageSender")
private JmsMessageSender messaging;

Expand All @@ -76,7 +80,7 @@ public class OrgDisambiguatedManagerImpl implements OrgDisambiguatedManager {

// Chunk size used when paging through disambiguated orgs (it is both the page size passed to the DAO
// and the step added to the start index); defaults to 1000 when the property is not set.
@Value("${org.orcid.persistence.messaging.updated.disambiguated_org.indexing.batchSize:1000}")
private int indexingBatchSize;

@Override
synchronized public void processOrgsForIndexing() {
LOGGER.info("About to process disambiguated orgs for indexing");
Expand All @@ -103,7 +107,7 @@ synchronized public void markOrgsForIndexingAsGroup() {
entities = orgDisambiguatedDaoReadOnly.findOrgsToGroup(startIndex, indexingBatchSize);
LOGGER.info("GROUP: Found chunk of {} disambiguated orgs for indexing as group", entities.size());
for (OrgDisambiguatedEntity entity : entities) {

new OrgGrouping(entity, this).markGroupForIndexing(orgDisambiguatedDao);
}
startIndex = startIndex + indexingBatchSize;
Expand Down Expand Up @@ -147,7 +151,7 @@ private OrgDisambiguatedSolrDocument convertEntityToDocument(OrgDisambiguatedEnt
document.setOrgDisambiguatedPopularity(entity.getPopularity());
Set<String> orgNames = new HashSet<>();
orgNames.add(entity.getName());

List<OrgEntity> orgs = orgDao.findByOrgDisambiguatedId(entity.getId());
if (orgs != null) {
for (OrgEntity org : orgs) {
Expand Down Expand Up @@ -219,6 +223,9 @@ public List<OrgDisambiguated> searchOrgsFromSolrForSelfService(String searchTerm
/**
 * Updates a disambiguated org.
 * <p>
 * The org's external identifiers are normalized first; when the
 * {@code cleanDuplicateExtIdForOrg} flag is on, duplicated external
 * identifiers are cleaned up as well. Finally the entity is merged through
 * the DAO.
 *
 * @param orgDisambiguatedEntity
 *            the disambiguated org to update
 * @return the entity returned by the DAO merge
 */
@Override
public OrgDisambiguatedEntity updateOrgDisambiguated(OrgDisambiguatedEntity orgDisambiguatedEntity) {
    // Normalization always happens, regardless of the cleanup flag.
    normalizeExternalIdentifiers(orgDisambiguatedEntity);

    final boolean dedupeRequested = cleanDuplicateExtIdForOrg;
    if (dedupeRequested) {
        cleanDuplicatedExternalIdentifiersForOrgDisambiguated(orgDisambiguatedEntity);
    }

    final OrgDisambiguatedEntity merged = orgDisambiguatedDao.merge(orgDisambiguatedEntity);
    return merged;
}

Expand Down Expand Up @@ -262,7 +269,27 @@ public OrgDisambiguated findInDB(String idValue, String idType) {
/**
 * Normalizes the given external identifier and persists it, unless the
 * disambiguated org it belongs to already lists an external identifier with
 * the same type and value.
 * <p>
 * When the {@code cleanDuplicateExtIdForOrg} flag is on and the identifier
 * has an org attached, the org's duplicated external identifiers are cleaned
 * up before the new identifier is persisted.
 *
 * @param identifier
 *            the external identifier to create
 */
@Override
public void createOrgDisambiguatedExternalIdentifier(OrgDisambiguatedExternalIdentifierEntity identifier) {
    normalizeExternalIdentifier(identifier);
    OrgDisambiguatedEntity orgDisambiguatedEntity = identifier.getOrgDisambiguated();

    // Check whether the org's current external id list already contains
    // this identifier (same type and same value).
    boolean toPersist = true;
    if (orgDisambiguatedEntity != null && orgDisambiguatedEntity.getExternalIdentifiers() != null) {
        String extIdentifierKeyToAdd = identifier.getIdentifierType() + "::" + identifier.getIdentifier();
        for (OrgDisambiguatedExternalIdentifierEntity existing : orgDisambiguatedEntity.getExternalIdentifiers()) {
            String extIdentifierKey = existing.getIdentifierType() + "::" + existing.getIdentifier();
            if (StringUtils.equals(extIdentifierKeyToAdd, extIdentifierKey)) {
                toPersist = false;
                break;
            }
        }
    }

    // There is nothing to clean when the identifier has no org attached.
    if (cleanDuplicateExtIdForOrg && orgDisambiguatedEntity != null) {
        cleanDuplicatedExternalIdentifiersForOrgDisambiguated(orgDisambiguatedEntity);
    }

    // Persist exactly once, and only when the identifier is not already
    // present, so that no new duplicate is created.
    if (toPersist) {
        orgDisambiguatedExternalIdentifierDao.persist(identifier);
    }
}

@Override
Expand All @@ -271,17 +298,16 @@ public void updateOrgDisambiguatedExternalIdentifier(OrgDisambiguatedExternalIde
orgDisambiguatedExternalIdentifierDao.merge(identifier);
}

public List<OrgDisambiguated> findOrgDisambiguatedIdsForSameExternalIdentifier( String identifier, String type ) {
public List<OrgDisambiguated> findOrgDisambiguatedIdsForSameExternalIdentifier(String identifier, String type) {
List<OrgDisambiguated> orgDisambiguatedIds = new ArrayList<OrgDisambiguated>();
List<OrgDisambiguatedExternalIdentifierEntity> extIds = orgDisambiguatedExternalIdentifierDao.findByIdentifierIdAndType(identifier, type);
extIds.stream().forEach((e) ->
{
OrgDisambiguatedEntity de = e.getOrgDisambiguated();
// Group only if it is not a RINGGOLD org
if(de != null && !OrgDisambiguatedSourceType.RINGGOLD.name().equals(de.getSourceType())) {
orgDisambiguatedIds.add(convertEntity(de));
}
});
extIds.stream().forEach((e) -> {
OrgDisambiguatedEntity de = e.getOrgDisambiguated();
// Group only if it is not a RINGGOLD org
if (de != null && !OrgDisambiguatedSourceType.RINGGOLD.name().equals(de.getSourceType())) {
orgDisambiguatedIds.add(convertEntity(de));
}
});
return orgDisambiguatedIds;
}

Expand Down Expand Up @@ -347,4 +373,39 @@ private void normalizeExternalIdentifiers(OrgDisambiguatedEntity orgDisambiguate
}
}

/**
 * Removes duplicated external identifiers of the given disambiguated org.
 * <p>
 * Two external identifiers are duplicates when they share the same
 * identifier type and identifier value. For each group of duplicates a
 * single entry is kept: a preferred entry replaces a previously kept one,
 * otherwise the first entry seen wins. Every other entry is removed through
 * the external identifier DAO. A failure to remove one entry is logged and
 * does not stop the removal of the remaining ones.
 *
 * @param orgDisambiguatedEntity
 *            the disambiguated org to clean; nothing is done when it is
 *            {@code null} or has no external identifier collection
 */
@Override
public void cleanDuplicatedExternalIdentifiersForOrgDisambiguated(OrgDisambiguatedEntity orgDisambiguatedEntity) {
    if (orgDisambiguatedEntity == null || orgDisambiguatedEntity.getExternalIdentifiers() == null) {
        return;
    }

    // Entry currently kept for each "type::value" key.
    HashMap<String, OrgDisambiguatedExternalIdentifierEntity> extIdsMapping = new HashMap<>();
    List<OrgDisambiguatedExternalIdentifierEntity> duplicatedExtIdentifiersToBeRemoved = new ArrayList<>();

    for (OrgDisambiguatedExternalIdentifierEntity identifier : orgDisambiguatedEntity.getExternalIdentifiers()) {
        String extIdentifierKey = identifier.getIdentifierType() + "::" + identifier.getIdentifier();
        OrgDisambiguatedExternalIdentifierEntity mappedExtIdentifier = extIdsMapping.get(extIdentifierKey);
        if (mappedExtIdentifier == null) {
            // First occurrence: record it so that later entries with the
            // same key are recognised as duplicates.
            extIdsMapping.put(extIdentifierKey, identifier);
        } else if (Boolean.TRUE.equals(identifier.getPreferred())) {
            // A preferred duplicate takes over from the entry kept so far.
            duplicatedExtIdentifiersToBeRemoved.add(mappedExtIdentifier);
            extIdsMapping.put(extIdentifierKey, identifier);
        } else {
            duplicatedExtIdentifiersToBeRemoved.add(identifier);
        }
    }

    // remove the duplicates from DB
    // NOTE(review): the removed entities stay in the org's in-memory
    // external identifier collection - confirm a later merge of the org does
    // not bring them back.
    LOGGER.info("About to remove {} duplicate external Ids for Disambiguated Org {}", duplicatedExtIdentifiersToBeRemoved.size(),
            orgDisambiguatedEntity.getId());
    for (OrgDisambiguatedExternalIdentifierEntity duplicate : duplicatedExtIdentifiersToBeRemoved) {
        try {
            orgDisambiguatedExternalIdentifierDao.remove(duplicate);
            LOGGER.debug("Removed ext id {}::{}::{}", duplicate.getIdentifierType(), duplicate.getIdentifier(), duplicate.getId());
        } catch (Exception ex) {
            // Best effort: keep going with the remaining duplicates.
            LOGGER.error("Exception when removing duplicate external ids for Disambiguated Org " + orgDisambiguatedEntity.getId(), ex);
        }
    }
}
}

0 comments on commit 9b9a1b6

Please sign in to comment.