Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8855 duplicate isni external identifiers for same disambiguated organisation #6910

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,7 @@ public interface OrgDisambiguatedManager {
void createOrgDisambiguatedExternalIdentifier(OrgDisambiguatedExternalIdentifierEntity identifier);

public List<OrgDisambiguated> findOrgDisambiguatedIdsForSameExternalIdentifier(String identifier, String type);

public void cleanDuplicatedExternalIdentifiersForOrgDisambiguated(OrgDisambiguatedEntity orgDisambiguatedEntity);

}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import javax.annotation.Resource;
import javax.transaction.Transactional;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.orcid.core.manager.OrgDisambiguatedManager;
import org.orcid.core.messaging.JmsMessageSender;
Expand Down Expand Up @@ -52,7 +53,7 @@ public class OrgDisambiguatedManagerImpl implements OrgDisambiguatedManager {

@Resource
private OrgDao orgDao;

@Resource
private OrgDisambiguatedExternalIdentifierDao orgDisambiguatedExternalIdentifierDao;

Expand All @@ -68,6 +69,9 @@ public class OrgDisambiguatedManagerImpl implements OrgDisambiguatedManager {
@Value("${org.orcid.persistence.messaging.updated.disambiguated_org.solr:indexDisambiguatedOrgs}")
private String updateSolrQueueName;

@Value("${org.orcid.core.cleanExtIdsForOrg:false}")
private boolean cleanDuplicateExtIdForOrg;

@Resource(name = "jmsMessageSender")
private JmsMessageSender messaging;

Expand All @@ -76,7 +80,7 @@ public class OrgDisambiguatedManagerImpl implements OrgDisambiguatedManager {

@Value("${org.orcid.persistence.messaging.updated.disambiguated_org.indexing.batchSize:1000}")
private int indexingBatchSize;

@Override
synchronized public void processOrgsForIndexing() {
LOGGER.info("About to process disambiguated orgs for indexing");
Expand All @@ -103,7 +107,7 @@ synchronized public void markOrgsForIndexingAsGroup() {
entities = orgDisambiguatedDaoReadOnly.findOrgsToGroup(startIndex, indexingBatchSize);
LOGGER.info("GROUP: Found chunk of {} disambiguated orgs for indexing as group", entities.size());
for (OrgDisambiguatedEntity entity : entities) {

new OrgGrouping(entity, this).markGroupForIndexing(orgDisambiguatedDao);
}
startIndex = startIndex + indexingBatchSize;
Expand Down Expand Up @@ -147,7 +151,7 @@ private OrgDisambiguatedSolrDocument convertEntityToDocument(OrgDisambiguatedEnt
document.setOrgDisambiguatedPopularity(entity.getPopularity());
Set<String> orgNames = new HashSet<>();
orgNames.add(entity.getName());

List<OrgEntity> orgs = orgDao.findByOrgDisambiguatedId(entity.getId());
if (orgs != null) {
for (OrgEntity org : orgs) {
Expand Down Expand Up @@ -219,6 +223,9 @@ public List<OrgDisambiguated> searchOrgsFromSolrForSelfService(String searchTerm
@Override
public OrgDisambiguatedEntity updateOrgDisambiguated(OrgDisambiguatedEntity orgDisambiguatedEntity) {
normalizeExternalIdentifiers(orgDisambiguatedEntity);
if (cleanDuplicateExtIdForOrg) {
cleanDuplicatedExternalIdentifiersForOrgDisambiguated(orgDisambiguatedEntity);
}
return orgDisambiguatedDao.merge(orgDisambiguatedEntity);
}

Expand Down Expand Up @@ -262,7 +269,27 @@ public OrgDisambiguated findInDB(String idValue, String idType) {
@Override
public void createOrgDisambiguatedExternalIdentifier(OrgDisambiguatedExternalIdentifierEntity identifier) {
normalizeExternalIdentifier(identifier);
orgDisambiguatedExternalIdentifierDao.persist(identifier);
boolean toPersist = true;
OrgDisambiguatedEntity orgDisambiguatedEntity = identifier.getOrgDisambiguated();
if (orgDisambiguatedEntity != null && orgDisambiguatedEntity.getExternalIdentifiers() != null) {
String extIdentifierKeyToAdd = identifier.getIdentifierType() + "::" + identifier.getIdentifier();
String extIdentifierKey;
for (OrgDisambiguatedExternalIdentifierEntity identifier1 : orgDisambiguatedEntity.getExternalIdentifiers()) {
extIdentifierKey = identifier1.getIdentifierType() + "::" + identifier1.getIdentifier();
if (StringUtils.equals(extIdentifierKeyToAdd, extIdentifierKey)) {
toPersist = false;
break;
}
}
}
if (cleanDuplicateExtIdForOrg) {
cleanDuplicatedExternalIdentifiersForOrgDisambiguated(orgDisambiguatedEntity);
}
// check if in the current external id list the identifier already
if (toPersist) {
orgDisambiguatedExternalIdentifierDao.persist(identifier);
}

}

@Override
Expand All @@ -271,17 +298,16 @@ public void updateOrgDisambiguatedExternalIdentifier(OrgDisambiguatedExternalIde
orgDisambiguatedExternalIdentifierDao.merge(identifier);
}

public List<OrgDisambiguated> findOrgDisambiguatedIdsForSameExternalIdentifier( String identifier, String type ) {
public List<OrgDisambiguated> findOrgDisambiguatedIdsForSameExternalIdentifier(String identifier, String type) {
List<OrgDisambiguated> orgDisambiguatedIds = new ArrayList<OrgDisambiguated>();
List<OrgDisambiguatedExternalIdentifierEntity> extIds = orgDisambiguatedExternalIdentifierDao.findByIdentifierIdAndType(identifier, type);
extIds.stream().forEach((e) ->
{
OrgDisambiguatedEntity de = e.getOrgDisambiguated();
// Group only if it is not a RINGGOLD org
if(de != null && !OrgDisambiguatedSourceType.RINGGOLD.name().equals(de.getSourceType())) {
orgDisambiguatedIds.add(convertEntity(de));
}
});
extIds.stream().forEach((e) -> {
OrgDisambiguatedEntity de = e.getOrgDisambiguated();
// Group only if it is not a RINGGOLD org
if (de != null && !OrgDisambiguatedSourceType.RINGGOLD.name().equals(de.getSourceType())) {
orgDisambiguatedIds.add(convertEntity(de));
}
});
return orgDisambiguatedIds;
}

Expand Down Expand Up @@ -347,4 +373,39 @@ private void normalizeExternalIdentifiers(OrgDisambiguatedEntity orgDisambiguate
}
}

public void cleanDuplicatedExternalIdentifiersForOrgDisambiguated(OrgDisambiguatedEntity orgDisambiguatedEntity) {
if (orgDisambiguatedEntity.getExternalIdentifiers() != null) {
HashMap<String, OrgDisambiguatedExternalIdentifierEntity> extIdsMapping = new HashMap<String, OrgDisambiguatedExternalIdentifierEntity>();
String extIdentifierKey;
OrgDisambiguatedExternalIdentifierEntity mappedExtIdentifier;
List<OrgDisambiguatedExternalIdentifierEntity> duplicatedExtIdentifiersToBeRemoved = new ArrayList<OrgDisambiguatedExternalIdentifierEntity>();
for (OrgDisambiguatedExternalIdentifierEntity identifier : orgDisambiguatedEntity.getExternalIdentifiers()) {
extIdentifierKey = identifier.getIdentifierType() + "::" + identifier.getIdentifier();
if (extIdsMapping.containsKey(extIdentifierKey)) {

if (!identifier.getPreferred()) {
duplicatedExtIdentifiersToBeRemoved.add(identifier);
} else {
mappedExtIdentifier = extIdsMapping.get(extIdentifierKey);
duplicatedExtIdentifiersToBeRemoved.add(mappedExtIdentifier);
extIdsMapping.put(extIdentifierKey, identifier);
}

}
}
// remove the duplicates from DB

LOGGER.info(
"About to remove " + duplicatedExtIdentifiersToBeRemoved.size() + " duplicate external Ids for Disambiguated Org " + orgDisambiguatedEntity.getId());
duplicatedExtIdentifiersToBeRemoved.stream().forEach((e) -> {
try {
orgDisambiguatedExternalIdentifierDao.remove(e);
LOGGER.debug("Removed ext id " + e.getIdentifierType() + "::" + e.getIdentifier() + "::" + e.getId());
} catch (Exception ex) {
LOGGER.error("Exception when removing duplicate external ids for Disambiguated Org " + orgDisambiguatedEntity.getId(), ex);
}
});

}
}
}