From f6a57f19b96049180ffca5963dac277ea8f90c1f Mon Sep 17 00:00:00 2001 From: amontenegro Date: Fri, 20 Oct 2023 11:23:42 -0600 Subject: [PATCH] Data loaders done --- .../common/manager/EmailDomainManager.java | 3 +- .../manager/impl/EmailDomainManagerImpl.java | 15 ++++++-- .../loader/cli/EmailDomainLoader.java | 8 ++-- .../loader/cli/EmailDomainToRorLoader.java | 37 +++++++++++++++---- .../controllers/EmailDomainController.java | 2 +- 5 files changed, 48 insertions(+), 17 deletions(-) diff --git a/orcid-core/src/main/java/org/orcid/core/common/manager/EmailDomainManager.java b/orcid-core/src/main/java/org/orcid/core/common/manager/EmailDomainManager.java index 4fe0fb04c15..167e25e2d2f 100644 --- a/orcid-core/src/main/java/org/orcid/core/common/manager/EmailDomainManager.java +++ b/orcid-core/src/main/java/org/orcid/core/common/manager/EmailDomainManager.java @@ -2,6 +2,7 @@ import java.util.List; +import org.orcid.core.common.manager.impl.EmailDomainManagerImpl.STATUS; import org.orcid.persistence.jpa.entities.EmailDomainEntity; public interface EmailDomainManager { @@ -13,5 +14,5 @@ public interface EmailDomainManager { List findByCategory(EmailDomainEntity.DomainCategory category); - EmailDomainEntity createOrUpdateEmailDomain(String emailDomain, String rorId); + STATUS createOrUpdateEmailDomain(String emailDomain, String rorId); } diff --git a/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EmailDomainManagerImpl.java b/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EmailDomainManagerImpl.java index d096f255f04..dcfe88f952b 100644 --- a/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EmailDomainManagerImpl.java +++ b/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EmailDomainManagerImpl.java @@ -13,6 +13,8 @@ public class EmailDomainManagerImpl implements EmailDomainManager { + public enum STATUS {CREATED, UPDATED}; + @Resource(name = "emailDomainDao") private EmailDomainDao emailDomainDao; @@ -62,16 +64,21 @@ public List findByCategory(DomainCategory category) { } @Override - public EmailDomainEntity createOrUpdateEmailDomain(String emailDomain, String rorId) { + public STATUS createOrUpdateEmailDomain(String emailDomain, String rorId) { EmailDomainEntity existingEntity = emailDomainDaoReadOnly.findByEmailDoman(emailDomain); if(existingEntity != null) { if(!rorId.equals(existingEntity.getRorId())) { - emailDomainDao.updateRorId(existingEntity.getId(), rorId); + boolean updated = emailDomainDao.updateRorId(existingEntity.getId(), rorId); + if(updated) + return STATUS.UPDATED; } } else { - return emailDomainDao.createEmailDomain(emailDomain, DomainCategory.PROFESSIONAL, rorId); + EmailDomainEntity newEntity = emailDomainDao.createEmailDomain(emailDomain, DomainCategory.PROFESSIONAL, rorId); + if (newEntity != null) { + return STATUS.CREATED; + } } - return existingEntity; + return null; } } diff --git a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainLoader.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainLoader.java index 30977c1abfe..f7517c6a436 100644 --- a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainLoader.java +++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainLoader.java @@ -97,9 +97,11 @@ private void process() { total += 1; } } - LOG.warn("List of invalid domains:"); - for(String invalidDomain : invalidDomains) { - LOG.warn(invalidDomain); + if(!invalidDomains.isEmpty()) { + LOG.warn("List of invalid domains:"); + for(String invalidDomain : invalidDomains) { + LOG.info(invalidDomain); + } } LOG.info("Process done, total: {}, new entities: {}, updated entities: {}", total, newEntities, updatedEntities); } diff --git a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java index efd4cc6319a..adfd0b603f6 100644 --- a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java +++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java @@ -13,6 +13,7 @@ import java.util.Set; import org.orcid.core.common.manager.EmailDomainManager; +import org.orcid.core.common.manager.impl.EmailDomainManagerImpl.STATUS; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.context.ApplicationContext; @@ -34,6 +35,9 @@ public class EmailDomainToRorLoader { Map map = new HashMap(); + private int updatedEntries = 0; + private int createdEntries = 0; + public EmailDomainToRorLoader(String filePath) { this.filePath = filePath; init(filePath); @@ -62,13 +66,17 @@ private void load(String filePath) throws IOException { FileReader fileReader = new FileReader(filePath); CsvMapper csvMapper = new CsvMapper(); csvMapper.enable(CsvParser.Feature.WRAP_AS_ARRAY); + csvMapper.enable(CsvParser.Feature.TRIM_SPACES); + MappingIterator> it = csvMapper.readerForListOf(String.class).readValues(fileReader); if (it != null) { csvData = new ArrayList>(); while(it.hasNext()) { List r = it.next(); - csvData.add(r); + // Hack to avoid adding empty lines if they are present, we need at least 2 columns, the domain and the ror id + if(r.size() > 1) + csvData.add(r); } } } @@ -93,9 +101,9 @@ private void processCsvData() { } else { dtrm.addIdWithNoParent(rorId); } - map.put(rorId, dtrm); + map.put(domain, dtrm); } else { - DomainToRorMap dtrm = map.get(rorId); + DomainToRorMap dtrm = map.get(domain); if(hasParent) { dtrm.addIdWithParent(rorId); } else { @@ -110,20 +118,33 @@ private void storeDomainToRorMap() { LOG.debug("Processing domain {}", element.getDomain()); // If the domain has only one entry with no parent, store that one if(element.getIdsWithNoParent().size() == 1) { - emailDomainManager.createOrUpdateEmailDomain(element.getDomain(), element.getIdsWithNoParent().get(0)); + STATUS s = emailDomainManager.createOrUpdateEmailDomain(element.getDomain(), element.getIdsWithNoParent().get(0)); + if(STATUS.CREATED.equals(s)) { + createdEntries++; + } else if (STATUS.UPDATED.equals(s)) { + updatedEntries++; + } } else if(element.getIdsWithParent().size() == 1) { // Else, if the domain has only one entry with parent, store that one - emailDomainManager.createOrUpdateEmailDomain(element.getDomain(), element.getIdsWithParent().get(0)); + STATUS s = emailDomainManager.createOrUpdateEmailDomain(element.getDomain(), element.getIdsWithParent().get(0)); + if(STATUS.CREATED.equals(s)) { + createdEntries++; + } else if (STATUS.UPDATED.equals(s)) { + updatedEntries++; + } } else { // Else log a warning because there is no way to provide a suggestion invalidDomains.add(element.getDomain()); } } - LOG.warn("The following domains couldn't be mapped"); - for(String invalidDomain : invalidDomains) { - LOG.warn("{}", invalidDomain); + if(!invalidDomains.isEmpty()) { + LOG.warn("The following domains couldn't be mapped ({} In total):", invalidDomains.size()); + for(String invalidDomain : invalidDomains) { + LOG.warn("{}", invalidDomain); + } } + LOG.info("Created entries: {}, updated entries: {}", createdEntries, updatedEntries); } private class DomainToRorMap { diff --git a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java index 76c33f8d640..39cd788af50 100644 --- a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java +++ b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java @@ -27,7 +27,7 @@ public class EmailDomainController { ObjectMapper mapper = new ObjectMapper(); if(domain == null || domain.isBlank() || domain.length() > 254) { ObjectNode response = mapper.createObjectNode(); - response.put("error", "Domain lenght too long, empty or invalid"); + response.put("error", "Domain length too short, empty or invalid"); return response; } domain = OrcidStringUtils.stripHtml(domain);