From 00bc0d3c8d78825681f88cf40ff2ac6d2fc9cc1a Mon Sep 17 00:00:00 2001 From: Camelia Dumitru Date: Fri, 10 May 2024 09:11:52 +0100 Subject: [PATCH] Updated ROR Loader to use V2 of the ROR schema, the additional fields not stored yet --- .../loader/source/ror/RorOrgLoadSource.java | 907 +++++++++--------- .../loader/RorOrgLoadSourceTest.java | 66 +- .../ror/ror_1_org_2_ext_ids_#2_preferred.json | 75 +- .../ror/ror_1_org_5_external_identifiers.json | 94 +- .../ror/ror_1_org_6_external_identifiers.json | 99 +- ..._1_org_updated_5_external_identifiers.json | 96 +- .../ror_4_orgs_27_external_identifiers.json | 354 ++++++- 7 files changed, 1178 insertions(+), 513 deletions(-) diff --git a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/ror/RorOrgLoadSource.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/ror/RorOrgLoadSource.java index a42ddc906c5..8e9b4f2bd46 100644 --- a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/ror/RorOrgLoadSource.java +++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/ror/RorOrgLoadSource.java @@ -43,445 +43,476 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ArrayNode; - @Component public class RorOrgLoadSource implements OrgLoadSource { - private static final Logger LOGGER = LoggerFactory.getLogger(RorOrgLoadSource.class); - - private static final String WIKIPEDIA_URL = "wikipedia_url"; - - - @Value("${org.orcid.core.orgs.ror.enabled:true}") - private boolean enabled; - - @Value("${org.orcid.core.orgs.clients.userAgent}") - private String userAgent; - - @Resource(name = "rorOrgDataClient") - private OrgDataClient orgDataClient; - - @Value("${org.orcid.core.orgs.ror.localZipPath:/tmp/grid/ror.zip}") - private String zipFilePath; - - @Value("${org.orcid.core.orgs.ror.localDataPath:/tmp/grid/ror.json}") - private String localDataPath; - - @Value("${org.orcid.core.orgs.ror.indexAllEnabled:false}") - private 
boolean indexAllEnabled; - - @Resource - private OrgDisambiguatedDao orgDisambiguatedDao; - - @Resource - private OrgDisambiguatedManager orgDisambiguatedManager; - - @Resource - private OrgDisambiguatedExternalIdentifierDao orgDisambiguatedExternalIdentifierDao; - - @Value("${org.orcid.core.orgs.ror.zenodoRecordsUrl:https://zenodo.org/api/records?communities=ror-data}") - private String rorZenodoRecordsUrl; - - @Resource - private FileRotator fileRotator; - - private Set UPDATED_RORS; - - @Override - public String getSourceName() { - return "ROR"; - } - - @Override - public boolean loadOrgData() { - if (!enabled) { - throw new LoadSourceDisabledException(getSourceName()); - } - - return loadData(); - } - - @Override - public boolean downloadOrgData() { - try { - fileRotator.removeFileIfExists(zipFilePath); - fileRotator.removeFileIfExists(localDataPath); - - ZenodoRecords zenodoRecords = orgDataClient.get(rorZenodoRecordsUrl+"&sort=mostrecent&size=1", userAgent, ZenodoRecords.class); - ZenodoRecordsHit zenodoHit = zenodoRecords.getHits().getHits().get(0); - - boolean success = false; - - //we are returning the collection ordered by mostrecent and size 1, we need to get the last element in the list that has the last version - String zenodoUrl = zenodoHit.getFiles().get(zenodoHit.getFiles().size()>0?zenodoHit.getFiles().size()-1:0).getLinks().getSelf(); - LOGGER.info("Retrieving ROR data from: " + zenodoUrl); - success = orgDataClient.downloadFile(zenodoUrl, userAgent, zipFilePath); - - try { - LOGGER.info("Unzipping ROR ...."); - unzipData(); - } catch (IOException e) { - LOGGER.error("Error unzipping Zenodo ROR data", e); - throw new RuntimeException(e); - } - return success; - } catch (Exception e) { - LOGGER.error("Error downloading Zenodo ROR data", e); - return false; - } - } - - private void unzipData() throws IOException { - byte[] buffer = new byte[1024]; - ZipInputStream zis = new ZipInputStream(new FileInputStream(zipFilePath)); - ZipEntry zipEntry = 
zis.getNextEntry(); - while (zipEntry != null) { - File jsonData = new File(localDataPath); - FileOutputStream fos = new FileOutputStream(jsonData); - int len; - while ((len = zis.read(buffer)) > 0) { - fos.write(buffer, 0, len); - } - fos.close(); - break; - } - zis.closeEntry(); - zis.close(); - } - - private boolean loadData() { - try { - LOGGER.info("Loading ROR data..."); - Instant start = Instant.now(); - File fileToLoad = new File(localDataPath); - if (!fileToLoad.exists()) { - LOGGER.error("File {} doesn't exist", localDataPath); - return false; - } - - //ror returns the JSON as Array of institutes - JsonNode rootNode = JsonUtils.read(fileToLoad); - UPDATED_RORS = new HashSet(); - - rootNode.forEach(institute -> { - String sourceId = institute.get("id").isNull() ? null : institute.get("id").asText(); - String status = institute.get("status").isNull() ? null : institute.get("status").asText(); - if ("active".equalsIgnoreCase(status) || "inactive".equalsIgnoreCase(status)) { - String name = institute.get("name").isNull() ? null : institute.get("name").asText(); - StringJoiner sj = new StringJoiner(","); - String orgType = null; - if (!institute.get("types").isNull()) { - ((ArrayNode) institute.get("types")).forEach(x -> sj.add(x.textValue())); - orgType = sj.toString(); - } - JsonNode countryNode = institute.get("country").isNull() ? null : (JsonNode) institute.get("country"); - Iso3166Country country = null; - if(countryNode != null) { - String countryCode = countryNode.get("country_code").isNull() ? null : countryNode.get("country_code").asText(); - country = StringUtils.isBlank(countryCode) ? null : Iso3166Country.fromValue(countryCode); - } - ArrayNode addresses = institute.get("addresses").isNull() ? 
null : (ArrayNode) institute.get("addresses"); - String city = null; - String region = null; - - if (addresses != null) { - for (JsonNode address : addresses) { - if (addresses.size() == 1 || (address.get("primary") != null && address.get("primary").asBoolean())) { - city = address.get("city").isNull() ? null : address.get("city").asText(); - region = address.get("state").isNull() ? null : address.get("state").asText(); - } - } - } - - ArrayNode urls = institute.get("links").isNull() ? null : (ArrayNode) institute.get("links"); - // Use the first URL - String url = (urls != null && urls.size() > 0) ? urls.get(0).asText() : null; - - // Creates or updates an institute - OrgDisambiguatedEntity entity = processInstitute(sourceId, name, country, city, region, url, orgType); - - // Creates external identifiers - processExternalIdentifiers(entity, institute); - } else if ("redirected".equals(status)) { - String primaryId = institute.get("redirect").isNull() ? null : institute.get("redirect").asText(); - deprecateOrg(sourceId, primaryId); - } else if ("withdrawn".equals(status) || "obsolete".equals(status)) { - obsoleteOrg(sourceId); - } else { - LOGGER.error("Illegal status '" + status + "' for institute " + sourceId); - } - }); - - // Check if any RORs with external identifiers updated and group them - groupRORsWithUpdatedExternalModifiers(); - - LOGGER.info("Time taken to process the data: {}", Duration.between(start, Instant.now()).toString()); - return true; - } catch (Exception e) { - LOGGER.error("Error loading ROR data", e); - return false; - } - } - - private OrgDisambiguatedEntity processInstitute(String sourceId, String name, Iso3166Country country, String city, String region, String url, String orgType) { - OrgDisambiguatedEntity existingBySourceId = orgDisambiguatedDao.findBySourceIdAndSourceType(sourceId, OrgDisambiguatedSourceType.ROR.name()); - if (existingBySourceId != null) { - if (entityChanged(existingBySourceId, name, country.value(), city, region, 
url, orgType) || indexAllEnabled) { - existingBySourceId.setCity(city); - existingBySourceId.setCountry(country.name()); - existingBySourceId.setName(name); - existingBySourceId.setOrgType(orgType); - existingBySourceId.setRegion(region); - existingBySourceId.setUrl(url); - existingBySourceId.setIndexingStatus(IndexingStatus.PENDING); - try { - // mark group for indexing - new OrgGrouping(existingBySourceId, orgDisambiguatedManager).markGroupForIndexing(orgDisambiguatedDao); - - } catch (Exception ex) { - LOGGER.error("Error when grouping by ROR and marking group orgs for reindexing, eating the exception", ex); - } - orgDisambiguatedManager.updateOrgDisambiguated(existingBySourceId); - } - return existingBySourceId; - } - - // Create a new disambiguated org - OrgDisambiguatedEntity newOrg=createDisambiguatedOrg(sourceId, name, orgType, country, city, region, url); - try { - //mark group for indexing - new OrgGrouping(newOrg, orgDisambiguatedManager).markGroupForIndexing(orgDisambiguatedDao); - } - catch (Exception ex) { - LOGGER.error("Error when grouping by ROR and removing related orgs solr index, eating the exception", ex); - } - return newOrg; - } - - private void processExternalIdentifiers(OrgDisambiguatedEntity org, JsonNode institute) { - JsonNode externalIdsContainer = institute.get("external_ids") == null ? null : institute.get("external_ids"); - if (externalIdsContainer != null) { - - Iterator> nodes = externalIdsContainer.fields(); - - while (nodes.hasNext()) { - Map.Entry entry = (Map.Entry) nodes.next(); - String identifierTypeName = entry.getKey().toUpperCase(); - String preferredId = entry.getValue().get("preferred").isNull() ? 
null : entry.getValue().get("preferred").asText(); - if(StringUtils.equalsIgnoreCase(OrgDisambiguatedSourceType.GRID.name(), identifierTypeName)) { - JsonNode extId = (JsonNode) entry.getValue().get("all"); - setExternalId(org, identifierTypeName, preferredId, extId); - UPDATED_RORS.add(org.getId()); - } - else { - ArrayNode elements = (ArrayNode) entry.getValue().get("all"); - for (JsonNode extId : elements) { - setExternalId(org, identifierTypeName, preferredId, extId); - UPDATED_RORS.add(org.getId()); - } - } - } - } - - if (!institute.get(WIKIPEDIA_URL).isNull()) { - String url = institute.get(WIKIPEDIA_URL).asText(); - // If the external identifier doesn't exists yet - if (orgDisambiguatedExternalIdentifierDao.findByDetails(org.getId(), url, WIKIPEDIA_URL.toUpperCase()) == null) { - createExternalIdentifier(org, url, WIKIPEDIA_URL.toUpperCase(), true); - UPDATED_RORS.add(org.getId()); - } else { - LOGGER.info("Wikipedia URL for {} already exists", org.getId()); - } - } - } - - private void setExternalId(OrgDisambiguatedEntity org, String identifierTypeName, String preferredId, JsonNode extId) { - // If the external identifier doesn't exists yet - OrgDisambiguatedExternalIdentifierEntity existingExternalId = orgDisambiguatedExternalIdentifierDao.findByDetails(org.getId(), extId.asText(), - identifierTypeName); - Boolean preferred = extId.asText().equals(preferredId); - if (existingExternalId == null) { - if (preferred) { - createExternalIdentifier(org, extId.asText(), identifierTypeName, true); - } else { - createExternalIdentifier(org, extId.asText(), identifierTypeName, false); - } - } else { - if (existingExternalId.getPreferred() != preferred) { - existingExternalId.setPreferred(preferred); - orgDisambiguatedManager.updateOrgDisambiguatedExternalIdentifier(existingExternalId); - LOGGER.info("External identifier for {} with ext id {} and type {} was updated", - new Object[] { org.getId(), extId.asText(), identifierTypeName }); - } else { - 
LOGGER.info("External identifier for {} with ext id {} and type {} already exists", - new Object[] { org.getId(), extId.asText(), identifierTypeName }); - } - } - } - - /** - * Indicates if an entity changed his address, url or org type - * - * @return true if the entity has changed. - */ - private boolean entityChanged(OrgDisambiguatedEntity entity, String name, String countryCode, String city, String region, String url, String orgType) { - // Check name - if (StringUtils.isNotBlank(name)) { - if (!name.equalsIgnoreCase(entity.getName())) - return true; - } else if (StringUtils.isNotBlank(entity.getName())) { - return true; - } - // Check country - if (StringUtils.isNotBlank(countryCode)) { - if (entity.getCountry() == null || !countryCode.equals(entity.getCountry())) { - return true; - } - } else if (entity.getCountry() != null) { - return true; - } - // Check city - if (StringUtils.isNotBlank(city)) { - if (entity.getCity() == null || !city.equals(entity.getCity())) { - return true; - } - } else if (StringUtils.isNotBlank(entity.getCity())) { - return true; - } - // Check region - if (StringUtils.isNotBlank(region)) { - if (entity.getRegion() == null || !region.equals(entity.getRegion())) { - return true; - } - } else if (StringUtils.isNotBlank(entity.getRegion())) { - return true; - } - // Check url - if (StringUtils.isNotBlank(url)) { - if (entity.getUrl() == null || !url.equals(entity.getUrl())) { - return true; - } - } else if (StringUtils.isNotBlank(entity.getUrl())) { - return true; - } - // Check org_type - if (StringUtils.isNotBlank(orgType)) { - if (entity.getOrgType() == null || !orgType.equals(entity.getOrgType())) { - return true; - } - } else if (StringUtils.isNotBlank(entity.getOrgType())) { - return true; - } - - return false; - } - - /** - * Creates a disambiguated ORG in the org_disambiguated table - */ - private OrgDisambiguatedEntity createDisambiguatedOrg(String sourceId, String name, String orgType, Iso3166Country country, String city, 
String region, String url) { - LOGGER.info("Creating disambiguated org {}", name); - OrgDisambiguatedEntity orgDisambiguatedEntity = new OrgDisambiguatedEntity(); - orgDisambiguatedEntity.setName(name); - orgDisambiguatedEntity.setCountry(country != null ? country.name() : null); - orgDisambiguatedEntity.setCity(city); - orgDisambiguatedEntity.setRegion(region); - orgDisambiguatedEntity.setUrl(url); - orgDisambiguatedEntity.setOrgType(orgType); - orgDisambiguatedEntity.setSourceId(sourceId); - orgDisambiguatedEntity.setSourceType(OrgDisambiguatedSourceType.ROR.name()); - orgDisambiguatedManager.createOrgDisambiguated(orgDisambiguatedEntity); - return orgDisambiguatedEntity; - } - - /** - * Creates an external identifier in the - * org_disambiguated_external_identifier table - */ - private boolean createExternalIdentifier(OrgDisambiguatedEntity disambiguatedOrg, String identifier, String externalIdType, Boolean preferred) { - LOGGER.info("Creating external identifier for {}", disambiguatedOrg.getId()); - OrgDisambiguatedExternalIdentifierEntity externalIdentifier = new OrgDisambiguatedExternalIdentifierEntity(); - externalIdentifier.setIdentifier(identifier); - externalIdentifier.setIdentifierType(externalIdType); - externalIdentifier.setOrgDisambiguated(disambiguatedOrg); - externalIdentifier.setPreferred(preferred); - orgDisambiguatedManager.createOrgDisambiguatedExternalIdentifier(externalIdentifier); - return true; - } - - /** - * Mark an existing org as DEPRECATED - */ - private void deprecateOrg(String sourceId, String primarySourceId) { - LOGGER.info("Deprecating org {} for {}", sourceId, primarySourceId); - OrgDisambiguatedEntity existingEntity = orgDisambiguatedDao.findBySourceIdAndSourceType(sourceId, OrgDisambiguatedSourceType.ROR.name()); - if (existingEntity != null) { - if (existingEntity.getStatus() == null || !existingEntity.getStatus().equals(OrganizationStatus.DEPRECATED.name()) - || !existingEntity.getSourceParentId().equals(primarySourceId)) { - 
existingEntity.setStatus(OrganizationStatus.DEPRECATED.name()); - existingEntity.setSourceParentId(primarySourceId); - existingEntity.setIndexingStatus(IndexingStatus.PENDING); - orgDisambiguatedManager.updateOrgDisambiguated(existingEntity); - } - } else { - OrgDisambiguatedEntity deprecatedEntity = new OrgDisambiguatedEntity(); - deprecatedEntity.setSourceType(OrgDisambiguatedSourceType.ROR.name()); - deprecatedEntity.setStatus(OrganizationStatus.DEPRECATED.name()); - deprecatedEntity.setSourceId(sourceId); - deprecatedEntity.setSourceParentId(primarySourceId); - // We don't need to index it - deprecatedEntity.setIndexingStatus(IndexingStatus.DONE); - orgDisambiguatedManager.createOrgDisambiguated(deprecatedEntity); - } - } - - /** - * Mark an existing org as OBSOLETE - */ - private void obsoleteOrg(String sourceId) { - LOGGER.info("Marking or as obsolete {}", sourceId); - OrgDisambiguatedEntity existingEntity = orgDisambiguatedDao.findBySourceIdAndSourceType(sourceId, OrgDisambiguatedSourceType.ROR.name()); - if (existingEntity != null) { - if (existingEntity.getStatus() == null || !existingEntity.getStatus().equals(OrganizationStatus.OBSOLETE.name())) { - existingEntity.setStatus(OrganizationStatus.OBSOLETE.name()); - existingEntity.setIndexingStatus(IndexingStatus.PENDING); - orgDisambiguatedManager.updateOrgDisambiguated(existingEntity); - new OrgGrouping(existingEntity, orgDisambiguatedManager).ungroupObsoleteRorForIndexing(orgDisambiguatedDao); - } - } else { - OrgDisambiguatedEntity obsoletedEntity = new OrgDisambiguatedEntity(); - obsoletedEntity.setSourceType(OrgDisambiguatedSourceType.ROR.name()); - obsoletedEntity.setStatus(OrganizationStatus.OBSOLETE.name()); - obsoletedEntity.setSourceId(sourceId); - // We don't need to index it - obsoletedEntity.setIndexingStatus(IndexingStatus.DONE); - orgDisambiguatedManager.createOrgDisambiguated(obsoletedEntity); - new OrgGrouping(obsoletedEntity, 
orgDisambiguatedManager).ungroupObsoleteRorForIndexing(orgDisambiguatedDao); - } - } - - @Override - public boolean isEnabled() { - return enabled; - } - - - private void groupRORsWithUpdatedExternalModifiers() { - for (Long id : UPDATED_RORS) { - OrgDisambiguatedEntity entity = orgDisambiguatedDao.find(id); - if (entity != null) { - entity.setIndexingStatus(IndexingStatus.PENDING); - try { - // mark group for indexing - new OrgGrouping(entity, orgDisambiguatedManager).markGroupForIndexing(orgDisambiguatedDao); - - } catch (Exception ex) { - LOGGER.error("Error when grouping by ROR and marking group orgs for reindexing, eating the exception", ex); - } - entity = orgDisambiguatedManager.updateOrgDisambiguated(entity); - - } - } - } - + private static final Logger LOGGER = LoggerFactory.getLogger(RorOrgLoadSource.class); + + private static final String WIKIPEDIA_URL = "wikipedia_url"; + + @Value("${org.orcid.core.orgs.ror.enabled:true}") + private boolean enabled; + + @Value("${org.orcid.core.orgs.clients.userAgent}") + private String userAgent; + + @Resource(name = "rorOrgDataClient") + private OrgDataClient orgDataClient; + + @Value("${org.orcid.core.orgs.ror.localZipPath:/tmp/grid/ror.zip}") + private String zipFilePath; + + @Value("${org.orcid.core.orgs.ror.localDataPath:/tmp/grid/ror.json}") + private String localDataPath; + + @Value("${org.orcid.core.orgs.ror.indexAllEnabled:false}") + private boolean indexAllEnabled; + + @Resource + private OrgDisambiguatedDao orgDisambiguatedDao; + + @Resource + private OrgDisambiguatedManager orgDisambiguatedManager; + + @Resource + private OrgDisambiguatedExternalIdentifierDao orgDisambiguatedExternalIdentifierDao; + + @Value("${org.orcid.core.orgs.ror.zenodoRecordsUrl:https://zenodo.org/api/records?communities=ror-data}") + private String rorZenodoRecordsUrl; + + @Resource + private FileRotator fileRotator; + + private Set UPDATED_RORS; + + @Override + public String getSourceName() { + return "ROR"; + } + + @Override + 
public boolean loadOrgData() { + if (!enabled) { + throw new LoadSourceDisabledException(getSourceName()); + } + + return loadData(); + } + + @Override + public boolean downloadOrgData() { + try { + fileRotator.removeFileIfExists(zipFilePath); + fileRotator.removeFileIfExists(localDataPath); + + ZenodoRecords zenodoRecords = orgDataClient.get(rorZenodoRecordsUrl + "&sort=mostrecent&size=1", userAgent, + ZenodoRecords.class); + ZenodoRecordsHit zenodoHit = zenodoRecords.getHits().getHits().get(0); + + boolean success = false; + + // we are returning the collection ordered by mostrecent and size 1, we need to + // get the last element in the list that has the last version + String zenodoUrl = zenodoHit.getFiles() + .get(zenodoHit.getFiles().size() > 0 ? zenodoHit.getFiles().size() - 1 : 0).getLinks().getSelf(); + LOGGER.info("Retrieving ROR data from: " + zenodoUrl); + success = orgDataClient.downloadFile(zenodoUrl, userAgent, zipFilePath); + + try { + LOGGER.info("Unzipping ROR ...."); + unzipData(); + } catch (IOException e) { + LOGGER.error("Error unzipping Zenodo ROR data", e); + throw new RuntimeException(e); + } + return success; + } catch (Exception e) { + LOGGER.error("Error downloading Zenodo ROR data", e); + return false; + } + } + + private void unzipData() throws IOException { + byte[] buffer = new byte[1024]; + ZipInputStream zis = new ZipInputStream(new FileInputStream(zipFilePath)); + ZipEntry zipEntry = zis.getNextEntry(); + while (zipEntry != null) { + String zipEntryName = zipEntry.getName(); + if (zipEntryName.endsWith("v2.json")) { + File jsonData = new File(localDataPath); + FileOutputStream fos = new FileOutputStream(jsonData); + int len; + while ((len = zis.read(buffer)) > 0) { + fos.write(buffer, 0, len); + } + fos.close(); + break; + } + zipEntry = zis.getNextEntry(); + } + zis.closeEntry(); + zis.close(); + } + + private boolean loadData() { + try { + LOGGER.info("Loading ROR data..."); + Instant start = Instant.now(); + File fileToLoad = 
new File(localDataPath); + if (!fileToLoad.exists()) { + LOGGER.error("File {} doesn't exist", localDataPath); + return false; + } + + // ror returns the JSON as Array of institutes + JsonNode rootNode = JsonUtils.read(fileToLoad); + UPDATED_RORS = new HashSet(); + + rootNode.forEach(institute -> { + String sourceId = institute.get("id").isNull() ? null : institute.get("id").asText(); + String status = institute.get("status").isNull() ? null : institute.get("status").asText(); + if ("active".equalsIgnoreCase(status) || "inactive".equalsIgnoreCase(status)) { + ArrayNode namesNode = institute.get("names").isNull() ? null : (ArrayNode) institute.get("names"); + String name = null; + if (namesNode != null) { + for (JsonNode nameJson : namesNode) { + ArrayNode nameTypes = nameJson.get("types").isNull() ? null + : (ArrayNode) nameJson.get("types"); + for (JsonNode nameType : nameTypes) { + if (StringUtils.equalsIgnoreCase(nameType.asText(), "ror_display")) { + name = nameJson.get("value").asText(); + break; + } + } + } + } + + StringJoiner sj = new StringJoiner(","); + String orgType = null; + if (!institute.get("types").isNull()) { + ((ArrayNode) institute.get("types")).forEach(x -> sj.add(x.textValue())); + orgType = sj.toString(); + } + + + //location node + + ArrayNode locationsNode = institute.get("locations").isNull() ? null : (ArrayNode) institute.get("locations"); + Iso3166Country country = null; + String region = null; + String city = null; + if (locationsNode != null) { + for (JsonNode locationJson : locationsNode) { + JsonNode geoDetailsNode = locationJson.get("geonames_details").isNull() ? null + : (JsonNode) locationJson.get("geonames_details"); + + if(geoDetailsNode !=null) { + String countryCode = geoDetailsNode.get("country_code").isNull() ? null + : geoDetailsNode.get("country_code").asText(); + country = StringUtils.isBlank(countryCode) ? 
null : Iso3166Country.fromValue(countryCode); + //for now storing just the first location + city = geoDetailsNode.get("name").isNull() ? null + : geoDetailsNode.get("name").asText(); + if(country != null) { + break; + } + } + + } + } + + + ArrayNode urls = institute.get("links").isNull() ? null : (ArrayNode) institute.get("links"); + // Use the first URL + String url = (urls != null && urls.size() > 0) ? urls.get(0).asText() : null; + + // Creates or updates an institute + OrgDisambiguatedEntity entity = processInstitute(sourceId, name, country, city, region, url, + orgType); + + // Creates external identifiers + processExternalIdentifiers(entity, institute); + } else if ("redirected".equals(status)) { + String primaryId = institute.get("redirect").isNull() ? null : institute.get("redirect").asText(); + deprecateOrg(sourceId, primaryId); + } else if ("withdrawn".equals(status) || "obsolete".equals(status)) { + obsoleteOrg(sourceId); + } else { + LOGGER.error("Illegal status '" + status + "' for institute " + sourceId); + } + }); + + // Check if any RORs with external identifiers updated and group them + groupRORsWithUpdatedExternalModifiers(); + + LOGGER.info("Time taken to process the data: {}", Duration.between(start, Instant.now()).toString()); + return true; + } catch (Exception e) { + LOGGER.error("Error loading ROR data", e); + return false; + } + } + + private OrgDisambiguatedEntity processInstitute(String sourceId, String name, Iso3166Country country, String city, + String region, String url, String orgType) { + OrgDisambiguatedEntity existingBySourceId = orgDisambiguatedDao.findBySourceIdAndSourceType(sourceId, + OrgDisambiguatedSourceType.ROR.name()); + if (existingBySourceId != null) { + if (entityChanged(existingBySourceId, name, country.value(), city, region, url, orgType) + || indexAllEnabled) { + existingBySourceId.setCity(city); + existingBySourceId.setCountry(country.name()); + existingBySourceId.setName(name); + 
existingBySourceId.setOrgType(orgType); + existingBySourceId.setRegion(region); + existingBySourceId.setUrl(url); + existingBySourceId.setIndexingStatus(IndexingStatus.PENDING); + try { + // mark group for indexing + new OrgGrouping(existingBySourceId, orgDisambiguatedManager) + .markGroupForIndexing(orgDisambiguatedDao); + + } catch (Exception ex) { + LOGGER.error( + "Error when grouping by ROR and marking group orgs for reindexing, eating the exception", + ex); + } + orgDisambiguatedManager.updateOrgDisambiguated(existingBySourceId); + } + return existingBySourceId; + } + + // Create a new disambiguated org + OrgDisambiguatedEntity newOrg = createDisambiguatedOrg(sourceId, name, orgType, country, city, region, url); + try { + // mark group for indexing + new OrgGrouping(newOrg, orgDisambiguatedManager).markGroupForIndexing(orgDisambiguatedDao); + } catch (Exception ex) { + LOGGER.error("Error when grouping by ROR and removing related orgs solr index, eating the exception", ex); + } + return newOrg; + } + + private void processExternalIdentifiers(OrgDisambiguatedEntity org, JsonNode institute) { + ArrayNode nodes = institute.get("external_ids") == null ? null : (ArrayNode) institute.get("external_ids"); + if (nodes!= null) { + for(JsonNode entry:nodes){ + String identifierTypeName = entry.get("type").asText().toUpperCase(); + String preferredId = entry.get("preferred").isNull() ? 
null + : entry.get("preferred").asText(); + if (StringUtils.equalsIgnoreCase(OrgDisambiguatedSourceType.GRID.name(), identifierTypeName)) { + JsonNode extId = (JsonNode) entry.get("all"); + setExternalId(org, identifierTypeName, preferredId, extId); + UPDATED_RORS.add(org.getId()); + } else { + ArrayNode elements = (ArrayNode) entry.get("all"); + for (JsonNode extId : elements) { + setExternalId(org, identifierTypeName, preferredId, extId); + UPDATED_RORS.add(org.getId()); + } + } + } + } + } + + private void setExternalId(OrgDisambiguatedEntity org, String identifierTypeName, String preferredId, + JsonNode extId) { + // If the external identifier doesn't exists yet + OrgDisambiguatedExternalIdentifierEntity existingExternalId = orgDisambiguatedExternalIdentifierDao + .findByDetails(org.getId(), extId.asText(), identifierTypeName); + Boolean preferred = extId.asText().equals(preferredId); + if (existingExternalId == null) { + if (preferred) { + createExternalIdentifier(org, extId.asText(), identifierTypeName, true); + } else { + createExternalIdentifier(org, extId.asText(), identifierTypeName, false); + } + } else { + if (existingExternalId.getPreferred() != preferred) { + existingExternalId.setPreferred(preferred); + orgDisambiguatedManager.updateOrgDisambiguatedExternalIdentifier(existingExternalId); + LOGGER.info("External identifier for {} with ext id {} and type {} was updated", + new Object[] { org.getId(), extId.asText(), identifierTypeName }); + } else { + LOGGER.info("External identifier for {} with ext id {} and type {} already exists", + new Object[] { org.getId(), extId.asText(), identifierTypeName }); + } + } + } + + /** + * Indicates if an entity changed his address, url or org type + * + * @return true if the entity has changed. 
+ */ + private boolean entityChanged(OrgDisambiguatedEntity entity, String name, String countryCode, String city, + String region, String url, String orgType) { + // Check name + if (StringUtils.isNotBlank(name)) { + if (!name.equalsIgnoreCase(entity.getName())) + return true; + } else if (StringUtils.isNotBlank(entity.getName())) { + return true; + } + // Check country + if (StringUtils.isNotBlank(countryCode)) { + if (entity.getCountry() == null || !countryCode.equals(entity.getCountry())) { + return true; + } + } else if (entity.getCountry() != null) { + return true; + } + // Check city + if (StringUtils.isNotBlank(city)) { + if (entity.getCity() == null || !city.equals(entity.getCity())) { + return true; + } + } else if (StringUtils.isNotBlank(entity.getCity())) { + return true; + } + // Check region + if (StringUtils.isNotBlank(region)) { + if (entity.getRegion() == null || !region.equals(entity.getRegion())) { + return true; + } + } else if (StringUtils.isNotBlank(entity.getRegion())) { + return true; + } + // Check url + if (StringUtils.isNotBlank(url)) { + if (entity.getUrl() == null || !url.equals(entity.getUrl())) { + return true; + } + } else if (StringUtils.isNotBlank(entity.getUrl())) { + return true; + } + // Check org_type + if (StringUtils.isNotBlank(orgType)) { + if (entity.getOrgType() == null || !orgType.equals(entity.getOrgType())) { + return true; + } + } else if (StringUtils.isNotBlank(entity.getOrgType())) { + return true; + } + + return false; + } + + /** + * Creates a disambiguated ORG in the org_disambiguated table + */ + private OrgDisambiguatedEntity createDisambiguatedOrg(String sourceId, String name, String orgType, + Iso3166Country country, String city, String region, String url) { + LOGGER.info("Creating disambiguated org {}", name); + OrgDisambiguatedEntity orgDisambiguatedEntity = new OrgDisambiguatedEntity(); + orgDisambiguatedEntity.setName(name); + orgDisambiguatedEntity.setCountry(country != null ? 
country.name() : null); + orgDisambiguatedEntity.setCity(city); + orgDisambiguatedEntity.setRegion(region); + orgDisambiguatedEntity.setUrl(url); + orgDisambiguatedEntity.setOrgType(orgType); + orgDisambiguatedEntity.setSourceId(sourceId); + orgDisambiguatedEntity.setSourceType(OrgDisambiguatedSourceType.ROR.name()); + orgDisambiguatedManager.createOrgDisambiguated(orgDisambiguatedEntity); + return orgDisambiguatedEntity; + } + + /** + * Creates an external identifier in the org_disambiguated_external_identifier + * table for the given org via orgDisambiguatedManager; always returns true + */ + private boolean createExternalIdentifier(OrgDisambiguatedEntity disambiguatedOrg, String identifier, + String externalIdType, Boolean preferred) { + LOGGER.info("Creating external identifier for {}", disambiguatedOrg.getId()); + OrgDisambiguatedExternalIdentifierEntity externalIdentifier = new OrgDisambiguatedExternalIdentifierEntity(); + externalIdentifier.setIdentifier(identifier); + externalIdentifier.setIdentifierType(externalIdType); + externalIdentifier.setOrgDisambiguated(disambiguatedOrg); + externalIdentifier.setPreferred(preferred); + orgDisambiguatedManager.createOrgDisambiguatedExternalIdentifier(externalIdentifier); + return true; + } + + /** + * Mark an existing org as DEPRECATED with primarySourceId as its source parent; when no ROR record exists for sourceId, a DEPRECATED record is created with indexing status DONE + */ + private void deprecateOrg(String sourceId, String primarySourceId) { + LOGGER.info("Deprecating org {} for {}", sourceId, primarySourceId); + OrgDisambiguatedEntity existingEntity = orgDisambiguatedDao.findBySourceIdAndSourceType(sourceId, + OrgDisambiguatedSourceType.ROR.name()); + if (existingEntity != null) { + if (existingEntity.getStatus() == null + || !existingEntity.getStatus().equals(OrganizationStatus.DEPRECATED.name()) + || !java.util.Objects.equals(existingEntity.getSourceParentId(), primarySourceId)) { /* null-safe: an already DEPRECATED record may have no source parent stored */ + existingEntity.setStatus(OrganizationStatus.DEPRECATED.name()); + existingEntity.setSourceParentId(primarySourceId); + existingEntity.setIndexingStatus(IndexingStatus.PENDING); + orgDisambiguatedManager.updateOrgDisambiguated(existingEntity); + } + } 
else { + OrgDisambiguatedEntity deprecatedEntity = new OrgDisambiguatedEntity(); + deprecatedEntity.setSourceType(OrgDisambiguatedSourceType.ROR.name()); + deprecatedEntity.setStatus(OrganizationStatus.DEPRECATED.name()); + deprecatedEntity.setSourceId(sourceId); + deprecatedEntity.setSourceParentId(primarySourceId); + // We don't need to index it + deprecatedEntity.setIndexingStatus(IndexingStatus.DONE); + orgDisambiguatedManager.createOrgDisambiguated(deprecatedEntity); + } + } + + /** + * Mark an existing org as OBSOLETE and ungroup it for indexing; when no ROR record exists for sourceId, an OBSOLETE record is created with indexing status DONE + */ + private void obsoleteOrg(String sourceId) { + LOGGER.info("Marking org as obsolete {}", sourceId); + OrgDisambiguatedEntity existingEntity = orgDisambiguatedDao.findBySourceIdAndSourceType(sourceId, + OrgDisambiguatedSourceType.ROR.name()); + if (existingEntity != null) { + if (existingEntity.getStatus() == null + || !existingEntity.getStatus().equals(OrganizationStatus.OBSOLETE.name())) { + existingEntity.setStatus(OrganizationStatus.OBSOLETE.name()); + existingEntity.setIndexingStatus(IndexingStatus.PENDING); + orgDisambiguatedManager.updateOrgDisambiguated(existingEntity); + new OrgGrouping(existingEntity, orgDisambiguatedManager) + .ungroupObsoleteRorForIndexing(orgDisambiguatedDao); + } + } else { + OrgDisambiguatedEntity obsoletedEntity = new OrgDisambiguatedEntity(); + obsoletedEntity.setSourceType(OrgDisambiguatedSourceType.ROR.name()); + obsoletedEntity.setStatus(OrganizationStatus.OBSOLETE.name()); + obsoletedEntity.setSourceId(sourceId); + // We don't need to index it + obsoletedEntity.setIndexingStatus(IndexingStatus.DONE); + orgDisambiguatedManager.createOrgDisambiguated(obsoletedEntity); + new OrgGrouping(obsoletedEntity, orgDisambiguatedManager) + .ungroupObsoleteRorForIndexing(orgDisambiguatedDao); + } + } + + @Override + public boolean isEnabled() { + return enabled; + } + + private void groupRORsWithUpdatedExternalModifiers() { + for (Long id : UPDATED_RORS) { + OrgDisambiguatedEntity entity = 
orgDisambiguatedDao.find(id); + if (entity != null) { + entity.setIndexingStatus(IndexingStatus.PENDING); + try { + // mark group for indexing + new OrgGrouping(entity, orgDisambiguatedManager).markGroupForIndexing(orgDisambiguatedDao); + + } catch (Exception ex) { + LOGGER.error( + "Error when grouping by ROR and marking group orgs for reindexing, eating the exception", + ex); + } + entity = orgDisambiguatedManager.updateOrgDisambiguated(entity); + + } + } + } + } \ No newline at end of file diff --git a/orcid-scheduler-web/src/test/java/org/orcid/scheduler/loader/RorOrgLoadSourceTest.java b/orcid-scheduler-web/src/test/java/org/orcid/scheduler/loader/RorOrgLoadSourceTest.java index f5d57932c33..15c339f0355 100644 --- a/orcid-scheduler-web/src/test/java/org/orcid/scheduler/loader/RorOrgLoadSourceTest.java +++ b/orcid-scheduler-web/src/test/java/org/orcid/scheduler/loader/RorOrgLoadSourceTest.java @@ -130,7 +130,7 @@ public void execute_Stats_Test_1() throws URISyntaxException { assertNotEquals(OrganizationStatus.DEPRECATED.name(), persisted.getStatus()); assertNotEquals(OrganizationStatus.OBSOLETE.name(), persisted.getStatus()); - verify(orgDisambiguatedManager, times(5)).createOrgDisambiguatedExternalIdentifier(any(OrgDisambiguatedExternalIdentifierEntity.class)); + verify(orgDisambiguatedManager, times(4)).createOrgDisambiguatedExternalIdentifier(any(OrgDisambiguatedExternalIdentifierEntity.class)); verify(orgDisambiguatedManager, never()).updateOrgDisambiguated(any(OrgDisambiguatedEntity.class)); verify(orgDisambiguatedExternalIdentifierDao, never()).merge(any(OrgDisambiguatedExternalIdentifierEntity.class)); } @@ -149,7 +149,7 @@ public void execute_Stats_Test_2() throws URISyntaxException { assertNotEquals(OrganizationStatus.OBSOLETE.name(), persisted.getStatus()); } - verify(orgDisambiguatedManager, times(27)).createOrgDisambiguatedExternalIdentifier(any(OrgDisambiguatedExternalIdentifierEntity.class)); + verify(orgDisambiguatedManager, 
times(24)).createOrgDisambiguatedExternalIdentifier(any(OrgDisambiguatedExternalIdentifierEntity.class)); verify(orgDisambiguatedManager, never()).updateOrgDisambiguated(any(OrgDisambiguatedEntity.class)); verify(orgDisambiguatedExternalIdentifierDao, never()).merge(any(OrgDisambiguatedExternalIdentifierEntity.class)); } @@ -238,10 +238,9 @@ public OrgDisambiguatedEntity answer(InvocationOnMock invocation) throws Throwab entity.setId(1L); entity.setName("org_1"); entity.setSourceId("ror.1"); - entity.setCity("City One"); - entity.setCountry(Iso3166Country.US.name()); + entity.setCity("Adelaide"); + entity.setCountry(Iso3166Country.AU.name()); entity.setOrgType("type_1"); - entity.setRegion("Alabama"); entity.setSourceType(OrgDisambiguatedSourceType.ROR.name()); entity.setStatus("active"); entity.setUrl("http://link1.com"); @@ -256,8 +255,7 @@ public OrgDisambiguatedEntity answer(InvocationOnMock invocation) throws Throwab when(orgDisambiguatedExternalIdentifierDao.findByDetails(1L, "FUNDREF1", OrgDisambiguatedSourceType.FUNDREF.name())).thenReturn(extIdPreferred); when(orgDisambiguatedExternalIdentifierDao.findByDetails(1L, "ORGREF1", "ORGREF")).thenReturn(extId); when(orgDisambiguatedExternalIdentifierDao.findByDetails(1L, "WIKIDATA1", "WIKIDATA")).thenReturn(extId); - when(orgDisambiguatedExternalIdentifierDao.findByDetails(1L, "http://en.wikipedia.org/wiki/org_1", "WIKIPEDIA_URL")).thenReturn(extId); - + Path path = Paths.get(getClass().getClassLoader().getResource("ror/ror_1_org_updated_5_external_identifiers.json").toURI()); File testFile = path.toFile(); ReflectionTestUtils.setField(rorOrgLoadSource, "localDataPath", testFile.getAbsolutePath()); @@ -275,11 +273,9 @@ public OrgDisambiguatedEntity answer(InvocationOnMock invocation) throws Throwab assertNotEquals(OrganizationStatus.OBSOLETE.name(), orgToBeUpdated.getStatus()); assertEquals(Iso3166Country.AU.name(), orgToBeUpdated.getCountry()); assertEquals(Long.valueOf(1), orgToBeUpdated.getId()); - 
assertEquals("City One Updated", orgToBeUpdated.getCity()); + assertEquals("Adelaide Updated", orgToBeUpdated.getCity()); assertEquals(IndexingStatus.PENDING, orgToBeUpdated.getIndexingStatus()); assertEquals("org_1_updated", orgToBeUpdated.getName()); - assertEquals("type_1,type_2", orgToBeUpdated.getOrgType()); - assertEquals("San Jose", orgToBeUpdated.getRegion()); assertEquals("ror.1", orgToBeUpdated.getSourceId()); assertEquals(OrgDisambiguatedSourceType.ROR.name(), orgToBeUpdated.getSourceType()); assertEquals("active", orgToBeUpdated.getStatus()); @@ -439,56 +435,6 @@ public void execute_DeprecatedObsoleteInstitutes_2_Test() throws URISyntaxExcept assertEquals(2, obsoleteCount); } - @Test - public void execute_AddMissingWikipediaExtId_Test() throws URISyntaxException { - when(orgDisambiguatedDao.findBySourceIdAndSourceType("ror.1", OrgDisambiguatedSourceType.ROR.name())).thenAnswer(new Answer() { - @Override - public OrgDisambiguatedEntity answer(InvocationOnMock invocation) throws Throwable { - OrgDisambiguatedEntity entity = new OrgDisambiguatedEntity(); - entity.setId(1L); - entity.setName("org_1"); - entity.setSourceId("ror.1"); - entity.setCity("City One"); - entity.setCountry(Iso3166Country.US.name()); - entity.setOrgType("type_1"); - entity.setRegion("Alabama"); - entity.setSourceType(OrgDisambiguatedSourceType.ROR.name()); - entity.setStatus("active"); - entity.setUrl("http://link1.com"); - return entity; - } - }); - OrgDisambiguatedExternalIdentifierEntity extId = new OrgDisambiguatedExternalIdentifierEntity(); - extId.setPreferred(false); - - OrgDisambiguatedExternalIdentifierEntity extIdPreferred = new OrgDisambiguatedExternalIdentifierEntity(); - extIdPreferred.setPreferred(true); - - when(orgDisambiguatedExternalIdentifierDao.findByDetails(1L, "ISNI1", "ISNI")).thenReturn(extId); - when(orgDisambiguatedExternalIdentifierDao.findByDetails(1L, "FUNDREF1", OrgDisambiguatedSourceType.FUNDREF.name())).thenReturn(extIdPreferred); - 
when(orgDisambiguatedExternalIdentifierDao.findByDetails(1L, "ORGREF1", "ORGREF")).thenReturn(extId); - when(orgDisambiguatedExternalIdentifierDao.findByDetails(1L, "WIKIDATA1", "WIKIDATA")).thenReturn(extId); - - Path path = Paths.get(getClass().getClassLoader().getResource("ror/ror_1_org_5_external_identifiers.json").toURI()); - File testFile = path.toFile(); - ReflectionTestUtils.setField(rorOrgLoadSource, "localDataPath", testFile.getAbsolutePath()); - rorOrgLoadSource.loadOrgData(); - - verify(orgDisambiguatedDao, never()).persist(Mockito.any(OrgDisambiguatedEntity.class)); - verify(orgDisambiguatedManager, times(1)).createOrgDisambiguatedExternalIdentifier(any(OrgDisambiguatedExternalIdentifierEntity.class)); - verify(orgDisambiguatedDao, never()).merge(any(OrgDisambiguatedEntity.class)); - verify(orgDisambiguatedExternalIdentifierDao, never()).merge(any(OrgDisambiguatedExternalIdentifierEntity.class)); - - ArgumentCaptor captor = ArgumentCaptor.forClass(OrgDisambiguatedExternalIdentifierEntity.class); - - verify(orgDisambiguatedManager).createOrgDisambiguatedExternalIdentifier(captor.capture()); - - OrgDisambiguatedExternalIdentifierEntity orgToBeUpdated = captor.getValue(); - assertEquals("http://en.wikipedia.org/wiki/org_1", orgToBeUpdated.getIdentifier()); - assertEquals("WIKIPEDIA_URL", orgToBeUpdated.getIdentifierType()); - assertEquals(Boolean.TRUE, orgToBeUpdated.getPreferred()); - } - @Test public void execute_UpdatePreferredIndicator_Test() throws URISyntaxException { when(orgDisambiguatedDao.findBySourceIdAndSourceType("ror.1", OrgDisambiguatedSourceType.ROR.name())).thenAnswer(new Answer() { diff --git a/orcid-scheduler-web/src/test/resources/ror/ror_1_org_2_ext_ids_#2_preferred.json b/orcid-scheduler-web/src/test/resources/ror/ror_1_org_2_ext_ids_#2_preferred.json index 09e79215482..a7c897a838d 100644 --- a/orcid-scheduler-web/src/test/resources/ror/ror_1_org_2_ext_ids_#2_preferred.json +++ 
b/orcid-scheduler-web/src/test/resources/ror/ror_1_org_2_ext_ids_#2_preferred.json @@ -1,4 +1,71 @@ - - [ - { "name": "org_1","country": {"country_code": "AU", "country_name": "Australia" },"wikipedia_url": null, "email_address": null, "links": ["http://link1.com"], "aliases": ["alias 1"], "acronyms": ["ACR_1"], "types": ["type_1"], "ip_addresses": [], "addresses": [{ "line_1": "", "line_2": "", "line_3": null, "lat": 0, "lng": 0, "postcode": "", "primary": false, "city": "City One", "state": "Alabama", "state_code": "AL", "country": "United States", "country_code": "US", "geonames_city": { "id": 2172517, "city": "Canberra", "nuts_level1": null, "nuts_level2": null, "nuts_level3": null, "geonames_admin1": { "name": "ACT", "ascii_name": "ACT", "code": "AU.01" }, "geonames_admin2": null, "license": { "attribution": "Data from geonames.org under a CC-BY 3.0 license", "license": "http://creativecommons.org/licenses/by/3.0/" } } }], "labels": [], "id": "ror.1", "status": "active", "established": 1946, "relationships": [{ "type": "Related", "label": "Calvary Hospital", "id": "grid.460694.9" }, { "type": "Related", "label": "Canberra Hospital", "id": "grid.413314.0" }, { "type": "Related", "label": "Goulburn Base Hospital", "id": "grid.460721.6" }], "external_ids": { "Wikidata": { "preferred": "WIKIDATA2", "all": ["WIKIDATA1", "WIKIDATA2"] } }, "weight": 1 } - ] \ No newline at end of file +[ + { + "names":[ + { + "value":"org_1", + "types":[ + "ror_display", + "label" + ], + "lang":null + } + ], + "locations": [ + { + "geonames_id": 2078025, + "geonames_details": { + "country_code": "AU", + "country_name": "Australia", + "lat": -35.024038, + "lng": 138.572615, + "name": "Adelaide" + } + } + ], + "email_address": null, + "links": [ + "http://link1.com" + ], + "aliases": [ + "alias 1" + ], + "acronyms": [ + "ACR_1" + ], + "types": [ + "type_1" + ], + "ip_addresses": [], + "labels": [], + "id": "ror.1", + "status": "active", + "established": 1946, + "relationships": [ + { + 
"type": "Related", + "label": "Calvary Hospital", + "id": "grid.460694.9" + }, + { + "type": "Related", + "label": "Canberra Hospital", + "id": "grid.413314.0" + }, + { + "type": "Related", + "label": "Goulburn Base Hospital", + "id": "grid.460721.6" + } + ], + "external_ids": [ + { + "type": "Wikidata", + "preferred": "WIKIDATA2", + "all": [ + "WIKIDATA1", + "WIKIDATA2" + ] + } + ] + } +] \ No newline at end of file diff --git a/orcid-scheduler-web/src/test/resources/ror/ror_1_org_5_external_identifiers.json b/orcid-scheduler-web/src/test/resources/ror/ror_1_org_5_external_identifiers.json index b90837de596..fe2564c06e4 100644 --- a/orcid-scheduler-web/src/test/resources/ror/ror_1_org_5_external_identifiers.json +++ b/orcid-scheduler-web/src/test/resources/ror/ror_1_org_5_external_identifiers.json @@ -1,3 +1,95 @@ [ - { "name": "org_1", "country": {"country_code": "AU", "country_name": "Australia" },"wikipedia_url": "http://en.wikipedia.org/wiki/org_1", "email_address": null, "links": ["http://link1.com"], "aliases": ["alias 1"], "acronyms": ["ACR_1"], "types": ["type_1"], "ip_addresses": [], "addresses": [{ "line_1": "", "line_2": "", "line_3": null, "lat": 0, "lng": 0, "postcode": "", "primary": false, "city": "City One", "state": "Alabama", "state_code": "AL", "country": "United States", "country_code": "US", "geonames_city": { "id": 2172517, "city": "Canberra", "nuts_level1": null, "nuts_level2": null, "nuts_level3": null, "geonames_admin1": { "name": "ACT", "ascii_name": "ACT", "code": "AU.01" }, "geonames_admin2": null, "license": { "attribution": "Data from geonames.org under a CC-BY 3.0 license", "license": "http://creativecommons.org/licenses/by/3.0/" } } }], "labels": [], "id": "ror.1", "status": "active", "established": 1946, "relationships": [{ "type": "Related", "label": "Calvary Hospital", "id": "grid.460694.9" }, { "type": "Related", "label": "Canberra Hospital", "id": "grid.413314.0" }, { "type": "Related", "label": "Goulburn Base Hospital", "id": 
"grid.460721.6" }], "external_ids": { "ISNI": { "preferred": null, "all": ["ISNI1"] }, "FundRef": { "preferred": "FUNDREF1", "all": ["FUNDREF1"] }, "OrgRef": { "preferred": null, "all": ["ORGREF1"] }, "Wikidata": { "preferred": null, "all": ["WIKIDATA1"] } }, "weight": 1 } + { + "names": [ + { + "value": "org_1", + "types": [ + "ror_display", + "label" + ], + "lang": null + } ], + + "locations":[ + { + "geonames_id":2078025, + "geonames_details":{ + "country_code":"AU", + "country_name":"Australia", + "lat":-35.024038, + "lng":138.572615, + "name":"Adelaide" + } + } + ], + "email_address":null, + "links":[ + "http://link1.com" + ], + "aliases":[ + "alias 1" + ], + "acronyms":[ + "ACR_1" + ], + "types":[ + "type_1" + ], + "ip_addresses":[ + + ], + "labels":[ + + ], + "id":"ror.1", + "status":"active", + "established":1946, + "relationships":[ + { + "type":"Related", + "label":"Calvary Hospital", + "id":"grid.460694.9" + }, + { + "type":"Related", + "label":"Canberra Hospital", + "id":"grid.413314.0" + }, + { + "type":"Related", + "label":"Goulburn Base Hospital", + "id":"grid.460721.6" + } + ], + "external_ids":[ + { + "type":"ISNI", + "preferred":null, + "all":[ + "ISNI1" + ] + }, + { + "type":"FundRef", + "preferred":"FUNDREF1", + "all":[ + "FUNDREF1" + ] + }, + { + "type":"OrgRef", + "preferred":null, + "all":[ + "ORGREF1" + ] + }, + { + "type":"Wikidata", + "preferred":null, + "all":[ + "WIKIDATA1" + ] + } + ] + } ] \ No newline at end of file diff --git a/orcid-scheduler-web/src/test/resources/ror/ror_1_org_6_external_identifiers.json b/orcid-scheduler-web/src/test/resources/ror/ror_1_org_6_external_identifiers.json index 8173c8de50b..0236aca6df2 100644 --- a/orcid-scheduler-web/src/test/resources/ror/ror_1_org_6_external_identifiers.json +++ b/orcid-scheduler-web/src/test/resources/ror/ror_1_org_6_external_identifiers.json @@ -1,3 +1,96 @@ - [ - { "name": "org_1", "country": {"country_code": "AU", "country_name": "Australia" },"wikipedia_url": 
"http://en.wikipedia.org/wiki/org_1", "email_address": null, "links": ["http://link1.com"], "aliases": ["alias 1"], "acronyms": ["ACR_1"], "types": ["type_1"], "ip_addresses": [], "addresses": [{ "line_1": "", "line_2": "", "line_3": null, "lat": 0, "lng": 0, "postcode": "", "primary": false, "city": "City One", "state": "Alabama", "state_code": "AL", "country": "United States", "country_code": "US", "geonames_city": { "id": 2172517, "city": "Canberra", "nuts_level1": null, "nuts_level2": null, "nuts_level3": null, "geonames_admin1": { "name": "ACT", "ascii_name": "ACT", "code": "AU.01" }, "geonames_admin2": null, "license": { "attribution": "Data from geonames.org under a CC-BY 3.0 license", "license": "http://creativecommons.org/licenses/by/3.0/" } } }], "labels": [], "id": "ror.1", "status": "active", "established": 1946, "relationships": [{ "type": "Related", "label": "Calvary Hospital", "id": "grid.460694.9" }, { "type": "Related", "label": "Canberra Hospital", "id": "grid.413314.0" }, { "type": "Related", "label": "Goulburn Base Hospital", "id": "grid.460721.6" }], "external_ids": { "ISNI": { "preferred": null, "all": ["ISNI1"] }, "FundRef": { "preferred": "FUNDREF1", "all": ["FUNDREF1", "FUNDREF2"] }, "OrgRef": { "preferred": null, "all": ["ORGREF1"] }, "Wikidata": { "preferred": null, "all": ["WIKIDATA1"] } }, "weight": 1 } - ] +[ + { + "names":[ + { + "value":"org_1", + "types":[ + "ror_display", + "label" + ], + "lang":null + } + ], + "locations":[ + { + "geonames_id":2078025, + "geonames_details":{ + "country_code":"AU", + "country_name":"Australia", + "lat":-35.024038, + "lng":138.572615, + "name":"Adelaide" + } + } + ], + "email_address":null, + "links":[ + "http://link1.com" + ], + "aliases":[ + "alias 1" + ], + "acronyms":[ + "ACR_1" + ], + "types":[ + "type_1" + ], + "ip_addresses":[ + + ], + "labels":[ + + ], + "id":"ror.1", + "status":"active", + "established":1946, + "relationships":[ + { + "type":"Related", + "label":"Calvary Hospital", + 
"id":"grid.460694.9" + }, + { + "type":"Related", + "label":"Canberra Hospital", + "id":"grid.413314.0" + }, + { + "type":"Related", + "label":"Goulburn Base Hospital", + "id":"grid.460721.6" + } + ], + "external_ids":[ + { + "type":"ISNI", + "preferred":null, + "all":[ + "ISNI1" + ] + }, + { + "type":"FundRef", + "preferred":"FUNDREF1", + "all":[ + "FUNDREF1", + "FUNDREF2" + ] + }, + { + "type":"OrgRef", + "preferred":null, + "all":[ + "ORGREF1" + ] + }, + { + "type":"Wikidata", + "preferred":null, + "all":[ + "WIKIDATA1" + ] + } + ] + } +] \ No newline at end of file diff --git a/orcid-scheduler-web/src/test/resources/ror/ror_1_org_updated_5_external_identifiers.json b/orcid-scheduler-web/src/test/resources/ror/ror_1_org_updated_5_external_identifiers.json index fb6ebaf5c34..b92728e8316 100644 --- a/orcid-scheduler-web/src/test/resources/ror/ror_1_org_updated_5_external_identifiers.json +++ b/orcid-scheduler-web/src/test/resources/ror/ror_1_org_updated_5_external_identifiers.json @@ -1,3 +1,95 @@ [ - { "name": "org_1_updated","country": {"country_code": "AU", "country_name": "Australia" }, "wikipedia_url": "http://en.wikipedia.org/wiki/org_1", "email_address": null, "links": ["http://link1.com/updated"], "aliases": ["alias 1"], "acronyms": ["ACR_1"], "types": ["type_1", "type_2"], "ip_addresses": [], "addresses": [{ "line_1": "", "line_2": "", "line_3": null, "lat": 0, "lng": 0, "postcode": "", "primary": false, "city": "City One Updated", "state": "San Jose", "state_code": "SJ", "country": "Costa Rica", "country_code": "CR", "geonames_city": { "id": 2172517, "city": "Canberra", "nuts_level1": null, "nuts_level2": null, "nuts_level3": null, "geonames_admin1": { "name": "ACT", "ascii_name": "ACT", "code": "AU.01" }, "geonames_admin2": null, "license": { "attribution": "Data from geonames.org under a CC-BY 3.0 license", "license": "http://creativecommons.org/licenses/by/3.0/" } } }], "labels": [], "id": "ror.1", "status": "active", "established": 1946, 
"relationships": [{ "type": "Related", "label": "Calvary Hospital", "id": "grid.460694.9" }, { "type": "Related", "label": "Canberra Hospital", "id": "grid.413314.0" }, { "type": "Related", "label": "Goulburn Base Hospital", "id": "grid.460721.6" }], "external_ids": { "ISNI": { "preferred": null, "all": ["ISNI1"] }, "FundRef": { "preferred": "FUNDREF1", "all": ["FUNDREF1"] }, "OrgRef": { "preferred": null, "all": ["ORGREF1"] }, "Wikidata": { "preferred": null, "all": ["WIKIDATA1"] } }, "weight": 1 } - ] + { + "names": [ + { + "value": "org_1_updated", + "types": [ + "ror_display", + "label" + ], + "lang": null + } ], + + "locations":[ + { + "geonames_id":2078025, + "geonames_details":{ + "country_code":"AU", + "country_name":"Australia", + "lat":-35.024038, + "lng":138.572615, + "name":"Adelaide Updated" + } + } + ], + "email_address":null, + "links":[ + "http://link1.com/updated" + ], + "aliases":[ + "alias 1" + ], + "acronyms":[ + "ACR_1" + ], + "types":[ + "type_1" + ], + "ip_addresses":[ + + ], + "labels":[ + + ], + "id":"ror.1", + "status":"active", + "established":1946, + "relationships":[ + { + "type":"Related", + "label":"Calvary Hospital", + "id":"grid.460694.9" + }, + { + "type":"Related", + "label":"Canberra Hospital", + "id":"grid.413314.0" + }, + { + "type":"Related", + "label":"Goulburn Base Hospital", + "id":"grid.460721.6" + } + ], + "external_ids":[ + { + "type":"ISNI", + "preferred":null, + "all":[ + "ISNI1" + ] + }, + { + "type":"FundRef", + "preferred":"FUNDREF1", + "all":[ + "FUNDREF1" + ] + }, + { + "type":"OrgRef", + "preferred":null, + "all":[ + "ORGREF1" + ] + }, + { + "type":"Wikidata", + "preferred":null, + "all":[ + "WIKIDATA1" + ] + } + ] + } +] \ No newline at end of file diff --git a/orcid-scheduler-web/src/test/resources/ror/ror_4_orgs_27_external_identifiers.json b/orcid-scheduler-web/src/test/resources/ror/ror_4_orgs_27_external_identifiers.json index 912f7a6731c..191e806239e 100644 --- 
a/orcid-scheduler-web/src/test/resources/ror/ror_4_orgs_27_external_identifiers.json +++ b/orcid-scheduler-web/src/test/resources/ror/ror_4_orgs_27_external_identifiers.json @@ -1,6 +1,350 @@ [ - { "name": "org_0", "country": {"country_code": "AU", "country_name": "Australia" },"wikipedia_url": null, "email_address": null, "links": null, "aliases": null, "acronyms": null, "types": null, "ip_addresses": [], "addresses": null, "labels": [], "id": "grid.1", "status": "active", "established": 1946, "relationships": null, "external_ids": null, "weight": 1 }, - { "name": "org_1", "country": {"country_code": "AU", "country_name": "Australia" },"wikipedia_url": "http://en.wikipedia.org/wiki/org_1", "email_address": null, "links": ["http://link1.com"], "aliases": ["alias 1"], "acronyms": ["ACR_1"], "types": ["type_1"], "ip_addresses": [], "addresses": [{ "line_1": "", "line_2": "", "line_3": null, "lat": 0, "lng": 0, "postcode": "", "primary": false, "city": "City One", "state": "Alabama", "state_code": "AL", "country": "United States", "country_code": "US", "geonames_city": { "id": 2172517, "city": "Canberra", "nuts_level1": null, "nuts_level2": null, "nuts_level3": null, "geonames_admin1": { "name": "ACT", "ascii_name": "ACT", "code": "AU.01" }, "geonames_admin2": null, "license": { "attribution": "Data from geonames.org under a CC-BY 3.0 license", "license": "http://creativecommons.org/licenses/by/3.0/" } } }], "labels": [], "id": "grid.1", "status": "active", "established": 1946, "relationships": [{ "type": "Related", "label": "Calvary Hospital", "id": "grid.460694.9" }, { "type": "Related", "label": "Canberra Hospital", "id": "grid.413314.0" }, { "type": "Related", "label": "Goulburn Base Hospital", "id": "grid.460721.6" }], "external_ids": { "ISNI": { "preferred": null, "all": ["ISNI1"] }, "FundRef": { "preferred": "FUNDREF1", "all": ["FUNDREF1"] }, "OrgRef": { "preferred": null, "all": ["ORGREF1"] }, "Wikidata": { "preferred": null, "all": ["WIKIDATA1"] } }, 
"weight": 1 }, - { "name": "org_2", "country": {"country_code": "AU", "country_name": "Australia" },"wikipedia_url": "http://en.wikipedia.org/wiki/org_2", "email_address": null, "links": ["http://link1.com", "http://link2.com"], "aliases": ["alias 1", "alias 2"], "acronyms": ["ACR_1", "ACR_2"], "types": ["type_1", "type_2"], "ip_addresses": [], "addresses": [{ "line_1": "", "line_2": "", "line_3": null, "lat": 0, "lng": 0, "postcode": "", "primary": false, "city": "City One", "state": "Alabama", "state_code": "AL", "country": "United States", "country_code": "US", "geonames_city": { "id": 2172517, "city": "Canberra", "nuts_level1": null, "nuts_level2": null, "nuts_level3": null, "geonames_admin1": { "name": "ACT", "ascii_name": "ACT", "code": "AU.01" }, "geonames_admin2": null, "license": { "attribution": "Data from geonames.org under a CC-BY 3.0 license", "license": "http://creativecommons.org/licenses/by/3.0/" } } }, { "line_1": "", "line_2": "", "line_3": null, "lat": 0, "lng": 0, "postcode": "", "primary": false, "city": "City Two", "state": "Alaska", "state_code": "AK", "country": "United States", "country_code": "US", "geonames_city": { "id": 2172517, "city": "Canberra", "nuts_level1": null, "nuts_level2": null, "nuts_level3": null, "geonames_admin1": { "name": "ACT", "ascii_name": "ACT", "code": "AU.01" }, "geonames_admin2": null, "license": { "attribution": "Data from geonames.org under a CC-BY 3.0 license", "license": "http://creativecommons.org/licenses/by/3.0/" } } }], "labels": [], "id": "grid.2", "status": "active", "established": 1946, "relationships": [{ "type": "Related", "label": "Calvary Hospital", "id": "grid.460694.9" }, { "type": "Related", "label": "Canberra Hospital", "id": "grid.413314.0" }, { "type": "Related", "label": "Goulburn Base Hospital", "id": "grid.460721.6" }], "external_ids": { "ISNI": { "preferred": null, "all": ["ISNI1", "ISNI2"] }, "FundRef": { "preferred": "FUNDREF1", "all": ["FUNDREF1", "FUNDREF2"] }, "OrgRef": { 
"preferred": null, "all": ["ORGREF1", "ORGREF2"] }, "Wikidata": { "preferred": null, "all": ["WIKIDATA1", "WIKIDATA2"] } }, "weight": 1 }, - { "name": "org_3", "country": {"country_code": "AU", "country_name": "Australia" },"wikipedia_url": "http://en.wikipedia.org/wiki/org_3", "email_address": null, "links": ["http://link1.com", "http://link2.com", "http://link3.com"], "aliases": ["alias 1", "alias 2", "alias 3"], "acronyms": ["ACR_1", "ACR_2", "ACR_3"], "types": ["type_1", "type_2", "type_3"], "ip_addresses": [], "addresses": [{ "line_1": "", "line_2": "", "line_3": null, "lat": 0, "lng": 0, "postcode": "", "primary": false, "city": "City One", "state": "Alabama", "state_code": "AL", "country": "United States", "country_code": "US", "geonames_city": { "id": 2172517, "city": "Canberra", "nuts_level1": null, "nuts_level2": null, "nuts_level3": null, "geonames_admin1": { "name": "ACT", "ascii_name": "ACT", "code": "AU.01" }, "geonames_admin2": null, "license": { "attribution": "Data from geonames.org under a CC-BY 3.0 license", "license": "http://creativecommons.org/licenses/by/3.0/" } } }, { "line_1": "", "line_2": "", "line_3": null, "lat": 0, "lng": 0, "postcode": "", "primary": false, "city": "City Two", "state": "Alaska", "state_code": "AK", "country": "United States", "country_code": "US", "geonames_city": { "id": 2172517, "city": "Canberra", "nuts_level1": null, "nuts_level2": null, "nuts_level3": null, "geonames_admin1": { "name": "ACT", "ascii_name": "ACT", "code": "AU.01" }, "geonames_admin2": null, "license": { "attribution": "Data from geonames.org under a CC-BY 3.0 license", "license": "http://creativecommons.org/licenses/by/3.0/" } } },{ "line_1": "", "line_2": "", "line_3": null, "lat": 0, "lng": 0, "postcode": "", "primary": true, "city": "City Three", "state": "Arizona", "state_code": "AZ", "country": "United States", "country_code": "US", "geonames_city": { "id": 2172517, "city": "Canberra", "nuts_level1": null, "nuts_level2": null, "nuts_level3": 
null, "geonames_admin1": { "name": "ACT", "ascii_name": "ACT", "code": "AU.01" }, "geonames_admin2": null, "license": { "attribution": "Data from geonames.org under a CC-BY 3.0 license", "license": "http://creativecommons.org/licenses/by/3.0/" } } }], "labels": [], "id": "grid.3", "status": "active", "established": 1946, "relationships": [{ "type": "Related", "label": "Calvary Hospital", "id": "grid.460694.9" }, { "type": "Related", "label": "Canberra Hospital", "id": "grid.413314.0" }, { "type": "Related", "label": "Goulburn Base Hospital", "id": "grid.460721.6" }], "external_ids": { "ISNI": { "preferred": null, "all": ["ISNI1", "ISNI2", "ISNI3"] }, "FundRef": { "preferred": "FUNDREF1", "all": ["FUNDREF1", "FUNDREF2", "FUNDREF3"] }, "OrgRef": { "preferred": null, "all": ["ORGREF1","ORGREF2","ORGREF3"] }, "Wikidata": { "preferred": null, "all": ["WIKIDATA1","WIKIDATA2","WIKIDATA3"] } }, "weight": 1 } - ] + { + "names":[ + { + "value":"org_0", + "types":[ + "ror_display", + "label" + ], + "lang":null + } + ], + "locations":[ + { + "geonames_id":2078025, + "geonames_details":{ + "country_code":"AU", + "country_name":"Australia", + "lat":-35.024038, + "lng":138.572615, + "name":"Adelaide" + } + } + ], + "email_address":null, + "links":null, + "aliases":null, + "acronyms":null, + "types":null, + "ip_addresses":[ + + ], + "addresses":null, + "labels":[ + + ], + "id":"grid.1", + "status":"active", + "established":1946, + "relationships":null, + "external_ids":[], + "weight":1 + }, + { + "names":[ + { + "value":"org_1", + "types":[ + "ror_display", + "label" + ], + "lang":null + } + ], + "locations":[ + { + "geonames_id":2078025, + "geonames_details":{ + "country_code":"AU", + "country_name":"Australia", + "lat":-35.024038, + "lng":138.572615, + "name":"Adelaide" + } + } + ], + "email_address":null, + "links":[ + "http://link1.com" + ], + "aliases":[ + "alias 1" + ], + "acronyms":[ + "ACR_1" + ], + "types":[ + "type_1" + ], + "ip_addresses":[ + + ], + "labels":[ + + ], + 
"id":"grid.1", + "status":"active", + "established":1946, + "relationships":[ + { + "type":"Related", + "label":"Calvary Hospital", + "id":"grid.460694.9" + }, + { + "type":"Related", + "label":"Canberra Hospital", + "id":"grid.413314.0" + }, + { + "type":"Related", + "label":"Goulburn Base Hospital", + "id":"grid.460721.6" + } + ], + "external_ids":[ + { + "type":"ISNI", + "preferred":null, + "all":[ + "ISNI1" + ] + }, + { + "type":"FundRef", + "preferred":"FUNDREF1", + "all":[ + "FUNDREF1" + ] + }, + { + "type":"OrgRef", + "preferred":null, + "all":[ + "ORGREF1" + ] + }, + { + "type":"Wikidata", + "preferred":null, + "all":[ + "WIKIDATA1" + ] + } + ], + "weight":1 + }, + { + "names":[ + { + "value":"org_2", + "types":[ + "ror_display", + "label" + ], + "lang":null + } + ], + "locations":[ + { + "geonames_id":2078025, + "geonames_details":{ + "country_code":"AU", + "country_name":"Australia", + "lat":-35.024038, + "lng":138.572615, + "name":"Adelaide" + } + } + ], + "email_address":null, + "links":[ + "http://link1.com", + "http://link2.com" + ], + "aliases":[ + "alias 1", + "alias 2" + ], + "acronyms":[ + "ACR_1", + "ACR_2" + ], + "types":[ + "type_1", + "type_2" + ], + "ip_addresses":[ + + ], + "labels":[ + + ], + "id":"grid.2", + "status":"active", + "established":1946, + "relationships":[ + { + "type":"Related", + "label":"Calvary Hospital", + "id":"grid.460694.9" + }, + { + "type":"Related", + "label":"Canberra Hospital", + "id":"grid.413314.0" + }, + { + "type":"Related", + "label":"Goulburn Base Hospital", + "id":"grid.460721.6" + } + ], + "external_ids":[ + { + "type":"ISNI", + "preferred":null, + "all":[ + "ISNI1", + "ISNI2" + ] + }, + { + "type":"FundRef", + "preferred":"FUNDREF1", + "all":[ + "FUNDREF1", + "FUNDREF2" + ] + }, + { + "type":"OrgRef", + "preferred":null, + "all":[ + "ORGREF1", + "ORGREF2" + ] + }, + { + "type":"Wikidata", + "preferred":null, + "all":[ + "WIKIDATA1", + "WIKIDATA2" + ] + } + ], + "weight":1 + }, + { + "names":[ + { + 
"value":"org_3", + "types":[ + "ror_display", + "label" + ], + "lang":null + } + ], + "locations":[ + { + "geonames_id":2078025, + "geonames_details":{ + "country_code":"AU", + "country_name":"Australia", + "lat":-35.024038, + "lng":138.572615, + "name":"Adelaide" + } + } + ], + "email_address":null, + "links":[ + "http://link1.com", + "http://link2.com", + "http://link3.com" + ], + "aliases":[ + "alias 1", + "alias 2", + "alias 3" + ], + "acronyms":[ + "ACR_1", + "ACR_2", + "ACR_3" + ], + "types":[ + "type_1", + "type_2", + "type_3" + ], + "ip_addresses":[ + + ], + "labels":[ + + ], + "id":"grid.3", + "status":"active", + "established":1946, + "relationships":[ + { + "type":"Related", + "label":"Calvary Hospital", + "id":"grid.460694.9" + }, + { + "type":"Related", + "label":"Canberra Hospital", + "id":"grid.413314.0" + }, + { + "type":"Related", + "label":"Goulburn Base Hospital", + "id":"grid.460721.6" + } + ], + "external_ids":[ + { + "type":"ISNI", + "preferred":null, + "all":[ + "ISNI1", + "ISNI2", + "ISNI3" + ] + }, + { + "type":"FundRef", + "preferred":"FUNDREF1", + "all":[ + "FUNDREF1", + "FUNDREF2", + "FUNDREF3" + ] + }, + { + "type":"OrgRef", + "preferred":null, + "all":[ + "ORGREF1", + "ORGREF2", + "ORGREF3" + ] + }, + { + "type":"Wikidata", + "preferred":null, + "all":[ + "WIKIDATA1", + "WIKIDATA2", + "WIKIDATA3" + ] + } + ], + "weight":1 + } +]