From 4f5478a2284d502ea0841fa8668587d56200802f Mon Sep 17 00:00:00 2001 From: Camelia Dumitru Date: Tue, 18 Jun 2024 15:22:50 +0100 Subject: [PATCH] Changes for v2 fields stored in SOLR --- .../impl/OrgDisambiguatedManagerImpl.java | 42 +++++++++++++++++-- .../java/org/orcid/pojo/OrgDisambiguated.java | 22 ++++++++++ .../OrgDisambiguatedSolrDocument.java | 35 ++++++++++++++++ .../utils/solr/entities/SolrConstants.java | 2 + .../org_disambiguated_extra_columns.xml | 8 ++++ solr-config/cores/org/conf/schema.xml | 4 ++ 6 files changed, 110 insertions(+), 3 deletions(-) diff --git a/orcid-core/src/main/java/org/orcid/core/manager/impl/OrgDisambiguatedManagerImpl.java b/orcid-core/src/main/java/org/orcid/core/manager/impl/OrgDisambiguatedManagerImpl.java index 3f695120808..f9120a548b2 100644 --- a/orcid-core/src/main/java/org/orcid/core/manager/impl/OrgDisambiguatedManagerImpl.java +++ b/orcid-core/src/main/java/org/orcid/core/manager/impl/OrgDisambiguatedManagerImpl.java @@ -13,6 +13,8 @@ import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.tuple.Pair; +import org.json.JSONArray; +import org.json.JSONObject; import org.orcid.core.manager.OrgDisambiguatedManager; import org.orcid.core.messaging.JmsMessageSender; import org.orcid.core.orgs.OrgDisambiguatedSourceType; @@ -141,16 +143,25 @@ private OrgDisambiguatedSolrDocument convertEntityToDocument(OrgDisambiguatedEnt document.setOrgDisambiguatedId(String.valueOf(entity.getId())); document.setOrgDisambiguatedName(entity.getName()); document.setOrgDisambiguatedCity(entity.getCity()); - document.setOrgDisambiguatedRegion(entity.getRegion()); + if (entity.getRegion() != null) + document.setOrgDisambiguatedRegion(entity.getRegion()); if (entity.getCountry() != null) document.setOrgDisambiguatedCountry(entity.getCountry()); document.setOrgDisambiguatedIdFromSource(entity.getSourceId()); document.setOrgDisambiguatedIdSourceType(entity.getSourceType()); 
document.setOrgDisambiguatedType(entity.getOrgType()); document.setOrgDisambiguatedPopularity(entity.getPopularity()); - Set orgNames = new HashSet<>(); - orgNames.add(entity.getName()); + + Set orgNames = getOrgNamesFromJson(entity.getNamesJson(), entity.getName()); + + if(entity.getLocationsJson() != null) { + document.setOrgLocationsJson(entity.getLocationsJson()); + } + if(entity.getNamesJson() != null) { + document.setOrgNamesJson(entity.getNamesJson()); + } + List orgs = orgDao.findByOrgDisambiguatedId(entity.getId()); if (orgs != null) { for (OrgEntity org : orgs) { @@ -244,6 +255,8 @@ private OrgDisambiguated convertSolrDocument(OrgDisambiguatedSolrDocument doc) { org.setDisambiguatedAffiliationIdentifier(doc.getOrgDisambiguatedId()); org.setSourceType(doc.getOrgDisambiguatedIdSourceType()); org.setSourceId(doc.getOrgDisambiguatedIdFromSource()); + org.setLocationsJson(doc.getOrgLocationsJson()); + org.setNamesJson(doc.getOrgNamesJson()); return org; } @@ -320,6 +333,8 @@ private OrgDisambiguated convertEntity(OrgDisambiguatedEntity orgDisambiguatedEn org.setSourceId(orgDisambiguatedEntity.getSourceId()); org.setSourceType(orgDisambiguatedEntity.getSourceType()); org.setUrl(orgDisambiguatedEntity.getUrl()); + org.setNamesJson(orgDisambiguatedEntity.getNamesJson()); + org.setLocationsJson(orgDisambiguatedEntity.getLocationsJson()); Map externalIdsMap = new HashMap(); if (orgDisambiguatedEntity.getExternalIdentifiers() != null && !orgDisambiguatedEntity.getExternalIdentifiers().isEmpty()) { for (OrgDisambiguatedExternalIdentifierEntity extIdEntity : orgDisambiguatedEntity.getExternalIdentifiers()) { @@ -407,4 +422,25 @@ public void cleanDuplicatedExternalIdentifiersForOrgDisambiguated(OrgDisambiguat } } + + private Set getOrgNamesFromJson(String orgNamesStr, String name){ + Set orgNames = new HashSet<>(); + orgNames.add(name); + if(orgNamesStr != null) { + + //add aliases, labels, acronyms + try { + JSONArray namesArr = new JSONArray(orgNamesStr); + 
for(Object nameObj: namesArr) { + JSONObject nameJson = (JSONObject)nameObj; + orgNames.add(nameJson.getString("value")); + } + } + catch (Exception ex) { + LOGGER.error("ORG Disambiguated exception when parsing names json: " + orgNamesStr, ex); + } + } + + return orgNames; + } } diff --git a/orcid-core/src/main/java/org/orcid/pojo/OrgDisambiguated.java b/orcid-core/src/main/java/org/orcid/pojo/OrgDisambiguated.java index b831d197c9e..e8149942f80 100644 --- a/orcid-core/src/main/java/org/orcid/pojo/OrgDisambiguated.java +++ b/orcid-core/src/main/java/org/orcid/pojo/OrgDisambiguated.java @@ -19,6 +19,8 @@ public class OrgDisambiguated implements Serializable { public String sourceType; public String url; public String disambiguatedAffiliationIdentifier; + private String locationsJson; + private String namesJson; private List orgDisambiguatedExternalIdentifiers; @@ -109,6 +111,22 @@ public void setOrgDisambiguatedExternalIdentifiers(List toMap() { HashMap datum = new HashMap(); @@ -123,10 +141,14 @@ public Map toMap() { datum.put("countryForDisplay", this.getCountryForDisplay()); datum.put("disambiguatedAffiliationIdentifier", this.getDisambiguatedAffiliationIdentifier()); datum.put("affiliationKey", this.getAffiliationKey()); + datum.put("locationJson", this.getLocationsJson()); + datum.put("namesJson", this.getNamesJson()); return datum; } public String toString() { return this.toMap().toString(); } + + } diff --git a/orcid-core/src/main/java/org/orcid/utils/solr/entities/OrgDisambiguatedSolrDocument.java b/orcid-core/src/main/java/org/orcid/utils/solr/entities/OrgDisambiguatedSolrDocument.java index 5dc61c8058a..d837de9a4c2 100644 --- a/orcid-core/src/main/java/org/orcid/utils/solr/entities/OrgDisambiguatedSolrDocument.java +++ b/orcid-core/src/main/java/org/orcid/utils/solr/entities/OrgDisambiguatedSolrDocument.java @@ -53,6 +53,12 @@ public class OrgDisambiguatedSolrDocument implements Serializable { @Field(SolrConstants.ORG_CHOSEN_BY_MEMBER) private boolean 
isOrgChosenByMember; + @Field(SolrConstants.ORG_NAMES_JSON) + private String orgNamesJson; + + @Field(SolrConstants.ORG_LOCATIONS_JSON) + private String orgLocationsJson; + public String getOrgDisambiguatedId() { return orgDisambiguatedId; } @@ -163,6 +169,22 @@ public void setOrgDisambiguatedStatus(String orgDisambiguatedStatus) { this.orgDisambiguatedStatus = orgDisambiguatedStatus; } + public String getOrgNamesJson() { + return orgNamesJson; + } + + public void setOrgNamesJson(String orgNamesJson) { + this.orgNamesJson = orgNamesJson; + } + + public String getOrgLocationsJson() { + return orgLocationsJson; + } + + public void setOrgLocationsJson(String orgLocationsJson) { + this.orgLocationsJson = orgLocationsJson; + } + @Override public int hashCode() { final int prime = 31; @@ -180,6 +202,8 @@ public int hashCode() { result = prime * result + ((orgDisambiguatedStatus == null) ? 0 : orgDisambiguatedStatus.hashCode()); result = prime * result + ((orgDisambiguatedType == null) ? 0 : orgDisambiguatedType.hashCode()); result = prime * result + ((orgNames == null) ? 0 : orgNames.hashCode()); + result = prime * result + ((orgNamesJson == null) ? 0 : orgNamesJson.hashCode()); + result = prime * result + ((orgLocationsJson == null) ? 
0 : orgLocationsJson.hashCode()); return result; } @@ -251,6 +275,17 @@ public boolean equals(Object obj) { return false; } else if (!orgNames.equals(other.orgNames)) return false; + if (orgLocationsJson == null) { + if (other.orgLocationsJson != null) + return false; + } else if (!orgLocationsJson.equals(other.orgLocationsJson)) + return false; + if (orgNamesJson == null) { + if (other.orgNamesJson != null) + return false; + } else if (!orgNamesJson.equals(other.orgNamesJson)) + return false; return true; } + } diff --git a/orcid-core/src/main/java/org/orcid/utils/solr/entities/SolrConstants.java b/orcid-core/src/main/java/org/orcid/utils/solr/entities/SolrConstants.java index e04ee56c0fa..a1f73751863 100644 --- a/orcid-core/src/main/java/org/orcid/utils/solr/entities/SolrConstants.java +++ b/orcid-core/src/main/java/org/orcid/utils/solr/entities/SolrConstants.java @@ -73,6 +73,8 @@ public class SolrConstants { public static final String ORG_DISAMBIGUATED_POPULARITY = "org-disambiguated-popularity"; public static final String ORG_DEFINED_FUNDING_TYPE = "org-defined-funding-type"; public static final String ORG_CHOSEN_BY_MEMBER = "org-chosen-by-member"; + public static final String ORG_LOCATIONS_JSON = "org-locations-json"; + public static final String ORG_NAMES_JSON = "org-names-json"; public static final String ORG_NAMES = "org-names"; public static final String IS_FUNDING_ORG = "is-funding-org"; public static final String DYNAMIC_SELF = "-self"; diff --git a/orcid-persistence/src/main/resources/db/updates/org_disambiguated_extra_columns.xml b/orcid-persistence/src/main/resources/db/updates/org_disambiguated_extra_columns.xml index e0b2613f4c1..a674b2123b2 100644 --- a/orcid-persistence/src/main/resources/db/updates/org_disambiguated_extra_columns.xml +++ b/orcid-persistence/src/main/resources/db/updates/org_disambiguated_extra_columns.xml @@ -2,6 +2,14 @@ xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog 
http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-2.0.xsd"> + + + + + + + + diff --git a/solr-config/cores/org/conf/schema.xml b/solr-config/cores/org/conf/schema.xml index c439c93830b..ea3e7a8e734 100644 --- a/solr-config/cores/org/conf/schema.xml +++ b/solr-config/cores/org/conf/schema.xml @@ -1031,6 +1031,10 @@ multiValued="false" /> + + org-disambiguated-id