Skip to content

Commit

Permalink
Merge pull request #9402 from GlobalDataverseCommunityConsortium/IQSS…
Browse files Browse the repository at this point in the history
…/9150-handle_fundreg_reqs_for_ext_cvv

Iqss/9150 handle fundreg reqs for ext cvv
  • Loading branch information
kcondon authored Apr 24, 2023
2 parents aba08e6 + bd7cdb4 commit 6dc9a5f
Show file tree
Hide file tree
Showing 12 changed files with 221 additions and 108 deletions.
1 change: 1 addition & 0 deletions doc/release-notes/9150-improved-external-vocab-supprt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
It is now possible to write external vocabulary scripts that target a single child field in a metadata block. Example scripts are now available at https://github.com/gdcc/dataverse-external-vocab-support that can be configured to support lookup from the Research Organization Registry (ROR) for the Author Affiliation field and from the CrossRef Funding Registry (Fundreg) for the Funding Information/Agency field, both in the standard Citation metadata block. Application of these scripts to other fields, and the development of other scripts targeting child fields, are now possible.
190 changes: 130 additions & 60 deletions src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@
import javax.inject.Named;
import javax.json.Json;
import javax.json.JsonArray;
import javax.json.JsonArrayBuilder;
import javax.json.JsonException;
import javax.json.JsonObject;
import javax.json.JsonObjectBuilder;
import javax.json.JsonReader;
import javax.json.JsonString;
import javax.json.JsonValue;
import javax.json.JsonValue.ValueType;
import javax.persistence.EntityManager;
import javax.persistence.NoResultException;
import javax.persistence.NonUniqueResultException;
Expand Down Expand Up @@ -343,33 +345,33 @@ public Map<Long, JsonObject> getCVocConf(boolean byTermUriField){
public void registerExternalVocabValues(DatasetField df) {
DatasetFieldType dft =df.getDatasetFieldType();
logger.fine("Registering for field: " + dft.getName());
JsonObject cvocEntry = getCVocConf(false).get(dft.getId());
if(dft.isPrimitive()) {
for(DatasetFieldValue dfv: df.getDatasetFieldValues()) {
JsonObject cvocEntry = getCVocConf(true).get(dft.getId());
if (dft.isPrimitive()) {
for (DatasetFieldValue dfv : df.getDatasetFieldValues()) {
registerExternalTerm(cvocEntry, dfv.getValue());
}
} else {
if (df.getDatasetFieldType().isCompound()) {
DatasetFieldType termdft = findByNameOpt(cvocEntry.getString("term-uri-field"));
for (DatasetFieldCompoundValue cv : df.getDatasetFieldCompoundValues()) {
for (DatasetField cdf : cv.getChildDatasetFields()) {
logger.fine("Found term uri field type id: " + cdf.getDatasetFieldType().getId());
if(cdf.getDatasetFieldType().equals(termdft)) {
registerExternalTerm(cvocEntry, cdf.getValue());
}
} else {
if (df.getDatasetFieldType().isCompound()) {
DatasetFieldType termdft = findByNameOpt(cvocEntry.getString("term-uri-field"));
for (DatasetFieldCompoundValue cv : df.getDatasetFieldCompoundValues()) {
for (DatasetField cdf : cv.getChildDatasetFields()) {
logger.fine("Found term uri field type id: " + cdf.getDatasetFieldType().getId());
if (cdf.getDatasetFieldType().equals(termdft)) {
registerExternalTerm(cvocEntry, cdf.getValue());
}
}
}
}
}
}

/**
* Retrieves indexable strings from a cached externalvocabularyvalue entry.
*
* This method assumes externalvocabularyvalue entries have been filtered and
* the externalvocabularyvalue entry contain a single JsonObject whose values
* are either Strings or an array of objects with "lang" and "value" keys. The
* string, or the "value"s for each language are added to the set.
* the externalvocabularyvalue entry contain a single JsonObject whose "personName" or "termName" values
* are either Strings or an array of objects with "lang" and ("value" or "content") keys. The
* string, or the "value/content"s for each language are added to the set.
*
* Any parsing error results in no entries (there can be unfiltered entries with
* unknown structure - getting some strings from such an entry could give fairly
Expand All @@ -385,16 +387,25 @@ public Set<String> getStringsFor(String termUri) {
if (jo != null) {
try {
for (String key : jo.keySet()) {
JsonValue jv = jo.get(key);
if (jv.getValueType().equals(JsonValue.ValueType.STRING)) {
logger.fine("adding " + jo.getString(key) + " for " + termUri);
strings.add(jo.getString(key));
} else {
if (jv.getValueType().equals(JsonValue.ValueType.ARRAY)) {
JsonArray jarr = jv.asJsonArray();
for (int i = 0; i < jarr.size(); i++) {
logger.fine("adding " + jarr.getJsonObject(i).getString("value") + " for " + termUri);
strings.add(jarr.getJsonObject(i).getString("value"));
if (key.equals("termName") || key.equals("personName")) {
JsonValue jv = jo.get(key);
if (jv.getValueType().equals(JsonValue.ValueType.STRING)) {
logger.fine("adding " + jo.getString(key) + " for " + termUri);
strings.add(jo.getString(key));
} else {
if (jv.getValueType().equals(JsonValue.ValueType.ARRAY)) {
JsonArray jarr = jv.asJsonArray();
for (int i = 0; i < jarr.size(); i++) {
JsonObject entry = jarr.getJsonObject(i);
if (entry.containsKey("value")) {
logger.fine("adding " + entry.getString("value") + " for " + termUri);
strings.add(entry.getString("value"));
} else if (entry.containsKey("content")) {
logger.fine("adding " + entry.getString("content") + " for " + termUri);
strings.add(entry.getString("content"));

}
}
}
}
}
Expand All @@ -410,7 +421,7 @@ public Set<String> getStringsFor(String termUri) {
}

/**
* Perform a query to retrieve a cached valie from the externalvocabularvalue table
* Perform a query to retrieve a cached value from the externalvocabularvalue table
* @param termUri
* @return - the entry's value as a JsonObject
*/
Expand Down Expand Up @@ -444,9 +455,25 @@ public void registerExternalTerm(JsonObject cvocEntry, String term) {
logger.fine("Ingoring blank term");
return;
}
boolean isExternal = false;
JsonObject vocabs = cvocEntry.getJsonObject("vocabs");
for (String key: vocabs.keySet()) {
JsonObject vocab = vocabs.getJsonObject(key);
if (vocab.containsKey("uriSpace")) {
if (term.startsWith(vocab.getString("uriSpace"))) {
isExternal = true;
break;
}
}
}
if (!isExternal) {
logger.fine("Ignoring free text entry: " + term);
return;
}
logger.fine("Registering term: " + term);
try {
URI uri = new URI(term);
//Assure the term is in URI form - should be if the uriSpace entry was correct
new URI(term);
ExternalVocabularyValue evv = null;
try {
evv = em.createQuery("select object(o) from ExternalVocabularyValue as o where o.uri=:uri",
Expand Down Expand Up @@ -542,37 +569,7 @@ private JsonObject filterResponse(JsonObject cvocEntry, JsonObject readObject, S
String[] pathParts = param.split("/");
logger.fine("PP: " + String.join(", ", pathParts));
JsonValue curPath = readObject;
for (int j = 0; j < pathParts.length - 1; j++) {
if (pathParts[j].contains("=")) {
JsonArray arr = ((JsonArray) curPath);
for (int k = 0; k < arr.size(); k++) {
String[] keyVal = pathParts[j].split("=");
logger.fine("Looking for object where " + keyVal[0] + " is " + keyVal[1]);
JsonObject jo = arr.getJsonObject(k);
String val = jo.getString(keyVal[0]);
String expected = keyVal[1];
if (expected.equals("@id")) {
expected = termUri;
}
if (val.equals(expected)) {
logger.fine("Found: " + jo.toString());
curPath = jo;
break;
}
}
} else {
curPath = ((JsonObject) curPath).get(pathParts[j]);
logger.fine("Found next Path object " + curPath.toString());
}
}
JsonValue jv = ((JsonObject) curPath).get(pathParts[pathParts.length - 1]);
if (jv.getValueType().equals(JsonValue.ValueType.STRING)) {
vals.add(i, ((JsonString) jv).getString());
} else if (jv.getValueType().equals(JsonValue.ValueType.ARRAY)) {
vals.add(i, jv);
} else if (jv.getValueType().equals(JsonValue.ValueType.OBJECT)) {
vals.add(i, jv);
}
vals.add(i, processPathSegment(0, pathParts, curPath, termUri));
logger.fine("Added param value: " + i + ": " + vals.get(i));
} else {
logger.fine("Param is: " + param);
Expand Down Expand Up @@ -615,6 +612,7 @@ private JsonObject filterResponse(JsonObject cvocEntry, JsonObject readObject, S
} catch (Exception e) {
logger.warning("External Vocabulary: " + termUri + " - Failed to find value for " + filterKey + ": "
+ e.getMessage());
e.printStackTrace();
}
}
}
Expand All @@ -628,6 +626,66 @@ private JsonObject filterResponse(JsonObject cvocEntry, JsonObject readObject, S
}
}

/**
 * Recursively resolves one segment of a "/"-separated retrieval path against a
 * JSON value and returns whatever the final segment selects.
 *
 * Segments before the last one are interpreted as follows:
 * <ul>
 * <li>{@code key=value} - curPath must be an array; the first object in it whose
 * string member {@code key} equals {@code value} is followed. A value of
 * {@code @id} is replaced by termUri before comparing. Elements that lack the
 * key (or hold a non-string there) are treated as non-matching.</li>
 * <li>{@code key=*} - curPath must be an array; the member {@code key} of every
 * object in it is followed and the individual results are collected into a new
 * JsonArray. Elements that lack the key, and results that resolve to null, are
 * left out of the collected array.</li>
 * <li>any other text - curPath must be an object and its member with that name
 * is followed.</li>
 * </ul>
 * The last segment names a member of the object reached so far.
 *
 * A ClassCastException or NullPointerException propagates when the JSON
 * structure does not match the path (for example a missing intermediate or
 * final member).
 * NOTE(review): the caller appears to wrap this in a catch-all handler - confirm.
 *
 * @param index     position in pathParts of the segment to process
 * @param pathParts the path split on "/"
 * @param curPath   the JSON value the current segment is applied to
 * @param termUri   the term URI substituted for an expected value of "@id"
 * @return a String when the final member is a JSON string, the JsonValue itself
 *         when it is an array or object, a JsonArray of collected results for a
 *         {@code key=*} segment, or null when no element matches a
 *         {@code key=value} segment or the final member has any other type
 */
Object processPathSegment(int index, String[] pathParts, JsonValue curPath, String termUri) {
    if (index >= pathParts.length - 1) {
        // Final segment: pick the named member and convert it to the return form.
        logger.fine("Last segment: " + curPath.toString());
        logger.fine("Looking for : " + pathParts[index]);
        JsonValue jv = ((JsonObject) curPath).get(pathParts[index]);
        switch (jv.getValueType()) {
        case STRING:
            return ((JsonString) jv).getString();
        case ARRAY:
        case OBJECT:
            return jv;
        default:
            // Numbers, booleans and JSON null are not supported as results.
            return null;
        }
    }

    if (!pathParts[index].contains("=")) {
        // Plain member name: descend into that member of the current object.
        JsonValue next = ((JsonObject) curPath).get(pathParts[index]);
        logger.fine("Found next Path object " + next.toString());
        return processPathSegment(index + 1, pathParts, next, termUri);
    }

    JsonArray arr = ((JsonArray) curPath);
    // Limit the split to 2 so an expected value that itself contains '=' is kept
    // whole and a trailing '=' yields an empty expected value instead of failing.
    String[] keyVal = pathParts[index].split("=", 2);
    logger.fine("Looking for object where " + keyVal[0] + " is " + keyVal[1]);
    String key = keyVal[0];
    String expected = keyVal[1];

    if (expected.equals("*")) {
        // Wildcard: follow the key in every object element and gather the results.
        JsonArrayBuilder parts = Json.createArrayBuilder();
        for (JsonValue subPath : arr) {
            if (!(subPath instanceof JsonObject)) {
                continue;
            }
            JsonValue nextValue = ((JsonObject) subPath).get(key);
            if (nextValue == null) {
                // This element has no such member - nothing to collect from it.
                continue;
            }
            Object obj = processPathSegment(index + 1, pathParts, nextValue, termUri);
            if (obj instanceof String) {
                parts.add((String) obj);
            } else if (obj != null) {
                // JsonArrayBuilder.add(JsonValue) rejects null, so null results are skipped.
                parts.add((JsonValue) obj);
            }
        }
        return parts.build();
    }

    if (expected.equals("@id")) {
        expected = termUri;
    }
    for (int k = 0; k < arr.size(); k++) {
        JsonObject jo = arr.getJsonObject(k);
        // Default of null keeps an element without the key from aborting the search.
        String val = jo.getString(key, null);
        if (val != null && val.equals(expected)) {
            logger.fine("Found: " + jo.toString());
            return processPathSegment(index + 1, pathParts, jo, termUri);
        }
    }
    // No element matched the key=value condition.
    return null;
}

/**
* Supports validation of externally controlled values. If the value is a URI it
* must be in the namespace (start with) one of the uriSpace values of an
Expand Down Expand Up @@ -669,8 +727,20 @@ public boolean isValidCVocValue(DatasetFieldType dft, String value) {
public List<String> getVocabScripts( Map<Long, JsonObject> cvocConf) {
//ToDo - only return scripts that are needed (those fields are set on display pages, those blocks/fields are allowed in the Dataverse collection for create/edit)?
Set<String> scripts = new HashSet<String>();
for(JsonObject jo: cvocConf.values()) {
scripts.add(jo.getString("js-url"));
for (JsonObject jo : cvocConf.values()) {
// Allow either a single script (a string) or an array of scripts (used, for
// example, to allow use of the common cvocutils.js script along with a main
// script for the field.)
JsonValue scriptValue = jo.get("js-url");
ValueType scriptType = scriptValue.getValueType();
if (scriptType.equals(ValueType.STRING)) {
scripts.add(((JsonString) scriptValue).getString());
} else if (scriptType.equals(ValueType.ARRAY)) {
JsonArray scriptArray = ((JsonArray) scriptValue);
for (int i = 0; i < scriptArray.size(); i++) {
scripts.add(scriptArray.getString(i));
}
}
}
String customScript = settingsService.getValueForKey(SettingsServiceBean.Key.ControlledVocabularyCustomJavaScript);
if (customScript != null && !customScript.isEmpty()) {
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
Original file line number Diff line number Diff line change
Expand Up @@ -3624,9 +3624,9 @@ public String save() {
((UpdateDatasetVersionCommand) cmd).setValidateLenient(true);
}
dataset = commandEngine.submit(cmd);
for (DatasetField df : dataset.getLatestVersion().getDatasetFields()) {
for (DatasetField df : dataset.getLatestVersion().getFlatDatasetFields()) {
logger.fine("Found id: " + df.getDatasetFieldType().getId());
if (fieldService.getCVocConf(false).containsKey(df.getDatasetFieldType().getId())) {
if (fieldService.getCVocConf(true).containsKey(df.getDatasetFieldType().getId())) {
fieldService.registerExternalVocabValues(df);
}
}
Expand Down Expand Up @@ -5799,7 +5799,7 @@ public Set<Entry<String, String>> getMetadataLanguages() {
}

public List<String> getVocabScripts() {
return fieldService.getVocabScripts(settingsWrapper.getCVocConf());
return fieldService.getVocabScripts(settingsWrapper.getCVocConf(false));
}

public String getFieldLanguage(String languages) {
Expand Down
18 changes: 13 additions & 5 deletions src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ public class SettingsWrapper implements java.io.Serializable {

//External Vocabulary support
private Map<Long, JsonObject> cachedCvocMap = null;
private Map<Long, JsonObject> cachedCvocByTermFieldMap = null;

private Long zipDownloadLimit = null;

Expand Down Expand Up @@ -656,12 +657,19 @@ public String getFooterCopyrightAndYear() {
return footerCopyrightAndYear;
}

public Map<Long, JsonObject> getCVocConf() {
//Cache this in the view
if(cachedCvocMap==null) {
cachedCvocMap = fieldService.getCVocConf(false);
public Map<Long, JsonObject> getCVocConf(boolean byTermField) {
if (byTermField) {
if (cachedCvocByTermFieldMap == null) {
cachedCvocByTermFieldMap = fieldService.getCVocConf(true);
}
return cachedCvocByTermFieldMap;
} else {
// Cache this in the view
if (cachedCvocMap == null) {
cachedCvocMap = fieldService.getCVocConf(false);
}
return cachedCvocMap;
}
return cachedCvocMap;
}

public String getMetricsUrl() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ public GlobalId parsePersistentId(String pidString) {

@Override
public GlobalId parsePersistentId(String protocol, String identifierString) {
logger.info("Checking Perma: " + identifierString);
logger.fine("Checking Perma: " + identifierString);
if (!PERMA_PROTOCOL.equals(protocol)) {
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -828,7 +828,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
}

Set<String> langs = settingsService.getConfiguredLanguages();
Map<Long, JsonObject> cvocMap = datasetFieldService.getCVocConf(false);
Map<Long, JsonObject> cvocMap = datasetFieldService.getCVocConf(true);
Set<String> metadataBlocksWithValue = new HashSet<>();
for (DatasetField dsf : datasetVersion.getFlatDatasetFields()) {

Expand Down
Loading

0 comments on commit 6dc9a5f

Please sign in to comment.