forked from DSpace/DSpace
-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Datacite submission & Orcid authority (#665)
* A DataCite submission workflow and corresponding updates in the dissemination crosswalk. Add new metadata fields via the registry-loader ``` /dspace/bin/dspace registry-loader -metadata \ /dspace/config/registries/datacite.xml ``` There's a lot of TODOs - ideas for future improvements, or where the mapping isn't ideal. * ORCID authority which stores the ids in the authority column easier to access in ui/dissemination... At the moment only the getLabel calls are cached.
- Loading branch information
Showing
13 changed files
with
1,636 additions
and
57 deletions.
There are no files selected for viewing
164 changes: 164 additions & 0 deletions
164
dspace-api/src/main/java/org/dspace/content/authority/SimpleORCIDAuthority.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
/** | ||
* The contents of this file are subject to the license and copyright | ||
* detailed in the LICENSE and NOTICE files at the root of the source | ||
* tree and available online at | ||
* | ||
* http://www.dspace.org/license/ | ||
*/ | ||
package org.dspace.content.authority; | ||
|
||
import java.util.List; | ||
import java.util.stream.Collectors; | ||
|
||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.dspace.external.CachingOrcidRestConnector; | ||
import org.dspace.external.provider.orcid.xml.ExpandedSearchConverter; | ||
import org.dspace.utils.DSpace; | ||
|
||
|
||
/** | ||
* ChoiceAuthority using the ORCID API. | ||
* It uses the orcid as the authority value and thus is simpler to use then the * SolrAuthority. | ||
*/ | ||
public class SimpleORCIDAuthority implements ChoiceAuthority { | ||
|
||
private static final Logger log = LogManager.getLogger(SimpleORCIDAuthority.class); | ||
|
||
private String pluginInstanceName; | ||
private final CachingOrcidRestConnector orcidRestConnector = new DSpace().getServiceManager().getServiceByName( | ||
"CachingOrcidRestConnector", CachingOrcidRestConnector.class); | ||
private static final int maxResults = 100; | ||
|
||
/** | ||
* Get all values from the authority that match the preferred value. | ||
* Note that the offering was entered by the user and may contain | ||
* mixed/incorrect case, whitespace, etc so the plugin should be careful | ||
* to clean up user data before making comparisons. | ||
* <p> | ||
* Value of a "Name" field will be in canonical DSpace person name format, | ||
* which is "Lastname, Firstname(s)", e.g. "Smith, John Q.". | ||
* <p> | ||
* Some authorities with a small set of values may simply return the whole | ||
* set for any sample value, although it's a good idea to set the | ||
* defaultSelected index in the Choices instance to the choice, if any, | ||
* that matches the value. | ||
* | ||
* @param text user's value to match | ||
* @param start choice at which to start, 0 is first. | ||
* @param limit maximum number of choices to return, 0 for no limit. | ||
* @param locale explicit localization key if available, or null | ||
* @return a Choices object (never null). | ||
*/ | ||
@Override | ||
public Choices getMatches(String text, int start, int limit, String locale) { | ||
log.debug("getMatches: " + text + ", start: " + start + ", limit: " + limit + ", locale: " + locale); | ||
if (text == null || text.trim().isEmpty()) { | ||
return new Choices(true); | ||
} | ||
|
||
start = Math.max(start, 0); | ||
if (limit < 1 || limit > maxResults) { | ||
limit = maxResults; | ||
} | ||
|
||
ExpandedSearchConverter.Results search = orcidRestConnector.search(text, start, limit); | ||
List<Choice> choices = search.results().stream() | ||
.map(this::toChoice) | ||
.collect(Collectors.toList()); | ||
|
||
|
||
int confidence = !search.isOk() ? Choices.CF_FAILED : | ||
choices.isEmpty() ? Choices.CF_NOTFOUND : | ||
choices.size() == 1 ? Choices.CF_UNCERTAIN | ||
: Choices.CF_AMBIGUOUS; | ||
int total = search.numFound().intValue(); | ||
return new Choices(choices.toArray(new Choice[0]), start, total, | ||
confidence, total > (start + limit)); | ||
} | ||
|
||
/** | ||
* Get the single "best" match (if any) of a value in the authority | ||
* to the given user value. The "confidence" element of Choices is | ||
* expected to be set to a meaningful value about the circumstances of | ||
* this match. | ||
* <p> | ||
* This call is typically used in non-interactive metadata ingest | ||
* where there is no interactive agent to choose from among options. | ||
* | ||
* @param text user's value to match | ||
* @param locale explicit localization key if available, or null | ||
* @return a Choices object (never null) with 1 or 0 values. | ||
*/ | ||
@Override | ||
public Choices getBestMatch(String text, String locale) { | ||
log.debug("getBestMatch: " + text); | ||
Choices matches = getMatches(text, 0, 1, locale); | ||
if (matches.values.length != 0 && !matches.values[0].value.equalsIgnoreCase(text)) { | ||
// novalue | ||
matches = new Choices(false); | ||
} | ||
return matches; | ||
} | ||
|
||
/** | ||
* Get the canonical user-visible "label" (i.e. short descriptive text) | ||
* for a key in the authority. Can be localized given the implicit | ||
* or explicit locale specification. | ||
* <p> | ||
* This may get called many times while populating a Web page so it should | ||
* be implemented as efficiently as possible. | ||
* | ||
* @param key authority key known to this authority. | ||
* @param locale explicit localization key if available, or null | ||
* @return descriptive label - should always return something, never null. | ||
*/ | ||
@Override | ||
public String getLabel(String key, String locale) { | ||
log.debug("getLabel: " + key); | ||
String label = orcidRestConnector.getLabel(key); | ||
return label != null ? label : key; | ||
} | ||
|
||
/** | ||
* Get the instance's particular name. | ||
* Returns the name by which the class was chosen when | ||
* this instance was created. Only works for instances created | ||
* by <code>PluginService</code>, or if someone remembers to call <code>setPluginName.</code> | ||
* <p> | ||
* Useful when the implementation class wants to be configured differently | ||
* when it is invoked under different names. | ||
* | ||
* @return name or null if not available. | ||
*/ | ||
@Override | ||
public String getPluginInstanceName() { | ||
return pluginInstanceName; | ||
} | ||
|
||
/** | ||
* Set the name under which this plugin was instantiated. | ||
* Not to be invoked by application code, it is | ||
* called automatically by <code>PluginService.getNamedPlugin()</code> | ||
* when the plugin is instantiated. | ||
* | ||
* @param name -- name used to select this class. | ||
*/ | ||
@Override | ||
public void setPluginInstanceName(String name) { | ||
this.pluginInstanceName = name; | ||
} | ||
|
||
private Choice toChoice(ExpandedSearchConverter.Result result) { | ||
Choice c = new Choice(result.authority(), result.value(), result.label()); | ||
//add orcid to extras so it's shown | ||
c.extras.put("orcid", result.authority()); | ||
// add the value to extra information only if it is present | ||
//in dspace-angular the extras are keys for translation form.other-information.<extra> | ||
result.creditName().ifPresent(val -> c.extras.put("credit-name", val)); | ||
result.otherNames().ifPresent(val -> c.extras.put("other-names", val)); | ||
result.institutionNames().ifPresent(val -> c.extras.put("institution", val)); | ||
|
||
return c; | ||
} | ||
} |
222 changes: 222 additions & 0 deletions
222
dspace-api/src/main/java/org/dspace/external/CachingOrcidRestConnector.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,222 @@ | ||
/** | ||
* The contents of this file are subject to the license and copyright | ||
* detailed in the LICENSE and NOTICE files at the root of the source | ||
* tree and available online at | ||
* | ||
* http://www.dspace.org/license/ | ||
*/ | ||
package org.dspace.external; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.net.URI; | ||
import java.net.URLEncoder; | ||
import java.net.http.HttpClient; | ||
import java.net.http.HttpRequest; | ||
import java.net.http.HttpResponse; | ||
import java.nio.charset.StandardCharsets; | ||
import java.time.Duration; | ||
import java.util.Optional; | ||
import java.util.regex.Pattern; | ||
|
||
import org.apache.commons.lang.StringUtils; | ||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.dspace.external.provider.orcid.xml.ExpandedSearchConverter; | ||
import org.dspace.services.ConfigurationService; | ||
import org.dspace.utils.DSpace; | ||
import org.json.JSONObject; | ||
import org.springframework.cache.annotation.Cacheable; | ||
|
||
/** | ||
* A different implementation of the communication with the ORCID API. | ||
* The API returns no-cache headers, we use @Cacheable to cache the labels (id->name) for some time. | ||
* Originally the idea was to reuse the OrcidRestConnector, but in the end that just wraps apache http client. | ||
*/ | ||
public class CachingOrcidRestConnector { | ||
private static final Logger log = LogManager.getLogger(CachingOrcidRestConnector.class); | ||
|
||
private String apiURL; | ||
// Access tokens are long-lived ~ 20years, don't bother with refreshing | ||
private volatile String _accessToken; | ||
private final ExpandedSearchConverter converter = new ExpandedSearchConverter(); | ||
|
||
private static final Pattern p = Pattern.compile("^\\p{Alpha}+", Pattern.UNICODE_CHARACTER_CLASS); | ||
private static final String edismaxParams = "&defType=edismax&qf=" + | ||
URLEncoder.encode( "family-name^4.0 credit-name^3.0 other-names^2.0 text", StandardCharsets.UTF_8); | ||
|
||
private final HttpClient httpClient = HttpClient | ||
.newBuilder() | ||
.connectTimeout( Duration.ofSeconds(5)) | ||
.build(); | ||
|
||
/* | ||
* We basically need to obtain the access token only once, but there is no guarantee this will succeed. The | ||
* failure shouldn't be fatal, so we'll try again next time. | ||
*/ | ||
private Optional<String> init() { | ||
if (_accessToken == null) { | ||
synchronized (CachingOrcidRestConnector.class) { | ||
if (_accessToken == null) { | ||
log.info("Initializing Orcid connector"); | ||
ConfigurationService configurationService = new DSpace().getConfigurationService(); | ||
String clientSecret = configurationService.getProperty("orcid.application-client-secret"); | ||
String clientId = configurationService.getProperty("orcid.application-client-id"); | ||
String OAUTHUrl = configurationService.getProperty("orcid.token-url"); | ||
|
||
try { | ||
_accessToken = getAccessToken(clientSecret, clientId, OAUTHUrl); | ||
} catch (Exception e) { | ||
log.error("Error during initialization of the Orcid connector", e); | ||
} | ||
} | ||
} | ||
} | ||
return Optional.ofNullable(_accessToken); | ||
} | ||
|
||
/** | ||
* Set the URL of the ORCID API | ||
* @param apiURL | ||
*/ | ||
public void setApiURL(String apiURL) { | ||
this.apiURL = apiURL; | ||
} | ||
|
||
/** | ||
* Search the ORCID API | ||
* | ||
* The query is passed to the ORCID API as is, except when it contains just 'unicode letters'. | ||
* In that case, we try to be smart and turn it into edismax query with wildcard. | ||
* | ||
* @param query - the search query | ||
* @param start - initial offset when paging results | ||
* @param limit - maximum number of results to return | ||
* @return the results | ||
*/ | ||
public ExpandedSearchConverter.Results search(String query, int start, int limit) { | ||
String extra; | ||
// if query contains just 'unicode letters'; try to be smart and turn it into edismax query with wildcard | ||
if (p.matcher(query).matches()) { | ||
query += " || " + query + "*"; | ||
extra = edismaxParams; | ||
} else { | ||
extra = ""; | ||
} | ||
final String searchPath = String.format("expanded-search?q=%s&start=%s&rows=%s%s", URLEncoder.encode(query, | ||
StandardCharsets.UTF_8), start, limit, extra); | ||
|
||
return init().map(token -> { | ||
try (InputStream inputStream = httpGet(searchPath, token)) { | ||
return converter.convert(inputStream); | ||
} catch (IOException e) { | ||
log.error("Error during search", e); | ||
return ExpandedSearchConverter.ERROR; | ||
} | ||
}).orElse(ExpandedSearchConverter.ERROR); | ||
} | ||
|
||
/** | ||
* Get the label for an ORCID, ideally the name of the person. | ||
* | ||
* Null is: | ||
* - either an error -> won't be cached, | ||
* - or it means no result, which'd be odd provided we get here with a valid orcid -> not caching should be ok | ||
* | ||
* @param orcid the id you are looking for | ||
* @return the label or null in case nothing found/error | ||
*/ | ||
@Cacheable(cacheNames = "orcid-labels", unless = "#result == null") | ||
public String getLabel(String orcid) { | ||
log.debug("getLabel: " + orcid); | ||
// in theory, we could use orcid.org/v3.0/<ORCID>/personal-details, but didn't want to write another converter | ||
ExpandedSearchConverter.Results search = search("orcid:" + orcid, 0, 1); | ||
if (search.isOk() && search.numFound() > 0) { | ||
return search.results().get(0).label(); | ||
} | ||
return null; | ||
} | ||
|
||
protected String getAccessToken(String clientSecret, String clientId, String OAUTHUrl) { | ||
if (StringUtils.isNotBlank(clientSecret) | ||
&& StringUtils.isNotBlank(clientId) | ||
&& StringUtils.isNotBlank(OAUTHUrl)) { | ||
String authenticationParameters = | ||
String.format("client_id=%s&client_secret=%s&scope=/read-public&grant_type=client_credentials", | ||
clientId, clientSecret); | ||
|
||
HttpRequest request = HttpRequest.newBuilder() | ||
.uri(java.net.URI.create(OAUTHUrl)) | ||
.POST(HttpRequest.BodyPublishers.ofString(authenticationParameters)) | ||
.timeout(Duration.ofSeconds(5)) | ||
.header("Accept", "application/json") | ||
.header("Content-Type", "application/x-www-form-urlencoded") | ||
.build(); | ||
|
||
try { | ||
HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); | ||
if (isSuccess(response)) { | ||
JSONObject responseObject = new JSONObject(response.body()); | ||
return responseObject.getString("access_token"); | ||
} else { | ||
log.error("Error during initialization of the Orcid connector, status code: " | ||
+ response.statusCode()); | ||
throw new RuntimeException("Error during initialization of the Orcid connector, status code: " | ||
+ response.statusCode()); | ||
} | ||
} catch (IOException | InterruptedException e) { | ||
log.error("Error during initialization of the Orcid connector", e); | ||
throw new RuntimeException(e); | ||
} | ||
} else { | ||
log.error("Missing configuration for Orcid connector"); | ||
throw new RuntimeException("Missing configuration for Orcid connector"); | ||
} | ||
} | ||
|
||
private InputStream httpGet(String path, String accessToken) throws IOException { | ||
String trimmedPath = path.replaceFirst("^/+", "").replaceFirst("/+$", ""); | ||
|
||
String fullPath = apiURL + '/' + trimmedPath; | ||
|
||
HttpRequest request = HttpRequest.newBuilder() | ||
.uri(URI.create(fullPath)) | ||
.timeout(Duration.ofSeconds(5)) | ||
.header("Content-Type", "application/vnd.orcid+xml") | ||
.header("Authorization", "Bearer " + accessToken) | ||
.build(); | ||
|
||
try { | ||
HttpResponse<InputStream> response = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); | ||
if (isSuccess(response)) { | ||
return response.body(); | ||
} else { | ||
log.error("Error in rest connector for path: " + fullPath + ", status code: " + response.statusCode()); | ||
throw new UnexpectedStatusException("Error in rest connector for path: " | ||
+ fullPath + ", status code: " + response.statusCode()); | ||
} | ||
} catch (UnexpectedStatusException e) { | ||
throw e; | ||
} catch (IOException | InterruptedException e) { | ||
log.error("Error in rest connector for path: " + fullPath, e); | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
|
||
private boolean isSuccess(HttpResponse<?> response) { | ||
return response.statusCode() >= 200 && response.statusCode() < 300; | ||
} | ||
|
||
private static class UnexpectedStatusException extends IOException { | ||
public UnexpectedStatusException(String message) { | ||
super(message); | ||
} | ||
} | ||
|
||
//Just for testing | ||
protected void forceAccessToken(String accessToken) { | ||
synchronized (CachingOrcidRestConnector.class) { | ||
this._accessToken = accessToken; | ||
} | ||
} | ||
} |
Oops, something went wrong.