Skip to content

Commit

Permalink
Datacite submission & Orcid authority (#665)
Browse files Browse the repository at this point in the history
* A DataCite submission workflow

and corresponding updates in the dissemination crosswalk.
Add new metadata fields via the registry-loader
```
/dspace/bin/dspace registry-loader -metadata \
/dspace/config/registries/datacite.xml
```

There's a lot of TODOs - ideas for future improvements, or where the
mapping isn't ideal.

* ORCID authority which stores the ids in the authority column

easier to access in ui/dissemination...
At the moment only the getLabel calls are cached.
  • Loading branch information
kosarko authored Jun 3, 2024
1 parent a815e83 commit 0a89245
Show file tree
Hide file tree
Showing 13 changed files with 1,636 additions and 57 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.content.authority;

import java.util.List;
import java.util.stream.Collectors;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.external.CachingOrcidRestConnector;
import org.dspace.external.provider.orcid.xml.ExpandedSearchConverter;
import org.dspace.utils.DSpace;


/**
* ChoiceAuthority using the ORCID API.
* It uses the ORCID iD as the authority value and thus is simpler to use than the SolrAuthority.
*/
public class SimpleORCIDAuthority implements ChoiceAuthority {

// Logger used for the debug tracing at the start of every lookup method.
private static final Logger log = LogManager.getLogger(SimpleORCIDAuthority.class);

// Name under which this plugin instance was created; stored by setPluginInstanceName.
private String pluginInstanceName;
// Connector used for every ORCID lookup, resolved by name from the DSpace service manager.
private final CachingOrcidRestConnector orcidRestConnector = new DSpace().getServiceManager().getServiceByName(
"CachingOrcidRestConnector", CachingOrcidRestConnector.class);
// Largest page size requested from ORCID; getMatches substitutes it for any limit outside 1..maxResults.
private static final int maxResults = 100;

/**
 * Search ORCID for records matching the user supplied text.
 * <p>
 * The text was typed by a user, so surrounding whitespace is stripped before
 * it is handed to the connector; a blank or null text is reported as a failed
 * lookup without contacting ORCID.
 * <p>
 * Value of a "Name" field will be in canonical DSpace person name format,
 * which is "Lastname, Firstname(s)", e.g. "Smith, John Q.".
 * <p>
 * The confidence of the returned Choices is CF_FAILED when the search did not
 * succeed, CF_NOTFOUND for an empty page, CF_UNCERTAIN for exactly one choice
 * and CF_AMBIGUOUS otherwise.
 *
 * @param text   user's value to match
 * @param start  choice at which to start, 0 is first; negative values are treated as 0.
 * @param limit  maximum number of choices to return; values below 1 or above
 *               the internal maximum are replaced by that maximum.
 * @param locale explicit localization key if available, or null (only logged)
 * @return a Choices object (never null).
 */
@Override
public Choices getMatches(String text, int start, int limit, String locale) {
    log.debug("getMatches: {}, start: {}, limit: {}, locale: {}", text, start, limit, locale);
    if (text == null || text.trim().isEmpty()) {
        return new Choices(true);
    }

    // Send the cleaned-up text: leading/trailing whitespace would otherwise stop the
    // connector from recognising a plain single-word query.
    final String query = text.trim();
    final int offset = Math.max(start, 0);
    final int pageSize = (limit < 1 || limit > maxResults) ? maxResults : limit;

    ExpandedSearchConverter.Results search = orcidRestConnector.search(query, offset, pageSize);
    List<Choice> choices = search.results().stream()
        .map(this::toChoice)
        .collect(Collectors.toList());

    final int confidence;
    if (!search.isOk()) {
        confidence = Choices.CF_FAILED;
    } else if (choices.isEmpty()) {
        confidence = Choices.CF_NOTFOUND;
    } else if (choices.size() == 1) {
        confidence = Choices.CF_UNCERTAIN;
    } else {
        confidence = Choices.CF_AMBIGUOUS;
    }

    int total = search.numFound().intValue();
    // Widen before adding so a very large start offset cannot overflow the comparison.
    boolean more = total > (long) offset + pageSize;
    return new Choices(choices.toArray(new Choice[0]), offset, total, confidence, more);
}

/**
 * Get the single "best" match (if any) of a value in the authority
 * to the given user value.
 * <p>
 * The first result of {@link #getMatches(String, int, int, String)} is only
 * accepted when its value equals the given text ignoring case; otherwise an
 * empty, non-error Choices is returned. When the lookup produced no values at
 * all (including the blank-text and failure cases) the Choices from
 * getMatches is passed through unchanged, so its confidence is preserved.
 * <p>
 * This call is typically used in non-interactive metadata ingest
 * where there is no interactive agent to choose from among options.
 *
 * @param text   user's value to match
 * @param locale explicit localization key if available, or null
 * @return a Choices object (never null) with 1 or 0 values.
 */
@Override
public Choices getBestMatch(String text, String locale) {
    log.debug("getBestMatch: {}", text);
    Choices matches = getMatches(text, 0, 1, locale);

    // NOTE(review): a Choices created without a value array (as getMatches does for blank
    // text) appears to leave `values` null - guard instead of dereferencing it blindly.
    Choice[] found = matches.values;
    if (found == null || found.length == 0) {
        return matches;
    }

    String candidate = found[0].value;
    if (candidate == null || !candidate.equalsIgnoreCase(text)) {
        // novalue
        return new Choices(false);
    }
    return matches;
}

/**
 * Resolve the user-visible label for an authority key.
 * <p>
 * The lookup is delegated to the ORCID connector. When the connector has no
 * label to offer, the key itself is returned so callers always get a value.
 * <p>
 * This may get called many times while populating a Web page so it should
 * be implemented as efficiently as possible.
 *
 * @param key    authority key known to this authority.
 * @param locale explicit localization key if available, or null (not used here)
 * @return the label from the connector, or {@code key} when the connector returned null.
 */
@Override
public String getLabel(String key, String locale) {
    log.debug("getLabel: " + key);
    final String resolved = orcidRestConnector.getLabel(key);
    if (resolved == null) {
        // nothing found (or lookup failed): fall back to the raw key
        return key;
    }
    return resolved;
}

/**
 * Report the name under which this plugin instance was created.
 * <p>
 * The value is whatever was last passed to {@link #setPluginInstanceName(String)};
 * that normally happens only for instances created by <code>PluginService</code>.
 * Useful when the implementation class wants to be configured differently
 * when it is invoked under different names.
 *
 * @return name or null if it was never set.
 */
@Override
public String getPluginInstanceName() {
    return this.pluginInstanceName;
}

/**
 * Remember the name under which this plugin was instantiated.
 * <p>
 * Application code is not expected to call this; it is invoked automatically by
 * <code>PluginService.getNamedPlugin()</code> when the plugin is instantiated.
 *
 * @param name -- name used to select this class.
 */
@Override
public void setPluginInstanceName(String name) {
    pluginInstanceName = name;
}

/**
 * Convert one ORCID search result into a Choice.
 * <p>
 * The result's authority, value and label are copied over as they are. The
 * extras carry the ORCID iD plus whichever optional details the result has.
 *
 * @param result a single entry of an expanded-search response
 * @return the Choice describing that entry
 */
private Choice toChoice(ExpandedSearchConverter.Result result) {
    final String orcid = result.authority();
    final Choice choice = new Choice(orcid, result.value(), result.label());

    // the orcid always goes into the extras so that it is displayed
    choice.extras.put("orcid", orcid);

    // Optional details are added only when present; in dspace-angular each extras key
    // is looked up as the translation key form.other-information.<extra>
    result.creditName().ifPresent(creditName -> choice.extras.put("credit-name", creditName));
    result.otherNames().ifPresent(otherNames -> choice.extras.put("other-names", otherNames));
    result.institutionNames().ifPresent(institutions -> choice.extras.put("institution", institutions));

    return choice;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.external;

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URLEncoder;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.Optional;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.external.provider.orcid.xml.ExpandedSearchConverter;
import org.dspace.services.ConfigurationService;
import org.dspace.utils.DSpace;
import org.json.JSONObject;
import org.springframework.cache.annotation.Cacheable;

/**
* A different implementation of the communication with the ORCID API.
* The API returns no-cache headers, we use @Cacheable to cache the labels (id->name) for some time.
* Originally the idea was to reuse the OrcidRestConnector, but in the end that just wraps apache http client.
*/
public class CachingOrcidRestConnector {
// Logger shared by all instances of the connector.
private static final Logger log = LogManager.getLogger(CachingOrcidRestConnector.class);

// Base URL of the ORCID API; httpGet prepends it to every request path. Assigned through setApiURL.
private String apiURL;
// Access tokens are long-lived ~ 20years, don't bother with refreshing
// Declared volatile because init() also reads it outside of its synchronized block.
private volatile String _accessToken;
// Converts the response stream of an expanded-search request into Results.
private final ExpandedSearchConverter converter = new ExpandedSearchConverter();

// Letters-only detector: search() applies it with matches(), so the whole query has to consist of
// Unicode alphabetic characters before the wildcard/edismax rewrite is used.
private static final Pattern p = Pattern.compile("^\\p{Alpha}+", Pattern.UNICODE_CHARACTER_CLASS);
// Request parameters appended for letters-only queries: select the edismax parser and boost
// family-name (4.0) over credit-name (3.0) over other-names (2.0) over the unboosted text field.
private static final String edismaxParams = "&defType=edismax&qf=" +
URLEncoder.encode( "family-name^4.0 credit-name^3.0 other-names^2.0 text", StandardCharsets.UTF_8);

// Single HTTP client used for the token request and for API calls; connection attempts time out after 5 seconds.
private final HttpClient httpClient = HttpClient
.newBuilder()
.connectTimeout( Duration.ofSeconds(5))
.build();

/*
 * Lazily obtains the access token. One successful request is all we need, but success is not
 * guaranteed. A failure is logged rather than propagated, the token stays unset and the next
 * call simply tries again.
 *
 * Returns the token, or an empty Optional while none could be obtained.
 */
private Optional<String> init() {
    if (_accessToken != null) {
        // fast path: token already available, no locking needed
        return Optional.ofNullable(_accessToken);
    }

    synchronized (CachingOrcidRestConnector.class) {
        // re-check under the lock, another thread may have fetched the token meanwhile
        if (_accessToken == null) {
            log.info("Initializing Orcid connector");
            final ConfigurationService config = new DSpace().getConfigurationService();
            final String secret = config.getProperty("orcid.application-client-secret");
            final String id = config.getProperty("orcid.application-client-id");
            final String tokenUrl = config.getProperty("orcid.token-url");

            try {
                _accessToken = getAccessToken(secret, id, tokenUrl);
            } catch (Exception e) {
                log.error("Error during initialization of the Orcid connector", e);
            }
        }
    }
    return Optional.ofNullable(_accessToken);
}

/**
 * Configure the base URL of the ORCID API.
 * Every request path issued by this connector is appended to it.
 *
 * @param apiURL base URL of the API
 */
public void setApiURL(String apiURL) {
    this.apiURL = apiURL;
}

/**
 * Run an expanded-search request against the ORCID API.
 *
 * A query consisting solely of 'unicode letters' is rewritten to
 * <code>query || query*</code> and sent with the edismax parameters, so that
 * prefixes match as well. Any other query is passed to the API as is.
 *
 * If no access token is available, or the request fails with an IOException,
 * the shared ERROR result is returned instead of throwing.
 *
 * @param query - the search query
 * @param start - initial offset when paging results
 * @param limit - maximum number of results to return
 * @return the results
 */
public ExpandedSearchConverter.Results search(String query, int start, int limit) {
    // only letters -> try to be smart: add a wildcard variant and switch to edismax
    final boolean lettersOnly = p.matcher(query).matches();
    final String effectiveQuery = lettersOnly ? query + " || " + query + "*" : query;
    final String extra = lettersOnly ? edismaxParams : "";

    final String encodedQuery = URLEncoder.encode(effectiveQuery, StandardCharsets.UTF_8);
    final String searchPath = String.format("expanded-search?q=%s&start=%s&rows=%s%s",
        encodedQuery, start, limit, extra);

    final Optional<String> token = init();
    if (!token.isPresent()) {
        return ExpandedSearchConverter.ERROR;
    }

    try (InputStream body = httpGet(searchPath, token.get())) {
        return converter.convert(body);
    } catch (IOException e) {
        log.error("Error during search", e);
        return ExpandedSearchConverter.ERROR;
    }
}

/**
 * Look up the label for an ORCID, ideally the name of the person.
 *
 * The lookup is a one-row search for <code>orcid:&lt;id&gt;</code>; the label of the
 * first hit is returned. Non-null results are cached in "orcid-labels".
 *
 * A null result is never cached. It stands for
 * - either an error,
 * - or no hit at all, which would be odd for a valid orcid, so not caching should be ok
 *
 * @param orcid the id you are looking for
 * @return the label or null in case nothing found/error
 */
@Cacheable(cacheNames = "orcid-labels", unless = "#result == null")
public String getLabel(String orcid) {
    log.debug("getLabel: " + orcid);
    // in theory, we could use orcid.org/v3.0/<ORCID>/personal-details, but didn't want to write another converter
    final ExpandedSearchConverter.Results hits = search("orcid:" + orcid, 0, 1);
    if (!hits.isOk() || hits.numFound() <= 0) {
        return null;
    }
    return hits.results().get(0).label();
}

/**
 * Request a read-public access token using the client credentials grant.
 * <p>
 * The credentials are sent as an <code>application/x-www-form-urlencoded</code> body,
 * so both values are URL-encoded first; otherwise a secret containing characters such
 * as <code>&amp;</code>, <code>=</code> or <code>+</code> would corrupt the request.
 *
 * @param clientSecret the application's client secret
 * @param clientId     the application's client id
 * @param OAUTHUrl     URL of the token endpoint
 * @return the value of <code>access_token</code> from the JSON response
 * @throws RuntimeException when any argument is blank, the request fails or is interrupted
 *                          (the interrupt flag is restored), or the response status is not 2xx
 */
protected String getAccessToken(String clientSecret, String clientId, String OAUTHUrl) {
    if (StringUtils.isBlank(clientSecret)
        || StringUtils.isBlank(clientId)
        || StringUtils.isBlank(OAUTHUrl)) {
        log.error("Missing configuration for Orcid connector");
        throw new RuntimeException("Missing configuration for Orcid connector");
    }

    String authenticationParameters =
        String.format("client_id=%s&client_secret=%s&scope=/read-public&grant_type=client_credentials",
            URLEncoder.encode(clientId, StandardCharsets.UTF_8),
            URLEncoder.encode(clientSecret, StandardCharsets.UTF_8));

    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create(OAUTHUrl))
        .POST(HttpRequest.BodyPublishers.ofString(authenticationParameters))
        .timeout(Duration.ofSeconds(5))
        .header("Accept", "application/json")
        .header("Content-Type", "application/x-www-form-urlencoded")
        .build();

    final HttpResponse<String> response;
    try {
        response = httpClient.send(request, HttpResponse.BodyHandlers.ofString());
    } catch (InterruptedException e) {
        // keep the interruption visible to the code further up the stack
        Thread.currentThread().interrupt();
        log.error("Error during initialization of the Orcid connector", e);
        throw new RuntimeException(e);
    } catch (IOException e) {
        log.error("Error during initialization of the Orcid connector", e);
        throw new RuntimeException(e);
    }

    if (!isSuccess(response)) {
        String message = "Error during initialization of the Orcid connector, status code: "
            + response.statusCode();
        log.error(message);
        throw new RuntimeException(message);
    }

    JSONObject responseObject = new JSONObject(response.body());
    return responseObject.getString("access_token");
}

/**
 * Issue an authenticated GET against the ORCID API and hand back the response body.
 * <p>
 * On success the caller owns the returned stream and has to close it. For any other
 * status the body is closed here before the exception is thrown, so that the
 * underlying exchange is released.
 *
 * @param path        request path (and query string) relative to the API URL; leading and
 *                    trailing slashes are stripped
 * @param accessToken bearer token sent in the Authorization header
 * @return the response body of a 2xx response
 * @throws IOException      when the response status is not 2xx
 * @throws RuntimeException when sending fails with an I/O error or the thread is interrupted
 *                          (the interrupt flag is restored)
 */
private InputStream httpGet(String path, String accessToken) throws IOException {
    String trimmedPath = path.replaceFirst("^/+", "").replaceFirst("/+$", "");

    String fullPath = apiURL + '/' + trimmedPath;

    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create(fullPath))
        .timeout(Duration.ofSeconds(5))
        .header("Content-Type", "application/vnd.orcid+xml")
        .header("Authorization", "Bearer " + accessToken)
        .build();

    final HttpResponse<InputStream> response;
    try {
        response = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream());
    } catch (InterruptedException e) {
        // keep the interruption visible to the code further up the stack
        Thread.currentThread().interrupt();
        log.error("Error in rest connector for path: " + fullPath, e);
        throw new RuntimeException(e);
    } catch (IOException e) {
        log.error("Error in rest connector for path: " + fullPath, e);
        throw new RuntimeException(e);
    }

    if (isSuccess(response)) {
        return response.body();
    }

    // Nobody is going to read an error body; close it so the connection is not left dangling.
    try {
        response.body().close();
    } catch (IOException closeFailure) {
        log.debug("Could not close error response body for path: " + fullPath, closeFailure);
    }

    log.error("Error in rest connector for path: " + fullPath + ", status code: " + response.statusCode());
    throw new UnexpectedStatusException("Error in rest connector for path: "
        + fullPath + ", status code: " + response.statusCode());
}

/**
 * Tell whether a response carries a 2xx status code.
 *
 * @param response the response to inspect
 * @return true for status codes from 200 (inclusive) to 300 (exclusive)
 */
private boolean isSuccess(HttpResponse<?> response) {
    if (response.statusCode() < 200) {
        return false;
    }
    return response.statusCode() < 300;
}

/**
 * Raised by httpGet when the API answers with a non-2xx status.
 * Being an IOException, it is handled by callers that catch IOException.
 */
private static class UnexpectedStatusException extends IOException {
    /**
     * @param detail description of the failed request, including the status code
     */
    public UnexpectedStatusException(String detail) {
        super(detail);
    }
}

/**
 * Just for testing: install an access token directly instead of requesting one.
 * Takes the same class-level lock as the lazy initialization.
 *
 * @param accessToken the token to use for subsequent requests
 */
protected void forceAccessToken(String accessToken) {
    synchronized (CachingOrcidRestConnector.class) {
        _accessToken = accessToken;
    }
}
}
Loading

0 comments on commit 0a89245

Please sign in to comment.