Skip to content

Commit

Permalink
feat: #4 integrate with Beacon
Browse files Browse the repository at this point in the history
This commit integrates the Dataset Discovery
Service with the Beacon Network.
It adds the following features:
- Call to `/2.0.0/filtering_terms` to retrieve Beacon facets;
- Call to `/2.0.0/individuals` to retrieve datasets;
- Call to `/LSAAI/token` to retrieve the LSAAI access token;

Beacon Network does not provide DCAT-AP metadata.
Instead, it provides a list of datasets and record counts
hit by the beacon query.

In that sense, `BeaconDatasetsSearchService` is the main class
of this commit. It decorates existing `DatasetsSearchService`,
adding the call to `/2.0.0/individuals`. It passes to
`DatasetsSearchService` the list of dataset identifiers as CKAN facets.

It also enhances search facets, to include beacon filtering terms,
fetched from `/2.0.0/filtering_terms`, by `BeaconFilteringTermsService`.

`facetGroupCount` is a new field in `DatasetsSearchResponse`. It
is the count per facet group, either `ckan` or `beacon`. This is
important to find any discrepancies between the number of datasets
returned by CKAN and Beacon (e.g. if CKAN returns 0 but Beacon
returns >0, it is a sign that CKAN filters were incorrectly selected).
  • Loading branch information
brunopacheco1 committed Apr 9, 2024
1 parent 0a655b2 commit 5aadf1f
Show file tree
Hide file tree
Showing 25 changed files with 110,317 additions and 57 deletions.
2 changes: 1 addition & 1 deletion _http/beacon.http
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Content-Type: application/json
"query":{
"filters":[
{
"id":"NCIT:C16352",
"id":"UBERON:0005352",
"scope": "individual"
}
],
Expand Down
20 changes: 19 additions & 1 deletion _http/discovery.http
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,29 @@ Content-Type: application/json
}, {
"facetGroup": "ckan",
"facet": "theme",
"value": "http://purl.org/zonmw/covid19/10003"
"value": "http://purl.org/zonmw/covid19/10006"
}, {
"facetGroup": "ckan",
"facet": "tags",
"value": "COVID-19"
}, {
"facetGroup": "beacon",
"facet": "cellosaurus",
"value": "NCIT:C16352"
}]
}

###

POST http://localhost:8080/api/v1/datasets/search
Content-Type: application/json

{
"query": "COVID",
"facets": [{
"facetGroup": "beacon",
"facet": "cellosaurus",
"value": "NCIT:C16352"
}]
}

Expand Down
19 changes: 14 additions & 5 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
<?xml version="1.0"?>
<project
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"
xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>io.github.genomicdatainfrastructure</groupId>
<artifactId>gdi-userportal-dataset-discovery-service</artifactId>
Expand All @@ -25,6 +24,7 @@
<surefire.version>3.0.0-M7</surefire.version>
<quarkus-wiremock.version>1.3.0</quarkus-wiremock.version>
<jacoco-maven-plugin.version>0.8.11</jacoco-maven-plugin.version>
<commons-lang3.version>3.14.0</commons-lang3.version>
</properties>
<dependencyManagement>
<dependencies>
Expand Down Expand Up @@ -77,10 +77,19 @@
<artifactId>quarkus-openapi-generator</artifactId>
<version>${quarkus-openapi-generator.version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>${commons-lang3.version}</version>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-oidc</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-cache</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-junit5</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import io.github.genomicdatainfrastructure.discovery.model.DatasetSearchQuery;
import io.github.genomicdatainfrastructure.discovery.model.DatasetsSearchResponse;
import io.github.genomicdatainfrastructure.discovery.model.RetrievedDataset;
import io.github.genomicdatainfrastructure.discovery.services.DatasetsSearchService;
import io.github.genomicdatainfrastructure.discovery.services.BeaconDatasetsSearchService;
import io.github.genomicdatainfrastructure.discovery.services.RetrieveDatasetService;
import io.quarkus.oidc.runtime.OidcJwtCallerPrincipal;
import io.quarkus.security.identity.SecurityIdentity;
Expand All @@ -17,7 +17,7 @@
public class DatasetQueryApiImpl implements DatasetQueryApi {

private final SecurityIdentity identity;
private final DatasetsSearchService datasetsSearchService;
private final BeaconDatasetsSearchService datasetsSearchService;
private final RetrieveDatasetService retrievedDatasetService;

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
// SPDX-FileCopyrightText: 2024 PNED G.I.E.
//
// SPDX-License-Identifier: Apache-2.0

package io.github.genomicdatainfrastructure.discovery.services;

import static java.util.Optional.ofNullable;
import static java.util.stream.Collectors.toCollection;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
import static org.apache.commons.lang3.ObjectUtils.isNotEmpty;
import static java.util.stream.Collectors.toMap;
import static io.github.genomicdatainfrastructure.discovery.services.PackagesSearchResponseMapper.CKAN_FACET_GROUP;
import static io.github.genomicdatainfrastructure.discovery.services.BeaconFilteringTermsService.BEACON_FACET_GROUP;

import org.apache.commons.lang3.ObjectUtils;
import org.eclipse.microprofile.rest.client.inject.RestClient;
import io.github.genomicdatainfrastructure.discovery.model.DatasetSearchQuery;
import io.github.genomicdatainfrastructure.discovery.model.DatasetSearchQueryFacet;
import io.github.genomicdatainfrastructure.discovery.model.DatasetsSearchResponse;
import io.github.genomicdatainfrastructure.discovery.model.FacetGroup;
import io.github.genomicdatainfrastructure.discovery.model.SearchedDataset;
import io.github.genomicdatainfrastructure.discovery.remote.beacon.api.BeaconQueryApi;
import io.github.genomicdatainfrastructure.discovery.remote.beacon.model.BeaconIndividualsRequest;
import io.github.genomicdatainfrastructure.discovery.remote.beacon.model.BeaconIndividualsResponse;
import io.github.genomicdatainfrastructure.discovery.remote.beacon.model.BeaconIndividualsResponseContent;
import io.github.genomicdatainfrastructure.discovery.remote.beacon.model.BeaconResultSet;
import io.github.genomicdatainfrastructure.discovery.remote.keycloak.api.KeycloakQueryApi;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import java.util.ArrayList;
import java.util.Objects;
import java.util.HashMap;
import java.util.List;

/**
 * {@link DatasetsSearchService} that narrows the CKAN-backed search with the datasets hit by a
 * Beacon {@code individuals} query.
 *
 * <p>When the incoming query carries Beacon filters and an access token is available, the
 * service:
 * <ol>
 *   <li>exchanges the access token for the {@code LSAAI} identity-provider token;</li>
 *   <li>queries Beacon and keeps the dataset result sets that report at least one record;</li>
 *   <li>adds one CKAN {@code identifier} facet per Beacon dataset to the query and delegates
 *       to {@link CkanDatasetsSearchService};</li>
 *   <li>decorates the response with the Beacon facet group, the Beacon facet-group count and
 *       the per-dataset record counts.</li>
 * </ol>
 * Otherwise the query is delegated to {@link CkanDatasetsSearchService} untouched.
 */
@ApplicationScoped
public class BeaconDatasetsSearchService implements DatasetsSearchService {

    private static final String BEACON_IDP_ALIAS = "LSAAI";
    private static final String BEARER_PATTERN = "Bearer %s";
    private static final String BEACON_DATASET_TYPE = "dataset";
    private static final String CKAN_IDENTIFIER_FIELD = "identifier";

    private final BeaconQueryApi beaconQueryApi;
    private final KeycloakQueryApi keycloakQueryApi;
    private final CkanDatasetsSearchService datasetsSearchService;
    private final BeaconFilteringTermsService beaconFilteringTermsService;

    @Inject
    public BeaconDatasetsSearchService(
            @RestClient BeaconQueryApi beaconQueryApi,
            @RestClient KeycloakQueryApi keycloakQueryApi,
            CkanDatasetsSearchService datasetsSearchService,
            BeaconFilteringTermsService beaconFilteringTermsService
    ) {
        this.beaconQueryApi = beaconQueryApi;
        this.keycloakQueryApi = keycloakQueryApi;
        this.datasetsSearchService = datasetsSearchService;
        this.beaconFilteringTermsService = beaconFilteringTermsService;
    }

    /**
     * Searches datasets, consulting Beacon first when the query contains Beacon filters.
     *
     * @param query       the search query, possibly mixing CKAN and Beacon facets
     * @param accessToken the caller's access token; when {@code null} Beacon is skipped
     * @return the CKAN search response, enriched with Beacon data when Beacon was queried
     */
    @Override
    public DatasetsSearchResponse search(DatasetSearchQuery query, String accessToken) {
        var beaconQuery = BeaconIndividualsRequestMapper.from(query);

        // Beacon needs both a token to exchange and at least one filter to be meaningful.
        if (accessToken == null || beaconQuery.getQuery().getFilters().isEmpty()) {
            return datasetsSearchService.search(query, accessToken);
        }

        var beaconAuthorization = retrieveBeaconAuthorization(accessToken);

        var beaconResponse = queryOnBeacon(beaconAuthorization, beaconQuery);

        // NOTE(review): when Beacon matches no dataset, no identifier facet is added and the
        // CKAN search below runs unrestricted by Beacon - confirm this is the intended outcome.
        var enhancedQuery = enhanceQueryFacets(query, beaconResponse);

        var datasetsResponse = datasetsSearchService.search(enhancedQuery, accessToken);

        return enhanceDatasetsResponse(beaconAuthorization, datasetsResponse, beaconResponse);
    }

    /**
     * Exchanges the caller's access token for the Beacon identity-provider token.
     *
     * @param accessToken the caller's access token
     * @return the value to send as {@code Authorization} header to Beacon
     */
    private String retrieveBeaconAuthorization(String accessToken) {
        var keycloakAuthorization = BEARER_PATTERN.formatted(accessToken);
        var response = keycloakQueryApi.retriveIdpTokens(BEACON_IDP_ALIAS, keycloakAuthorization);
        return BEARER_PATTERN.formatted(response.getAccessToken());
    }

    /**
     * Runs the individuals query on Beacon and keeps only usable dataset result sets.
     *
     * @param beaconAuthorization the {@code Authorization} header value for Beacon
     * @param beaconQuery         the Beacon request
     * @return the non-null result sets of type {@code dataset} that have a non-blank id and a
     *         positive record count; never {@code null}
     */
    private List<BeaconResultSet> queryOnBeacon(
            String beaconAuthorization,
            BeaconIndividualsRequest beaconQuery
    ) {
        var response = beaconQueryApi.listIndividuals(beaconAuthorization, beaconQuery);

        var nonNullResultSets = ofNullable(response)
                .map(BeaconIndividualsResponse::getResponse)
                .map(BeaconIndividualsResponseContent::getResultSets)
                .filter(ObjectUtils::isNotEmpty)
                .orElseGet(List::of);

        return nonNullResultSets.stream()
                .filter(Objects::nonNull)
                .filter(it -> BEACON_DATASET_TYPE.equals(it.getSetType()))
                .filter(it -> isNotBlank(it.getId()))
                .filter(BeaconDatasetsSearchService::hasRecords)
                .toList();
    }

    /**
     * Tells whether a result set reports at least one record. A missing count is treated as
     * zero instead of failing on unboxing.
     */
    private static boolean hasRecords(BeaconResultSet resultSet) {
        return ofNullable(resultSet.getResultsCount())
                .filter(count -> count > 0)
                .isPresent();
    }

    /**
     * Builds a copy of the query with one CKAN {@code identifier} facet per Beacon dataset,
     * followed by the facets the query already had.
     *
     * @param query      the original query
     * @param resultSets the Beacon dataset result sets
     * @return the query copy carrying the additional facets
     */
    private DatasetSearchQuery enhanceQueryFacets(
            DatasetSearchQuery query,
            List<BeaconResultSet> resultSets
    ) {
        var enhancedFacets = resultSets.stream()
                .map(BeaconResultSet::getId)
                // The same dataset may be reported more than once; one facet is enough.
                .distinct()
                .map(it -> DatasetSearchQueryFacet.builder()
                        .facetGroup(CKAN_FACET_GROUP)
                        .facet(CKAN_IDENTIFIER_FIELD)
                        .value(it)
                        .build())
                .collect(toCollection(ArrayList::new));

        if (query.getFacets() != null) {
            enhancedFacets.addAll(query.getFacets());
        }

        return query.toBuilder()
                .facets(enhancedFacets)
                .build();
    }

    /**
     * Decorates the CKAN response with Beacon information: the Beacon facet group, the number
     * of distinct datasets Beacon matched, and the record count of each returned dataset.
     *
     * @param beaconAuthorization the {@code Authorization} header value for Beacon
     * @param datasetsResponse    the response of the CKAN-backed search
     * @param resultSets          the Beacon dataset result sets
     * @return a copy of {@code datasetsResponse} with the Beacon data merged in
     */
    private DatasetsSearchResponse enhanceDatasetsResponse(
            String beaconAuthorization,
            DatasetsSearchResponse datasetsResponse,
            List<BeaconResultSet> resultSets
    ) {
        // A merge function is required: without it toMap throws IllegalStateException as soon
        // as two result sets share a dataset id.
        // NOTE(review): summing assumes repeated entries describe disjoint records - confirm.
        var recordCounts = resultSets.stream()
                .collect(toMap(
                        BeaconResultSet::getId,
                        BeaconResultSet::getResultsCount,
                        (left, right) -> left + right
                ));

        var facetGroupCount = new HashMap<String, Integer>();
        // Distinct dataset ids, so repeated result sets do not inflate the count.
        facetGroupCount.put(BEACON_FACET_GROUP, recordCounts.size());
        if (isNotEmpty(datasetsResponse.getFacetGroupCount())) {
            facetGroupCount.putAll(datasetsResponse.getFacetGroupCount());
        }

        var facetGroups = new ArrayList<FacetGroup>();
        facetGroups.add(beaconFilteringTermsService.listFilteringTerms(beaconAuthorization));
        if (isNotEmpty(datasetsResponse.getFacetGroups())) {
            facetGroups.addAll(datasetsResponse.getFacetGroups());
        }

        var results = List.<SearchedDataset>of();
        if (isNotEmpty(datasetsResponse.getResults())) {
            results = datasetsResponse.getResults()
                    .stream()
                    .map(it -> it.toBuilder()
                            // Datasets unknown to Beacon get a null record count.
                            .recordsCount(recordCounts.get(it.getIdentifier()))
                            .build())
                    .toList();
        }

        return datasetsResponse.toBuilder()
                .facetGroupCount(facetGroupCount)
                .facetGroups(facetGroups)
                .results(results)
                .build();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
// SPDX-FileCopyrightText: 2024 PNED G.I.E.
//
// SPDX-License-Identifier: Apache-2.0

package io.github.genomicdatainfrastructure.discovery.services;

import static java.util.Optional.ofNullable;
import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.mapping;
import static java.util.stream.Collectors.toMap;
import static java.util.stream.Collectors.toList;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
import static org.apache.commons.lang3.ObjectUtils.isNotEmpty;

import org.eclipse.microprofile.rest.client.inject.RestClient;
import io.github.genomicdatainfrastructure.discovery.model.Facet;
import io.github.genomicdatainfrastructure.discovery.model.FacetGroup;
import io.github.genomicdatainfrastructure.discovery.model.ValueLabel;
import io.github.genomicdatainfrastructure.discovery.remote.beacon.api.BeaconQueryApi;
import io.github.genomicdatainfrastructure.discovery.remote.beacon.model.BeaconFilteringTerm;
import io.github.genomicdatainfrastructure.discovery.remote.beacon.model.BeaconFilteringTermsResponse;
import io.github.genomicdatainfrastructure.discovery.remote.beacon.model.BeaconFilteringTermsResponseContent;
import io.github.genomicdatainfrastructure.discovery.remote.beacon.model.BeaconResource;
import io.quarkus.cache.CacheResult;
import jakarta.enterprise.context.ApplicationScoped;
import java.util.List;
import java.util.Map;
import java.util.Objects;

/**
 * Builds the {@code beacon} facet group from the filtering terms published by Beacon.
 *
 * <p>Filtering terms are grouped by their type; a group becomes a facet only when a Beacon
 * resource with the same name exists.
 */
@ApplicationScoped
public class BeaconFilteringTermsService {

    public static final String BEACON_FACET_GROUP = "beacon";

    private static final String DEFAULT_SCOPE = "individual";

    private final BeaconQueryApi beaconQueryApi;

    public BeaconFilteringTermsService(
            @RestClient BeaconQueryApi beaconQueryApi
    ) {
        this.beaconQueryApi = beaconQueryApi;
    }

    /**
     * Retrieves the Beacon filtering terms and turns them into a facet group.
     *
     * <p>No parameter is annotated as cache key, so Quarkus derives the cache key from all
     * method arguments, i.e. from the authorization header value.
     * NOTE(review): this caches one entry per distinct token - confirm that is intended.
     *
     * @param authorization the {@code Authorization} header value for Beacon
     * @return the facet group with key {@code beacon}; its facet list is empty when Beacon
     *         returns no usable filtering terms or resources
     */
    @CacheResult(cacheName = "beacon-facet-group-cache")
    public FacetGroup listFilteringTerms(String authorization) {
        var filteringTermsResponse = retrieveNonNullFilteringTermsResponse(authorization);

        var valuesGroupedByFacetName = groupValuesByFacetName(filteringTermsResponse);

        var facetIdsMappedByName = mapFacetIdsByFacetName(filteringTermsResponse);

        var facets = buildFacets(valuesGroupedByFacetName, facetIdsMappedByName);

        return FacetGroup.builder()
                .key(BEACON_FACET_GROUP)
                .label("Beacon")
                .facets(facets)
                .build();
    }

    /**
     * Calls Beacon and returns the response content, or a fresh empty content object when the
     * response is missing or lacks filtering terms or resources.
     */
    private BeaconFilteringTermsResponseContent retrieveNonNullFilteringTermsResponse(
            String authorization
    ) {
        var filteringTerms = beaconQueryApi.listFilteringTerms(authorization);

        return ofNullable(filteringTerms)
                .map(BeaconFilteringTermsResponse::getResponse)
                .filter(it -> isNotEmpty(it.getFilteringTerms()))
                .filter(it -> isNotEmpty(it.getResources()))
                .orElseGet(BeaconFilteringTermsResponseContent::new);
    }

    /**
     * Groups the selectable values by filtering-term type, keeping only complete terms that
     * apply to the {@code individual} scope.
     */
    private Map<String, List<ValueLabel>> groupValuesByFacetName(
            BeaconFilteringTermsResponseContent filteringTermsResponse
    ) {
        // The fallback content object may expose a null list; read it defensively.
        var filteringTerms = ofNullable(filteringTermsResponse.getFilteringTerms())
                .orElseGet(List::of);

        return filteringTerms.stream()
                .filter(Objects::nonNull)
                .filter(it -> isNotBlank(it.getLabel()))
                .filter(it -> isNotBlank(it.getId()))
                .filter(it -> isNotBlank(it.getType()))
                .filter(it -> isNotEmpty(it.getScopes()))
                .filter(it -> it.getScopes().contains(DEFAULT_SCOPE))
                .collect(groupingBy(
                        BeaconFilteringTerm::getType,
                        mapping(this::mapFilteringTermToValueLabel, toList())
                ));
    }

    /** Maps a filtering term to a value/label pair: the term id is the value. */
    private ValueLabel mapFilteringTermToValueLabel(BeaconFilteringTerm term) {
        return ValueLabel.builder()
                .value(term.getId())
                .label(term.getLabel())
                .build();
    }

    /**
     * Indexes the resource ids by resource name, skipping null or incomplete resources.
     * When several resources share a name, the first id encountered is kept.
     */
    private Map<String, String> mapFacetIdsByFacetName(
            BeaconFilteringTermsResponseContent filteringTermsResponse
    ) {
        // The fallback content object may expose a null list; read it defensively.
        var resources = ofNullable(filteringTermsResponse.getResources())
                .orElseGet(List::of);

        return resources.stream()
                .filter(Objects::nonNull)
                .filter(it -> isNotBlank(it.getId()))
                .filter(it -> isNotBlank(it.getName()))
                .collect(toMap(
                        BeaconResource::getName,
                        BeaconResource::getId,
                        // Without a merge function a repeated name throws IllegalStateException.
                        (first, second) -> first
                ));
    }

    /**
     * Builds one facet per term type that matches a known resource name.
     * NOTE(review): the facet label is the resource id and the key is the resource name -
     * confirm the two are not meant to be the other way round.
     */
    private List<Facet> buildFacets(
            Map<String, List<ValueLabel>> termsGroupedByType,
            Map<String, String> facetIdsMappedByName
    ) {
        return termsGroupedByType.entrySet().stream()
                .filter(entry -> facetIdsMappedByName.containsKey(entry.getKey()))
                .map(entry -> Facet.builder()
                        .key(entry.getKey())
                        .label(facetIdsMappedByName.get(entry.getKey()))
                        .values(entry.getValue())
                        .build())
                .toList();
    }
}
Loading

0 comments on commit 5aadf1f

Please sign in to comment.