generated from GenomicDataInfrastructure/oss-project-template
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This commit integrates the Dataset Discovery Service with the Beacon Network. It adds the following features: - Call to `/2.0.0/filtering_terms` to retrieve Beacon facets; - Call to `/2.0.0/individuals` to retrieve datasets; - Call to `/LSAAI/token` to retrieve the LSAAI access token; Beacon Network does not provide DCAT-AP metadata. Instead, it provides a list of datasets and record counts hit by the beacon query. In that sense, `BeaconIndividualsSearchService` is the main class of this commit. It decorates existing `DatasetsSearchService`, adding the call to `/2.0.0/individuals`. It passes to `DatasetsSearchService` the list of dataset identifiers as CKAN facets. It also enhances search facets, to include beacon filtering terms, fetched from `/2.0.0/filtering_terms`, by `BeaconFilteringTermsService`. `facetGroupCount` is a new field in `DatasetsSearchResponse`. It is the count per facet group, either `ckan` or `beacon`. This is important to find any discrepancies between the number of datasets returned by CKAN and Beacon (e.g. if CKAN returns 0 but Beacon returns >0, it is a sign that CKAN filters were incorrectly selected).
- Loading branch information
1 parent
0a655b2
commit 5aadf1f
Showing
25 changed files
with
110,317 additions
and
57 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
168 changes: 168 additions & 0 deletions
168
...a/io/github/genomicdatainfrastructure/discovery/services/BeaconDatasetsSearchService.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
// SPDX-FileCopyrightText: 2024 PNED G.I.E. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package io.github.genomicdatainfrastructure.discovery.services; | ||
|
||
import static java.util.Optional.ofNullable; | ||
import static java.util.stream.Collectors.toCollection; | ||
import static org.apache.commons.lang3.StringUtils.isNotBlank; | ||
import static org.apache.commons.lang3.ObjectUtils.isNotEmpty; | ||
import static java.util.stream.Collectors.toMap; | ||
import static io.github.genomicdatainfrastructure.discovery.services.PackagesSearchResponseMapper.CKAN_FACET_GROUP; | ||
import static io.github.genomicdatainfrastructure.discovery.services.BeaconFilteringTermsService.BEACON_FACET_GROUP; | ||
|
||
import org.apache.commons.lang3.ObjectUtils; | ||
import org.eclipse.microprofile.rest.client.inject.RestClient; | ||
import io.github.genomicdatainfrastructure.discovery.model.DatasetSearchQuery; | ||
import io.github.genomicdatainfrastructure.discovery.model.DatasetSearchQueryFacet; | ||
import io.github.genomicdatainfrastructure.discovery.model.DatasetsSearchResponse; | ||
import io.github.genomicdatainfrastructure.discovery.model.FacetGroup; | ||
import io.github.genomicdatainfrastructure.discovery.model.SearchedDataset; | ||
import io.github.genomicdatainfrastructure.discovery.remote.beacon.api.BeaconQueryApi; | ||
import io.github.genomicdatainfrastructure.discovery.remote.beacon.model.BeaconIndividualsRequest; | ||
import io.github.genomicdatainfrastructure.discovery.remote.beacon.model.BeaconIndividualsResponse; | ||
import io.github.genomicdatainfrastructure.discovery.remote.beacon.model.BeaconIndividualsResponseContent; | ||
import io.github.genomicdatainfrastructure.discovery.remote.beacon.model.BeaconResultSet; | ||
import io.github.genomicdatainfrastructure.discovery.remote.keycloak.api.KeycloakQueryApi; | ||
import jakarta.enterprise.context.ApplicationScoped; | ||
import jakarta.inject.Inject; | ||
import java.util.ArrayList; | ||
import java.util.Objects; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
|
||
@ApplicationScoped | ||
public class BeaconDatasetsSearchService implements DatasetsSearchService { | ||
|
||
private static final String BEACON_IDP_ALIAS = "LSAAI"; | ||
private static final String BEARER_PATTERN = "Bearer %s"; | ||
private static final String BEACON_DATASET_TYPE = "dataset"; | ||
private static final String CKAN_IDENTIFIER_FIELD = "identifier"; | ||
|
||
private final BeaconQueryApi beaconQueryApi; | ||
private final KeycloakQueryApi keycloakQueryApi; | ||
private final CkanDatasetsSearchService datasetsSearchService; | ||
private final BeaconFilteringTermsService beaconFilteringTermsService; | ||
|
||
@Inject | ||
public BeaconDatasetsSearchService( | ||
@RestClient BeaconQueryApi beaconQueryApi, | ||
@RestClient KeycloakQueryApi keycloakQueryApi, | ||
CkanDatasetsSearchService datasetsSearchService, | ||
BeaconFilteringTermsService beaconFilteringTermsService | ||
) { | ||
this.beaconQueryApi = beaconQueryApi; | ||
this.keycloakQueryApi = keycloakQueryApi; | ||
this.datasetsSearchService = datasetsSearchService; | ||
this.beaconFilteringTermsService = beaconFilteringTermsService; | ||
} | ||
|
||
@Override | ||
public DatasetsSearchResponse search(DatasetSearchQuery query, String accessToken) { | ||
var beaconQuery = BeaconIndividualsRequestMapper.from(query); | ||
|
||
if (accessToken == null || beaconQuery.getQuery().getFilters().isEmpty()) { | ||
return datasetsSearchService.search(query, accessToken); | ||
} | ||
|
||
var beaconAuthorization = retrieveBeaconAuthorization(accessToken); | ||
|
||
var beaconResponse = queryOnBeacon(beaconAuthorization, beaconQuery); | ||
|
||
var enhancedQuery = enhanceQueryFacets(query, beaconResponse); | ||
|
||
var datasetsReponse = datasetsSearchService.search(enhancedQuery, accessToken); | ||
|
||
return enhanceDatasetsResponse(beaconAuthorization, datasetsReponse, beaconResponse); | ||
} | ||
|
||
private String retrieveBeaconAuthorization(String accessToken) { | ||
var keycloakAuthorization = BEARER_PATTERN.formatted(accessToken); | ||
var response = keycloakQueryApi.retriveIdpTokens(BEACON_IDP_ALIAS, keycloakAuthorization); | ||
return BEARER_PATTERN.formatted(response.getAccessToken()); | ||
} | ||
|
||
private List<BeaconResultSet> queryOnBeacon( | ||
String beaconAuthorization, | ||
BeaconIndividualsRequest beaconQuery | ||
) { | ||
|
||
var response = beaconQueryApi.listIndividuals(beaconAuthorization, beaconQuery); | ||
|
||
var nonNullResultSets = ofNullable(response) | ||
.map(BeaconIndividualsResponse::getResponse) | ||
.map(BeaconIndividualsResponseContent::getResultSets) | ||
.filter(ObjectUtils::isNotEmpty) | ||
.orElseGet(List::of); | ||
|
||
return nonNullResultSets.stream() | ||
.filter(Objects::nonNull) | ||
.filter(it -> BEACON_DATASET_TYPE.equals(it.getSetType())) | ||
.filter(it -> isNotBlank(it.getId())) | ||
.filter(it -> it.getResultsCount() > 0) | ||
.toList(); | ||
} | ||
|
||
private DatasetSearchQuery enhanceQueryFacets( | ||
DatasetSearchQuery query, | ||
List<BeaconResultSet> resultSets | ||
) { | ||
var enhancedFacets = resultSets.stream() | ||
.map(BeaconResultSet::getId) | ||
.map(it -> DatasetSearchQueryFacet.builder() | ||
.facetGroup(CKAN_FACET_GROUP) | ||
.facet(CKAN_IDENTIFIER_FIELD) | ||
.value(it) | ||
.build()) | ||
.collect(toCollection(ArrayList::new)); | ||
|
||
if (query.getFacets() != null) { | ||
enhancedFacets.addAll(query.getFacets()); | ||
} | ||
|
||
return query.toBuilder() | ||
.facets(enhancedFacets) | ||
.build(); | ||
} | ||
|
||
private DatasetsSearchResponse enhanceDatasetsResponse( | ||
String beaconAuthorization, | ||
DatasetsSearchResponse datasetsReponse, | ||
List<BeaconResultSet> resultSets | ||
) { | ||
var facetGroupCount = new HashMap<String, Integer>(); | ||
facetGroupCount.put(BEACON_FACET_GROUP, resultSets.size()); | ||
if (isNotEmpty(datasetsReponse.getFacetGroupCount())) { | ||
facetGroupCount.putAll(datasetsReponse.getFacetGroupCount()); | ||
} | ||
|
||
var facetGroups = new ArrayList<FacetGroup>(); | ||
facetGroups.add(beaconFilteringTermsService.listFilteringTerms(beaconAuthorization)); | ||
if (isNotEmpty(datasetsReponse.getFacetGroups())) { | ||
facetGroups.addAll(datasetsReponse.getFacetGroups()); | ||
} | ||
|
||
var results = List.<SearchedDataset>of(); | ||
if (isNotEmpty(datasetsReponse.getResults())) { | ||
var recordCounts = resultSets.stream() | ||
.collect(toMap( | ||
BeaconResultSet::getId, | ||
BeaconResultSet::getResultsCount | ||
)); | ||
|
||
results = datasetsReponse.getResults() | ||
.stream() | ||
.map(it -> it.toBuilder() | ||
.recordsCount(recordCounts.get(it.getIdentifier())) | ||
.build()) | ||
.toList(); | ||
} | ||
|
||
return datasetsReponse.toBuilder() | ||
.facetGroupCount(facetGroupCount) | ||
.facetGroups(facetGroups) | ||
.results(results) | ||
.build(); | ||
} | ||
} |
122 changes: 122 additions & 0 deletions
122
...a/io/github/genomicdatainfrastructure/discovery/services/BeaconFilteringTermsService.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
// SPDX-FileCopyrightText: 2024 PNED G.I.E. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package io.github.genomicdatainfrastructure.discovery.services; | ||
|
||
import static java.util.Optional.ofNullable; | ||
import static java.util.stream.Collectors.groupingBy; | ||
import static java.util.stream.Collectors.mapping; | ||
import static java.util.stream.Collectors.toMap; | ||
import static java.util.stream.Collectors.toList; | ||
import static org.apache.commons.lang3.StringUtils.isNotBlank; | ||
import static org.apache.commons.lang3.ObjectUtils.isNotEmpty; | ||
|
||
import org.eclipse.microprofile.rest.client.inject.RestClient; | ||
import io.github.genomicdatainfrastructure.discovery.model.Facet; | ||
import io.github.genomicdatainfrastructure.discovery.model.FacetGroup; | ||
import io.github.genomicdatainfrastructure.discovery.model.ValueLabel; | ||
import io.github.genomicdatainfrastructure.discovery.remote.beacon.api.BeaconQueryApi; | ||
import io.github.genomicdatainfrastructure.discovery.remote.beacon.model.BeaconFilteringTerm; | ||
import io.github.genomicdatainfrastructure.discovery.remote.beacon.model.BeaconFilteringTermsResponse; | ||
import io.github.genomicdatainfrastructure.discovery.remote.beacon.model.BeaconFilteringTermsResponseContent; | ||
import io.github.genomicdatainfrastructure.discovery.remote.beacon.model.BeaconResource; | ||
import io.quarkus.cache.CacheResult; | ||
import jakarta.enterprise.context.ApplicationScoped; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Objects; | ||
|
||
@ApplicationScoped | ||
public class BeaconFilteringTermsService { | ||
|
||
public static final String BEACON_FACET_GROUP = "beacon"; | ||
|
||
private static final String DEFAULT_SCOPE = "individual"; | ||
|
||
private final BeaconQueryApi beaconQueryApi; | ||
|
||
public BeaconFilteringTermsService( | ||
@RestClient BeaconQueryApi beaconQueryApi | ||
) { | ||
this.beaconQueryApi = beaconQueryApi; | ||
} | ||
|
||
@CacheResult(cacheName = "beacon-facet-group-cache") | ||
public FacetGroup listFilteringTerms(String authorization) { | ||
var filteringTermsResponse = retreiveNonNullFilteringTermsResponse(authorization); | ||
|
||
var valuesGroupedByFacetName = groupValuesByFacetName(filteringTermsResponse); | ||
|
||
var facetIdsMappedByName = mapFacetIdsByFacetName(filteringTermsResponse); | ||
|
||
var facets = buildFacets(valuesGroupedByFacetName, facetIdsMappedByName); | ||
|
||
return FacetGroup.builder() | ||
.key(BEACON_FACET_GROUP) | ||
.label("Beacon") | ||
.facets(facets) | ||
.build(); | ||
} | ||
|
||
private BeaconFilteringTermsResponseContent retreiveNonNullFilteringTermsResponse( | ||
String authorization | ||
) { | ||
var filteringTerms = beaconQueryApi.listFilteringTerms(authorization); | ||
|
||
return ofNullable(filteringTerms) | ||
.map(BeaconFilteringTermsResponse::getResponse) | ||
.filter(it -> isNotEmpty(it.getFilteringTerms())) | ||
.filter(it -> isNotEmpty(it.getResources())) | ||
.orElseGet(BeaconFilteringTermsResponseContent::new); | ||
} | ||
|
||
private Map<String, List<ValueLabel>> groupValuesByFacetName( | ||
BeaconFilteringTermsResponseContent filteringTermsResponse | ||
) { | ||
return filteringTermsResponse.getFilteringTerms().stream() | ||
.filter(Objects::nonNull) | ||
.filter(it -> isNotBlank(it.getLabel())) | ||
.filter(it -> isNotBlank(it.getId())) | ||
.filter(it -> isNotBlank(it.getType())) | ||
.filter(it -> isNotEmpty(it.getScopes())) | ||
.filter(it -> it.getScopes().contains(DEFAULT_SCOPE)) | ||
.collect(groupingBy( | ||
BeaconFilteringTerm::getType, | ||
mapping(this::mapFilteringTermToValueLabel, toList()) | ||
)); | ||
} | ||
|
||
private ValueLabel mapFilteringTermToValueLabel(BeaconFilteringTerm term) { | ||
return ValueLabel.builder() | ||
.value(term.getId()) | ||
.label(term.getLabel()) | ||
.build(); | ||
} | ||
|
||
private Map<String, String> mapFacetIdsByFacetName( | ||
BeaconFilteringTermsResponseContent filteringTermsResponse | ||
) { | ||
return filteringTermsResponse.getResources().stream() | ||
.filter(it -> isNotBlank(it.getId())) | ||
.filter(it -> isNotBlank(it.getName())) | ||
.collect(toMap( | ||
BeaconResource::getName, | ||
BeaconResource::getId | ||
)); | ||
} | ||
|
||
private List<Facet> buildFacets( | ||
Map<String, List<ValueLabel>> termsGroupedByType, | ||
Map<String, String> facetIdsMappedByName | ||
) { | ||
return termsGroupedByType.entrySet().stream() | ||
.filter(entry -> facetIdsMappedByName.containsKey(entry.getKey())) | ||
.map(entry -> Facet.builder() | ||
.key(entry.getKey()) | ||
.label(facetIdsMappedByName.get(entry.getKey())) | ||
.values(entry.getValue()) | ||
.build()) | ||
.toList(); | ||
} | ||
} |
Oops, something went wrong.