Skip to content

Commit

Permalink
feat(bibframe-search): Implement bibframe search functionality
Browse files Browse the repository at this point in the history
Implement bibframe search functionality
  - Create new open-search index for bibframe
  - Consume events from kafka topic "search.bibframe"
  - Expose new API - "GET /search/bibframe"

Closes: MSEARCH-781
  • Loading branch information
aleksei-pronichev authored May 31, 2024
1 parent 088759a commit c01bc91
Show file tree
Hide file tree
Showing 42 changed files with 1,471 additions and 40 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
* Return Unified List of Inventory Locations in a Consortium ([MSEARCH-681](https://folio-org.atlassian.net/browse/MSEARCH-681))
* Remove ability to match on LCCN searches without a prefix ([MSEARCH-752](https://folio-org.atlassian.net/browse/MSEARCH-752))
* Search consolidated items/holdings data in consortium ([MSEARCH-759](https://folio-org.atlassian.net/browse/MSEARCH-759))
* Create bibframe index and process bibframe events ([MSEARCH-781](https://folio-org.atlassian.net/browse/MSEARCH-781))

### Bug fixes
* Do not delete kafka topics if collection topic is enabled ([MSEARCH-725](https://folio-org.atlassian.net/browse/MSEARCH-725))
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ and [Cross-cluster replication](https://docs.aws.amazon.com/opensearch-service/l
| KAFKA_CONTRIBUTORS_TOPIC_REPLICATION_FACTOR | - | Replication factor for `search.instance-contributor` topic. |
| KAFKA_CONSORTIUM_INSTANCE_CONCURRENCY | 2 | Custom number of kafka concurrent threads for consortium.instance message consuming. |
| KAFKA_LOCATION_CONCURRENCY | 1 | Custom number of kafka concurrent threads for inventory.location message consuming. |
| KAFKA_BIBFRAME_CONCURRENCY | 1 | Custom number of kafka concurrent threads for bibframe message consuming. |
| KAFKA_CONSORTIUM_INSTANCE_TOPIC_PARTITIONS | 50 | Amount of partitions for `search.consortium.instance` topic. |
| KAFKA_CONSORTIUM_INSTANCE_TOPIC_REPLICATION_FACTOR | - | Replication factor for `search.consortium.instance` topic. |
| KAFKA_SUBJECTS_CONCURRENCY | 2 | Custom number of kafka concurrent threads for subject message consuming. |
Expand Down Expand Up @@ -417,6 +418,7 @@ Consortium feature on module enable is defined by 'centralTenantId' tenant param
|:-------|:------------------------------|:-------------------------------------------------------------------------------------|
| GET | `/search/instances` | Search by instances and to this instance items and holding-records |
| GET | `/search/authorities` | Search by authority records |
| GET | `/search/bibframe` | Search linked data graph resource descriptions |
| GET | `/search/{recordType}/facets` | Get facets where recordType could be: instances, authorities, contributors, subjects |
| GET | ~~`/search/instances/ids`~~ | (DEPRECATED) Stream instance ids as JSON or plain text |
| GET | ~~`/search/holdings/ids`~~ | (DEPRECATED) Stream holding record ids as JSON or plain text |
Expand Down
14 changes: 14 additions & 0 deletions descriptors/ModuleDescriptor-template.json
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,15 @@
"user-tenants.collection.get"
]
},
{
"methods": [
"GET"
],
"pathPattern": "/search/bibframe",
"permissionsRequired": [
"search.bibframe.collection.get"
]
},
{
"methods": [
"GET"
Expand Down Expand Up @@ -573,6 +582,11 @@
"displayName": "Search - searches authorities by given query",
"description": "Searches authorities by given query"
},
{
"permissionName": "search.bibframe.collection.get",
"displayName": "Search - searches bibframe by given query",
"description": "Searches bibframe by given query"
},
{
"permissionName": "browse.call-numbers.instances.collection.get",
"displayName": "Browse - provides collections of browse items for instance by call number",
Expand Down
22 changes: 22 additions & 0 deletions src/main/java/org/folio/search/controller/SearchController.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import lombok.RequiredArgsConstructor;
import org.folio.search.domain.dto.Authority;
import org.folio.search.domain.dto.AuthoritySearchResult;
import org.folio.search.domain.dto.Bibframe;
import org.folio.search.domain.dto.BibframeSearchResult;
import org.folio.search.domain.dto.Instance;
import org.folio.search.domain.dto.InstanceSearchResult;
import org.folio.search.model.service.CqlSearchRequest;
Expand Down Expand Up @@ -47,4 +49,24 @@ public ResponseEntity<InstanceSearchResult> searchInstances(String tenantId, Str
.instances(result.getRecords())
.totalRecords(result.getTotalRecords()));
}

@Override
public ResponseEntity<BibframeSearchResult> searchBibframe(String tenant, String query, Integer limit,
Integer offset) {
var searchRequest = CqlSearchRequest.of(
Bibframe.class, tenant, query, limit, offset, true);
var result = searchService.search(searchRequest);
return ResponseEntity.ok(new BibframeSearchResult()
.searchQuery(query)
.content(result.getRecords())
.pageNumber(divPlusOneIfRemainder(offset, limit))
.totalPages(divPlusOneIfRemainder(result.getTotalRecords(), limit))
.totalRecords(result.getTotalRecords())
);
}

private int divPlusOneIfRemainder(int one, int two) {
var modulo = one % two;
return one / two + (modulo > 0 ? 1 : 0);
}
}
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
package org.folio.search.cql;

import lombok.RequiredArgsConstructor;
import org.folio.search.utils.SearchUtils;
import org.folio.search.service.lccn.LccnNormalizer;
import org.springframework.stereotype.Component;

@Component
@RequiredArgsConstructor
public class LccnSearchTermProcessor implements SearchTermProcessor {

private final LccnNormalizer lccnNormalizer;

@Override
public String getSearchTerm(String inputTerm) {
return SearchUtils.normalizeLccn(inputTerm);
return lccnNormalizer.apply(inputTerm)
.orElse(null);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import static org.folio.search.utils.SearchConverterUtils.getEventPayload;
import static org.folio.search.utils.SearchConverterUtils.getResourceEventId;
import static org.folio.search.utils.SearchConverterUtils.getResourceSource;
import static org.folio.search.utils.SearchUtils.BIBFRAME_RESOURCE;
import static org.folio.search.utils.SearchUtils.ID_FIELD;
import static org.folio.search.utils.SearchUtils.INSTANCE_ID_FIELD;
import static org.folio.search.utils.SearchUtils.INSTANCE_RESOURCE;
Expand Down Expand Up @@ -195,6 +196,22 @@ public void handleLocationEvents(List<ConsumerRecord<String, ResourceEvent>> con
indexResources(batch, resourceService::indexResources);
}

@KafkaListener(
id = KafkaConstants.BIBFRAME_LISTENER_ID,
containerFactory = "standardListenerContainerFactory",
groupId = "#{folioKafkaProperties.listener['bibframe'].groupId}",
concurrency = "#{folioKafkaProperties.listener['bibframe'].concurrency}",
topicPattern = "#{folioKafkaProperties.listener['bibframe'].topicPattern}")
public void handleBibframeEvents(List<ConsumerRecord<String, ResourceEvent>> consumerRecords) {
log.info("Processing bibframe events from Kafka [number of events: {}]", consumerRecords.size());
var batch = consumerRecords.stream()
.map(ConsumerRecord::value)
.map(bibframe -> bibframe.resourceName(BIBFRAME_RESOURCE).id(getResourceEventId(bibframe)))
.toList();

indexResources(batch, resourceService::indexResources);
}

private void indexResources(List<ResourceEvent> batch, Consumer<List<ResourceEvent>> indexConsumer) {
var batchByTenant = batch.stream().collect(Collectors.groupingBy(ResourceEvent::getTenant));

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package org.folio.search.service.lccn;

import java.util.Optional;
import org.apache.commons.lang3.StringUtils;
import org.springframework.context.annotation.Primary;
import org.springframework.stereotype.Service;

@Service
@Primary
public class DefaultLccnNormalizer implements LccnNormalizer {

@Override
public Optional<String> apply(String lccn) {
if (StringUtils.isBlank(lccn)) {
return Optional.empty();
}

return Optional.of(StringUtils.deleteWhitespace(lccn))
.map(String::toLowerCase);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package org.folio.search.service.lccn;

import java.util.Optional;
import java.util.function.Function;

public interface LccnNormalizer extends Function<String, Optional<String>> {
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package org.folio.search.service.lccn;

import jakarta.validation.constraints.NotNull;
import java.util.Optional;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;

/**
* Class responsible for normalizing Structure B LCCN values.
*/
@Log4j2
@Service
public class LccnNormalizerStructureB implements LccnNormalizer {
private static final String NORMALIZED_LCCN_REGEX = "\\d{10}";
private static final char HYPHEN = '-';

/**
* Normalizes the given LCCN value and returns the normalized LCCN.
* If the given LCCN is invalid, an empty Optional is returned.
*
* @param lccn LCCN to be normalized
* @return Returns the normalized LCCN. If the given LCCN is invalid, returns an empty Optional
*/
public Optional<String> apply(@NotNull final String lccn) {
var normalizedLccn = lccn;

// Remove white spaces
normalizedLccn = normalizedLccn.replaceAll("\\s", StringUtils.EMPTY);

// If lccn contains "/", remove it & all characters to the right of "/"
normalizedLccn = normalizedLccn.replaceAll("/.*", StringUtils.EMPTY);

// Process the serial number component of LCCN
normalizedLccn = processSerialNumber(normalizedLccn);

if (normalizedLccn.matches(NORMALIZED_LCCN_REGEX)) {
return Optional.of(normalizedLccn);
}

log.warn("LCCN is not in expected format: [{}]", lccn);
return Optional.empty();
}

/**
* Serial number is demarcated by a hyphen (fifth character in the value). Further, the serial number must be six
* digits in length. If fewer than six digits, remove the hyphen and left fill with zeroes so that there are six
* digits in the serial number.
*/
private String processSerialNumber(String lccn) {
if (lccn.length() >= 5 && lccn.charAt(4) == HYPHEN) {
var lccnParts = lccn.split(String.valueOf(HYPHEN));
if (lccnParts.length == 2) {
String prefix = lccnParts[0];
StringBuilder serialNumber = new StringBuilder(lccnParts[1]);

// Left fill the serial number with zeroes to make it six digits
while (serialNumber.length() < 6) {
serialNumber.insert(0, "0");
}

return serialNumber.insert(0, prefix).toString();
}
}
return lccn;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,28 @@
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.folio.search.domain.dto.Identifier;
import org.folio.search.integration.ReferenceDataService;
import org.folio.search.utils.SearchUtils;
import org.folio.search.service.lccn.LccnNormalizer;

public abstract class AbstractLccnProcessor<T> extends AbstractIdentifierProcessor<T> {

private static final List<String> LCCN_IDENTIFIER_NAME = List.of("LCCN", "Canceled LCCN");
private final LccnNormalizer lccnNormalizer;

protected AbstractLccnProcessor(ReferenceDataService referenceDataService) {
protected AbstractLccnProcessor(ReferenceDataService referenceDataService, LccnNormalizer lccnNormalizer) {
super(referenceDataService, LCCN_IDENTIFIER_NAME);
this.lccnNormalizer = lccnNormalizer;
}

@Override
public Set<String> getFieldValue(T entity) {
return filterIdentifiersValue(getIdentifiers(entity)).stream()
.map(SearchUtils::normalizeLccn)
.map(lccnNormalizer)
.flatMap(Optional::stream)
.filter(Objects::nonNull)
.collect(Collectors.toCollection(LinkedHashSet::new));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import org.folio.search.domain.dto.Authority;
import org.folio.search.domain.dto.Identifier;
import org.folio.search.integration.ReferenceDataService;
import org.folio.search.service.lccn.LccnNormalizer;
import org.folio.search.service.setter.AbstractLccnProcessor;
import org.springframework.stereotype.Component;

Expand All @@ -16,9 +17,10 @@ public class LccnAuthorityProcessor extends AbstractLccnProcessor<Authority> {
* Used by dependency injection.
*
* @param referenceDataService {@link ReferenceDataService} bean
* @param lccnNormalizer {@link LccnNormalizer} bean
*/
public LccnAuthorityProcessor(ReferenceDataService referenceDataService) {
super(referenceDataService);
public LccnAuthorityProcessor(ReferenceDataService referenceDataService, LccnNormalizer lccnNormalizer) {
super(referenceDataService, lccnNormalizer);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package org.folio.search.service.setter.bibframe;

import static java.util.Optional.ofNullable;
import static java.util.stream.Collectors.toCollection;

import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.folio.search.domain.dto.Bibframe;
import org.folio.search.domain.dto.BibframeContributorsInner;
import org.folio.search.domain.dto.BibframeInstancesInner;
import org.folio.search.service.setter.FieldProcessor;
import org.springframework.stereotype.Component;

@Component
public class BibframeContributorProcessor implements FieldProcessor<Bibframe, Set<String>> {

@Override
public Set<String> getFieldValue(Bibframe bibframe) {
var workContributors = ofNullable(bibframe.getContributors()).stream().flatMap(Collection::stream);
var instanceContributors = ofNullable(bibframe.getInstances()).stream().flatMap(Collection::stream)
.map(BibframeInstancesInner::getContributors).filter(Objects::nonNull).flatMap(Collection::stream);
return Stream.concat(workContributors, instanceContributors)
.filter(Objects::nonNull)
.map(BibframeContributorsInner::getName)
.filter(StringUtils::isNotBlank)
.map(String::trim)
.collect(toCollection(LinkedHashSet::new));
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package org.folio.search.service.setter.bibframe;

import static java.util.Objects.nonNull;
import static java.util.Optional.ofNullable;
import static java.util.stream.Collectors.toCollection;
import static org.folio.search.domain.dto.BibframeInstancesInnerIdentifiersInner.TypeEnum.ISBN;

import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.Objects;
import java.util.Set;
import lombok.RequiredArgsConstructor;
import org.folio.search.domain.dto.Bibframe;
import org.folio.search.domain.dto.BibframeInstancesInnerIdentifiersInner;
import org.folio.search.service.setter.FieldProcessor;
import org.folio.search.service.setter.instance.IsbnProcessor;
import org.springframework.stereotype.Component;

@Component
@RequiredArgsConstructor
public class BibframeIsbnProcessor implements FieldProcessor<Bibframe, Set<String>> {

private final IsbnProcessor isbnProcessor;

@Override
public Set<String> getFieldValue(Bibframe bibframe) {
return ofNullable(bibframe.getInstances()).stream()
.flatMap(Collection::stream)
.filter(i -> nonNull(i.getIdentifiers()))
.flatMap(i -> i.getIdentifiers().stream())
.filter(i -> ISBN.equals(i.getType()))
.map(BibframeInstancesInnerIdentifiersInner::getValue)
.filter(Objects::nonNull)
.map(isbnProcessor::normalizeIsbn)
.flatMap(Collection::stream)
.collect(toCollection(LinkedHashSet::new));
}
}
Loading

0 comments on commit c01bc91

Please sign in to comment.