Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IBX-6649: Added support for spell checking #52

Merged
merged 5 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .github/init_solr.sh
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,11 @@ solr_cloud_configure_collection() {
# modify solrconfig.xml to remove section that doesn't agree with our schema
sed -i.bak '/<updateRequestProcessorChain name="add-unknown-fields-to-the-schema".*/,/<\/updateRequestProcessorChain>/d' ${TEMPLATE_DIR}/solrconfig.xml
# Adapt autoSoftCommit to have a recommended value
sed -i.bak2 's/${solr.autoSoftCommit.maxTime:-1}/${solr.autoSoftCommit.maxTime:20}/' "${TEMPLATE_DIR}/solrconfig.xml" || exit_on_error "Can't modify file '${TEMPLATE_DIR}/solrconfig.xml'"
sed -i.bak 's/${solr.autoSoftCommit.maxTime:-1}/${solr.autoSoftCommit.maxTime:20}/' "${TEMPLATE_DIR}/solrconfig.xml" || exit_on_error "Can't modify file '${TEMPLATE_DIR}/solrconfig.xml'"
# Configure spellcheck component
sed -i.bar 's/<str name="field">_text_<\/str>/<str name="field">meta_content__text_t<\/str>/' "${TEMPLATE_DIR}/solrconfig.xml"
# Add spellcheck component to /select handler
sed -i.bak 's/<requestHandler name="\/select" class="solr.SearchHandler">/<requestHandler name="\/select" class="solr.SearchHandler">\n <arr name="last-components">\n <str>spellcheck<\/str>\n <\/arr>/' "${TEMPLATE_DIR}/solrconfig.xml"
}

solr_cloud_upload_collection_configuration() {
Expand Down
4 changes: 4 additions & 0 deletions bin/generate-solr-config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ fi
# Post-process the generated solrconfig.xml in place.
# Every sed call uses the same ".bak" backup suffix so that exactly one backup
# file exists, which is removed at the end of this section.

# Adapt autoSoftCommit to have a recommended value, and remove add-unknown-fields-to-the-schema
sed -i.bak '/<updateRequestProcessorChain name="add-unknown-fields-to-the-schema".*/,/<\/updateRequestProcessorChain>/d' "$DESTINATION_DIR/solrconfig.xml"
sed -i.bak 's/${solr.autoSoftCommit.maxTime:-1}/${solr.autoSoftCommit.maxTime:20}/' "$DESTINATION_DIR/solrconfig.xml"
# Configure spellcheck component: point it at meta_content__text_t instead of _text_
sed -i.bak 's/<str name="field">_text_<\/str>/<str name="field">meta_content__text_t<\/str>/' "$DESTINATION_DIR/solrconfig.xml"
# Add spellcheck component to /select handler
# NOTE(review): relies on sed expanding "\n" in the replacement to a newline
# (GNU sed does) - verify on BSD/macOS sed if this script must run there.
sed -i.bak 's/<requestHandler name="\/select" class="solr.SearchHandler">/<requestHandler name="\/select" class="solr.SearchHandler">\n <arr name="last-components">\n <str>spellcheck<\/str>\n <\/arr>/' "$DESTINATION_DIR/solrconfig.xml"

# Remove the single backup file left behind by the in-place edits above
rm "$DESTINATION_DIR/solrconfig.xml.bak"

Expand Down
12 changes: 8 additions & 4 deletions src/lib/Handler.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
use Ibexa\Core\Base\Exceptions\NotFoundException;

/**
* The Content Search handler retrieves sets of of Content objects, based on a
* The Content Search handler retrieves sets of Content objects, based on a
* set of criteria.
*
* The basic idea of this class is to do the following:
Expand All @@ -33,7 +33,7 @@
* sensible queries from all criterion definitions.
*
* 3) The query might be possible to optimize (remove empty statements),
* reduce singular and and or constructs…
* reduce singular and or constructs…
*
* 4) Additionally we might need a post-query filtering step, which filters
* content objects based on criteria, which could not be converted in to
Expand Down Expand Up @@ -153,7 +153,8 @@ public function findContent(Query $query, array $languageFilter = [])
$this->gateway->findContent($query, $languageFilter),
$query->facetBuilders,
$query->aggregations,
$languageFilter
$languageFilter,
$query->spellcheck
);
}

Expand Down Expand Up @@ -224,7 +225,9 @@ public function findLocations(LocationQuery $query, array $languageFilter = [])
return $this->locationResultExtractor->extract(
$this->gateway->findLocations($query, $languageFilter),
$query->facetBuilders,
$query->aggregations
$query->aggregations,
$languageFilter,
$query->spellcheck
);
}

Expand Down Expand Up @@ -478,6 +481,7 @@ public function supports(int $capabilityFlag): bool
case SearchService::CAPABILITY_SCORING:
case SearchService::CAPABILITY_FACETS:
case SearchService::CAPABILITY_CUSTOM_FIELDS:
case SearchService::CAPABILITY_SPELLCHECK:
case SearchService::CAPABILITY_ADVANCED_FULLTEXT:
case SearchService::CAPABILITY_AGGREGATIONS:
return true;
Expand Down
7 changes: 7 additions & 0 deletions src/lib/Query/Common/QueryConverter/NativeQueryConverter.php
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@ public function convert(Query $query, array $languageSettings = [])
}
}

if ($query->spellcheck !== null) {
$params['spellcheck'] = 'true';
$params['spellcheck.q'] = $query->spellcheck->getQuery();
$params['spellcheck.count'] = 1;
$params['spellcheck.collate'] = 'true';
}

return $params;
}

Expand Down
28 changes: 26 additions & 2 deletions src/lib/ResultExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
*/
namespace Ibexa\Solr;

use Ibexa\Contracts\Core\Repository\Values\Content\Query\Spellcheck;
use Ibexa\Contracts\Core\Repository\Values\Content\Search\AggregationResultCollection;
use Ibexa\Contracts\Core\Repository\Values\Content\Search\SearchHit;
use Ibexa\Contracts\Core\Repository\Values\Content\Search\SearchResult;
use Ibexa\Contracts\Core\Repository\Values\Content\Search\SpellcheckResult;
use Ibexa\Contracts\Solr\ResultExtractor\AggregationResultExtractor;
use Ibexa\Solr\Gateway\EndpointRegistry;
use Ibexa\Solr\Query\FacetFieldVisitor;
Expand Down Expand Up @@ -49,8 +51,13 @@ public function __construct(
*
* @return \Ibexa\Contracts\Core\Repository\Values\Content\Search\SearchResult
*/
public function extract($data, array $facetBuilders = [], array $aggregations = [], array $languageFilter = [])
{
public function extract(
$data,
array $facetBuilders = [],
array $aggregations = [],
array $languageFilter = [],
?Spellcheck $spellcheck = null
) {
$result = new SearchResult(
[
'time' => $data->responseHeader->QTime / 1000,
Expand All @@ -61,6 +68,7 @@ public function extract($data, array $facetBuilders = [], array $aggregations =

$result->facets = $this->extractFacets($data, $facetBuilders, $languageFilter);
$result->aggregations = $this->extractAggregations($data, $aggregations, $languageFilter);
$result->spellcheck = $this->extractSpellcheck($data, $spellcheck);

foreach ($data->response->docs as $doc) {
$result->searchHits[] = $this->extractSearchHit($doc, $languageFilter);
Expand Down Expand Up @@ -186,6 +194,22 @@ protected function extractSearchHit(stdClass $doc, array $languageFilter): Searc
]
);
}

/**
 * Builds the spellcheck part of the search result from the raw Solr response.
 *
 * @param \stdClass $data Decoded Solr response; its optional "spellcheck" property is inspected.
 * @param \Ibexa\Contracts\Core\Repository\Values\Content\Query\Spellcheck|null $spellcheck
 *        Spellcheck settings of the executed query, or null when spellchecking was not requested.
 *
 * @return \Ibexa\Contracts\Core\Repository\Values\Content\Search\SpellcheckResult|null
 *         Null when spellchecking was not requested; otherwise a result carrying either the
 *         collated query from the response or the original query, plus an "incorrect" flag.
 */
protected function extractSpellcheck(stdClass $data, ?Spellcheck $spellcheck): ?SpellcheckResult
{
    // Spellchecking was not part of the query: nothing to report.
    if ($spellcheck === null) {
        return null;
    }

    // The response carries no spellcheck section: echo the original query back as correct.
    if (!isset($data->spellcheck)) {
        return new SpellcheckResult($spellcheck->getQuery(), false);
    }

    $collations = $data->spellcheck->collations ?? null;

    // Any non-empty collation list marks the original query as misspelled.
    $isIncorrect = !empty($collations);

    // NOTE(review): index 1 presumably holds the collated query in Solr's flat
    // named-list JSON encoding (index 0 being the "collation" label) - confirm
    // against the response writer configuration.
    $suggestedQuery = $collations[1] ?? $spellcheck->getQuery();

    return new SpellcheckResult($suggestedQuery, $isIncorrect);
}
}

class_alias(ResultExtractor::class, 'EzSystems\EzPlatformSolrSearchEngine\ResultExtractor');