diff --git a/composer.json b/composer.json index 487ab14..cdf424f 100644 --- a/composer.json +++ b/composer.json @@ -24,7 +24,8 @@ "elasticsearch/elasticsearch": "^2.0", "ramsey/uuid": "^3.5", - "doctrine/dbal": "^2.5" + "doctrine/dbal": "^2.5", + "symfony/property-access": "^3.3" }, "require-dev": { "doctrine/doctrine-bundle": "^1.6", diff --git a/src/Component/Client/IndexInterface.php b/src/Component/Client/IndexInterface.php index d1d7742..cef2450 100644 --- a/src/Component/Client/IndexInterface.php +++ b/src/Component/Client/IndexInterface.php @@ -36,4 +36,6 @@ public function search(Search $search, $type); * @return array */ public function get($type, $id); + + public function optimize(); } diff --git a/src/Component/Elasticsearch/ElasticsearchIndex.php b/src/Component/Elasticsearch/ElasticsearchIndex.php index e33da99..d52fbc0 100644 --- a/src/Component/Elasticsearch/ElasticsearchIndex.php +++ b/src/Component/Elasticsearch/ElasticsearchIndex.php @@ -105,4 +105,12 @@ public function get($type, $id) return $response; } + + /** + * {@inheritdoc} + */ + public function optimize() + { + // nothing to optimize currently. + } } diff --git a/src/Component/Pucene/Compiler/Element/CompositeElement.php b/src/Component/Pucene/Compiler/Element/CompositeElement.php index c4236fe..0a5a6cb 100644 --- a/src/Component/Pucene/Compiler/Element/CompositeElement.php +++ b/src/Component/Pucene/Compiler/Element/CompositeElement.php @@ -6,8 +6,8 @@ class CompositeElement extends BaseElement { - const OR = 'or'; - const AND = 'and'; + const OPERATOR_OR = 'or'; + const OPERATOR_AND = 'and'; /** * @var string diff --git a/src/Component/Pucene/Compiler/Visitor/Compound/BoolVisitor.php b/src/Component/Pucene/Compiler/Visitor/Compound/BoolVisitor.php index e8f9412..6b9e5d5 100644 --- a/src/Component/Pucene/Compiler/Visitor/Compound/BoolVisitor.php +++ b/src/Component/Pucene/Compiler/Visitor/Compound/BoolVisitor.php @@ -45,11 +45,11 @@ public function visit(QueryInterface $query, StorageInterface $storage) } if (count($andElements) === 0) { - return new CompositeElement(CompositeElement:: OR, $shouldElements); + return new CompositeElement(CompositeElement::OPERATOR_OR, $shouldElements); } return new BoolElement( - new CompositeElement(CompositeElement:: AND, $andElements), + new CompositeElement(CompositeElement::OPERATOR_AND, $andElements), array_merge($mustElements, $shouldElements) ); } diff --git a/src/Component/Pucene/Compiler/Visitor/FullText/MatchVisitor.php b/src/Component/Pucene/Compiler/Visitor/FullText/MatchVisitor.php index 9052235..4945591 100644 --- a/src/Component/Pucene/Compiler/Visitor/FullText/MatchVisitor.php +++ b/src/Component/Pucene/Compiler/Visitor/FullText/MatchVisitor.php @@ -39,6 +39,6 @@ public function visit(QueryInterface $query, StorageInterface $storage) $terms[] = new TermElement($query->getField(), $token->getEncodedTerm()); } - return new CompositeElement(CompositeElement:: OR, $terms); + return new CompositeElement(CompositeElement:: OPERATOR_OR, $terms); } } diff --git a/src/Component/Pucene/Compiler/Visitor/Specialized/MoreLikeThisVisitor.php b/src/Component/Pucene/Compiler/Visitor/Specialized/MoreLikeThisVisitor.php index d5a9259..81dbde5 100644 --- a/src/Component/Pucene/Compiler/Visitor/Specialized/MoreLikeThisVisitor.php +++ b/src/Component/Pucene/Compiler/Visitor/Specialized/MoreLikeThisVisitor.php @@ -62,15 +62,15 @@ public function visit(QueryInterface $query, StorageInterface $storage) $mustNotElements = $this->getMustNotElements($query->getLike()); if (0 === count($mustNotElements)) { - return new CompositeElement(CompositeElement:: OR, $elements); + return new CompositeElement(CompositeElement:: OPERATOR_OR, $elements); } return new BoolElement( new CompositeElement( - CompositeElement:: AND, + CompositeElement:: OPERATOR_AND, [ - new CompositeElement(CompositeElement:: AND, $mustNotElements), - new CompositeElement(CompositeElement:: OR, $elements), + new CompositeElement(CompositeElement:: OPERATOR_AND, $mustNotElements), + new CompositeElement(CompositeElement:: OPERATOR_OR, $elements), ] ), $elements diff --git a/src/Component/Pucene/Compiler/Visitor/TermLevel/IdsVisitor.php b/src/Component/Pucene/Compiler/Visitor/TermLevel/IdsVisitor.php index cc9944f..9cbb9c1 100644 --- a/src/Component/Pucene/Compiler/Visitor/TermLevel/IdsVisitor.php +++ b/src/Component/Pucene/Compiler/Visitor/TermLevel/IdsVisitor.php @@ -24,6 +24,6 @@ public function visit(QueryInterface $query, StorageInterface $storage) return $ids; } - return new CompositeElement(CompositeElement:: AND, [$ids, new TypeElement($query->getType())]); + return new CompositeElement(CompositeElement:: OPERATOR_AND, [$ids, new TypeElement($query->getType())]); } } diff --git a/src/Component/Pucene/Dbal/DbalStorage.php b/src/Component/Pucene/Dbal/DbalStorage.php index 39cd051..ed6deb5 100644 --- a/src/Component/Pucene/Dbal/DbalStorage.php +++ b/src/Component/Pucene/Dbal/DbalStorage.php @@ -144,6 +144,11 @@ public function termStatistics() return new DbalTermStatistics($this->connection, $this->getSchema()); } + public function optimize() + { + $this->persister->optimize(); + } + public function getConnection() { return $this->connection; diff --git a/src/Component/Pucene/Dbal/DocumentPersister.php b/src/Component/Pucene/Dbal/DocumentPersister.php index a54e9b3..5bd07db 100644 --- a/src/Component/Pucene/Dbal/DocumentPersister.php +++ b/src/Component/Pucene/Dbal/DocumentPersister.php @@ -12,12 +12,12 @@ class DocumentPersister /** * @var Connection */ - private $connection; + public $connection; /** * @var PuceneSchema */ - private $schema; + public $schema; /** * @param Connection $connection @@ -56,21 +56,51 @@ public function persist(Document $document, array $fields) $token->getEncodedTerm(), ElasticsearchPrecision::fieldNorm($field->getNumberOfTerms()) ); + + $this->connection->createQueryBuilder() + ->update($this->schema->getDocumentTermsTableName()) + ->set('frequency', 'frequency + 1') + ->andWhere('field_name = :fieldName') + ->andWhere('term = :term') + ->setParameter('fieldName', $field->getName()) + ->setParameter('term', $token->getEncodedTerm()) + ->execute(); } // update term frequency foreach ($fieldTerms as $term => $frequency) { - $this->connection->update( - $this->schema->getDocumentTermsTableName(), - [ - 'term_frequency' => $frequency, - ], - ['document_id' => $document->getId(), 'field_name' => $field->getName(), 'term' => $term] - ); + $this->connection->createQueryBuilder() + ->update($this->schema->getDocumentTermsTableName()) + ->set('term_frequency', sqrt($frequency)) + ->set('score', 'field_norm * ' . sqrt($frequency)) + ->andWhere('document_ID = :document') + ->andWhere('field_name = :fieldName') + ->andWhere('term = :term') + ->setParameter('document', $document->getId()) + ->setParameter('fieldName', $field->getName()) + ->setParameter('term', $term) + ->execute(); } } } + public function optimize() + { + // TODO recalculate term frequency + + $docCount = $this->connection->createQueryBuilder() + ->select('COUNT(id)') + ->from($this->schema->getDocumentsTableName()) + ->execute() + ->fetchColumn(); + + // calculate inverse-document-frequency + $this->connection->createQueryBuilder() + ->update($this->schema->getDocumentTermsTableName()) + ->set('idf', '1 + log(' . $docCount . ' / (frequency + 1))') + ->execute(); + } + /** * @param Document $document */ @@ -101,6 +131,16 @@ protected function insertDocument(Document $document) */ protected function insertToken(string $documentId, string $fieldName, $term, $fieldNorm) { + $frequency = $this->connection->createQueryBuilder() + ->select('frequency') + ->from($this->schema->getDocumentTermsTableName()) + ->andWhere('field_name = :fieldName') + ->andWhere('term = :term') + ->setParameter('fieldName', $fieldName) + ->setParameter('term', $term) + ->execute() + ->fetchColumn(); + $this->connection->insert( $this->schema->getDocumentTermsTableName(), [ @@ -108,6 +148,7 @@ protected function insertToken(string $documentId, string $fieldName, $term, $fi 'field_name' => $fieldName, 'term' => $term, 'field_norm' => $fieldNorm, + 'frequency' => $frequency ?: 0, ] ); } diff --git a/src/Component/Pucene/Dbal/Interpreter/DbalInterpreter.php b/src/Component/Pucene/Dbal/Interpreter/DbalInterpreter.php index 5afcb6f..b6ef451 100644 --- a/src/Component/Pucene/Dbal/Interpreter/DbalInterpreter.php +++ b/src/Component/Pucene/Dbal/Interpreter/DbalInterpreter.php @@ -8,10 +8,17 @@ use Pucene\Component\Pucene\Model\Document; use Pucene\Component\QueryBuilder\Search; use Pucene\Component\QueryBuilder\Sort\IdSort; +use Pucene\Component\QueryBuilder\Sort\ScoreSort; use Pucene\Component\Symfony\Pool\PoolInterface; +use Pucene\Component\Utils\SortUtils; class DbalInterpreter { + public static $sortPaths = [ + ScoreSort::class => 'score', + IdSort::class => 'id', + ]; + /** * @var PoolInterface */ @@ -42,8 +49,6 @@ public function interpret(array $types, Search $search, DbalStorage $storage, El ->select('document.*') ->from($schema->getDocumentsTableName(), 'document') ->where('document.type IN (?)') - ->setMaxResults($search->getSize()) - ->setFirstResult($search->getFrom()) ->setParameter(0, implode(',', $types)); /** @var InterpreterInterface $interpreter */ @@ -54,21 +59,6 @@ public function interpret(array $types, Search $search, DbalStorage $storage, El } $scoringAlgorithm = new ScoringAlgorithm($queryBuilder, $schema, $this->interpreterPool); - $expression = $interpreter->scoring($element, $scoringAlgorithm); - - if ($expression) { - $queryBuilder->addSelect('(' . $expression . ') as score')->orderBy('score', 'desc'); - } else { - $queryBuilder->addSelect('1 as score'); - } - - if (0 < count($search->getSorts())) { - foreach ($search->getSorts() as $sort) { - if ($sort instanceof IdSort) { - $queryBuilder->addOrderBy('id', $sort->getOrder()); - } - } - } $result = []; foreach ($queryBuilder->execute()->fetchAll() as $row) { @@ -77,10 +67,17 @@ public function interpret(array $types, Search $search, DbalStorage $storage, El $row['type'], $storage->getName(), json_decode($row['document'], true), - array_key_exists('score', $row) ? (float) $row['score'] : 1 + $interpreter->newScoring($element, $scoringAlgorithm, $row) ); } - return $result; + $paths = []; + foreach ($search->getSorts() as $sort) { + $paths[] = self::$sortPaths[get_class($sort)]; + } + + $result = SortUtils::multisort($result, $paths); + + return array_splice($result, $search->getFrom(), $search->getSize()); } } diff --git a/src/Component/Pucene/Dbal/Interpreter/Element/BoolInterpreter.php b/src/Component/Pucene/Dbal/Interpreter/Element/BoolInterpreter.php index dc9a5bf..0945e0a 100644 --- a/src/Component/Pucene/Dbal/Interpreter/Element/BoolInterpreter.php +++ b/src/Component/Pucene/Dbal/Interpreter/Element/BoolInterpreter.php @@ -35,6 +35,10 @@ public function __construct(PoolInterface $interpreterPool) */ public function interpret(ElementInterface $element, PuceneQueryBuilder $queryBuilder) { + foreach ($element->getScoringElements() as $innerElement) { + $this->getInterpreter($innerElement)->interpret($innerElement, $queryBuilder); + } + return $this->getInterpreter($element->getElement())->interpret($element->getElement(), $queryBuilder); } @@ -74,6 +78,42 @@ public function scoring(ElementInterface $element, ScoringAlgorithm $scoring, $q ); } + /** + * {@inheritdoc} + * + * @param BoolElement $element + */ + public function newScoring(ElementInterface $element, ScoringAlgorithm $scoring, array $row, $queryNorm = null) + { + if (count($element->getScoringElements()) === 0 || $element->getBoost() === 0) { + return 0; + } elseif (count($element->getScoringElements()) === 1) { + $innerElement = $element->getScoringElements()[0]; + $interpreter = $this->interpreterPool->get(get_class($innerElement)); + + return $interpreter->newScoring($innerElement, $scoring, $row); + } + + if (!$queryNorm) { + $queryNorm = $scoring->queryNorm($this->getTerms($element->getScoringElements())); + } + + $score = 0; + $coord = 0; + foreach ($element->getScoringElements() as $innerElement) { + /** @var InterpreterInterface $interpreter */ + $interpreter = $this->interpreterPool->get(get_class($innerElement)); + + $score += $interpreter->newScoring($innerElement, $scoring, $row, $queryNorm); + + if ($interpreter->matches($innerElement, $row)) { + $coord += 1 / count($element->getScoringElements()); + } + } + + return $score * $coord * $element->getBoost(); + } + private function getTerms(array $elements) { $terms = []; diff --git a/src/Component/Pucene/Dbal/Interpreter/Element/CompositeInterpreter.php b/src/Component/Pucene/Dbal/Interpreter/Element/CompositeInterpreter.php index 44245f0..34697f5 100644 --- a/src/Component/Pucene/Dbal/Interpreter/Element/CompositeInterpreter.php +++ b/src/Component/Pucene/Dbal/Interpreter/Element/CompositeInterpreter.php @@ -29,7 +29,7 @@ public function interpret(ElementInterface $element, PuceneQueryBuilder $queryBu $expr = $queryBuilder->expr(); $expression = $expr->orX(); - if ($element->getOperator() === CompositeElement:: AND) { + if ($element->getOperator() === CompositeElement:: OPERATOR_AND) { $expression = $expr->andX(); } @@ -53,4 +53,35 @@ public function scoring(ElementInterface $element, ScoringAlgorithm $scoring, $q { return parent::scoring(new BoolElement($element, $element->getElements()), $scoring, $queryNorm); } + + /** + * {@inheritdoc} + * + * @param CompositeElement $element + */ + public function newScoring(ElementInterface $element, ScoringAlgorithm $scoring, array $row, $queryNorm = null) + { + return parent::newScoring(new BoolElement($element, $element->getElements()), $scoring, $row, $queryNorm); + } + + /** + * {@inheritdoc} + * + * @param CompositeElement $element + */ + public function matches(ElementInterface $element, array $row) + { + foreach ($element->getElements() as $innerElement) { + $interpreter = $this->interpreterPool->get(get_class($innerElement)); + if ($interpreter->matches($innerElement, $row)) { + if ($element->getOperator() === CompositeElement::OPERATOR_OR) { + return true; + } + } elseif ($element->getOperator() === CompositeElement::OPERATOR_AND) { + return false; + } + } + + return $element->getOperator() === CompositeElement::OPERATOR_AND; + } } diff --git a/src/Component/Pucene/Dbal/Interpreter/Element/IdsInterpreter.php b/src/Component/Pucene/Dbal/Interpreter/Element/IdsInterpreter.php index a63db91..839885a 100644 --- a/src/Component/Pucene/Dbal/Interpreter/Element/IdsInterpreter.php +++ b/src/Component/Pucene/Dbal/Interpreter/Element/IdsInterpreter.php @@ -38,4 +38,14 @@ public function scoring(ElementInterface $element, ScoringAlgorithm $scoring, $q { return (new MathExpressionBuilder())->value(1); } + + public function newScoring(ElementInterface $element, ScoringAlgorithm $scoring, array $row, $queryNorm = null) + { + return 1; + } + + public function matches(ElementInterface $element, array $row) + { + return in_array($row['id'], $element->getIds()); + } } diff --git a/src/Component/Pucene/Dbal/Interpreter/Element/MatchAllInterpreter.php b/src/Component/Pucene/Dbal/Interpreter/Element/MatchAllInterpreter.php index ba5b621..c340875 100644 --- a/src/Component/Pucene/Dbal/Interpreter/Element/MatchAllInterpreter.php +++ b/src/Component/Pucene/Dbal/Interpreter/Element/MatchAllInterpreter.php @@ -30,4 +30,9 @@ public function scoring(ElementInterface $element, ScoringAlgorithm $scoring, $q { return (new MathExpressionBuilder())->value(1); } + + public function newScoring(ElementInterface $element, ScoringAlgorithm $scoring, array $row, $queryNorm = null) + { + return 1; + } } diff --git a/src/Component/Pucene/Dbal/Interpreter/Element/NotInterpreter.php b/src/Component/Pucene/Dbal/Interpreter/Element/NotInterpreter.php index 78edb52..70ed3d2 100644 --- a/src/Component/Pucene/Dbal/Interpreter/Element/NotInterpreter.php +++ b/src/Component/Pucene/Dbal/Interpreter/Element/NotInterpreter.php @@ -53,6 +53,13 @@ public function scoring(ElementInterface $element, ScoringAlgorithm $scoring, $q ); } + public function newScoring(ElementInterface $element, ScoringAlgorithm $scoring, array $row, $queryNorm = null) + { + $interpreter = $this->getInterpreter($element->getElement()); + + return $element->getBoost() * $interpreter->newScoring($element, $scoring, $row, $queryNorm); + } + /** * Returns interpreter for element. * diff --git a/src/Component/Pucene/Dbal/Interpreter/Element/TermInterpreter.php b/src/Component/Pucene/Dbal/Interpreter/Element/TermInterpreter.php index 5ea3c3d..a18958c 100644 --- a/src/Component/Pucene/Dbal/Interpreter/Element/TermInterpreter.php +++ b/src/Component/Pucene/Dbal/Interpreter/Element/TermInterpreter.php @@ -18,10 +18,12 @@ class TermInterpreter implements InterpreterInterface public function interpret(ElementInterface $element, PuceneQueryBuilder $queryBuilder) { $expr = $queryBuilder->expr(); + $name = $queryBuilder->joinTerm($element->getField(), $element->getTerm()); - return $expr->isNotNull( - $queryBuilder->joinTerm($element->getField(), $element->getTerm()) . '.id' - ); + $queryBuilder->addSelect(sprintf('(%1$s.score) as %1$sValue', $name)); + $queryBuilder->addSelect(sprintf('%1$s.idf as %1$sIdf', $name)); + + return $expr->isNotNull($name . '.id'); } /** @@ -33,4 +35,43 @@ public function scoring(ElementInterface $element, ScoringAlgorithm $scoring, $q { return $scoring->scoreTerm($element, $queryNorm, $element->getBoost()); } + + /** + * {@inheritdoc} + * + * @param TermElement $element + */ + public function newScoring(ElementInterface $element, ScoringAlgorithm $scoring, array $row, $queryNorm = null) + { + $termName = 'term' . ucfirst($element->getField()) . ucfirst($element->getTerm()); + $idfName = $termName . 'Idf'; + $valueName = $termName . 'Value'; + if (!array_key_exists($valueName, $row) + || !array_key_exists($idfName, $row) + || $row[$valueName] === null + || $row[$idfName] === null + ) { + return 0; + } + + $idf = $row[$idfName]; + $factor = $idf * $element->getBoost(); + if ($queryNorm) { + $factor *= $idf * $queryNorm; + } + + return $row[$valueName] * $factor; + } + + /** + * {@inheritdoc} + * + * @param TermElement $element + */ + public function matches(ElementInterface $element, array $row) + { + $termName = 'term' . ucfirst($element->getField()) . ucfirst($element->getTerm()) . 'Value'; + + return array_key_exists($termName, $row) && $row[$termName] !== null; + } } diff --git a/src/Component/Pucene/Dbal/Interpreter/Element/TypeInterpreter.php b/src/Component/Pucene/Dbal/Interpreter/Element/TypeInterpreter.php index d70301d..06fb524 100644 --- a/src/Component/Pucene/Dbal/Interpreter/Element/TypeInterpreter.php +++ b/src/Component/Pucene/Dbal/Interpreter/Element/TypeInterpreter.php @@ -30,4 +30,14 @@ public function scoring(ElementInterface $element, ScoringAlgorithm $scoring, $q { return (new MathExpressionBuilder())->value(1); } + + public function newScoring(ElementInterface $element, ScoringAlgorithm $scoring, array $row, $queryNorm = null) + { + return 1; + } + + public function matches(ElementInterface $element, array $row) + { + return $row['id'] === $element->getType(); + } } diff --git a/src/Component/Pucene/Dbal/PuceneSchema.php b/src/Component/Pucene/Dbal/PuceneSchema.php index f4a21cb..d1fbd23 100644 --- a/src/Component/Pucene/Dbal/PuceneSchema.php +++ b/src/Component/Pucene/Dbal/PuceneSchema.php @@ -58,8 +58,11 @@ private function createDocumentTermsTable() $fields->addColumn('document_id', 'string', ['length' => 255]); $fields->addColumn('field_name', 'string', ['length' => 255]); $fields->addColumn('term', 'string', ['length' => 255]); - $fields->addColumn('term_frequency', 'integer', ['default' => 0]); + $fields->addColumn('term_frequency', 'float', ['default' => 0]); $fields->addColumn('field_norm', 'float', ['default' => 0]); + $fields->addColumn('score', 'float', ['default' => 0]); + $fields->addColumn('frequency', 'integer', ['default' => 0]); + $fields->addColumn('idf', 'float', ['default' => 0]); $fields->setPrimaryKey(['id']); $fields->addForeignKeyConstraint( diff --git a/src/Component/Pucene/Dbal/ScoringAlgorithm.php b/src/Component/Pucene/Dbal/ScoringAlgorithm.php index 77cb22f..8a5e3b2 100644 --- a/src/Component/Pucene/Dbal/ScoringAlgorithm.php +++ b/src/Component/Pucene/Dbal/ScoringAlgorithm.php @@ -38,6 +38,11 @@ class ScoringAlgorithm */ private $docCount; + /** + * @var int[] + */ + private $docCounts = []; + /** * @param PuceneQueryBuilder $queryBuilder * @param PuceneSchema $schema @@ -98,7 +103,7 @@ public function getConnection(): Connection return $this->queryBuilder->getConnection(); } - private function inverseDocumentFrequency(ElementInterface $element): float + public function inverseDocumentFrequency(ElementInterface $element): float { return $this->calculateInverseDocumentFrequency($this->getDocCountForElement($element)); } @@ -108,13 +113,18 @@ private function inverseDocumentFrequency(ElementInterface $element): float * * @return float */ - private function calculateInverseDocumentFrequency($docCount) + public function calculateInverseDocumentFrequency($docCount) { return 1 + log((float) $this->getDocCount() / ($docCount + 1)); } private function getDocCountForElement(ElementInterface $element) { + $key = $element->getField() . $element->getTerm(); + if (array_key_exists($key, $this->docCounts)) { + return $this->docCounts[$key]; + } + $queryBuilder = (new PuceneQueryBuilder($this->queryBuilder->getConnection(), $this->schema)) ->select('count(document.id) as count') ->from($this->schema->getDocumentsTableName(), 'document'); @@ -124,10 +134,10 @@ private function getDocCountForElement(ElementInterface $element) $queryBuilder->where($expression); } - return (int) $queryBuilder->execute()->fetchColumn(); + return $this->docCounts[$key] = (int)$queryBuilder->execute()->fetchColumn(); } - private function getDocCount() + public function getDocCount() { if ($this->docCount) { return $this->docCount; diff --git a/src/Component/Pucene/PuceneIndex.php b/src/Component/Pucene/PuceneIndex.php index bde92c8..c61715e 100644 --- a/src/Component/Pucene/PuceneIndex.php +++ b/src/Component/Pucene/PuceneIndex.php @@ -96,4 +96,9 @@ public function get($type, $id) { return $this->storage->get($type, $id); } + + public function optimize() + { + $this->storage->optimize(); + } } diff --git a/src/Component/Pucene/StorageInterface.php b/src/Component/Pucene/StorageInterface.php index bda636d..f890224 100644 --- a/src/Component/Pucene/StorageInterface.php +++ b/src/Component/Pucene/StorageInterface.php @@ -20,4 +20,6 @@ public function search(Search $search, $type); public function get($type, $id); public function termStatistics(); + + public function optimize(); } diff --git a/src/Component/QueryBuilder/Search.php b/src/Component/QueryBuilder/Search.php index d55e695..98ee04d 100644 --- a/src/Component/QueryBuilder/Search.php +++ b/src/Component/QueryBuilder/Search.php @@ -3,6 +3,7 @@ namespace Pucene\Component\QueryBuilder; use Pucene\Component\QueryBuilder\Query\QueryInterface; +use Pucene\Component\QueryBuilder\Sort\ScoreSort; use Pucene\Component\QueryBuilder\Sort\SortInterface; class Search @@ -39,6 +40,8 @@ class Search public function __construct(QueryInterface $query = null) { $this->query = $query; + + $this->sorts = [new ScoreSort()]; } /** diff --git a/src/Component/QueryBuilder/Sort/ScoreSort.php b/src/Component/QueryBuilder/Sort/ScoreSort.php new file mode 100644 index 0000000..a266665 --- /dev/null +++ b/src/Component/QueryBuilder/Sort/ScoreSort.php @@ -0,0 +1,27 @@ +order = $order; + } + + /** + * {@inheritdoc} + */ + public function getOrder(): string + { + return $this->order; + } +} diff --git a/src/Component/Utils/SortUtils.php b/src/Component/Utils/SortUtils.php new file mode 100644 index 0000000..44e9946 --- /dev/null +++ b/src/Component/Utils/SortUtils.php @@ -0,0 +1,102 @@ + 'b'), + * array('foobar' => 'a'), + * ); + * + * SortUtils::multisort($data, '[foobar]', 'asc'); + * + * echo $data[0]; // "a" + * + * You can also use method names: + * + * SortUtils::multisort($data, 'getFoobar', 'asc'); + * + * Or sort on multidimensional arrays: + * + * SortUtils::multisort($data, 'foobar.bar.getFoobar', 'asc'); + * + * And you can sort on multiple paths: + * + * SortUtils::multisort($data, array('foo', 'bar'), 'asc'); + * + * The path is any path accepted by the property access component: + * + * @see http://symfony.com/doc/current/components/property_access/introduction.html + * + * @param array $values + * @param string|array $paths Path or paths on which to sort on + * @param string $direction Direction to sort in (either ASC or DESC) + * + * @return array + */ + public static function multisort($values, $paths, $direction = 'ASC') + { + $accessor = PropertyAccess::createPropertyAccessor(); + + $values = (array) $values; + $paths = (array) $paths; + + usort( + $values, + function ($a, $b) use ($accessor, $paths) { + foreach ($paths as $i => $path) { + $aOrder = $accessor->getValue($a, $path); + $bOrder = $accessor->getValue($b, $path); + + if (is_string($aOrder)) { + $aOrder = strtolower($aOrder); + $bOrder = strtolower($bOrder); + } + + if ($aOrder == $bOrder) { + if (count($paths) == ($i + 1)) { + return 0; + } else { + continue; + } + } + + return ($aOrder < $bOrder) ? -1 : 1; + } + } + ); + + if (strtoupper($direction) == 'DESC') { + $values = array_reverse($values); + } + + return $values; + } +} diff --git a/tests/src/TestBundle/Command/ImportJsonCommand.php b/tests/src/TestBundle/Command/ImportJsonCommand.php index 4fee19f..2f566c7 100644 --- a/tests/src/TestBundle/Command/ImportJsonCommand.php +++ b/tests/src/TestBundle/Command/ImportJsonCommand.php @@ -2,6 +2,7 @@ namespace Pucene\Tests\TestBundle\Command; +use Pucene\Component\Client\ClientInterface; use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand; use Symfony\Component\Console\Helper\ProgressBar; use Symfony\Component\Console\Input\InputArgument; @@ -30,6 +31,7 @@ protected function configure() */ protected function execute(InputInterface $input, OutputInterface $output) { + /** @var ClientInterface $client */ $client = $this->getContainer()->get('pucene.' . $input->getOption('adapter') . '.client'); $index = $client->get($input->getArgument('index')); @@ -44,6 +46,8 @@ protected function execute(InputInterface $input, OutputInterface $output) $progressBar->advance(); } + $index->optimize(); + $progressBar->finish(); } } diff --git a/tests/src/TestBundle/Command/ImportWikidataCommand.php b/tests/src/TestBundle/Command/ImportWikidataCommand.php index d8612f0..6d51301 100644 --- a/tests/src/TestBundle/Command/ImportWikidataCommand.php +++ b/tests/src/TestBundle/Command/ImportWikidataCommand.php @@ -58,6 +58,8 @@ protected function execute(InputInterface $input, OutputInterface $output) $progressBar->advance(); } + $index->optimize(); + $progressBar->finish(); } }