From bee2cf823ea35822d787dafc2b00b0a77e2a6c7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Katarina=20Mio=C4=8Di=C4=87?= Date: Mon, 15 Apr 2024 15:05:55 +0200 Subject: [PATCH] NGSTACK-834 page indexing implementation from fina + FieldMapper implementation for elasticsearch --- bundle/DependencyInjection/Configuration.php | 45 +++ .../NetgenIbexaSearchExtraExtension.php | 36 ++- bundle/NetgenIbexaSearchExtraBundle.php | 2 + composer.json | 12 +- lib/Command/IndexPageContentCommand.php | 170 +++++++++++ ...sticsearchContentFieldMapperMapperPass.php | 50 ++++ .../Compiler/LayoutsPageIndexingPass.php | 61 ++++ lib/Core/Search/Common/PageTextExtractor.php | 283 ++++++++++++++++++ .../DocumentMapper/BlockFieldMapper.php | 20 ++ .../BlockFieldMapper/Aggregate.php | 54 ++++ .../BlockTranslationFieldMapper.php | 20 ++ .../BlockTranslationFieldMapper/Aggregate.php | 53 ++++ .../BlockPageTextFieldMapper.php | 45 +++ .../DocumentMapper/ContentFieldMapper.php | 20 ++ .../ContentFieldMapper/Aggregate.php | 53 ++++ .../ContentVisibilityFieldMapper.php | 27 ++ .../ContentTranslationFieldMapper.php | 20 ++ .../Aggregate.php | 53 ++++ .../DocumentMapper/DocumentFactory.php | 89 ++++++ .../DocumentMapper/LocationFieldMapper.php | 21 ++ .../LocationFieldMapper/Aggregate.php | 56 ++++ .../LocationVisibilityFieldMapper.php | 36 +++ .../LocationTranslationFieldMapper.php | 20 ++ .../Aggregate.php | 55 ++++ .../Content/VisibilityVisitor.php | 31 ++ .../Location/VisibilityVisitor.php | 31 ++ .../ContentPageTextFieldMapper.php | 41 +++ .../IndexPageUnavailableException.php | 15 + lib/Resources/config/search/common.yaml | 2 + .../common/layouts_page_text_indexing.yaml | 30 ++ .../elastic_search/document_factory.yaml | 44 +++ 31 files changed, 1491 insertions(+), 4 deletions(-) create mode 100644 lib/Command/IndexPageContentCommand.php create mode 100644 lib/Container/Compiler/AggregateElasticsearchContentFieldMapperMapperPass.php create mode 100644 
lib/Container/Compiler/LayoutsPageIndexingPass.php create mode 100644 lib/Core/Search/Common/PageTextExtractor.php create mode 100644 lib/Core/Search/Elasticsearch/DocumentMapper/BlockFieldMapper.php create mode 100644 lib/Core/Search/Elasticsearch/DocumentMapper/BlockFieldMapper/Aggregate.php create mode 100644 lib/Core/Search/Elasticsearch/DocumentMapper/BlockTranslationFieldMapper.php create mode 100644 lib/Core/Search/Elasticsearch/DocumentMapper/BlockTranslationFieldMapper/Aggregate.php create mode 100644 lib/Core/Search/Elasticsearch/DocumentMapper/BlockTranslationFieldMapper/BlockPageTextFieldMapper.php create mode 100644 lib/Core/Search/Elasticsearch/DocumentMapper/ContentFieldMapper.php create mode 100644 lib/Core/Search/Elasticsearch/DocumentMapper/ContentFieldMapper/Aggregate.php create mode 100644 lib/Core/Search/Elasticsearch/DocumentMapper/ContentFieldMapper/ContentVisibilityFieldMapper.php create mode 100644 lib/Core/Search/Elasticsearch/DocumentMapper/ContentTranslationFieldMapper.php create mode 100644 lib/Core/Search/Elasticsearch/DocumentMapper/ContentTranslationFieldMapper/Aggregate.php create mode 100644 lib/Core/Search/Elasticsearch/DocumentMapper/DocumentFactory.php create mode 100644 lib/Core/Search/Elasticsearch/DocumentMapper/LocationFieldMapper.php create mode 100644 lib/Core/Search/Elasticsearch/DocumentMapper/LocationFieldMapper/Aggregate.php create mode 100644 lib/Core/Search/Elasticsearch/DocumentMapper/LocationFieldMapper/LocationVisibilityFieldMapper.php create mode 100644 lib/Core/Search/Elasticsearch/DocumentMapper/LocationTranslationFieldMapper.php create mode 100644 lib/Core/Search/Elasticsearch/DocumentMapper/LocationTranslationFieldMapper/Aggregate.php create mode 100644 lib/Core/Search/Elasticsearch/Query/CriterionVisitor/Content/VisibilityVisitor.php create mode 100644 lib/Core/Search/Elasticsearch/Query/CriterionVisitor/Location/VisibilityVisitor.php create mode 100644 
lib/Core/Search/Solr/FieldMapper/ContentTranslation/ContentPageTextFieldMapper.php create mode 100644 lib/Exception/IndexPageUnavailableException.php create mode 100644 lib/Resources/config/search/common/layouts_page_text_indexing.yaml create mode 100644 lib/Resources/config/search/elastic_search/document_factory.yaml diff --git a/bundle/DependencyInjection/Configuration.php b/bundle/DependencyInjection/Configuration.php index ce69b341..6b8f3599 100644 --- a/bundle/DependencyInjection/Configuration.php +++ b/bundle/DependencyInjection/Configuration.php @@ -4,6 +4,7 @@ namespace Netgen\Bundle\IbexaSearchExtraBundle\DependencyInjection; +use Ibexa\Contracts\Core\Repository\LanguageService; use Symfony\Component\Config\Definition\Builder\ArrayNodeDefinition; use Symfony\Component\Config\Definition\Builder\TreeBuilder; use Symfony\Component\Config\Definition\ConfigurationInterface; @@ -25,6 +26,8 @@ public function getConfigTreeBuilder(): TreeBuilder $this->addIndexableFieldTypeSection($rootNode); $this->addSearchResultExtractorSection($rootNode); $this->addAsynchronousIndexingSection($rootNode); + $this->addUsePageIndexingSection($rootNode); + $this->addPageIndexingSection($rootNode); return $treeBuilder; } @@ -73,4 +76,46 @@ private function addAsynchronousIndexingSection(ArrayNodeDefinition $nodeDefinit ->end() ->end(); } + + private function addUsePageIndexingSection(ArrayNodeDefinition $nodeDefinition): void + { + $nodeDefinition + ->children() + ->booleanNode('use_page_indexing') + ->info('Use layouts page text indexing') + ->defaultFalse() + ->end() + ->end(); + } + + private function addPageIndexingSection(ArrayNodeDefinition $nodeDefinition): void + { + $nodeDefinition + ->children() + ->arrayNode('page_indexing') + ->info('Page indexing configuration') + ->children() + ->arrayNode('site_roots') + ->scalarPrototype()->end() + ->info('Location ids of site roots') + ->end() + ->arrayNode('languages_siteaccess_map') + ->prototype('array') + 
->prototype('scalar')->end() + ->end() + ->end() + ->scalarNode('host') + ->end() + ->arrayNode('config') + ->prototype('array') + ->prototype('scalar')->end() + ->end() + ->end() + ->arrayNode('allowed_content_types') + ->scalarPrototype()->end() + ->end() + ->end() + ->end(); + } + } diff --git a/bundle/DependencyInjection/NetgenIbexaSearchExtraExtension.php b/bundle/DependencyInjection/NetgenIbexaSearchExtraExtension.php index 729acae5..23ae1096 100644 --- a/bundle/DependencyInjection/NetgenIbexaSearchExtraExtension.php +++ b/bundle/DependencyInjection/NetgenIbexaSearchExtraExtension.php @@ -82,12 +82,12 @@ private function loadBundleSolrEngine(ContainerBuilder $container): void private function processExtensionConfiguration(array $configs, ContainerBuilder $container): void { $configuration = $this->getConfiguration($configs, $container); - $configuration = $this->processConfiguration($configuration, $configs); - $this->processIndexableFieldTypeConfiguration($configuration, $container); $this->processSearchResultExtractorConfiguration($configuration, $container); $this->processAsynchronousIndexingConfiguration($configuration, $container); + $this->processUsePageIndexingConfiguration($configuration, $container); + $this->processPageIndexingConfiguration($configuration, $container); } private function processSearchResultExtractorConfiguration(array $configuration, ContainerBuilder $container): void @@ -117,4 +117,36 @@ private function processAsynchronousIndexingConfiguration(array $configuration, $configuration['use_asynchronous_indexing'], ); } + + private function processUsePageIndexingConfiguration(array $configuration, ContainerBuilder $container): void + { + $container->setParameter( + 'netgen_ibexa_search_extra.use_page_indexing', + $configuration['use_page_indexing'], + ); + } + + private function processPageIndexingConfiguration(array $configuration, ContainerBuilder $container): void + { + $container->setParameter( + 
'netgen_ibexa_search_extra.page_indexing.site_roots', + $configuration['page_indexing']['site_roots'] ?? [], + ); + $container->setParameter( + 'netgen_ibexa_search_extra.page_indexing.languages_siteaccess_map', + $configuration['page_indexing']['languages_siteaccess_map'] ?? [], + ); + $container->setParameter( + 'netgen_ibexa_search_extra.page_indexing.host', + $configuration['page_indexing']['host'] ?? [], + ); + $container->setParameter( + 'netgen_ibexa_search_extra.page_indexing.config', + $configuration['page_indexing']['config'] ?? [], + ); + $container->setParameter( + 'netgen_ibexa_search_extra.page_indexing.allowed_content_types', + $configuration['page_indexing']['allowed_content_types'] ?? [], + ); + } } diff --git a/bundle/NetgenIbexaSearchExtraBundle.php b/bundle/NetgenIbexaSearchExtraBundle.php index eb3dbec3..7a57e113 100644 --- a/bundle/NetgenIbexaSearchExtraBundle.php +++ b/bundle/NetgenIbexaSearchExtraBundle.php @@ -24,5 +24,7 @@ public function build(ContainerBuilder $container): void $container->addCompilerPass(new Compiler\FieldType\RichTextIndexablePass()); $container->addCompilerPass(new Compiler\SearchResultExtractorPass()); $container->addCompilerPass(new Compiler\RawFacetBuilderDomainVisitorPass()); + $container->addCompilerPass(new Compiler\LayoutsPageIndexingPass()); + $container->addCompilerPass(new Compiler\AggregateElasticsearchContentFieldMapperMapperPass()); } } diff --git a/composer.json b/composer.json index c629c7e4..c2a07a34 100644 --- a/composer.json +++ b/composer.json @@ -15,7 +15,9 @@ "ext-dom": "*", "ibexa/core": "^4.6", "symfony/messenger": "^5.4", - "symfony/proxy-manager-bridge": "^5.4" + "symfony/proxy-manager-bridge": "^5.4", + "ext-libxml": "*", + "ext-curl": "*" }, "require-dev": { "ibexa/fieldtype-richtext": "^4.5", @@ -30,7 +32,8 @@ }, "suggest": { "netgen/ibexa-site-api": "Boost your site-building productivity with Ibexa CMS", - "ibexa/solr": "Supports advanced capabilities with Ibexa search API" + "ibexa/solr": 
"Supports advanced capabilities with Ibexa search API", + "ibexa/elasticsearch": "Needed for layouts indexer" }, "autoload": { "psr-4": { @@ -51,5 +54,10 @@ }, "scripts": { "test": "@php vendor/bin/phpunit --colors=always" + }, + "config": { + "allow-plugins": { + "php-http/discovery": false + } } } diff --git a/lib/Command/IndexPageContentCommand.php b/lib/Command/IndexPageContentCommand.php new file mode 100644 index 00000000..20aea676 --- /dev/null +++ b/lib/Command/IndexPageContentCommand.php @@ -0,0 +1,170 @@ + $allowedContentTypes + */ + public function __construct( + private readonly ContentService $contentService, + private readonly SearchHandler $searchHandler, + private readonly PersistenceHandler $persistenceHandler, + private readonly array $allowedContentTypes, + ) { + parent::__construct($this::$defaultName); + } + + protected function configure(): void + { + $this + ->setDescription('Index content related through layouts') + ->addOption( + 'content-ids', + null, + InputOption::VALUE_REQUIRED, + 'Comma separated list of content id\'s of content to index.', + ); + } + + /** + * @throws NotFoundException + * @throws InvalidArgumentException + * @throws UnauthorizedException + */ + protected function execute(InputInterface $input, OutputInterface $output): int + { + $contentIds = $input->getOption('content-ids'); + if ($contentIds !== null) { + $contentIds = explode(',', $contentIds); + + $totalCount = count($contentIds); + $output->writeln("Number of objects to index: {$totalCount}"); + + $progressBar = new ProgressBar($output, $totalCount); + $progressBar->start(); + foreach ($contentIds as $contentId) { + $content = $this->contentService->loadContent((int) $contentId); + $this->indexContentWithLocations($content); + $progressBar->advance(); + } + } else { + $query = new Query(); + $offset = 0; + $limit = 50; + $query->query = new Criterion\ContentTypeIdentifier($this->allowedContentTypes); + $totalCount = $this->getTotalCount($query); + $progressBar = 
new ProgressBar($output, $totalCount); + + if ($totalCount <= 0) { + $output->writeln('No content found to index, exiting.'); + + return Command::SUCCESS; + } + + $output->writeln('Found ' . $totalCount . ' content objects...'); + $output->writeln(''); + + $progressBar->start($totalCount); + + while ($offset < $totalCount) { + $chunk = $this->getChunk($query, $limit, $offset); + + $this->processChunk($chunk, $output, $progressBar); + + $offset += $limit; + } + + $progressBar->finish(); + + $output->writeln(''); + $output->writeln(''); + $output->writeln('Finished.'); + } + + return Command::SUCCESS; + } + + /** + * @throws InvalidArgumentException + */ + private function getTotalCount(Query $query): int + { + $filter = new Filter(); + $filter + ->withCriterion( + new Query\Criterion\ContentTypeIdentifier($this->allowedContentTypes) + ) + ->withLimit(0) + ->withOffset(0) + ; + + return $this->contentService->find($filter)->getTotalCount() ?? 0; + } + + /** + * @throws InvalidArgumentException + */ + private function getChunk(Query $query, int $limit, int $offset): ContentList + { + $filter = new Filter(); + $filter + ->withLimit($limit) + ->withOffset($offset) + ; + return $this->contentService->find($filter); + } + + private function processChunk(ContentList $contentList, OutputInterface $output, ProgressBar $progressBar): void + { + foreach ($contentList->getIterator() as $content) { + try { + //$this->indexContentWithLocations($content); + $progressBar->advance(); + } catch (IndexPageUnavailableException $exception) { + $output->writeln($exception->getMessage()); + } + } + } + + private function indexContentWithLocations(Content $content): void + { + $this->searchHandler->indexContent( + $this->persistenceHandler->contentHandler()->load($content->id, $content->versionInfo->versionNo), + ); + + $locations = $this->persistenceHandler->locationHandler()->loadLocationsByContent($content->id); + foreach ($locations as $location) { + 
$this->searchHandler->indexLocation($location); + } + } +} diff --git a/lib/Container/Compiler/AggregateElasticsearchContentFieldMapperMapperPass.php b/lib/Container/Compiler/AggregateElasticsearchContentFieldMapperMapperPass.php new file mode 100644 index 00000000..d7490313 --- /dev/null +++ b/lib/Container/Compiler/AggregateElasticsearchContentFieldMapperMapperPass.php @@ -0,0 +1,50 @@ +processVisitors($container, 'block_translation'); + $this->processVisitors($container, 'block'); + $this->processVisitors($container, 'content'); + $this->processVisitors($container, 'content_translation'); + $this->processVisitors($container, 'location'); + $this->processVisitors($container, 'location_translation'); + } + + private function processVisitors(ContainerBuilder $container, string $name): void + { + if (!$container->hasDefinition(sprintf('netgen.ibexa_search_extra.elasticsearch.field_mapper.%s.aggregate', $name))) { + return; + } + + $aggregateDefinition = $container->getDefinition( + sprintf('netgen.ibexa_search_extra.elasticsearch.field_mapper.%s.aggregate', $name), + ); + + $this->registerMappers($aggregateDefinition, $container->findTaggedServiceIds(sprintf('netgen.ibexa_search_extra.elasticsearch.field_mapper.%s', $name))); + } + + private function registerMappers(Definition $definition, array $mapperIds): void + { + foreach (array_keys($mapperIds) as $id) { + $definition->addMethodCall('addMapper', [new Reference($id)]); + } + } +} diff --git a/lib/Container/Compiler/LayoutsPageIndexingPass.php b/lib/Container/Compiler/LayoutsPageIndexingPass.php new file mode 100644 index 00000000..9e53835b --- /dev/null +++ b/lib/Container/Compiler/LayoutsPageIndexingPass.php @@ -0,0 +1,61 @@ +getParameter( + 'netgen_ibexa_search_extra.use_page_indexing', + ); + + + if ($usePageIndexing !== true) { + return; + } + + $container + ->register(DocumentFactory::class, DocumentFactory::class) + ->setDecoratedService(DocumentFactoryInterface::class) + ->setArguments([ + new 
Reference('.inner'), + new Reference(Handler::class), + new Reference('netgen.ibexa_search_extra.elasticsearch.field_mapper.content.aggregate'), + new Reference('netgen.ibexa_search_extra.elasticsearch.field_mapper.location.aggregate'), + new Reference('netgen.ibexa_search_extra.elasticsearch.field_mapper.content_translation.aggregate'), + new Reference('netgen.ibexa_search_extra.elasticsearch.field_mapper.location_translation.aggregate'), + new Reference('netgen.ibexa_search_extra.elasticsearch.field_mapper.block.aggregate'), + new Reference('netgen.ibexa_search_extra.elasticsearch.field_mapper.block_translation.aggregate'), + ]); + + $container + ->register(ContentVisibilityVisitor::class, ContentVisibilityVisitor::class) + ->addTag('ibexa.search.elasticsearch.query.content.criterion.visitor'); + + $container + ->register(LocationVisibilityVisitor::class, LocationVisibilityVisitor::class) + ->addTag('ibexa.search.elasticsearch.query.location.criterion.visitor'); + + $container + ->register(ContentPageTextFieldMapper::class, ContentPageTextFieldMapper::class) + ->setArguments([ + new Reference('netgen.ibexa_search_extra.page_indexing.page_text_extractor'), + '%netgen.ibexa_search_extra.page_indexing.allowed_content_types%', + ]) + ->addTag('ibexa.search.solr.field.mapper.content.translation'); + } +} \ No newline at end of file diff --git a/lib/Core/Search/Common/PageTextExtractor.php b/lib/Core/Search/Common/PageTextExtractor.php new file mode 100644 index 00000000..827e3755 --- /dev/null +++ b/lib/Core/Search/Common/PageTextExtractor.php @@ -0,0 +1,283 @@ +|string>>> */ + private array $cache = []; + + private LoggerInterface $logger; + + /** + * @param array $siteRoots + * @param array> $languageAccessibility + * @param array> $pageTextConfig + */ + public function __construct( + private readonly ContentHandler $contentHandler, + private readonly LocationHandler $locationHandler, + private readonly RouterInterface $router, + private readonly array $siteRoots, + 
private readonly array $languageAccessibility, + private readonly string $pageIndexingHost, + private readonly array $pageTextConfig, + ) { + $this->logger = new NullLogger(); + } + + public function setLogger(LoggerInterface $logger): void + { + $this->logger = $logger; + } + + /** + * @param int $contentId + * @param string $languageCode + * + * @return array|string> + */ + public function extractPageText(int $contentId, string $languageCode): array + { + if (isset($this->cache[$contentId][$languageCode])) { + return $this->cache[$contentId][$languageCode]; + } + + if (count($this->cache) > 10) { + $this->cache = []; + } + + try { + $html = $this->fetchPageSource($contentId, $languageCode); + } catch (IndexPageUnavailableException|RuntimeException $e) { + $this->logger->error($e->getMessage()); + + return []; + } + + $textArray = $this->extractTextArray($html); + + $this->cache[$contentId][$languageCode] = $textArray; + + return $textArray; + } + + /** + * @param string $languageCode + * @param int $contentId + * + * @throws \Ibexa\Contracts\Core\Repository\Exceptions\NotFoundException + * @throws \Ibexa\Contracts\Core\Repository\Exceptions\UnauthorizedException + * + * @return string + */ + private function generateUrl(string $languageCode, int $contentId): string + { + $contentInfo = $this->contentHandler->loadContentInfo($contentId); + $siteAccess = $this->resolveSiteAccess($contentInfo, $languageCode); + + $relativePath = $this->router->generate( + 'ibexa.url.alias', + [ + 'locationId' => (int) $contentInfo->mainLocationId, + 'siteaccess' => $siteAccess, + ], + UrlGeneratorInterface::RELATIVE_PATH, + ); + + return $this->pageIndexingHost . 
$relativePath; + } + + private function resolveSiteAccess(ContentInfo $contentInfo, string $languageCode): string + { + try { + $location = $this->locationHandler->load($contentInfo->mainLocationId); + } catch (NotFoundException) { + throw new RuntimeException( + sprintf( + 'Content #%d does not have a location', + $contentInfo->id, + ), + ); + } + + $pathArray = explode('/', $location->pathString); + + foreach ($this->siteRoots as $site => $siteRoot) { + if (!in_array((string) $siteRoot, $pathArray, true)) { + continue; + } + + if (!isset($this->languageAccessibility[$site][$languageCode])) { + throw new RuntimeException("Language not supported for matched siteaccess group '{$site}'"); + } + + return $this->languageAccessibility[$site][$languageCode]; + } + + throw new RuntimeException("Failed to match content ID '{$contentInfo->id}' to a siteaccess"); + } + + /** + * @param \DOMNode $node + * @param array> $textArray + * + * @return array> + */ + private function recursiveExtractTextArray(DOMNode $node, array &$textArray): array + { + if ($node->nodeType === XML_ELEMENT_NODE || $node->nodeType === XML_HTML_DOCUMENT_NODE) { + $fieldLevel = $this->getFieldName($node); + + if ($fieldLevel !== null) { + $textArray[$fieldLevel][] = $node->textContent; + } else { + foreach ($node->childNodes as $childNode) { + $this->recursiveExtractTextArray($childNode, $textArray); + } + } + } elseif ($node->nodeType === XML_TEXT_NODE) { + $textContent = trim($node->textContent); + if ($textContent !== '') { + $textArray['other'][] = $textContent; + } + } + + return $textArray; + } + + private function getFieldName(DOMNode $node): null|string + { + foreach ($this->pageTextConfig as $level => $tags) { + foreach ($tags as $tag) { + $tagParts = explode('.', $tag); // Split tag and class if present + $tagName = $tagParts[0]; // Get the tag name + $class = $tagParts[1] ?? 
null; // Get the class if exists + + if ($node->nodeName !== $tagName) { + continue; + } + + if ($class !== null && !$this->hasClass($node, $class)) { + continue; + } + + return $level; + } + } + + return null; + } + + private function hasClass(DOMNode $node, string $className): bool + { + /** @var \DOMElement $node */ + $classes = explode(' ', $node->getAttribute('class')); + + return in_array($className, $classes, true); + } + + /** + * @throws NotFoundException + * @throws UnauthorizedException + * @throws \RuntimeException + */ + private function fetchPageSource(int $contentId, string $languageCode): string + { + $url = $this->generateUrl($languageCode, $contentId); + $curlHandle = curl_init($url); + + if ($curlHandle === false) { + throw new RuntimeException('There was an error initializing a cURL session'); + } + + curl_setopt($curlHandle, CURLOPT_RETURNTRANSFER, 1); + + $html = curl_exec($curlHandle); + if (!is_string($html)) { + throw new RuntimeException('curl_exec could not fetch url'); + } + + $httpCode = curl_getinfo($curlHandle, CURLINFO_HTTP_CODE); + + if ($httpCode !== 200) { + throw new IndexPageUnavailableException( + sprintf( + 'Could not fetch URL "%s": %s', + $url, + curl_error($curlHandle), + ), + ); + } + + curl_close($curlHandle); + + return $html; + } + + /** + * @param string $html + * + * @return array> + */ + private function extractTextArray(string $html): array + { + $startTag = ''; + $endTag = ''; + + $startPos = mb_strpos($html, $startTag); + $endPos = mb_strpos($html, $endTag); + + $textArray = []; + + if ($startPos !== false && $endPos !== false) { + $startPos += mb_strlen($startTag); + $extractedContent = mb_substr($html, $startPos, $endPos - $startPos); + + libxml_use_internal_errors(true); + $doc = new DOMDocument(); + $doc->loadHTML($extractedContent); + libxml_use_internal_errors(false); + $textArray = $this->recursiveExtractTextArray($doc, $textArray); + } + + return $textArray; + } +} diff --git 
a/lib/Core/Search/Elasticsearch/DocumentMapper/BlockFieldMapper.php b/lib/Core/Search/Elasticsearch/DocumentMapper/BlockFieldMapper.php new file mode 100644 index 00000000..6d9fa85f --- /dev/null +++ b/lib/Core/Search/Elasticsearch/DocumentMapper/BlockFieldMapper.php @@ -0,0 +1,20 @@ +addMapper($mapper); + } + } + + /** + * Adds given $mapper to the internal array as the next one in priority. + */ + public function addMapper(BlockFieldMapper $mapper): void + { + $this->mappers[] = $mapper; + } + + public function accept(SPIContent $content): bool + { + return true; + } + + public function mapFields(SPIContent $content): array + { + + $fields = []; + + foreach ($this->mappers as $mapper) { + if ($mapper->accept($content)) { + $fields = [...$fields, ...$mapper->mapFields($content)]; + } + } + + return $fields; + } +} \ No newline at end of file diff --git a/lib/Core/Search/Elasticsearch/DocumentMapper/BlockTranslationFieldMapper.php b/lib/Core/Search/Elasticsearch/DocumentMapper/BlockTranslationFieldMapper.php new file mode 100644 index 00000000..bdcccb10 --- /dev/null +++ b/lib/Core/Search/Elasticsearch/DocumentMapper/BlockTranslationFieldMapper.php @@ -0,0 +1,20 @@ +addMapper($mapper); + } + } + + /** + * Adds given $mapper to the internal array as the next one in priority. 
+ */ + public function addMapper(BlockTranslationFieldMapper $mapper): void + { + $this->mappers[] = $mapper; + } + + public function accept(SPIContent $content, string $languageCode): bool + { + return true; + } + + public function mapFields(SPIContent $content, string $languageCode): array + { + $fields = []; + + foreach ($this->mappers as $mapper) { + if ($mapper->accept($content, $languageCode)) { + $fields = [...$fields, ...$mapper->mapFields($content, $languageCode)]; + } + } + + return $fields; + } +} \ No newline at end of file diff --git a/lib/Core/Search/Elasticsearch/DocumentMapper/BlockTranslationFieldMapper/BlockPageTextFieldMapper.php b/lib/Core/Search/Elasticsearch/DocumentMapper/BlockTranslationFieldMapper/BlockPageTextFieldMapper.php new file mode 100644 index 00000000..07862137 --- /dev/null +++ b/lib/Core/Search/Elasticsearch/DocumentMapper/BlockTranslationFieldMapper/BlockPageTextFieldMapper.php @@ -0,0 +1,45 @@ + $allowedContentTypes + */ + public function __construct( + private readonly PageTextExtractor $pageTextExtractor, + private readonly ContentTypeHandler $contentTypeHandler, + private readonly array $allowedContentTypes, + ) {} + + public function accept(SPIContent $content, string $languageCode): bool + { + return true; + } + + /** + * @throws NotFoundException + */ + public function mapFields(SPIContent $content, string $languageCode): array + { + $fields = []; + $contentType = $this->contentTypeHandler->load($content->versionInfo->contentInfo->contentTypeId); + + if (in_array($contentType->identifier, $this->allowedContentTypes, true)) { + $text = $this->pageTextExtractor->extractPageText($content->versionInfo->contentInfo->id, $languageCode); + foreach ($text as $level => $value) { + $fields[] = new Field('page_text_' . 
$level, $value, new FullTextField()); + } + } + return $fields; + } +} \ No newline at end of file diff --git a/lib/Core/Search/Elasticsearch/DocumentMapper/ContentFieldMapper.php b/lib/Core/Search/Elasticsearch/DocumentMapper/ContentFieldMapper.php new file mode 100644 index 00000000..5a4889d0 --- /dev/null +++ b/lib/Core/Search/Elasticsearch/DocumentMapper/ContentFieldMapper.php @@ -0,0 +1,20 @@ +addMapper($mapper); + } + } + + /** + * Adds given $mapper to the internal array as the next one in priority. + */ + public function addMapper(ContentFieldMapper $mapper): void + { + $this->mappers[] = $mapper; + } + + public function accept(SPIContent $content): bool + { + return true; + } + + public function mapFields(SPIContent $content): array + { + + $fields = []; + + foreach ($this->mappers as $mapper) { + if ($mapper->accept($content)) { + $fields = [...$fields, ...$mapper->mapFields($content)]; + } + } + return $fields; + } +} \ No newline at end of file diff --git a/lib/Core/Search/Elasticsearch/DocumentMapper/ContentFieldMapper/ContentVisibilityFieldMapper.php b/lib/Core/Search/Elasticsearch/DocumentMapper/ContentFieldMapper/ContentVisibilityFieldMapper.php new file mode 100644 index 00000000..de03ab2a --- /dev/null +++ b/lib/Core/Search/Elasticsearch/DocumentMapper/ContentFieldMapper/ContentVisibilityFieldMapper.php @@ -0,0 +1,27 @@ +versionInfo->contentInfo->isHidden, + new BooleanField(), + )]; + } +} \ No newline at end of file diff --git a/lib/Core/Search/Elasticsearch/DocumentMapper/ContentTranslationFieldMapper.php b/lib/Core/Search/Elasticsearch/DocumentMapper/ContentTranslationFieldMapper.php new file mode 100644 index 00000000..600785a1 --- /dev/null +++ b/lib/Core/Search/Elasticsearch/DocumentMapper/ContentTranslationFieldMapper.php @@ -0,0 +1,20 @@ +addMapper($mapper); + } + } + + /** + * Adds given $mapper to the internal array as the next one in priority. 
+ */ + public function addMapper(ContentTranslationFieldMapper $mapper): void + { + $this->mappers[] = $mapper; + } + + public function accept(SPIContent $content, string $languageCode): bool + { + return true; + } + + public function mapFields(SPIContent $content, string $languageCode): array + { + $fields = []; + + foreach ($this->mappers as $mapper) { + if ($mapper->accept($content, $languageCode)) { + $fields = [...$fields, ...$mapper->mapFields($content, $languageCode)]; + } + } + + return $fields; + } +} \ No newline at end of file diff --git a/lib/Core/Search/Elasticsearch/DocumentMapper/DocumentFactory.php b/lib/Core/Search/Elasticsearch/DocumentMapper/DocumentFactory.php new file mode 100644 index 00000000..65bc94a7 --- /dev/null +++ b/lib/Core/Search/Elasticsearch/DocumentMapper/DocumentFactory.php @@ -0,0 +1,89 @@ + $allowedContentTypes + */ + public function __construct( + private readonly DocumentFactoryInterface $innerDocumentFactory, + private readonly ContentHandler $contentHandler, + private readonly ContentFieldMapper $contentFieldMapper, + private readonly LocationFieldMapper $locationFieldMapper, + private readonly ContentTranslationFieldMapper $contentTranslationFieldMapper, + private readonly LocationTranslationFieldMapper $locationTranslationFieldMapper, + private readonly BlockFieldMapper $blockFieldMapper, + private readonly BlockTranslationFieldMapper $blockTranslationMapper + ) {} + + public function fromContent(Content $content): Iterator + { + $result = $this->innerDocumentFactory->fromContent($content); + + /** @var \Ibexa\Contracts\Elasticsearch\Mapping\ContentDocument[] $documents */ + $documents = iterator_to_array($result); + + foreach ($documents as $document) { + $contentFields = $this->contentFieldMapper->mapFields($content); + $contentTranslationDependentFields = $this->contentTranslationFieldMapper->mapFields($content, $document->languageCode); + $blockFields = $this->blockFieldMapper->mapFields($content); + 
$blockTranslationDependentFields = $this->blockTranslationMapper->mapFields($content, $document->languageCode); + + $document->fields = [ + ...$document->fields, + ...$contentFields, + ...$contentTranslationDependentFields, + ...$blockFields, + ...$blockTranslationDependentFields + ]; + } + + return new ArrayIterator($documents); + } + + public function fromLocation(Location $location, ?Content $content = null): Iterator + { + if ($content === null) { + $content = $this->contentHandler->load($location->contentId); + } + $result = $this->innerDocumentFactory->fromLocation($location, $content); + + /** @var \Ibexa\Contracts\Elasticsearch\Mapping\LocationDocument[] $documents */ + $documents = iterator_to_array($result); + + + foreach ($documents as $document) { + $locationFields = $this->locationFieldMapper->mapFields($location); + $locationTranslationDependentFields = $this->locationTranslationFieldMapper->mapFields($location, $document->languageCode); + $blockFields = $this->blockFieldMapper->mapFields($content); + $blockTranslationDependentFields = $this->blockTranslationMapper->mapFields($content, $document->languageCode); + + $document->fields = [ + ...$document->fields, + ...$locationFields, + ...$locationTranslationDependentFields, + ...$blockFields, + ...$blockTranslationDependentFields + ]; + } + + return new ArrayIterator($documents); + } + +} diff --git a/lib/Core/Search/Elasticsearch/DocumentMapper/LocationFieldMapper.php b/lib/Core/Search/Elasticsearch/DocumentMapper/LocationFieldMapper.php new file mode 100644 index 00000000..3a71b3dc --- /dev/null +++ b/lib/Core/Search/Elasticsearch/DocumentMapper/LocationFieldMapper.php @@ -0,0 +1,21 @@ +addMapper($mapper); + } + } + + /** + * Adds given $mapper to the internal array as the next one in priority. 
+     */
+    public function addMapper(LocationFieldMapper $mapper): void
+    {
+        $this->mappers[] = $mapper;
+    }
+
+    /**
+     * Always returns true; each registered mapper is asked individually in mapFields().
+     */
+    public function accept(SPILocation $location): bool
+    {
+        return true;
+    }
+
+    /**
+     * Returns the fields of every registered mapper that accepts the given $location,
+     * concatenated in registration order.
+     */
+    public function mapFields(SPILocation $location): array
+    {
+        $collected = [];
+
+        foreach ($this->mappers as $candidate) {
+            if (!$candidate->accept($location)) {
+                continue;
+            }
+
+            $collected[] = $candidate->mapFields($location);
+        }
+
+        // Single merge at the end instead of re-spreading the accumulated list per mapper.
+        return array_merge([], ...$collected);
+    }
+}
\ No newline at end of file
diff --git a/lib/Core/Search/Elasticsearch/DocumentMapper/LocationFieldMapper/LocationVisibilityFieldMapper.php b/lib/Core/Search/Elasticsearch/DocumentMapper/LocationFieldMapper/LocationVisibilityFieldMapper.php
new file mode 100644
index 00000000..78aed4d0
--- /dev/null
+++ b/lib/Core/Search/Elasticsearch/DocumentMapper/LocationFieldMapper/LocationVisibilityFieldMapper.php
@@ -0,0 +1,36 @@
+contentHandler->load($location->contentId);
+        return [
+            new Field(
+                'ng_location_visible',
+                !$location->hidden && !$location->invisible && !$content->versionInfo->contentInfo->isHidden,
+                new BooleanField(),
+            )
+        ];
+    }
+}
\ No newline at end of file
diff --git a/lib/Core/Search/Elasticsearch/DocumentMapper/LocationTranslationFieldMapper.php b/lib/Core/Search/Elasticsearch/DocumentMapper/LocationTranslationFieldMapper.php
new file mode 100644
index 00000000..e05052b3
--- /dev/null
+++ b/lib/Core/Search/Elasticsearch/DocumentMapper/LocationTranslationFieldMapper.php
@@ -0,0 +1,20 @@
+addMapper($mapper);
+        }
+    }
+
+    /**
+     * Adds given $mapper to the internal array as the next one in priority.
+     */
+    public function addMapper(LocationTranslationFieldMapper $mapper): void
+    {
+        $this->mappers[] = $mapper;
+    }
+
+    /**
+     * Always returns true; each registered mapper is asked individually in mapFields().
+     */
+    public function accept(SPILocation $location, string $languageCode): bool
+    {
+        return true;
+    }
+
+    /**
+     * Returns the fields of every registered mapper that accepts the given $location and
+     * $languageCode, concatenated in registration order.
+     */
+    public function mapFields(SPILocation $location, string $languageCode): array
+    {
+        $collected = [];
+
+        foreach ($this->mappers as $candidate) {
+            if (!$candidate->accept($location, $languageCode)) {
+                continue;
+            }
+
+            $collected[] = $candidate->mapFields($location, $languageCode);
+        }
+
+        // Single merge at the end instead of re-spreading the accumulated list per mapper.
+        return array_merge([], ...$collected);
+    }
+}
\ No newline at end of file
diff --git a/lib/Core/Search/Elasticsearch/Query/CriterionVisitor/Content/VisibilityVisitor.php b/lib/Core/Search/Elasticsearch/Query/CriterionVisitor/Content/VisibilityVisitor.php
new file mode 100644
index 00000000..0d5d2fba
--- /dev/null
+++ b/lib/Core/Search/Elasticsearch/Query/CriterionVisitor/Content/VisibilityVisitor.php
@@ -0,0 +1,31 @@
+ $value */
+        $value = $criterion->value;
+
+        return $value[0] === true;
+    }
+}
diff --git a/lib/Core/Search/Elasticsearch/Query/CriterionVisitor/Location/VisibilityVisitor.php b/lib/Core/Search/Elasticsearch/Query/CriterionVisitor/Location/VisibilityVisitor.php
new file mode 100644
index 00000000..e82d2abc
--- /dev/null
+++ b/lib/Core/Search/Elasticsearch/Query/CriterionVisitor/Location/VisibilityVisitor.php
@@ -0,0 +1,31 @@
+ $value */
+        $value = $criterion->value;
+
+        return $value[0] === true;
+    }
+}
diff --git a/lib/Core/Search/Solr/FieldMapper/ContentTranslation/ContentPageTextFieldMapper.php b/lib/Core/Search/Solr/FieldMapper/ContentTranslation/ContentPageTextFieldMapper.php
new file mode 100644
index 00000000..a4d5285b
--- /dev/null
+++ b/lib/Core/Search/Solr/FieldMapper/ContentTranslation/ContentPageTextFieldMapper.php
@@ -0,0 +1,44 @@
+versionInfo->contentInfo->contentTypeId;
+
+        // NOTE(review): the value checked here appears to come from contentInfo->contentTypeId; if
+        // $allowedContentTypes holds content type identifiers (strings), this strict in_array()
+        // can never match and an empty field list is always returned. Confirm.
+        if (!in_array($contentTypeIdentifier, $this->allowedContentTypes, true)) {
+            return [];
+        }
+
+        $text = $this->pageTextExtractor->extractPageText($content->versionInfo->contentInfo->id, $languageCode);
+        $pageTextFields = [];
+        foreach ($text as $level => $value) {
+            $pageTextFields[] = new Field(
+                'page_text_' . $level,
+                $value,
+                new FullTextField(),
+            );
+        }
+        return $pageTextFields;
+    }
+}
\ No newline at end of file
diff --git a/lib/Exception/IndexPageUnavailableException.php b/lib/Exception/IndexPageUnavailableException.php
new file mode 100644
index 00000000..40e4813c
--- /dev/null
+++ b/lib/Exception/IndexPageUnavailableException.php
@@ -0,0 +1,15 @@
+