From eb993cd886e65b137d475435bbe4eaf0c5e7f3bd Mon Sep 17 00:00:00 2001 From: Steve Boyd Date: Mon, 3 Jul 2023 13:20:00 +1200 Subject: [PATCH] ENH Optimise site search --- docs/en/searching-blocks.md | 30 ++++++++ .../ElementSiteTreeFilterSearch.php | 14 +++- src/Extensions/ElementalPageExtension.php | 72 +++++++++++++++++-- src/Models/BaseElement.php | 50 +++++++++++++ 4 files changed, 157 insertions(+), 9 deletions(-) diff --git a/docs/en/searching-blocks.md b/docs/en/searching-blocks.md index e983dd02..0d270ccc 100644 --- a/docs/en/searching-blocks.md +++ b/docs/en/searching-blocks.md @@ -40,3 +40,33 @@ to make it clear in search results where one piece of content ends and another b Page: search_index_element_delimiter: ' ... ' ``` + +## CMS page search + +CMS page search will include search results for pages with elements that match the search query. + +By default it uses the same method as the search indexing where it will fully render every element that is +being searched. This is an expensive operation and can cause performance issues if you have a large site with a lot of elements. + +To increase performance by a large amount, likely more than doubling it, you can disable the rendering of elements and instead just look at the database values of the elements directly. + +```yml +DNADesign\Elemental\Controllers\ElementSiteTreeFilterSearch: + render_elements: false +``` + +If `render_elements` is set to `false`, then all fields that are not generic fields such as keys or something like `CanViewType` are searched. Individual fields on blocks can be excluded from the search by adding fields to the `exclude_fields_from_cms_search` array config variable on the element class. e.g.
+ +```yml +App\MyElement: + exclude_fields_from_cms_search: + - MyFieldToExclude + - AnotherFieldToExclude +``` + +If the above is still not performant enough, searching elements for content in CMS page search can be disabled entirely: + +```yml +DNADesign\Elemental\Controllers\ElementSiteTreeFilterSearch: + search_for_term_in_content: false +``` diff --git a/src/Controllers/ElementSiteTreeFilterSearch.php b/src/Controllers/ElementSiteTreeFilterSearch.php index 2313c556..8fea62fe 100644 --- a/src/Controllers/ElementSiteTreeFilterSearch.php +++ b/src/Controllers/ElementSiteTreeFilterSearch.php @@ -20,6 +20,11 @@ class ElementSiteTreeFilterSearch extends CMSSiteTreeFilter_Search */ private static $search_for_term_in_content = true; + /** + * @todo Change to false for CMS6? + */ + private static bool $render_elements = true; + /** * @var array */ @@ -47,8 +52,13 @@ protected function applyDefaultFilters($query) return false; } - // Check whether the search term exists in the nested page content - $pageContent = $siteTree->getElementsForSearch(); + if ($this->config()->get('render_elements') === true) { + // Check whether the search term exists in the nested page content + $pageContent = $siteTree->getElementsForSearch(); + } else { + $pageContent = $siteTree->getContentFromElementsForCmsSearch(); + } + return stripos($pageContent ?? '', $this->params['Term'] ??
'') !== false; }); diff --git a/src/Extensions/ElementalPageExtension.php b/src/Extensions/ElementalPageExtension.php index 3df01a97..246e5128 100644 --- a/src/Extensions/ElementalPageExtension.php +++ b/src/Extensions/ElementalPageExtension.php @@ -38,6 +38,13 @@ class ElementalPageExtension extends ElementalAreasExtension */ private static $search_index_element_delimiter = ' '; + /** + * Used to cache all ElementalArea's prior to eager loading elements + * + * @internal + */ + private static ?array $elementalAreas = null; + /** * Returns the contents of each ElementalArea has_one's markup for use in Solr or Elastic search indexing * @@ -49,14 +56,17 @@ public function getElementsForSearch() SSViewer::set_themes(SSViewer::config()->get('themes')); try { $output = []; - $this->loopThroughElements(function (BaseElement $element) use (&$output) { - if ($element->getSearchIndexable()) { - $content = $element->getContentForSearchIndex(); - if ($content) { - $output[] = $content; - } + $elements = $this->getEagerLoadedElements(); + /** @var BaseElement $element */ + foreach ($elements as $element) { + if (!$element->getSearchIndexable()) { + continue; } - }); + $content = $element->getContentForSearchIndex(); + if ($content) { + $output[] = $content; + } + } } finally { // Reset theme if an exception occurs, if you don't have a // try / finally around code that might throw an Exception, @@ -66,6 +76,28 @@ public function getElementsForSearch() return implode($this->owner->config()->get('search_index_element_delimiter') ?? 
'', $output); } + /** + * Returns the contents of all Elements on the page's ElementalAreas for use in CMS search + */ + public function getContentFromElementsForCmsSearch(): string + { + $output = []; + $elements = $this->getEagerLoadedElements(); + /** @var BaseElement $element */ + foreach ($elements as $element) { + if (!$element->getSearchIndexable()) { + continue; + } + $content = $element->getContentForCmsSearch(); + if ($content) { + $output[] = $content; + } + } + // Use |%| to delimit different elements rather than space so that you don't + // accidentally join results of two elements that are next to each other in a table + return implode('|%|', $output); + } + /** * @see SiteTree::getAnchorsOnPage() */ @@ -98,6 +130,32 @@ public function MetaTags(&$tags) } } + private function getEagerLoadedElements(): array + { + $elements = []; + if (is_null(self::$elementalAreas)) { + self::$elementalAreas = []; + foreach (ElementalArea::get()->eagerLoad('Elements') as $elementalArea) { + self::$elementalAreas[$elementalArea->ID] = $elementalArea; + } + } + foreach ($this->owner->hasOne() as $relation => $class) { + if (!is_a($class, ElementalArea::class, true)) { + continue; + } + $elementalAreaID = $this->owner->{"{$relation}ID"}; + if ($elementalAreaID && array_key_exists($elementalAreaID, self::$elementalAreas)) { + $elementalArea = self::$elementalAreas[$elementalAreaID]; + } else { + $elementalArea = $this->owner->$relation(); + } + foreach ($elementalArea->Elements() as $element) { + $elements[] = $element; + } + } + return $elements; + } + /** * Call some function over all elements belonging to this page */ diff --git a/src/Models/BaseElement.php b/src/Models/BaseElement.php index 44925ad8..50c4e31a 100644 --- a/src/Models/BaseElement.php +++ b/src/Models/BaseElement.php @@ -66,6 +66,12 @@ class BaseElement extends DataObject implements CMSPreviewable */ private static $description = 'Base element class'; + /** + * List of fields to exclude from CMS SiteTree
search + * @see ElementSiteTreeFilterSearch::applyDefaultFilters() + */ + private static array $exclude_fields_from_cms_search = []; + private static $db = [ 'Title' => 'Varchar(255)', 'ShowTitle' => 'Boolean', @@ -528,6 +534,50 @@ public function getContentForSearchIndex(): string return $content; } + /** + * Provides content for CMS search + */ + public function getContentForCmsSearch(): string + { + $contents = []; + foreach ($this->config()->get('db') as $fieldName => $fieldType) { + // https://docs.silverstripe.org/en/developer_guides/model/data_types_and_casting/ + // probably won't actually get things like 'CanViewType' in db config + if ($fieldName === 'LastEdited' + || $fieldName === 'Created' + || $fieldName === 'CanViewType' + || $fieldName === 'CanEditType' + || $fieldName === 'Version' + || $fieldName === 'ShowInMenus' + || $fieldName === 'ShowInSearch' + || $fieldName === 'Sort' + || $fieldName === 'HasBrokenFile' + || $fieldName === 'HasBrokenLink' + || $fieldName === 'ReportClass' + || substr($fieldType, -2) === 'ID' + || substr($fieldType, -3) === 'Key' + || substr($fieldName, -9) === 'ClassName' + || substr($fieldType, -4) === 'Hash' + || in_array($fieldName, $this->config()->get('exclude_fields_from_cms_search')) + ) { + continue; + } + // TODO: if $fieldType is HTMLText then run shortcode parser on it? + // (possibly not? unlikely to really matter from a cms search perspective?) + $contents[] = $this->$fieldName; + } + // Use |#| to delimit different fields rather than space so that you don't + // accidentally join results of two columns that are next to each other in a table + $content = implode('|#|', $contents); + + // Strip tags and make sure there's a space between words. + $content = trim(strip_tags(str_replace('<', ' <', $content))); + + // Allow projects to update content of third-party elements. + $this->extend('updateContentForCmsSearch', $content); + return $content; + } + /** * Default way to render element in templates.
Note that all blocks should * be rendered through their {@link ElementController} class as this