From 39de753b870bec08cd77a5510ac123e581c1fb32 Mon Sep 17 00:00:00 2001 From: Steve Boyd Date: Tue, 4 Jul 2023 16:05:48 +1200 Subject: [PATCH] ENH Optimise site search --- docs/en/searching-blocks.md | 30 +++++++ .../ElementSiteTreeFilterSearch.php | 14 +++- src/Extensions/ElementalPageExtension.php | 72 ++++++++++++++-- src/Models/BaseElement.php | 63 ++++++++++++++ tests/BaseElementTest.php | 8 ++ .../ElementSiteTreeFilterSearchTest.php | 82 +++++++++++++++++++ .../ElementSiteTreeFilterSearchTest.yml | 9 ++ tests/Src/TestElementContentExtension.php | 20 +++++ 8 files changed, 289 insertions(+), 9 deletions(-) create mode 100644 tests/Src/TestElementContentExtension.php diff --git a/docs/en/searching-blocks.md b/docs/en/searching-blocks.md index e983dd02..d8a37206 100644 --- a/docs/en/searching-blocks.md +++ b/docs/en/searching-blocks.md @@ -40,3 +40,33 @@ to make it clear in search results where one piece of content ends and another b Page: search_index_element_delimiter: ' ... ' ``` + +## CMS page search + +CMS page search will include search results for pages with elements that match the search query. + +By default it uses the same method as the search indexing where it will fully render every element that is +being searched. This is an expensive operation and can cause performance issues if you have a large site with a lot of elements. + +To increase performance by a large amount, likely more than doubling it, you can disable the rendering of elements and instead just look at the database values of the elements directly. + +```yml +DNADesign\Elemental\Controllers\ElementSiteTreeFilterSearch: + render_elements: false +``` + +If `render_elements` is `false`, then all fields that have stored as a Varchar or Text like are searched. Individual fields on blocks can be excluded from the search by adding fields to the `exclude_fields_from_cms_search` array config variable on the element class. e.g. + +```yml +App\MyElement: + fields_excluded_from_cms_search: + - MyFieldToExclude + - AnotherFieldToExclude +``` + +If the above is still not performant enough, searching elements for content in CMS page search can be disabled entirely can be disabled: + +```yml +DNADesign\Elemental\Controllers\ElementSiteTreeFilterSearch: + search_for_term_in_content: false +``` diff --git a/src/Controllers/ElementSiteTreeFilterSearch.php b/src/Controllers/ElementSiteTreeFilterSearch.php index 2313c556..2fcc74e6 100644 --- a/src/Controllers/ElementSiteTreeFilterSearch.php +++ b/src/Controllers/ElementSiteTreeFilterSearch.php @@ -20,6 +20,11 @@ class ElementSiteTreeFilterSearch extends CMSSiteTreeFilter_Search */ private static $search_for_term_in_content = true; + /** + * Whether to render elements with templates when doing a CMS SiteTree search + */ + private static bool $render_elements = true; + /** * @var array */ @@ -47,8 +52,13 @@ protected function applyDefaultFilters($query) return false; } - // Check whether the search term exists in the nested page content - $pageContent = $siteTree->getElementsForSearch(); + if ($this->config()->get('render_elements') === true) { + // Check whether the search term exists in the nested page content + $pageContent = $siteTree->getElementsForSearch(); + } else { + $pageContent = $siteTree->getContentFromElementsForCmsSearch(); + } + return stripos($pageContent ?? '', $this->params['Term'] ?? '') !== false; }); diff --git a/src/Extensions/ElementalPageExtension.php b/src/Extensions/ElementalPageExtension.php index 3df01a97..246e5128 100644 --- a/src/Extensions/ElementalPageExtension.php +++ b/src/Extensions/ElementalPageExtension.php @@ -38,6 +38,13 @@ class ElementalPageExtension extends ElementalAreasExtension */ private static $search_index_element_delimiter = ' '; + /** + * Used to cache all ElementalArea's prior to eager loading elements + * + * @internal + */ + private static ?array $elementalAreas = null; + /** * Returns the contents of each ElementalArea has_one's markup for use in Solr or Elastic search indexing * @@ -49,14 +56,17 @@ public function getElementsForSearch() SSViewer::set_themes(SSViewer::config()->get('themes')); try { $output = []; - $this->loopThroughElements(function (BaseElement $element) use (&$output) { - if ($element->getSearchIndexable()) { - $content = $element->getContentForSearchIndex(); - if ($content) { - $output[] = $content; - } + $elements = $this->getEagerLoadedElements(); + /** @var BaseElement $element */ + foreach ($elements as $element) { + if (!$element->getSearchIndexable()) { + continue; } - }); + $content = $element->getContentForSearchIndex(); + if ($content) { + $output[] = $content; + } + } } finally { // Reset theme if an exception occurs, if you don't have a // try / finally around code that might throw an Exception, @@ -66,6 +76,28 @@ public function getElementsForSearch() return implode($this->owner->config()->get('search_index_element_delimiter') ?? '', $output); } + /** + * Returns the contents of all Elements on the pages ElementalAreas for use in CMS search + */ + public function getContentFromElementsForCmsSearch(): string + { + $output = []; + $elements = $this->getEagerLoadedElements(); + /** @var BaseElement $element */ + foreach ($elements as $element) { + if (!$element->getSearchIndexable()) { + continue; + } + $content = $element->getContentForCmsSearch(); + if ($content) { + $output[] = $content; + } + } + // Use |%| to delimite different elements rather than space so that you don't + // accidentally join results of two elements that are next to each other in a table + return implode('|%|', $output); + } + /** * @see SiteTree::getAnchorsOnPage() */ @@ -98,6 +130,32 @@ public function MetaTags(&$tags) } } + private function getEagerLoadedElements(): array + { + $elements = []; + if (is_null(self::$elementalAreas)) { + self::$elementalAreas = []; + foreach (ElementalArea::get()->eagerLoad('Elements') as $elementalArea) { + self::$elementalAreas[$elementalArea->ID] = $elementalArea; + } + } + foreach ($this->owner->hasOne() as $relation => $class) { + if (!is_a($class, ElementalArea::class, true)) { + continue; + } + $elementalAreaID = $this->owner->{"{$relation}ID"}; + if ($elementalAreaID && array_key_exists($elementalAreaID, self::$elementalAreas)) { + $elementalArea = self::$elementalAreas[$elementalAreaID]; + } else { + $elementalArea = $this->owner->$relation(); + } + foreach ($elementalArea->Elements() as $element) { + $elements[] = $element; + } + } + return $elements; + } + /** * Call some function over all elements belonging to this page */ diff --git a/src/Models/BaseElement.php b/src/Models/BaseElement.php index 44925ad8..d97d4c21 100644 --- a/src/Models/BaseElement.php +++ b/src/Models/BaseElement.php @@ -33,6 +33,8 @@ use SilverStripe\View\Parsers\URLSegmentFilter; use SilverStripe\View\Requirements; use SilverStripe\ORM\CMSPreviewable; +use SilverStripe\Core\Config\Config; +use SilverStripe\ORM\DataObjectSchema; /** * Class BaseElement @@ -66,6 +68,14 @@ class BaseElement extends DataObject implements CMSPreviewable */ private static $description = 'Base element class'; + /** + * List of fields to exclude from CMS SiteTree seatch + * @see ElementSiteTreeFilterSearch::applyDefaultFilters() + */ + private static array $fields_excluded_from_cms_search = [ + 'ExtraClass', + ]; + private static $db = [ 'Title' => 'Varchar(255)', 'ShowTitle' => 'Boolean', @@ -528,6 +538,59 @@ public function getContentForSearchIndex(): string return $content; } + /** + * Provides content for CMS search if ElementSiteTreeFilterSearch.render_elements is false + */ + public function getContentForCmsSearch(): string + { + $fieldNames = $this->getTextualDatabaseFieldNames(); + $excludedFieldNames = $this->getFieldNamesExcludedFromCmsSearch(); + $contents = []; + foreach ($fieldNames as $fieldName) { + if (in_array($fieldName, $excludedFieldNames)) { + continue; + } + $contents[] = $this->$fieldName; + } + // Allow projects to update contents of third-party elements. + $this->extend('updateContentForCmsSearch', $contents); + + // Use |#| to delimit different fields rather than space so that you don't + // accidentally join results of two columns that are next to each other in a table + $content = implode('|#|', $contents); + + // Strips tags and be sure there's a space between words. + $content = trim(strip_tags(str_replace('<', ' <', $content))); + + return $content; + } + + /** + * Get field names that have a Varchar or Text like type in the database + */ + private function getTextualDatabaseFieldNames(): array + { + $fieldNames = []; + $textualDatabaseFields = DataObject::getSchema()->databaseFields($this); + foreach ($textualDatabaseFields as $fieldName => $databaseFieldType) { + $lcType = strtolower(strtok($databaseFieldType ?? '', '(')); + if (str_contains($lcType, 'varchar') || str_contains($lcType, 'text')) { + $fieldNames[] = $fieldName; + } + } + return $fieldNames; + } + + private function getFieldNamesExcludedFromCmsSearch(): array + { + return [ + // `fixed_fields` contains ['ID', 'ClassName', 'LastEdited', 'Created'] and possibly more + ...array_keys(static::config()->get('fixed_fields')), + // manually excluded fields + ...static::config()->get('fields_excluded_from_cms_search') + ]; + } + /** * Default way to render element in templates. Note that all blocks should * be rendered through their {@link ElementController} class as this diff --git a/tests/BaseElementTest.php b/tests/BaseElementTest.php index 99e1df50..4d86b7c3 100644 --- a/tests/BaseElementTest.php +++ b/tests/BaseElementTest.php @@ -495,4 +495,12 @@ public function testPreviewLink(string $class, string $elementIdentifier, ?strin $this->assertSame($link, $previewLink); } } + + public function testGetContentForCmsSearch() + { + $element = $this->objFromFixture(ElementContent::class, 'content1'); + $this->assertSame('Test Content', $element->getContentForCmsSearch()); + $element = $this->objFromFixture(TestElement::class, 'elementDataObject3'); + $this->assertSame('Element 3|#|Hello Test', $element->getContentForCmsSearch()); + } } diff --git a/tests/Controllers/ElementSiteTreeFilterSearchTest.php b/tests/Controllers/ElementSiteTreeFilterSearchTest.php index b79adbc7..da7ae7f3 100644 --- a/tests/Controllers/ElementSiteTreeFilterSearchTest.php +++ b/tests/Controllers/ElementSiteTreeFilterSearchTest.php @@ -6,6 +6,13 @@ use DNADesign\Elemental\Tests\Src\TestPage; use SilverStripe\CMS\Controllers\CMSSiteTreeFilter_Search; use SilverStripe\Dev\SapphireTest; +use DNADesign\Elemental\Controllers\ElementSiteTreeFilterSearch; +use ReflectionMethod; +use SilverStripe\ORM\DataObject; +use SilverStripe\Core\Config\Config; +use SilverStripe\ORM\DataObjectSchema; +use DNADesign\Elemental\Models\ElementContent; +use DNADesign\Elemental\Tests\Src\TestElementContentExtension; class ElementSiteTreeFilterSearchTest extends SapphireTest { @@ -15,6 +22,9 @@ class ElementSiteTreeFilterSearchTest extends SapphireTest TestPage::class => [ ElementalPageExtension::class, ], + ElementContent::class => [ + TestElementContentExtension::class, + ] ]; protected static $extra_dataobjects = [ @@ -52,4 +62,76 @@ public function searchProvider() ]], ]; } + + /** + * @dataProvider provideApplyDefaultFilters + */ + public function testApplyDefaultFilters(bool $renderElements, string $term, array $expected): void + { + // Set protected method visibility - applyDefaultFilters() is essentially an + // extension method that's called by silverstripe/cms, so use of reflection here + // is pretty much required + $method = new ReflectionMethod(ElementSiteTreeFilterSearch::class, 'applyDefaultFilters'); + Config::modify()->set(ElementSiteTreeFilterSearch::class, 'render_elements', $renderElements); + $filterSearch = new ElementSiteTreeFilterSearch(['Term' => $term]); + $ret = $method->invoke($filterSearch, DataObject::get(TestPage::class)); + $this->assertSame($expected, $ret->column('Title')); + } + + public function provideApplyDefaultFilters(): array + { + + return [ + 'render_elements true - text search' => [ + 'render_elements' => true, + 'term' => 'This content is rendered', + 'expected' => ['Content blocks page'] + ], + 'render_elements true - unrendered search' => [ + 'render_elements' => true, + 'term' => 'This field is unrendered', + 'expected' => [] + ], + 'render_elements true - extended search' => [ + 'render_elements' => true, + 'term' => 'This content is from an extension hook', + 'expected' => [] + ], + 'render_elements true - int search' => [ + 'render_elements' => true, + 'term' => '456', + 'expected' => [] + ], + 'render_elements true - enum search' => [ + 'render_elements' => true, + 'term' => 'Sunny', + 'expected' => [] + ], + 'render_elements false - text search' => [ + 'render_elements' => false, + 'term' => 'This content is rendered', + 'expected' => ['Content blocks page'] + ], + 'render_elements false - unrendered search' => [ + 'render_elements' => false, + 'term' => 'This field is unrendered', + 'expected' => ['Content blocks page'] + ], + 'render_elements false - extended search' => [ + 'render_elements' => false, + 'term' => 'This content is from an extension hook', + 'expected' => ['Content blocks page'] + ], + 'render_elements false - int search' => [ + 'render_elements' => false, + 'term' => '456', + 'expected' => [] + ], + 'render_elements false - enum search' => [ + 'render_elements' => false, + 'term' => 'Sunny', + 'expected' => [] + ], + ]; + } } diff --git a/tests/Controllers/ElementSiteTreeFilterSearchTest.yml b/tests/Controllers/ElementSiteTreeFilterSearchTest.yml index 1e5645bb..30d8d77b 100644 --- a/tests/Controllers/ElementSiteTreeFilterSearchTest.yml +++ b/tests/Controllers/ElementSiteTreeFilterSearchTest.yml @@ -17,3 +17,12 @@ DNADesign\Elemental\Models\ElementContent: Title: Content HTML: Specifically blocks page content ParentID: =>DNADesign\Elemental\Models\ElementalArea.blocks_page_area + MyInt: 123 + MyEnum: Cloudy + blocks_page_unrendered_content: + Title: My title + HTML: This content is rendered + ParentID: =>DNADesign\Elemental\Models\ElementalArea.blocks_page_area + UnrenderedField: This field is unrendered + MyInt: 456 + MyEnum: Sunny diff --git a/tests/Src/TestElementContentExtension.php b/tests/Src/TestElementContentExtension.php new file mode 100644 index 00000000..d73931b3 --- /dev/null +++ b/tests/Src/TestElementContentExtension.php @@ -0,0 +1,20 @@ + 'Varchar(255)', + 'MyInt' => 'Int', + 'MyEnum' => 'Enum("Sunny, Cloudy", "Sunny")' + ]; + + public function updateContentForCmsSearch(array &$contents) + { + $contents[] = 'This content is from an extension hook'; + } +}