Skip to content

Commit

Permalink
ENH Optimise site search
Browse files Browse the repository at this point in the history
  • Loading branch information
emteknetnz committed Jun 28, 2023
1 parent 96c81fe commit 611cff2
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 5 deletions.
21 changes: 21 additions & 0 deletions docs/en/searching-blocks.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,24 @@ to make it clear in search results where one piece of content ends and another b
Page:
search_index_element_delimiter: ' ... '
```

## CMS page search

CMS page search will include search results for pages with elements that match the search query.

By default it uses the same method as the search indexing, where it will fully render every element that is
being searched. This is an expensive operation and can cause performance issues if you have a large site with a lot of elements.

To increase performance by a large amount, likely more than doubling it, you can disable the rendering of elements and just look at the database values of the elements instead.

```yml
DNADesign\Elemental\Controllers\ElementSiteTreeFilterSearch:
  render_elements_for_cms_search: false
```

Searching elements for content in CMS page search can be disabled entirely:

```yml
DNADesign\Elemental\Controllers\ElementSiteTreeFilterSearch:
  search_for_term_in_content: false
```
14 changes: 12 additions & 2 deletions src/Controllers/ElementSiteTreeFilterSearch.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ class ElementSiteTreeFilterSearch extends CMSSiteTreeFilter_Search
*/
private static $search_for_term_in_content = true;

/**
* @todo Change to false for CMS6?
*/
private static bool $render_elements_for_cms_search = true;

/**
* @var array
*/
Expand Down Expand Up @@ -47,8 +52,13 @@ protected function applyDefaultFilters($query)
return false;
}

// Check whether the search term exists in the nested page content
$pageContent = $siteTree->getElementsForSearch();
if ($this->config()->get('render_elements_for_cms_search') === true) {
// Check whether the search term exists in the nested page content
$pageContent = $siteTree->getElementsForSearch();
} else {
$pageContent = $siteTree->getContentFromElementsForCmsSearch();
}

return stripos($pageContent ?? '', $this->params['Term'] ?? '') !== false;
});

Expand Down
75 changes: 72 additions & 3 deletions src/Extensions/ElementalPageExtension.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ class ElementalPageExtension extends ElementalAreasExtension
*/
private static $search_index_element_delimiter = ' ';

/**
* Cache of every ElementalArea, keyed by ID, fetched with its Elements eager loaded
*
* @internal
*/
private static ?array $elementalAreas = null;

/**
* Returns the contents of each ElementalArea has_one's markup for use in Solr or Elastic search indexing
*
Expand All @@ -49,14 +56,30 @@ public function getElementsForSearch()
SSViewer::set_themes(SSViewer::config()->get('themes'));
try {
$output = [];
$this->loopThroughElements(function (BaseElement $element) use (&$output) {
if ($element->getSearchIndexable()) {
// don't merge this, I've kept it here just for testing performance
$legacy = false;
if ($legacy) {
$this->loopThroughElements(function (BaseElement $element) use (&$output) {
if ($element->getSearchIndexable()) {
$content = $element->getContentForSearchIndex();
if ($content) {
$output[] = $content;
}
}
});
} else {
$elements = $this->getEagerLoadedElements();
/** @var BaseElement $element */
foreach ($elements as $element) {
if (!$element->getSearchIndexable()) {
continue;
}
$content = $element->getContentForSearchIndex();
if ($content) {
$output[] = $content;
}
}
});
}
} finally {
// Reset theme if an exception occurs, if you don't have a
// try / finally around code that might throw an Exception,
Expand All @@ -66,6 +89,26 @@ public function getElementsForSearch()
return implode($this->owner->config()->get('search_index_element_delimiter') ?? '', $output);
}

/**
 * Builds one searchable string out of the content of every element found in this
 * page's ElementalAreas, for use by the CMS page search.
 *
 * @return string Per-element content joined with `|%|`; elements with no content are omitted
 */
public function getContentFromElementsForCmsSearch(): string
{
    // note: $element->getSearchIndexable() is not consulted here because it's not public facing
    $contentPerElement = array_map(
        static fn ($element) => $element->getContentForCmsSearch(),
        $this->getEagerLoadedElements()
    );

    // Drop elements that produced no content, then join with |%| rather than a space so
    // that the text of two neighbouring elements can never run together into a false match
    return implode('|%|', array_filter($contentPerElement));
}

/**
* @see SiteTree::getAnchorsOnPage()
*/
Expand Down Expand Up @@ -98,6 +141,32 @@ public function MetaTags(&$tags)
}
}

/**
 * Collects the elements of every ElementalArea has_one relation on the owner.
 *
 * The first call fetches all ElementalAreas with their Elements eager loaded and stores
 * them, keyed by ID, in a static cache shared by later calls. Areas missing from that
 * cache are resolved through the owner's relation method instead.
 *
 * @return array Elements of all of the owner's ElementalAreas, in relation order
 */
private function getEagerLoadedElements(): array
{
    // Populate the shared cache once; it is assigned before the loop so it is never null afterwards
    if (self::$elementalAreas === null) {
        self::$elementalAreas = [];
        $allAreas = ElementalArea::get()->eagerLoad('Elements');
        foreach ($allAreas as $area) {
            self::$elementalAreas[$area->ID] = $area;
        }
    }

    $collected = [];
    foreach ($this->owner->hasOne() as $relationName => $relationClass) {
        // Only has_one relations that point at an ElementalArea (or a subclass) are of interest
        if (is_a($relationClass, ElementalArea::class, true)) {
            $areaID = $this->owner->{$relationName . 'ID'};
            $isCached = $areaID && array_key_exists($areaID, self::$elementalAreas);
            // Prefer the eager loaded copy, fall back to the regular relation lookup
            $area = $isCached
                ? self::$elementalAreas[$areaID]
                : $this->owner->$relationName();
            foreach ($area->Elements() as $element) {
                $collected[] = $element;
            }
        }
    }

    return $collected;
}

/**
* Call some function over all elements belonging to this page
*/
Expand Down
43 changes: 43 additions & 0 deletions src/Models/BaseElement.php
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,49 @@ public function getContentForSearchIndex(): string
return $content;
}

/**
 * Provides content for CMS search.
 *
 * Reads the raw value of each field listed in this element's `db` config, skipping a
 * fixed set of housekeeping fields and any field that has no value. The remaining values
 * are joined with `|#|`, HTML tags are stripped, and extensions may then adjust the
 * result through the `updateContentForCmsSearch` hook.
 *
 * @return string Tag-stripped, trimmed text; an empty string (before extensions run)
 *                when none of the included fields holds a value
 */
public function getContentForCmsSearch(): string
{
    // https://docs.silverstripe.org/en/developer_guides/model/data_types_and_casting/
    // probably won't actually get things like 'CanViewType' in db config
    $excludedFieldNames = [
        'LastEdited',
        'Created',
        'CanViewType',
        'CanEditType',
        'Version',
        'ShowInMenus',
        'ShowInSearch',
        'Sort',
        'HasBrokenFile',
        'HasBrokenLink',
        'ReportClass',
    ];

    $contents = [];
    // Fall back to an empty list so a missing db config does not raise a foreach warning
    foreach (($this->config()->get('db') ?? []) as $fieldName => $fieldType) {
        // NOTE(review): the 'ID' and 'Hash' suffix checks test $fieldType while 'ClassName'
        // tests $fieldName - confirm whether the field name was intended for those as well
        if (in_array($fieldName, $excludedFieldNames, true)
            || substr($fieldType, -2) === 'ID'
            || substr($fieldType, -3) === 'Key'
            || substr($fieldName, -9) === 'ClassName'
            || substr($fieldType, -4) === 'Hash'
        ) {
            continue;
        }

        // TODO: if $fieldType is HTMLText then run shortcode parser on it?
        // (possibly not? unlikely to really matter from a cms search perspective?)
        $value = $this->$fieldName;

        // Skip fields without a value so that an element with no text yields '' rather than
        // a delimiter-only string such as '|#||#|', which callers would treat as content
        if ($value === null || $value === '') {
            continue;
        }
        $contents[] = $value;
    }

    // Use |#| to delimit different fields rather than space so that you don't
    // accidentally join results of two columns that are next to each other in a table
    $content = implode('|#|', $contents);

    // Strips tags and be sure there's a space between words.
    $content = trim(strip_tags(str_replace('<', ' <', $content)));

    // Allow projects to update content of third-party elements.
    $this->extend('updateContentForCmsSearch', $content);
    return $content;
}

/**
* Default way to render element in templates. Note that all blocks should
* be rendered through their {@link ElementController} class as this
Expand Down

0 comments on commit 611cff2

Please sign in to comment.