Skip to content

Commit

Permalink
Merge pull request #48652 from margelo/perf/search-suffix-ukkonen-tree
Browse files Browse the repository at this point in the history
Search suffix tree implementation
  • Loading branch information
marcaaron authored Oct 17, 2024
2 parents 2aae716 + 59d2562 commit 91c6e0c
Show file tree
Hide file tree
Showing 8 changed files with 736 additions and 14 deletions.
3 changes: 3 additions & 0 deletions src/CONST.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1125,6 +1125,9 @@ const CONST = {
SEARCH_OPTION_LIST_DEBOUNCE_TIME: 300,
RESIZE_DEBOUNCE_TIME: 100,
UNREAD_UPDATE_DEBOUNCE_TIME: 300,
SEARCH_CONVERT_SEARCH_VALUES: 'search_convert_search_values',
SEARCH_MAKE_TREE: 'search_make_tree',
SEARCH_BUILD_TREE: 'search_build_tree',
SEARCH_FILTER_OPTIONS: 'search_filter_options',
USE_DEBOUNCED_STATE_DELAY: 300,
},
Expand Down
62 changes: 58 additions & 4 deletions src/components/Search/SearchRouter/SearchRouter.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import useKeyboardShortcut from '@hooks/useKeyboardShortcut';
import useLocalize from '@hooks/useLocalize';
import useResponsiveLayout from '@hooks/useResponsiveLayout';
import useThemeStyles from '@hooks/useThemeStyles';
import FastSearch from '@libs/FastSearch';
import Log from '@libs/Log';
import * as OptionsListUtils from '@libs/OptionsListUtils';
import {getAllTaxRates} from '@libs/PolicyUtils';
Expand Down Expand Up @@ -63,6 +64,49 @@ function SearchRouter({onRouterClose}: SearchRouterProps) {
return OptionsListUtils.getSearchOptions(options, '', betas ?? []);
}, [areOptionsInitialized, betas, options]);

/**
 * Indexes the personal details and recent reports in a FastSearch instance
 * and returns a function that looks up a search input in that index.
 * The index is only rebuilt when one of the two option lists changes.
 */
const findInSearchTree = useMemo(() => {
    const fastSearch = FastSearch.createFastSearch([
        {
            data: searchOptions.personalDetails,
            toSearchableString: (option) => {
                const displayName = option.participantsList?.[0]?.displayName ?? '';
                // Only index the display name when it differs from the login.
                const displayNamePart = option.login !== displayName ? displayName : '';
                return [option.login ?? '', displayNamePart].join();
            },
        },
        {
            data: searchOptions.recentReports,
            toSearchableString: (option) => {
                const searchableParts = [option.text ?? '', option.login ?? ''];

                // Threads contribute their alternate text; chat rooms and policy expense chats contribute their subtitle.
                let extraPart: string | undefined;
                if (option.isThread) {
                    extraPart = option.alternateText;
                } else if (!!option.isChatRoom || !!option.isPolicyExpenseChat) {
                    extraPart = option.subtitle;
                }
                if (extraPart) {
                    searchableParts.push(extraPart);
                }

                return searchableParts.join();
            },
        },
    ]);

    // The results come back in the same order as the data sets passed above.
    return (searchInput: string) => {
        const [personalDetails, recentReports] = fastSearch.search(searchInput);
        return {personalDetails, recentReports};
    };
}, [searchOptions.personalDetails, searchOptions.recentReports]);

const filteredOptions = useMemo(() => {
if (debouncedInputValue.trim() === '') {
return {
Expand All @@ -73,15 +117,25 @@ function SearchRouter({onRouterClose}: SearchRouterProps) {
}

Timing.start(CONST.TIMING.SEARCH_FILTER_OPTIONS);
const newOptions = OptionsListUtils.filterOptions(searchOptions, debouncedInputValue, {sortByReportTypeInSearch: true, preferChatroomsOverThreads: true});
const newOptions = findInSearchTree(debouncedInputValue);
Timing.end(CONST.TIMING.SEARCH_FILTER_OPTIONS);

return {
const recentReports = newOptions.recentReports.concat(newOptions.personalDetails);

const userToInvite = OptionsListUtils.pickUserToInvite({
canInviteUser: true,
recentReports: newOptions.recentReports,
personalDetails: newOptions.personalDetails,
userToInvite: newOptions.userToInvite,
searchValue: debouncedInputValue,
optionsToExclude: [{login: CONST.EMAIL.NOTIFICATIONS}],
});

return {
recentReports,
personalDetails: [],
userToInvite,
};
}, [debouncedInputValue, searchOptions]);
}, [debouncedInputValue, findInSearchTree]);

const recentReports: OptionData[] = useMemo(() => {
const currentSearchOptions = debouncedInputValue === '' ? searchOptions : filteredOptions;
Expand Down
140 changes: 140 additions & 0 deletions src/libs/FastSearch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/* eslint-disable rulesdir/prefer-at */
import CONST from '@src/CONST';
import Timing from './actions/Timing';
import SuffixUkkonenTree from './SuffixUkkonenTree';

type SearchableData<T> = {
/**
* The list of items that should be searchable.
*/
data: T[];
/**
* A function that generates a string from a single data entry. The returned string is the value that gets searched.
* If multiple fields should be searchable, concatenate them into the returned string.
*/
toSearchableString: (data: T) => string;
};

// Certain characters appear very often in our search data (e.g. in email addresses) and don't need to be searchable.
// This set is passed as `charSetToSkip` to SuffixUkkonenTree.stringToNumeric, both when indexing data and when converting the search input.
const charSetToSkip = new Set(['@', '.', '#', '$', '%', '&', '*', '+', '-', '/', ':', ';', '<', '=', '>', '?', '_', '~', '!', ' ']);

/**
 * Creates a new "FastSearch" instance, which uses a suffix tree to find substrings in a list of strings.
 * Several data sets can be provided; search results are returned separately for each data set, in the same order.
 *
 * Note: Creating an instance builds the whole tree up front, which is computationally expensive for large data.
 * Create an instance once and reuse it for all subsequent searches.
 *
 * @param dataSets - The data sets to index, each with a function that turns an entry into its searchable string.
 * @returns An object exposing a `search` function.
 */
function createFastSearch<T>(dataSets: Array<SearchableData<T>>) {
    Timing.start(CONST.TIMING.SEARCH_CONVERT_SEARCH_VALUES);
    const maxNumericListSize = 400_000;
    // All data sets are written into this single pre-allocated buffer:
    const numericBuffer = new Uint8Array(maxNumericListSize);
    // Maps a position in the buffer to the index of the item in its original data list:
    const occurrenceToIndex = new Uint32Array(maxNumericListSize * 4);
    // Current write position in the buffer, shared (by reference) with the conversion helper:
    const offset = {value: 1};
    // End position of every data set in the buffer, used to attribute an occurrence to its data set:
    const listOffsets: number[] = [];

    for (const {data, toSearchableString} of dataSets) {
        // Performance critical: the buffers are filled in place instead of allocating new arrays per data set.
        dataToNumericRepresentation(numericBuffer, occurrenceToIndex, offset, {data, toSearchableString});
        listOffsets.push(offset.value);
    }
    // Terminate the buffer with the end character and extend the last data set's boundary to include it:
    numericBuffer[offset.value] = SuffixUkkonenTree.END_CHAR_CODE;
    offset.value += 1;
    listOffsets[listOffsets.length - 1] = offset.value;
    Timing.end(CONST.TIMING.SEARCH_CONVERT_SEARCH_VALUES);

    // The buffer is usually larger than needed, so only the written part is handed to the tree:
    const concatenatedNumericList = numericBuffer.slice(0, offset.value);

    // Create & build the suffix tree:
    Timing.start(CONST.TIMING.SEARCH_MAKE_TREE);
    const tree = SuffixUkkonenTree.makeTree(concatenatedNumericList);
    Timing.end(CONST.TIMING.SEARCH_MAKE_TREE);

    Timing.start(CONST.TIMING.SEARCH_BUILD_TREE);
    tree.build();
    Timing.end(CONST.TIMING.SEARCH_BUILD_TREE);

    /**
     * Looks up the given input in the tree and returns the matching items, grouped per data set.
     * Throws if an occurrence cannot be mapped back to a data set or to an item.
     */
    function search(searchInput: string): T[][] {
        const {numeric} = SuffixUkkonenTree.stringToNumeric(cleanString(searchInput), {
            charSetToSkip,
            // stringToNumeric might return a list that is larger than necessary, so we clamp it to the actual size
            // (otherwise the search could fail as empty array values would be part of the query):
            clamp: true,
        });
        const occurrences = tree.findSubstring(Array.from(numeric));

        // Sets de-duplicate items that match at more than one position.
        const matchesPerDataSet = dataSets.map(() => new Set<T>());
        // eslint-disable-next-line @typescript-eslint/prefer-for-of
        for (let position = 0; position < occurrences.length; position++) {
            const occurrenceIndex = occurrences[position];
            const dataSetIndex = listOffsets.findIndex((listOffset) => occurrenceIndex < listOffset);
            if (dataSetIndex === -1) {
                throw new Error(`[FastSearch] The occurrence index ${occurrenceIndex} is not in any dataset`);
            }

            const itemIndexInDataSet = occurrenceToIndex[occurrenceIndex];
            const item = dataSets[dataSetIndex].data[itemIndexInDataSet];
            if (!item) {
                throw new Error(`[FastSearch] The item with index ${itemIndexInDataSet} in dataset ${dataSetIndex} is not defined`);
            }
            matchesPerDataSet[dataSetIndex].add(item);
        }

        return matchesPerDataSet.map((matches) => [...matches]);
    }

    return {
        search,
    };
}

/**
 * Writes the numeric representation of every entry's searchable string into the shared buffer.
 * The tree works on numbers, so each entry is converted via SuffixUkkonenTree.stringToNumeric and followed by a delimiter.
 * Alongside, `occurrenceToIndex` records which entry index a buffer position belongs to, so occurrences can be mapped back to the original data.
 * Entries whose searchable string is empty are not written at all.
 *
 * @param concatenatedNumericList - Output buffer, filled in place starting at `offset.value`.
 * @param occurrenceToIndex - Output mapping from buffer position to the entry's index in `data`, filled in place.
 * @param offset - Current write position; advanced in place.
 */
function dataToNumericRepresentation<T>(concatenatedNumericList: Uint8Array, occurrenceToIndex: Uint32Array, offset: {value: number}, {data, toSearchableString}: SearchableData<T>): void {
    data.forEach((entry, entryIndex) => {
        const normalizedSearchString = cleanString(toSearchableString(entry));

        if (normalizedSearchString.length > 0) {
            SuffixUkkonenTree.stringToNumeric(normalizedSearchString, {
                charSetToSkip,
                out: {
                    outArray: concatenatedNumericList,
                    offset,
                    outOccurrenceToIndex: occurrenceToIndex,
                    index: entryIndex,
                },
            });

            // Close the entry with a delimiter that is attributed to the same entry:
            const delimiterPosition = offset.value;
            // eslint-disable-next-line no-param-reassign
            occurrenceToIndex[delimiterPosition] = entryIndex;
            // eslint-disable-next-line no-param-reassign
            concatenatedNumericList[delimiterPosition] = SuffixUkkonenTree.DELIMITER_CHAR_CODE;
            // eslint-disable-next-line no-param-reassign
            offset.value = delimiterPosition + 1;
        }
    });
}

/**
 * Normalizes a string to lowercase. It is applied to both the indexed data and the search input,
 * so everything in the tree is treated as lowercase.
 */
function cleanString(input: string) {
    const lowerCased = input.toLowerCase();
    return lowerCased;
}

// Public API of this module, exposed as an object so that callers use `FastSearch.createFastSearch(...)`.
const FastSearch = {
createFastSearch,
};

export default FastSearch;
38 changes: 28 additions & 10 deletions src/libs/OptionsListUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2383,6 +2383,31 @@ function getPersonalDetailSearchTerms(item: Partial<ReportUtils.OptionData>) {
function getCurrentUserSearchTerms(item: ReportUtils.OptionData) {
return [item.text ?? '', item.login ?? '', item.login?.replace(CONST.EMAIL_SEARCH_REGEX, '') ?? ''];
}

type PickUserToInviteParams = {
    /** When false, no invite option is ever produced. */
    canInviteUser: boolean;
    /** Reports that matched the search; an invite option is only produced when this and `personalDetails` are both empty. */
    recentReports: ReportUtils.OptionData[];
    /** Personal details that matched the search; an invite option is only produced when this and `recentReports` are both empty. */
    personalDetails: ReportUtils.OptionData[];
    /** The search value, forwarded to getUserToInviteOption. */
    searchValue: string;
    /** Optional filter config; only `selectedOptions` is read here. */
    config?: FilterOptionsConfig;
    /** Options to exclude, forwarded to getUserToInviteOption. */
    optionsToExclude: Option[];
};

/**
 * Returns the option for inviting a user, or null when inviting is not allowed
 * or when the search already produced report or personal-detail matches.
 */
const pickUserToInvite = ({canInviteUser, recentReports, personalDetails, searchValue, config, optionsToExclude}: PickUserToInviteParams) => {
    if (!canInviteUser) {
        return null;
    }

    // Existing matches take precedence over offering an invite.
    if (recentReports.length !== 0 || personalDetails.length !== 0) {
        return null;
    }

    return getUserToInviteOption({
        searchValue,
        selectedOptions: config?.selectedOptions,
        optionsToExclude,
    });
};

/**
* Filters options based on the search input value
*/
Expand Down Expand Up @@ -2459,16 +2484,7 @@ function filterOptions(options: Options, searchInputValue: string, config?: Filt
recentReports = orderOptions(recentReports, searchValue);
}

let userToInvite = null;
if (canInviteUser) {
if (recentReports.length === 0 && personalDetails.length === 0) {
userToInvite = getUserToInviteOption({
searchValue,
selectedOptions: config?.selectedOptions,
optionsToExclude,
});
}
}
const userToInvite = pickUserToInvite({canInviteUser, recentReports, personalDetails, searchValue, config, optionsToExclude});

if (maxRecentReportsToShow > 0 && recentReports.length > maxRecentReportsToShow) {
recentReports.splice(maxRecentReportsToShow);
Expand Down Expand Up @@ -2536,6 +2552,7 @@ export {
formatMemberForList,
formatSectionsFromSearchTerm,
getShareLogOptions,
orderOptions,
filterOptions,
createOptionList,
createOptionFromReport,
Expand All @@ -2549,6 +2566,7 @@ export {
getEmptyOptions,
shouldUseBoldText,
getAlternateText,
pickUserToInvite,
hasReportErrors,
};

Expand Down
Loading

0 comments on commit 91c6e0c

Please sign in to comment.