Skip to content

Commit

Permalink
example implementation usage of Suffixtree
Browse files Browse the repository at this point in the history
  • Loading branch information
hannojg committed Sep 5, 2024
1 parent 07981f3 commit 2b586a8
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 7 deletions.
16 changes: 13 additions & 3 deletions src/libs/SuffixUkkonenTree.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ const DELIMITER_CHAR_CODE = ALPHABET_SIZE - 2;
// TODO:
// - Make makeTree faster.
// - Decide how to deal with non-ASCII characters, such as accented Spanish letters (e.g. "ñ", "é").
// - I think we need to support digits (0-9) as well.

/**
* Converts a string to an array of numbers representing the characters of the string.
* The numbers are offset by the character code of 'a' (97).
* The numbers are offset by the character code of 'a' (97).
* - This is so that the numbers from a-z are in the range 0-25.
* - 26 is for the delimiter character "{",
* - 27 is for the end character "|".
Expand All @@ -33,7 +34,7 @@ function stringToArray(input: string) {
* If you then use this tree for search you should clean your search input as well (so that a search query of "testuser@myemail.com" becomes "testusermyemailcom").
*/
function makeTree(a: number[]) {
const N = 1000000;
const N = 25000; // TODO: i reduced this number from 1_000_000 down to this, for faster performance - however its possible that it needs to be bigger for larger search strings
const start = performance.now();
const t = Array.from({length: N}, () => Array(ALPHABET_SIZE).fill(-1)) as number[][];

Check failure on line 39 in src/libs/SuffixUkkonenTree.ts

View workflow job for this annotation

GitHub Actions / Run ESLint

Unsafe return of an `any[]` typed value
const l = Array(N).fill(0) as number[];
Expand Down Expand Up @@ -134,6 +135,15 @@ function makeTree(a: number[]) {

/**
* Returns all occurrences of the given (sub)string in the input string.
*
* You can think of the tree that we create as a big string that looks like this:
*
* "banana{pancake{apple|"
* The delimiter character '{' is used to separate the different strings.
* The end character '|' is used to indicate the end of our search string.
*
* This function will return the (0-based) start index(es) of found occurrences within this big string.
* So, when searching for "an", it would return [1, 3, 8] (two matches in "banana" and one in "pancake").
*/
function findSubstring(searchString: string) {
const occurrences: number[] = [];
Expand Down Expand Up @@ -202,4 +212,4 @@ function testEmojis() {
return performanceProfile(searchString, 'smile');
}

export {makeTree, stringToArray, runTest, testEmojis};
export {makeTree, stringToArray, testEmojis};
117 changes: 113 additions & 4 deletions src/pages/ChatFinderPage/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import type {RootStackParamList} from '@libs/Navigation/types';
import * as OptionsListUtils from '@libs/OptionsListUtils';
import Performance from '@libs/Performance';
import type {OptionData} from '@libs/ReportUtils';
import {makeTree, stringToArray} from '@libs/SuffixUkkonenTree';
import * as Report from '@userActions/Report';
import Timing from '@userActions/Timing';
import CONST from '@src/CONST';
Expand Down Expand Up @@ -51,6 +52,8 @@ const setPerformanceTimersEnd = () => {

const ChatFinderPageFooterInstance = <ChatFinderPageFooter />;

const aToZRegex = /[^a-z]/gi;

function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPageProps) {
const [isScreenTransitionEnd, setIsScreenTransitionEnd] = useState(false);
const {translate} = useLocalize();
Expand Down Expand Up @@ -94,6 +97,112 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa
return {...optionList, headerMessage: header};
}, [areOptionsInitialized, betas, isScreenTransitionEnd, options]);

/**
 * Builds a suffix tree over all searchable options and returns a function to search in it.
 *
 * All options are flattened into one big string such as "banana{pancake{apple|", where '{' separates
 * two options and '|' marks the end of the string. `searchIndexList` holds exactly one entry per
 * character of that string: the option the character belongs to, or `undefined` for a delimiter/end
 * character. An index returned by `tree.findSubstring` can therefore be used directly as an index
 * into `searchIndexList`.
 *
 * The returned `search` function expects an already cleaned query (lowercase, a-z only) and returns
 * the matching options, split into `personalDetails` and `recentReports`, each option at most once.
 *
 * TODO:
 * - The index -> option mapping (including the delimiter/end characters and `stringToArray`) is an
 *   implementation detail that we probably want to move into the tree implementation.
 * - There are probably more fields we'd like to add to the search string.
 */
const findInSearchTree = useMemo(() => {
    // The character that separates the different options in the search string
    const delimiterChar = '{';
    // The character that terminates the search string
    const endChar = '|';

    type SearchIndexEntry = {option: OptionData; isPersonalDetail: boolean};

    // One entry per character of the final search string (see the doc comment above)
    const searchIndexList: Array<SearchIndexEntry | undefined> = [];
    const searchStringParts: string[] = [];

    // Login followed by the display name; the display name is skipped when it equals the login
    const getLoginAndDisplayName = (option: OptionData) => {
        const login = option.login ?? '';
        const displayName = option.login !== option.displayName ? option.displayName ?? '' : '';
        return login + displayName;
    };

    // Cleans the text of an option and registers it in both the search string and the index list
    const addToSearchIndex = (option: OptionData, rawText: string, isPersonalDetail: boolean) => {
        // Remove all none a-z chars:
        const cleanedText = rawText.toLowerCase().replace(aToZRegex, '');
        if (cleanedText.length === 0) {
            return;
        }

        searchStringParts.push(cleanedText);

        // One slot per character of the inserted text ...
        const entry: SearchIndexEntry = {option, isPersonalDetail};
        for (let i = 0; i < cleanedText.length; i += 1) {
            searchIndexList.push(entry);
        }
        // ... plus one empty slot for the delimiter (or end character) that follows this text.
        searchIndexList.push(undefined);
    };

    searchOptions.personalDetails.forEach((option) => {
        addToSearchIndex(option, getLoginAndDisplayName(option), true);
    });
    searchOptions.recentReports.forEach((option) => {
        // Note: digits (e.g. in the reportID) are currently stripped by the a-z cleaning
        addToSearchIndex(option, getLoginAndDisplayName(option) + (option.reportID ?? '') + (option.name ?? ''), false);
    });

    // Joining ALL parts at once guarantees a delimiter between the last personal detail and the first
    // recent report, so a match can never span two options and every string index lines up with
    // `searchIndexList`.
    const searchString = `${searchStringParts.join(delimiterChar)}${endChar}`;

    const tree = makeTree(stringToArray(searchString));
    tree.build();

    function search(searchInput: string) {
        // Sets keep the insertion order and drop duplicates, which occur when the query matches the
        // same option more than once (e.g. "an" in "banana").
        const personalDetails = new Set<OptionData>();
        const recentReports = new Set<OptionData>();

        tree.findSubstring(searchInput).forEach((index) => {
            const entry = searchIndexList[index];
            if (!entry) {
                return;
            }
            if (entry.isPersonalDetail) {
                personalDetails.add(entry.option);
            } else {
                recentReports.add(entry.option);
            }
        });

        return {
            personalDetails: Array.from(personalDetails),
            recentReports: Array.from(recentReports),
        };
    }

    return search;
}, [searchOptions.personalDetails, searchOptions.recentReports]);

const filteredOptions = useMemo(() => {
if (debouncedSearchValue.trim() === '') {
return {
Expand All @@ -105,17 +214,17 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa
}

Timing.start(CONST.TIMING.SEARCH_FILTER_OPTIONS);
const newOptions = OptionsListUtils.filterOptions(searchOptions, debouncedSearchValue, {sortByReportTypeInSearch: true, preferChatroomsOverThreads: true});
const newOptions = findInSearchTree(debouncedSearchValue.toLowerCase().replace(aToZRegex, ''));
Timing.end(CONST.TIMING.SEARCH_FILTER_OPTIONS);

const header = OptionsListUtils.getHeaderMessage(newOptions.recentReports.length + Number(!!newOptions.userToInvite) > 0, false, debouncedSearchValue);
const header = OptionsListUtils.getHeaderMessage(newOptions.recentReports.length > 0, false, debouncedSearchValue);
return {
recentReports: newOptions.recentReports,
personalDetails: newOptions.personalDetails,
userToInvite: newOptions.userToInvite,
userToInvite: undefined, // newOptions.userToInvite,
headerMessage: header,
};
}, [debouncedSearchValue, searchOptions]);
}, [debouncedSearchValue, findInSearchTree]);

const {recentReports, personalDetails: localPersonalDetails, userToInvite, headerMessage} = debouncedSearchValue.trim() !== '' ? filteredOptions : searchOptions;

Expand Down

0 comments on commit 2b586a8

Please sign in to comment.