From 54250820933bb1747ba744bc827c31876e65968f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Thu, 5 Sep 2024 13:27:56 +0200 Subject: [PATCH 001/264] suffix tree impl --- src/libs/SuffixUkkonenTree.ts | 234 ++++++++++++++++++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 src/libs/SuffixUkkonenTree.ts diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts new file mode 100644 index 000000000000..7599ab2a25f3 --- /dev/null +++ b/src/libs/SuffixUkkonenTree.ts @@ -0,0 +1,234 @@ +import enEmojis from '@assets/emojis/en'; +import {DATA} from './test'; + +const CHAR_CODE_A = 'a'.charCodeAt(0); +const ALPHABET_SIZE = 28; +const DELIMITER_CHAR_CODE = ALPHABET_SIZE - 2; + +function stringToArray(input: string) { + const res: number[] = []; + for (let i = 0; i < input.length; i++) { + const charCode = input.charCodeAt(i) - CHAR_CODE_A; + if (charCode >= 0 && charCode < ALPHABET_SIZE) { + res.push(charCode); + } + } + return res; +} + +function makeTree(a: number[]) { + const N = 1000000; + const t = Array.from({length: N}, () => Array(ALPHABET_SIZE).fill(-1)) as number[][]; + const l = Array(N).fill(0) as number[]; + const r = Array(N).fill(0) as number[]; + const p = Array(N).fill(0) as number[]; + const s = Array(N).fill(0) as number[]; + + let tv = 0; + let tp = 0; + let ts = 2; + let la = 0; + + function initializeTree() { + r.fill(a.length - 1); + s[0] = 1; + l[0] = -1; + r[0] = -1; + l[1] = -1; + r[1] = -1; + t[1].fill(0); + } + + function processCharacter(c: number) { + while (true) { + if (r[tv] < tp) { + if (t[tv][c] === -1) { + createNewLeaf(c); + continue; + } + tv = t[tv][c]; + tp = l[tv]; + } + if (tp === -1 || c === a[tp]) { + tp++; + } else { + splitEdge(c); + continue; + } + break; + } + if (c === DELIMITER_CHAR_CODE) { + resetTreeTraversal(); + } + } + + function createNewLeaf(c: number) { + t[tv][c] = ts; + l[ts] = la; + p[ts++] = tv; + tv = s[tv]; + tp = r[tv] + 1; + } + + function splitEdge(c: number) { + l[ts] = l[tv]; + r[ts] = tp - 1; + p[ts] = p[tv]; + t[ts][a[tp]] = tv; + t[ts][c] = ts + 1; + l[ts + 1] = la; + p[ts + 1] = ts; + l[tv] = tp; + p[tv] = ts; + t[p[ts]][a[l[ts]]] = ts; + ts += 2; + handleDescent(ts); + } + + function handleDescent(ts: number) { + tv = s[p[ts - 2]]; + tp = l[ts - 2]; + while (tp <= r[ts - 2]) { + tv = t[tv][a[tp]]; + tp += r[tv] - l[tv] + 1; + } + if (tp === r[ts - 2] + 1) { + s[ts - 2] = tv; + } else { + s[ts - 2] = ts; + } + tp = r[tv] - (tp - r[ts - 2]) + 2; + } + + function resetTreeTraversal() { + tv = 0; + tp = 0; + } + + function build() { + initializeTree(); + for (la = 0; la < a.length; ++la) { + const c = a[la]; + processCharacter(c); + } + } + + function findSubstring(sString: string) { + const s = stringToArray(sString); + const occurrences: number[] = []; + const st: Array<[number, number]> = [[0, 0]]; + + while (st.length > 0) { + const [node, depth] = st.pop()!; + + let isLeaf = true; + const leftRange = l[node]; + const rightRange = r[node]; + const rangeLen = node === 0 ? 0 : rightRange - leftRange + 1; + + let matches = true; + for (let i = 0; i < rangeLen && depth + i < s.length; i++) { + if (s[depth + i] !== a[leftRange + i]) { + matches = false; + break; + } + } + + if (!matches) { + continue; + } + + for (let i = ALPHABET_SIZE - 1; i >= 0; --i) { + if (t[node][i] !== -1) { + isLeaf = false; + st.push([t[node][i], depth + rangeLen]); + } + } + + if (isLeaf && depth + rangeLen >= s.length) { + occurrences.push(a.length - (depth + rangeLen)); + } + } + + return occurrences; + } + + function findSubstringRecursive(s: string) { + const occurrences: number[] = []; + + function dfs(node: number, depth: number) { + const leftRange = l[node]; + const rightRange = r[node]; + const rangeLen = node === 0 ? 0 : rightRange - leftRange + 1; + + for (let i = 0; i < rangeLen && depth + i < s.length; i++) { + if (s.charCodeAt(depth + i) - CHAR_CODE_A !== a[leftRange + i]) { + return; + } + } + + let isLeaf = true; + for (let i = 0; i < ALPHABET_SIZE; ++i) { + if (t[node][i] !== -1) { + isLeaf = false; + dfs(t[node][i], depth + rangeLen); + } + } + + if (isLeaf && depth >= s.length) { + occurrences.push(a.length - (depth + rangeLen)); + } + } + + dfs(0, 0); + return occurrences; + } + + return { + build, + findSubstring, + findSubstringRecursive, + }; +} + +function performanceProfile(input: string, search = 'sasha') { + const {build, findSubstring, findSubstringRecursive} = makeTree(stringToArray(input)); + + const buildStart = performance.now(); + build(); + const buildEnd = performance.now(); + console.log('Building time:', buildEnd - buildStart, 'ms'); + + const searchStart = performance.now(); + const results = findSubstring(search); + const searchEnd = performance.now(); + console.log('Search time:', searchEnd - searchStart, 'ms'); + console.log(results); + + const recursiveStart = performance.now(); + const resultsRecursive = findSubstringRecursive(search); + const recursiveEnd = performance.now(); + console.log('Recursive search time:', recursiveEnd - recursiveStart, 'ms'); + console.log(resultsRecursive); + + return { + buildTime: buildEnd - buildStart, + searchTime: searchEnd - searchStart, + recursiveSearchTime: recursiveEnd - recursiveStart, + }; +} + +function testEmojis() { + let searchString = ''; + Object.values(enEmojis).forEach(({keywords}) => { + searchString += `${keywords.join('')}{`; + }); + return performanceProfile(searchString, 'smile'); +} + +console.log('Read string of length', DATA.length); +function runTest() { + return performanceProfile(DATA); +} + +export {makeTree, stringToArray, runTest, testEmojis}; From 54a7b6017a602e05a983284e912c6228196b9644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Thu, 5 Sep 2024 13:37:46 +0200 Subject: [PATCH 002/264] add some helpful comments --- src/libs/SuffixUkkonenTree.ts | 91 ++++++++++++----------------------- 1 file changed, 31 insertions(+), 60 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 7599ab2a25f3..3bf8d2ed66a9 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -1,10 +1,20 @@ import enEmojis from '@assets/emojis/en'; -import {DATA} from './test'; const CHAR_CODE_A = 'a'.charCodeAt(0); const ALPHABET_SIZE = 28; const DELIMITER_CHAR_CODE = ALPHABET_SIZE - 2; +// TODO: +// make makeTree faster +// how to deal with unicode characters such as spanish ones? + +/** + * Converts a string to an array of numbers representing the characters of the string. + * The numbers are offset by the character code of 'a' (97). + * - This is so that the numbers from a-z are in the range 0-25. + * - 26 is for the delimiter character "{", + * - 27 is for the end character "|". + */ function stringToArray(input: string) { const res: number[] = []; for (let i = 0; i < input.length; i++) { @@ -16,13 +26,22 @@ function stringToArray(input: string) { return res; } +/** + * Makes a tree from an input string, which has been converted by {@link stringToArray}. + * **Important:** As we only support an alphabet of 26 characters, the input string should only contain characters from a-z. + * Thus, all input data must be cleaned before being passed to this function. + * If you then use this tree for search you should clean your search input as well (so that a search query of "testuser@myEmail.com" becomes "testusermyemailcom"). + */ function makeTree(a: number[]) { const N = 1000000; + const start = performance.now(); const t = Array.from({length: N}, () => Array(ALPHABET_SIZE).fill(-1)) as number[][]; const l = Array(N).fill(0) as number[]; const r = Array(N).fill(0) as number[]; const p = Array(N).fill(0) as number[]; const s = Array(N).fill(0) as number[]; + const end = performance.now(); + console.log('Allocating memory took:', end - start, 'ms'); let tv = 0; let tp = 0; @@ -113,47 +132,10 @@ function makeTree(a: number[]) { } } - function findSubstring(sString: string) { - const s = stringToArray(sString); - const occurrences: number[] = []; - const st: Array<[number, number]> = [[0, 0]]; - - while (st.length > 0) { - const [node, depth] = st.pop()!; - - let isLeaf = true; - const leftRange = l[node]; - const rightRange = r[node]; - const rangeLen = node === 0 ? 0 : rightRange - leftRange + 1; - - let matches = true; - for (let i = 0; i < rangeLen && depth + i < s.length; i++) { - if (s[depth + i] !== a[leftRange + i]) { - matches = false; - break; - } - } - - if (!matches) { - continue; - } - - for (let i = ALPHABET_SIZE - 1; i >= 0; --i) { - if (t[node][i] !== -1) { - isLeaf = false; - st.push([t[node][i], depth + rangeLen]); - } - } - - if (isLeaf && depth + rangeLen >= s.length) { - occurrences.push(a.length - (depth + rangeLen)); - } - } - - return occurrences; - } - - function findSubstringRecursive(s: string) { + /** + * Returns all occurrences of the given (sub)string in the input string. + */ + function findSubstring(searchString: string) { const occurrences: number[] = []; function dfs(node: number, depth: number) { @@ -161,8 +143,8 @@ function makeTree(a: number[]) { const rightRange = r[node]; const rangeLen = node === 0 ? 0 : rightRange - leftRange + 1; - for (let i = 0; i < rangeLen && depth + i < s.length; i++) { - if (s.charCodeAt(depth + i) - CHAR_CODE_A !== a[leftRange + i]) { + for (let i = 0; i < rangeLen && depth + i < searchString.length; i++) { + if (searchString.charCodeAt(depth + i) - CHAR_CODE_A !== a[leftRange + i]) { return; } } @@ -175,7 +157,7 @@ function makeTree(a: number[]) { } } - if (isLeaf && depth >= s.length) { + if (isLeaf && depth >= searchString.length) { occurrences.push(a.length - (depth + rangeLen)); } } @@ -187,12 +169,12 @@ function makeTree(a: number[]) { return { build, findSubstring, - findSubstringRecursive, }; } function performanceProfile(input: string, search = 'sasha') { - const {build, findSubstring, findSubstringRecursive} = makeTree(stringToArray(input)); + // TODO: For emojis we could precalculate the stringToArray or even the makeTree function during build time using a babel plugin + const {build, findSubstring} = makeTree(stringToArray(input)); const buildStart = performance.now(); build(); @@ -205,19 +187,13 @@ function performanceProfile(input: string, search = 'sasha') { console.log('Search time:', searchEnd - searchStart, 'ms'); console.log(results); - const recursiveStart = performance.now(); - const resultsRecursive = findSubstringRecursive(search); - const recursiveEnd = performance.now(); - console.log('Recursive search time:', recursiveEnd - recursiveStart, 'ms'); - console.log(resultsRecursive); - return { buildTime: buildEnd - buildStart, - searchTime: searchEnd - searchStart, - recursiveSearchTime: recursiveEnd - recursiveStart, + recursiveSearchTime: searchEnd - searchStart, }; } +// Demo function testing the performance for emojis function testEmojis() { let searchString = ''; Object.values(enEmojis).forEach(({keywords}) => { @@ -226,9 +202,4 @@ function testEmojis() { return performanceProfile(searchString, 'smile'); } -console.log('Read string of length', DATA.length); -function runTest() { - return performanceProfile(DATA); -} - export {makeTree, stringToArray, runTest, testEmojis}; From 8622670fd891e59e266602ae3605868d3d5da997 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Thu, 5 Sep 2024 16:09:28 +0200 Subject: [PATCH 003/264] example implementation usage of Suffixtree --- src/libs/SuffixUkkonenTree.ts | 16 +++- src/pages/ChatFinderPage/index.tsx | 117 ++++++++++++++++++++++++++++- 2 files changed, 126 insertions(+), 7 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 3bf8d2ed66a9..217588fae5fa 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -7,10 +7,11 @@ const DELIMITER_CHAR_CODE = ALPHABET_SIZE - 2; // TODO: // make makeTree faster // how to deal with unicode characters such as spanish ones? +// i think we need to support numbers as well /** * Converts a string to an array of numbers representing the characters of the string. - * The numbers are offset by the character code of 'a' (97). + * The numbers are offset by the character code of 'a' (97). * - This is so that the numbers from a-z are in the range 0-25. * - 26 is for the delimiter character "{", * - 27 is for the end character "|". @@ -33,7 +34,7 @@ function stringToArray(input: string) { * If you then use this tree for search you should clean your search input as well (so that a search query of "testuser@myEmail.com" becomes "testusermyemailcom"). */ function makeTree(a: number[]) { - const N = 1000000; + const N = 25000; // TODO: i reduced this number from 1_000_000 down to this, for faster performance - however its possible that it needs to be bigger for larger search strings const start = performance.now(); const t = Array.from({length: N}, () => Array(ALPHABET_SIZE).fill(-1)) as number[][]; const l = Array(N).fill(0) as number[]; @@ -134,6 +135,15 @@ function makeTree(a: number[]) { /** * Returns all occurrences of the given (sub)string in the input string. + * + * You can think of the tree that we create as a big string that looks like this: + * + * "banana{pancake{apple|" + * The delimiter character '{' is used to separate the different strings. + * The end character '|' is used to indicate the end of our search string. + * + * This function will return the index(es) of found occurrences within this big string. + * So, when searching for "an", it would return [1, 4, 11]. */ function findSubstring(searchString: string) { const occurrences: number[] = []; @@ -202,4 +212,4 @@ function testEmojis() { return performanceProfile(searchString, 'smile'); } -export {makeTree, stringToArray, runTest, testEmojis}; +export {makeTree, stringToArray, testEmojis}; diff --git a/src/pages/ChatFinderPage/index.tsx b/src/pages/ChatFinderPage/index.tsx index aabf881a8bed..cbdf5ec739c1 100644 --- a/src/pages/ChatFinderPage/index.tsx +++ b/src/pages/ChatFinderPage/index.tsx @@ -18,6 +18,7 @@ import type {RootStackParamList} from '@libs/Navigation/types'; import * as OptionsListUtils from '@libs/OptionsListUtils'; import Performance from '@libs/Performance'; import type {OptionData} from '@libs/ReportUtils'; +import {makeTree, stringToArray} from '@libs/SuffixUkkonenTree'; import * as Report from '@userActions/Report'; import Timing from '@userActions/Timing'; import CONST from '@src/CONST'; @@ -51,6 +52,8 @@ const setPerformanceTimersEnd = () => { const ChatFinderPageFooterInstance = ; +const aToZRegex = /[^a-z]/gi; + function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPageProps) { const [isScreenTransitionEnd, setIsScreenTransitionEnd] = useState(false); const {translate} = useLocalize(); @@ -94,6 +97,112 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa return {...optionList, headerMessage: header}; }, [areOptionsInitialized, betas, isScreenTransitionEnd, options]); + /** + * Builds a suffix tree and returns a function to search in it. + * + * // TODO: + * - The results we get from tree.findSubstring are the indexes of the occurrence in the original string + * I implemented a manual mapping function here, we probably want to put that inside the tree implementation + * (including the implementation detail of the delimiter character) + */ + const findInSearchTree = useMemo(() => { + // The character that separates the different options in the search string + const delimiterChar = '{'; + + const searchIndexListRecentReports: Array = []; + const searchIndexListPersonalDetails: Array = []; + + let start = performance.now(); + let searchString = searchOptions.personalDetails + .map((option) => { + // TODO: there are probably more fields we'd like to add to the search string + let searchStringForTree = (option.login ?? '') + (option.login !== option.displayName ? option.displayName ?? '' : ''); + // Remove all none a-z chars: + searchStringForTree = searchStringForTree.toLowerCase().replace(aToZRegex, ''); + + if (searchStringForTree.length > 0) { + // We need to push an array that has the same length as the length of the string we insert for this option: + const indexes = Array.from({length: searchStringForTree.length}, () => option); + // Note: we add undefined for the delimiter character + searchIndexListPersonalDetails.push(...indexes, undefined); + } else { + return undefined; + } + + return searchStringForTree; + }) + .filter(Boolean) + .join(delimiterChar); + searchString += searchOptions.recentReports + .map((option) => { + let searchStringForTree = (option.login ?? '') + (option.login !== option.displayName ? option.displayName ?? '' : ''); + searchStringForTree += option.reportID ?? ''; + searchStringForTree += option.name ?? ''; + // Remove all none a-z chars: + searchStringForTree = searchStringForTree.toLowerCase().replace(aToZRegex, ''); + + if (searchStringForTree.length > 0) { + // We need to push an array that has the same length as the length of the string we insert for this option: + const indexes = Array.from({length: searchStringForTree.length}, () => option); + searchIndexListRecentReports.push(...indexes, undefined); + } else { + return undefined; + } + + return searchStringForTree; + }) + // TODO: this can probably improved by a reduce + .filter(Boolean) + .join(delimiterChar); + searchString += '|'; // End Character + console.log(searchIndexListPersonalDetails.slice(0, 20)); + console.log(searchString.substring(0, 20)); + console.log('building search strings', performance.now() - start); + + // TODO: stringToArray is probably also an implementation detail we want to hide from the developer + start = performance.now(); + const numbers = stringToArray(searchString); + console.log('stringToArray', performance.now() - start); + start = performance.now(); + const tree = makeTree(numbers); + console.log('makeTree', performance.now() - start); + start = performance.now(); + tree.build(); + console.log('build', performance.now() - start); + + function search(searchInput: string) { + start = performance.now(); + const result = tree.findSubstring(searchInput); + console.log('FindSubstring index result for searchInput', searchInput, result); + // Map the results to the original options + const mappedResults = { + personalDetails: [] as OptionData[], + recentReports: [] as OptionData[], + }; + result.forEach((index) => { + // const textInSearchString = searchString.substring(index, searchString.indexOf(delimiterChar, index)); + // console.log('textInSearchString', textInSearchString); + + if (index < searchIndexListPersonalDetails.length) { + const option = searchIndexListPersonalDetails[index]; + if (option) { + mappedResults.personalDetails.push(option); + } + } else { + const option = searchIndexListRecentReports[index - searchIndexListPersonalDetails.length]; + if (option) { + mappedResults.recentReports.push(option); + } + } + }); + + console.log('search', performance.now() - start); + return mappedResults; + } + + return search; + }, [searchOptions.personalDetails, searchOptions.recentReports]); + const filteredOptions = useMemo(() => { if (debouncedSearchValue.trim() === '') { return { @@ -105,17 +214,17 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa } Timing.start(CONST.TIMING.SEARCH_FILTER_OPTIONS); - const newOptions = OptionsListUtils.filterOptions(searchOptions, debouncedSearchValue, {sortByReportTypeInSearch: true, preferChatroomsOverThreads: true}); + const newOptions = findInSearchTree(debouncedSearchValue.toLowerCase().replace(aToZRegex, '')); Timing.end(CONST.TIMING.SEARCH_FILTER_OPTIONS); - const header = OptionsListUtils.getHeaderMessage(newOptions.recentReports.length + Number(!!newOptions.userToInvite) > 0, false, debouncedSearchValue); + const header = OptionsListUtils.getHeaderMessage(newOptions.recentReports.length > 0, false, debouncedSearchValue); return { recentReports: newOptions.recentReports, personalDetails: newOptions.personalDetails, - userToInvite: newOptions.userToInvite, + userToInvite: undefined, // newOptions.userToInvite, headerMessage: header, }; - }, [debouncedSearchValue, searchOptions]); + }, [debouncedSearchValue, findInSearchTree]); const {recentReports, personalDetails: localPersonalDetails, userToInvite, headerMessage} = debouncedSearchValue.trim() !== '' ? filteredOptions : searchOptions; From 01162fee73c8147556d65e4b5990f0a25d855d7e Mon Sep 17 00:00:00 2001 From: kirillzyusko Date: Wed, 11 Sep 2024 17:39:36 +0200 Subject: [PATCH 004/264] fix: resolved one TODO --- src/libs/SuffixUkkonenTree.ts | 14 ++++++++------ src/pages/ChatFinderPage/index.tsx | 8 ++------ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 217588fae5fa..52a7ebb2b7d9 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -28,15 +28,16 @@ function stringToArray(input: string) { } /** - * Makes a tree from an input string, which has been converted by {@link stringToArray}. + * Makes a tree from an input string * **Important:** As we only support an alphabet of 26 characters, the input string should only contain characters from a-z. * Thus, all input data must be cleaned before being passed to this function. * If you then use this tree for search you should clean your search input as well (so that a search query of "testuser@myEmail.com" becomes "testusermyemailcom"). */ -function makeTree(a: number[]) { +function makeTree(searchString: string) { + const a = stringToArray(searchString); const N = 25000; // TODO: i reduced this number from 1_000_000 down to this, for faster performance - however its possible that it needs to be bigger for larger search strings const start = performance.now(); - const t = Array.from({length: N}, () => Array(ALPHABET_SIZE).fill(-1)) as number[][]; + const t = Array.from({length: N}, () => Array(ALPHABET_SIZE).fill(-1) as number[]); const l = Array(N).fill(0) as number[]; const r = Array(N).fill(0) as number[]; const p = Array(N).fill(0) as number[]; @@ -183,8 +184,9 @@ function makeTree(a: number[]) { } function performanceProfile(input: string, search = 'sasha') { - // TODO: For emojis we could precalculate the stringToArray or even the makeTree function during build time using a babel plugin - const {build, findSubstring} = makeTree(stringToArray(input)); + // TODO: For emojis we could precalculate the makeTree function during build time using a babel plugin + // maybe babel plugin that just precalculates the result of function execution (so that it can be generic purpose plugin) + const {build, findSubstring} = makeTree(input); const buildStart = performance.now(); build(); @@ -212,4 +214,4 @@ function testEmojis() { return performanceProfile(searchString, 'smile'); } -export {makeTree, stringToArray, testEmojis}; +export {makeTree, testEmojis}; diff --git a/src/pages/ChatFinderPage/index.tsx b/src/pages/ChatFinderPage/index.tsx index cbdf5ec739c1..f7860d4cc1e3 100644 --- a/src/pages/ChatFinderPage/index.tsx +++ b/src/pages/ChatFinderPage/index.tsx @@ -18,7 +18,7 @@ import type {RootStackParamList} from '@libs/Navigation/types'; import * as OptionsListUtils from '@libs/OptionsListUtils'; import Performance from '@libs/Performance'; import type {OptionData} from '@libs/ReportUtils'; -import {makeTree, stringToArray} from '@libs/SuffixUkkonenTree'; +import {makeTree} from '@libs/SuffixUkkonenTree'; import * as Report from '@userActions/Report'; import Timing from '@userActions/Timing'; import CONST from '@src/CONST'; @@ -159,12 +159,8 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa console.log(searchString.substring(0, 20)); console.log('building search strings', performance.now() - start); - // TODO: stringToArray is probably also an implementation detail we want to hide from the developer start = performance.now(); - const numbers = stringToArray(searchString); - console.log('stringToArray', performance.now() - start); - start = performance.now(); - const tree = makeTree(numbers); + const tree = makeTree(searchString); console.log('makeTree', performance.now() - start); start = performance.now(); tree.build(); From 09e8aa7362424a49e1e8889bb97bc15ba6648dc5 Mon Sep 17 00:00:00 2001 From: kirillzyusko Date: Thu, 12 Sep 2024 12:42:19 +0200 Subject: [PATCH 005/264] fix: reduce code duplication --- src/libs/SuffixUkkonenTree.ts | 36 +++++++++++++++++-- src/pages/ChatFinderPage/index.tsx | 56 +++++++----------------------- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 52a7ebb2b7d9..2a0d0d309a48 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -27,14 +27,44 @@ function stringToArray(input: string) { return res; } +const aToZRegex = /[^a-z]/gi; +// The character that separates the different options in the search string +const delimiterChar = '{'; + +function prepareData({data, transform}: {data: T[]; transform: (data: T) => string}): [string, Array] { + const searchIndexList: Array = []; + const str = data + .map((option) => { + let searchStringForTree = transform(option); + // Remove all none a-z chars: + searchStringForTree = searchStringForTree.toLowerCase().replace(aToZRegex, ''); + + if (searchStringForTree.length > 0) { + // We need to push an array that has the same length as the length of the string we insert for this option: + const indexes = Array.from({length: searchStringForTree.length}, () => option); + // Note: we add undefined for the delimiter character + searchIndexList.push(...indexes, undefined); + } else { + return undefined; + } + + return searchStringForTree; + }) + // TODO: this can probably improved by a reduce + .filter(Boolean) + .join(delimiterChar); + + return [str, searchIndexList]; +} + /** * Makes a tree from an input string * **Important:** As we only support an alphabet of 26 characters, the input string should only contain characters from a-z. * Thus, all input data must be cleaned before being passed to this function. * If you then use this tree for search you should clean your search input as well (so that a search query of "testuser@myEmail.com" becomes "testusermyemailcom"). */ -function makeTree(searchString: string) { - const a = stringToArray(searchString); +function makeTree(stringToSearch: string) { + const a = stringToArray(stringToSearch); const N = 25000; // TODO: i reduced this number from 1_000_000 down to this, for faster performance - however its possible that it needs to be bigger for larger search strings const start = performance.now(); const t = Array.from({length: N}, () => Array(ALPHABET_SIZE).fill(-1) as number[]); @@ -214,4 +244,4 @@ function testEmojis() { return performanceProfile(searchString, 'smile'); } -export {makeTree, testEmojis}; +export {makeTree, prepareData, testEmojis}; diff --git a/src/pages/ChatFinderPage/index.tsx b/src/pages/ChatFinderPage/index.tsx index f7860d4cc1e3..97feafa892b7 100644 --- a/src/pages/ChatFinderPage/index.tsx +++ b/src/pages/ChatFinderPage/index.tsx @@ -18,7 +18,7 @@ import type {RootStackParamList} from '@libs/Navigation/types'; import * as OptionsListUtils from '@libs/OptionsListUtils'; import Performance from '@libs/Performance'; import type {OptionData} from '@libs/ReportUtils'; -import {makeTree} from '@libs/SuffixUkkonenTree'; +import {makeTree, prepareData} from '@libs/SuffixUkkonenTree'; import * as Report from '@userActions/Report'; import Timing from '@userActions/Timing'; import CONST from '@src/CONST'; @@ -106,55 +106,25 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa * (including the implementation detail of the delimiter character) */ const findInSearchTree = useMemo(() => { - // The character that separates the different options in the search string - const delimiterChar = '{'; - - const searchIndexListRecentReports: Array = []; - const searchIndexListPersonalDetails: Array = []; - let start = performance.now(); - let searchString = searchOptions.personalDetails - .map((option) => { + const [personalDetailsSearchString, searchIndexListPersonalDetails] = prepareData({ + data: searchOptions.personalDetails, + transform: (option) => { // TODO: there are probably more fields we'd like to add to the search string - let searchStringForTree = (option.login ?? '') + (option.login !== option.displayName ? option.displayName ?? '' : ''); - // Remove all none a-z chars: - searchStringForTree = searchStringForTree.toLowerCase().replace(aToZRegex, ''); - - if (searchStringForTree.length > 0) { - // We need to push an array that has the same length as the length of the string we insert for this option: - const indexes = Array.from({length: searchStringForTree.length}, () => option); - // Note: we add undefined for the delimiter character - searchIndexListPersonalDetails.push(...indexes, undefined); - } else { - return undefined; - } - - return searchStringForTree; - }) - .filter(Boolean) - .join(delimiterChar); - searchString += searchOptions.recentReports - .map((option) => { + return (option.login ?? '') + (option.login !== option.displayName ? option.displayName ?? '' : ''); + }, + }); + const [recentReportsSearchString, searchIndexListRecentReports] = prepareData({ + data: searchOptions.recentReports, + transform: (option) => { let searchStringForTree = (option.login ?? '') + (option.login !== option.displayName ? option.displayName ?? '' : ''); searchStringForTree += option.reportID ?? ''; searchStringForTree += option.name ?? ''; - // Remove all none a-z chars: - searchStringForTree = searchStringForTree.toLowerCase().replace(aToZRegex, ''); - - if (searchStringForTree.length > 0) { - // We need to push an array that has the same length as the length of the string we insert for this option: - const indexes = Array.from({length: searchStringForTree.length}, () => option); - searchIndexListRecentReports.push(...indexes, undefined); - } else { - return undefined; - } return searchStringForTree; - }) - // TODO: this can probably improved by a reduce - .filter(Boolean) - .join(delimiterChar); - searchString += '|'; // End Character + }, + }); + const searchString = `${personalDetailsSearchString}${recentReportsSearchString}|`; // End Character console.log(searchIndexListPersonalDetails.slice(0, 20)); console.log(searchString.substring(0, 20)); console.log('building search strings', performance.now() - start); From fa81e13878d8d35b44b374e7a454f02cfc3cdc37 Mon Sep 17 00:00:00 2001 From: kirillzyusko Date: Thu, 12 Sep 2024 13:01:13 +0200 Subject: [PATCH 006/264] refactor: O(2) -> O(1) --- src/libs/SuffixUkkonenTree.ts | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 2a0d0d309a48..fc11e194b8e0 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -50,9 +50,18 @@ function prepareData({data, transform}: {data: T[]; transform: (data: T) => s return searchStringForTree; }) - // TODO: this can probably improved by a reduce - .filter(Boolean) - .join(delimiterChar); + // slightly faster alternative to `.filter(Boolean).join(delimiterChar)` + .reduce((acc: string, curr) => { + if (!curr) { + return acc; + } + + if (acc === '') { + return curr; + } + + return `${acc}${delimiterChar}${curr}`; + }, ''); return [str, searchIndexList]; } From e33142fd2e137cc2bcdb1b82a4cea17a74c08387 Mon Sep 17 00:00:00 2001 From: kirillzyusko Date: Thu, 12 Sep 2024 15:38:59 +0200 Subject: [PATCH 007/264] refactor: minus one TODO --- src/libs/SuffixUkkonenTree.ts | 72 +++++++++++++++++++++++++---- src/pages/ChatFinderPage/index.tsx | 74 +++++++++--------------------- 2 files changed, 85 insertions(+), 61 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index fc11e194b8e0..6d56c59a4ab4 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -31,7 +31,12 @@ const aToZRegex = /[^a-z]/gi; // The character that separates the different options in the search string const delimiterChar = '{'; -function prepareData({data, transform}: {data: T[]; transform: (data: T) => string}): [string, Array] { +type PrepareDataParams = { + data: T[]; + transform: (data: T) => string; +}; + +function prepareData({data, transform}: PrepareDataParams): [string, Array] { const searchIndexList: Array = []; const str = data .map((option) => { @@ -72,7 +77,19 @@ function prepareData({data, transform}: {data: T[]; transform: (data: T) => s * Thus, all input data must be cleaned before being passed to this function. * If you then use this tree for search you should clean your search input as well (so that a search query of "testuser@myEmail.com" becomes "testusermyemailcom"). */ -function makeTree(stringToSearch: string) { +function makeTree(compose: Array>) { + const start1 = performance.now(); + const strings = []; + const indexes: Array> = []; + + for (const {data, transform} of compose) { + const [str, searchIndexList] = prepareData({data, transform}); + strings.push(str); + indexes.push(searchIndexList); + } + const stringToSearch = `${strings.join('')}|`; // End Character + console.log('building search strings', performance.now() - start1); + const a = stringToArray(stringToSearch); const N = 25000; // TODO: i reduced this number from 1_000_000 down to this, for faster performance - however its possible that it needs to be bigger for larger search strings const start = performance.now(); @@ -216,16 +233,48 @@ function makeTree(stringToSearch: string) { return occurrences; } + function findInSearchTree(searchInput: string) { + const now = performance.now(); + const result = findSubstring(searchInput); + console.log('FindSubstring index result for searchInput', searchInput, result); + // Map the results to the original options + + const mappedResults: T[][] = Array.from({length: compose.length}, () => []); + console.log({result}); + result.forEach((index) => { + // const textInSearchString = searchString.substring(index, searchString.indexOf(delimiterChar, index)); + // console.log('textInSearchString', textInSearchString); + + // TODO: check with Hanno whether we restore the data correctly + let offset = 0; + for (let i = 0; i < indexes.length; i++) { + const relativeIndex = index - offset; + if (relativeIndex < indexes[i].length && relativeIndex >= 0) { + const option = indexes[i][relativeIndex]; + if (option) { + mappedResults[i].push(option); + } + } else { + offset += indexes[i].length; + } + } + }); + + console.log('search', performance.now() - now); + return mappedResults; + } + return { build, findSubstring, + findInSearchTree, }; } -function performanceProfile(input: string, search = 'sasha') { +function performanceProfile(input: PrepareDataParams, search = 'sasha') { // TODO: For emojis we could precalculate the makeTree function during build time using a babel plugin // maybe babel plugin that just precalculates the result of function execution (so that it can be generic purpose plugin) - const {build, findSubstring} = makeTree(input); + const {build, findSubstring} = makeTree([input]); const buildStart = performance.now(); build(); @@ -246,11 +295,16 @@ function performanceProfile(input: string, search = 'sasha') { // Demo function testing the performance for emojis function testEmojis() { - let searchString = ''; - Object.values(enEmojis).forEach(({keywords}) => { - searchString += `${keywords.join('')}{`; - }); - return performanceProfile(searchString, 'smile'); + const data = Object.values(enEmojis); + return performanceProfile( + { + data, + transform: ({keywords}) => { + return `${keywords.join('')}{`; + }, + }, + 'smile', + ); } export {makeTree, prepareData, testEmojis}; diff --git a/src/pages/ChatFinderPage/index.tsx b/src/pages/ChatFinderPage/index.tsx index 97feafa892b7..2afe1fd96e3d 100644 --- a/src/pages/ChatFinderPage/index.tsx +++ b/src/pages/ChatFinderPage/index.tsx @@ -99,38 +99,28 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa /** * Builds a suffix tree and returns a function to search in it. - * - * // TODO: - * - The results we get from tree.findSubstring are the indexes of the occurrence in the original string - * I implemented a manual mapping function here, we probably want to put that inside the tree implementation - * (including the implementation detail of the delimiter character) */ const findInSearchTree = useMemo(() => { let start = performance.now(); - const [personalDetailsSearchString, searchIndexListPersonalDetails] = prepareData({ - data: searchOptions.personalDetails, - transform: (option) => { - // TODO: there are probably more fields we'd like to add to the search string - return (option.login ?? '') + (option.login !== option.displayName ? option.displayName ?? '' : ''); + const tree = makeTree([ + { + data: searchOptions.personalDetails, + transform: (option) => { + // TODO: there are probably more fields we'd like to add to the search string + return (option.login ?? '') + (option.login !== option.displayName ? option.displayName ?? '' : ''); + }, }, - }); - const [recentReportsSearchString, searchIndexListRecentReports] = prepareData({ - data: searchOptions.recentReports, - transform: (option) => { - let searchStringForTree = (option.login ?? '') + (option.login !== option.displayName ? option.displayName ?? '' : ''); - searchStringForTree += option.reportID ?? ''; - searchStringForTree += option.name ?? ''; - - return searchStringForTree; + { + data: searchOptions.recentReports, + transform: (option) => { + let searchStringForTree = (option.login ?? '') + (option.login !== option.displayName ? option.displayName ?? '' : ''); + searchStringForTree += option.reportID ?? ''; + searchStringForTree += option.name ?? ''; + + return searchStringForTree; + }, }, - }); - const searchString = `${personalDetailsSearchString}${recentReportsSearchString}|`; // End Character - console.log(searchIndexListPersonalDetails.slice(0, 20)); - console.log(searchString.substring(0, 20)); - console.log('building search strings', performance.now() - start); - - start = performance.now(); - const tree = makeTree(searchString); + ]); console.log('makeTree', performance.now() - start); start = performance.now(); tree.build(); @@ -138,32 +128,12 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa function search(searchInput: string) { start = performance.now(); - const result = tree.findSubstring(searchInput); - console.log('FindSubstring index result for searchInput', searchInput, result); - // Map the results to the original options - const mappedResults = { - personalDetails: [] as OptionData[], - recentReports: [] as OptionData[], - }; - result.forEach((index) => { - // const textInSearchString = searchString.substring(index, searchString.indexOf(delimiterChar, index)); - // console.log('textInSearchString', textInSearchString); - - if (index < searchIndexListPersonalDetails.length) { - const option = searchIndexListPersonalDetails[index]; - if (option) { - mappedResults.personalDetails.push(option); - } - } else { - const option = searchIndexListRecentReports[index - searchIndexListPersonalDetails.length]; - if (option) { - mappedResults.recentReports.push(option); - } - } - }); + const [personalDetails, recentReports] = tree.findInSearchTree(searchInput); - console.log('search', performance.now() - start); - return mappedResults; + return { + personalDetails, + recentReports, + }; } return search; From 30424a9e0fa91e8dc7a8e4c3ffb30363a5d5f4d9 Mon Sep 17 00:00:00 2001 From: kirillzyusko Date: Fri, 13 Sep 2024 13:56:43 +0200 Subject: [PATCH 008/264] fix: bring back userToInvite --- src/libs/OptionsListUtils.ts | 37 ++++++++++++++++++++++-------- src/pages/ChatFinderPage/index.tsx | 12 ++++++++-- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/src/libs/OptionsListUtils.ts b/src/libs/OptionsListUtils.ts index f191c1d06532..9b850c382400 100644 --- a/src/libs/OptionsListUtils.ts +++ b/src/libs/OptionsListUtils.ts @@ -2382,6 +2382,31 @@ function getPersonalDetailSearchTerms(item: Partial) { function getCurrentUserSearchTerms(item: ReportUtils.OptionData) { return [item.text ?? '', item.login ?? '', item.login?.replace(CONST.EMAIL_SEARCH_REGEX, '') ?? '']; } + +type PickUserToInviteParams = { + canInviteUser: boolean; + recentReports: ReportUtils.OptionData[]; + personalDetails: ReportUtils.OptionData[]; + searchValue: string; + config?: FilterOptionsConfig; + optionsToExclude: Option[]; +}; + +const pickUserToInvite = ({canInviteUser, recentReports, personalDetails, searchValue, config, optionsToExclude}: PickUserToInviteParams) => { + let userToInvite = null; + if (canInviteUser) { + if (recentReports.length === 0 && personalDetails.length === 0) { + userToInvite = getUserToInviteOption({ + searchValue, + selectedOptions: config?.selectedOptions, + optionsToExclude, + }); + } + } + + return userToInvite; +}; + /** * Filters options based on the search input value */ @@ -2457,16 +2482,7 @@ function filterOptions(options: Options, searchInputValue: string, config?: Filt recentReports = orderOptions(recentReports, searchValue); } - let userToInvite = null; - if (canInviteUser) { - if (recentReports.length === 0 && personalDetails.length === 0) { - userToInvite = getUserToInviteOption({ - searchValue, - selectedOptions: config?.selectedOptions, - optionsToExclude, - }); - } - } + const userToInvite = pickUserToInvite({canInviteUser, recentReports, personalDetails, searchValue, config, optionsToExclude}); if (maxRecentReportsToShow > 0 && recentReports.length > maxRecentReportsToShow) { recentReports.splice(maxRecentReportsToShow); @@ -2549,6 +2565,7 @@ export { getEmptyOptions, shouldUseBoldText, getAlternateText, + pickUserToInvite, }; export type {MemberForList, CategorySection, CategoryTreeSection, Options, OptionList, SearchOption, PayeePersonalDetails, Category, Tax, TaxRatesOption, Option, OptionTree}; diff --git a/src/pages/ChatFinderPage/index.tsx b/src/pages/ChatFinderPage/index.tsx index 2afe1fd96e3d..facde356ff12 100644 --- a/src/pages/ChatFinderPage/index.tsx +++ b/src/pages/ChatFinderPage/index.tsx @@ -150,17 +150,25 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa } Timing.start(CONST.TIMING.SEARCH_FILTER_OPTIONS); + const newOptions1 = OptionsListUtils.filterOptions(searchOptions, debouncedSearchValue, {sortByReportTypeInSearch: true, preferChatroomsOverThreads: true}); const newOptions = findInSearchTree(debouncedSearchValue.toLowerCase().replace(aToZRegex, '')); + const userToInvite = OptionsListUtils.pickUserToInvite({ + canInviteUser: true, + recentReports: newOptions.recentReports, + personalDetails: newOptions.personalDetails, + searchValue: debouncedSearchValue, + optionsToExclude: [{login: CONST.EMAIL.NOTIFICATIONS}], + }); Timing.end(CONST.TIMING.SEARCH_FILTER_OPTIONS); const header = OptionsListUtils.getHeaderMessage(newOptions.recentReports.length > 0, false, debouncedSearchValue); return { recentReports: newOptions.recentReports, personalDetails: newOptions.personalDetails, - userToInvite: undefined, // newOptions.userToInvite, + userToInvite, headerMessage: header, }; - }, [debouncedSearchValue, findInSearchTree]); + }, [debouncedSearchValue, searchOptions, findInSearchTree]); const {recentReports, personalDetails: localPersonalDetails, userToInvite, headerMessage} = debouncedSearchValue.trim() !== '' ? filteredOptions : searchOptions; From 1d11ed2d534a13b3f17dbeb5ed836489ffef6c25 Mon Sep 17 00:00:00 2001 From: kirillzyusko Date: Fri, 13 Sep 2024 15:59:43 +0200 Subject: [PATCH 009/264] fix: make Marc discoverable again (when we search in second array, then we will always get - so we add +1 bias) --- src/libs/SuffixUkkonenTree.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 6d56c59a4ab4..7a7bb1bd4a3c 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -248,7 +248,7 @@ function makeTree(compose: Array>) { // TODO: check with Hanno whether we restore the data correctly let offset = 0; for (let i = 0; i < indexes.length; i++) { - const relativeIndex = index - offset; + const relativeIndex = index - offset + 1; if (relativeIndex < indexes[i].length && relativeIndex >= 0) { const option = indexes[i][relativeIndex]; if (option) { From 061efbf7b5e1798a79b3eee056a3fc5cb7e28672 Mon Sep 17 00:00:00 2001 From: SIMalik Date: Sat, 14 Sep 2024 14:20:29 +0500 Subject: [PATCH 010/264] Issue resolved: LHN - RBR appears on the wrong workspace chat for an error occurring on another workspace #47874 --- src/libs/OptionsListUtils.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libs/OptionsListUtils.ts b/src/libs/OptionsListUtils.ts index f191c1d06532..451727858654 100644 --- a/src/libs/OptionsListUtils.ts +++ b/src/libs/OptionsListUtils.ts @@ -471,7 +471,7 @@ function uniqFast(items: string[]): string[] { function getAllReportErrors(report: OnyxEntry, reportActions: OnyxEntry): OnyxCommon.Errors { const reportErrors = report?.errors ?? {}; const reportErrorFields = report?.errorFields ?? {}; - const reportActionsArray = Object.values(reportActions ?? {}); + const reportActionsArray = Object.values(reportActions ?? {}).filter(action => !ReportActionUtils.isDeletedAction(action)); const reportActionErrors: OnyxCommon.ErrorFields = {}; for (const action of reportActionsArray) { From b2076fdd20c513414b81fc1509af056efc79d044 Mon Sep 17 00:00:00 2001 From: Mateusz Rajski Date: Mon, 23 Sep 2024 15:46:17 +0200 Subject: [PATCH 011/264] Allow classic experience users to use NewDot travel page --- src/ONYXKEYS.ts | 4 ++++ src/libs/actions/Session/index.ts | 3 ++- src/pages/Travel/ManageTrips.tsx | 19 +++++++++++++++---- src/pages/Travel/MyTripsPage.tsx | 18 +++++++++++++++++- 4 files changed, 38 insertions(+), 6 deletions(-) diff --git a/src/ONYXKEYS.ts b/src/ONYXKEYS.ts index 68a9ca2f8502..67adae8e5ebc 100755 --- a/src/ONYXKEYS.ts +++ b/src/ONYXKEYS.ts @@ -433,6 +433,9 @@ const ONYXKEYS = { /** Stores recently used currencies */ RECENTLY_USED_CURRENCIES: 'nvp_recentlyUsedCurrencies', + /** Decides if we should return to OldDot after booking */ + SHOULD_RETURN_TO_OLD_DOT_AFTER_BOOKING: 'shouldReturnToClassicExperienceAfterBooking', + /** Collection Keys */ COLLECTION: { DOWNLOAD: 'download_', @@ -996,6 +999,7 @@ type OnyxValuesMapping = { [ONYXKEYS.IMPORTED_SPREADSHEET]: OnyxTypes.ImportedSpreadsheet; [ONYXKEYS.LAST_ROUTE]: string; [ONYXKEYS.NVP_SHOULD_HIDE_SAVED_SEARCH_RENAME_TOOLTIP]: boolean; + [ONYXKEYS.SHOULD_RETURN_TO_OLD_DOT_AFTER_BOOKING]: boolean | undefined; }; type OnyxValues = OnyxValuesMapping & OnyxCollectionValuesMapping & OnyxFormValuesMapping & OnyxFormDraftValuesMapping; diff --git a/src/libs/actions/Session/index.ts b/src/libs/actions/Session/index.ts index ab209e9bf928..6bd805b54c43 100644 --- a/src/libs/actions/Session/index.ts +++ b/src/libs/actions/Session/index.ts @@ -482,7 +482,7 @@ function signUpUser() { function signInAfterTransitionFromOldDot(transitionURL: string) { const [route, queryParams] = transitionURL.split('?'); - const {email, authToken, encryptedAuthToken, accountID, autoGeneratedLogin, autoGeneratedPassword, clearOnyxOnStart} = Object.fromEntries( + const {email, authToken, encryptedAuthToken, accountID, autoGeneratedLogin, autoGeneratedPassword, clearOnyxOnStart, shouldReturnToOldDotAfterBooking} = Object.fromEntries( queryParams.split('&').map((param) => { const [key, value] = param.split('='); return [key, value]; @@ -494,6 +494,7 @@ function signInAfterTransitionFromOldDot(transitionURL: string) { [ONYXKEYS.SESSION]: {email, authToken, encryptedAuthToken: decodeURIComponent(encryptedAuthToken), accountID: Number(accountID)}, [ONYXKEYS.CREDENTIALS]: {autoGeneratedLogin, autoGeneratedPassword}, }).then(App.openApp); + Onyx.set(ONYXKEYS.SHOULD_RETURN_TO_OLD_DOT_AFTER_BOOKING, shouldReturnToOldDotAfterBooking === 'true'); }; if (clearOnyxOnStart === 'true') { diff --git a/src/pages/Travel/ManageTrips.tsx b/src/pages/Travel/ManageTrips.tsx index 0591d8cf2fcf..c157a17e3d26 100644 --- a/src/pages/Travel/ManageTrips.tsx +++ b/src/pages/Travel/ManageTrips.tsx @@ -1,6 +1,6 @@ import {Str} from 'expensify-common'; import React, {useState} from 'react'; -import {Linking, View} from 'react-native'; +import {Linking, NativeModules, View} from 'react-native'; import {useOnyx} from 'react-native-onyx'; import type {FeatureListItem} from '@components/FeatureList'; import FeatureList from '@components/FeatureList'; @@ -12,6 +12,7 @@ import useLocalize from '@hooks/useLocalize'; import usePolicy from '@hooks/usePolicy'; import useResponsiveLayout from '@hooks/useResponsiveLayout'; import useThemeStyles from '@hooks/useThemeStyles'; +import Log from '@libs/Log'; import Navigation from '@libs/Navigation/Navigation'; import colors from '@styles/theme/colors'; import * as Link from '@userActions/Link'; @@ -37,6 +38,7 @@ function ManageTrips() { const {translate} = useLocalize(); const [travelSettings] = useOnyx(ONYXKEYS.NVP_TRAVEL_SETTINGS); const [activePolicyID] = useOnyx(ONYXKEYS.NVP_ACTIVE_POLICY_ID); + const [shouldReturnToOldDotAfterBooking] = useOnyx(ONYXKEYS.SHOULD_RETURN_TO_OLD_DOT_AFTER_BOOKING); const [account] = useOnyx(ONYXKEYS.ACCOUNT); const policy = usePolicy(activePolicyID); @@ -78,9 +80,18 @@ function ManageTrips() { if (ctaErrorMessage) { setCtaErrorMessage(''); } - Link.openTravelDotLink(activePolicyID)?.catch(() => { - setCtaErrorMessage(translate('travel.errorMessage')); - }); + Link.openTravelDotLink(activePolicyID) + ?.then(() => { + if (!NativeModules.HybridAppModule || !shouldReturnToOldDotAfterBooking) { + return; + } + + Log.info('[HybridApp] Returning to OldDot after opening TravelDot'); + NativeModules.HybridAppModule.closeReactNativeApp(false, false); + }) + ?.catch(() => { + setCtaErrorMessage(translate('travel.errorMessage')); + }); }} ctaErrorMessage={ctaErrorMessage} illustration={LottieAnimations.TripsEmptyState} diff --git a/src/pages/Travel/MyTripsPage.tsx b/src/pages/Travel/MyTripsPage.tsx index be29e8dc8c12..565269128708 100644 --- a/src/pages/Travel/MyTripsPage.tsx +++ b/src/pages/Travel/MyTripsPage.tsx @@ -1,14 +1,29 @@ -import React from 'react'; +import React, {useCallback} from 'react'; +import {NativeModules} from 'react-native'; +import {useOnyx} from 'react-native-onyx'; import FullPageNotFoundView from '@components/BlockingViews/FullPageNotFoundView'; import HeaderWithBackButton from '@components/HeaderWithBackButton'; import ScreenWrapper from '@components/ScreenWrapper'; import useLocalize from '@hooks/useLocalize'; import usePermissions from '@hooks/usePermissions'; +import Log from '@libs/Log'; +import Navigation from '@libs/Navigation/Navigation'; +import ONYXKEYS from '@src/ONYXKEYS'; import ManageTrips from './ManageTrips'; function MyTripsPage() { const {translate} = useLocalize(); const {canUseSpotnanaTravel} = usePermissions(); + const [shouldReturnToOldDotAfterBooking] = useOnyx(ONYXKEYS.SHOULD_RETURN_TO_OLD_DOT_AFTER_BOOKING); + + const onBackButtonPress = useCallback(() => { + if (NativeModules.HybridAppModule && shouldReturnToOldDotAfterBooking) { + Log.info('[HybridApp] Returning to OldDot after closing MyTripsPage'); + NativeModules.HybridAppModule.closeReactNativeApp(false, false); + return; + } + Navigation.goBack(); + }, [shouldReturnToOldDotAfterBooking]); return ( From 044f5754bd83babbfbe1824aaac13519652f004e Mon Sep 17 00:00:00 2001 From: SIMalik Date: Mon, 23 Sep 2024 18:55:32 +0500 Subject: [PATCH 012/264] Tiny lint issue resolved --- src/libs/OptionsListUtils.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libs/OptionsListUtils.ts b/src/libs/OptionsListUtils.ts index 451727858654..b9e607da5fb6 100644 --- a/src/libs/OptionsListUtils.ts +++ b/src/libs/OptionsListUtils.ts @@ -471,7 +471,7 @@ function uniqFast(items: string[]): string[] { function getAllReportErrors(report: OnyxEntry, reportActions: OnyxEntry): OnyxCommon.Errors { const reportErrors = report?.errors ?? {}; const reportErrorFields = report?.errorFields ?? {}; - const reportActionsArray = Object.values(reportActions ?? {}).filter(action => !ReportActionUtils.isDeletedAction(action)); + const reportActionsArray = Object.values(reportActions ?? {}).filter((action) => !ReportActionUtils.isDeletedAction(action)); const reportActionErrors: OnyxCommon.ErrorFields = {}; for (const action of reportActionsArray) { From c0e38b816d0bdbcf8fda37098adb17cb0fc58161 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Tue, 24 Sep 2024 10:16:32 +0200 Subject: [PATCH 013/264] add debug timings --- src/pages/ChatFinderPage/index.tsx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pages/ChatFinderPage/index.tsx b/src/pages/ChatFinderPage/index.tsx index facde356ff12..8e9dd684c94d 100644 --- a/src/pages/ChatFinderPage/index.tsx +++ b/src/pages/ChatFinderPage/index.tsx @@ -127,8 +127,9 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa console.log('build', performance.now() - start); function search(searchInput: string) { - start = performance.now(); + const searchStart = performance.now(); const [personalDetails, recentReports] = tree.findInSearchTree(searchInput); + console.log('findInSearchTree', performance.now() - searchStart); return { personalDetails, From 955cdcbd6247e40365ea2a36fc8bdaa77c975ccc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Tue, 24 Sep 2024 10:21:58 +0200 Subject: [PATCH 014/264] adjust values for search --- src/pages/ChatFinderPage/index.tsx | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/pages/ChatFinderPage/index.tsx b/src/pages/ChatFinderPage/index.tsx index 8e9dd684c94d..d02ad7a91b52 100644 --- a/src/pages/ChatFinderPage/index.tsx +++ b/src/pages/ChatFinderPage/index.tsx @@ -106,16 +106,24 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa { data: searchOptions.personalDetails, transform: (option) => { - // TODO: there are probably more fields we'd like to add to the search string return (option.login ?? '') + (option.login !== option.displayName ? option.displayName ?? '' : ''); }, }, { data: searchOptions.recentReports, transform: (option) => { - let searchStringForTree = (option.login ?? '') + (option.login !== option.displayName ? option.displayName ?? '' : ''); - searchStringForTree += option.reportID ?? ''; - searchStringForTree += option.name ?? ''; + let searchStringForTree = option.text ?? '' + searchStringForTree += option.login ?? ''; + + if (option.isThread) { + if (option.alternateText) { + searchStringForTree += option.alternateText; + } + } else if (!!option.isChatRoom || !!option.isPolicyExpenseChat) { + if (option.subtitle) { + searchStringForTree += option.subtitle; + } + } return searchStringForTree; }, From d7c8d885ada4aee2a2ee72ea273b47ae7984867a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Tue, 24 Sep 2024 10:48:55 +0200 Subject: [PATCH 015/264] correct display name for personal details search --- src/pages/ChatFinderPage/index.tsx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/pages/ChatFinderPage/index.tsx b/src/pages/ChatFinderPage/index.tsx index d02ad7a91b52..9bd790dae41c 100644 --- a/src/pages/ChatFinderPage/index.tsx +++ b/src/pages/ChatFinderPage/index.tsx @@ -106,7 +106,8 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa { data: searchOptions.personalDetails, transform: (option) => { - return (option.login ?? '') + (option.login !== option.displayName ? option.displayName ?? '' : ''); + const displayName = option.participantsList?.[0]?.displayName ?? ''; + return (option.login ?? '') + (option.login !== displayName ? displayName : ''); }, }, { @@ -138,6 +139,10 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa const searchStart = performance.now(); const [personalDetails, recentReports] = tree.findInSearchTree(searchInput); console.log('findInSearchTree', performance.now() - searchStart); + console.log("results", { + personalDetails, + recentReports, + }) return { personalDetails, From d07940d26c3e7d981957c41540cef629226dfbab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Tue, 24 Sep 2024 11:14:28 +0200 Subject: [PATCH 016/264] deduplicate search results --- src/libs/SuffixUkkonenTree.ts | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 7a7bb1bd4a3c..b1e34c8cf72c 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -88,6 +88,7 @@ function makeTree(compose: Array>) { indexes.push(searchIndexList); } const stringToSearch = `${strings.join('')}|`; // End Character + console.log("Search String length", stringToSearch.length); console.log('building search strings', performance.now() - start1); const a = stringToArray(stringToSearch); @@ -233,26 +234,21 @@ function makeTree(compose: Array>) { return occurrences; } - function findInSearchTree(searchInput: string) { + function findInSearchTree(searchInput: string): T[][] { const now = performance.now(); const result = findSubstring(searchInput); console.log('FindSubstring index result for searchInput', searchInput, result); + // Map the results to the original options - - const mappedResults: T[][] = Array.from({length: compose.length}, () => []); - console.log({result}); + const mappedResults = Array.from({length: compose.length}, () => new Set()); result.forEach((index) => { - // const textInSearchString = searchString.substring(index, searchString.indexOf(delimiterChar, index)); - // console.log('textInSearchString', textInSearchString); - - // TODO: check with Hanno whether we restore the data correctly let offset = 0; for (let i = 0; i < indexes.length; i++) { const relativeIndex = index - offset + 1; if (relativeIndex < indexes[i].length && relativeIndex >= 0) { const option = indexes[i][relativeIndex]; if (option) { - mappedResults[i].push(option); + mappedResults[i].add(option); } } else { offset += indexes[i].length; @@ -261,7 +257,7 @@ function makeTree(compose: Array>) { }); console.log('search', performance.now() - now); - return mappedResults; + return mappedResults.map((set) => Array.from(set)); } return { From 6bdee5d6c6e3b900189d88609e395abb34007830 Mon Sep 17 00:00:00 2001 From: Mateusz Rajski Date: Tue, 24 Sep 2024 12:03:53 +0200 Subject: [PATCH 017/264] Do not display explanation modal for OldDot users who use travel feature --- src/Expensify.tsx | 6 ++++-- src/libs/actions/Session/index.ts | 36 ++++++++++++++++++++----------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/src/Expensify.tsx b/src/Expensify.tsx index 62e7839b21f0..14310523695e 100644 --- a/src/Expensify.tsx +++ b/src/Expensify.tsx @@ -100,6 +100,7 @@ function Expensify({ const [session] = useOnyx(ONYXKEYS.SESSION); const [lastRoute] = useOnyx(ONYXKEYS.LAST_ROUTE); const [tryNewDotData] = useOnyx(ONYXKEYS.NVP_TRYNEWDOT); + const [shouldReturnToOldDotAfterBooking] = useOnyx(ONYXKEYS.SHOULD_RETURN_TO_OLD_DOT_AFTER_BOOKING); const [shouldShowRequire2FAModal, setShouldShowRequire2FAModal] = useState(false); useEffect(() => { @@ -119,12 +120,13 @@ function Expensify({ }, [isCheckingPublicRoom]); useEffect(() => { - if (splashScreenState !== CONST.BOOT_SPLASH_STATE.HIDDEN || tryNewDotData === undefined) { + // The last condition disables the explanation modal for classic experience users who only use the travel feature. + if (splashScreenState !== CONST.BOOT_SPLASH_STATE.HIDDEN || tryNewDotData === undefined || shouldReturnToOldDotAfterBooking) { return; } handleHybridAppOnboarding(); - }, [splashScreenState, tryNewDotData]); + }, [shouldReturnToOldDotAfterBooking, splashScreenState, tryNewDotData]); const isAuthenticated = useMemo(() => !!(session?.authToken ?? null), [session]); const autoAuthState = useMemo(() => session?.autoAuthState ?? '', [session]); diff --git a/src/libs/actions/Session/index.ts b/src/libs/actions/Session/index.ts index 6bd805b54c43..ae5acf94a925 100644 --- a/src/libs/actions/Session/index.ts +++ b/src/libs/actions/Session/index.ts @@ -489,21 +489,33 @@ function signInAfterTransitionFromOldDot(transitionURL: string) { }), ); - const setSessionDataAndOpenApp = () => { - Onyx.multiSet({ - [ONYXKEYS.SESSION]: {email, authToken, encryptedAuthToken: decodeURIComponent(encryptedAuthToken), accountID: Number(accountID)}, - [ONYXKEYS.CREDENTIALS]: {autoGeneratedLogin, autoGeneratedPassword}, - }).then(App.openApp); - Onyx.set(ONYXKEYS.SHOULD_RETURN_TO_OLD_DOT_AFTER_BOOKING, shouldReturnToOldDotAfterBooking === 'true'); + const clearOnyxForNewAccount = () => { + if (clearOnyxOnStart !== 'true') { + return Promise.resolve(); + } + + return Onyx.clear(KEYS_TO_PRESERVE); }; - if (clearOnyxOnStart === 'true') { - Onyx.clear(KEYS_TO_PRESERVE).then(setSessionDataAndOpenApp); - } else { - setSessionDataAndOpenApp(); - } + const setSessionDataAndOpenApp = new Promise((resolve) => { + clearOnyxForNewAccount() + .then(() => Onyx.set(ONYXKEYS.SHOULD_RETURN_TO_OLD_DOT_AFTER_BOOKING, shouldReturnToOldDotAfterBooking === 'true')) + .then(() => + Onyx.multiSet({ + [ONYXKEYS.SESSION]: {email, authToken, encryptedAuthToken: decodeURIComponent(encryptedAuthToken), accountID: Number(accountID)}, + [ONYXKEYS.CREDENTIALS]: {autoGeneratedLogin, autoGeneratedPassword}, + }), + ) + .then(App.openApp) + .catch((error) => { + Log.hmmm('[HybridApp] Initialization of HybridApp has failed. Forcing transition', {error}); + }) + .finally(() => { + resolve(route as Route); + }); + }); - return route as Route; + return setSessionDataAndOpenApp; } /** From 84d3231dc8472fe5e542564eb215b400a9cfb2ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Tue, 24 Sep 2024 13:25:04 +0200 Subject: [PATCH 018/264] unify string cleaning --- src/libs/SuffixUkkonenTree.ts | 28 ++++++++++++++++++++-------- src/pages/ChatFinderPage/index.tsx | 4 +--- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index b1e34c8cf72c..51bbe8f35fd4 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -24,6 +24,7 @@ function stringToArray(input: string) { res.push(charCode); } } + console.log("stringToArray", res) return res; } @@ -36,24 +37,28 @@ type PrepareDataParams = { transform: (data: T) => string; }; +function cleanedString(input: string) { + return input.toLowerCase().replace(aToZRegex, ''); +} + function prepareData({data, transform}: PrepareDataParams): [string, Array] { const searchIndexList: Array = []; const str = data .map((option) => { - let searchStringForTree = transform(option); + const searchStringForTree = transform(option); // Remove all none a-z chars: - searchStringForTree = searchStringForTree.toLowerCase().replace(aToZRegex, ''); + const cleanedSearchStringForTree = cleanedString(searchStringForTree); - if (searchStringForTree.length > 0) { + if (cleanedSearchStringForTree.length > 0) { // We need to push an array that has the same length as the length of the string we insert for this option: - const indexes = Array.from({length: searchStringForTree.length}, () => option); + const indexes = Array.from({length: cleanedSearchStringForTree.length}, () => option); // Note: we add undefined for the delimiter character searchIndexList.push(...indexes, undefined); } else { return undefined; } - return searchStringForTree; + return cleanedSearchStringForTree; }) // slightly faster alternative to `.filter(Boolean).join(delimiterChar)` .reduce((acc: string, curr) => { @@ -80,6 +85,9 @@ function prepareData({data, transform}: PrepareDataParams): [string, Array function makeTree(compose: Array>) { const start1 = performance.now(); const strings = []; + + // We might received multiple lists of data that we want to search in + // thus indexes is a list of those data lists const indexes: Array> = []; for (const {data, transform} of compose) { @@ -89,6 +97,7 @@ function makeTree(compose: Array>) { } const stringToSearch = `${strings.join('')}|`; // End Character console.log("Search String length", stringToSearch.length); + console.log(stringToSearch) console.log('building search strings', performance.now() - start1); const a = stringToArray(stringToSearch); @@ -205,6 +214,8 @@ function makeTree(compose: Array>) { */ function findSubstring(searchString: string) { const occurrences: number[] = []; + const cleanedSearchString = cleanedString(searchString); + const numericSearchQuery = stringToArray(cleanedSearchString); function dfs(node: number, depth: number) { const leftRange = l[node]; @@ -212,7 +223,7 @@ function makeTree(compose: Array>) { const rangeLen = node === 0 ? 0 : rightRange - leftRange + 1; for (let i = 0; i < rangeLen && depth + i < searchString.length; i++) { - if (searchString.charCodeAt(depth + i) - CHAR_CODE_A !== a[leftRange + i]) { + if (numericSearchQuery[depth + i] !== a[leftRange + i]) { return; } } @@ -236,8 +247,9 @@ function makeTree(compose: Array>) { function findInSearchTree(searchInput: string): T[][] { const now = performance.now(); - const result = findSubstring(searchInput); - console.log('FindSubstring index result for searchInput', searchInput, result); + const cleanedSearchInput = searchInput.toLowerCase().replace(aToZRegex, ''); + const result = findSubstring(cleanedSearchInput); + console.log('FindSubstring index result for searchInput', cleanedSearchInput, result); // Map the results to the original options const mappedResults = Array.from({length: compose.length}, () => new Set()); diff --git a/src/pages/ChatFinderPage/index.tsx b/src/pages/ChatFinderPage/index.tsx index 9bd790dae41c..9663dafe249c 100644 --- a/src/pages/ChatFinderPage/index.tsx +++ b/src/pages/ChatFinderPage/index.tsx @@ -52,8 +52,6 @@ const setPerformanceTimersEnd = () => { const ChatFinderPageFooterInstance = ; -const aToZRegex = /[^a-z]/gi; - function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPageProps) { const [isScreenTransitionEnd, setIsScreenTransitionEnd] = useState(false); const {translate} = useLocalize(); @@ -165,7 +163,7 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa Timing.start(CONST.TIMING.SEARCH_FILTER_OPTIONS); const newOptions1 = OptionsListUtils.filterOptions(searchOptions, debouncedSearchValue, {sortByReportTypeInSearch: true, preferChatroomsOverThreads: true}); - const newOptions = findInSearchTree(debouncedSearchValue.toLowerCase().replace(aToZRegex, '')); + const newOptions = findInSearchTree(debouncedSearchValue); const userToInvite = OptionsListUtils.pickUserToInvite({ canInviteUser: true, recentReports: newOptions.recentReports, From eb6b3713b7b8a3dd610859e4db515c5dadacb21e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Tue, 24 Sep 2024 15:02:18 +0200 Subject: [PATCH 019/264] fix search function temporarily --- src/libs/SuffixUkkonenTree.ts | 159 ++++++++++++++++---------------- tests/unit/SuffixUkkonenTree.ts | 26 ++++++ 2 files changed, 104 insertions(+), 81 deletions(-) create mode 100644 tests/unit/SuffixUkkonenTree.ts diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 51bbe8f35fd4..feaa2c37169f 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -1,5 +1,3 @@ -import enEmojis from '@assets/emojis/en'; - const CHAR_CODE_A = 'a'.charCodeAt(0); const ALPHABET_SIZE = 28; const DELIMITER_CHAR_CODE = ALPHABET_SIZE - 2; @@ -24,13 +22,12 @@ function stringToArray(input: string) { res.push(charCode); } } - console.log("stringToArray", res) return res; } const aToZRegex = /[^a-z]/gi; // The character that separates the different options in the search string -const delimiterChar = '{'; +const DELIMITER_CHAR = String.fromCharCode(DELIMITER_CHAR_CODE + CHAR_CODE_A); type PrepareDataParams = { data: T[]; @@ -70,7 +67,7 @@ function prepareData({data, transform}: PrepareDataParams): [string, Array return curr; } - return `${acc}${delimiterChar}${curr}`; + return `${acc}${DELIMITER_CHAR}${curr}`; }, ''); return [str, searchIndexList]; @@ -78,36 +75,32 @@ function prepareData({data, transform}: PrepareDataParams): [string, Array /** * Makes a tree from an input string - * **Important:** As we only support an alphabet of 26 characters, the input string should only contain characters from a-z. - * Thus, all input data must be cleaned before being passed to this function. - * If you then use this tree for search you should clean your search input as well (so that a search query of "testuser@myEmail.com" becomes "testusermyemailcom"). */ -function makeTree(compose: Array>) { +function makeTree(lists: Array>) { const start1 = performance.now(); - const strings = []; + const stringForList: string[] = []; // We might received multiple lists of data that we want to search in // thus indexes is a list of those data lists - const indexes: Array> = []; + const indexesForList: Array> = []; - for (const {data, transform} of compose) { + for (const {data, transform} of lists) { const [str, searchIndexList] = prepareData({data, transform}); - strings.push(str); - indexes.push(searchIndexList); + stringForList.push(str); + indexesForList.push(searchIndexList); } - const stringToSearch = `${strings.join('')}|`; // End Character - console.log("Search String length", stringToSearch.length); - console.log(stringToSearch) + const stringToSearch = `${stringForList.join('')}|`; // End Character console.log('building search strings', performance.now() - start1); const a = stringToArray(stringToSearch); - const N = 25000; // TODO: i reduced this number from 1_000_000 down to this, for faster performance - however its possible that it needs to be bigger for larger search strings + console.log('Search String length', stringToSearch.length); + const N = 25_000; // TODO: i reduced this number from 1_000_000 down to this, for faster performance - however its possible that it needs to be bigger for larger search strings const start = performance.now(); - const t = Array.from({length: N}, () => Array(ALPHABET_SIZE).fill(-1) as number[]); - const l = Array(N).fill(0) as number[]; - const r = Array(N).fill(0) as number[]; - const p = Array(N).fill(0) as number[]; - const s = Array(N).fill(0) as number[]; + const t = Array.from({length: N}, () => Array(ALPHABET_SIZE).fill(-1)); + const l = Array(N).fill(0); + const r = Array(N).fill(0); + const p = Array(N).fill(0); + const s = Array(N).fill(0); const end = performance.now(); console.log('Allocating memory took:', end - start, 'ms'); @@ -212,58 +205,98 @@ function makeTree(compose: Array>) { * This function will return the index(es) of found occurrences within this big string. * So, when searching for "an", it would return [1, 4, 11]. */ - function findSubstring(searchString: string) { + // function findSubstring(searchString: string) { + // const occurrences: number[] = []; + // // const cleanedSearchString = cleanedString(searchString); + // // const numericSearchQuery = stringToArray(cleanedSearchString); + + // function dfs(node: number, depth: number) { + // const leftRange = l[node]; + // const rightRange = r[node]; + // const rangeLen = node === 0 ? 0 : rightRange - leftRange + 1; + + // for (let i = 0; i < rangeLen && depth + i < searchString.length; i++) { + // if (searchString.charCodeAt(depth + i) - CHAR_CODE_A !== a[leftRange + i]) { + // return; + // } + // } + + // let isLeaf = true; + // for (let i = 0; i < ALPHABET_SIZE; ++i) { + // if (t[node][i] !== -1) { + // isLeaf = false; + // dfs(t[node][i], depth + rangeLen); + // } + // } + + // if (isLeaf && depth >= searchString.length) { + // occurrences.push(a.length - (depth + rangeLen)); + // } + // } + + // dfs(0, 0); + // return occurrences; + // } + + // TODO: replace, other search function is broken in edge cases we need to address first + function findSubstring(sString: string) { + const s = stringToArray(sString); const occurrences: number[] = []; - const cleanedSearchString = cleanedString(searchString); - const numericSearchQuery = stringToArray(cleanedSearchString); + const st: Array<[number, number]> = [[0, 0]]; + + while (st.length > 0) { + const [node, depth] = st.pop()!; - function dfs(node: number, depth: number) { + let isLeaf = true; const leftRange = l[node]; const rightRange = r[node]; const rangeLen = node === 0 ? 0 : rightRange - leftRange + 1; - for (let i = 0; i < rangeLen && depth + i < searchString.length; i++) { - if (numericSearchQuery[depth + i] !== a[leftRange + i]) { - return; + let matches = true; + for (let i = 0; i < rangeLen && depth + i < s.length; i++) { + if (s[depth + i] !== a[leftRange + i]) { + matches = false; + break; } } - let isLeaf = true; - for (let i = 0; i < ALPHABET_SIZE; ++i) { + if (!matches) { + continue; + } + + for (let i = ALPHABET_SIZE - 1; i >= 0; --i) { if (t[node][i] !== -1) { isLeaf = false; - dfs(t[node][i], depth + rangeLen); + st.push([t[node][i], depth + rangeLen]); } } - if (isLeaf && depth >= searchString.length) { + if (isLeaf && depth + rangeLen >= s.length) { occurrences.push(a.length - (depth + rangeLen)); } } - dfs(0, 0); return occurrences; } function findInSearchTree(searchInput: string): T[][] { const now = performance.now(); - const cleanedSearchInput = searchInput.toLowerCase().replace(aToZRegex, ''); - const result = findSubstring(cleanedSearchInput); - console.log('FindSubstring index result for searchInput', cleanedSearchInput, result); - + const result = findSubstring(searchInput); + console.log('FindSubstring index result for searchInput', searchInput, result); + // Map the results to the original options - const mappedResults = Array.from({length: compose.length}, () => new Set()); + const mappedResults = Array.from({length: lists.length}, () => new Set()); result.forEach((index) => { let offset = 0; - for (let i = 0; i < indexes.length; i++) { + for (let i = 0; i < indexesForList.length; i++) { const relativeIndex = index - offset + 1; - if (relativeIndex < indexes[i].length && relativeIndex >= 0) { - const option = indexes[i][relativeIndex]; + if (relativeIndex < indexesForList[i].length && relativeIndex >= 0) { + const option = indexesForList[i][relativeIndex]; if (option) { mappedResults[i].add(option); } } else { - offset += indexes[i].length; + offset += indexesForList[i].length; } } }); @@ -279,40 +312,4 @@ function makeTree(compose: Array>) { }; } -function performanceProfile(input: PrepareDataParams, search = 'sasha') { - // TODO: For emojis we could precalculate the makeTree function during build time using a babel plugin - // maybe babel plugin that just precalculates the result of function execution (so that it can be generic purpose plugin) - const {build, findSubstring} = makeTree([input]); - - const buildStart = performance.now(); - build(); - const buildEnd = performance.now(); - console.log('Building time:', buildEnd - buildStart, 'ms'); - - const searchStart = performance.now(); - const results = findSubstring(search); - const searchEnd = performance.now(); - console.log('Search time:', searchEnd - searchStart, 'ms'); - console.log(results); - - return { - buildTime: buildEnd - buildStart, - recursiveSearchTime: searchEnd - searchStart, - }; -} - -// Demo function testing the performance for emojis -function testEmojis() { - const data = Object.values(enEmojis); - return performanceProfile( - { - data, - transform: ({keywords}) => { - return `${keywords.join('')}{`; - }, - }, - 'smile', - ); -} - -export {makeTree, prepareData, testEmojis}; +export {makeTree, prepareData}; diff --git a/tests/unit/SuffixUkkonenTree.ts b/tests/unit/SuffixUkkonenTree.ts new file mode 100644 index 000000000000..7a5e49d691fd --- /dev/null +++ b/tests/unit/SuffixUkkonenTree.ts @@ -0,0 +1,26 @@ +import {makeTree} from '@libs/SuffixUkkonenTree'; + +describe('SuffixUkkonenTree', () => { + it('should work', () => { + const tree = makeTree([ + { + data: ['banana'], + transform: (data) => data, + }, + ]); + tree.build(); + expect(tree.findInSearchTree('an')).toEqual([['banana']]); + }); + + it('should work 2', () => { + const tree = makeTree([ + { + data: ['banana', 'test'], + transform: (data) => data, + }, + ]); + tree.build(); + + expect(tree.findInSearchTree('es')).toEqual([['test']]); + }); +}); From 60021016800055089a6034bb77529f6f510f6896 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 08:29:41 +0200 Subject: [PATCH 020/264] make it work with numbers by base26 everything thats bigger than our alphabet --- src/libs/SuffixUkkonenTree.ts | 133 +++++++++++++++++++------------- tests/unit/SuffixUkkonenTree.ts | 42 +++++++++- 2 files changed, 118 insertions(+), 57 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index feaa2c37169f..59838f552b4e 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -1,76 +1,98 @@ const CHAR_CODE_A = 'a'.charCodeAt(0); -const ALPHABET_SIZE = 28; +const LETTER_ALPHABET_SIZE = 26; +const ALPHABET_SIZE = LETTER_ALPHABET_SIZE + 3; // +3: special char, delimiter char, end char +const SPECIAL_CHAR_CODE = ALPHABET_SIZE - 3; const DELIMITER_CHAR_CODE = ALPHABET_SIZE - 2; +const END_CHAR_CODE = ALPHABET_SIZE - 1; + +const nonAlphanumericRegex = /[^a-z0-9]/gi; + +// The character that separates the different options in the search string +const DELIMITER_CHAR = String.fromCharCode(DELIMITER_CHAR_CODE + CHAR_CODE_A); + +const END_CHAR = String.fromCharCode(END_CHAR_CODE + CHAR_CODE_A); // TODO: // make makeTree faster // how to deal with unicode characters such as spanish ones? // i think we need to support numbers as well +function convertToBase26(num: number): string { + if (num < 0) { + throw new Error('Input must be a non-negative integer'); + } + + const alphabet = 'abcdefghijklmnopqrstuvwxyz'; + let result = ''; + let numCopy = num; + + do { + numCopy -= 1; // Adjust to 0-based index + result = alphabet[numCopy % 26] + result; + numCopy = Math.floor(numCopy / 26); + } while (numCopy > 0); + + return result; +} + /** * Converts a string to an array of numbers representing the characters of the string. * The numbers are offset by the character code of 'a' (97). * - This is so that the numbers from a-z are in the range 0-25. - * - 26 is for the delimiter character "{", - * - 27 is for the end character "|". + * - 26 is for encoding special characters (everything that is bigger than z will be encoded as "specialCharacter + base26(charCode))" + * - 27 is for the delimiter character + * - 28 is for the end character */ function stringToArray(input: string) { const res: number[] = []; - for (let i = 0; i < input.length; i++) { - const charCode = input.charCodeAt(i) - CHAR_CODE_A; - if (charCode >= 0 && charCode < ALPHABET_SIZE) { - res.push(charCode); + for (const char of input) { + const charCode = char.charCodeAt(0); + const charCodeABased = charCode - CHAR_CODE_A; + // TODO: each word should be converted on its own to stringToArray, so that the words can contain the special chars (which would get base26 encoded) + // When we do this we probably want to check here if the words are in the LETTER + SPECIAL_CHAR range + if (charCodeABased >= 0 && charCodeABased < ALPHABET_SIZE) { + res.push(charCodeABased); + } else { + const asBase26String = convertToBase26(charCode); + const asCharCodes = stringToArray(asBase26String); + res.push(SPECIAL_CHAR_CODE, ...asCharCodes); } } return res; } -const aToZRegex = /[^a-z]/gi; -// The character that separates the different options in the search string -const DELIMITER_CHAR = String.fromCharCode(DELIMITER_CHAR_CODE + CHAR_CODE_A); - type PrepareDataParams = { data: T[]; transform: (data: T) => string; }; function cleanedString(input: string) { - return input.toLowerCase().replace(aToZRegex, ''); + return input.toLowerCase().replace(nonAlphanumericRegex, ''); } -function prepareData({data, transform}: PrepareDataParams): [string, Array] { +function prepareData({data, transform}: PrepareDataParams): [number[], Array] { const searchIndexList: Array = []; - const str = data - .map((option) => { - const searchStringForTree = transform(option); - // Remove all none a-z chars: - const cleanedSearchStringForTree = cleanedString(searchStringForTree); - - if (cleanedSearchStringForTree.length > 0) { - // We need to push an array that has the same length as the length of the string we insert for this option: - const indexes = Array.from({length: cleanedSearchStringForTree.length}, () => option); - // Note: we add undefined for the delimiter character - searchIndexList.push(...indexes, undefined); - } else { - return undefined; - } + const allDataAsNumbers: number[] = []; + data.forEach((option) => { + const searchStringForTree = transform(option); + // Remove all none a-z chars: + const cleanedSearchStringForTree = cleanedString(searchStringForTree); + + if (cleanedSearchStringForTree.length === 0) { + return; + } - return cleanedSearchStringForTree; - }) - // slightly faster alternative to `.filter(Boolean).join(delimiterChar)` - .reduce((acc: string, curr) => { - if (!curr) { - return acc; - } + const numericRepresentation = stringToArray(cleanedSearchStringForTree); - if (acc === '') { - return curr; - } + // We need to push an array that has the same length as the length of the string we insert for this option: + const indexes = Array.from({length: numericRepresentation.length}, () => option); + // Note: we add undefined for the delimiter character + searchIndexList.push(...indexes, undefined); - return `${acc}${DELIMITER_CHAR}${curr}`; - }, ''); + allDataAsNumbers.push(...numericRepresentation, DELIMITER_CHAR_CODE); + }); - return [str, searchIndexList]; + return [allDataAsNumbers, searchIndexList]; } /** @@ -78,22 +100,22 @@ function prepareData({data, transform}: PrepareDataParams): [string, Array */ function makeTree(lists: Array>) { const start1 = performance.now(); - const stringForList: string[] = []; + const listsAsConcatedNumericList: number[] = []; // We might received multiple lists of data that we want to search in // thus indexes is a list of those data lists const indexesForList: Array> = []; for (const {data, transform} of lists) { - const [str, searchIndexList] = prepareData({data, transform}); - stringForList.push(str); + const [numericRepresentation, searchIndexList] = prepareData({data, transform}); + listsAsConcatedNumericList.push(...numericRepresentation); indexesForList.push(searchIndexList); } - const stringToSearch = `${stringForList.join('')}|`; // End Character + listsAsConcatedNumericList.push(END_CHAR_CODE); console.log('building search strings', performance.now() - start1); - const a = stringToArray(stringToSearch); - console.log('Search String length', stringToSearch.length); + console.log('Search String length', listsAsConcatedNumericList.length); + console.log('Numeric representation', listsAsConcatedNumericList); const N = 25_000; // TODO: i reduced this number from 1_000_000 down to this, for faster performance - however its possible that it needs to be bigger for larger search strings const start = performance.now(); const t = Array.from({length: N}, () => Array(ALPHABET_SIZE).fill(-1)); @@ -110,7 +132,7 @@ function makeTree(lists: Array>) { let la = 0; function initializeTree() { - r.fill(a.length - 1); + r.fill(listsAsConcatedNumericList.length - 1); s[0] = 1; l[0] = -1; r[0] = -1; @@ -129,7 +151,7 @@ function makeTree(lists: Array>) { tv = t[tv][c]; tp = l[tv]; } - if (tp === -1 || c === a[tp]) { + if (tp === -1 || c === listsAsConcatedNumericList[tp]) { tp++; } else { splitEdge(c); @@ -154,13 +176,13 @@ function makeTree(lists: Array>) { l[ts] = l[tv]; r[ts] = tp - 1; p[ts] = p[tv]; - t[ts][a[tp]] = tv; + t[ts][listsAsConcatedNumericList[tp]] = tv; t[ts][c] = ts + 1; l[ts + 1] = la; p[ts + 1] = ts; l[tv] = tp; p[tv] = ts; - t[p[ts]][a[l[ts]]] = ts; + t[p[ts]][listsAsConcatedNumericList[l[ts]]] = ts; ts += 2; handleDescent(ts); } @@ -169,7 +191,7 @@ function makeTree(lists: Array>) { tv = s[p[ts - 2]]; tp = l[ts - 2]; while (tp <= r[ts - 2]) { - tv = t[tv][a[tp]]; + tv = t[tv][listsAsConcatedNumericList[tp]]; tp += r[tv] - l[tv] + 1; } if (tp === r[ts - 2] + 1) { @@ -187,8 +209,8 @@ function makeTree(lists: Array>) { function build() { initializeTree(); - for (la = 0; la < a.length; ++la) { - const c = a[la]; + for (la = 0; la < listsAsConcatedNumericList.length; ++la) { + const c = listsAsConcatedNumericList[la]; processCharacter(c); } } @@ -241,6 +263,7 @@ function makeTree(lists: Array>) { // TODO: replace, other search function is broken in edge cases we need to address first function findSubstring(sString: string) { const s = stringToArray(sString); + console.log('searching for', sString, s); const occurrences: number[] = []; const st: Array<[number, number]> = [[0, 0]]; @@ -254,7 +277,7 @@ function makeTree(lists: Array>) { let matches = true; for (let i = 0; i < rangeLen && depth + i < s.length; i++) { - if (s[depth + i] !== a[leftRange + i]) { + if (s[depth + i] !== listsAsConcatedNumericList[leftRange + i]) { matches = false; break; } @@ -272,7 +295,7 @@ function makeTree(lists: Array>) { } if (isLeaf && depth + rangeLen >= s.length) { - occurrences.push(a.length - (depth + rangeLen)); + occurrences.push(listsAsConcatedNumericList.length - (depth + rangeLen)); } } diff --git a/tests/unit/SuffixUkkonenTree.ts b/tests/unit/SuffixUkkonenTree.ts index 7a5e49d691fd..d2198ce92279 100644 --- a/tests/unit/SuffixUkkonenTree.ts +++ b/tests/unit/SuffixUkkonenTree.ts @@ -1,7 +1,7 @@ import {makeTree} from '@libs/SuffixUkkonenTree'; describe('SuffixUkkonenTree', () => { - it('should work', () => { + it('should insert, build, and find the word', () => { const tree = makeTree([ { data: ['banana'], @@ -12,7 +12,7 @@ describe('SuffixUkkonenTree', () => { expect(tree.findInSearchTree('an')).toEqual([['banana']]); }); - it('should work 2', () => { + it('should work with multiple words', () => { const tree = makeTree([ { data: ['banana', 'test'], @@ -23,4 +23,42 @@ describe('SuffixUkkonenTree', () => { expect(tree.findInSearchTree('es')).toEqual([['test']]); }); + + it('should work when providing two data sets', () => { + const tree = makeTree([ + { + data: ['erica', 'banana'], + transform: (data) => data, + }, + { + data: ['banana', 'test'], + transform: (data) => data, + }, + ]); + tree.build(); + + expect(tree.findInSearchTree('es')).toEqual([[], ['test']]); + }); + + it('should work with numbers', () => { + const tree = makeTree([ + { + data: [1, 2, 3, 4, 5], + transform: (data) => String(data), + }, + ]); + tree.build(); + expect(tree.findInSearchTree('2')).toEqual([[2]]); + }); + + it('should work with unicodes', () => { + const tree = makeTree([ + { + data: ['banana', 'ñèşťǒř'], + transform: (data) => data, + }, + ]); + tree.build(); + expect(tree.findInSearchTree('èşť')).toEqual([['ñèşťǒř']]); + }); }); From 131c30307c86cfdeef508d4fc975534122ebbb59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 08:30:47 +0200 Subject: [PATCH 021/264] don't add delimiter char at the end --- src/libs/SuffixUkkonenTree.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 59838f552b4e..db3858e70b60 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -73,7 +73,7 @@ function cleanedString(input: string) { function prepareData({data, transform}: PrepareDataParams): [number[], Array] { const searchIndexList: Array = []; const allDataAsNumbers: number[] = []; - data.forEach((option) => { + data.forEach((option, index) => { const searchStringForTree = transform(option); // Remove all none a-z chars: const cleanedSearchStringForTree = cleanedString(searchStringForTree); @@ -89,7 +89,10 @@ function prepareData({data, transform}: PrepareDataParams): [number[], Arr // Note: we add undefined for the delimiter character searchIndexList.push(...indexes, undefined); - allDataAsNumbers.push(...numericRepresentation, DELIMITER_CHAR_CODE); + allDataAsNumbers.push(...numericRepresentation); + if (index < data.length - 1) { + allDataAsNumbers.push(DELIMITER_CHAR_CODE); + } }); return [allDataAsNumbers, searchIndexList]; From 0a89089ea1723805ed84a4c670870f53efba8bb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 09:02:44 +0200 Subject: [PATCH 022/264] support for unicode --- src/libs/SuffixUkkonenTree.ts | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index db3858e70b60..c8e025b58554 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -5,7 +5,7 @@ const SPECIAL_CHAR_CODE = ALPHABET_SIZE - 3; const DELIMITER_CHAR_CODE = ALPHABET_SIZE - 2; const END_CHAR_CODE = ALPHABET_SIZE - 1; -const nonAlphanumericRegex = /[^a-z0-9]/gi; +const nonAlphanumericRegex = /[^0-9\p{L}]/gu; // The character that separates the different options in the search string const DELIMITER_CHAR = String.fromCharCode(DELIMITER_CHAR_CODE + CHAR_CODE_A); @@ -70,13 +70,17 @@ function cleanedString(input: string) { return input.toLowerCase().replace(nonAlphanumericRegex, ''); } +let timeSpendCleaning = 0; function prepareData({data, transform}: PrepareDataParams): [number[], Array] { const searchIndexList: Array = []; const allDataAsNumbers: number[] = []; + timeSpendCleaning = 0; data.forEach((option, index) => { const searchStringForTree = transform(option); // Remove all none a-z chars: + const start = performance.now(); const cleanedSearchStringForTree = cleanedString(searchStringForTree); + timeSpendCleaning += performance.now() - start; if (cleanedSearchStringForTree.length === 0) { return; @@ -94,6 +98,7 @@ function prepareData({data, transform}: PrepareDataParams): [number[], Arr allDataAsNumbers.push(DELIMITER_CHAR_CODE); } }); + console.log('cleaning', timeSpendCleaning, 'ms'); return [allDataAsNumbers, searchIndexList]; } @@ -111,15 +116,16 @@ function makeTree(lists: Array>) { for (const {data, transform} of lists) { const [numericRepresentation, searchIndexList] = prepareData({data, transform}); - listsAsConcatedNumericList.push(...numericRepresentation); + for (const num of numericRepresentation) { + listsAsConcatedNumericList.push(num); + } indexesForList.push(searchIndexList); } listsAsConcatedNumericList.push(END_CHAR_CODE); console.log('building search strings', performance.now() - start1); console.log('Search String length', listsAsConcatedNumericList.length); - console.log('Numeric representation', listsAsConcatedNumericList); - const N = 25_000; // TODO: i reduced this number from 1_000_000 down to this, for faster performance - however its possible that it needs to be bigger for larger search strings + const N = 150_000; // TODO: i reduced this number from 1_000_000 down to this, for faster performance - however its possible that it needs to be bigger for larger search strings const start = performance.now(); const t = Array.from({length: N}, () => Array(ALPHABET_SIZE).fill(-1)); const l = Array(N).fill(0); From def772b622a99b1ec967000108c3c34b84aeccbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 09:03:18 +0200 Subject: [PATCH 023/264] extended test case to make sure right item is returned --- tests/unit/SuffixUkkonenTree.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/SuffixUkkonenTree.ts b/tests/unit/SuffixUkkonenTree.ts index d2198ce92279..fc5f9c14d1ab 100644 --- a/tests/unit/SuffixUkkonenTree.ts +++ b/tests/unit/SuffixUkkonenTree.ts @@ -54,7 +54,7 @@ describe('SuffixUkkonenTree', () => { it('should work with unicodes', () => { const tree = makeTree([ { - data: ['banana', 'ñèşťǒř'], + data: ['banana', 'ñèşťǒř', 'test'], transform: (data) => data, }, ]); From 0806fb0f6ccf40f7fa4d958d92b5fef681fde0e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 09:07:32 +0200 Subject: [PATCH 024/264] use concat instead of for-loop --- src/libs/SuffixUkkonenTree.ts | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index c8e025b58554..5151a4b20f58 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -108,7 +108,7 @@ function prepareData({data, transform}: PrepareDataParams): [number[], Arr */ function makeTree(lists: Array>) { const start1 = performance.now(); - const listsAsConcatedNumericList: number[] = []; + let listsAsConcatedNumericList: number[] = []; // We might received multiple lists of data that we want to search in // thus indexes is a list of those data lists @@ -116,9 +116,7 @@ function makeTree(lists: Array>) { for (const {data, transform} of lists) { const [numericRepresentation, searchIndexList] = prepareData({data, transform}); - for (const num of numericRepresentation) { - listsAsConcatedNumericList.push(num); - } + listsAsConcatedNumericList = listsAsConcatedNumericList.concat(numericRepresentation); indexesForList.push(searchIndexList); } listsAsConcatedNumericList.push(END_CHAR_CODE); From c660312eeb76a2b2a2943b8410bc444801ccb0c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 09:15:45 +0200 Subject: [PATCH 025/264] Revert "use concat instead of for-loop" This reverts commit 0806fb0f6ccf40f7fa4d958d92b5fef681fde0e6. --- src/libs/SuffixUkkonenTree.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 5151a4b20f58..c8e025b58554 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -108,7 +108,7 @@ function prepareData({data, transform}: PrepareDataParams): [number[], Arr */ function makeTree(lists: Array>) { const start1 = performance.now(); - let listsAsConcatedNumericList: number[] = []; + const listsAsConcatedNumericList: number[] = []; // We might received multiple lists of data that we want to search in // thus indexes is a list of those data lists @@ -116,7 +116,9 @@ function makeTree(lists: Array>) { for (const {data, transform} of lists) { const [numericRepresentation, searchIndexList] = prepareData({data, transform}); - listsAsConcatedNumericList = listsAsConcatedNumericList.concat(numericRepresentation); + for (const num of numericRepresentation) { + listsAsConcatedNumericList.push(num); + } indexesForList.push(searchIndexList); } listsAsConcatedNumericList.push(END_CHAR_CODE); From 24a59b74e44fecdf63b8d77035dce030b45685a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 09:16:16 +0200 Subject: [PATCH 026/264] add note --- src/libs/SuffixUkkonenTree.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index c8e025b58554..3541965bcd2b 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -117,6 +117,7 @@ function makeTree(lists: Array>) { for (const {data, transform} of lists) { const [numericRepresentation, searchIndexList] = prepareData({data, transform}); for (const num of numericRepresentation) { + // we have to use a loop here as push with spread yields a maximum call stack exceeded error listsAsConcatedNumericList.push(num); } indexesForList.push(searchIndexList); From 146d51b7f55ddd06a64de9e867bc6f393aed38c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 10:23:32 +0200 Subject: [PATCH 027/264] wip: make t dynamic instead of preallocating --- src/libs/SuffixUkkonenTree.ts | 63 +++++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 14 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 3541965bcd2b..e89cab194ced 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -1,3 +1,4 @@ +/* eslint-disable no-continue */ const CHAR_CODE_A = 'a'.charCodeAt(0); const LETTER_ALPHABET_SIZE = 26; const ALPHABET_SIZE = LETTER_ALPHABET_SIZE + 3; // +3: special char, delimiter char, end char @@ -127,14 +128,19 @@ function makeTree(lists: Array>) { console.log('Search String length', listsAsConcatedNumericList.length); const N = 150_000; // TODO: i reduced this number from 1_000_000 down to this, for faster performance - however its possible that it needs to be bigger for larger search strings - const start = performance.now(); - const t = Array.from({length: N}, () => Array(ALPHABET_SIZE).fill(-1)); + // const start = performance.now(); + // const t = Array.from({length: N}, () => Array(ALPHABET_SIZE).fill(-1)); const l = Array(N).fill(0); const r = Array(N).fill(0); const p = Array(N).fill(0); const s = Array(N).fill(0); - const end = performance.now(); - console.log('Allocating memory took:', end - start, 'ms'); + // const end = performance.now(); + // console.log('Allocating memory took:', end - start, 'ms'); + const t: Array = []; + // const l: number[] = []; + // const r: number[] = []; + // const p: number[] = []; + // const s: number[] = []; let tv = 0; let tp = 0; @@ -148,17 +154,25 @@ function makeTree(lists: Array>) { r[0] = -1; l[1] = -1; r[1] = -1; - t[1].fill(0); + // t[1].fill(0); + t[1] = Array(ALPHABET_SIZE).fill(0); } function processCharacter(c: number) { while (true) { if (r[tv] < tp) { - if (t[tv][c] === -1) { + let curNode = t[tv]; + + if (curNode === undefined) { + curNode = Array(ALPHABET_SIZE).fill(-1); + t[tv] = curNode; + } + + if (curNode[c] === -1) { createNewLeaf(c); continue; } - tv = t[tv][c]; + tv = curNode[c]; tp = l[tv]; } if (tp === -1 || c === listsAsConcatedNumericList[tp]) { @@ -175,7 +189,12 @@ function makeTree(lists: Array>) { } function createNewLeaf(c: number) { - t[tv][c] = ts; + const curNode = t[tv]; + if (curNode === undefined) { + throw new Error('createNewLeaf: curNode should not be undefined'); + } + + curNode[c] = ts; l[ts] = la; p[ts++] = tv; tv = s[tv]; @@ -186,13 +205,24 @@ function makeTree(lists: Array>) { l[ts] = l[tv]; r[ts] = tp - 1; p[ts] = p[tv]; - t[ts][listsAsConcatedNumericList[tp]] = tv; - t[ts][c] = ts + 1; + let tTs = t[ts]; + if (tTs === undefined) { + tTs = Array(ALPHABET_SIZE).fill(-1); + t[ts] = tTs; + } + tTs[listsAsConcatedNumericList[tp]] = tv; + tTs[c] = ts + 1; l[ts + 1] = la; p[ts + 1] = ts; l[tv] = tp; p[tv] = ts; - t[p[ts]][listsAsConcatedNumericList[l[ts]]] = ts; + + let tpts = t[p[ts]]; + if (tpts === undefined) { + tpts = Array(ALPHABET_SIZE).fill(-1); + t[p[ts]] = tpts; + } + tpts[listsAsConcatedNumericList[l[ts]]] = ts; ts += 2; handleDescent(ts); } @@ -201,7 +231,11 @@ function makeTree(lists: Array>) { tv = s[p[ts - 2]]; tp = l[ts - 2]; while (tp <= r[ts - 2]) { - tv = t[tv][listsAsConcatedNumericList[tp]]; + const tTv = t[tv]; + if (tTv === undefined) { + throw new Error('handleDescent: tTv should not be undefined'); + } + tv = tTv[listsAsConcatedNumericList[tp]]; tp += r[tv] - l[tv] + 1; } if (tp === r[ts - 2] + 1) { @@ -298,9 +332,10 @@ function makeTree(lists: Array>) { } for (let i = ALPHABET_SIZE - 1; i >= 0; --i) { - if (t[node][i] !== -1) { + const tNode = t[node]?.[i]; + if (tNode !== undefined && tNode !== -1) { isLeaf = false; - st.push([t[node][i], depth + rangeLen]); + st.push([tNode, depth + rangeLen]); } } From a2cdaf98f211ccb8d52cca41efe13b744900ffe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 10:40:43 +0200 Subject: [PATCH 028/264] wip: refactor r list --- src/libs/SuffixUkkonenTree.ts | 41 ++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index e89cab194ced..c951c616a0da 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -13,11 +13,6 @@ const DELIMITER_CHAR = String.fromCharCode(DELIMITER_CHAR_CODE + CHAR_CODE_A); const END_CHAR = String.fromCharCode(END_CHAR_CODE + CHAR_CODE_A); -// TODO: -// make makeTree faster -// how to deal with unicode characters such as spanish ones? -// i think we need to support numbers as well - function convertToBase26(num: number): string { if (num < 0) { throw new Error('Input must be a non-negative integer'); @@ -71,6 +66,7 @@ function cleanedString(input: string) { return input.toLowerCase().replace(nonAlphanumericRegex, ''); } +// TODO: remove timeSpendCleaning once verified the regex works okay on hermes as well! let timeSpendCleaning = 0; function prepareData({data, transform}: PrepareDataParams): [number[], Array] { const searchIndexList: Array = []; @@ -131,14 +127,15 @@ function makeTree(lists: Array>) { // const start = performance.now(); // const t = Array.from({length: N}, () => Array(ALPHABET_SIZE).fill(-1)); const l = Array(N).fill(0); - const r = Array(N).fill(0); + // const r = Array(N).fill(0); const p = Array(N).fill(0); const s = Array(N).fill(0); // const end = performance.now(); // console.log('Allocating memory took:', end - start, 'ms'); const t: Array = []; // const l: number[] = []; - // const r: number[] = []; + const r: Array = []; + const defaultREdgeValue = listsAsConcatedNumericList.length - 1; // const p: number[] = []; // const s: number[] = []; @@ -148,7 +145,7 @@ function makeTree(lists: Array>) { let la = 0; function initializeTree() { - r.fill(listsAsConcatedNumericList.length - 1); + // r.fill(listsAsConcatedNumericList.length - 1); s[0] = 1; l[0] = -1; r[0] = -1; @@ -158,9 +155,19 @@ function makeTree(lists: Array>) { t[1] = Array(ALPHABET_SIZE).fill(0); } + function getOrCreateREdge(node: number): number { + let rEdge = r[node]; + if (rEdge === undefined) { + rEdge = defaultREdgeValue; + r[node] = rEdge; + } + return rEdge; + } + function processCharacter(c: number) { while (true) { - if (r[tv] < tp) { + const rEdge = getOrCreateREdge(tv); + if (rEdge < tp) { let curNode = t[tv]; if (curNode === undefined) { @@ -198,7 +205,9 @@ function makeTree(lists: Array>) { l[ts] = la; p[ts++] = tv; tv = s[tv]; - tp = r[tv] + 1; + + const rEdge = getOrCreateREdge(tv); + tp = rEdge + 1; } function splitEdge(c: number) { @@ -230,20 +239,22 @@ function makeTree(lists: Array>) { function handleDescent(ts: number) { tv = s[p[ts - 2]]; tp = l[ts - 2]; - while (tp <= r[ts - 2]) { + while (tp <= (r[ts - 2] ?? defaultREdgeValue)) { const tTv = t[tv]; if (tTv === undefined) { throw new Error('handleDescent: tTv should not be undefined'); } tv = tTv[listsAsConcatedNumericList[tp]]; - tp += r[tv] - l[tv] + 1; + const rEdge = getOrCreateREdge(tv); + tp += rEdge - l[tv] + 1; } - if (tp === r[ts - 2] + 1) { + if (tp === (r[ts - 2] ?? defaultREdgeValue) + 1) { s[ts - 2] = tv; } else { s[ts - 2] = ts; } - tp = r[tv] - (tp - r[ts - 2]) + 2; + const rEdge = getOrCreateREdge(tv); + tp = rEdge - (tp - (r[ts - 2] ?? defaultREdgeValue)) + 2; } function resetTreeTraversal() { @@ -316,7 +327,7 @@ function makeTree(lists: Array>) { let isLeaf = true; const leftRange = l[node]; - const rightRange = r[node]; + const rightRange = r[node] ?? listsAsConcatedNumericList.length - 1; const rangeLen = node === 0 ? 0 : rightRange - leftRange + 1; let matches = true; From f6dbecf0bf92108a75f898a97c48bc7d74f73140 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 10:41:32 +0200 Subject: [PATCH 029/264] wip: refactor l list --- src/libs/SuffixUkkonenTree.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index c951c616a0da..8b3029e2e9da 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -126,14 +126,14 @@ function makeTree(lists: Array>) { const N = 150_000; // TODO: i reduced this number from 1_000_000 down to this, for faster performance - however its possible that it needs to be bigger for larger search strings // const start = performance.now(); // const t = Array.from({length: N}, () => Array(ALPHABET_SIZE).fill(-1)); - const l = Array(N).fill(0); + // const l = Array(N).fill(0); // const r = Array(N).fill(0); const p = Array(N).fill(0); const s = Array(N).fill(0); // const end = performance.now(); // console.log('Allocating memory took:', end - start, 'ms'); const t: Array = []; - // const l: number[] = []; + const l: number[] = []; const r: Array = []; const defaultREdgeValue = listsAsConcatedNumericList.length - 1; // const p: number[] = []; From bda1e34b1796ec7f259afc6b2e4d25b59f7b2472 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 10:42:06 +0200 Subject: [PATCH 030/264] wip: refactor p list --- src/libs/SuffixUkkonenTree.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 8b3029e2e9da..fa4779345eb0 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -128,7 +128,7 @@ function makeTree(lists: Array>) { // const t = Array.from({length: N}, () => Array(ALPHABET_SIZE).fill(-1)); // const l = Array(N).fill(0); // const r = Array(N).fill(0); - const p = Array(N).fill(0); + // const p = Array(N).fill(0); const s = Array(N).fill(0); // const end = performance.now(); // console.log('Allocating memory took:', end - start, 'ms'); @@ -136,7 +136,7 @@ function makeTree(lists: Array>) { const l: number[] = []; const r: Array = []; const defaultREdgeValue = listsAsConcatedNumericList.length - 1; - // const p: number[] = []; + const p: number[] = []; // const s: number[] = []; let tv = 0; From 2508d557aa52770d505149e7085af1947382537b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 10:42:29 +0200 Subject: [PATCH 031/264] wip: refactor s list --- src/libs/SuffixUkkonenTree.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index fa4779345eb0..c71424440e08 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -129,7 +129,7 @@ function makeTree(lists: Array>) { // const l = Array(N).fill(0); // const r = Array(N).fill(0); // const p = Array(N).fill(0); - const s = Array(N).fill(0); + // const s = Array(N).fill(0); // const end = performance.now(); // console.log('Allocating memory took:', end - start, 'ms'); const t: Array = []; @@ -137,7 +137,7 @@ function makeTree(lists: Array>) { const r: Array = []; const defaultREdgeValue = listsAsConcatedNumericList.length - 1; const p: number[] = []; - // const s: number[] = []; + const s: number[] = []; let tv = 0; let tp = 0; From c32d57f0ab5bc2ef4c116d52df8e807c7b3725c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 10:42:56 +0200 Subject: [PATCH 032/264] removed preallocations! --- src/libs/SuffixUkkonenTree.ts | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index c71424440e08..6d8feea00f28 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -8,11 +8,6 @@ const END_CHAR_CODE = ALPHABET_SIZE - 1; const nonAlphanumericRegex = /[^0-9\p{L}]/gu; -// The character that separates the different options in the search string -const DELIMITER_CHAR = String.fromCharCode(DELIMITER_CHAR_CODE + CHAR_CODE_A); - -const END_CHAR = String.fromCharCode(END_CHAR_CODE + CHAR_CODE_A); - function convertToBase26(num: number): string { if (num < 0) { throw new Error('Input must be a non-negative integer'); @@ -123,15 +118,6 @@ function makeTree(lists: Array>) { console.log('building search strings', performance.now() - start1); console.log('Search String length', listsAsConcatedNumericList.length); - const N = 150_000; // TODO: i reduced this number from 1_000_000 down to this, for faster performance - however its possible that it needs to be bigger for larger search strings - // const start = performance.now(); - // const t = Array.from({length: N}, () => Array(ALPHABET_SIZE).fill(-1)); - // const l = Array(N).fill(0); - // const r = Array(N).fill(0); - // const p = Array(N).fill(0); - // const s = Array(N).fill(0); - // const end = performance.now(); - // console.log('Allocating memory took:', end - start, 'ms'); const t: Array = []; const l: number[] = []; const r: Array = []; @@ -145,13 +131,11 @@ function makeTree(lists: Array>) { let la = 0; function initializeTree() { - // r.fill(listsAsConcatedNumericList.length - 1); s[0] = 1; l[0] = -1; r[0] = -1; l[1] = -1; r[1] = -1; - // t[1].fill(0); t[1] = Array(ALPHABET_SIZE).fill(0); } From f4812c0bd3bfe40c5a3b7af6059e1a85af64b46c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 10:46:26 +0200 Subject: [PATCH 033/264] better naming --- src/libs/SuffixUkkonenTree.ts | 144 +++++++++++++++++----------------- 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 6d8feea00f28..7d475ef615f5 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -118,56 +118,56 @@ function makeTree(lists: Array>) { console.log('building search strings', performance.now() - start1); console.log('Search String length', listsAsConcatedNumericList.length); - const t: Array = []; - const l: number[] = []; - const r: Array = []; + const transitionNodes: Array = []; + const leftEdges: number[] = []; + const rightEdges: Array = []; const defaultREdgeValue = listsAsConcatedNumericList.length - 1; - const p: number[] = []; - const s: number[] = []; + const parent: number[] = []; + const suffixLink: number[] = []; - let tv = 0; - let tp = 0; - let ts = 2; - let la = 0; + let currentNode = 0; + let currentPosition = 0; + let nodeCounter = 2; + let currentIndex = 0; function initializeTree() { - s[0] = 1; - l[0] = -1; - r[0] = -1; - l[1] = -1; - r[1] = -1; - t[1] = Array(ALPHABET_SIZE).fill(0); + suffixLink[0] = 1; + leftEdges[0] = -1; + rightEdges[0] = -1; + leftEdges[1] = -1; + rightEdges[1] = -1; + transitionNodes[1] = Array(ALPHABET_SIZE).fill(0); } function getOrCreateREdge(node: number): number { - let rEdge = r[node]; + let rEdge = rightEdges[node]; if (rEdge === undefined) { rEdge = defaultREdgeValue; - r[node] = rEdge; + rightEdges[node] = rEdge; } return rEdge; } function processCharacter(c: number) { while (true) { - const rEdge = getOrCreateREdge(tv); - if (rEdge < tp) { - let curNode = t[tv]; + const rEdge = getOrCreateREdge(currentNode); + if (rEdge < currentPosition) { + let curNode = transitionNodes[currentNode]; if (curNode === undefined) { curNode = Array(ALPHABET_SIZE).fill(-1); - t[tv] = curNode; + transitionNodes[currentNode] = curNode; } if (curNode[c] === -1) { createNewLeaf(c); continue; } - tv = curNode[c]; - tp = l[tv]; + currentNode = curNode[c]; + currentPosition = leftEdges[currentNode]; } - if (tp === -1 || c === listsAsConcatedNumericList[tp]) { - tp++; + if (currentPosition === -1 || c === listsAsConcatedNumericList[currentPosition]) { + currentPosition++; } else { splitEdge(c); continue; @@ -180,76 +180,76 @@ function makeTree(lists: Array>) { } function createNewLeaf(c: number) { - const curNode = t[tv]; + const curNode = transitionNodes[currentNode]; if (curNode === undefined) { throw new Error('createNewLeaf: curNode should not be undefined'); } - curNode[c] = ts; - l[ts] = la; - p[ts++] = tv; - tv = s[tv]; + curNode[c] = nodeCounter; + leftEdges[nodeCounter] = currentIndex; + parent[nodeCounter++] = currentNode; + currentNode = suffixLink[currentNode]; - const rEdge = getOrCreateREdge(tv); - tp = rEdge + 1; + const rEdge = getOrCreateREdge(currentNode); + currentPosition = rEdge + 1; } function splitEdge(c: number) { - l[ts] = l[tv]; - r[ts] = tp - 1; - p[ts] = p[tv]; - let tTs = t[ts]; - if (tTs === undefined) { - tTs = Array(ALPHABET_SIZE).fill(-1); - t[ts] = tTs; + leftEdges[nodeCounter] = leftEdges[currentNode]; + rightEdges[nodeCounter] = currentPosition - 1; + parent[nodeCounter] = parent[currentNode]; + let transitionTable = transitionNodes[nodeCounter]; + if (transitionTable === undefined) { + transitionTable = Array(ALPHABET_SIZE).fill(-1); + transitionNodes[nodeCounter] = transitionTable; } - tTs[listsAsConcatedNumericList[tp]] = tv; - tTs[c] = ts + 1; - l[ts + 1] = la; - p[ts + 1] = ts; - l[tv] = tp; - p[tv] = ts; - - let tpts = t[p[ts]]; - if (tpts === undefined) { - tpts = Array(ALPHABET_SIZE).fill(-1); - t[p[ts]] = tpts; + transitionTable[listsAsConcatedNumericList[currentPosition]] = currentNode; + transitionTable[c] = nodeCounter + 1; + leftEdges[nodeCounter + 1] = currentIndex; + parent[nodeCounter + 1] = nodeCounter; + leftEdges[currentNode] = currentPosition; + parent[currentNode] = nodeCounter; + + let parentTransitionNodes = transitionNodes[parent[nodeCounter]]; + if (parentTransitionNodes === undefined) { + parentTransitionNodes = Array(ALPHABET_SIZE).fill(-1); + transitionNodes[parent[nodeCounter]] = parentTransitionNodes; } - tpts[listsAsConcatedNumericList[l[ts]]] = ts; - ts += 2; - handleDescent(ts); + parentTransitionNodes[listsAsConcatedNumericList[leftEdges[nodeCounter]]] = nodeCounter; + nodeCounter += 2; + handleDescent(nodeCounter); } function handleDescent(ts: number) { - tv = s[p[ts - 2]]; - tp = l[ts - 2]; - while (tp <= (r[ts - 2] ?? defaultREdgeValue)) { - const tTv = t[tv]; + currentNode = suffixLink[parent[ts - 2]]; + currentPosition = leftEdges[ts - 2]; + while (currentPosition <= (rightEdges[ts - 2] ?? defaultREdgeValue)) { + const tTv = transitionNodes[currentNode]; if (tTv === undefined) { throw new Error('handleDescent: tTv should not be undefined'); } - tv = tTv[listsAsConcatedNumericList[tp]]; - const rEdge = getOrCreateREdge(tv); - tp += rEdge - l[tv] + 1; + currentNode = tTv[listsAsConcatedNumericList[currentPosition]]; + const rEdge = getOrCreateREdge(currentNode); + currentPosition += rEdge - leftEdges[currentNode] + 1; } - if (tp === (r[ts - 2] ?? defaultREdgeValue) + 1) { - s[ts - 2] = tv; + if (currentPosition === (rightEdges[ts - 2] ?? defaultREdgeValue) + 1) { + suffixLink[ts - 2] = currentNode; } else { - s[ts - 2] = ts; + suffixLink[ts - 2] = ts; } - const rEdge = getOrCreateREdge(tv); - tp = rEdge - (tp - (r[ts - 2] ?? defaultREdgeValue)) + 2; + const rEdge = getOrCreateREdge(currentNode); + currentPosition = rEdge - (currentPosition - (rightEdges[ts - 2] ?? defaultREdgeValue)) + 2; } function resetTreeTraversal() { - tv = 0; - tp = 0; + currentNode = 0; + currentPosition = 0; } function build() { initializeTree(); - for (la = 0; la < listsAsConcatedNumericList.length; ++la) { - const c = listsAsConcatedNumericList[la]; + for (currentIndex = 0; currentIndex < listsAsConcatedNumericList.length; ++currentIndex) { + const c = listsAsConcatedNumericList[currentIndex]; processCharacter(c); } } @@ -310,8 +310,8 @@ function makeTree(lists: Array>) { const [node, depth] = st.pop()!; let isLeaf = true; - const leftRange = l[node]; - const rightRange = r[node] ?? listsAsConcatedNumericList.length - 1; + const leftRange = leftEdges[node]; + const rightRange = rightEdges[node] ?? listsAsConcatedNumericList.length - 1; const rangeLen = node === 0 ? 0 : rightRange - leftRange + 1; let matches = true; @@ -327,7 +327,7 @@ function makeTree(lists: Array>) { } for (let i = ALPHABET_SIZE - 1; i >= 0; --i) { - const tNode = t[node]?.[i]; + const tNode = transitionNodes[node]?.[i]; if (tNode !== undefined && tNode !== -1) { isLeaf = false; st.push([tNode, depth + rangeLen]); From de0b4673053eb64a75316c0478c9859b19a8a14c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 11:28:21 +0200 Subject: [PATCH 034/264] renamings --- src/libs/SuffixUkkonenTree.ts | 36 +++++++++++++++--------------- src/pages/ChatFinderPage/index.tsx | 17 +++++--------- tests/unit/SuffixUkkonenTree.ts | 12 +++++----- 3 files changed, 30 insertions(+), 35 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 7d475ef615f5..08addba59bbf 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -54,7 +54,7 @@ function stringToArray(input: string) { type PrepareDataParams = { data: T[]; - transform: (data: T) => string; + toSearchableString: (data: T) => string; }; function cleanedString(input: string) { @@ -63,12 +63,12 @@ function cleanedString(input: string) { // TODO: remove timeSpendCleaning once verified the regex works okay on hermes as well! let timeSpendCleaning = 0; -function prepareData({data, transform}: PrepareDataParams): [number[], Array] { +function prepareData({data, toSearchableString}: PrepareDataParams): [number[], Array] { const searchIndexList: Array = []; const allDataAsNumbers: number[] = []; timeSpendCleaning = 0; data.forEach((option, index) => { - const searchStringForTree = transform(option); + const searchStringForTree = toSearchableString(option); // Remove all none a-z chars: const start = performance.now(); const cleanedSearchStringForTree = cleanedString(searchStringForTree); @@ -106,8 +106,8 @@ function makeTree(lists: Array>) { // thus indexes is a list of those data lists const indexesForList: Array> = []; - for (const {data, transform} of lists) { - const [numericRepresentation, searchIndexList] = prepareData({data, transform}); + for (const {data, toSearchableString: transform} of lists) { + const [numericRepresentation, searchIndexList] = prepareData({data, toSearchableString: transform}); for (const num of numericRepresentation) { // we have to use a loop here as push with spread yields a maximum call stack exceeded error listsAsConcatedNumericList.push(num); @@ -148,7 +148,7 @@ function makeTree(lists: Array>) { return rEdge; } - function processCharacter(c: number) { + function processCharacter(char: number) { while (true) { const rEdge = getOrCreateREdge(currentNode); if (rEdge < currentPosition) { @@ -159,22 +159,22 @@ function makeTree(lists: Array>) { transitionNodes[currentNode] = curNode; } - if (curNode[c] === -1) { - createNewLeaf(c); + if (curNode[char] === -1) { + createNewLeaf(char); continue; } - currentNode = curNode[c]; + currentNode = curNode[char]; currentPosition = leftEdges[currentNode]; } - if (currentPosition === -1 || c === listsAsConcatedNumericList[currentPosition]) { + if (currentPosition === -1 || char === listsAsConcatedNumericList[currentPosition]) { currentPosition++; } else { - splitEdge(c); + splitEdge(char); continue; } break; } - if (c === DELIMITER_CHAR_CODE) { + if (char === DELIMITER_CHAR_CODE) { resetTreeTraversal(); } } @@ -300,9 +300,9 @@ function makeTree(lists: Array>) { // } // TODO: replace, other search function is broken in edge cases we need to address first - function findSubstring(sString: string) { - const s = stringToArray(sString); - console.log('searching for', sString, s); + function findSubstring(value: string) { + const searchValueNumeric = stringToArray(value); + console.log('searching for', value, searchValueNumeric); const occurrences: number[] = []; const st: Array<[number, number]> = [[0, 0]]; @@ -315,8 +315,8 @@ function makeTree(lists: Array>) { const rangeLen = node === 0 ? 0 : rightRange - leftRange + 1; let matches = true; - for (let i = 0; i < rangeLen && depth + i < s.length; i++) { - if (s[depth + i] !== listsAsConcatedNumericList[leftRange + i]) { + for (let i = 0; i < rangeLen && depth + i < searchValueNumeric.length; i++) { + if (searchValueNumeric[depth + i] !== listsAsConcatedNumericList[leftRange + i]) { matches = false; break; } @@ -334,7 +334,7 @@ function makeTree(lists: Array>) { } } - if (isLeaf && depth + rangeLen >= s.length) { + if (isLeaf && depth + rangeLen >= searchValueNumeric.length) { occurrences.push(listsAsConcatedNumericList.length - (depth + rangeLen)); } } diff --git a/src/pages/ChatFinderPage/index.tsx b/src/pages/ChatFinderPage/index.tsx index 9663dafe249c..b1806b2bccc9 100644 --- a/src/pages/ChatFinderPage/index.tsx +++ b/src/pages/ChatFinderPage/index.tsx @@ -18,7 +18,7 @@ import type {RootStackParamList} from '@libs/Navigation/types'; import * as OptionsListUtils from '@libs/OptionsListUtils'; import Performance from '@libs/Performance'; import type {OptionData} from '@libs/ReportUtils'; -import {makeTree, prepareData} from '@libs/SuffixUkkonenTree'; +import * as SuffixTree from '@libs/SuffixUkkonenTree'; import * as Report from '@userActions/Report'; import Timing from '@userActions/Timing'; import CONST from '@src/CONST'; @@ -100,18 +100,18 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa */ const findInSearchTree = useMemo(() => { let start = performance.now(); - const tree = makeTree([ + const tree = SuffixTree.makeTree([ { data: searchOptions.personalDetails, - transform: (option) => { + toSearchableString: (option) => { const displayName = option.participantsList?.[0]?.displayName ?? ''; return (option.login ?? '') + (option.login !== displayName ? displayName : ''); }, }, { data: searchOptions.recentReports, - transform: (option) => { - let searchStringForTree = option.text ?? '' + toSearchableString: (option) => { + let searchStringForTree = option.text ?? ''; searchStringForTree += option.login ?? ''; if (option.isThread) { @@ -137,10 +137,6 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa const searchStart = performance.now(); const [personalDetails, recentReports] = tree.findInSearchTree(searchInput); console.log('findInSearchTree', performance.now() - searchStart); - console.log("results", { - personalDetails, - recentReports, - }) return { personalDetails, @@ -162,7 +158,6 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa } Timing.start(CONST.TIMING.SEARCH_FILTER_OPTIONS); - const newOptions1 = OptionsListUtils.filterOptions(searchOptions, debouncedSearchValue, {sortByReportTypeInSearch: true, preferChatroomsOverThreads: true}); const newOptions = findInSearchTree(debouncedSearchValue); const userToInvite = OptionsListUtils.pickUserToInvite({ canInviteUser: true, @@ -180,7 +175,7 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa userToInvite, headerMessage: header, }; - }, [debouncedSearchValue, searchOptions, findInSearchTree]); + }, [debouncedSearchValue, findInSearchTree]); const {recentReports, personalDetails: localPersonalDetails, userToInvite, headerMessage} = debouncedSearchValue.trim() !== '' ? filteredOptions : searchOptions; diff --git a/tests/unit/SuffixUkkonenTree.ts b/tests/unit/SuffixUkkonenTree.ts index fc5f9c14d1ab..b170c4ebfb9b 100644 --- a/tests/unit/SuffixUkkonenTree.ts +++ b/tests/unit/SuffixUkkonenTree.ts @@ -5,7 +5,7 @@ describe('SuffixUkkonenTree', () => { const tree = makeTree([ { data: ['banana'], - transform: (data) => data, + toSearchableString: (data) => data, }, ]); tree.build(); @@ -16,7 +16,7 @@ describe('SuffixUkkonenTree', () => { const tree = makeTree([ { data: ['banana', 'test'], - transform: (data) => data, + toSearchableString: (data) => data, }, ]); tree.build(); @@ -28,11 +28,11 @@ describe('SuffixUkkonenTree', () => { const tree = makeTree([ { data: ['erica', 'banana'], - transform: (data) => data, + toSearchableString: (data) => data, }, { data: ['banana', 'test'], - transform: (data) => data, + toSearchableString: (data) => data, }, ]); tree.build(); @@ -44,7 +44,7 @@ describe('SuffixUkkonenTree', () => { const tree = makeTree([ { data: [1, 2, 3, 4, 5], - transform: (data) => String(data), + toSearchableString: (data) => String(data), }, ]); tree.build(); @@ -55,7 +55,7 @@ describe('SuffixUkkonenTree', () => { const tree = makeTree([ { data: ['banana', 'ñèşťǒř', 'test'], - transform: (data) => data, + toSearchableString: (data) => data, }, ]); tree.build(); From c17a95b875005ceb941249de91a07bbe23ce567e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 11:55:39 +0200 Subject: [PATCH 035/264] add userToToInvite to header count back --- src/pages/ChatFinderPage/index.tsx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pages/ChatFinderPage/index.tsx b/src/pages/ChatFinderPage/index.tsx index b1806b2bccc9..ebf833bfc096 100644 --- a/src/pages/ChatFinderPage/index.tsx +++ b/src/pages/ChatFinderPage/index.tsx @@ -128,7 +128,6 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa }, }, ]); - console.log('makeTree', performance.now() - start); start = performance.now(); tree.build(); console.log('build', performance.now() - start); @@ -168,7 +167,7 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa }); Timing.end(CONST.TIMING.SEARCH_FILTER_OPTIONS); - const header = OptionsListUtils.getHeaderMessage(newOptions.recentReports.length > 0, false, debouncedSearchValue); + const header = OptionsListUtils.getHeaderMessage(newOptions.recentReports.length + Number(!!userToToInvite) > 0, false, debouncedSearchValue); return { recentReports: newOptions.recentReports, personalDetails: newOptions.personalDetails, From 0f94c05754dd7fbf7d9e05d825dc75060615c05e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 11:55:47 +0200 Subject: [PATCH 036/264] remove debug logs --- src/libs/SuffixUkkonenTree.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 08addba59bbf..061be1672203 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -302,7 +302,6 @@ function makeTree(lists: Array>) { // TODO: replace, other search function is broken in edge cases we need to address first function findSubstring(value: string) { const searchValueNumeric = stringToArray(value); - console.log('searching for', value, searchValueNumeric); const occurrences: number[] = []; const st: Array<[number, number]> = [[0, 0]]; @@ -345,7 +344,6 @@ function makeTree(lists: Array>) { function findInSearchTree(searchInput: string): T[][] { const now = performance.now(); const result = findSubstring(searchInput); - console.log('FindSubstring index result for searchInput', searchInput, result); // Map the results to the original options const mappedResults = Array.from({length: lists.length}, () => new Set()); From fc54faa840b0ef480db35883eef06d7746e43b70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 12:55:08 +0200 Subject: [PATCH 037/264] cleanup --- src/libs/SuffixUkkonenTree.ts | 13 ++----------- src/pages/ChatFinderPage/index.tsx | 8 ++------ 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 061be1672203..2248d1c92df8 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -61,18 +61,14 @@ function cleanedString(input: string) { return input.toLowerCase().replace(nonAlphanumericRegex, ''); } -// TODO: remove timeSpendCleaning once verified the regex works okay on hermes as well! -let timeSpendCleaning = 0; function prepareData({data, toSearchableString}: PrepareDataParams): [number[], Array] { const searchIndexList: Array = []; const allDataAsNumbers: number[] = []; - timeSpendCleaning = 0; + data.forEach((option, index) => { const searchStringForTree = toSearchableString(option); // Remove all none a-z chars: - const start = performance.now(); const cleanedSearchStringForTree = cleanedString(searchStringForTree); - timeSpendCleaning += performance.now() - start; if (cleanedSearchStringForTree.length === 0) { return; @@ -90,7 +86,6 @@ function prepareData({data, toSearchableString}: PrepareDataParams): [numb allDataAsNumbers.push(DELIMITER_CHAR_CODE); } }); - console.log('cleaning', timeSpendCleaning, 'ms'); return [allDataAsNumbers, searchIndexList]; } @@ -99,7 +94,6 @@ function prepareData({data, toSearchableString}: PrepareDataParams): [numb * Makes a tree from an input string */ function makeTree(lists: Array>) { - const start1 = performance.now(); const listsAsConcatedNumericList: number[] = []; // We might received multiple lists of data that we want to search in @@ -115,9 +109,7 @@ function makeTree(lists: Array>) { indexesForList.push(searchIndexList); } listsAsConcatedNumericList.push(END_CHAR_CODE); - console.log('building search strings', performance.now() - start1); - console.log('Search String length', listsAsConcatedNumericList.length); const transitionNodes: Array = []; const leftEdges: number[] = []; const rightEdges: Array = []; @@ -306,6 +298,7 @@ function makeTree(lists: Array>) { const st: Array<[number, number]> = [[0, 0]]; while (st.length > 0) { + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion const [node, depth] = st.pop()!; let isLeaf = true; @@ -342,7 +335,6 @@ function makeTree(lists: Array>) { } function findInSearchTree(searchInput: string): T[][] { - const now = performance.now(); const result = findSubstring(searchInput); // Map the results to the original options @@ -362,7 +354,6 @@ function makeTree(lists: Array>) { } }); - console.log('search', performance.now() - now); return mappedResults.map((set) => Array.from(set)); } diff --git a/src/pages/ChatFinderPage/index.tsx b/src/pages/ChatFinderPage/index.tsx index ebf833bfc096..d3a34e3a7452 100644 --- a/src/pages/ChatFinderPage/index.tsx +++ b/src/pages/ChatFinderPage/index.tsx @@ -99,7 +99,7 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa * Builds a suffix tree and returns a function to search in it. */ const findInSearchTree = useMemo(() => { - let start = performance.now(); + const start = performance.now(); const tree = SuffixTree.makeTree([ { data: searchOptions.personalDetails, @@ -128,14 +128,10 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa }, }, ]); - start = performance.now(); tree.build(); - console.log('build', performance.now() - start); function search(searchInput: string) { - const searchStart = performance.now(); const [personalDetails, recentReports] = tree.findInSearchTree(searchInput); - console.log('findInSearchTree', performance.now() - searchStart); return { personalDetails, @@ -167,7 +163,7 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa }); Timing.end(CONST.TIMING.SEARCH_FILTER_OPTIONS); - const header = OptionsListUtils.getHeaderMessage(newOptions.recentReports.length + Number(!!userToToInvite) > 0, false, debouncedSearchValue); + const header = OptionsListUtils.getHeaderMessage(newOptions.recentReports.length + Number(!!userToInvite) > 0, false, debouncedSearchValue); return { recentReports: newOptions.recentReports, personalDetails: newOptions.personalDetails, From 96c93e7cd7c6ab3a132aec4f2848cdf3ed9765ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 13:16:28 +0200 Subject: [PATCH 038/264] add unit test for special char handling --- src/libs/SuffixUkkonenTree.ts | 4 +--- tests/unit/SuffixUkkonenTree.ts | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 2248d1c92df8..6462498f5ac6 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -39,9 +39,7 @@ function stringToArray(input: string) { for (const char of input) { const charCode = char.charCodeAt(0); const charCodeABased = charCode - CHAR_CODE_A; - // TODO: each word should be converted on its own to stringToArray, so that the words can contain the special chars (which would get base26 encoded) - // When we do this we probably want to check here if the words are in the LETTER + SPECIAL_CHAR range - if (charCodeABased >= 0 && charCodeABased < ALPHABET_SIZE) { + if (charCodeABased >= 0 && charCodeABased < LETTER_ALPHABET_SIZE) { res.push(charCodeABased); } else { const asBase26String = convertToBase26(charCode); diff --git a/tests/unit/SuffixUkkonenTree.ts b/tests/unit/SuffixUkkonenTree.ts index b170c4ebfb9b..4c916bb34875 100644 --- a/tests/unit/SuffixUkkonenTree.ts +++ b/tests/unit/SuffixUkkonenTree.ts @@ -61,4 +61,21 @@ describe('SuffixUkkonenTree', () => { tree.build(); expect(tree.findInSearchTree('èşť')).toEqual([['ñèşťǒř']]); }); + + it('should work with words containing "reserved special characters"', () => { + // Some special characters are used for the internal representation of the tree + // However, they are still supported and shouldn't cause any problems. + // The only gotcha is, that you can't search for special chars (however, none of our searchable data contains any of them). + const tree = makeTree([ + { + data: ['ba|nana', 'te{st', 'he}llo'], + toSearchableString: (data) => data, + }, + ]); + tree.build(); + + expect(tree.findInSearchTree('st')).toEqual([['te{st']]); + expect(tree.findInSearchTree('llo')).toEqual([['he}llo']]); + expect(tree.findInSearchTree('nana')).toEqual([['ba|nana']]); + }); }); From 81dffddb2b19b262b8141a42b7fa8d94eb23d14e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 13:16:36 +0200 Subject: [PATCH 039/264] wip: document suffix tree functions --- src/libs/SuffixUkkonenTree.ts | 60 +++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 6462498f5ac6..b1a2cd532944 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -1,4 +1,9 @@ /* eslint-disable no-continue */ + +/** + * TODO: quick explanation to how suffix ukkonen tree works: + */ + const CHAR_CODE_A = 'a'.charCodeAt(0); const LETTER_ALPHABET_SIZE = 26; const ALPHABET_SIZE = LETTER_ALPHABET_SIZE + 3; // +3: special char, delimiter char, end char @@ -6,11 +11,16 @@ const SPECIAL_CHAR_CODE = ALPHABET_SIZE - 3; const DELIMITER_CHAR_CODE = ALPHABET_SIZE - 2; const END_CHAR_CODE = ALPHABET_SIZE - 1; +// Removes any special characters, except for numbers and letters (including unicode letters) const nonAlphanumericRegex = /[^0-9\p{L}]/gu; +/** + * Converts a number to a base26 string number. + * This is used to fit all kinds of characters in the range of a-z. + */ function convertToBase26(num: number): string { if (num < 0) { - throw new Error('Input must be a non-negative integer'); + throw new Error('convertToBase26: Input must be a non-negative integer'); } const alphabet = 'abcdefghijklmnopqrstuvwxyz'; @@ -28,9 +38,11 @@ function convertToBase26(num: number): string { /** * Converts a string to an array of numbers representing the characters of the string. + * Every number in the array is in the range 0-ALPHABET_SIZE (0-28). + * * The numbers are offset by the character code of 'a' (97). - * - This is so that the numbers from a-z are in the range 0-25. - * - 26 is for encoding special characters (everything that is bigger than z will be encoded as "specialCharacter + base26(charCode))" + * - This is so that the numbers from a-z are in the range 0-28. + * - 26 is for encoding special characters. Character numbers that are not within the range of a-z will be encoded as "specialCharacter + base26(charCode)" * - 27 is for the delimiter character * - 28 is for the end character */ @@ -50,23 +62,39 @@ function stringToArray(input: string) { return res; } -type PrepareDataParams = { +type TreeDataParams = { + /** + * The data that should be searchable + */ data: T[]; + /** + * A function that generates a string from a data entry. The string's value is used for searching. + * If you have multiple fields that should be searchable, simply concat them to the string and return it. + */ toSearchableString: (data: T) => string; }; -function cleanedString(input: string) { +/** + * Everything in the tree is treated as lowercase. Strings will additionally be cleaned from + * special characters, as they are irrelevant for the search, and thus we can save some space. + */ +function cleanString(input: string) { return input.toLowerCase().replace(nonAlphanumericRegex, ''); } -function prepareData({data, toSearchableString}: PrepareDataParams): [number[], Array] { +/** + * The suffix tree can only store string like values, and internally stores those as numbers. + * This function converts the user data (which are most likely objects) to a numeric representation. + * Additionally a list of the original data and their index position in the numeric list is created, which is used to map the found occurrences back to the original data. + */ +function dataToNumericRepresentation({data, toSearchableString}: TreeDataParams): [number[], Array] { const searchIndexList: Array = []; const allDataAsNumbers: number[] = []; data.forEach((option, index) => { const searchStringForTree = toSearchableString(option); // Remove all none a-z chars: - const cleanedSearchStringForTree = cleanedString(searchStringForTree); + const cleanedSearchStringForTree = cleanString(searchStringForTree); if (cleanedSearchStringForTree.length === 0) { return; @@ -91,20 +119,20 @@ function prepareData({data, toSearchableString}: PrepareDataParams): [numb /** * Makes a tree from an input string */ -function makeTree(lists: Array>) { +function makeTree(lists: Array>) { const listsAsConcatedNumericList: number[] = []; // We might received multiple lists of data that we want to search in // thus indexes is a list of those data lists - const indexesForList: Array> = []; + const indexesByList: Array> = []; for (const {data, toSearchableString: transform} of lists) { - const [numericRepresentation, searchIndexList] = prepareData({data, toSearchableString: transform}); + const [numericRepresentation, searchIndexList] = dataToNumericRepresentation({data, toSearchableString: transform}); for (const num of numericRepresentation) { // we have to use a loop here as push with spread yields a maximum call stack exceeded error listsAsConcatedNumericList.push(num); } - indexesForList.push(searchIndexList); + indexesByList.push(searchIndexList); } listsAsConcatedNumericList.push(END_CHAR_CODE); @@ -339,15 +367,15 @@ function makeTree(lists: Array>) { const mappedResults = Array.from({length: lists.length}, () => new Set()); result.forEach((index) => { let offset = 0; - for (let i = 0; i < indexesForList.length; i++) { + for (let i = 0; i < indexesByList.length; i++) { const relativeIndex = index - offset + 1; - if (relativeIndex < indexesForList[i].length && relativeIndex >= 0) { - const option = indexesForList[i][relativeIndex]; + if (relativeIndex < indexesByList[i].length && relativeIndex >= 0) { + const option = indexesByList[i][relativeIndex]; if (option) { mappedResults[i].add(option); } } else { - offset += indexesForList[i].length; + offset += indexesByList[i].length; } } }); @@ -362,4 +390,4 @@ function makeTree(lists: Array>) { }; } -export {makeTree, prepareData}; +export {makeTree, dataToNumericRepresentation as prepareData}; From 25909bacec2ee68ea224a5900f0b4f52d7d26b72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 13:41:30 +0200 Subject: [PATCH 040/264] fix recursive search function --- src/libs/SuffixUkkonenTree.ts | 73 ++++++++--------------------------- 1 file changed, 16 insertions(+), 57 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index b1a2cd532944..6963a3fe7c04 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -284,84 +284,43 @@ function makeTree(lists: Array>) { * This function will return the index(es) of found occurrences within this big string. * So, when searching for "an", it would return [1, 4, 11]. */ - // function findSubstring(searchString: string) { - // const occurrences: number[] = []; - // // const cleanedSearchString = cleanedString(searchString); - // // const numericSearchQuery = stringToArray(cleanedSearchString); - - // function dfs(node: number, depth: number) { - // const leftRange = l[node]; - // const rightRange = r[node]; - // const rangeLen = node === 0 ? 0 : rightRange - leftRange + 1; - - // for (let i = 0; i < rangeLen && depth + i < searchString.length; i++) { - // if (searchString.charCodeAt(depth + i) - CHAR_CODE_A !== a[leftRange + i]) { - // return; - // } - // } - - // let isLeaf = true; - // for (let i = 0; i < ALPHABET_SIZE; ++i) { - // if (t[node][i] !== -1) { - // isLeaf = false; - // dfs(t[node][i], depth + rangeLen); - // } - // } - - // if (isLeaf && depth >= searchString.length) { - // occurrences.push(a.length - (depth + rangeLen)); - // } - // } - - // dfs(0, 0); - // return occurrences; - // } - - // TODO: replace, other search function is broken in edge cases we need to address first - function findSubstring(value: string) { - const searchValueNumeric = stringToArray(value); + function findSubstring(searchString: number[]) { const occurrences: number[] = []; - const st: Array<[number, number]> = [[0, 0]]; - while (st.length > 0) { - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - const [node, depth] = st.pop()!; - - let isLeaf = true; + function dfs(node: number, depth: number) { const leftRange = leftEdges[node]; - const rightRange = rightEdges[node] ?? listsAsConcatedNumericList.length - 1; + const rightRange = rightEdges[node] ?? defaultREdgeValue; const rangeLen = node === 0 ? 0 : rightRange - leftRange + 1; - let matches = true; - for (let i = 0; i < rangeLen && depth + i < searchValueNumeric.length; i++) { - if (searchValueNumeric[depth + i] !== listsAsConcatedNumericList[leftRange + i]) { - matches = false; - break; - } - } + // console.log('dfs', node, depth, leftRange, rightRange, rangeLen, searchString.length, searchString); - if (!matches) { - continue; + for (let i = 0; i < rangeLen && depth + i < searchString.length; i++) { + if (searchString[depth + i] !== listsAsConcatedNumericList[leftRange + i]) { + return; + } } - for (let i = ALPHABET_SIZE - 1; i >= 0; --i) { + let isLeaf = true; + for (let i = 0; i < ALPHABET_SIZE; ++i) { const tNode = transitionNodes[node]?.[i]; - if (tNode !== undefined && tNode !== -1) { + const correctChar = depth + rangeLen >= searchString.length || i === searchString[depth + rangeLen]; + if (tNode && tNode !== -1 && correctChar) { isLeaf = false; - st.push([tNode, depth + rangeLen]); + dfs(tNode, depth + rangeLen); } } - if (isLeaf && depth + rangeLen >= searchValueNumeric.length) { + if (isLeaf && depth + rangeLen >= searchString.length) { occurrences.push(listsAsConcatedNumericList.length - (depth + rangeLen)); } } + dfs(0, 0); return occurrences; } function findInSearchTree(searchInput: string): T[][] { - const result = findSubstring(searchInput); + const result = findSubstring(stringToArray(searchInput)); // Map the results to the original options const mappedResults = Array.from({length: lists.length}, () => new Set()); From b85cfd4179a312e9c17789fe75514fdf517a27a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 13:48:38 +0200 Subject: [PATCH 041/264] small cleanup --- src/libs/SuffixUkkonenTree.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 6963a3fe7c04..5570bda837da 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -320,7 +320,8 @@ function makeTree(lists: Array>) { } function findInSearchTree(searchInput: string): T[][] { - const result = findSubstring(stringToArray(searchInput)); + const searchValueNumeric = stringToArray(searchInput); + const result = findSubstring(searchValueNumeric); // Map the results to the original options const mappedResults = Array.from({length: lists.length}, () => new Set()); From 5de310c311ff97895a53d5734b64336f22745cfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 14:02:17 +0200 Subject: [PATCH 042/264] clean search input string --- src/libs/SuffixUkkonenTree.ts | 2 +- tests/unit/SuffixUkkonenTree.ts | 35 +++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 5570bda837da..be2d2607b936 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -320,7 +320,7 @@ function makeTree(lists: Array>) { } function findInSearchTree(searchInput: string): T[][] { - const searchValueNumeric = stringToArray(searchInput); + const searchValueNumeric = stringToArray(cleanString(searchInput)); const result = findSubstring(searchValueNumeric); // Map the results to the original options diff --git a/tests/unit/SuffixUkkonenTree.ts b/tests/unit/SuffixUkkonenTree.ts index 4c916bb34875..967a84a52086 100644 --- a/tests/unit/SuffixUkkonenTree.ts +++ b/tests/unit/SuffixUkkonenTree.ts @@ -78,4 +78,39 @@ describe('SuffixUkkonenTree', () => { expect(tree.findInSearchTree('llo')).toEqual([['he}llo']]); expect(tree.findInSearchTree('nana')).toEqual([['ba|nana']]); }); + + it('should be case insensitive', () => { + const tree = makeTree([ + { + data: ['banana', 'TeSt', 'TEST'], + toSearchableString: (data) => data, + }, + ]); + tree.build(); + + expect(tree.findInSearchTree('test')).toEqual([['TeSt', 'TEST']]); + }); + + it('should work with large random data sets', () => { + const data = Array.from({length: 1000}, () => { + // return words of length 5-27 with random char codes: + return Array.from({length: Math.floor(Math.random() * 22 + 5)}, () => { + const alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789@-_.'; + return alphabet.charAt(Math.floor(Math.random() * alphabet.length)); + }).join(''); + }); + + const tree = makeTree([ + { + data, + toSearchableString: (x) => x, + }, + ]); + tree.build(); + + // Expect to find each word in the tree + data.forEach((word) => { + expect(tree.findInSearchTree(word)).toEqual([[word]]); + }); + }); }); From 2a1e2510d092fbc4f395fa1a4e573e5c1ca6bff3 Mon Sep 17 00:00:00 2001 From: Mateusz Rajski Date: Wed, 25 Sep 2024 14:11:12 +0200 Subject: [PATCH 043/264] Bring back promise handling --- src/components/InitialURLContextProvider.tsx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/components/InitialURLContextProvider.tsx b/src/components/InitialURLContextProvider.tsx index 85ad54ca6c94..f026f2de53f9 100644 --- a/src/components/InitialURLContextProvider.tsx +++ b/src/components/InitialURLContextProvider.tsx @@ -31,9 +31,10 @@ function InitialURLContextProvider({children, url}: InitialURLContextProviderPro useEffect(() => { if (url) { - const route = signInAfterTransitionFromOldDot(url); - setInitialURL(route); - setSplashScreenState(CONST.BOOT_SPLASH_STATE.READY_TO_BE_HIDDEN); + signInAfterTransitionFromOldDot(url).then((route) => { + setInitialURL(route); + setSplashScreenState(CONST.BOOT_SPLASH_STATE.READY_TO_BE_HIDDEN); + }); return; } Linking.getInitialURL().then((initURL) => { From 1bea699dc5174f77427e49adfa1e365af6ce37e4 Mon Sep 17 00:00:00 2001 From: Mateusz Rajski Date: Wed, 25 Sep 2024 14:19:31 +0200 Subject: [PATCH 044/264] Update signInAfterTransitionFromOldDot --- src/libs/actions/Session/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libs/actions/Session/index.ts b/src/libs/actions/Session/index.ts index ae5acf94a925..df47e710a609 100644 --- a/src/libs/actions/Session/index.ts +++ b/src/libs/actions/Session/index.ts @@ -499,11 +499,11 @@ function signInAfterTransitionFromOldDot(transitionURL: string) { const setSessionDataAndOpenApp = new Promise((resolve) => { clearOnyxForNewAccount() - .then(() => Onyx.set(ONYXKEYS.SHOULD_RETURN_TO_OLD_DOT_AFTER_BOOKING, shouldReturnToOldDotAfterBooking === 'true')) .then(() => Onyx.multiSet({ [ONYXKEYS.SESSION]: {email, authToken, encryptedAuthToken: decodeURIComponent(encryptedAuthToken), accountID: Number(accountID)}, [ONYXKEYS.CREDENTIALS]: {autoGeneratedLogin, autoGeneratedPassword}, + [ONYXKEYS.SHOULD_RETURN_TO_OLD_DOT_AFTER_BOOKING]: shouldReturnToOldDotAfterBooking === 'true', }), ) .then(App.openApp) From d51a4c363e3607bb20462a22e3af696da27987bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 14:22:34 +0200 Subject: [PATCH 045/264] fixed flaky test --- tests/unit/SuffixUkkonenTree.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/SuffixUkkonenTree.ts b/tests/unit/SuffixUkkonenTree.ts index 967a84a52086..7c056397aafa 100644 --- a/tests/unit/SuffixUkkonenTree.ts +++ b/tests/unit/SuffixUkkonenTree.ts @@ -110,7 +110,7 @@ describe('SuffixUkkonenTree', () => { // Expect to find each word in the tree data.forEach((word) => { - expect(tree.findInSearchTree(word)).toEqual([[word]]); + expect(tree.findInSearchTree(word)).toEqual([expect.arrayContaining([word])]); }); }); }); From 9e9b57488299e193859773a3ca2be5740832f678 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 14:31:56 +0200 Subject: [PATCH 046/264] made test more reliable --- tests/unit/SuffixUkkonenTree.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/SuffixUkkonenTree.ts b/tests/unit/SuffixUkkonenTree.ts index 7c056397aafa..998a78652e58 100644 --- a/tests/unit/SuffixUkkonenTree.ts +++ b/tests/unit/SuffixUkkonenTree.ts @@ -93,8 +93,8 @@ describe('SuffixUkkonenTree', () => { it('should work with large random data sets', () => { const data = Array.from({length: 1000}, () => { - // return words of length 5-27 with random char codes: - return Array.from({length: Math.floor(Math.random() * 22 + 5)}, () => { + // return words of length 9-31 with random char codes: + return Array.from({length: Math.floor(Math.random() * 22 + 9)}, () => { const alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789@-_.'; return alphabet.charAt(Math.floor(Math.random() * alphabet.length)); }).join(''); From 4fa839024b213d77f6196b8d2bc2357729a3b5d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 14:47:54 +0200 Subject: [PATCH 047/264] migrate to useOnyx --- src/pages/ChatFinderPage/index.tsx | 32 +++++++++--------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/src/pages/ChatFinderPage/index.tsx b/src/pages/ChatFinderPage/index.tsx index d3a34e3a7452..8da03cce0046 100644 --- a/src/pages/ChatFinderPage/index.tsx +++ b/src/pages/ChatFinderPage/index.tsx @@ -1,8 +1,7 @@ import type {StackScreenProps} from '@react-navigation/stack'; import isEmpty from 'lodash/isEmpty'; import React, {useCallback, useEffect, useMemo, useState} from 'react'; -import type {OnyxEntry} from 'react-native-onyx'; -import {withOnyx} from 'react-native-onyx'; +import {useOnyx} from 'react-native-onyx'; import HeaderWithBackButton from '@components/HeaderWithBackButton'; import {useOptionsList} from '@components/OptionListContextProvider'; import ScreenWrapper from '@components/ScreenWrapper'; @@ -25,18 +24,9 @@ import CONST from '@src/CONST'; import ONYXKEYS from '@src/ONYXKEYS'; import ROUTES from '@src/ROUTES'; import type SCREENS from '@src/SCREENS'; -import type * as OnyxTypes from '@src/types/onyx'; import ChatFinderPageFooter from './ChatFinderPageFooter'; -type ChatFinderPageOnyxProps = { - /** Beta features list */ - betas: OnyxEntry; - - /** Whether or not we are searching for reports on the server */ - isSearchingForReports: OnyxEntry; -}; - -type ChatFinderPageProps = ChatFinderPageOnyxProps & StackScreenProps; +type ChatFinderPageProps = StackScreenProps; type ChatFinderPageSectionItem = { data: OptionData[]; @@ -52,7 +42,7 @@ const setPerformanceTimersEnd = () => { const ChatFinderPageFooterInstance = ; -function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPageProps) { +function ChatFinderPage({navigation}: ChatFinderPageProps) { const [isScreenTransitionEnd, setIsScreenTransitionEnd] = useState(false); const {translate} = useLocalize(); const {isOffline} = useNetwork(); @@ -62,6 +52,11 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa const offlineMessage: string = isOffline ? `${translate('common.youAppearToBeOffline')} ${translate('search.resultsAreLimited')}` : ''; + const [betas] = useOnyx(ONYXKEYS.BETAS); + const [isSearchingForReports] = useOnyx(ONYXKEYS.IS_SEARCHING_FOR_REPORTS, { + initWithStoredValues: false, + }); + const [searchValue, debouncedSearchValue, setSearchValue] = useDebouncedState(''); const [, debouncedSearchValueInServer, setSearchValueInServer] = useDebouncedState('', 500); const updateSearchValue = useCallback( @@ -99,7 +94,6 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa * Builds a suffix tree and returns a function to search in it. */ const findInSearchTree = useMemo(() => { - const start = performance.now(); const tree = SuffixTree.makeTree([ { data: searchOptions.personalDetails, @@ -253,12 +247,4 @@ function ChatFinderPage({betas, isSearchingForReports, navigation}: ChatFinderPa ChatFinderPage.displayName = 'ChatFinderPage'; -export default withOnyx({ - betas: { - key: ONYXKEYS.BETAS, - }, - isSearchingForReports: { - key: ONYXKEYS.IS_SEARCHING_FOR_REPORTS, - initWithStoredValues: false, - }, -})(ChatFinderPage); +export default ChatFinderPage; From 8e583cbe005920355eaf82b70112783224fa0ae3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 14:57:20 +0200 Subject: [PATCH 048/264] ignore warning, this algorithm is optimized for speed --- src/libs/SuffixUkkonenTree.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index be2d2607b936..66f7de42672a 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -167,6 +167,7 @@ function makeTree(lists: Array>) { } function processCharacter(char: number) { + // eslint-disable-next-line no-constant-condition while (true) { const rEdge = getOrCreateREdge(currentNode); if (rEdge < currentPosition) { From 56a1e8ac5c10938ce2362f824e6ba41e80451d3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 15:06:34 +0200 Subject: [PATCH 049/264] add timings to new search --- src/CONST.ts | 2 ++ src/pages/ChatFinderPage/index.tsx | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/src/CONST.ts b/src/CONST.ts index 9ee9ec4d9147..46a0f7b66a82 100755 --- a/src/CONST.ts +++ b/src/CONST.ts @@ -1096,6 +1096,8 @@ const CONST = { SEARCH_OPTION_LIST_DEBOUNCE_TIME: 300, RESIZE_DEBOUNCE_TIME: 100, UNREAD_UPDATE_DEBOUNCE_TIME: 300, + SEARCH_MAKE_TREE: 'search_make_tree', + SEARCH_BUILD_TREE: 'search_build_tree', SEARCH_FILTER_OPTIONS: 'search_filter_options', USE_DEBOUNCED_STATE_DELAY: 300, }, diff --git a/src/pages/ChatFinderPage/index.tsx b/src/pages/ChatFinderPage/index.tsx index 8da03cce0046..8434637643b1 100644 --- a/src/pages/ChatFinderPage/index.tsx +++ b/src/pages/ChatFinderPage/index.tsx @@ -94,6 +94,7 @@ function ChatFinderPage({navigation}: ChatFinderPageProps) { * Builds a suffix tree and returns a function to search in it. */ const findInSearchTree = useMemo(() => { + Timing.start(CONST.TIMING.SEARCH_MAKE_TREE); const tree = SuffixTree.makeTree([ { data: searchOptions.personalDetails, @@ -122,7 +123,10 @@ function ChatFinderPage({navigation}: ChatFinderPageProps) { }, }, ]); + Timing.end(CONST.TIMING.SEARCH_MAKE_TREE); + Timing.start(CONST.TIMING.SEARCH_BUILD_TREE); tree.build(); + Timing.end(CONST.TIMING.SEARCH_BUILD_TREE); function search(searchInput: string) { const [personalDetails, recentReports] = tree.findInSearchTree(searchInput); From 309556ac31e0614c03fb521fac91d1bd929d9202 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 16:00:49 +0200 Subject: [PATCH 050/264] refactor: separate suffix tree from search logic --- src/CONST.ts | 1 + src/libs/FastSearch.ts | 120 ++++++++++++++++++++++++++++ src/libs/SuffixUkkonenTree.ts | 124 +++-------------------------- src/pages/ChatFinderPage/index.tsx | 11 +-- tests/unit/FastSearchTest.ts | 105 ++++++++++++++++++++++++ tests/unit/SuffixUkkonenTree.ts | 115 +------------------------- 6 files changed, 246 insertions(+), 230 deletions(-) create mode 100644 src/libs/FastSearch.ts create mode 100644 tests/unit/FastSearchTest.ts diff --git a/src/CONST.ts b/src/CONST.ts index 46a0f7b66a82..94ee4c3c614c 100755 --- a/src/CONST.ts +++ b/src/CONST.ts @@ -1096,6 +1096,7 @@ const CONST = { SEARCH_OPTION_LIST_DEBOUNCE_TIME: 300, RESIZE_DEBOUNCE_TIME: 100, UNREAD_UPDATE_DEBOUNCE_TIME: 300, + SEARCH_CONVERT_SEARCH_VALUES: 'search_convert_search_values', SEARCH_MAKE_TREE: 'search_make_tree', SEARCH_BUILD_TREE: 'search_build_tree', SEARCH_FILTER_OPTIONS: 'search_filter_options', diff --git a/src/libs/FastSearch.ts b/src/libs/FastSearch.ts new file mode 100644 index 000000000000..1c32cdfd8148 --- /dev/null +++ b/src/libs/FastSearch.ts @@ -0,0 +1,120 @@ +import CONST from '@src/CONST'; +import Timing from './actions/Timing'; +import {DELIMITER_CHAR_CODE, END_CHAR_CODE, makeTree, stringToNumeric} from './SuffixUkkonenTree'; + +type SearchableData = { + /** + * The data that should be searchable + */ + data: T[]; + /** + * A function that generates a string from a data entry. The string's value is used for searching. + * If you have multiple fields that should be searchable, simply concat them to the string and return it. + */ + toSearchableString: (data: T) => string; +}; + +function createFastSearch(dataSet: Array>) { + // Create a numeric list for the suffix tree, and a look up indexes array + Timing.start(CONST.TIMING.SEARCH_CONVERT_SEARCH_VALUES); + const listsAsConcatedNumericList: number[] = []; + const indexesByList: Array> = []; + for (const {data, toSearchableString} of dataSet) { + const [numericRepresentation, searchIndexList] = dataToNumericRepresentation({data, toSearchableString}); + for (const num of numericRepresentation) { + // Note: we had to use a loop here as push with spread yields a maximum call stack exceeded error + listsAsConcatedNumericList.push(num); + } + indexesByList.push(searchIndexList); + } + listsAsConcatedNumericList.push(END_CHAR_CODE); + Timing.end(CONST.TIMING.SEARCH_CONVERT_SEARCH_VALUES); + + // Create & build the suffix tree: + Timing.start(CONST.TIMING.SEARCH_MAKE_TREE); + const tree = makeTree(listsAsConcatedNumericList); + Timing.end(CONST.TIMING.SEARCH_MAKE_TREE); + + Timing.start(CONST.TIMING.SEARCH_BUILD_TREE); + tree.build(); + Timing.end(CONST.TIMING.SEARCH_BUILD_TREE); + + function search(searchInput: string): T[][] { + const searchValueNumeric = stringToNumeric(cleanString(searchInput)); + const result = tree.findSubstring(searchValueNumeric); + + // Map the results to the original options + const mappedResults = Array.from({length: indexesByList.length}, () => new Set()); + result.forEach((index) => { + let offset = 0; + for (let i = 0; i < indexesByList.length; i++) { + const relativeIndex = index - offset + 1; + if (relativeIndex < indexesByList[i].length && relativeIndex >= 0) { + const option = indexesByList[i][relativeIndex]; + if (option) { + mappedResults[i].add(option); + } + } else { + offset += indexesByList[i].length; + } + } + }); + + return mappedResults.map((set) => Array.from(set)); + } + + return { + search, + }; +} + +/** + * The suffix tree can only store string like values, and internally stores those as numbers. + * This function converts the user data (which are most likely objects) to a numeric representation. + * Additionally a list of the original data and their index position in the numeric list is created, which is used to map the found occurrences back to the original data. + */ +function dataToNumericRepresentation({data, toSearchableString}: SearchableData): [number[], Array] { + const searchIndexList: Array = []; + const allDataAsNumbers: number[] = []; + + data.forEach((option, index) => { + const searchStringForTree = toSearchableString(option); + // Remove all none a-z chars: + const cleanedSearchStringForTree = cleanString(searchStringForTree); + + if (cleanedSearchStringForTree.length === 0) { + return; + } + + const numericRepresentation = stringToNumeric(cleanedSearchStringForTree); + + // We need to push an array that has the same length as the length of the string we insert for this option: + const indexes = Array.from({length: numericRepresentation.length}, () => option); + // Note: we add undefined for the delimiter character + searchIndexList.push(...indexes, undefined); + + allDataAsNumbers.push(...numericRepresentation); + if (index < data.length - 1) { + allDataAsNumbers.push(DELIMITER_CHAR_CODE); + } + }); + + return [allDataAsNumbers, searchIndexList]; +} + +// Removes any special characters, except for numbers and letters (including unicode letters) +const nonAlphanumericRegex = /[^0-9\p{L}]/gu; + +/** + * Everything in the tree is treated as lowercase. Strings will additionally be cleaned from + * special characters, as they are irrelevant for the search, and thus we can save some space. + */ +function cleanString(input: string) { + return input.toLowerCase().replace(nonAlphanumericRegex, ''); +} + +const FastSearch = { + createFastSearch, +}; + +export default FastSearch; diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 66f7de42672a..78ad3f289eb6 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -11,9 +11,6 @@ const SPECIAL_CHAR_CODE = ALPHABET_SIZE - 3; const DELIMITER_CHAR_CODE = ALPHABET_SIZE - 2; const END_CHAR_CODE = ALPHABET_SIZE - 1; -// Removes any special characters, except for numbers and letters (including unicode letters) -const nonAlphanumericRegex = /[^0-9\p{L}]/gu; - /** * Converts a number to a base26 string number. * This is used to fit all kinds of characters in the range of a-z. @@ -46,7 +43,7 @@ function convertToBase26(num: number): string { * - 27 is for the delimiter character * - 28 is for the end character */ -function stringToArray(input: string) { +function stringToNumeric(input: string) { const res: number[] = []; for (const char of input) { const charCode = char.charCodeAt(0); @@ -55,91 +52,21 @@ function stringToArray(input: string) { res.push(charCodeABased); } else { const asBase26String = convertToBase26(charCode); - const asCharCodes = stringToArray(asBase26String); + const asCharCodes = stringToNumeric(asBase26String); res.push(SPECIAL_CHAR_CODE, ...asCharCodes); } } return res; } -type TreeDataParams = { - /** - * The data that should be searchable - */ - data: T[]; - /** - * A function that generates a string from a data entry. The string's value is used for searching. - * If you have multiple fields that should be searchable, simply concat them to the string and return it. - */ - toSearchableString: (data: T) => string; -}; - -/** - * Everything in the tree is treated as lowercase. Strings will additionally be cleaned from - * special characters, as they are irrelevant for the search, and thus we can save some space. - */ -function cleanString(input: string) { - return input.toLowerCase().replace(nonAlphanumericRegex, ''); -} - -/** - * The suffix tree can only store string like values, and internally stores those as numbers. - * This function converts the user data (which are most likely objects) to a numeric representation. - * Additionally a list of the original data and their index position in the numeric list is created, which is used to map the found occurrences back to the original data. - */ -function dataToNumericRepresentation({data, toSearchableString}: TreeDataParams): [number[], Array] { - const searchIndexList: Array = []; - const allDataAsNumbers: number[] = []; - - data.forEach((option, index) => { - const searchStringForTree = toSearchableString(option); - // Remove all none a-z chars: - const cleanedSearchStringForTree = cleanString(searchStringForTree); - - if (cleanedSearchStringForTree.length === 0) { - return; - } - - const numericRepresentation = stringToArray(cleanedSearchStringForTree); - - // We need to push an array that has the same length as the length of the string we insert for this option: - const indexes = Array.from({length: numericRepresentation.length}, () => option); - // Note: we add undefined for the delimiter character - searchIndexList.push(...indexes, undefined); - - allDataAsNumbers.push(...numericRepresentation); - if (index < data.length - 1) { - allDataAsNumbers.push(DELIMITER_CHAR_CODE); - } - }); - - return [allDataAsNumbers, searchIndexList]; -} - /** * Makes a tree from an input string */ -function makeTree(lists: Array>) { - const listsAsConcatedNumericList: number[] = []; - - // We might received multiple lists of data that we want to search in - // thus indexes is a list of those data lists - const indexesByList: Array> = []; - - for (const {data, toSearchableString: transform} of lists) { - const [numericRepresentation, searchIndexList] = dataToNumericRepresentation({data, toSearchableString: transform}); - for (const num of numericRepresentation) { - // we have to use a loop here as push with spread yields a maximum call stack exceeded error - listsAsConcatedNumericList.push(num); - } - indexesByList.push(searchIndexList); - } - listsAsConcatedNumericList.push(END_CHAR_CODE); - +function makeTree(numericSearchValues: number[]) { const transitionNodes: Array = []; const leftEdges: number[] = []; const rightEdges: Array = []; - const defaultREdgeValue = listsAsConcatedNumericList.length - 1; + const defaultREdgeValue = numericSearchValues.length - 1; const parent: number[] = []; const suffixLink: number[] = []; @@ -185,7 +112,7 @@ function makeTree(lists: Array>) { currentNode = curNode[char]; currentPosition = leftEdges[currentNode]; } - if (currentPosition === -1 || char === listsAsConcatedNumericList[currentPosition]) { + if (currentPosition === -1 || char === numericSearchValues[currentPosition]) { currentPosition++; } else { splitEdge(char); @@ -222,7 +149,7 @@ function makeTree(lists: Array>) { transitionTable = Array(ALPHABET_SIZE).fill(-1); transitionNodes[nodeCounter] = transitionTable; } - transitionTable[listsAsConcatedNumericList[currentPosition]] = currentNode; + transitionTable[numericSearchValues[currentPosition]] = currentNode; transitionTable[c] = nodeCounter + 1; leftEdges[nodeCounter + 1] = currentIndex; parent[nodeCounter + 1] = nodeCounter; @@ -234,7 +161,7 @@ function makeTree(lists: Array>) { parentTransitionNodes = Array(ALPHABET_SIZE).fill(-1); transitionNodes[parent[nodeCounter]] = parentTransitionNodes; } - parentTransitionNodes[listsAsConcatedNumericList[leftEdges[nodeCounter]]] = nodeCounter; + parentTransitionNodes[numericSearchValues[leftEdges[nodeCounter]]] = nodeCounter; nodeCounter += 2; handleDescent(nodeCounter); } @@ -247,7 +174,7 @@ function makeTree(lists: Array>) { if (tTv === undefined) { throw new Error('handleDescent: tTv should not be undefined'); } - currentNode = tTv[listsAsConcatedNumericList[currentPosition]]; + currentNode = tTv[numericSearchValues[currentPosition]]; const rEdge = getOrCreateREdge(currentNode); currentPosition += rEdge - leftEdges[currentNode] + 1; } @@ -267,8 +194,8 @@ function makeTree(lists: Array>) { function build() { initializeTree(); - for (currentIndex = 0; currentIndex < listsAsConcatedNumericList.length; ++currentIndex) { - const c = listsAsConcatedNumericList[currentIndex]; + for (currentIndex = 0; currentIndex < numericSearchValues.length; ++currentIndex) { + const c = numericSearchValues[currentIndex]; processCharacter(c); } } @@ -296,7 +223,7 @@ function makeTree(lists: Array>) { // console.log('dfs', node, depth, leftRange, rightRange, rangeLen, searchString.length, searchString); for (let i = 0; i < rangeLen && depth + i < searchString.length; i++) { - if (searchString[depth + i] !== listsAsConcatedNumericList[leftRange + i]) { + if (searchString[depth + i] !== numericSearchValues[leftRange + i]) { return; } } @@ -312,7 +239,7 @@ function makeTree(lists: Array>) { } if (isLeaf && depth + rangeLen >= searchString.length) { - occurrences.push(listsAsConcatedNumericList.length - (depth + rangeLen)); + occurrences.push(numericSearchValues.length - (depth + rangeLen)); } } @@ -320,35 +247,10 @@ function makeTree(lists: Array>) { return occurrences; } - function findInSearchTree(searchInput: string): T[][] { - const searchValueNumeric = stringToArray(cleanString(searchInput)); - const result = findSubstring(searchValueNumeric); - - // Map the results to the original options - const mappedResults = Array.from({length: lists.length}, () => new Set()); - result.forEach((index) => { - let offset = 0; - for (let i = 0; i < indexesByList.length; i++) { - const relativeIndex = index - offset + 1; - if (relativeIndex < indexesByList[i].length && relativeIndex >= 0) { - const option = indexesByList[i][relativeIndex]; - if (option) { - mappedResults[i].add(option); - } - } else { - offset += indexesByList[i].length; - } - } - }); - - return mappedResults.map((set) => Array.from(set)); - } - return { build, findSubstring, - findInSearchTree, }; } -export {makeTree, dataToNumericRepresentation as prepareData}; +export {makeTree, stringToNumeric, DELIMITER_CHAR_CODE, END_CHAR_CODE}; diff --git a/src/pages/ChatFinderPage/index.tsx b/src/pages/ChatFinderPage/index.tsx index 8434637643b1..1adcabcf3c31 100644 --- a/src/pages/ChatFinderPage/index.tsx +++ b/src/pages/ChatFinderPage/index.tsx @@ -12,12 +12,12 @@ import useDebouncedState from '@hooks/useDebouncedState'; import useDismissedReferralBanners from '@hooks/useDismissedReferralBanners'; import useLocalize from '@hooks/useLocalize'; import useNetwork from '@hooks/useNetwork'; +import FastSearch from '@libs/FastSearch'; import Navigation from '@libs/Navigation/Navigation'; import type {RootStackParamList} from '@libs/Navigation/types'; import * as OptionsListUtils from '@libs/OptionsListUtils'; import Performance from '@libs/Performance'; import type {OptionData} from '@libs/ReportUtils'; -import * as SuffixTree from '@libs/SuffixUkkonenTree'; import * as Report from '@userActions/Report'; import Timing from '@userActions/Timing'; import CONST from '@src/CONST'; @@ -94,8 +94,7 @@ function ChatFinderPage({navigation}: ChatFinderPageProps) { * Builds a suffix tree and returns a function to search in it. */ const findInSearchTree = useMemo(() => { - Timing.start(CONST.TIMING.SEARCH_MAKE_TREE); - const tree = SuffixTree.makeTree([ + const fastSearch = FastSearch.createFastSearch([ { data: searchOptions.personalDetails, toSearchableString: (option) => { @@ -123,13 +122,9 @@ function ChatFinderPage({navigation}: ChatFinderPageProps) { }, }, ]); - Timing.end(CONST.TIMING.SEARCH_MAKE_TREE); - Timing.start(CONST.TIMING.SEARCH_BUILD_TREE); - tree.build(); - Timing.end(CONST.TIMING.SEARCH_BUILD_TREE); function search(searchInput: string) { - const [personalDetails, recentReports] = tree.findInSearchTree(searchInput); + const [personalDetails, recentReports] = fastSearch.search(searchInput); return { personalDetails, diff --git a/tests/unit/FastSearchTest.ts b/tests/unit/FastSearchTest.ts new file mode 100644 index 000000000000..cb5084f9b40a --- /dev/null +++ b/tests/unit/FastSearchTest.ts @@ -0,0 +1,105 @@ +import FastSearch from '../../src/libs/FastSearch'; + +describe('FastSearch', () => { + it('should insert, and find the word', () => { + const {search} = FastSearch.createFastSearch([ + { + data: ['banana'], + toSearchableString: (data) => data, + }, + ]); + expect(search('an')).toEqual([['banana']]); + }); + + it('should work with multiple words', () => { + const {search} = FastSearch.createFastSearch([ + { + data: ['banana', 'test'], + toSearchableString: (data) => data, + }, + ]); + + expect(search('es')).toEqual([['test']]); + }); + + it('should work when providing two data sets', () => { + const {search} = FastSearch.createFastSearch([ + { + data: ['erica', 'banana'], + toSearchableString: (data) => data, + }, + { + data: ['banana', 'test'], + toSearchableString: (data) => data, + }, + ]); + + expect(search('es')).toEqual([[], ['test']]); + }); + + it('should work with numbers', () => { + const {search} = FastSearch.createFastSearch([ + { + data: [1, 2, 3, 4, 5], + toSearchableString: (data) => String(data), + }, + ]); + + expect(search('2')).toEqual([[2]]); + }); + + it('should work with unicodes', () => { + const {search} = FastSearch.createFastSearch([ + { + data: ['banana', 'ñèşťǒř', 'test'], + toSearchableString: (data) => data, + }, + ]); + + expect(search('èşť')).toEqual([['ñèşťǒř']]); + }); + + it('should work with words containing "reserved special characters"', () => { + const {search} = FastSearch.createFastSearch([ + { + data: ['ba|nana', 'te{st', 'he}llo'], + toSearchableString: (data) => data, + }, + ]); + + expect(search('st')).toEqual([['te{st']]); + expect(search('llo')).toEqual([['he}llo']]); + expect(search('nana')).toEqual([['ba|nana']]); + }); + + it('should be case insensitive', () => { + const {search} = FastSearch.createFastSearch([ + { + data: ['banana', 'TeSt', 'TEST'], + toSearchableString: (data) => data, + }, + ]); + + expect(search('test')).toEqual([['TeSt', 'TEST']]); + }); + + it('should work with large random data sets', () => { + const data = Array.from({length: 1000}, () => { + return Array.from({length: Math.floor(Math.random() * 22 + 9)}, () => { + const alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789@-_.'; + return alphabet.charAt(Math.floor(Math.random() * alphabet.length)); + }).join(''); + }); + + const {search} = FastSearch.createFastSearch([ + { + data, + toSearchableString: (x) => x, + }, + ]); + + data.forEach((word) => { + expect(search(word)).toEqual([expect.arrayContaining([word])]); + }); + }); +}); diff --git a/tests/unit/SuffixUkkonenTree.ts b/tests/unit/SuffixUkkonenTree.ts index 998a78652e58..a0c8edbb1162 100644 --- a/tests/unit/SuffixUkkonenTree.ts +++ b/tests/unit/SuffixUkkonenTree.ts @@ -1,116 +1,9 @@ -import {makeTree} from '@libs/SuffixUkkonenTree'; +import {END_CHAR_CODE, makeTree, stringToNumeric} from '@libs/SuffixUkkonenTree'; describe('SuffixUkkonenTree', () => { - it('should insert, build, and find the word', () => { - const tree = makeTree([ - { - data: ['banana'], - toSearchableString: (data) => data, - }, - ]); + it('should insert, build, and find all occurrences', () => { + const tree = makeTree([...stringToNumeric('banana'), END_CHAR_CODE]); tree.build(); - expect(tree.findInSearchTree('an')).toEqual([['banana']]); - }); - - it('should work with multiple words', () => { - const tree = makeTree([ - { - data: ['banana', 'test'], - toSearchableString: (data) => data, - }, - ]); - tree.build(); - - expect(tree.findInSearchTree('es')).toEqual([['test']]); - }); - - it('should work when providing two data sets', () => { - const tree = makeTree([ - { - data: ['erica', 'banana'], - toSearchableString: (data) => data, - }, - { - data: ['banana', 'test'], - toSearchableString: (data) => data, - }, - ]); - tree.build(); - - expect(tree.findInSearchTree('es')).toEqual([[], ['test']]); - }); - - it('should work with numbers', () => { - const tree = makeTree([ - { - data: [1, 2, 3, 4, 5], - toSearchableString: (data) => String(data), - }, - ]); - tree.build(); - expect(tree.findInSearchTree('2')).toEqual([[2]]); - }); - - it('should work with unicodes', () => { - const tree = makeTree([ - { - data: ['banana', 'ñèşťǒř', 'test'], - toSearchableString: (data) => data, - }, - ]); - tree.build(); - expect(tree.findInSearchTree('èşť')).toEqual([['ñèşťǒř']]); - }); - - it('should work with words containing "reserved special characters"', () => { - // Some special characters are used for the internal representation of the tree - // However, they are still supported and shouldn't cause any problems. - // The only gotcha is, that you can't search for special chars (however, none of our searchable data contains any of them). - const tree = makeTree([ - { - data: ['ba|nana', 'te{st', 'he}llo'], - toSearchableString: (data) => data, - }, - ]); - tree.build(); - - expect(tree.findInSearchTree('st')).toEqual([['te{st']]); - expect(tree.findInSearchTree('llo')).toEqual([['he}llo']]); - expect(tree.findInSearchTree('nana')).toEqual([['ba|nana']]); - }); - - it('should be case insensitive', () => { - const tree = makeTree([ - { - data: ['banana', 'TeSt', 'TEST'], - toSearchableString: (data) => data, - }, - ]); - tree.build(); - - expect(tree.findInSearchTree('test')).toEqual([['TeSt', 'TEST']]); - }); - - it('should work with large random data sets', () => { - const data = Array.from({length: 1000}, () => { - // return words of length 9-31 with random char codes: - return Array.from({length: Math.floor(Math.random() * 22 + 9)}, () => { - const alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789@-_.'; - return alphabet.charAt(Math.floor(Math.random() * alphabet.length)); - }).join(''); - }); - - const tree = makeTree([ - { - data, - toSearchableString: (x) => x, - }, - ]); - tree.build(); - - // Expect to find each word in the tree - data.forEach((word) => { - expect(tree.findInSearchTree(word)).toEqual([expect.arrayContaining([word])]); - }); + expect(tree.findSubstring(stringToNumeric('an'))).toEqual([1, 3]); }); }); From f7528f40e64e7c01b49db812d6a7325633472d85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 16:12:04 +0200 Subject: [PATCH 051/264] add explanations + fix test --- src/libs/SuffixUkkonenTree.ts | 12 ++++++++---- tests/unit/SuffixUkkonenTree.ts | 6 +++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 78ad3f289eb6..7d0beeb1ea76 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -60,7 +60,11 @@ function stringToNumeric(input: string) { } /** - * Makes a tree from an input string + * Creates a new tree instance that can be used to build a suffix tree and search in it. + * The input is a numeric representation of the search string, which can be create using {@link stringToNumeric}. + * Separate search values must be separated by the {@link DELIMITER_CHAR_CODE}. The search string must end with the {@link END_CHAR_CODE}. + * + * The tree will be built using the Ukkonen's algorithm: https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf */ function makeTree(numericSearchValues: number[]) { const transitionNodes: Array = []; @@ -205,12 +209,12 @@ function makeTree(numericSearchValues: number[]) { * * You can think of the tree that we create as a big string that looks like this: * - * "banana{pancake{apple|" - * The delimiter character '{' is used to separate the different strings. + * "banana$pancake$apple|" + * The example delimiter character '$' is used to separate the different strings. * The end character '|' is used to indicate the end of our search string. * * This function will return the index(es) of found occurrences within this big string. - * So, when searching for "an", it would return [1, 4, 11]. + * So, when searching for "an", it would return [1, 3, 8]. */ function findSubstring(searchString: number[]) { const occurrences: number[] = []; diff --git a/tests/unit/SuffixUkkonenTree.ts b/tests/unit/SuffixUkkonenTree.ts index a0c8edbb1162..84ee4d984ed6 100644 --- a/tests/unit/SuffixUkkonenTree.ts +++ b/tests/unit/SuffixUkkonenTree.ts @@ -1,9 +1,9 @@ -import {END_CHAR_CODE, makeTree, stringToNumeric} from '@libs/SuffixUkkonenTree'; +import {DELIMITER_CHAR_CODE, END_CHAR_CODE, makeTree, stringToNumeric} from '@libs/SuffixUkkonenTree'; describe('SuffixUkkonenTree', () => { it('should insert, build, and find all occurrences', () => { - const tree = makeTree([...stringToNumeric('banana'), END_CHAR_CODE]); + const tree = makeTree([...stringToNumeric('banana'), DELIMITER_CHAR_CODE, ...stringToNumeric('pancake'), END_CHAR_CODE]); tree.build(); - expect(tree.findSubstring(stringToNumeric('an'))).toEqual([1, 3]); + expect(tree.findSubstring(stringToNumeric('an'))).toEqual([1, 3, 8]); }); }); From 8b5b77f6a720e33644dbf1fd34457a176451755c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 16:18:39 +0200 Subject: [PATCH 052/264] code documentation --- src/libs/FastSearch.ts | 7 +++++++ src/libs/SuffixUkkonenTree.ts | 6 +++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/libs/FastSearch.ts b/src/libs/FastSearch.ts index 1c32cdfd8148..91b297df3fee 100644 --- a/src/libs/FastSearch.ts +++ b/src/libs/FastSearch.ts @@ -14,6 +14,10 @@ type SearchableData = { toSearchableString: (data: T) => string; }; +/** + * Creates a new "FastSearch" instance. "FastSearch" uses a suffix tree to search for (sub-)strings in a list of strings. + * You can provide multiple datasets. The search results will be returned for each dataset. + */ function createFastSearch(dataSet: Array>) { // Create a numeric list for the suffix tree, and a look up indexes array Timing.start(CONST.TIMING.SEARCH_CONVERT_SEARCH_VALUES); @@ -39,6 +43,9 @@ function createFastSearch(dataSet: Array>) { tree.build(); Timing.end(CONST.TIMING.SEARCH_BUILD_TREE); + /** + * Searches for the given input and returns results for each dataset. + */ function search(searchInput: string): T[][] { const searchValueNumeric = stringToNumeric(cleanString(searchInput)); const result = tree.findSubstring(searchValueNumeric); diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 7d0beeb1ea76..3b4c9dd1f842 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -1,7 +1,9 @@ /* eslint-disable no-continue */ /** - * TODO: quick explanation to how suffix ukkonen tree works: + * This implements a suffix tree using Ukkonen's algorithm. + * A good visualization to learn about the algorithm can be found here: https://brenden.github.io/ukkonen-animation/ + * Note: This implementation is optimized for performance, not necessarily for readability. */ const CHAR_CODE_A = 'a'.charCodeAt(0); @@ -224,8 +226,6 @@ function makeTree(numericSearchValues: number[]) { const rightRange = rightEdges[node] ?? defaultREdgeValue; const rangeLen = node === 0 ? 0 : rightRange - leftRange + 1; - // console.log('dfs', node, depth, leftRange, rightRange, rangeLen, searchString.length, searchString); - for (let i = 0; i < rangeLen && depth + i < searchString.length; i++) { if (searchString[depth + i] !== numericSearchValues[leftRange + i]) { return; From a6b7939fc4813639af7c27232cc8b93835ad1a96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 16:33:07 +0200 Subject: [PATCH 053/264] add test verifying identical words work in suffix tree --- tests/unit/SuffixUkkonenTree.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/unit/SuffixUkkonenTree.ts b/tests/unit/SuffixUkkonenTree.ts index 84ee4d984ed6..d56db7778528 100644 --- a/tests/unit/SuffixUkkonenTree.ts +++ b/tests/unit/SuffixUkkonenTree.ts @@ -4,6 +4,12 @@ describe('SuffixUkkonenTree', () => { it('should insert, build, and find all occurrences', () => { const tree = makeTree([...stringToNumeric('banana'), DELIMITER_CHAR_CODE, ...stringToNumeric('pancake'), END_CHAR_CODE]); tree.build(); - expect(tree.findSubstring(stringToNumeric('an'))).toEqual([1, 3, 8]); + expect(tree.findSubstring(stringToNumeric('an'))).toEqual(expect.arrayContaining([1, 3, 8])); + }); + + it('should handle identical words', () => { + const tree = makeTree([...stringToNumeric('banana'), DELIMITER_CHAR_CODE, ...stringToNumeric('banana'), END_CHAR_CODE]); + tree.build(); + expect(tree.findSubstring(stringToNumeric('an'))).toEqual(expect.arrayContaining([1, 3, 8, 10])); }); }); From 8b41e8823f8dfe542a9472918ccbc1952acb28f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 16:36:49 +0200 Subject: [PATCH 054/264] add failing unit tests for duplicated string issue --- src/libs/SuffixUkkonenTree.ts | 2 ++ tests/unit/FastSearchTest.ts | 2 +- tests/unit/SuffixUkkonenTree.ts | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 3b4c9dd1f842..f7c27445be1a 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -44,6 +44,8 @@ function convertToBase26(num: number): string { * - 26 is for encoding special characters. Character numbers that are not within the range of a-z will be encoded as "specialCharacter + base26(charCode)" * - 27 is for the delimiter character * - 28 is for the end character + * + * Note: The string should be converted to lowercase first (otherwise uppercase letters get base26'ed taking more space than necessary). */ function stringToNumeric(input: string) { const res: number[] = []; diff --git a/tests/unit/FastSearchTest.ts b/tests/unit/FastSearchTest.ts index cb5084f9b40a..d1186dc9adb9 100644 --- a/tests/unit/FastSearchTest.ts +++ b/tests/unit/FastSearchTest.ts @@ -75,7 +75,7 @@ describe('FastSearch', () => { it('should be case insensitive', () => { const {search} = FastSearch.createFastSearch([ { - data: ['banana', 'TeSt', 'TEST'], + data: ['banana', 'TeSt', 'TEST', 'X'], toSearchableString: (data) => data, }, ]); diff --git a/tests/unit/SuffixUkkonenTree.ts b/tests/unit/SuffixUkkonenTree.ts index d56db7778528..3e4cfbbe3172 100644 --- a/tests/unit/SuffixUkkonenTree.ts +++ b/tests/unit/SuffixUkkonenTree.ts @@ -8,7 +8,7 @@ describe('SuffixUkkonenTree', () => { }); it('should handle identical words', () => { - const tree = makeTree([...stringToNumeric('banana'), DELIMITER_CHAR_CODE, ...stringToNumeric('banana'), END_CHAR_CODE]); + const tree = makeTree([...stringToNumeric('banana'), DELIMITER_CHAR_CODE, ...stringToNumeric('banana'), DELIMITER_CHAR_CODE, ...stringToNumeric('x'), END_CHAR_CODE]); tree.build(); expect(tree.findSubstring(stringToNumeric('an'))).toEqual(expect.arrayContaining([1, 3, 8, 10])); }); From 4e273df5c60c2cbbbd986b0a2bb462251e79ef1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Wed, 25 Sep 2024 16:42:30 +0200 Subject: [PATCH 055/264] add comment --- src/libs/SuffixUkkonenTree.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index f7c27445be1a..52d632511d2a 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -4,6 +4,8 @@ * This implements a suffix tree using Ukkonen's algorithm. * A good visualization to learn about the algorithm can be found here: https://brenden.github.io/ukkonen-animation/ * Note: This implementation is optimized for performance, not necessarily for readability. + * + * You probably don't want to use this directly, but rather use @libs/FastSearch.ts as a easy to use wrapper around this. */ const CHAR_CODE_A = 'a'.charCodeAt(0); From 3afbbdc1332bfd51b251494e802beaafad4a301d Mon Sep 17 00:00:00 2001 From: VickyStash Date: Thu, 26 Sep 2024 09:36:20 +0200 Subject: [PATCH 056/264] Implement AssignCard API call --- .../API/parameters/AssignCompanyCardParams.ts | 10 +++++++++ src/libs/API/parameters/index.ts | 1 + src/libs/API/types.ts | 2 ++ src/libs/actions/Policy/Policy.ts | 21 +++++++++++++++++++ .../assignCard/AssignCardFeedPage.tsx | 4 ++-- .../assignCard/CardSelectionStep.tsx | 20 +++++++++--------- .../assignCard/ConfirmationStep.tsx | 9 +++++++- src/types/onyx/AssignCard.ts | 6 +++--- 8 files changed, 57 insertions(+), 16 deletions(-) create mode 100644 src/libs/API/parameters/AssignCompanyCardParams.ts diff --git a/src/libs/API/parameters/AssignCompanyCardParams.ts b/src/libs/API/parameters/AssignCompanyCardParams.ts new file mode 100644 index 000000000000..6139bd5708f6 --- /dev/null +++ b/src/libs/API/parameters/AssignCompanyCardParams.ts @@ -0,0 +1,10 @@ +type AssignCompanyCardParams = { + policyID: string; + bankName: string; + encryptedCardNumber: string; + email: string; + startDate: string; + // reportActionID: string; +}; + +export default AssignCompanyCardParams; diff --git a/src/libs/API/parameters/index.ts b/src/libs/API/parameters/index.ts index e5cde1b77be7..32a4286f1755 100644 --- a/src/libs/API/parameters/index.ts +++ b/src/libs/API/parameters/index.ts @@ -321,6 +321,7 @@ export type {default as UpdateCardSettlementAccountParams} from './UpdateCardSet export type {default as SetCompanyCardFeedName} from './SetCompanyCardFeedName'; export type {default as DeleteCompanyCardFeed} from './DeleteCompanyCardFeed'; export type {default as SetCompanyCardTransactionLiability} from './SetCompanyCardTransactionLiability'; +export type {default as AssignCompanyCardParams} from './AssignCompanyCardParams'; export type {default as UnassignCompanyCard} from './UnassignCompanyCard'; export type {default as UpdateCompanyCard} from './UpdateCompanyCard'; export type {default as UpdateCompanyCardNameParams} from './UpdateCompanyCardNameParams'; diff --git a/src/libs/API/types.ts b/src/libs/API/types.ts index 8e35a0cb1984..9a2da64c115c 100644 --- a/src/libs/API/types.ts +++ b/src/libs/API/types.ts @@ -402,6 +402,7 @@ const WRITE_COMMANDS = { SET_COMPANY_CARD_FEED_NAME: 'SetFeedName', DELETE_COMPANY_CARD_FEED: 'RemoveFeed', SET_COMPANY_CARD_TRANSACTION_LIABILITY: 'SetFeedTransactionLiability', + ASSIGN_COMPANY_CARD: 'AssignCard', UNASSIGN_COMPANY_CARD: 'UnassignCard', UPDATE_COMPANY_CARD: 'SyncCard', UPDATE_COMPANY_CARD_NAME: 'SetCardName', @@ -466,6 +467,7 @@ type WriteCommandParameters = { [WRITE_COMMANDS.UPDATE_PERSONAL_DETAILS_FOR_WALLET]: Parameters.UpdatePersonalDetailsForWalletParams; [WRITE_COMMANDS.SET_COMPANY_CARD_FEED_NAME]: Parameters.SetCompanyCardFeedName; [WRITE_COMMANDS.DELETE_COMPANY_CARD_FEED]: Parameters.DeleteCompanyCardFeed; + [WRITE_COMMANDS.ASSIGN_COMPANY_CARD]: Parameters.AssignCompanyCardParams; [WRITE_COMMANDS.UNASSIGN_COMPANY_CARD]: Parameters.UnassignCompanyCard; [WRITE_COMMANDS.UPDATE_COMPANY_CARD]: Parameters.UpdateCompanyCard; [WRITE_COMMANDS.UPDATE_COMPANY_CARD_NAME]: Parameters.UpdateCompanyCardNameParams; diff --git a/src/libs/actions/Policy/Policy.ts b/src/libs/actions/Policy/Policy.ts index 2123f2a47764..e1f0d1b8b045 100644 --- a/src/libs/actions/Policy/Policy.ts +++ b/src/libs/actions/Policy/Policy.ts @@ -8,6 +8,7 @@ import type {ReportExportType} from '@components/ButtonWithDropdownMenu/types'; import * as API from '@libs/API'; import type { AddBillingCardAndRequestWorkspaceOwnerChangeParams, + AssignCompanyCardParams, CreateWorkspaceFromIOUPaymentParams, CreateWorkspaceParams, DeleteWorkspaceAvatarParams, @@ -88,6 +89,7 @@ import type { TaxRatesWithDefault, Transaction, } from '@src/types/onyx'; +import type {AssignCardData} from '@src/types/onyx/AssignCard'; import type {Errors} from '@src/types/onyx/OnyxCommon'; import type {Attributes, CompanyAddress, CustomUnit, NetSuiteCustomList, NetSuiteCustomSegment, Rate, TaxRate} from '@src/types/onyx/Policy'; import type {OnyxData} from '@src/types/onyx/Request'; @@ -4540,6 +4542,24 @@ function deleteWorkspaceCompanyCardFeed(policyID: string, workspaceAccountID: nu API.write(WRITE_COMMANDS.DELETE_COMPANY_CARD_FEED, parameters, onyxData); } +function assignWorkspaceCompanyCard(policyID: string, data?: Partial) { + if (!data) { + return; + } + const {bankName = '', email = '', encryptedCardNumber = '', startDate = ''} = data; + + const parameters: AssignCompanyCardParams = { + policyID, + bankName, + encryptedCardNumber, + email, + startDate, + // reportActionID, + }; + + API.write(WRITE_COMMANDS.ASSIGN_COMPANY_CARD, parameters); +} + function unassignWorkspaceCompanyCard(workspaceAccountID: number, cardID: string, bankName: string) { const authToken = NetworkStore.getAuthToken(); @@ -4879,6 +4899,7 @@ export { deleteWorkspaceCompanyCardFeed, setWorkspaceCompanyCardTransactionLiability, openPolicyCompanyCardsPage, + assignWorkspaceCompanyCard, unassignWorkspaceCompanyCard, updateWorkspaceCompanyCard, updateCompanyCardName, diff --git a/src/pages/workspace/companyCards/assignCard/AssignCardFeedPage.tsx b/src/pages/workspace/companyCards/assignCard/AssignCardFeedPage.tsx index 66ceaaf914c4..03e9d58a71ab 100644 --- a/src/pages/workspace/companyCards/assignCard/AssignCardFeedPage.tsx +++ b/src/pages/workspace/companyCards/assignCard/AssignCardFeedPage.tsx @@ -25,7 +25,7 @@ function AssignCardFeedPage({route, policy}: AssignCardFeedPageProps) { const feed = route.params?.feed; useEffect(() => { - CompanyCards.setAssignCardStepAndData({data: {feed}}); + CompanyCards.setAssignCardStepAndData({data: {bankName: feed}}); }, [feed]); switch (currentStep) { @@ -36,7 +36,7 @@ function AssignCardFeedPage({route, policy}: AssignCardFeedPageProps) { case CONST.COMPANY_CARD.STEP.TRANSACTION_START_DATE: return ; case CONST.COMPANY_CARD.STEP.CONFIRMATION: - return ; + return ; default: return ; } diff --git a/src/pages/workspace/companyCards/assignCard/CardSelectionStep.tsx b/src/pages/workspace/companyCards/assignCard/CardSelectionStep.tsx index b31780d54fa2..5947d382029f 100644 --- a/src/pages/workspace/companyCards/assignCard/CardSelectionStep.tsx +++ b/src/pages/workspace/companyCards/assignCard/CardSelectionStep.tsx @@ -23,21 +23,21 @@ import ROUTES from '@src/ROUTES'; type MockedCard = { key: string; - cardNumber: string; + encryptedCardNumber: string; }; const mockedCardList = [ { key: '1', - cardNumber: '123412XXXXXX1234', + encryptedCardNumber: '123412XXXXXX1234', }, { key: '2', - cardNumber: '123412XXXXXX1235', + encryptedCardNumber: '123412XXXXXX1235', }, { key: '3', - cardNumber: '123412XXXXXX1236', + encryptedCardNumber: '123412XXXXXX1236', }, ]; @@ -62,7 +62,7 @@ function CardSelectionStep({feed}: CardSelectionStepProps) { const isEditing = assignCard?.isEditing; const assignee = assignCard?.data?.email ?? ''; - const [cardSelected, setCardSelected] = useState(assignCard?.data?.cardName ?? ''); + const [cardSelected, setCardSelected] = useState(assignCard?.data?.encryptedCardNumber ?? ''); const [shouldShowError, setShouldShowError] = useState(false); const handleBackButtonPress = () => { @@ -88,7 +88,7 @@ function CardSelectionStep({feed}: CardSelectionStepProps) { } CompanyCards.setAssignCardStepAndData({ currentStep: isEditing ? CONST.COMPANY_CARD.STEP.CONFIRMATION : CONST.COMPANY_CARD.STEP.TRANSACTION_START_DATE, - data: {cardName: cardSelected}, + data: {encryptedCardNumber: cardSelected}, isEditing: false, }); }; @@ -97,10 +97,10 @@ function CardSelectionStep({feed}: CardSelectionStepProps) { const mockedCards = !Object.values(CONST.COMPANY_CARD.FEED_BANK_NAME).some((value) => value === feed) ? mockedCardListEmpty : mockedCardList; const cardListOptions = mockedCards.map((item) => ({ - keyForList: item?.cardNumber, - value: item?.cardNumber, - text: item?.cardNumber, - isSelected: cardSelected === item?.cardNumber, + keyForList: item?.encryptedCardNumber, + value: item?.encryptedCardNumber, + text: item?.encryptedCardNumber, + isSelected: cardSelected === item?.encryptedCardNumber, leftElement: ( { + Policy.assignWorkspaceCompanyCard(policyID, data); Navigation.goBack(); CompanyCards.clearAssignCardStepAndData(); }; diff --git a/src/types/onyx/AssignCard.ts b/src/types/onyx/AssignCard.ts index 0b2592d15079..690900fe5f74 100644 --- a/src/types/onyx/AssignCard.ts +++ b/src/types/onyx/AssignCard.ts @@ -6,14 +6,14 @@ type AssignCardStep = ValueOf; /** Data required to be sent to issue a new card */ type AssignCardData = { - /** The email address of the asignee */ + /** The email address of the assignee */ email: string; /** Number of the selected card */ - cardNumber: string; + encryptedCardNumber: string; /** The name of the feed */ - feed: string; + bankName: string; /** The name of the card */ cardName: string; From 57af9b1909c92c95bd45d1f9d89edb59811c19fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanno=20J=2E=20G=C3=B6decke?= Date: Thu, 26 Sep 2024 09:36:52 +0200 Subject: [PATCH 057/264] fix search not returning results with same search value --- src/libs/SuffixUkkonenTree.ts | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/libs/SuffixUkkonenTree.ts b/src/libs/SuffixUkkonenTree.ts index 52d632511d2a..96c5a1e46454 100644 --- a/src/libs/SuffixUkkonenTree.ts +++ b/src/libs/SuffixUkkonenTree.ts @@ -130,9 +130,6 @@ function makeTree(numericSearchValues: number[]) { } break; } - if (char === DELIMITER_CHAR_CODE) { - resetTreeTraversal(); - } } function createNewLeaf(c: number) { @@ -197,11 +194,6 @@ function makeTree(numericSearchValues: number[]) { currentPosition = rEdge - (currentPosition - (rightEdges[ts - 2] ?? defaultREdgeValue)) + 2; } - function resetTreeTraversal() { - currentNode = 0; - currentPosition = 0; - } - function build() { initializeTree(); for (currentIndex = 0; currentIndex < numericSearchValues.length; ++currentIndex) { From 8cea638bc766e0f362d1bbc31be1918860099e25 Mon Sep 17 00:00:00 2001 From: VickyStash Date: Thu, 26 Sep 2024 10:09:23 +0200 Subject: [PATCH 058/264] Resolve TODO --- .../companyCards/WorkspaceCompanyCardsListHeaderButtons.tsx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pages/workspace/companyCards/WorkspaceCompanyCardsListHeaderButtons.tsx b/src/pages/workspace/companyCards/WorkspaceCompanyCardsListHeaderButtons.tsx index b71f6583e25a..aeb75603ce20 100644 --- a/src/pages/workspace/companyCards/WorkspaceCompanyCardsListHeaderButtons.tsx +++ b/src/pages/workspace/companyCards/WorkspaceCompanyCardsListHeaderButtons.tsx @@ -57,8 +57,7 @@ function WorkspaceCompanyCardsListHeaderButtons({policyID, selectedFeed}: Worksp