Expensify · marcaaron · Oct 17, 2024 · Sep 5, 2024 · Sep 5, 2024 · Sep 5, 2024
@@ -1096,6 +1096,9 @@ const CONST = {
         SEARCH_OPTION_LIST_DEBOUNCE_TIME: 300,
         RESIZE_DEBOUNCE_TIME: 100,
         UNREAD_UPDATE_DEBOUNCE_TIME: 300,
+        SEARCH_CONVERT_SEARCH_VALUES: 'search_convert_search_values',
+        SEARCH_MAKE_TREE: 'search_make_tree',
+        SEARCH_BUILD_TREE: 'search_build_tree',
         SEARCH_FILTER_OPTIONS: 'search_filter_options',
         USE_DEBOUNCED_STATE_DELAY: 300,
     },

@@ -0,0 +1,142 @@
+import CONST from '@src/CONST';
+import Timing from './actions/Timing';
+import SuffixUkkonenTree from './SuffixUkkonenTree';
+
+type SearchableData<T> = {
+    /**
+     * The data that should be searchable. Search results are entries taken from this list.
+     */
+    data: T[];
+    /**
+     * A function that generates a string from a data entry. The string's value is used for searching.
+     * If you have multiple fields that should be searchable, simply concatenate them into one string and return it.
+     * The string is lowercased before it is indexed; entries that produce an empty string are not indexed at all.
+     */
+    toSearchableString: (data: T) => string;
+};
+
+// Certain characters appear very often in our search data (e.g. in email addresses) and we don't need to search for them. This set is passed to SuffixUkkonenTree.stringToNumeric as `charSetToSkip`, both when indexing and when searching.
+const charSetToSkip = new Set(['@', '#', '$', '%', '&', '*', '+', '-', '/', ':', ';', '<', '=', '>', '?', '_', '~', '!']);
+
+/**
+ * Creates a new "FastSearch" instance. "FastSearch" uses a suffix tree to search for (sub-)strings in a list of strings.
+ * You can provide multiple datasets. The search results will be returned for each dataset.
+ *
+ * Note: Creating a FastSearch instance with a lot of data is computationally expensive. You should create an instance once and reuse it.
+ * Searches will be very fast though, even with a lot of data.
+ *
+ * How it works: the searchable strings of all data sets are lowercased and written, one after another, into a single
+ * numeric buffer. Each entry is followed by `SuffixUkkonenTree.DELIMITER_CHAR_CODE` and the whole buffer is terminated with
+ * `SuffixUkkonenTree.END_CHAR_CODE`. One suffix tree is built over that buffer. `occurrenceToIndex` maps a buffer position
+ * back to the index of the item inside its data set, and `listOffsets` records where each data set ends in the buffer.
+ *
+ * NOTE(review): the buffers have a fixed capacity derived from `maxNumericListSize` (400_000) and nothing in this function
+ * checks that the converted data fits. Confirm how `SuffixUkkonenTree.stringToNumeric` behaves when the capacity is exceeded.
+ * NOTE(review): when `dataSets` is empty, `listOffsets.length - 1` is -1, so the assignment after the loop does not update
+ * a real list element and `search` cannot map any occurrence to a data set.
+ *
+ * @param dataSets - The data sets to index, each with its own `toSearchableString` function.
+ * @returns An object with a `search` function that returns one result list per data set, in the order of `dataSets`.
+ */
+function createFastSearch<T>(dataSets: Array<SearchableData<T>>) {
+    // Create a numeric list for the suffix tree, and a lookup array that maps positions in that list back to data indexes
+    Timing.start(CONST.TIMING.SEARCH_CONVERT_SEARCH_VALUES);
+    const maxNumericListSize = 400_000;
+    // The user might provide multiple data sets, but internally, the search values will be stored in this one list:
+    let concatenatedNumericList = new Int8Array(maxNumericListSize);
+    // Here we store the index of the data item in the original data list, so we can map the found occurrences back to the original data:
+    const occurrenceToIndex = new Int32Array(maxNumericListSize * 4); // NOTE(review): typed array constructors take a number of elements (not bytes), so this has 4x the slots of concatenatedNumericList - confirm that is intended
+    // As we are writing into pre-allocated typed arrays, we need to keep track of the current write offset (wrapped in an object so callees can advance it):
+    const offset = {value: 0};
+    // We store the last offset for a dataSet, so we can map the found occurrences to the correct dataSet:
+    const listOffsets: number[] = [];
+
+    for (const {data, toSearchableString} of dataSets) {
+        // Performance critical: the array parameters are out parameters, so we don't want to create new arrays every time:
+        dataToNumericRepresentation(concatenatedNumericList, occurrenceToIndex, offset, {data, toSearchableString});
+        listOffsets.push(offset.value);
+    }
+    concatenatedNumericList[offset.value++] = SuffixUkkonenTree.END_CHAR_CODE; // Terminate the whole search string
+    listOffsets[listOffsets.length - 1] = offset.value; // Extend the last data set's range so it also covers the end character
+    Timing.end(CONST.TIMING.SEARCH_CONVERT_SEARCH_VALUES);
+
+    // The list might be larger than necessary, so we clamp it to the actual size (slice() returns a new array with just the used part):
+    concatenatedNumericList = concatenatedNumericList.slice(0, offset.value);
+
+    // Create & build the suffix tree (allocation and construction are timed separately):
+    Timing.start(CONST.TIMING.SEARCH_MAKE_TREE);
+    const tree = SuffixUkkonenTree.makeTree(concatenatedNumericList);
+    Timing.end(CONST.TIMING.SEARCH_MAKE_TREE);
+
+    Timing.start(CONST.TIMING.SEARCH_BUILD_TREE);
+    tree.build();
+    Timing.end(CONST.TIMING.SEARCH_BUILD_TREE);
+
+    /**
+     * Searches for the given input and returns results for each dataset.
+     * The input is lowercased and converted with the same `charSetToSkip` that was used for indexing.
+     * An item that matches at several positions is returned only once per data set.
+     *
+     * @param searchInput - The text to look for.
+     * @returns One array of matching items per data set, in the same order as `dataSets`.
+     * @throws Error when a found occurrence cannot be mapped to a data set, or maps to a missing (falsy) item.
+     */
+    function search(searchInput: string): T[][] {
+        const cleanedSearchString = cleanString(searchInput);
+        const {numeric} = SuffixUkkonenTree.stringToNumeric(cleanedSearchString, {
+            charSetToSkip,
+            // stringToNumeric might return a list that is larger than necessary, so we clamp it to the actual size
+            // (otherwise the search could fail as we include in our search empty array values):
+            clamp: true,
+        });
+        const result = tree.findSubstring(Array.from(numeric));
+
+        const resultsByDataSet = Array.from({length: dataSets.length}, () => new Set<T>()); // One Set per data set, so duplicates collapse
+        // eslint-disable-next-line @typescript-eslint/prefer-for-of
+        for (let i = 0; i < result.length; i++) {
+            const occurrenceIndex = result[i]; // Position of the match inside the concatenated list
+            const itemIndexInDataSet = occurrenceToIndex[occurrenceIndex];
+            const dataSetIndex = listOffsets.findIndex((listOffset) => occurrenceIndex < listOffset); // First data set whose end offset lies beyond the match
+
+            if (dataSetIndex === -1) {
+                throw new Error('Programmatic error, this should never ever happen');
+            }
+            const item = dataSets[dataSetIndex].data[itemIndexInDataSet];
+            if (!item) {
+                throw new Error('Programmatic error, this should never ever happen');
+            }
+            resultsByDataSet[dataSetIndex].add(item);
+        }
+
+        return resultsByDataSet.map((set) => Array.from(set));
+    }
+
+    return {
+        search,
+    };
+}
+
+/**
+ * The suffix tree can only store string like values, and internally stores those as numbers.
+ * This function converts the user data (which are most likely objects) to a numeric representation.
+ * Additionally, the index of each data entry is recorded against its position in the numeric list, which is used to map the found occurrences back to the original data.
+ *
+ * Nothing is returned: `concatenatedNumericList`, `occurrenceToIndex` and `offset` are out parameters that are written to in place.
+ * After each entry, `SuffixUkkonenTree.DELIMITER_CHAR_CODE` is appended and `offset.value` is advanced past it.
+ *
+ * @param concatenatedNumericList - Buffer receiving the numeric representation of all entries.
+ * @param occurrenceToIndex - Lookup from a position in `concatenatedNumericList` to the entry's index in `data`.
+ * @param offset - Current write position, shared with the caller and advanced by this function.
+ * @param param3 - The data set to convert (`data` and `toSearchableString`).
+ */
+function dataToNumericRepresentation<T>(concatenatedNumericList: Int8Array, occurrenceToIndex: Int32Array, offset: {value: number}, {data, toSearchableString}: SearchableData<T>): void {
+    // Entries are processed in order; `index` is the entry's position in `data`, not a position in the numeric list.
+
+    data.forEach((option, index) => {
+        const searchStringForTree = toSearchableString(option);
+        const cleanedSearchStringForTree = cleanString(searchStringForTree);
+
+        if (cleanedSearchStringForTree.length === 0) {
+            return; // Nothing to index for this entry (no delimiter is written either)
+        }
+
+        SuffixUkkonenTree.stringToNumeric(cleanedSearchStringForTree, { // Presumably writes the converted characters into the out arrays and advances `offset` - confirm in ./SuffixUkkonenTree/utils
+            charSetToSkip,
+            out: {
+                outArray: concatenatedNumericList,
+                offset,
+                outOccurrenceToIndex: occurrenceToIndex,
+                index,
+            },
+        });
+        // eslint-disable-next-line no-param-reassign
+        occurrenceToIndex[offset.value] = index; // The delimiter position maps to this entry as well
+        // eslint-disable-next-line no-param-reassign
+        concatenatedNumericList[offset.value++] = SuffixUkkonenTree.DELIMITER_CHAR_CODE;
+    });
+}
+
+/**
+ * Everything in the tree is treated as lowercase.
+ * This is applied both to the strings that are indexed and to the search input, which makes the search case-insensitive.
+ *
+ * @param input - The raw string.
+ * @returns The lowercased string.
+ */
+function cleanString(input: string) {
+    return input.toLowerCase();
+}
+/**
+ * Public API of this module; the object is the module's default export.
+ */
+const FastSearch = {
+    createFastSearch,
+};
+
+export default FastSearch;
@@ -2380,6 +2380,31 @@ function getPersonalDetailSearchTerms(item: Partial<ReportUtils.OptionData>) {
 /**
  * Builds the list of search terms for the current user's option: its text, its login,
  * and its login with every match of CONST.EMAIL_SEARCH_REGEX removed. Missing values become empty strings.
  */
 function getCurrentUserSearchTerms(item: ReportUtils.OptionData) {
     return [item.text ?? '', item.login ?? '', item.login?.replace(CONST.EMAIL_SEARCH_REGEX, '') ?? ''];
 }
+/**
+ * Arguments of `pickUserToInvite`.
+ * - `canInviteUser`: when false, no invite option is produced.
+ * - `recentReports` / `personalDetails`: the already filtered options; an invite option is only produced when both lists are empty.
+ * - `searchValue`, `config` (only `config.selectedOptions` is read) and `optionsToExclude` are forwarded to `getUserToInviteOption`.
+ */
+type PickUserToInviteParams = {
+    canInviteUser: boolean;
+    recentReports: ReportUtils.OptionData[];
+    personalDetails: ReportUtils.OptionData[];
+    searchValue: string;
+    config?: FilterOptionsConfig;
+    optionsToExclude: Option[];
+};
+/**
+ * Decides whether a "user to invite" option should be offered for the current search.
+ *
+ * @returns The result of `getUserToInviteOption` when inviting is allowed and there are neither recent reports
+ * nor personal details; otherwise `null`.
+ */
+const pickUserToInvite = ({canInviteUser, recentReports, personalDetails, searchValue, config, optionsToExclude}: PickUserToInviteParams) => {
+    let userToInvite = null;
+    if (canInviteUser) {
+        if (recentReports.length === 0 && personalDetails.length === 0) { // Only offer an invite when nothing else is listed
+            userToInvite = getUserToInviteOption({
+                searchValue,
+                selectedOptions: config?.selectedOptions,
+                optionsToExclude,
+            });
+        }
+    }
+
+    return userToInvite;
+};
+
 /**
  * Filters options based on the search input value
  */
@@ -2455,16 +2480,7 @@ function filterOptions(options: Options, searchInputValue: string, config?: Filt
         recentReports = orderOptions(recentReports, searchValue);
     }
 
-    let userToInvite = null;
-    if (canInviteUser) {
-        if (recentReports.length === 0 && personalDetails.length === 0) {
-            userToInvite = getUserToInviteOption({
-                searchValue,
-                selectedOptions: config?.selectedOptions,
-                optionsToExclude,
-            });
-        }
-    }
+    const userToInvite = pickUserToInvite({canInviteUser, recentReports, personalDetails, searchValue, config, optionsToExclude});
 
     if (maxRecentReportsToShow > 0 && recentReports.length > maxRecentReportsToShow) {
         recentReports.splice(maxRecentReportsToShow);
@@ -2547,6 +2563,7 @@ export {
     getEmptyOptions,
     shouldUseBoldText,
     getAlternateText,
+    pickUserToInvite,
 };
 
 export type {MemberForList, CategorySection, CategoryTreeSection, Options, OptionList, SearchOption, PayeePersonalDetails, Category, Tax, TaxRatesOption, Option, OptionTree};
@@ -0,0 +1,183 @@
+/* eslint-disable no-continue */
+import {ALPHABET_SIZE, DELIMITER_CHAR_CODE, END_CHAR_CODE, SPECIAL_CHAR_CODE, stringToNumeric} from './utils';
+
+/**
+ * This implements a suffix tree using Ukkonen's algorithm.
+ * A good visualization to learn about the algorithm can be found here: https://brenden.github.io/ukkonen-animation/
+ * Note: This implementation is optimized for performance, not necessarily for readability.
+ *
+ * You probably don't want to use this directly, but rather use @libs/FastSearch.ts as an easy-to-use wrapper around this.
+ */
+
+/**
+ * Creates a new tree instance that can be used to build a suffix tree and search in it.
+ * The input is a numeric representation of the search string, which can be created using {@link stringToNumeric}.
+ * Separate search values must be separated by the {@link DELIMITER_CHAR_CODE}. The search string must end with the {@link END_CHAR_CODE}.
+ *
+ * The tree will be built using the Ukkonen's algorithm: https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
+ *
+ * Nodes are identified by integers and all per-node data lives in flat typed arrays indexed by node id:
+ * - `transitionNodes[node * ALPHABET_SIZE + char]` is the child reached from `node` via `char` (-1 = no transition),
+ * - `leftEdges[node]` / `rightEdges[node]` are the inclusive start/end indexes (into `numericSearchValues`) of the edge leading to `node`,
+ * - `parent[node]` and `suffixLink[node]` hold the node's parent and suffix link.
+ * Node 0 is the node every search starts from; node 1 is a helper node from which every character leads to node 0.
+ *
+ * NOTE(review): typed array constructors take a number of elements, not bytes, so the `* 4` factors below allocate
+ * four times more entries than `maxNodes` (resp. `maxNodes * ALPHABET_SIZE`) - confirm whether that is intended.
+ *
+ * @param numericSearchValues - The numeric search string to index.
+ * @returns An object with `build` (constructs the tree) and `findSubstring` (queries it).
+ */
+function makeTree(numericSearchValues: Int8Array) {
+    const maxNodes = 2 * numericSearchValues.length;
+    // Flat Int32Array storing all transitions: the entry at [node * ALPHABET_SIZE + char] is the child node reached from `node` via `char`
+    const transitionNodes = new Int32Array(maxNodes * ALPHABET_SIZE * 4);
+    transitionNodes.fill(-1); // Initialize all transitions to -1 (no transition)
+
+    const leftEdges = new Int32Array(maxNodes * 4);
+    const rightEdges = new Int32Array(maxNodes * 4);
+    const defaultREdgeValue = numericSearchValues.length - 1; // Index of the last input value; same value initializeTree() fills rightEdges with
+
+    const parent = new Int32Array(maxNodes * 4);
+    const suffixLink = new Int32Array(maxNodes * 4);
+
+    let currentNode = 0; // Node of the active point
+    let currentPosition = 0; // Position of the active point, as an index into numericSearchValues
+    let nodeCounter = 2; // Next free node id (0 and 1 are set up in initializeTree)
+    let currentIndex = 0; // Index of the input value that is currently being inserted
+    /**
+     * Sets up the tables before construction: every right edge defaults to the last input index,
+     * nodes 0 and 1 get an empty edge (-1/-1), node 0's suffix link points to node 1,
+     * and every character transitions from node 1 to node 0.
+     */
+    function initializeTree() {
+        rightEdges.fill(numericSearchValues.length - 1);
+        suffixLink[0] = 1;
+        leftEdges[0] = -1;
+        rightEdges[0] = -1;
+        leftEdges[1] = -1;
+        rightEdges[1] = -1;
+        for (let i = 0; i < ALPHABET_SIZE; ++i) {
+            transitionNodes[ALPHABET_SIZE + i] = 0; // Row of node 1 (1 * ALPHABET_SIZE + i)
+        }
+    }
+    /**
+     * Extends the tree with one input value, starting from the active point (`currentNode` / `currentPosition`).
+     * Loops until the value could be matched at the active point; until then new leaves are created or edges are split.
+     */
+    function processCharacter(char: number) {
+        // eslint-disable-next-line no-constant-condition
+        while (true) {
+            if (rightEdges[currentNode] < currentPosition) { // The active position is past the end of the current node's edge
+                if (transitionNodes[currentNode * ALPHABET_SIZE + char] === -1) {
+                    createNewLeaf(char);
+                    continue;
+                }
+                currentNode = transitionNodes[currentNode * ALPHABET_SIZE + char];
+                currentPosition = leftEdges[currentNode];
+            }
+            if (currentPosition === -1 || char === numericSearchValues[currentPosition]) {
+                currentPosition++; // The value matches at the active point, just advance
+            } else {
+                splitEdge(char);
+                continue;
+            }
+            break;
+        }
+    }
+    /**
+     * Adds a new leaf below `currentNode` for the value `c`. The leaf's edge starts at `currentIndex`; its right edge
+     * keeps the default written by `initializeTree` (the last input index).
+     * Afterwards the active point moves to the suffix link of `currentNode`, positioned just past the end of that node's edge.
+     */
+    function createNewLeaf(c: number) {
+        transitionNodes[currentNode * ALPHABET_SIZE + c] = nodeCounter;
+        leftEdges[nodeCounter] = currentIndex;
+        parent[nodeCounter++] = currentNode;
+        currentNode = suffixLink[currentNode];
+
+        currentPosition = rightEdges[currentNode] + 1;
+    }
+    /**
+     * Splits the edge leading to `currentNode` at `currentPosition`, because `c` differs from the value stored there.
+     * Two nodes are created: `nodeCounter` becomes an internal node that takes over the part of the edge before
+     * `currentPosition`, and `nodeCounter + 1` becomes a new leaf for `c` whose edge starts at `currentIndex`.
+     * `currentNode` is re-attached below the new internal node, then `handleDescent` updates the suffix link and active point.
+     */
+    function splitEdge(c: number) {
+        leftEdges[nodeCounter] = leftEdges[currentNode];
+        rightEdges[nodeCounter] = currentPosition - 1;
+        parent[nodeCounter] = parent[currentNode];
+
+        transitionNodes[nodeCounter * ALPHABET_SIZE + numericSearchValues[currentPosition]] = currentNode;
+        transitionNodes[nodeCounter * ALPHABET_SIZE + c] = nodeCounter + 1;
+        leftEdges[nodeCounter + 1] = currentIndex;
+        parent[nodeCounter + 1] = nodeCounter;
+        leftEdges[currentNode] = currentPosition;
+        parent[currentNode] = nodeCounter;
+
+        transitionNodes[parent[nodeCounter] * ALPHABET_SIZE + numericSearchValues[leftEdges[nodeCounter]]] = nodeCounter; // Re-point the former parent at the new internal node
+        nodeCounter += 2;
+        handleDescent(nodeCounter);
+    }
+    /**
+     * Called after a split with `ts` = the node counter after the split, so `ts - 2` is the internal node just created.
+     * Starting at the suffix link of that node's parent, walks down along the values on the new node's edge to find the
+     * target of the new node's suffix link. If the walk ends exactly at a node, that node becomes the suffix link;
+     * otherwise the link points to `ts`, the next node id that will be handed out.
+     * Finally `currentPosition` is recomputed relative to the edge of the node the walk ended on.
+     *
+     * NOTE(review): reading an in-range index of an Int32Array always yields a number, so the `?? defaultREdgeValue`
+     * fallbacks only take effect for out-of-range indexes.
+     */
+    function handleDescent(ts: number) {
+        currentNode = suffixLink[parent[ts - 2]];
+        currentPosition = leftEdges[ts - 2];
+        while (currentPosition <= (rightEdges[ts - 2] ?? defaultREdgeValue)) {
+            currentNode = transitionNodes[currentNode * ALPHABET_SIZE + numericSearchValues[currentPosition]];
+            currentPosition += rightEdges[currentNode] - leftEdges[currentNode] + 1;
+        }
+        if (currentPosition === (rightEdges[ts - 2] ?? defaultREdgeValue) + 1) {
+            suffixLink[ts - 2] = currentNode;
+        } else {
+            suffixLink[ts - 2] = ts;
+        }
+        currentPosition = rightEdges[currentNode] - (currentPosition - (rightEdges[ts - 2] ?? defaultREdgeValue)) + 2;
+    }
+    /**
+     * Builds the suffix tree: initializes the tables and feeds every value of `numericSearchValues`, in order,
+     * through `processCharacter`. `findSubstring` reads the tables that this fills in.
+     */
+    function build() {
+        initializeTree();
+        for (currentIndex = 0; currentIndex < numericSearchValues.length; ++currentIndex) {
+            const c = numericSearchValues[currentIndex];
+            processCharacter(c);
+        }
+    }
+
+    /**
+     * Returns all occurrences of the given (sub)string in the input string.
+     *
+     * You can think of the tree that we create as a big string that looks like this:
+     *
+     * "banana$pancake$apple|"
+     * The example delimiter character '$' is used to separate the different strings.
+     * The end character '|' is used to indicate the end of our search string.
+     *
+     * This function will return the index(es) of found occurrences within this big string.
+     * So, when searching for "an", it would return [1, 3, 8].
+     *
+     * @param searchValue - The numeric representation of the string to look for.
+     * @returns The start indexes (into the numeric search string) of all found occurrences, in the order the tree is traversed.
+     */
+    function findSubstring(searchValue: number[]) {
+        const occurrences: number[] = [];
+
+        function dfs(node: number, depth: number) { // `depth` = number of values on the path from node 0 to the start of this node's edge
+            const leftRange = leftEdges[node];
+            const rightRange = rightEdges[node] ?? defaultREdgeValue;
+            const rangeLen = node === 0 ? 0 : rightRange - leftRange + 1; // Node 0 has no incoming edge
+
+            for (let i = 0; i < rangeLen && depth + i < searchValue.length && leftRange + i < numericSearchValues.length; i++) {
+                if (searchValue[depth + i] !== numericSearchValues[leftRange + i]) {
+                    return; // The edge does not match the search value, abandon this branch
+                }
+            }
+
+            let isLeaf = true; // Stays true when no child is visited below
+            for (let i = 0; i < ALPHABET_SIZE; ++i) {
+                const tNode = transitionNodes[node * ALPHABET_SIZE + i];
+
+                // Search speed optimization: don't go through the edge if it's different than the next char:
+                const correctChar = depth + rangeLen >= searchValue.length || i === searchValue[depth + rangeLen];
+
+                if (tNode && tNode !== -1 && correctChar) { // Skips missing transitions (-1) and transitions that lead to node 0
+                    isLeaf = false;
+                    dfs(tNode, depth + rangeLen);
+                }
+            }
+
+            if (isLeaf && depth + rangeLen >= searchValue.length) {
+                occurrences.push(numericSearchValues.length - (depth + rangeLen)); // Input length minus the path length down to this leaf = start index of the matching suffix
+            }
+        }
+
+        dfs(0, 0);
+        return occurrences;
+    }
+
+    return {
+        build,
+        findSubstring,
+    };
+}
+/**
+ * Public API of this module: `makeTree` plus the constants and the `stringToNumeric` helper imported from ./utils.
+ */
+const SuffixUkkonenTree = {
+    makeTree,
+
+    // Re-exported from utils:
+    DELIMITER_CHAR_CODE,
+    SPECIAL_CHAR_CODE,
+    END_CHAR_CODE,
+    stringToNumeric,
+};
+
+export default SuffixUkkonenTree;