[lib] Avoid constructing multiple duplicate tokenizers in SearchIndex
Summary:
This simple change improves the performance of `useChatMentionSearchIndex` by 45% in my local environment. Before this change it takes an average of 1777.5ms; after, it takes an average of 977.7ms.

Linear task: [ENG-5480](https://linear.app/comm/issue/ENG-5480/investigate-chatmentioncontextprovider-tti-regression)
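
The optimization is to hoist the tokenizer into a module-level default that every `SearchIndex` shares, instead of constructing a fresh `Tokenizer` inside each constructor. A minimal sketch of the pattern in plain JavaScript (`MockTokenizer` is a stand-in for the real `tokenize-text` dependency, not its actual implementation):

```javascript
// Stand-in for tokenize-text's Tokenizer; only the shape matters here.
class MockTokenizer {
  words() {
    // Returns a tokenize function that splits on punctuation and whitespace.
    return str => str.split(/\W+/).filter(Boolean).map(value => ({ value }));
  }
}

// Constructed once at module load; shared by every SearchIndex instance
// that doesn't pass in its own tokenize function.
const defaultTokenize = new MockTokenizer().words();

class SearchIndex {
  constructor(inputTokenize) {
    // Fall back to the shared default rather than building a new tokenizer.
    this.tokenize = inputTokenize ?? defaultTokenize;
    this.fullTextIndex = {};
    this.partialTextIndex = {};
  }
}

// Every default-constructed index now reuses the same tokenize function.
const a = new SearchIndex();
const b = new SearchIndex();
console.log(a.tokenize === b.tokenize); // true
```

Since `useChatMentionSearchIndex` builds many indices, skipping the per-instance `new Tokenizer().words()` call is presumably what accounts for the ~45% speedup.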

Test Plan:
I used [this patch](https://gist.github.com/Ashoat/fc1c91a61009de0e9959527454be8236) to test performance before and after this change, making sure I had at least three samples of each scenario. I'll also link my [messy Gist of results](https://gist.github.com/Ashoat/b871afaaaee10b435b8676175d120d53), though it's not really interpretable by anyone other than me.

Here's the relevant portion:

```
BEFORE

 LOG  useChatMentionSearchIndex took 1801ms
 LOG  useChatMentionSearchIndex took 1748ms
 LOG  useChatMentionSearchIndex took 1730ms
 LOG  useChatMentionSearchIndex took 1831ms

 AVERAGE 1777.5ms

JUST DEDUP

 LOG  useChatMentionSearchIndex took 1027ms
 LOG  useChatMentionSearchIndex took 949ms
 LOG  useChatMentionSearchIndex took 957ms

 AVERAGE 977.7ms
```

Reviewers: tomek, atul, inka, rohan

Reviewed By: rohan

Subscribers: wyilio

Differential Revision: https://phab.comm.dev/D9625
Ashoat committed Oct 28, 2023
1 parent 78a8958 commit 6998939
Showing 2 changed files with 10 additions and 4 deletions.
7 changes: 5 additions & 2 deletions lib/shared/search-index.js

```diff
@@ -15,13 +15,16 @@ type Token = {
   ...
 };
 
+type TokenizeFunc = (str: string) => Token[];
+const defaultTokenize: TokenizeFunc = new Tokenizer().words();
+
 class SearchIndex {
   tokenize: (str: string) => Token[];
   fullTextIndex: { [token: string]: Set<string> };
   partialTextIndex: { [token: string]: Set<string> };
 
-  constructor() {
-    this.tokenize = new Tokenizer().words();
+  constructor(inputTokenize?: TokenizeFunc) {
+    this.tokenize = inputTokenize ?? defaultTokenize;
     this.fullTextIndex = {};
     this.partialTextIndex = {};
   }
```
7 changes: 5 additions & 2 deletions lib/shared/sentence-prefix-search-index.js

```diff
@@ -4,12 +4,15 @@ import Tokenizer from 'tokenize-text';
 
 import SearchIndex from './search-index.js';
 
+// defaultTokenize used in SearchIndex splits on punctuation
+// We use this alternative because we only want to split on whitespace
+const tokenize = new Tokenizer().re(/\S+/);
+
 class SentencePrefixSearchIndex extends SearchIndex {
   entries: Set<string>;
 
   constructor() {
-    super();
-    this.tokenize = new Tokenizer().re(/\S+/);
+    super(tokenize);
     this.entries = new Set();
   }
```
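
To illustrate why `SentencePrefixSearchIndex` supplies its own tokenizer, here is a quick comparison of the two splitting strategies. Only the `/\S+/` regex comes from the diff; the word-style regex is an illustrative approximation of punctuation splitting, not `tokenize-text` internals:

```javascript
const text = "don't panic, friend";

// Word-style tokenization: splits on punctuation as well as whitespace
// (an approximation of what a words() tokenizer produces).
const wordTokens = text.split(/[^A-Za-z0-9']+/).filter(Boolean);
// → ["don't", "panic", "friend"]

// Whitespace-only tokenization, matching the /\S+/ regex from the diff:
// trailing punctuation stays attached to the word.
const spaceTokens = text.match(/\S+/g) ?? [];
// → ["don't", "panic,", "friend"]
```

Keeping punctuation attached matters for sentence-prefix search, where tokens should mirror exactly what the user typed between spaces.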
