From 6998939bd1b84425ac17a1fbf17d12c1d2dc924d Mon Sep 17 00:00:00 2001 From: Ashoat Tevosyan Date: Fri, 27 Oct 2023 14:02:54 -0400 Subject: [PATCH] [lib] Avoid constructing multiple duplicate tokenizers in SearchIndex Summary: This simple change improves the perf of `useChatMentionSearchIndex` by 45% in my local environment. Before this it takes an average of 1777.5ms, but after it takes an average of 978ms. Linear task: [ENG-5480](https://linear.app/comm/issue/ENG-5480/investigate-chatmentioncontextprovider-tti-regression) Test Plan: I used [this patch](https://gist.github.com/Ashoat/fc1c91a61009de0e9959527454be8236) to test performance before and after this change. I made sure I had at least three samples of each scenario. Will also link my [messy Gist of results](https://gist.github.com/Ashoat/b871afaaaee10b435b8676175d120d53), but it's not really interpretable by anyone other than me. Here's the relevant portion: ``` BEFORE LOG useChatMentionSearchIndex took 1801ms LOG useChatMentionSearchIndex took 1748ms LOG useChatMentionSearchIndex took 1730ms LOG useChatMentionSearchIndex took 1831ms AVERAGE 1777.5ms JUST DEDUP LOG useChatMentionSearchIndex took 1027ms LOG useChatMentionSearchIndex took 949ms LOG useChatMentionSearchIndex took 957ms AVERAGE 977.7ms ``` Reviewers: tomek, atul, inka, rohan Reviewed By: rohan Subscribers: wyilio Differential Revision: https://phab.comm.dev/D9625 --- lib/shared/search-index.js | 7 +++++-- lib/shared/sentence-prefix-search-index.js | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/lib/shared/search-index.js b/lib/shared/search-index.js index 432b2cadbc..026eca1024 100644 --- a/lib/shared/search-index.js +++ b/lib/shared/search-index.js @@ -15,13 +15,16 @@ type Token = { ... 
 };
 
+type TokenizeFunc = (str: string) => Token[];
+const defaultTokenize: TokenizeFunc = new Tokenizer().words();
+
 class SearchIndex {
   tokenize: (str: string) => Token[];
   fullTextIndex: { [token: string]: Set<string> };
   partialTextIndex: { [token: string]: Set<string> };
 
-  constructor() {
-    this.tokenize = new Tokenizer().words();
+  constructor(inputTokenize?: TokenizeFunc) {
+    this.tokenize = inputTokenize ?? defaultTokenize;
     this.fullTextIndex = {};
     this.partialTextIndex = {};
   }
diff --git a/lib/shared/sentence-prefix-search-index.js b/lib/shared/sentence-prefix-search-index.js
index 4848f2f0bd..928c025fad 100644
--- a/lib/shared/sentence-prefix-search-index.js
+++ b/lib/shared/sentence-prefix-search-index.js
@@ -4,12 +4,15 @@ import Tokenizer from 'tokenize-text';
 
 import SearchIndex from './search-index.js';
 
+// defaultTokenize used in SearchIndex splits on punctuation
+// We use this alternative because we only want to split on whitespace
+const tokenize = new Tokenizer().re(/\S+/);
+
 class SentencePrefixSearchIndex extends SearchIndex {
   entries: Set<string>;
 
   constructor() {
-    super();
-    this.tokenize = new Tokenizer().re(/\S+/);
+    super(tokenize);
     this.entries = new Set();
   }
 