Skip to content

Commit

Permalink
Merge pull request #4 from itaybar/removed-transformers
Browse files Browse the repository at this point in the history
removed transformers dependency
  • Loading branch information
itaybar authored Dec 30, 2023
2 parents c86b96c + 1cac79e commit 9c2bab9
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 60 deletions.
2 changes: 1 addition & 1 deletion .env
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ PUBLIC_APP_COLOR=blue # can be any of tailwind colors: https://tailwindcss.com/d
PUBLIC_APP_DESCRIPTION=# description used throughout the app (if not set, a default one will be used)
PUBLIC_APP_DATA_SHARING=#set to 1 to enable options & text regarding data sharing
PUBLIC_APP_DISCLAIMER=#set to 1 to show a disclaimer on login page
LLM_SUMMERIZATION=true
LLM_SUMMERIZATION=false

# PUBLIC_APP_NAME=HuggingChat
# PUBLIC_APP_ASSETS=huggingchat
Expand Down
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
"@huggingface/hub": "^0.5.1",
"@huggingface/inference": "^2.6.3",
"@iconify-json/bi": "^1.1.21",
"@xenova/transformers": "^2.6.0",
"autoprefixer": "^10.4.14",
"browser-image-resizer": "^2.4.1",
"date-fns": "^2.29.3",
Expand Down
15 changes: 7 additions & 8 deletions src/lib/server/websearch/runWebSearch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@ import type { WebSearch, WebSearchSource } from "$lib/types/WebSearch";
import { generateQuery } from "$lib/server/websearch/generateQuery";
import { parseWeb } from "$lib/server/websearch/parseWeb";
import { chunk } from "$lib/utils/chunk";
import {
MAX_SEQ_LEN as CHUNK_CAR_LEN,
findSimilarSentences,
} from "$lib/server/websearch/sentenceSimilarity";
import { MAX_SEQ_LEN as CHUNK_CAR_LEN } from "$lib/server/websearch/sentenceSimilarity";
import type { Conversation } from "$lib/types/Conversation";
import type { MessageUpdate } from "$lib/types/MessageUpdate";
import { getWebSearchProvider } from "./searchWeb";
Expand Down Expand Up @@ -85,11 +82,13 @@ export async function runWebSearch(
}

appendUpdate("Extracting relevant information");
const topKClosestParagraphs = 8;
// const topKClosestParagraphs = 8;
const texts = paragraphChunks.map(({ text }) => text);
const indices = await findSimilarSentences(prompt, texts, {
topK: topKClosestParagraphs,
});
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const indices: any[] = [];
// const indices = await findSimilarSentences(prompt, texts, {
// topK: topKClosestParagraphs,
// });
webSearch.context = indices.map((idx) => texts[idx]).join("");

const usedSources = new Set<string>();
Expand Down
100 changes: 50 additions & 50 deletions src/lib/server/websearch/sentenceSimilarity.ts
Original file line number Diff line number Diff line change
@@ -1,52 +1,52 @@
import type { Tensor, Pipeline } from "@xenova/transformers";
import { pipeline, dot } from "@xenova/transformers";

// see here: https://github.com/nmslib/hnswlib/blob/359b2ba87358224963986f709e593d799064ace6/README.md?plain=1#L34
/**
 * Distance between two tensors, computed as one minus the dot product of
 * their underlying data.
 *
 * @param tensor1 - First tensor; only its `data` is read.
 * @param tensor2 - Second tensor; only its `data` is read.
 * @returns `1.0 - dot(tensor1.data, tensor2.data)`.
 */
function innerProduct(tensor1: Tensor, tensor2: Tensor) {
  const similarity = dot(tensor1.data, tensor2.data);
  return 1.0 - similarity;
}

// Use the Singleton pattern to enable lazy construction of the pipeline.
class PipelineSingleton {
  /** Model identifier passed to `pipeline` when the extractor is first built. */
  static modelId = "Xenova/gte-small";

  /** Cached pipeline promise; stays `null` until the first `getInstance` call. */
  static instance: Promise<Pipeline> | null = null;

  /**
   * Returns the shared feature-extraction pipeline, creating it on first use.
   * The promise itself is cached, so later calls reuse the same one.
   */
  static async getInstance() {
    if (this.instance !== null) {
      return this.instance;
    }
    this.instance = pipeline("feature-extraction", this.modelId);
    return this.instance;
  }
}

// see https://huggingface.co/thenlper/gte-small/blob/d8e2604cadbeeda029847d19759d219e0ce2e6d8/README.md?code=true#L2625
// import type { Tensor, Pipeline } from "@xenova/transformers";
// import { pipeline, dot } from "@xenova/transformers";

// // see here: https://github.com/nmslib/hnswlib/blob/359b2ba87358224963986f709e593d799064ace6/README.md?plain=1#L34
// function innerProduct(tensor1: Tensor, tensor2: Tensor) {
// return 1.0 - dot(tensor1.data, tensor2.data);
// }

// // Use the Singleton pattern to enable lazy construction of the pipeline.
// class PipelineSingleton {
// static modelId = "Xenova/gte-small";
// static instance: Promise<Pipeline> | null = null;
// static async getInstance() {
// if (this.instance === null) {
// this.instance = pipeline("feature-extraction", this.modelId);
// }
// return this.instance;
// }
// }

// // see https://huggingface.co/thenlper/gte-small/blob/d8e2604cadbeeda029847d19759d219e0ce2e6d8/README.md?code=true#L2625
// Exported length limit; runWebSearch.ts imports it under the alias CHUNK_CAR_LEN.
// NOTE(review): presumably the gte-small model's maximum sequence length — confirm against the model card.
export const MAX_SEQ_LEN = 512 as const;

/**
 * Ranks `sentences` by embedding distance to `query` and returns the indexes
 * (into `sentences`) of the closest ones, nearest first.
 *
 * @param query - Text the sentences are compared against.
 * @param sentences - Candidate texts to rank.
 * @param options - Optional settings. `topK` caps how many indexes are
 *   returned and defaults to 5 when omitted.
 * @returns Indexes of up to `topK` sentences, ordered by ascending distance
 *   from the query. Empty when `sentences` is empty.
 */
export async function findSimilarSentences(
  query: string,
  sentences: string[],
  { topK = 5 }: { topK?: number } = {}
): Promise<number[]> {
  // Nothing to rank: skip the embedding pass entirely.
  if (sentences.length === 0) {
    return [];
  }

  // The query goes first so its embedding can be picked out at position 0.
  const input = [query, ...sentences];

  const extractor = await PipelineSingleton.getInstance();
  // One call embeds the query and every sentence, mean-pooled and normalized.
  const output: Tensor = await extractor(input, { pooling: "mean", normalize: true });

  const queryTensor: Tensor = output[0];
  // NOTE(review): if Tensor.slice treats the end bound as exclusive, this
  // range omits the last sentence — confirm against the transformers.js API.
  const sentencesTensor: Tensor = output.slice([1, input.length - 1]);

  const distancesFromQuery: { distance: number; index: number }[] = [...sentencesTensor].map(
    (sentenceTensor: Tensor, index: number) => ({
      distance: innerProduct(queryTensor, sentenceTensor),
      index,
    })
  );

  // Smallest distance first, i.e. most similar sentences at the front.
  distancesFromQuery.sort((a, b) => a.distance - b.distance);

  // Return the indexes of the closest topK sentences
  return distancesFromQuery.slice(0, topK).map((item) => item.index);
}
// export async function findSimilarSentences(
// query: string,
// sentences: string[],
// { topK = 5 }: { topK: number }
// ) {
// const input = [query, ...sentences];

// const extractor = await PipelineSingleton.getInstance();
// const output: Tensor = await extractor(input, { pooling: "mean", normalize: true });

// const queryTensor: Tensor = output[0];
// const sentencesTensor: Tensor = output.slice([1, input.length - 1]);

// const distancesFromQuery: { distance: number; index: number }[] = [...sentencesTensor].map(
// (sentenceTensor: Tensor, index: number) => {
// return {
// distance: innerProduct(queryTensor, sentenceTensor),
// index: index,
// };
// }
// );

// distancesFromQuery.sort((a, b) => {
// return a.distance - b.distance;
// });

// // Return the indexes of the closest topK sentences
// return distancesFromQuery.slice(0, topK).map((item) => item.index);
// }

0 comments on commit 9c2bab9

Please sign in to comment.