Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(community): Migrate xenova transformers lib to huggingface #7431

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api_refs/blacklisted-entrypoints.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"../../langchain/src/embeddings/tensorflow.ts",
"../../langchain/src/embeddings/hf.ts",
"../../langchain/src/embeddings/hf_transformers.ts",
"../../langchain/src/embeddings/huggingface_transformers.ts",
"../../langchain/src/embeddings/googlevertexai.ts",
"../../langchain/src/embeddings/googlepalm.ts",
"../../langchain/src/embeddings/minimax.ts",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ It runs locally and even works directly in the browser, allowing you to create w

## Setup

You'll need to install the [@xenova/transformers](https://www.npmjs.com/package/@xenova/transformers) package as a peer dependency:
You'll need to install the [@huggingface/transformers](https://www.npmjs.com/package/@huggingface/transformers) package as a peer dependency:

```bash npm2yarn
npm install @xenova/transformers
npm install @huggingface/transformers
```

import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx";
Expand Down
2 changes: 1 addition & 1 deletion environment_tests/test-exports-cjs/src/import.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ async function test() {
const { OpenAI } = await import("@langchain/openai");
const { LLMChain } = await import("langchain/chains");
const { ChatPromptTemplate } = await import("@langchain/core/prompts");
const { HuggingFaceTransformersEmbeddings } = await import("@langchain/community/embeddings/hf_transformers");
const { HuggingFaceTransformersEmbeddings } = await import("@langchain/community/embeddings/huggingface_transformers");
const { Document } = await import("@langchain/core/documents");
const { MemoryVectorStore } = await import("langchain/vectorstores/memory");

Expand Down
2 changes: 1 addition & 1 deletion environment_tests/test-exports-cjs/src/index.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { OpenAI } from "@langchain/openai";
import { LLMChain } from "langchain/chains";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { ChatPromptTemplate } from "@langchain/core/prompts";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers";
import { Document } from "@langchain/core/documents";

// Test exports
Expand Down
6 changes: 4 additions & 2 deletions environment_tests/test-exports-cjs/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { OpenAI } from "@langchain/openai";
import { LLMChain } from "langchain/chains";
import { ChatPromptTemplate } from "@langchain/core/prompts";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers";
import { Document } from "@langchain/core/documents";

async function test(useAzure: boolean = false) {
Expand All @@ -25,7 +25,9 @@ async function test(useAzure: boolean = false) {
openAIApiKey: "sk-XXXX",
};

const vs = new MemoryVectorStore(new HuggingFaceTransformersEmbeddings({ model: "Xenova/all-MiniLM-L6-v2" }));
const vs = new MemoryVectorStore(
new HuggingFaceTransformersEmbeddings({ model: "Xenova/all-MiniLM-L6-v2" })
);

await vs.addVectors(
[
Expand Down
2 changes: 1 addition & 1 deletion environment_tests/test-exports-cjs/src/require.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ const { OpenAI } = require("@langchain/openai");
const { LLMChain } = require("langchain/chains");
const { ChatPromptTemplate } = require("@langchain/core/prompts");
const { MemoryVectorStore } = require("langchain/vectorstores/memory");
const { HuggingFaceTransformersEmbeddings } = require("@langchain/community/embeddings/hf_transformers");
const { HuggingFaceTransformersEmbeddings } = require("@langchain/community/embeddings/huggingface_transformers");
const { Document } = require("@langchain/core/documents");

async function test() {
Expand Down
2 changes: 1 addition & 1 deletion environment_tests/test-exports-esm/src/import.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ async function test() {
const { LLMChain } = await import("langchain/chains");
const { ChatPromptTemplate } = await import("@langchain/core/prompts");
const { MemoryVectorStore } = await import("langchain/vectorstores/memory");
const { HuggingFaceTransformersEmbeddings } = await import("@langchain/community/embeddings/hf_transformers");
const { HuggingFaceTransformersEmbeddings } = await import("@langchain/community/embeddings/huggingface_transformers");
const { Document } = await import("@langchain/core/documents");

// Test exports
Expand Down
2 changes: 1 addition & 1 deletion environment_tests/test-exports-esm/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { OpenAI } from "@langchain/openai";
import { LLMChain } from "langchain/chains";
import { ChatPromptTemplate } from "@langchain/core/prompts";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers";
import { Document } from "@langchain/core/documents";
import { CallbackManager } from "@langchain/core/callbacks/manager";

Expand Down
6 changes: 4 additions & 2 deletions environment_tests/test-exports-esm/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { OpenAI } from "@langchain/openai";
import { LLMChain } from "langchain/chains";
import { ChatPromptTemplate } from "@langchain/core/prompts";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers";
import { Document } from "@langchain/core/documents";

async function test(useAzure: boolean = false) {
Expand All @@ -24,7 +24,9 @@ async function test(useAzure: boolean = false) {
openAIApiKey: "sk-XXXX",
};

const vs = new MemoryVectorStore(new HuggingFaceTransformersEmbeddings({ model: "Xenova/all-MiniLM-L6-v2", }));
const vs = new MemoryVectorStore(
new HuggingFaceTransformersEmbeddings({ model: "Xenova/all-MiniLM-L6-v2" })
);

await vs.addVectors(
[
Expand Down
2 changes: 1 addition & 1 deletion environment_tests/test-exports-esm/src/require.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ const { OpenAI } = require("@langchain/openai");
const { LLMChain } = require("langchain/chains");
const { ChatPromptTemplate } = require("@langchain/core/prompts");
const { MemoryVectorStore } = require("langchain/vectorstores/memory");
const { HuggingFaceTransformersEmbeddings } = require("@langchain/community/embeddings/hf_transformers");
const { HuggingFaceTransformersEmbeddings } = require("@langchain/community/embeddings/huggingface_transformers");
const { Document } = require("@langchain/core/documents");

async function test() {
Expand Down
2 changes: 1 addition & 1 deletion examples/src/models/embeddings/hf_transformers.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers";

const model = new HuggingFaceTransformersEmbeddings({
model: "Xenova/all-MiniLM-L6-v2",
Expand Down
2 changes: 1 addition & 1 deletion examples/src/use_cases/local_retrieval_qa/chain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { CheerioWebBaseLoader } from "@langchain/community/document_loaders/web/
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
import { HNSWLib } from "@langchain/community/vectorstores/hnswlib";
import { Ollama } from "@langchain/community/llms/ollama";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers";
import { formatDocumentsAsString } from "langchain/util/document";
import { PromptTemplate } from "@langchain/core/prompts";
import {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { CheerioWebBaseLoader } from "@langchain/community/document_loaders/web/cheerio";
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
import { HNSWLib } from "@langchain/community/vectorstores/hnswlib";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers";

const loader = new CheerioWebBaseLoader(
"https://lilianweng.github.io/posts/2023-06-23-agent/"
Expand Down
2 changes: 1 addition & 1 deletion examples/src/use_cases/local_retrieval_qa/qa_chain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { CheerioWebBaseLoader } from "@langchain/community/document_loaders/web/
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
import { HNSWLib } from "@langchain/community/vectorstores/hnswlib";
import { Ollama } from "@langchain/community/llms/ollama";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers";
import { PromptTemplate } from "@langchain/core/prompts";

const loader = new CheerioWebBaseLoader(
Expand Down
4 changes: 4 additions & 0 deletions libs/langchain-community/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,10 @@ embeddings/hf_transformers.cjs
embeddings/hf_transformers.js
embeddings/hf_transformers.d.ts
embeddings/hf_transformers.d.cts
embeddings/huggingface_transformers.cjs
embeddings/huggingface_transformers.js
embeddings/huggingface_transformers.d.ts
embeddings/huggingface_transformers.d.cts
embeddings/ibm.cjs
embeddings/ibm.js
embeddings/ibm.d.ts
Expand Down
2 changes: 2 additions & 0 deletions libs/langchain-community/langchain.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ export const config = {
"embeddings/gradient_ai": "embeddings/gradient_ai",
"embeddings/hf": "embeddings/hf",
"embeddings/hf_transformers": "embeddings/hf_transformers",
"embeddings/huggingface_transformers": "embeddings/huggingface_transformers",
"embeddings/ibm": "embeddings/ibm",
"embeddings/jina": "embeddings/jina",
"embeddings/llama_cpp": "embeddings/llama_cpp",
Expand Down Expand Up @@ -355,6 +356,7 @@ export const config = {
"embeddings/tensorflow",
"embeddings/hf",
"embeddings/hf_transformers",
"embeddings/huggingface_transformers",
"embeddings/ibm",
"embeddings/jina",
"embeddings/llama_cpp",
Expand Down
23 changes: 18 additions & 5 deletions libs/langchain-community/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
"@google-cloud/storage": "^7.7.0",
"@gradientai/nodejs-sdk": "^1.2.0",
"@huggingface/inference": "^2.6.4",
"@huggingface/transformers": "^3.2.3",
"@ibm-cloud/watsonx-ai": "^1.3.0",
"@jest/globals": "^29.5.0",
"@lancedb/lancedb": "^0.13.0",
Expand Down Expand Up @@ -134,7 +135,6 @@
"@vercel/postgres": "^0.5.0",
"@writerai/writer-sdk": "^0.40.2",
"@xata.io/client": "^0.28.0",
"@xenova/transformers": "^2.17.2",
"@zilliz/milvus2-sdk-node": ">=2.3.5",
"apify-client": "^2.7.1",
"assemblyai": "^4.6.0",
Expand Down Expand Up @@ -249,6 +249,7 @@
"@google-cloud/storage": "^6.10.1 || ^7.7.0",
"@gradientai/nodejs-sdk": "^1.2.0",
"@huggingface/inference": "^2.6.4",
"@huggingface/transformers": "^3.2.3",
"@ibm-cloud/watsonx-ai": "*",
"@lancedb/lancedb": "^0.12.0",
"@langchain/core": ">=0.2.21 <0.4.0",
Expand Down Expand Up @@ -282,7 +283,6 @@
"@vercel/postgres": "^0.5.0",
"@writerai/writer-sdk": "^0.40.2",
"@xata.io/client": "^0.28.0",
"@xenova/transformers": "^2.17.2",
"@zilliz/milvus2-sdk-node": ">=2.3.5",
"apify-client": "^2.7.1",
"assemblyai": "^4.6.0",
Expand Down Expand Up @@ -430,6 +430,9 @@
"@huggingface/inference": {
"optional": true
},
"@huggingface/transformers": {
"optional": true
},
"@lancedb/lancedb": {
"optional": true
},
Expand Down Expand Up @@ -523,9 +526,6 @@
"@xata.io/client": {
"optional": true
},
"@xenova/transformers": {
"optional": true
},
"@zilliz/milvus2-sdk-node": {
"optional": true
},
Expand Down Expand Up @@ -1113,6 +1113,15 @@
"import": "./embeddings/hf_transformers.js",
"require": "./embeddings/hf_transformers.cjs"
},
"./embeddings/huggingface_transformers": {
"types": {
"import": "./embeddings/huggingface_transformers.d.ts",
"require": "./embeddings/huggingface_transformers.d.cts",
"default": "./embeddings/huggingface_transformers.d.ts"
},
"import": "./embeddings/huggingface_transformers.js",
"require": "./embeddings/huggingface_transformers.cjs"
},
"./embeddings/ibm": {
"types": {
"import": "./embeddings/ibm.d.ts",
Expand Down Expand Up @@ -3336,6 +3345,10 @@
"embeddings/hf_transformers.js",
"embeddings/hf_transformers.d.ts",
"embeddings/hf_transformers.d.cts",
"embeddings/huggingface_transformers.cjs",
"embeddings/huggingface_transformers.js",
"embeddings/huggingface_transformers.d.ts",
"embeddings/huggingface_transformers.d.cts",
"embeddings/ibm.cjs",
"embeddings/ibm.js",
"embeddings/ibm.d.ts",
Expand Down
34 changes: 11 additions & 23 deletions libs/langchain-community/src/embeddings/hf_transformers.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
/* eslint-disable */
// @ts-nocheck
import type {
PretrainedOptions,
FeatureExtractionPipelineOptions,
Expand All @@ -6,13 +8,19 @@ import type {
import { Embeddings, type EmbeddingsParams } from "@langchain/core/embeddings";
import { chunkArray } from "@langchain/core/utils/chunk_array";

/**
* @deprecated Import from
* "@langchain/community/embeddings/huggingface_transformers"
* instead and use the new "@huggingface/transformers" peer dependency.
*/
export interface HuggingFaceTransformersEmbeddingsParams
extends EmbeddingsParams {
/**
* Model name to use
* Alias for `model`
*/
modelName: string;

/** Model name to use */
model: string;

Expand Down Expand Up @@ -42,24 +50,10 @@ export interface HuggingFaceTransformersEmbeddingsParams
*/
pipelineOptions?: FeatureExtractionPipelineOptions;
}

/**
* @example
* ```typescript
* const model = new HuggingFaceTransformersEmbeddings({
* model: "Xenova/all-MiniLM-L6-v2",
* });
*
* // Embed a single query
* const res = await model.embedQuery(
* "What would be a good company name for a company that makes colorful socks?"
* );
* console.log({ res });
*
* // Embed multiple documents
* const documentRes = await model.embedDocuments(["Hello world", "Bye bye"]);
* console.log({ documentRes });
* ```
* @deprecated Import from
* "@langchain/community/embeddings/huggingface_transformers"
* instead and use the new "@huggingface/transformers" peer dependency.
*/
export class HuggingFaceTransformersEmbeddings
extends Embeddings
Expand All @@ -83,7 +77,6 @@ export class HuggingFaceTransformersEmbeddings

constructor(fields?: Partial<HuggingFaceTransformersEmbeddingsParams>) {
super(fields ?? {});

this.modelName = fields?.model ?? fields?.modelName ?? this.model;
this.model = this.modelName;
this.stripNewLines = fields?.stripNewLines ?? this.stripNewLines;
Expand All @@ -95,27 +88,22 @@ export class HuggingFaceTransformersEmbeddings
...fields?.pipelineOptions,
};
}

/**
 * Embeds several texts and returns the resulting vectors as one flat list.
 *
 * When `stripNewLines` is truthy, every newline in each text is replaced
 * with a space first. The texts are then passed through `chunkArray` with
 * `batchSize`, each resulting batch is handed to `runEmbedding`, and the
 * per-batch results are concatenated in batch order.
 *
 * @param texts - Texts to embed.
 * @returns Every vector produced by `runEmbedding`, batch after batch.
 */
async embedDocuments(texts: string[]): Promise<number[][]> {
  const prepared = this.stripNewLines
    ? texts.map((t) => t.replace(/\n/g, " "))
    : texts;

  // Kick off every batch before awaiting; Promise.all preserves batch order.
  const pending = chunkArray(prepared, this.batchSize).map((batch) =>
    this.runEmbedding(batch)
  );
  const resolved = await Promise.all(pending);

  // Concatenate the per-batch results into a single list of vectors.
  const embeddings: number[][] = [];
  for (const vectors of resolved) {
    for (let k = 0; k < vectors.length; k += 1) {
      embeddings.push(vectors[k]);
    }
  }
  return embeddings;
}

async embedQuery(text: string): Promise<number[]> {
const data = await this.runEmbedding([
this.stripNewLines ? text.replace(/\n/g, " ") : text,
Expand Down
Loading
Loading