From 7756489950481b17d403f7a8ae4026c55cef24ef Mon Sep 17 00:00:00 2001 From: lujingxuansc Date: Thu, 14 Dec 2023 11:17:28 +0800 Subject: [PATCH 01/13] feat: integrate bytedance volcano embedding --- .../text_embedding/bytedance_volcano.ipynb | 123 +++++++++++++++++ .../embeddings/__init__.py | 2 + .../embeddings/bytedance_volcano.py | 127 ++++++++++++++++++ .../embeddings/test_volcano.py | 19 +++ .../unit_tests/embeddings/test_imports.py | 1 + .../langchain/embeddings/__init__.py | 2 + .../langchain/embeddings/bytedance_volcano.py | 5 + .../unit_tests/embeddings/test_imports.py | 1 + 8 files changed, 280 insertions(+) create mode 100644 docs/docs/integrations/text_embedding/bytedance_volcano.ipynb create mode 100644 libs/community/langchain_community/embeddings/bytedance_volcano.py create mode 100644 libs/community/tests/integration_tests/embeddings/test_volcano.py create mode 100644 libs/langchain/langchain/embeddings/bytedance_volcano.py diff --git a/docs/docs/integrations/text_embedding/bytedance_volcano.ipynb b/docs/docs/integrations/text_embedding/bytedance_volcano.ipynb new file mode 100644 index 0000000000000..0370a934be75f --- /dev/null +++ b/docs/docs/integrations/text_embedding/bytedance_volcano.ipynb @@ -0,0 +1,123 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "invalid syntax (3044863648.py, line 3)", + "output_type": "error", + "traceback": [ + "\u001B[0;36m Cell \u001B[0;32mIn[5], line 3\u001B[0;36m\u001B[0m\n\u001B[0;31m This notebook provides you with a guide on how to load the Volcano Embedding class.\u001B[0m\n\u001B[0m ^\u001B[0m\n\u001B[0;31mSyntaxError\u001B[0m\u001B[0;31m:\u001B[0m invalid syntax\n" + ] + } + ], + "source": [ + "# Bytedance Volcano\n", + "\n", + "This notebook provides you with a guide on how to load the Volcano Embedding class.\n", + "\n", + "\n", + "## API Initialization\n", + "\n", + "To use the LLM services based on [Bytedance Volcano](https://www.volcengine.com/docs/82379/1099455), you have to initialize these parameters:\n", + "\n", + "You could either choose to init the AK,SK in environment variables or init params:\n", + "\n", + "```base\n", + "export VOLC_ACCESSKEY=XXX\n", + "export VOLC_SECRETKEY=XXX\n", + "```" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-14T03:05:29.859376Z", + "start_time": "2023-12-14T03:05:29.853288Z" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "start_time": "2023-12-14T03:05:29.857798Z" + } + }, + "outputs": [], + "source": [ + "\"\"\"For basic init and call\"\"\"\n", + "import os\n", + "\n", + "from langchain.embeddings import VolcanoEmbeddings\n", + "\n", + "os.environ[\"VOLC_ACCESSKEY\"] = \"\"\n", + "os.environ[\"VOLC_SECRETKEY\"] = \"==\"\n", + "\n", + "embed = VolcanoEmbeddings(\n", + " # volcano_ak='xxx',\n", + " # volcano_sk='xxx'\n", + ")\n", + "\n", + "print(\"embed_documents result:\")\n", + "res1 = embed.embed_documents([\"foo\", \"bar\"])\n", + "for r in res1:\n", + " print(\"\", r[:8])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "start_time": "2023-12-14T03:05:29.859276Z" + } + }, + "outputs": [], + "source": [ + "print(\"embed_query result:\")\n", + "res2 = embed.embed_query(\"foo\")\n", + "print(\"\", r[:8])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "start_time": "2023-12-14T03:05:29.860282Z" + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + }, + "vscode": { + "interpreter": { + "hash": "6fa70026b407ae751a5c9e6bd7f7d482379da8ad616f98512780b705c84ee157" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/libs/community/langchain_community/embeddings/__init__.py b/libs/community/langchain_community/embeddings/__init__.py index ce9cfc7aa0b76..71f1aeee69179 100644 --- a/libs/community/langchain_community/embeddings/__init__.py +++ b/libs/community/langchain_community/embeddings/__init__.py @@ -78,6 +78,7 @@ from langchain_community.embeddings.spacy_embeddings import SpacyEmbeddings from langchain_community.embeddings.tensorflow_hub import TensorflowHubEmbeddings from langchain_community.embeddings.vertexai import VertexAIEmbeddings +from langchain_community.embeddings.bytedance_volcano import VolcanoEmbeddings from langchain_community.embeddings.voyageai import VoyageEmbeddings from langchain_community.embeddings.xinference import XinferenceEmbeddings @@ -136,6 +137,7 @@ "JohnSnowLabsEmbeddings", "VoyageEmbeddings", "BookendEmbeddings", + "VolcanoEmbeddings", ] diff --git a/libs/community/langchain_community/embeddings/bytedance_volcano.py b/libs/community/langchain_community/embeddings/bytedance_volcano.py new file mode 100644 index 0000000000000..cd0d7b96d515e --- /dev/null +++ b/libs/community/langchain_community/embeddings/bytedance_volcano.py @@ -0,0 +1,127 @@ +from __future__ import annotations + +import logging +from typing import Any, Dict, List, Optional + +from langchain_core.embeddings import Embeddings +from langchain_core.pydantic_v1 import BaseModel, root_validator + +from langchain.utils import get_from_dict_or_env + +logger = logging.getLogger(__name__) + + +class VolcanoEmbeddings(BaseModel, Embeddings): + """`Bytedance Volcano Embeddings` embedding models.""" + + volcano_ak: Optional[str] = None + """volcano access key + learn more from: https://www.volcengine.com/docs/6459/76491#ak-sk""" + + volcano_sk: Optional[str] = None + """volcano secret key + learn more from: https://www.volcengine.com/docs/6459/76491#ak-sk""" + + host: str = 'maas-api.ml-platform-cn-beijing.volces.com' + """host + learn more from https://www.volcengine.com/docs/82379/1174746""" + region: str = "cn-beijing" + """region + learn more from https://www.volcengine.com/docs/82379/1174746""" + + model: str = "bge-large-zh" + """Model name + you could get from https://www.volcengine.com/docs/82379/1174746 + for now, we support bge_large_zh + """ + + version: str = "1.0" + """ model version """ + + chunk_size: int = 100 + """Chunk size when multiple texts are input""" + + client: Any + """volcano client""" + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """ + Validate whether volcano_ak and volcano_sk in the environment variables or + configuration file are available or not. + + init volcano embedding client with `ak`, `sk`, `host`, `region` + + Args: + + values: a dictionary containing configuration information, must include the + fields of volcano_ak and volcano_sk + Returns: + + a dictionary containing configuration information. If volcano_ak and + volcano_sk are not provided in the environment variables or configuration + file,the original values will be returned; otherwise, values containing + volcano_ak and volcano_sk will be returned. + Raises: + + ValueError: volcengine package not found, please install it with `pip install + volcengine` + """ + values["volcano_ak"] = get_from_dict_or_env( + values, + "volcano_ak", + "VOLC_ACCESSKEY", + ) + values["volcano_sk"] = get_from_dict_or_env( + values, + "volcano_sk", + "VOLC_SECRETKEY", + ) + + try: + from volcengine.maas import MaasService, MaasException + client = MaasService(values["host"], values["region"]) + client.set_ak(values["volcano_ak"]) + client.set_sk(values["volcano_sk"]) + values["client"] = client + except ImportError: + raise ImportError( + "volcengine package not found, please install it with " + "`pip install volcengine`" + ) + return values + + def embed_query(self, text: str) -> List[float]: + return self.embed_documents([text])[0] + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """ + Embeds a list of text documents using the AutoVOT algorithm. + + Args: + texts (List[str]): A list of text documents to embed. + + Returns: + List[List[float]]: A list of embeddings for each document in the input list. + Each embedding is represented as a list of float values. + """ + text_in_chunks = [ + texts[i: i + self.chunk_size] + for i in range(0, len(texts), self.chunk_size) + ] + lst = [] + for chunk in text_in_chunks: + req = { + "model": { + "name": self.model, + "version": self.version, + }, + "input": chunk, + } + try: + from volcengine.maas import MaasException + resp = self.client.embeddings(req) + lst.extend([res["embedding"] for res in resp["data"]]) + except MaasException as err: + raise ValueError(f"Error: {err!r}") + return lst diff --git a/libs/community/tests/integration_tests/embeddings/test_volcano.py b/libs/community/tests/integration_tests/embeddings/test_volcano.py new file mode 100644 index 0000000000000..3e4411d597a5f --- /dev/null +++ b/libs/community/tests/integration_tests/embeddings/test_volcano.py @@ -0,0 +1,19 @@ +"""Test Bytedance Vocalno Embedding.""" +from langchain_community.embeddings import VolcanoEmbeddings + + +def test_modelscope_embedding_documents() -> None: + """Test modelscope embeddings for documents.""" + documents = ["foo", "bar"] + embedding = VolcanoEmbeddings() + output = embedding.embed_documents(documents) + assert len(output) == 2 + assert len(output[0]) == 1024 + + +def test_modelscope_embedding_query() -> None: + """Test modelscope embeddings for query.""" + document = "foo bar" + embedding = VolcanoEmbeddings() + output = embedding.embed_query(document) + assert len(output) == 1024 diff --git a/libs/community/tests/unit_tests/embeddings/test_imports.py b/libs/community/tests/unit_tests/embeddings/test_imports.py index d33d98e493b26..4c6e7a7ce355d 100644 --- a/libs/community/tests/unit_tests/embeddings/test_imports.py +++ b/libs/community/tests/unit_tests/embeddings/test_imports.py @@ -53,6 +53,7 @@ "JohnSnowLabsEmbeddings", "VoyageEmbeddings", "BookendEmbeddings", + "VolcanoEmbeddings" ] diff --git a/libs/langchain/langchain/embeddings/__init__.py b/libs/langchain/langchain/embeddings/__init__.py index 3710a6e1969fa..3dc72cc1d88cd 100644 --- a/libs/langchain/langchain/embeddings/__init__.py +++ b/libs/langchain/langchain/embeddings/__init__.py @@ -70,6 +70,7 @@ from langchain.embeddings.spacy_embeddings import SpacyEmbeddings from langchain.embeddings.tensorflow_hub import TensorflowHubEmbeddings from langchain.embeddings.vertexai import VertexAIEmbeddings +from langchain.embeddings.bytedance_volcano import VolcanoEmbeddings from langchain.embeddings.voyageai import VoyageEmbeddings from langchain.embeddings.xinference import XinferenceEmbeddings @@ -129,6 +130,7 @@ "JohnSnowLabsEmbeddings", "VoyageEmbeddings", "BookendEmbeddings", + "VolcanoEmbeddings", ] diff --git a/libs/langchain/langchain/embeddings/bytedance_volcano.py b/libs/langchain/langchain/embeddings/bytedance_volcano.py new file mode 100644 index 0000000000000..5b8592466d049 --- /dev/null +++ b/libs/langchain/langchain/embeddings/bytedance_volcano.py @@ -0,0 +1,5 @@ +from langchain_community.embeddings.bytedance_volcano import ( + VolcanoEmbeddings, +) + +__all__ = ["VolcanoEmbeddings"] diff --git a/libs/langchain/tests/unit_tests/embeddings/test_imports.py b/libs/langchain/tests/unit_tests/embeddings/test_imports.py index 8fe5df0994a50..5bd96f6b29eda 100644 --- a/libs/langchain/tests/unit_tests/embeddings/test_imports.py +++ b/libs/langchain/tests/unit_tests/embeddings/test_imports.py @@ -54,6 +54,7 @@ "JohnSnowLabsEmbeddings", "VoyageEmbeddings", "BookendEmbeddings", + "VolcanoEmbeddings" ] From 3cc606f8a711d38da698105107e297d64bba8f1b Mon Sep 17 00:00:00 2001 From: lujingxuansc Date: Thu, 14 Dec 2023 11:21:34 +0800 Subject: [PATCH 02/13] format --- libs/langchain/langchain/embeddings/__init__.py | 2 +- libs/langchain/tests/unit_tests/embeddings/test_imports.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/langchain/langchain/embeddings/__init__.py b/libs/langchain/langchain/embeddings/__init__.py index 3dc72cc1d88cd..4fbc849bd40c7 100644 --- a/libs/langchain/langchain/embeddings/__init__.py +++ b/libs/langchain/langchain/embeddings/__init__.py @@ -23,6 +23,7 @@ from langchain.embeddings.baidu_qianfan_endpoint import QianfanEmbeddingsEndpoint from langchain.embeddings.bedrock import BedrockEmbeddings from langchain.embeddings.bookend import BookendEmbeddings +from langchain.embeddings.bytedance_volcano import VolcanoEmbeddings from langchain.embeddings.cache import CacheBackedEmbeddings from langchain.embeddings.clarifai import ClarifaiEmbeddings from langchain.embeddings.cohere import CohereEmbeddings @@ -70,7 +71,6 @@ from langchain.embeddings.spacy_embeddings import SpacyEmbeddings from langchain.embeddings.tensorflow_hub import TensorflowHubEmbeddings from langchain.embeddings.vertexai import VertexAIEmbeddings -from langchain.embeddings.bytedance_volcano import VolcanoEmbeddings from langchain.embeddings.voyageai import VoyageEmbeddings from langchain.embeddings.xinference import XinferenceEmbeddings diff --git a/libs/langchain/tests/unit_tests/embeddings/test_imports.py b/libs/langchain/tests/unit_tests/embeddings/test_imports.py index 5bd96f6b29eda..ac89763907eef 100644 --- a/libs/langchain/tests/unit_tests/embeddings/test_imports.py +++ b/libs/langchain/tests/unit_tests/embeddings/test_imports.py @@ -54,7 +54,7 @@ "JohnSnowLabsEmbeddings", "VoyageEmbeddings", "BookendEmbeddings", - "VolcanoEmbeddings" + "VolcanoEmbeddings", ] From cb88740ff902622101e040682feb084d0169dcad Mon Sep 17 00:00:00 2001 From: lujingxuansc Date: Thu, 14 Dec 2023 11:53:47 +0800 Subject: [PATCH 03/13] fix jupyter --- .../text_embedding/bytedance_volcano.ipynb | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/docs/docs/integrations/text_embedding/bytedance_volcano.ipynb b/docs/docs/integrations/text_embedding/bytedance_volcano.ipynb index 0370a934be75f..bbb3f919e01a9 100644 --- a/docs/docs/integrations/text_embedding/bytedance_volcano.ipynb +++ b/docs/docs/integrations/text_embedding/bytedance_volcano.ipynb @@ -1,18 +1,7 @@ { "cells": [ { - "cell_type": "code", - "execution_count": 5, - "outputs": [ - { - "ename": "SyntaxError", - "evalue": "invalid syntax (3044863648.py, line 3)", - "output_type": "error", - "traceback": [ - "\u001B[0;36m Cell \u001B[0;32mIn[5], line 3\u001B[0;36m\u001B[0m\n\u001B[0;31m This notebook provides you with a guide on how to load the Volcano Embedding class.\u001B[0m\n\u001B[0m ^\u001B[0m\n\u001B[0;31mSyntaxError\u001B[0m\u001B[0;31m:\u001B[0m invalid syntax\n" - ] - } - ], + "cell_type": "raw", "source": [ "# Bytedance Volcano\n", "\n", @@ -31,11 +20,7 @@ "```" ], "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-12-14T03:05:29.859376Z", - "start_time": "2023-12-14T03:05:29.853288Z" - } + "collapsed": false } }, { From 2dd743551897a691ddc4bb0ae6342c49ea3a780c Mon Sep 17 00:00:00 2001 From: lujingxuansc Date: Thu, 14 Dec 2023 11:57:35 +0800 Subject: [PATCH 04/13] fix import --- .../langchain_community/embeddings/bytedance_volcano.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libs/community/langchain_community/embeddings/bytedance_volcano.py b/libs/community/langchain_community/embeddings/bytedance_volcano.py index cd0d7b96d515e..59726ee847382 100644 --- a/libs/community/langchain_community/embeddings/bytedance_volcano.py +++ b/libs/community/langchain_community/embeddings/bytedance_volcano.py @@ -5,8 +5,7 @@ from langchain_core.embeddings import Embeddings from langchain_core.pydantic_v1 import BaseModel, root_validator - -from langchain.utils import get_from_dict_or_env +from langchain_core.utils import get_from_dict_or_env logger = logging.getLogger(__name__) From d149c33d24ad19ca8fa87fb2ffdb96e3cf682f1c Mon Sep 17 00:00:00 2001 From: lujingxuansc Date: Thu, 14 Dec 2023 12:01:26 +0800 Subject: [PATCH 05/13] delete no used import --- .../langchain_community/embeddings/bytedance_volcano.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/community/langchain_community/embeddings/bytedance_volcano.py b/libs/community/langchain_community/embeddings/bytedance_volcano.py index 59726ee847382..eb267c64d342d 100644 --- a/libs/community/langchain_community/embeddings/bytedance_volcano.py +++ b/libs/community/langchain_community/embeddings/bytedance_volcano.py @@ -78,7 +78,7 @@ def validate_environment(cls, values: Dict) -> Dict: ) try: - from volcengine.maas import MaasService, MaasException + from volcengine.maas import MaasService client = MaasService(values["host"], values["region"]) client.set_ak(values["volcano_ak"]) client.set_sk(values["volcano_sk"]) From 16cbeeec3e29c6113d28c7797ec23dd08420de26 Mon Sep 17 00:00:00 2001 From: lujingxuansc Date: Thu, 14 Dec 2023 12:07:09 +0800 Subject: [PATCH 06/13] fix line too long --- .../langchain_community/embeddings/bytedance_volcano.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/community/langchain_community/embeddings/bytedance_volcano.py b/libs/community/langchain_community/embeddings/bytedance_volcano.py index eb267c64d342d..1f3f26e0c4471 100644 --- a/libs/community/langchain_community/embeddings/bytedance_volcano.py +++ b/libs/community/langchain_community/embeddings/bytedance_volcano.py @@ -63,8 +63,8 @@ def validate_environment(cls, values: Dict) -> Dict: volcano_ak and volcano_sk will be returned. Raises: - ValueError: volcengine package not found, please install it with `pip install - volcengine` + ValueError: volcengine package not found, please install it with + `pip install volcengine` """ values["volcano_ak"] = get_from_dict_or_env( values, From 0ad53cf03f568e290e064d5e3cb294a09378c66a Mon Sep 17 00:00:00 2001 From: lujingxuansc Date: Thu, 14 Dec 2023 12:11:42 +0800 Subject: [PATCH 07/13] format --- libs/community/langchain_community/embeddings/__init__.py | 2 +- .../langchain_community/embeddings/bytedance_volcano.py | 6 ++++-- libs/community/tests/unit_tests/embeddings/test_imports.py | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/libs/community/langchain_community/embeddings/__init__.py b/libs/community/langchain_community/embeddings/__init__.py index 71f1aeee69179..f8c34f4b48353 100644 --- a/libs/community/langchain_community/embeddings/__init__.py +++ b/libs/community/langchain_community/embeddings/__init__.py @@ -25,6 +25,7 @@ ) from langchain_community.embeddings.bedrock import BedrockEmbeddings from langchain_community.embeddings.bookend import BookendEmbeddings +from langchain_community.embeddings.bytedance_volcano import VolcanoEmbeddings from langchain_community.embeddings.clarifai import ClarifaiEmbeddings from langchain_community.embeddings.cohere import CohereEmbeddings from langchain_community.embeddings.dashscope import DashScopeEmbeddings @@ -78,7 +79,6 @@ from langchain_community.embeddings.spacy_embeddings import SpacyEmbeddings from langchain_community.embeddings.tensorflow_hub import TensorflowHubEmbeddings from langchain_community.embeddings.vertexai import VertexAIEmbeddings -from langchain_community.embeddings.bytedance_volcano import VolcanoEmbeddings from langchain_community.embeddings.voyageai import VoyageEmbeddings from langchain_community.embeddings.xinference import XinferenceEmbeddings diff --git a/libs/community/langchain_community/embeddings/bytedance_volcano.py b/libs/community/langchain_community/embeddings/bytedance_volcano.py index 1f3f26e0c4471..5cb43e6f54589 100644 --- a/libs/community/langchain_community/embeddings/bytedance_volcano.py +++ b/libs/community/langchain_community/embeddings/bytedance_volcano.py @@ -21,7 +21,7 @@ class VolcanoEmbeddings(BaseModel, Embeddings): """volcano secret key learn more from: https://www.volcengine.com/docs/6459/76491#ak-sk""" - host: str = 'maas-api.ml-platform-cn-beijing.volces.com' + host: str = "maas-api.ml-platform-cn-beijing.volces.com" """host learn more from https://www.volcengine.com/docs/82379/1174746""" region: str = "cn-beijing" @@ -79,6 +79,7 @@ def validate_environment(cls, values: Dict) -> Dict: try: from volcengine.maas import MaasService + client = MaasService(values["host"], values["region"]) client.set_ak(values["volcano_ak"]) client.set_sk(values["volcano_sk"]) @@ -105,7 +106,7 @@ def embed_documents(self, texts: List[str]) -> List[List[float]]: Each embedding is represented as a list of float values. """ text_in_chunks = [ - texts[i: i + self.chunk_size] + texts[i : i + self.chunk_size] for i in range(0, len(texts), self.chunk_size) ] lst = [] @@ -119,6 +120,7 @@ def embed_documents(self, texts: List[str]) -> List[List[float]]: } try: from volcengine.maas import MaasException + resp = self.client.embeddings(req) lst.extend([res["embedding"] for res in resp["data"]]) except MaasException as err: diff --git a/libs/community/tests/unit_tests/embeddings/test_imports.py b/libs/community/tests/unit_tests/embeddings/test_imports.py index 4c6e7a7ce355d..cd91e675da5be 100644 --- a/libs/community/tests/unit_tests/embeddings/test_imports.py +++ b/libs/community/tests/unit_tests/embeddings/test_imports.py @@ -53,7 +53,7 @@ "JohnSnowLabsEmbeddings", "VoyageEmbeddings", "BookendEmbeddings", - "VolcanoEmbeddings" + "VolcanoEmbeddings", ] From a0874638e5afa49affc8790099b7d2f2c064cc86 Mon Sep 17 00:00:00 2001 From: lujingxuansc Date: Mon, 18 Dec 2023 10:35:48 +0800 Subject: [PATCH 08/13] delete from langchain package --- .../tests/integration_tests/embeddings/test_volcano.py | 8 ++++---- libs/langchain/langchain/embeddings/__init__.py | 2 -- libs/langchain/langchain/embeddings/bytedance_volcano.py | 5 ----- .../langchain/tests/unit_tests/embeddings/test_imports.py | 1 - 4 files changed, 4 insertions(+), 12 deletions(-) delete mode 100644 libs/langchain/langchain/embeddings/bytedance_volcano.py diff --git a/libs/community/tests/integration_tests/embeddings/test_volcano.py b/libs/community/tests/integration_tests/embeddings/test_volcano.py index 3e4411d597a5f..2916ff12b7242 100644 --- a/libs/community/tests/integration_tests/embeddings/test_volcano.py +++ b/libs/community/tests/integration_tests/embeddings/test_volcano.py @@ -2,8 +2,8 @@ from langchain_community.embeddings import VolcanoEmbeddings -def test_modelscope_embedding_documents() -> None: - """Test modelscope embeddings for documents.""" +def test_embedding_documents() -> None: + """Test embeddings for documents.""" documents = ["foo", "bar"] embedding = VolcanoEmbeddings() output = embedding.embed_documents(documents) @@ -11,8 +11,8 @@ def test_modelscope_embedding_documents() -> None: assert len(output[0]) == 1024 -def test_modelscope_embedding_query() -> None: - """Test modelscope embeddings for query.""" +def test_embedding_query() -> None: + """Test embeddings for query.""" document = "foo bar" embedding = VolcanoEmbeddings() output = embedding.embed_query(document) diff --git a/libs/langchain/langchain/embeddings/__init__.py b/libs/langchain/langchain/embeddings/__init__.py index 4fbc849bd40c7..3710a6e1969fa 100644 --- a/libs/langchain/langchain/embeddings/__init__.py +++ b/libs/langchain/langchain/embeddings/__init__.py @@ -23,7 +23,6 @@ from langchain.embeddings.baidu_qianfan_endpoint import QianfanEmbeddingsEndpoint from langchain.embeddings.bedrock import BedrockEmbeddings from langchain.embeddings.bookend import BookendEmbeddings -from langchain.embeddings.bytedance_volcano import VolcanoEmbeddings from langchain.embeddings.cache import CacheBackedEmbeddings from langchain.embeddings.clarifai import ClarifaiEmbeddings from langchain.embeddings.cohere import CohereEmbeddings @@ -130,7 +129,6 @@ "JohnSnowLabsEmbeddings", "VoyageEmbeddings", "BookendEmbeddings", - "VolcanoEmbeddings", ] diff --git a/libs/langchain/langchain/embeddings/bytedance_volcano.py b/libs/langchain/langchain/embeddings/bytedance_volcano.py deleted file mode 100644 index 5b8592466d049..0000000000000 --- a/libs/langchain/langchain/embeddings/bytedance_volcano.py +++ /dev/null @@ -1,5 +0,0 @@ -from langchain_community.embeddings.bytedance_volcano import ( - VolcanoEmbeddings, -) - -__all__ = ["VolcanoEmbeddings"] diff --git a/libs/langchain/tests/unit_tests/embeddings/test_imports.py b/libs/langchain/tests/unit_tests/embeddings/test_imports.py index ac89763907eef..8fe5df0994a50 100644 --- a/libs/langchain/tests/unit_tests/embeddings/test_imports.py +++ b/libs/langchain/tests/unit_tests/embeddings/test_imports.py @@ -54,7 +54,6 @@ "JohnSnowLabsEmbeddings", "VoyageEmbeddings", "BookendEmbeddings", - "VolcanoEmbeddings", ] From acc3e51115699f5749f5058e8b87f852bf3ad631 Mon Sep 17 00:00:00 2001 From: lujingxuansc Date: Mon, 18 Dec 2023 10:58:04 +0800 Subject: [PATCH 09/13] fix name --- ...tedance_volcano.ipynb => volcengine.ipynb} | 19 +++++++++++-------- .../embeddings/__init__.py | 2 +- .../{bytedance_volcano.py => volcengine.py} | 2 +- 3 files changed, 13 insertions(+), 10 deletions(-) rename docs/docs/integrations/text_embedding/{bytedance_volcano.ipynb => volcengine.ipynb} (86%) rename libs/community/langchain_community/embeddings/{bytedance_volcano.py => volcengine.py} (98%) diff --git a/docs/docs/integrations/text_embedding/bytedance_volcano.ipynb b/docs/docs/integrations/text_embedding/volcengine.ipynb similarity index 86% rename from docs/docs/integrations/text_embedding/bytedance_volcano.ipynb rename to docs/docs/integrations/text_embedding/volcengine.ipynb index bbb3f919e01a9..d3a5fa6c2c03c 100644 --- a/docs/docs/integrations/text_embedding/bytedance_volcano.ipynb +++ b/docs/docs/integrations/text_embedding/volcengine.ipynb @@ -1,16 +1,22 @@ { "cells": [ { - "cell_type": "raw", + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ - "# Bytedance Volcano\n", + "# Volc Engine\n", "\n", "This notebook provides you with a guide on how to load the Volcano Embedding class.\n", "\n", "\n", "## API Initialization\n", "\n", - "To use the LLM services based on [Bytedance Volcano](https://www.volcengine.com/docs/82379/1099455), you have to initialize these parameters:\n", + "To use the LLM services based on [VolcEngine](https://www.volcengine.com/docs/82379/1099455), you have to initialize these parameters:\n", "\n", "You could either choose to init the AK,SK in environment variables or init params:\n", "\n", @@ -18,10 +24,7 @@ "export VOLC_ACCESSKEY=XXX\n", "export VOLC_SECRETKEY=XXX\n", "```" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", @@ -39,7 +42,7 @@ "from langchain.embeddings import VolcanoEmbeddings\n", "\n", "os.environ[\"VOLC_ACCESSKEY\"] = \"\"\n", - "os.environ[\"VOLC_SECRETKEY\"] = \"==\"\n", + "os.environ[\"VOLC_SECRETKEY\"] = \"\"\n", "\n", "embed = VolcanoEmbeddings(\n", " # volcano_ak='xxx',\n", diff --git a/libs/community/langchain_community/embeddings/__init__.py b/libs/community/langchain_community/embeddings/__init__.py index f8c34f4b48353..75c70d1740617 100644 --- a/libs/community/langchain_community/embeddings/__init__.py +++ b/libs/community/langchain_community/embeddings/__init__.py @@ -25,7 +25,7 @@ ) from langchain_community.embeddings.bedrock import BedrockEmbeddings from langchain_community.embeddings.bookend import BookendEmbeddings -from langchain_community.embeddings.bytedance_volcano import VolcanoEmbeddings +from langchain_community.embeddings.volcengine import VolcanoEmbeddings from langchain_community.embeddings.clarifai import ClarifaiEmbeddings from langchain_community.embeddings.cohere import CohereEmbeddings from langchain_community.embeddings.dashscope import DashScopeEmbeddings diff --git a/libs/community/langchain_community/embeddings/bytedance_volcano.py b/libs/community/langchain_community/embeddings/volcengine.py similarity index 98% rename from libs/community/langchain_community/embeddings/bytedance_volcano.py rename to libs/community/langchain_community/embeddings/volcengine.py index 5cb43e6f54589..300df91de03da 100644 --- a/libs/community/langchain_community/embeddings/bytedance_volcano.py +++ b/libs/community/langchain_community/embeddings/volcengine.py @@ -11,7 +11,7 @@ class VolcanoEmbeddings(BaseModel, Embeddings): - """`Bytedance Volcano Embeddings` embedding models.""" + """`Volcengine Embeddings` embedding models.""" volcano_ak: Optional[str] = None """volcano access key From 69f97a7402e71a2d0b54d9e60b3ad7ae693bff51 Mon Sep 17 00:00:00 2001 From: lujingxuansc Date: Mon, 18 Dec 2023 14:01:19 +0800 Subject: [PATCH 10/13] update doc --- .../text_embedding/volcengine.ipynb | 33 +++++++++++++++---- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/docs/docs/integrations/text_embedding/volcengine.ipynb b/docs/docs/integrations/text_embedding/volcengine.ipynb index d3a5fa6c2c03c..f406ac1d40946 100644 --- a/docs/docs/integrations/text_embedding/volcengine.ipynb +++ b/docs/docs/integrations/text_embedding/volcengine.ipynb @@ -28,25 +28,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "ExecuteTime": { "start_time": "2023-12-14T03:05:29.857798Z" } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "embed_documents result:\n", + " [0.02929673343896866, -0.009310632012784481, -0.060323506593704224, 0.0031018739100545645, -0.002218986628577113, -0.0023125179577618837, -0.04864659160375595, -2.062115163425915e-05]\n", + " [0.01987231895327568, -0.026041055098176003, -0.08395249396562576, 0.020043574273586273, -0.028862033039331436, 0.004629664588719606, -0.023107370361685753, -0.0342753604054451]\n" + ] + } + ], "source": [ "\"\"\"For basic init and call\"\"\"\n", "import os\n", "\n", - "from langchain.embeddings import VolcanoEmbeddings\n", + "from langchain_community.embeddings import VolcanoEmbeddings\n", "\n", "os.environ[\"VOLC_ACCESSKEY\"] = \"\"\n", "os.environ[\"VOLC_SECRETKEY\"] = \"\"\n", "\n", "embed = VolcanoEmbeddings(\n", - " # volcano_ak='xxx',\n", - " # volcano_sk='xxx'\n", + " volcano_ak='',\n", + " volcano_sk=''\n", ")\n", "\n", "print(\"embed_documents result:\")\n", @@ -57,13 +67,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "ExecuteTime": { "start_time": "2023-12-14T03:05:29.859276Z" } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "embed_query result:\n", + " [0.01987231895327568, -0.026041055098176003, -0.08395249396562576, 0.020043574273586273, -0.028862033039331436, 0.004629664588719606, -0.023107370361685753, -0.0342753604054451]\n" + ] + } + ], "source": [ "print(\"embed_query result:\")\n", "res2 = embed.embed_query(\"foo\")\n", From 70c7e766afeb066bf38b3be8ecf39476de1e0da3 Mon Sep 17 00:00:00 2001 From: lujingxuansc Date: Mon, 18 Dec 2023 14:06:40 +0800 Subject: [PATCH 11/13] format --- libs/community/langchain_community/embeddings/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/community/langchain_community/embeddings/__init__.py b/libs/community/langchain_community/embeddings/__init__.py index 75c70d1740617..3ae7e8ac4f4e7 100644 --- a/libs/community/langchain_community/embeddings/__init__.py +++ b/libs/community/langchain_community/embeddings/__init__.py @@ -25,7 +25,6 @@ ) from langchain_community.embeddings.bedrock import BedrockEmbeddings from langchain_community.embeddings.bookend import BookendEmbeddings -from langchain_community.embeddings.volcengine import VolcanoEmbeddings from langchain_community.embeddings.clarifai import ClarifaiEmbeddings from langchain_community.embeddings.cohere import CohereEmbeddings from langchain_community.embeddings.dashscope import DashScopeEmbeddings @@ -79,6 +78,7 @@ from langchain_community.embeddings.spacy_embeddings import SpacyEmbeddings from langchain_community.embeddings.tensorflow_hub import TensorflowHubEmbeddings from langchain_community.embeddings.vertexai import VertexAIEmbeddings +from langchain_community.embeddings.volcengine import VolcanoEmbeddings from langchain_community.embeddings.voyageai import VoyageEmbeddings from langchain_community.embeddings.xinference import XinferenceEmbeddings From 642280101a6daa78ada9c4dce3f6a41a0c8ede9d Mon Sep 17 00:00:00 2001 From: lujingxuansc Date: Mon, 18 Dec 2023 14:10:31 +0800 Subject: [PATCH 12/13] format --- .../docs/integrations/text_embedding/volcengine.ipynb | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/docs/docs/integrations/text_embedding/volcengine.ipynb b/docs/docs/integrations/text_embedding/volcengine.ipynb index f406ac1d40946..c32bfb53aeecb 100644 --- a/docs/docs/integrations/text_embedding/volcengine.ipynb +++ b/docs/docs/integrations/text_embedding/volcengine.ipynb @@ -3,10 +3,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "# Volc Engine\n", @@ -54,11 +51,7 @@ "os.environ[\"VOLC_ACCESSKEY\"] = \"\"\n", "os.environ[\"VOLC_SECRETKEY\"] = \"\"\n", "\n", - "embed = VolcanoEmbeddings(\n", - " volcano_ak='',\n", - " volcano_sk=''\n", - ")\n", - "\n", + "embed = VolcanoEmbeddings(volcano_ak=\"\", volcano_sk=\"\")\n", "print(\"embed_documents result:\")\n", "res1 = embed.embed_documents([\"foo\", \"bar\"])\n", "for r in res1:\n", From eea5d7f7816d63882571ec859c2d87191244190f Mon Sep 17 00:00:00 2001 From: lujingxuansc Date: Thu, 21 Dec 2023 19:33:22 +0800 Subject: [PATCH 13/13] fix err --- libs/community/langchain_community/embeddings/volcengine.py | 4 ++-- .../tests/integration_tests/embeddings/test_volcano.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/community/langchain_community/embeddings/volcengine.py b/libs/community/langchain_community/embeddings/volcengine.py index 300df91de03da..98ac729b96807 100644 --- a/libs/community/langchain_community/embeddings/volcengine.py +++ b/libs/community/langchain_community/embeddings/volcengine.py @@ -123,6 +123,6 @@ def embed_documents(self, texts: List[str]) -> List[List[float]]: resp = self.client.embeddings(req) lst.extend([res["embedding"] for res in resp["data"]]) - except MaasException as err: - raise ValueError(f"Error: {err!r}") + except MaasException as e: + raise ValueError(f"embed by volcengine Error: {e}") return lst diff --git a/libs/community/tests/integration_tests/embeddings/test_volcano.py b/libs/community/tests/integration_tests/embeddings/test_volcano.py index 2916ff12b7242..7ef7ac33fa46b 100644 --- a/libs/community/tests/integration_tests/embeddings/test_volcano.py +++ b/libs/community/tests/integration_tests/embeddings/test_volcano.py @@ -1,4 +1,4 @@ -"""Test Bytedance Vocalno Embedding.""" +"""Test Bytedance Volcano Embedding.""" from langchain_community.embeddings import VolcanoEmbeddings