From 7ec932634cc908474dc9759d1d33f263a2ad438c Mon Sep 17 00:00:00 2001 From: Yuki Watanabe <31463517+B-Step62@users.noreply.github.com> Date: Thu, 26 Sep 2024 11:54:56 +0900 Subject: [PATCH] Release 0.1.0 (#23) * Release 0.1.0 Signed-off-by: B-Step62 * Update README Signed-off-by: B-Step62 * Update README.md Co-authored-by: Ben Wilson <39283302+BenWilson2@users.noreply.github.com> --------- Signed-off-by: B-Step62 Co-authored-by: Ben Wilson <39283302+BenWilson2@users.noreply.github.com> --- README.md | 1 + libs/databricks/README.md | 73 ++++++++++++++++++++++++++++++---- libs/databricks/pyproject.toml | 2 +- 3 files changed, 67 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 2f16e06..d48b2c7 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ This repository provides LangChain components to connect your LangChain applicat - **🤖 LLMs**: The `ChatDatabricks` component allows you to access chat endpoints hosted on [Databricks Model Serving](https://www.databricks.com/product/model-serving), including state-of-the-art models such as Llama3, Mixtral, and DBRX, as well as your own fine-tuned models. - **📐 Vector Store**: [Databricks Vector Search](https://www.databricks.com/product/machine-learning/vector-search) is a serverless similarity search engine that allows you to store a vector representation of your data, including metadata, in a vector database. With Vector Search, you can create auto-updating vector search indexes from Delta tables managed by Unity Catalog and query them with a simple API to return the most similar vectors. - **🔢 Embeddings**: Provides components for working with embedding models hosted on [Databricks Model Serving](https://www.databricks.com/product/model-serving). 
+- **📊 MLflow Integration**: LangChain Databricks components are fully integrated with [MLflow](https://python.langchain.com/docs/integrations/providers/mlflow_tracking/), providing various LLMOps capabilities such as experiment tracking, dependency management, evaluation, and tracing (observability). **Note**: This repository will replace all Databricks integrations currently present in the `langchain-community` package. Users are encouraged to migrate to this repository as soon as possible. diff --git a/libs/databricks/README.md b/libs/databricks/README.md index acba5c7..923d525 100644 --- a/libs/databricks/README.md +++ b/libs/databricks/README.md @@ -1,24 +1,81 @@ -# langchain-databricks +# 🦜️🔗 LangChain Databricks -This package contains the LangChain integration with Databricks +This repository provides LangChain components to connect your LangChain application with various Databricks services. + +## Features + +- **🤖 LLMs**: The `ChatDatabricks` component allows you to access chat endpoints hosted on [Databricks Model Serving](https://www.databricks.com/product/model-serving), including state-of-the-art models such as Llama3, Mixtral, and DBRX, as well as your own fine-tuned models. +- **📐 Vector Store**: [Databricks Vector Search](https://www.databricks.com/product/machine-learning/vector-search) is a serverless similarity search engine that allows you to store a vector representation of your data, including metadata, in a vector database. With Vector Search, you can create auto-updating vector search indexes from Delta tables managed by Unity Catalog and query them with a simple API to return the most similar vectors. +- **🔢 Embeddings**: Provides components for working with embedding models hosted on [Databricks Model Serving](https://www.databricks.com/product/model-serving). 
+- **📊 MLflow Integration**: LangChain Databricks components are fully integrated with [MLflow](https://python.langchain.com/docs/integrations/providers/mlflow_tracking/), providing various LLMOps capabilities such as experiment tracking, dependency management, evaluation, and tracing (observability). + +**Note**: This repository will replace all Databricks integrations currently present in the `langchain-community` package. Users are encouraged to migrate to this repository as soon as possible. ## Installation +You can install the `langchain-databricks` package from PyPI. + ```bash pip install -U langchain-databricks ``` -And you should configure credentials by setting the following environment variables: +If you are using this package outside a Databricks workspace, you should configure credentials by setting the following environment variables: -* TODO: fill this out +```bash +export DATABRICKS_HOST="https://your-databricks-workspace" +export DATABRICKS_TOKEN="your-personal-access-token" +``` + +Instead of a personal access token (PAT), you can also use [OAuth M2M authentication](https://docs.databricks.com/en/dev-tools/auth/oauth-m2m.html#language-Environment): + +```bash +export DATABRICKS_HOST="https://your-databricks-workspace" +export DATABRICKS_CLIENT_ID="your-service-principal-client-id" +export DATABRICKS_CLIENT_SECRET="your-service-principal-secret" +``` ## Chat Models -`ChatDatabricks` class exposes chat models from Databricks. +`ChatDatabricks` is a Chat Model class to access chat endpoints hosted on Databricks, including state-of-the-art models such as Llama3, Mixtral, and DBRX, as well as your own fine-tuned models. 
```python from langchain_databricks import ChatDatabricks -llm = ChatDatabricks() -llm.invoke("Sing a ballad of LangChain.") -``` \ No newline at end of file +chat_model = ChatDatabricks(endpoint="databricks-meta-llama-3-70b-instruct") +chat_model.invoke("Sing a ballad of LangChain.") +``` + +See the [usage example](https://python.langchain.com/docs/integrations/chat/databricks/) for more guidance on how to use it within your LangChain application. + +**Note**: The LLM class [Databricks](https://python.langchain.com/docs/integrations/llms/databricks/) still lives in the `langchain-community` library. However, this class will be deprecated in the future and it is recommended to use `ChatDatabricks` to get the latest features. + +## Embeddings + +`DatabricksEmbeddings` is an Embeddings class to access text-embedding endpoints hosted on Databricks, including state-of-the-art models such as BGE, as well as your own fine-tuned models. + + +```python +from langchain_databricks import DatabricksEmbeddings + +embeddings = DatabricksEmbeddings(endpoint="databricks-bge-large-en") +``` + +See the [usage example](https://python.langchain.com/docs/integrations/text_embedding/databricks) for more guidance on how to use it within your LangChain application. + + +## Vector Search + +Databricks Vector Search is a serverless similarity search engine that allows you to store a vector representation of your data, including metadata, in a vector database. With Vector Search, you can create auto-updating vector search indexes from [Delta](https://docs.databricks.com/en/introduction/delta-comparison.html) tables managed by [Unity Catalog](https://www.databricks.com/product/unity-catalog) and query them with a simple API to return the most similar vectors. 
+ +```python +from langchain_databricks.vectorstores import DatabricksVectorSearch + +dvs = DatabricksVectorSearch( + index_name="your_catalog.your_schema.your_index", + text_column="text", + columns=["source"] +) +docs = dvs.similarity_search("What is vector search?") +``` + +See the [usage example](https://python.langchain.com/docs/integrations/vectorstores/databricks_vector_search) for how to set up vector indices and integrate them with LangChain. diff --git a/libs/databricks/pyproject.toml b/libs/databricks/pyproject.toml index 253213d..1a3491e 100644 --- a/libs/databricks/pyproject.toml +++ b/libs/databricks/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain-databricks" -version = "0.1.0rc2" +version = "0.1.0" description = "An integration package connecting Databricks and LangChain" authors = [] readme = "README.md"