From 5c86990ac83554ba6f9296d605287f8c1427b5a7 Mon Sep 17 00:00:00 2001 From: Prithvi Kannan <46332835+prithvikannan@users.noreply.github.com> Date: Mon, 16 Dec 2024 23:32:50 -0800 Subject: [PATCH] Introduce deprecations to langchain-databricks (#48) * Introduce deprecations to langchain-databricks Signed-off-by: Prithvi Kannan * readme Signed-off-by: Prithvi Kannan * update other readme Signed-off-by: Prithvi Kannan * remove removal field Signed-off-by: Prithvi Kannan * deperecation notice Signed-off-by: Prithvi Kannan * linter Signed-off-by: Prithvi Kannan --------- Signed-off-by: Prithvi Kannan --- README.md | 200 +++--------------- libs/databricks/README.md | 90 +++----- .../langchain_databricks/chat_models.py | 4 + .../langchain_databricks/embeddings.py | 6 + .../langchain_databricks/vectorstores.py | 6 + 5 files changed, 75 insertions(+), 231 deletions(-) diff --git a/README.md b/README.md index 4f91908..9f9944b 100644 --- a/README.md +++ b/README.md @@ -1,193 +1,51 @@ -# 🦜️🔗 LangChain Databricks +# 🦜️🔗 LangChain Databricks (Deprecated) -This repository provides LangChain components to connect your LangChain application with various Databricks services. +| Note: this package is deprecated in favor of the renamed `databricks-langchain` package ([repo](https://github.com/databricks/databricks-ai-bridge/tree/main/integrations/langchain), [package](https://pypi.org/project/databricks-langchain/)). | +|-| -## Upcoming Package Consolidation Notice -This package (`langchain-databricks`) will soon be consolidated into a new package: `databricks-langchain`. The new package will serve as the primary hub for all Databricks Langchain integrations. +This repository previously provided LangChain components to connect your LangChain application with various Databricks services. -### What’s Changing?
-In the coming months, `databricks-langchain` will include all features currently in `langchain-databricks`, as well as additional integrations to provide a unified experience for Databricks users. +## Deprecation Notice -### What You Need to Know -For now, continue to use `langchain-databricks` as usual. When `databricks-langchain` is ready, we’ll provide clear migration instructions to make the transition seamless. During the transition period, `langchain-databricks` will remain operational, and updates will be shared here with timelines and guidance. +The `langchain-databricks` package is now deprecated in favor of the consolidated package [`databricks-langchain`](https://pypi.org/project/databricks-langchain/). Please update your dependencies to use `databricks-langchain` going forward. -Thank you for your support as we work toward an improved, streamlined experience! +### Migration Guide -## Features +#### What’s Changing? -- **🤖 LLMs**: The `ChatDatabricks` component allows you to access chat endpoints hosted on [Databricks Model Serving](https://www.databricks.com/product/model-serving), including state-of-the-art models such as Llama3, Mixtral, and DBRX, as well as your own fine-tuned models. -- **📐 Vector Store**: [Databricks Vector Search](https://www.databricks.com/product/machine-learning/vector-search) is a serverless similarity search engine that allows you to store a vector representation of your data, including metadata, in a vector database. With Vector Search, you can create auto-updating vector search indexes from Delta tables managed by Unity Catalog and query them with a simple API to return the most similar vectors. -- **🔢 Embeddings**: Provides components for working with embedding models hosted on [Databricks Model Serving](https://www.databricks.com/product/model-serving).
-- **📊 MLflow Integration**: LangChain Databricks components are fully integrated with [MLflow](https://python.langchain.com/docs/integrations/providers/mlflow_tracking/), providing various LLMOps capabilities such as experiment tracking, dependency management, evaluation, and tracing (observability). +- All features previously provided by `langchain-databricks` are now available in `databricks-langchain`. +- Future updates and new features will be released exclusively in `databricks-langchain`. -**Note**: This repository will replace all Databricks integrations currently present in the `langchain-community` package. Users are encouraged to migrate to this repository as soon as possible. +#### How to Migrate -## Installation +1. **Install the new package:** -You can install the `langchain-databricks` package from PyPI. + ```bash + pip install databricks-langchain + ``` -```bash -pip install langchain-databricks -``` +2. **Update Imports:** Replace occurrences of `langchain_databricks` in your code with `databricks_langchain`. Example: + ```python + from databricks_langchain import ChatDatabricks -## Usage + chat_model = ChatDatabricks(endpoint="databricks-meta-llama-3-70b-instruct") + response = chat_model.invoke("What is MLflow?") + print(response) + ``` -Here's a simple example of how to use the `langchain-databricks` package. +For more details, please refer to the [Langchain documentation](https://python.langchain.com/docs/integrations/providers/databricks/) and the [databricks-langchain package](https://pypi.org/project/databricks-langchain/). -```python -from langchain_databricks import ChatDatabricks - -chat_model = ChatDatabricks(endpoint="databricks-meta-llama-3-70b-instruct") - -response = chat_model.invoke("What is MLflow?") -print(response) -``` - -For more detailed usage examples and documentation, please refer to the [LangChain documentation](https://python.langchain.com/docs/integrations/providers/databricks//).
+--- ## Contributing -We welcome contributions to this project! Please follow the following guidance to setup the project for development and start contributing. - -### Folk and clone the repository - -To contribute to this project, please follow the ["fork and pull request"](https://docs.github.com/en/get-started/exploring-projects-on-github/contributing-to-a-project) workflow. Please do not try to push directly to this repo unless you are a maintainer. - - -### Dependency Management: Poetry and other env/dependency managers - -This project utilizes [Poetry](https://python-poetry.org/) v1.7.1+ as a dependency manager. - -❗Note: *Before installing Poetry*, if you use `Conda`, create and activate a new Conda env (e.g. `conda create -n langchain python=3.9`) - -Install Poetry: **[documentation on how to install it](https://python-poetry.org/docs/#installation)**. - -❗Note: If you use `Conda` or `Pyenv` as your environment/package manager, after installing Poetry, -tell Poetry to use the virtualenv python environment (`poetry config virtualenvs.prefer-active-python true`) - -### Local Development Dependencies - -The project configuration and the makefile for running dev commands are located under the `libs/databricks` directory. - -```bash -cd libs/databricks -``` - -Install langchain-databricks development requirements (for running langchain, running examples, linting, formatting, tests, and coverage): - -```bash -poetry install --with lint,typing,test,test_integration,dev -``` - -Then verify the installation. - -```bash -make test -``` - -If during installation you receive a `WheelFileValidationError` for `debugpy`, please make sure you are running -Poetry v1.6.1+. This bug was present in older versions of Poetry (e.g. 1.4.1) and has been resolved in newer releases. -If you are still seeing this bug on v1.6.1+, you may also try disabling "modern installation" -(`poetry config installer.modern-installation false`) and re-installing requirements.
-See [this `debugpy` issue](https://github.com/microsoft/debugpy/issues/1246) for more details. - -### Testing - -Unit tests cover modular logic that does not require calls to outside APIs. -If you add new logic, please add a unit test. - -To run unit tests: - -```bash -make test -``` - -Integration tests cover the end-to-end service calls as much as possible. -However, in certain cases this might not be practical, so you can mock the -service response for these tests. There are examples of this in the repo, -that can help you write your own tests. If you have suggestions to improve -this, please get in touch with us. - -To run the integration tests: +Contributions are now accepted in the `databricks-langchain` repository. Please refer to its [contribution guide](https://github.com/databricks/databricks-ai-bridge/tree/main/integrations/langchain) for more details. -```bash -make integration_test -``` +--- -### Formatting and Linting - -Formatting ensures that the code in this repo has consistent style so that the -code looks more presentable and readable. It corrects these errors when you run -the formatting command. Linting finds and highlights the code errors and helps -avoid coding practicies that can lead to errors. - -Run both of these locally before submitting a PR. The CI scripts will run these -when you submit a PR, and you won't be able to merge changes without fixing -issues identified by the CI. - -#### Code Formatting - -Formatting for this project is done via [ruff](https://docs.astral.sh/ruff/rules/). - -To run format: - -```bash -make format -``` - -Additionally, you can run the formatter only on the files that have been modified in your current branch -as compared to the master branch using the `format_diff` command. This is especially useful when you have -made changes to a subset of the project and want to ensure your changes are properly formatted without -affecting the rest of the codebase. 
- -```bash -make format_diff -``` - -#### Linting - -Linting for this project is done via a combination of [ruff](https://docs.astral.sh/ruff/rules/) and [mypy](http://mypy-lang.org/). - -To run lint: - -```bash -make lint -``` - -In addition, you can run the linter only on the files that have been modified in your current branch as compared to the master branch using the `lint_diff` command. This can be very helpful when you've made changes to only certain parts of the project and want to ensure your changes meet the linting standards without having to check the entire codebase. - -```bash -make lint_diff -``` - -We recognize linting can be annoying - if you do not want to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed. - -#### Spellcheck - -Spellchecking for this project is done via [codespell](https://github.com/codespell-project/codespell). -Note that `codespell` finds common typos, so it could have false-positive (correctly spelled but rarely used) and false-negatives (not finding misspelled) words. - -To check spelling for this project: - -```bash -make spell_check -``` - -To fix spelling in place: - -```bash -make spell_fix -``` - -If codespell is incorrectly flagging a word, you can skip spellcheck for that word by adding it to the codespell config in the `pyproject.toml` file. +## License -```python -[tool.codespell] -... -# Add here: -ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure' -``` +This project was licensed under the [MIT License](LICENSE). -## License +Thank you for your support as we continue to improve Databricks integrations within LangChain! -This project is licensed under the [MIT License](LICENSE). 
\ No newline at end of file diff --git a/libs/databricks/README.md b/libs/databricks/README.md index 923d525..9f9944b 100644 --- a/libs/databricks/README.md +++ b/libs/databricks/README.md @@ -1,81 +1,51 @@ -# 🦜️🔗 LangChain Databricks +# 🦜️🔗 LangChain Databricks (Deprecated) -This repository provides LangChain components to connect your LangChain application with various Databricks services. +| Note: this package is deprecated in favor of the renamed `databricks-langchain` package ([repo](https://github.com/databricks/databricks-ai-bridge/tree/main/integrations/langchain), [package](https://pypi.org/project/databricks-langchain/)). | +|-| -## Features +This repository previously provided LangChain components to connect your LangChain application with various Databricks services. -- **🤖 LLMs**: The `ChatDatabricks` component allows you to access chat endpoints hosted on [Databricks Model Serving](https://www.databricks.com/product/model-serving), including state-of-the-art models such as Llama3, Mixtral, and DBRX, as well as your own fine-tuned models. -- **📐 Vector Store**: [Databricks Vector Search](https://www.databricks.com/product/machine-learning/vector-search) is a serverless similarity search engine that allows you to store a vector representation of your data, including metadata, in a vector database. With Vector Search, you can create auto-updating vector search indexes from Delta tables managed by Unity Catalog and query them with a simple API to return the most similar vectors. -- **🔢 Embeddings**: Provides components for working with embedding models hosted on [Databricks Model Serving](https://www.databricks.com/product/model-serving).
-- **📊 MLflow Integration**: LangChain Databricks components is fully integrated with [MLflow](https://python.langchain.com/docs/integrations/providers/mlflow_tracking/), providing various LLMOps capabilities such as experiment tracking, dependency management, evaluation, and tracing (observability). +## Deprecation Notice -**Note**: This repository will replace all Databricks integrations currently present in the `langchain-community` package. Users are encouraged to migrate to this repository as soon as possible. +The `langchain-databricks` package is now deprecated in favor of the consolidated package [`databricks-langchain`](https://pypi.org/project/databricks-langchain/). Please update your dependencies to use `databricks-langchain` going forward. -## Installation +### Migration Guide -You can install the `langchain-databricks` package from PyPI. +#### What’s Changing? -```bash -pip install -U langchain-databricks -``` +- All features previously provided by `langchain-databricks` are now available in `databricks-langchain`. +- Future updates and new features will be released exclusively in `databricks-langchain`. -If you are using this package outside Databricks workspace, you should configure credentials by setting the following environment variables: +#### How to Migrate -```bash -export DATABRICKS_HOSTNAME="https://your-databricks-workspace" -export DATABRICKS_TOKEN="your-personal-access-token" -``` +1. **Install the new package:** -Instead of personal access token (PAT), you can also use [OAuth M2M authentication](https://docs.databricks.com/en/dev-tools/auth/oauth-m2m.html#language-Environment): + ```bash + pip install databricks-langchain + ``` -```bash -export DATABRICKS_HOSTNAME="https://your-databricks-workspace" -export DATABRICKS_CLIENT_ID="your-service-principle-client-id" -export DATABRICKS_CLIENT_SECRET="your-service-principle-secret" -``` +2.
**Update Imports:** Replace occurrences of `langchain_databricks` in your code with `databricks_langchain`. Example: + ```python + from databricks_langchain import ChatDatabricks -## Chat Models + chat_model = ChatDatabricks(endpoint="databricks-meta-llama-3-70b-instruct") + response = chat_model.invoke("What is MLflow?") + print(response) + ``` -`ChatDatabricks` is a Chat Model class to access chat endpoints hosted on Databricks, including state-of-the-art models such as Llama3, Mixtral, and DBRX, as well as your own fine-tuned models. +For more details, please refer to the [Langchain documentation](https://python.langchain.com/docs/integrations/providers/databricks/) and the [databricks-langchain package](https://pypi.org/project/databricks-langchain/). -```python -from langchain_databricks import ChatDatabricks +--- -chat_model = ChatDatabricks(endpoint="databricks-meta-llama-3-70b-instruct") -chat_model.invoke("Sing a ballad of LangChain.") -``` +## Contributing -See the [usage example](https://python.langchain.com/docs/integrations/chat/databricks/) for more guidance on how to use it within your LangChain application. +Contributions are now accepted in the `databricks-langchain` repository. Please refer to its [contribution guide](https://github.com/databricks/databricks-ai-bridge/tree/main/integrations/langchain) for more details. -**Note**: The LLM class [Databricks](https://python.langchain.com/docs/integrations/llms/databricks/) still lives in the `langchain-community` library. However, this class will be deprecated in the future and it is recommended to use `ChatDatabricks` to get the latest features. +--- -## Embeddings +## License -`DatabricksEmbeddings` is an Embeddings class to access text-embedding endpoints hosted on Databricks, including state-of-the-art models such as BGE, as well as your own fine-tuned models. +This project was licensed under the [MIT License](LICENSE). 
+Thank you for your support as we continue to improve Databricks integrations within LangChain! -```python -from langchain_databricks import DatabricksEmbeddings - -embeddings = DatabricksEmbeddings(endpoint="databricks-bge-large-en") -``` - -See the [usage example](https://python.langchain.com/docs/integrations/text_embedding/databricks) for more guidance on how to use it within your LangChain application. - - -## Vector Search - -Databricks Vector Search is a serverless similarity search engine that allows you to store a vector representation of your data, including metadata, in a vector database. With Vector Search, you can create auto-updating vector search indexes from [Delta](https://docs.databricks.com/en/introduction/delta-comparison.html) tables managed by [Unity Catalog](https://www.databricks.com/product/unity-catalog) and query them with a simple API to return the most similar vectors. - -```python -from langchain_databricks.vectorstores import DatabricksVectorSearch - -dvs = DatabricksVectorSearch( - index_name="", - text_column="text", - columns=["source"] -) -docs = dvs.similarity_search("What is vector search?") -``` - -See the [usage example](https://python.langchain.com/docs/integrations/vectorstores/databricks_vector_search) for how to set up vector indices and integrate them with LangChain. 
diff --git a/libs/databricks/langchain_databricks/chat_models.py b/libs/databricks/langchain_databricks/chat_models.py index dcbdbed..ab89396 100644 --- a/libs/databricks/langchain_databricks/chat_models.py +++ b/libs/databricks/langchain_databricks/chat_models.py @@ -17,6 +17,7 @@ Union, ) +from langchain_core._api import deprecated from langchain_core.callbacks import CallbackManagerForLLMRun from langchain_core.language_models import BaseChatModel from langchain_core.language_models.base import LanguageModelInput @@ -58,6 +59,9 @@ logger = logging.getLogger(__name__) +@deprecated( + since="0.1.2", message="Use databricks_langchain.ChatDatabricks", removal="1.0.0" +) class ChatDatabricks(BaseChatModel): """Databricks chat model integration. diff --git a/libs/databricks/langchain_databricks/embeddings.py b/libs/databricks/langchain_databricks/embeddings.py index 2cbf4a5..1282ea6 100644 --- a/libs/databricks/langchain_databricks/embeddings.py +++ b/libs/databricks/langchain_databricks/embeddings.py @@ -1,11 +1,17 @@ from typing import Any, Dict, Iterator, List +from langchain_core._api import deprecated from langchain_core.embeddings import Embeddings from pydantic import BaseModel, PrivateAttr from langchain_databricks.utils import get_deployment_client +@deprecated( + since="0.1.2", + message="Use databricks_langchain.DatabricksEmbeddings", + removal="1.0.0", +) class DatabricksEmbeddings(Embeddings, BaseModel): """Databricks embedding model integration. 
diff --git a/libs/databricks/langchain_databricks/vectorstores.py b/libs/databricks/langchain_databricks/vectorstores.py index 1e2aa0a..bb2dec6 100644 --- a/libs/databricks/langchain_databricks/vectorstores.py +++ b/libs/databricks/langchain_databricks/vectorstores.py @@ -18,6 +18,7 @@ ) import numpy as np +from langchain_core._api import deprecated from langchain_core.documents import Document from langchain_core.embeddings import Embeddings from langchain_core.vectorstores import VST, VectorStore @@ -38,6 +39,11 @@ class IndexType(str, Enum): ) +@deprecated( + since="0.1.2", + message="Use databricks_langchain.DatabricksVectorSearch", + removal="1.0.0", +) class DatabricksVectorSearch(VectorStore): """Databricks vector store integration.