diff --git a/libs/cli/langchain_cli/cli.py b/libs/cli/langchain_cli/cli.py index a6b67b1832146..fe7f0b8dc8ebd 100644 --- a/libs/cli/langchain_cli/cli.py +++ b/libs/cli/langchain_cli/cli.py @@ -4,16 +4,22 @@ from typing_extensions import Annotated from langchain_cli.namespaces import app as app_namespace +from langchain_cli.namespaces import integration as integration_namespace from langchain_cli.namespaces import template as template_namespace from langchain_cli.utils.packages import get_langserve_export, get_package_root -__version__ = "0.0.19" +__version__ = "0.0.20" app = typer.Typer(no_args_is_help=True, add_completion=False) app.add_typer( template_namespace.package_cli, name="template", help=template_namespace.__doc__ ) app.add_typer(app_namespace.app_cli, name="app", help=app_namespace.__doc__) +app.add_typer( + integration_namespace.integration_cli, + name="integration", + help=integration_namespace.__doc__, +) def version_callback(show_version: bool) -> None: diff --git a/libs/cli/langchain_cli/integration_template/.gitignore b/libs/cli/langchain_cli/integration_template/.gitignore new file mode 100644 index 0000000000000..bee8a64b79a99 --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/libs/cli/langchain_cli/integration_template/LICENSE b/libs/cli/langchain_cli/integration_template/LICENSE new file mode 100644 index 0000000000000..426b65090341f --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 LangChain, Inc. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/libs/cli/langchain_cli/integration_template/Makefile b/libs/cli/langchain_cli/integration_template/Makefile new file mode 100644 index 0000000000000..cf748963e2263 --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/Makefile @@ -0,0 +1,59 @@ +.PHONY: all format lint test tests integration_tests docker_tests help extended_tests + +# Default target executed when no arguments are given to make. +all: help + +# Define a variable for the test file path. +TEST_FILE ?= tests/unit_tests/ + +test: + poetry run pytest $(TEST_FILE) + +tests: + poetry run pytest $(TEST_FILE) + + +###################### +# LINTING AND FORMATTING +###################### + +# Define a variable for Python and notebook files. +PYTHON_FILES=. +MYPY_CACHE=.mypy_cache +lint format: PYTHON_FILES=. 
+lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/__package_name_short__ --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$') +lint_package: PYTHON_FILES=__module_name__ +lint_tests: PYTHON_FILES=tests +lint_tests: MYPY_CACHE=.mypy_cache_test + +lint lint_diff lint_package lint_tests: + poetry run ruff . + poetry run ruff format $(PYTHON_FILES) --diff + poetry run ruff --select I $(PYTHON_FILES) + mkdir -p $(MYPY_CACHE); poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE) + +format format_diff: + poetry run ruff format $(PYTHON_FILES) + poetry run ruff --select I --fix $(PYTHON_FILES) + +spell_check: + poetry run codespell --toml pyproject.toml + +spell_fix: + poetry run codespell --toml pyproject.toml -w + +check_imports: $(shell find __module_name__ -name '*.py') + poetry run python ./scripts/check_imports.py $^ + +###################### +# HELP +###################### + +help: + @echo '----' + @echo 'check_imports - check imports' + @echo 'format - run code formatters' + @echo 'lint - run linters' + @echo 'test - run unit tests' + @echo 'tests - run unit tests' + @echo 'test TEST_FILE= - run all tests in file' diff --git a/libs/cli/langchain_cli/integration_template/README.md b/libs/cli/langchain_cli/integration_template/README.md new file mode 100644 index 0000000000000..e1f3e352472a9 --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/README.md @@ -0,0 +1 @@ +# __package_name__ diff --git a/libs/cli/langchain_cli/integration_template/docs/chat.ipynb b/libs/cli/langchain_cli/integration_template/docs/chat.ipynb new file mode 100644 index 0000000000000..243262082fd37 --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/docs/chat.ipynb @@ -0,0 +1,97 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: __ModuleName__\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# 
Chat__ModuleName__\n", + "\n", + "This notebook covers how to get started with __ModuleName__ chat models.\n", + "\n", + "## Installation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c3bef91", + "metadata": {}, + "outputs": [], + "source": [ + "# install package\n", + "!pip install -U __package_name__" + ] + }, + { + "cell_type": "markdown", + "id": "2b4f3e15", + "metadata": {}, + "source": [ + "## Environment Setup\n", + "\n", + "Make sure to set the following environment variables:\n", + "\n", + "- TODO: fill out relevant environment variables or secrets\n", + "\n", + "## Usage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62e0dbc3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from __module_name__.chat_models import Chat__ModuleName__\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "\n", + "chat = Chat__ModuleName__()\n", + "\n", + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\"system\", \"You are a helpful assistant that translates English to French.\"),\n", + " (\"human\", \"Translate this sentence from English to French. 
{english_text}.\"),\n", + " ]\n", + ")\n", + "\n", + "chain = prompt | chat\n", + "chain.invoke({\"english_text\": \"Hello, how are you?\"})" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/cli/langchain_cli/integration_template/docs/llms.ipynb b/libs/cli/langchain_cli/integration_template/docs/llms.ipynb new file mode 100644 index 0000000000000..69c34d1af01a2 --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/docs/llms.ipynb @@ -0,0 +1,102 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "67db2992", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: __ModuleName__\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "9597802c", + "metadata": {}, + "source": [ + "# __ModuleName__LLM\n", + "\n", + "This example goes over how to use LangChain to interact with `__ModuleName__` models.\n", + "\n", + "## Installation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59c710c4", + "metadata": {}, + "outputs": [], + "source": [ + "# install package\n", + "!pip install -U __package_name__" + ] + }, + { + "cell_type": "markdown", + "id": "0ee90032", + "metadata": {}, + "source": [ + "## Environment Setup\n", + "\n", + "Make sure to set the following environment variables:\n", + "\n", + "- TODO: fill out relevant environment variables or secrets\n", + "\n", + "## Usage" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "035dea0f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain_core.prompts import PromptTemplate\n", + "from __module_name__.llms import 
__ModuleName__LLM\n", + "\n", + "template = \"\"\"Question: {question}\n", + "\n", + "Answer: Let's think step by step.\"\"\"\n", + "\n", + "prompt = PromptTemplate.from_string(template)\n", + "\n", + "model = __ModuleName__LLM()\n", + "\n", + "chain = prompt | model\n", + "\n", + "chain.invoke({\"question\": \"What is LangChain?\"})" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.11.1 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/cli/langchain_cli/integration_template/docs/provider.ipynb b/libs/cli/langchain_cli/integration_template/docs/provider.ipynb new file mode 100644 index 0000000000000..11f770a42b929 --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/docs/provider.ipynb @@ -0,0 +1,50 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# __ModuleName__\n", + "\n", + "__ModuleName__ is a platform that offers..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "y8ku6X96sebl" + }, + "outputs": [], + "source": [ + "from __module_name__.chat_models import Chat__ModuleName__\n", + "from __module_name__.llms import __ModuleName__LLM\n", + "from __module_name__.vectorstores import __ModuleName__VectorStore" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/libs/cli/langchain_cli/integration_template/docs/vectorstores.ipynb b/libs/cli/langchain_cli/integration_template/docs/vectorstores.ipynb new file mode 100644 index 0000000000000..5bd7c293fd561 --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/docs/vectorstores.ipynb @@ -0,0 +1,86 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "1957f5cb", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: __ModuleName__\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "ef1f0986", + "metadata": {}, + "source": [ + "# __ModuleName__VectorStore\n", + "\n", + "This notebook covers how to get started with the __ModuleName__ vector store.\n", + "\n", + "## Installation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d97b55c2", + "metadata": {}, + "outputs": [], + "source": [ + "# install package\n", + "!pip install -U __package_name__" + ] + }, + { + "cell_type": "markdown", + "id": "36fdc060", + "metadata": {}, + "source": [ + "## Environment Setup\n", + "\n", + "Make sure to set the following environment variables:\n", + "\n", + "- TODO: fill out relevant environment variables or secrets\n", + "\n", + 
"## Usage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc37144c-208d-4ab3-9f3a-0407a69fe052", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from __module_name__.vectorstores import __ModuleName__VectorStore\n", + "\n", + "# TODO: switch for preferred way to init and use your vector store\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/cli/langchain_cli/integration_template/integration_template/__init__.py b/libs/cli/langchain_cli/integration_template/integration_template/__init__.py new file mode 100644 index 0000000000000..1002bc50d3a8f --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/integration_template/__init__.py @@ -0,0 +1,5 @@ +from __module_name__.chat_models import Chat__ModuleName__ +from __module_name__.llms import __ModuleName__LLM +from __module_name__.vectorstores import __ModuleName__VectorStore + +__all__ = ["__ModuleName__LLM", "Chat__ModuleName__", "__ModuleName__VectorStore"] diff --git a/libs/cli/langchain_cli/integration_template/integration_template/chat_models.py b/libs/cli/langchain_cli/integration_template/integration_template/chat_models.py new file mode 100644 index 0000000000000..3f60c9e6a7224 --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/integration_template/chat_models.py @@ -0,0 +1,68 @@ +from typing import Any, AsyncIterator, Iterator, List, Optional + +from langchain_core.callbacks import ( + AsyncCallbackManagerForLLMRun, + CallbackManagerForLLMRun, +) +from langchain_core.language_models.chat_models import BaseChatModel +from 
langchain_core.messages import BaseMessage, BaseMessageChunk +from langchain_core.outputs import ChatGenerationChunk, ChatResult + + +class Chat__ModuleName__(BaseChatModel): + """Chat__ModuleName__ chat model. + + Example: + .. code-block:: python + + from __module_name__ import Chat__ModuleName__ + + + model = Chat__ModuleName__() + """ + + @property + def _llm_type(self) -> str: + """Return type of chat model.""" + return "chat-__package_name_short__" + + def _stream( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[ChatGenerationChunk]: + raise NotImplementedError + + async def _astream( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> AsyncIterator[ChatGenerationChunk]: + yield ChatGenerationChunk( + message=BaseMessageChunk(content="Yield chunks", type="ai"), + ) + yield ChatGenerationChunk( + message=BaseMessageChunk(content=" like this!", type="ai"), + ) + + def _generate( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> ChatResult: + raise NotImplementedError + + async def _agenerate( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> ChatResult: + raise NotImplementedError diff --git a/libs/cli/langchain_cli/integration_template/integration_template/llms.py b/libs/cli/langchain_cli/integration_template/integration_template/llms.py new file mode 100644 index 0000000000000..bd8c2fc37fadb --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/integration_template/llms.py @@ -0,0 +1,73 @@ +import asyncio +from functools import partial +from typing import ( + Any, + AsyncIterator, + Iterator, + List, + 
Optional, +) + +from langchain_core.callbacks import ( + AsyncCallbackManagerForLLMRun, + CallbackManagerForLLMRun, +) +from langchain_core.language_models import BaseLLM +from langchain_core.outputs import GenerationChunk, LLMResult + + +class __ModuleName__LLM(BaseLLM): + """__ModuleName__LLM large language models. + + Example: + .. code-block:: python + + from __module_name__ import __ModuleName__LLM + + model = __ModuleName__LLM() + """ + + @property + def _llm_type(self) -> str: + """Return type of LLM.""" + return "__package_name_short__-llm" + + def _generate( + self, + prompts: List[str], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> LLMResult: + raise NotImplementedError + + async def _agenerate( + self, + prompts: List[str], + stop: Optional[List[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> LLMResult: + # Change implementation if integration natively supports async generation. 
+ return await asyncio.get_running_loop().run_in_executor( + None, partial(self._generate, **kwargs), prompts, stop, run_manager + ) + + def _stream( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[GenerationChunk]: + raise NotImplementedError + + async def _astream( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> AsyncIterator[GenerationChunk]: + yield GenerationChunk(text="Yield chunks") + yield GenerationChunk(text=" like this!") diff --git a/libs/cli/langchain_cli/integration_template/integration_template/py.typed b/libs/cli/langchain_cli/integration_template/integration_template/py.typed new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/cli/langchain_cli/integration_template/integration_template/vectorstores.py b/libs/cli/langchain_cli/integration_template/integration_template/vectorstores.py new file mode 100644 index 0000000000000..10c22f22b2a64 --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/integration_template/vectorstores.py @@ -0,0 +1,179 @@ +from __future__ import annotations + +import asyncio +from functools import partial +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Iterable, + List, + Optional, + Tuple, + Type, + TypeVar, +) + +from langchain_core.embeddings import Embeddings +from langchain_core.vectorstores import VectorStore + +if TYPE_CHECKING: + from langchain_core.documents import Document + +VST = TypeVar("VST", bound=VectorStore) + + +class __ModuleName__VectorStore(VectorStore): + """Interface for vector store. + + Example: + .. 
code-block:: python + + from __module_name__.vectorstores import __ModuleName__VectorStore + + vectorstore = __ModuleName__VectorStore() + """ + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> List[str]: + raise NotImplementedError + + async def aadd_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> List[str]: + return await asyncio.get_running_loop().run_in_executor( + None, partial(self.add_texts, **kwargs), texts, metadatas + ) + + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]: + raise NotImplementedError + + async def adelete( + self, ids: Optional[List[str]] = None, **kwargs: Any + ) -> Optional[bool]: + raise NotImplementedError + + def similarity_search( + self, query: str, k: int = 4, **kwargs: Any + ) -> List[Document]: + raise NotImplementedError + + async def asimilarity_search( + self, query: str, k: int = 4, **kwargs: Any + ) -> List[Document]: + # This is a temporary workaround to make the similarity search + # asynchronous. The proper solution is to make the similarity search + # asynchronous in the vector store implementations. + func = partial(self.similarity_search, query, k=k, **kwargs) + return await asyncio.get_running_loop().run_in_executor(None, func) + + def similarity_search_with_score( + self, *args: Any, **kwargs: Any + ) -> List[Tuple[Document, float]]: + raise NotImplementedError + + async def asimilarity_search_with_score( + self, *args: Any, **kwargs: Any + ) -> List[Tuple[Document, float]]: + # This is a temporary workaround to make the similarity search + # asynchronous. The proper solution is to make the similarity search + # asynchronous in the vector store implementations. 
+ func = partial(self.similarity_search_with_score, *args, **kwargs) + return await asyncio.get_running_loop().run_in_executor(None, func) + + def similarity_search_by_vector( + self, embedding: List[float], k: int = 4, **kwargs: Any + ) -> List[Document]: + raise NotImplementedError + + async def asimilarity_search_by_vector( + self, embedding: List[float], k: int = 4, **kwargs: Any + ) -> List[Document]: + # This is a temporary workaround to make the similarity search + # asynchronous. The proper solution is to make the similarity search + # asynchronous in the vector store implementations. + func = partial(self.similarity_search_by_vector, embedding, k=k, **kwargs) + return await asyncio.get_running_loop().run_in_executor(None, func) + + def max_marginal_relevance_search( + self, + query: str, + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + **kwargs: Any, + ) -> List[Document]: + raise NotImplementedError + + async def amax_marginal_relevance_search( + self, + query: str, + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + **kwargs: Any, + ) -> List[Document]: + # This is a temporary workaround to make the similarity search + # asynchronous. The proper solution is to make the similarity search + # asynchronous in the vector store implementations. 
+ func = partial( + self.max_marginal_relevance_search, + query, + k=k, + fetch_k=fetch_k, + lambda_mult=lambda_mult, + **kwargs, + ) + return await asyncio.get_running_loop().run_in_executor(None, func) + + def max_marginal_relevance_search_by_vector( + self, + embedding: List[float], + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + **kwargs: Any, + ) -> List[Document]: + raise NotImplementedError + + async def amax_marginal_relevance_search_by_vector( + self, + embedding: List[float], + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + **kwargs: Any, + ) -> List[Document]: + raise NotImplementedError + + @classmethod + def from_texts( + cls: Type[VST], + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> VST: + raise NotImplementedError + + @classmethod + async def afrom_texts( + cls: Type[VST], + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> VST: + return await asyncio.get_running_loop().run_in_executor( + None, partial(cls.from_texts, **kwargs), texts, embedding, metadatas + ) + + def _select_relevance_score_fn(self) -> Callable[[float], float]: + raise NotImplementedError diff --git a/libs/cli/langchain_cli/integration_template/pyproject.toml b/libs/cli/langchain_cli/integration_template/pyproject.toml new file mode 100644 index 0000000000000..95d8fbcb1850b --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/pyproject.toml @@ -0,0 +1,88 @@ +[tool.poetry] +name = "__package_name__" +version = "0.0.1" +description = "An integration package connecting __ModuleName__ and LangChain" +authors = [] +readme = "README.md" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +langchain-core = ">=0.0.12" + +[tool.poetry.group.test] +optional = true + +[tool.poetry.group.test.dependencies] +pytest = "^7.3.0" +freezegun = "^1.2.2" +pytest-mock = "^3.10.0" +syrupy = "^4.0.2" +pytest-watcher = "^0.3.4" 
+pytest-asyncio = "^0.21.1" +langchain-core = {path = "../../core", develop = true} + +[tool.poetry.group.codespell] +optional = true + +[tool.poetry.group.codespell.dependencies] +codespell = "^2.2.0" + +[tool.poetry.group.test_integration] +optional = true + +[tool.poetry.group.test_integration.dependencies] + +[tool.poetry.group.lint] +optional = true + +[tool.poetry.group.lint.dependencies] +ruff = "^0.1.5" + +[tool.poetry.group.typing.dependencies] +mypy = "^0.991" +langchain-core = {path = "../../core", develop = true} + +[tool.poetry.group.dev] +optional = true + +[tool.poetry.group.dev.dependencies] +langchain-core = {path = "../../core", develop = true} + +[tool.ruff] +select = [ + "E", # pycodestyle + "F", # pyflakes + "I", # isort +] + +[tool.mypy] +disallow_untyped_defs = "True" + +[tool.coverage.run] +omit = [ + "tests/*", +] + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" + +[tool.pytest.ini_options] +# --strict-markers will raise errors on unknown marks. +# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks +# +# https://docs.pytest.org/en/7.1.x/reference/reference.html +# --strict-config any warnings encountered while parsing the `pytest` +# section of the configuration file raise errors. +# +# https://github.com/tophat/syrupy +# --snapshot-warn-unused Prints a warning on unused snapshots rather than fail the test suite. +addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5" +# Registering custom markers. 
+# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers +markers = [ + "requires: mark tests as requiring a specific library", + "asyncio: mark tests as requiring asyncio", + "compile: mark placeholder test used to compile integration tests without running them", +] +asyncio_mode = "auto" diff --git a/libs/cli/langchain_cli/integration_template/scripts/check_imports.py b/libs/cli/langchain_cli/integration_template/scripts/check_imports.py new file mode 100644 index 0000000000000..fd21a4975b7f0 --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/scripts/check_imports.py @@ -0,0 +1,17 @@ +import sys +import traceback +from importlib.machinery import SourceFileLoader + +if __name__ == "__main__": + files = sys.argv[1:] + has_failure = False + for file in files: + try: + SourceFileLoader("x", file).load_module() + except Exception: + has_failure = True  # remember the failure so the exit code is non-zero + print(file) + traceback.print_exc() + print() + + sys.exit(1 if has_failure else 0) diff --git a/libs/cli/langchain_cli/integration_template/scripts/check_pydantic.sh b/libs/cli/langchain_cli/integration_template/scripts/check_pydantic.sh new file mode 100755 index 0000000000000..06b5bb81ae236 --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/scripts/check_pydantic.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# +# This script searches for lines starting with "import pydantic" or "from pydantic" +# in tracked files within a Git repository. 
+# +# Usage: ./scripts/check_pydantic.sh /path/to/repository + +# Check if a path argument is provided +if [ $# -ne 1 ]; then + echo "Usage: $0 /path/to/repository" + exit 1 +fi + +repository_path="$1" + +# Search for lines matching the pattern within the specified repository +result=$(git -C "$repository_path" grep -E '^import pydantic|^from pydantic') + +# Check if any matching lines were found +if [ -n "$result" ]; then + echo "ERROR: The following lines need to be updated:" + echo "$result" + echo "Please replace the code with an import from langchain_core.pydantic_v1." + echo "For example, replace 'from pydantic import BaseModel'" + echo "with 'from langchain_core.pydantic_v1 import BaseModel'" + exit 1 +fi diff --git a/libs/cli/langchain_cli/integration_template/scripts/lint_imports.sh b/libs/cli/langchain_cli/integration_template/scripts/lint_imports.sh new file mode 100755 index 0000000000000..695613c7ba8fd --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/scripts/lint_imports.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -eu + +# Initialize a variable to keep track of errors +errors=0 + +# make sure not importing from langchain or langchain_experimental +git --no-pager grep '^from langchain\.' . && errors=$((errors+1)) +git --no-pager grep '^from langchain_experimental\.' . 
&& errors=$((errors+1)) + +# Decide on an exit status based on the errors +if [ "$errors" -gt 0 ]; then + exit 1 +else + exit 0 +fi diff --git a/libs/cli/langchain_cli/integration_template/tests/__init__.py b/libs/cli/langchain_cli/integration_template/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/cli/langchain_cli/integration_template/tests/integration_tests/__init__.py b/libs/cli/langchain_cli/integration_template/tests/integration_tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_chat_models.py b/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_chat_models.py new file mode 100644 index 0000000000000..e88648cdc5aff --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_chat_models.py @@ -0,0 +1,63 @@ +"""Test Chat__ModuleName__ chat model.""" +from __module_name__.chat_models import Chat__ModuleName__ + + +def test_stream() -> None: + """Test streaming tokens from Chat__ModuleName__.""" + llm = Chat__ModuleName__() + + for token in llm.stream("I'm Pickle Rick"): + assert isinstance(token.content, str) + + +async def test_astream() -> None: + """Test streaming tokens from Chat__ModuleName__.""" + llm = Chat__ModuleName__() + + async for token in llm.astream("I'm Pickle Rick"): + assert isinstance(token.content, str) + + +async def test_abatch() -> None: + """Test streaming tokens from Chat__ModuleName__.""" + llm = Chat__ModuleName__() + + result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"]) + for token in result: + assert isinstance(token.content, str) + + +async def test_abatch_tags() -> None: + """Test batch tokens from Chat__ModuleName__.""" + llm = Chat__ModuleName__() + + result = await llm.abatch( + ["I'm Pickle Rick", "I'm not Pickle Rick"], config={"tags": ["foo"]} + ) + for token in result: + assert isinstance(token.content, str) + + +def 
test_batch() -> None: + """Test batch tokens from Chat__ModuleName__.""" + llm = Chat__ModuleName__() + + result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"]) + for token in result: + assert isinstance(token.content, str) + + +async def test_ainvoke() -> None: + """Test invoke tokens from Chat__ModuleName__.""" + llm = Chat__ModuleName__() + + result = await llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]}) + assert isinstance(result.content, str) + + +def test_invoke() -> None: + """Test invoke tokens from Chat__ModuleName__.""" + llm = Chat__ModuleName__() + + result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"])) + assert isinstance(result.content, str) diff --git a/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_compile.py b/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_compile.py new file mode 100644 index 0000000000000..33ecccdfa0fbd --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_compile.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.mark.compile +def test_placeholder() -> None: + """Used for compiling integration tests without running any real tests.""" + pass diff --git a/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_llms.py b/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_llms.py new file mode 100644 index 0000000000000..64708c58497ed --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/tests/integration_tests/test_llms.py @@ -0,0 +1,63 @@ +"""Test __ModuleName__LLM llm.""" +from __module_name__.llms import __ModuleName__LLM + + +def test_stream() -> None: + """Test streaming tokens from __ModuleName__LLM.""" + llm = __ModuleName__LLM() + + for token in llm.stream("I'm Pickle Rick"): + assert isinstance(token, str) + + +async def test_astream() -> None: + """Test streaming tokens from __ModuleName__LLM.""" + llm = __ModuleName__LLM() + + async for token in llm.astream("I'm Pickle Rick"): + 
assert isinstance(token, str) + + +async def test_abatch() -> None: + """Test streaming tokens from __ModuleName__LLM.""" + llm = __ModuleName__LLM() + + result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"]) + for token in result: + assert isinstance(token, str) + + +async def test_abatch_tags() -> None: + """Test batch tokens from __ModuleName__LLM.""" + llm = __ModuleName__LLM() + + result = await llm.abatch( + ["I'm Pickle Rick", "I'm not Pickle Rick"], config={"tags": ["foo"]} + ) + for token in result: + assert isinstance(token, str) + + +def test_batch() -> None: + """Test batch tokens from __ModuleName__LLM.""" + llm = __ModuleName__LLM() + + result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"]) + for token in result: + assert isinstance(token, str) + + +async def test_ainvoke() -> None: + """Test invoke tokens from __ModuleName__LLM.""" + llm = __ModuleName__LLM() + + result = await llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]}) + assert isinstance(result, str) + + +def test_invoke() -> None: + """Test invoke tokens from __ModuleName__LLM.""" + llm = __ModuleName__LLM() + + result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"])) + assert isinstance(result, str) diff --git a/libs/cli/langchain_cli/integration_template/tests/unit_tests/__init__.py b/libs/cli/langchain_cli/integration_template/tests/unit_tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_chat_models.py b/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_chat_models.py new file mode 100644 index 0000000000000..f99dc41ebb1b4 --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_chat_models.py @@ -0,0 +1,9 @@ +"""Test chat model integration.""" + + +from __module_name__.chat_models import Chat__ModuleName__ + + +def test_initialization() -> None: + """Test chat model initialization.""" + 
Chat__ModuleName__()
diff --git a/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_imports.py b/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_imports.py
new file mode 100644
index 0000000000000..40d8ec22581bf
--- /dev/null
+++ b/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_imports.py
@@ -0,0 +1,7 @@
+from __module_name__ import __all__
+
+EXPECTED_ALL = ["__ModuleName__LLM", "Chat__ModuleName__", "__ModuleName__VectorStore"]
+
+
+def test_all_imports() -> None:
+    assert sorted(EXPECTED_ALL) == sorted(__all__)
diff --git a/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_llms.py b/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_llms.py
new file mode 100644
index 0000000000000..5a36c0db6a29f
--- /dev/null
+++ b/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_llms.py
@@ -0,0 +1,7 @@
+"""Test the __ModuleName__LLM wrapper."""
+from __module_name__ import __ModuleName__LLM
+
+
+def test_initialization() -> None:
+    """Test integration initialization."""
+    __ModuleName__LLM()
diff --git a/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_vectorstores.py b/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_vectorstores.py
new file mode 100644
index 0000000000000..6c044a9a66fb4
--- /dev/null
+++ b/libs/cli/langchain_cli/integration_template/tests/unit_tests/test_vectorstores.py
@@ -0,0 +1,6 @@
+from __module_name__.vectorstores import __ModuleName__VectorStore
+
+
+def test_initialization() -> None:
+    """Test integration vectorstore initialization."""
+    __ModuleName__VectorStore()
diff --git a/libs/cli/langchain_cli/namespaces/integration.py b/libs/cli/langchain_cli/namespaces/integration.py
new file mode 100644
index 0000000000000..694d87ff91dcd
--- /dev/null
+++ b/libs/cli/langchain_cli/namespaces/integration.py
@@ -0,0 +1,139 @@
+"""
+Develop integration packages for LangChain.
+"""
+
+import re
+import shutil
+import subprocess
+from pathlib import Path
+from typing import Optional
+
+import typer
+from typing_extensions import Annotated, TypedDict
+
+from langchain_cli.utils.find_replace import replace_glob
+
+integration_cli = typer.Typer(no_args_is_help=True, add_completion=False)
+
+Replacements = TypedDict(
+    "Replacements",
+    {
+        "__package_name__": str,
+        "__module_name__": str,
+        "__ModuleName__": str,
+        "__package_name_short__": str,
+    },
+)
+
+
+def _process_name(name: str) -> Replacements:
+    """Normalize an integration name and derive the template substitutions.
+
+    Underscores become hyphens, the name is lowercased and a leading
+    ``langchain-`` prefix is dropped before validation.
+
+    Args:
+        name: Name entered by the user, e.g. ``my-integration``.
+
+    Returns:
+        Mapping from each template placeholder to its replacement text.
+
+    Raises:
+        ValueError: If the normalized name does not start with a letter,
+            contains characters other than ``a-z``, ``0-9`` and ``-``, ends
+            with a hyphen, or contains consecutive hyphens.
+    """
+    preprocessed = name.replace("_", "-").lower()
+
+    if preprocessed.startswith("langchain-"):
+        preprocessed = preprocessed[len("langchain-") :]
+
+    # fullmatch: `$` in re.match would also accept a trailing newline
+    if not re.fullmatch(r"[a-z][a-z0-9-]*", preprocessed):
+        raise ValueError(
+            "Name should only contain lowercase letters (a-z), numbers, and hyphens"
+            ", and start with a letter."
+        )
+    if preprocessed.endswith("-"):
+        raise ValueError("Name should not end with `-`.")
+    if "--" in preprocessed:
+        raise ValueError("Name should not contain consecutive hyphens.")
+    return Replacements(
+        {
+            "__package_name__": f"langchain-{preprocessed}",
+            "__module_name__": "langchain_" + preprocessed.replace("-", "_"),
+            "__ModuleName__": preprocessed.title().replace("-", ""),
+            "__package_name_short__": preprocessed,
+        }
+    )
+
+
+@integration_cli.command()
+def new(
+    name: Annotated[
+        str,
+        typer.Option(
+            help="The name of the integration to create (e.g. `my-integration`)",
+            prompt=True,
+        ),
+    ],
+    name_class: Annotated[
+        Optional[str],
+        typer.Option(
+            help="The name of the integration in PascalCase. e.g. `MyIntegration`."
+            " This is used to name classes like `MyIntegrationVectorStore`"
+        ),
+    ] = None,
+) -> None:
+    """
+    Creates a new integration package.
+
+    Should be run from libs/partners
+    """
+    # confirm that we are in the right directory
+    if not Path.cwd().name == "partners" or not Path.cwd().parent.name == "libs":
+        typer.echo(
+            "This command should be run from the `libs/partners` directory in the "
+            "langchain-ai/langchain monorepo. Continuing is NOT recommended."
+        )
+        typer.confirm("Are you sure you want to continue?", abort=True)
+
+    try:
+        replacements = _process_name(name)
+    except ValueError as e:
+        typer.echo(e)
+        raise typer.Exit(code=1) from e
+
+    if name_class:
+        if not re.fullmatch(r"[A-Z][a-zA-Z0-9]*", name_class):
+            typer.echo(
+                "Name should only contain letters (a-z, A-Z) and numbers"
+                ", and start with a capital letter."
+            )
+            raise typer.Exit(code=1)
+        replacements["__ModuleName__"] = name_class
+    else:
+        replacements["__ModuleName__"] = typer.prompt(
+            "Name of integration in PascalCase", default=replacements["__ModuleName__"]
+        )
+
+    destination_dir = Path.cwd() / replacements["__package_name_short__"]
+    if destination_dir.exists():
+        typer.echo(f"Folder {destination_dir} exists.")
+        raise typer.Exit(code=1)
+
+    # copy over template from ../integration_template
+    project_template_dir = Path(__file__).parents[1] / "integration_template"
+    shutil.copytree(project_template_dir, destination_dir, dirs_exist_ok=False)
+
+    # folder movement
+    package_dir = destination_dir / replacements["__module_name__"]
+    shutil.move(destination_dir / "integration_template", package_dir)
+
+    # replacements in files
+    replace_glob(destination_dir, "**/*", replacements)
+
+    # poetry install
+    subprocess.run(
+        ["poetry", "install", "--with", "lint,test,typing,test_integration"],
+        cwd=destination_dir,
+    )
diff --git a/libs/cli/langchain_cli/utils/find_replace.py b/libs/cli/langchain_cli/utils/find_replace.py
new file mode 100644
index 0000000000000..7053a9cddae77
--- /dev/null
+++ b/libs/cli/langchain_cli/utils/find_replace.py
@@ -0,0 +1,42 @@
+from pathlib import Path
+from typing import Dict
+
+
+def find_and_replace(source: str, replacements: Dict[str, str]) -> str:
+    """Return ``source`` with every key of ``replacements`` replaced by its value.
+
+    Keys are applied one after another in sorted order, so the result does not
+    depend on dict ordering; text inserted by an earlier key is visible to
+    later keys.
+    """
+    rtn = source
+
+    # replace keys in deterministic alphabetical order
+    for find in sorted(replacements):
+        rtn = rtn.replace(find, replacements[find])
+    return rtn
+
+
+def replace_file(source: Path, replacements: Dict[str, str]) -> None:
+    """Apply ``replacements`` to the text file at ``source`` in place.
+
+    Files that cannot be decoded as text (e.g. compiled ``.pyc`` caches copied
+    along with a template) are left untouched instead of aborting the run, and
+    a file is only rewritten when a substitution actually changed it.
+    """
+    try:
+        content = source.read_text()
+    except UnicodeDecodeError:
+        # binary file: there is no text to substitute
+        return
+    new_content = find_and_replace(content, replacements)
+    if new_content != content:
+        source.write_text(new_content)
+
+
+def replace_glob(parent: Path, glob: str, replacements: Dict[str, str]) -> None:
+    """Apply ``replacements`` to each file matched by ``parent.glob(glob)``."""
+    for file in parent.glob(glob):
+        if not file.is_file():
+            continue
+        replace_file(file, replacements)
diff --git a/libs/cli/pyproject.toml b/libs/cli/pyproject.toml
index 1d32386f8a2a0..f517ee7550514 100644
--- a/libs/cli/pyproject.toml
+++ b/libs/cli/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langchain-cli"
-version = "0.0.19"
+version = "0.0.20"
 description = "CLI for interacting with LangChain"
 authors = ["Erick Friis "]
 license = "MIT"
diff --git a/libs/community/langchain_community/vectorstores/neo4j_vector.py b/libs/community/langchain_community/vectorstores/neo4j_vector.py
index 7ccc2e7d109cc..bb8f1b9b30136 100644
--- a/libs/community/langchain_community/vectorstores/neo4j_vector.py
+++ b/libs/community/langchain_community/vectorstores/neo4j_vector.py
@@ -48,14 +48,19 @@ def _get_search_index_query(search_type: SearchType) -> str:
             "CALL { "
             "CALL db.index.vector.queryNodes($index, $k, $embedding) "
             "YIELD node, score "
-            "RETURN node, score UNION "
+            "WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
+            "UNWIND nodes AS n "
+            # We use 0 as min
+            "RETURN n.node AS node, (n.score / max) AS score UNION "
             "CALL db.index.fulltext.queryNodes($keyword_index, $query, {limit: $k}) "
             "YIELD node, score "
             "WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
             "UNWIND nodes AS n "
-            "RETURN n.node AS node, (n.score / max) AS score "  # We use 0 as min
+            # We use 0 as min
+            "RETURN n.node AS node, (n.score / max) AS score "
             "} "
-            "WITH node, max(score) AS score ORDER BY score DESC LIMIT $k "  # dedup
+            # dedup
+            "WITH node, max(score) AS score ORDER BY score DESC LIMIT $k "
         ),
     }
     return type_to_query_map[search_type]
@@ -75,6 +80,26 @@ def sort_by_index_name(
     return sorted(lst, key=lambda x: x.get("index_name") != index_name)
 
 
+def remove_lucene_chars(text: str) -> str:
+    """Remove Lucene special characters from ``text``.
+
+    Each character that the Lucene classic query parser treats as syntax
+    (``+ - & | ! ( ) { } [ ] ^ " ~ * ? : \\ /``) is replaced by a space, then
+    runs of whitespace are collapsed and the ends are trimmed.
+
+    Args:
+        text: Raw query text.
+
+    Returns:
+        ``text`` without Lucene operators, words separated by single spaces.
+    """
+    special_chars = '+-&|!(){}[]^"~*?:\\/'
+    # One translate() pass instead of one str.replace() per character.
+    cleaned = text.translate(str.maketrans(dict.fromkeys(special_chars, " ")))
+    # "a&&b" would otherwise come back as "a  b"; split/join collapses the gap.
+    return " ".join(cleaned.split())
+
+
 class Neo4jVector(VectorStore):
     """`Neo4j` vector index.
 
@@ -589,7 +614,7 @@ def similarity_search_with_score_by_vector(
             "k": k,
             "embedding": embedding,
             "keyword_index": self.keyword_index_name,
-            "query": kwargs["query"],
+            "query": remove_lucene_chars(kwargs["query"]),
         }
 
         results = self.query(read_query, params=parameters)
diff --git a/libs/community/tests/integration_tests/vectorstores/test_neo4jvector.py b/libs/community/tests/integration_tests/vectorstores/test_neo4jvector.py
index f7ba418b47292..cb0c79a3a0adf 100644
--- a/libs/community/tests/integration_tests/vectorstores/test_neo4jvector.py
+++ b/libs/community/tests/integration_tests/vectorstores/test_neo4jvector.py
@@ -4,7 +4,11 @@
 
 from langchain_core.documents import Document
 
-from langchain_community.vectorstores.neo4j_vector import Neo4jVector, SearchType
+from langchain_community.vectorstores.neo4j_vector import (
+    Neo4jVector,
+    SearchType,
+    _get_search_index_query,
+)
 from langchain_community.vectorstores.utils import DistanceStrategy
 from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
 
@@ -14,7 +18,7 @@
 
 OS_TOKEN_COUNT = 1536
 
-texts = ["foo", "bar", "baz"]
+texts = ["foo", "bar", "baz", "It is the end of the world.
Take shelter!"]
 
 """
 cd tests/integration_tests/vectorstores/docker-compose
@@ -615,3 +619,63 @@ def test_neo4jvector_from_existing_graph_multiple_properties_hybrid() -> None:
     assert output == [Document(page_content="\nname: Foo\nname2: Fooz")]
 
     drop_vector_indexes(existing)
+
+
+def test_neo4jvector_special_character() -> None:
+    """Test hybrid search with a query containing Lucene special characters."""
+    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
+    text_embedding_pairs = list(zip(texts, text_embeddings))
+    docsearch = Neo4jVector.from_embeddings(
+        text_embeddings=text_embedding_pairs,
+        embedding=FakeEmbeddingsWithOsDimension(),
+        url=url,
+        username=username,
+        password=password,
+        pre_delete_collection=True,
+        search_type=SearchType.HYBRID,
+    )
+    output = docsearch.similarity_search(
+        "It is the end of the world. Take shelter!", k=1
+    )
+    assert output == [
+        Document(page_content="It is the end of the world. Take shelter!", metadata={})
+    ]
+
+    drop_vector_indexes(docsearch)
+
+
+def test_hybrid_score_normalization() -> None:
+    """Test that vector and full-text scores are both normalized to 1.0."""
+    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
+    text_embedding_pairs = list(zip(["foo"], text_embeddings))
+    docsearch = Neo4jVector.from_embeddings(
+        text_embeddings=text_embedding_pairs,
+        embedding=FakeEmbeddingsWithOsDimension(),
+        url=url,
+        username=username,
+        password=password,
+        pre_delete_collection=True,
+        search_type=SearchType.HYBRID,
+    )
+    # Drop the trailing dedup clause. str.rstrip() would treat its argument as a
+    # set of characters rather than a suffix, so cut at the clause instead.
+    rrf_query = (
+        _get_search_index_query(SearchType.HYBRID)
+        .rsplit("WITH node, max(score) AS score", 1)[0]
+        .replace("UNION", "UNION ALL")
+        + "RETURN node.text AS text, score LIMIT 2"
+    )
+
+    output = docsearch.query(
+        rrf_query,
+        params={
+            "index": "vector",
+            "k": 1,
+            "embedding": FakeEmbeddingsWithOsDimension().embed_query("foo"),
+            "query": "foo",
+            "keyword_index": "keyword",
+        },
+    )
+    # Both FT and Vector must return 1.0 score
+    assert output == [{"text": "foo", "score": 1.0}, {"text": "foo", "score": 1.0}]
+    drop_vector_indexes(docsearch)
diff --git a/libs/community/tests/unit_tests/vectorstores/test_neo4j.py b/libs/community/tests/unit_tests/vectorstores/test_neo4j.py
new file mode 100644
index 0000000000000..280334283eb35
--- /dev/null
+++ b/libs/community/tests/unit_tests/vectorstores/test_neo4j.py
@@ -0,0 +1,45 @@
+"""Test Neo4j functionality."""
+
+from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
+
+
+def test_escaping_lucene() -> None:
+    """Test removing Lucene special characters."""
+    assert remove_lucene_chars("Hello+World") == "Hello World"
+    assert remove_lucene_chars("Hello World\\") == "Hello World"
+    assert (
+        remove_lucene_chars("It is the end of the world. Take shelter!")
+        == "It is the end of the world. Take shelter"
+    )
+    assert (
+        remove_lucene_chars("It is the end of the world. Take shelter&&")
+        == "It is the end of the world. Take shelter"
+    )
+    assert (
+        remove_lucene_chars("Bill&&Melinda Gates Foundation")
+        == "Bill Melinda Gates Foundation"
+    )
+    assert (
+        remove_lucene_chars("It is the end of the world. Take shelter(&&)")
+        == "It is the end of the world. Take shelter"
+    )
+    assert (
+        remove_lucene_chars("It is the end of the world. Take shelter??")
+        == "It is the end of the world. Take shelter"
+    )
+    assert (
+        remove_lucene_chars("It is the end of the world. Take shelter^")
+        == "It is the end of the world. Take shelter"
+    )
+    assert (
+        remove_lucene_chars("It is the end of the world. Take shelter+")
+        == "It is the end of the world. Take shelter"
+    )
+    assert (
+        remove_lucene_chars("It is the end of the world. Take shelter-")
+        == "It is the end of the world. Take shelter"
+    )
+    assert (
+        remove_lucene_chars("It is the end of the world. Take shelter~")
+        == "It is the end of the world. Take shelter"
+    )