diff --git a/docs/docs/integrations/retrievers/nimbleway.ipynb b/docs/docs/integrations/retrievers/nimbleway.ipynb new file mode 100644 index 0000000000000..29b8ec1145067 --- /dev/null +++ b/docs/docs/integrations/retrievers/nimbleway.ipynb @@ -0,0 +1,250 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Nimble\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "72ee0c4b-9764-423a-9dbf-95129e185210", + "metadata": {}, + "source": [ + "# NimblewayRetriever\n", + "\n", + "This will help you get started with the Nimble [retriever](/docs/concepts/#retrievers). For detailed documentation of all NimblewayRetriever features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/retrievers/langchain_nimble.retrievers.Nimble.NimbleRetriever.html).\n", + "\n", + "\n", + "## Setup\n", + "\n", + "If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:" + ] + }, + { + "cell_type": "code", + "id": "a15d341e-3e26-4ca3-830b-5aab30ed66de", + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-12T17:53:37.779960Z", + "start_time": "2025-01-12T17:53:37.775887Z" + } + }, + "source": [ + "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n", + "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"" + ], + "outputs": [], + "execution_count": 1 + }, + { + "cell_type": "markdown", + "id": "0730d6a1-c893-4840-9817-5e5251676d5d", + "metadata": {}, + "source": [ + "### Installation\n", + "\n", + "This retriever lives in the `langchain-community` package." 
+ ] + }, + { + "cell_type": "code", + "id": "652d6238-1f87-422a-b135-f5abbb8652fc", + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-12T17:53:42.215483Z", + "start_time": "2025-01-12T17:53:37.907588Z" + } + }, + "source": "%pip install -qU langchain-community", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "execution_count": 2 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "We also need to set our Nimble API key.", + "id": "e0b6f0a0eb215a80" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-12T17:55:32.914431Z", + "start_time": "2025-01-12T17:53:42.296223Z" + } + }, + "cell_type": "code", + "source": [ + "import getpass\n", + "import os\n", + "\n", + "os.environ[\"NIMBLE_API_KEY\"] = getpass.getpass()" + ], + "id": "4c6dc24c441ec1f0", + "outputs": [], + "execution_count": 3 + }, + { + "cell_type": "markdown", + "id": "a38cde65-254d-4219-a441-068766c0d4b5", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our retriever:\n", + "\n", + "- TODO: Update model instantiation with relevant params." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70cc8e65-2a02-408a-bbc6-8ef649057d82", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.retrievers import NimblewayRetriever\n", + "\n", + "retriever = NimblewayRetriever(api_key=os.environ[\"NIMBLE_API_KEY\"], num_results=3)" + ] + }, + { + "cell_type": "markdown", + "id": "5c5f2839-4020-424e-9fc9-07777eede442", + "metadata": {}, + "source": [ + "## Usage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51a60dbe-9f2e-4e04-bb62-23968f17164a", + "metadata": {}, + "outputs": [], + "source": [ + "query = \"Nimbleway\"\n", + "\n", + "retriever.invoke(query)" + ] + }, + { + "cell_type": "markdown", + "id": "dfe8aad4-8626-4330-98a9-7ea1ca5d2e0e", + "metadata": {}, + "source": [ + "## Use within a chain\n", + "\n", + "Like other retrievers, NimblewayRetriever can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n", + "\n", + "We will need an LLM or chat model:\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25b647a3-f8f2-4541-a289-7a241e43f9df", + "metadata": {}, + "outputs": [], + "source": [ + "# | output: false\n", + "# | echo: false\n", + "\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_core.runnables import RunnablePassthrough\n", + "\n", + "prompt = ChatPromptTemplate.from_template(\n", + " \"\"\"Answer the question based only on the context provided.\n", + "\n", + "Context: {context}\n", + "\n", + "Question: {question}\"\"\"\n", + ")\n", + "\n", + "\n", + 
"def format_docs(docs):\n", + " return \"\\n\\n\".join(doc.page_content for doc in docs)\n", + "\n", + "\n", + "chain = (\n", + " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n", + " | prompt\n", + " | llm\n", + " | StrOutputParser()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d47c37dd-5c11-416c-a3b6-bec413cd70e8", + "metadata": {}, + "outputs": [], + "source": [ + "chain.invoke(\"...\")" + ] + }, + { + "cell_type": "markdown", + "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all NimbleRetriever features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/retrievers/langchain_nimble.retrievers.Nimble.NimbleRetriever.html)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/community/langchain_community/retrievers/__init__.py b/libs/community/langchain_community/retrievers/__init__.py index ce4ac731bde28..fa09c21842de1 100644 --- a/libs/community/langchain_community/retrievers/__init__.py +++ b/libs/community/langchain_community/retrievers/__init__.py @@ -144,7 +144,9 @@ from langchain_community.retrievers.zilliz import ( ZillizRetriever, ) - + from langchain_community.retrievers.nimbleway import( + NimblewayRetriever, + ) _module_lookup = { "AmazonKendraRetriever": "langchain_community.retrievers.kendra", @@ -193,6 +195,7 @@ "ZepCloudRetriever": "langchain_community.retrievers.zep_cloud", "ZillizRetriever": "langchain_community.retrievers.zilliz", 
"NeuralDBRetriever": "langchain_community.retrievers.thirdai_neuraldb", + "NimblewayRetriever": "langchain_community.retrievers.nimbleway", } @@ -250,4 +253,5 @@ def __getattr__(name: str) -> Any: "ZepRetriever", "ZepCloudRetriever", "ZillizRetriever", + "NimblewayRetriever", ] diff --git a/libs/community/langchain_community/retrievers/nimbleway.py b/libs/community/langchain_community/retrievers/nimbleway.py new file mode 100644 index 0000000000000..792812ca2b7d2 --- /dev/null +++ b/libs/community/langchain_community/retrievers/nimbleway.py @@ -0,0 +1,81 @@ +from enum import Enum +from typing import List, Any +import requests +import os + +from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun +from langchain_core.documents.base import Document +from langchain_core.retrievers import BaseRetriever + + +class SearchEngine(str, Enum): + """ + Enum representing the search engines supported by Nimble + """ + GOOGLE = "google_search" + GOOGLE_SGE = "google_sge" + BING = "bing_search" + YANDEX = "yandex_search" + + +class ParsingType(str, Enum): + """ + Enum representing the parsing types supported by Nimble + """ + PLAIN_TEXT = "plain_text" + MARKDOWN = "markdown" + SIMPLIFIED_HTML = "simplified_html" + + +class NimblewayRetriever(BaseRetriever): + """Nimbleway Search API retriever. + Allows you to retrieve search results from Google, Bing, and Yandex. + Visit https://www.nimbleway.com/ and sign up to receive an API key and to see more info. + + Args: + api_key: The API key for Nimbleway. + search_engine: The search engine to use. Default is Google. 
+ """ + + api_key: str + num_results: int = 3 + search_engine: SearchEngine = SearchEngine.GOOGLE + parse: bool = False + render: bool = True + locale: str = "en" + country: str = "US" + parsing_type: ParsingType = ParsingType.PLAIN_TEXT + + def _get_relevant_documents( + self, query: str, *, run_manager: CallbackManagerForRetrieverRun + ) -> List[Document]: + request_body = { + 'query': query, + 'num_results': self.num_results, + 'search_engine': self.search_engine.value, + 'parse': self.parse, + 'render': self.render, + 'locale': self.locale, + 'country': self.country, + 'parsing_type': self.parsing_type + } + + response = requests.post("https://searchit-server.crawlit.live/search", + json=request_body, + headers={ + 'Authorization': f'Basic {self.api_key or os.getenv("NIMBLE_API_KEY")}', + 'Content-Type': 'application/json' + }) + response.raise_for_status() + raw_json_content = response.json() + docs = [Document(page_content=doc.get("page_content", ""), + metadata={ + "title": doc.get("metadata", {}).get("title", ""), + "snippet": doc.get("metadata", {}).get("snippet", ""), + "url": doc.get("metadata", {}).get("url", ""), + "position": doc.get("metadata", {}).get("position", -1), + "entity_type": doc.get("metadata", {}).get("entity_type", "") + } + ) + for doc in raw_json_content.get('body', [])] + return docs