diff --git a/docs/docs/integrations/chat/mlx.ipynb b/docs/docs/integrations/chat/mlx.ipynb index a5945dffae408..dc852da549d55 100644 --- a/docs/docs/integrations/chat/mlx.ipynb +++ b/docs/docs/integrations/chat/mlx.ipynb @@ -155,8 +155,48 @@ "tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\n", "\n", "# setup ReAct style prompt\n", - "prompt = hub.pull(\"hwchase17/react-json\")\n", - "prompt = prompt.partial(\n", + "# Based on 'hwchase17/react' prompt modification, cause mlx does not support the `System` role\n", + "human_prompt = \"\"\"\n", + "Answer the following questions as best you can. You have access to the following tools:\n", + "\n", + "{tools}\n", + "\n", + "The way you use the tools is by specifying a json blob.\n", + "Specifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).\n", + "\n", + "The only values that should be in the \"action\" field are: {tool_names}\n", + "\n", + "The $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB:\n", + "\n", + "```\n", + "{{\n", + " \"action\": $TOOL_NAME,\n", + " \"action_input\": $INPUT\n", + "}}\n", + "```\n", + "\n", + "ALWAYS use the following format:\n", + "\n", + "Question: the input question you must answer\n", + "Thought: you should always think about what to do\n", + "Action:\n", + "```\n", + "$JSON_BLOB\n", + "```\n", + "Observation: the result of the action\n", + "... (this Thought/Action/Observation can repeat N times)\n", + "Thought: I now know the final answer\n", + "Final Answer: the final answer to the original input question\n", + "\n", + "Begin! 
Reminder to always use the exact characters `Final Answer` when responding.\n", + "\n", + "{input}\n", + "\n", + "{agent_scratchpad}\n", + "\n", + "\"\"\"\n", + "\n", + "prompt = human_prompt.partial(\n", " tools=render_text_description(tools),\n", " tool_names=\", \".join([t.name for t in tools]),\n", ")\n", @@ -207,7 +247,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.12.7" } }, "nbformat": 4, diff --git a/docs/docs/integrations/providers/cratedb.mdx b/docs/docs/integrations/providers/cratedb.mdx new file mode 100644 index 0000000000000..24e47930407c0 --- /dev/null +++ b/docs/docs/integrations/providers/cratedb.mdx @@ -0,0 +1,132 @@ +# CrateDB + +> [CrateDB] is a distributed and scalable SQL database for storing and +> analyzing massive amounts of data in near real-time, even with complex +> queries. It is PostgreSQL-compatible, based on Lucene, and inheriting +> from Elasticsearch. + + +## Installation and Setup + +### Setup CrateDB +There are two ways to get started with CrateDB quickly. Alternatively, +choose other [CrateDB installation options]. + +#### Start CrateDB on your local machine +Example: Run a single-node CrateDB instance with security disabled, +using Docker or Podman. This is not recommended for production use. + +```bash +docker run --name=cratedb --rm \ + --publish=4200:4200 --publish=5432:5432 --env=CRATE_HEAP_SIZE=2g \ + crate:latest -Cdiscovery.type=single-node +``` + +#### Deploy cluster on CrateDB Cloud +[CrateDB Cloud] is a managed CrateDB service. Sign up for a +[free trial][CrateDB Cloud Console]. + +### Install Client +Install the most recent version of the `langchain-cratedb` package +and a few others that are needed for this tutorial. +```bash +pip install --upgrade langchain-cratedb langchain-openai unstructured +``` + + +## Documentation +For a more detailed walkthrough of the CrateDB wrapper, see +[using LangChain with CrateDB]. 
See also [all features of CrateDB] +to learn about other functionality provided by CrateDB. + + +## Features +The CrateDB adapter for LangChain provides APIs to use CrateDB as a vector store, +document loader, and storage for chat messages. + +### Vector Store +Use the CrateDB vector store functionality around `FLOAT_VECTOR` and `KNN_MATCH` +for similarity search and other purposes. See also [CrateDBVectorStore Tutorial]. + +Make sure you've configured a valid OpenAI API key. +```bash +export OPENAI_API_KEY=sk-XJZ... +``` +```python +from langchain_community.document_loaders import UnstructuredURLLoader +from langchain_cratedb import CrateDBVectorStore +from langchain_openai import OpenAIEmbeddings +from langchain.text_splitter import CharacterTextSplitter + +loader = UnstructuredURLLoader(urls=["https://github.com/langchain-ai/langchain/raw/refs/tags/langchain-core==0.3.28/docs/docs/how_to/state_of_the_union.txt"]) +documents = loader.load() +text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) +docs = text_splitter.split_documents(documents) + +embeddings = OpenAIEmbeddings() + +# Connect to a self-managed CrateDB instance on localhost. +CONNECTION_STRING = "crate://?schema=testdrive" + +store = CrateDBVectorStore.from_documents( + documents=docs, + embedding=embeddings, + collection_name="state_of_the_union", + connection=CONNECTION_STRING, +) + +query = "What did the president say about Ketanji Brown Jackson" +docs_with_score = store.similarity_search_with_score(query) +``` + +### Document Loader +Load documents from a CrateDB database table, using the document loader +`CrateDBLoader`, which is based on SQLAlchemy. See also [CrateDBLoader Tutorial]. + +To use the document loader in your applications: +```python +import sqlalchemy as sa +from langchain_community.utilities import SQLDatabase +from langchain_cratedb import CrateDBLoader + +# Connect to a self-managed CrateDB instance on localhost. 
+CONNECTION_STRING = "crate://?schema=testdrive" + +db = SQLDatabase(engine=sa.create_engine(CONNECTION_STRING)) + +loader = CrateDBLoader( + 'SELECT * FROM sys.summits LIMIT 42', + db=db, +) +documents = loader.load() +``` + +### Chat Message History +Use CrateDB as the storage for your chat messages. +See also [CrateDBChatMessageHistory Tutorial]. + +To use the chat message history in your applications: +```python +from langchain_cratedb import CrateDBChatMessageHistory + +# Connect to a self-managed CrateDB instance on localhost. +CONNECTION_STRING = "crate://?schema=testdrive" + +message_history = CrateDBChatMessageHistory( + session_id="test-session", + connection=CONNECTION_STRING, +) + +message_history.add_user_message("hi!") +``` + + +[all features of CrateDB]: https://cratedb.com/docs/guide/feature/ +[CrateDB]: https://cratedb.com/database +[CrateDB Cloud]: https://cratedb.com/database/cloud +[CrateDB Cloud Console]: https://console.cratedb.cloud/?utm_source=langchain&utm_content=documentation +[CrateDB installation options]: https://cratedb.com/docs/guide/install/ +[CrateDBChatMessageHistory Tutorial]: https://github.com/crate/cratedb-examples/blob/main/topic/machine-learning/llm-langchain/conversational_memory.ipynb +[CrateDBLoader Tutorial]: https://github.com/crate/cratedb-examples/blob/main/topic/machine-learning/llm-langchain/document_loader.ipynb +[CrateDBVectorStore Tutorial]: https://github.com/crate/cratedb-examples/blob/main/topic/machine-learning/llm-langchain/vector_search.ipynb +[using LangChain with CrateDB]: https://cratedb.com/docs/guide/integrate/langchain/ diff --git a/docs/scripts/notebook_convert.py b/docs/scripts/notebook_convert.py index 429734f115817..fb0e3c807561d 100644 --- a/docs/scripts/notebook_convert.py +++ b/docs/scripts/notebook_convert.py @@ -143,16 +143,22 @@ def _modify_frontmatter( edit_url = ( f"https://github.com/langchain-ai/langchain/edit/master/docs/docs/{rel_path}" ) + frontmatter = { + "custom_edit_url": 
edit_url, + } if re.match(r"^[\s\n]*---\n", body): - # if custom_edit_url already exists, leave it - if re.match(r"custom_edit_url: ", body): - return body - else: - return re.sub( - r"^[\s\n]*---\n", f"---\ncustom_edit_url: {edit_url}\n", body, count=1 - ) + # frontmatter already present + + for k, v in frontmatter.items(): + # if key already exists, leave it + if re.match(f"{k}: ", body): + continue + else: + body = re.sub(r"^[\s\n]*---\n", f"---\n{k}: {v}\n", body, count=1) + return body else: - return f"---\ncustom_edit_url: {edit_url}\n---\n{body}" + insert = "\n".join([f"{k}: {v}" for k, v in frontmatter.items()]) + return f"---\n{insert}\n---\n{body}" def _convert_notebook( diff --git a/docs/scripts/packages_yml_get_downloads.py b/docs/scripts/packages_yml_get_downloads.py new file mode 100644 index 0000000000000..07525194b39c5 --- /dev/null +++ b/docs/scripts/packages_yml_get_downloads.py @@ -0,0 +1,71 @@ +from datetime import datetime, timedelta, timezone +from pathlib import Path + +import requests +from ruamel.yaml import YAML +from ruamel.yaml.comments import CommentedMap + +yaml = YAML() + +PACKAGE_YML = Path(__file__).parents[2] / "libs" / "packages.yml" + + +def _get_downloads(p: dict) -> int: + url = f"https://pypistats.org/api/packages/{p['name']}/recent?period=month" + r = requests.get(url) + r.raise_for_status() + return r.json()["data"]["last_month"] + + +current_datetime = datetime.now(timezone.utc) +yesterday = current_datetime - timedelta(days=1) + +with open(PACKAGE_YML) as f: + data = yaml.load(f) + + +def _reorder_keys(p): + keys = p.keys() + key_order = [ + "name", + "name_title", + "path", + "repo", + "type", + "provider_page", + "js", + "downloads", + "downloads_updated_at", + ] + if set(keys) - set(key_order): + raise ValueError(f"Unexpected keys: {set(keys) - set(key_order)}") + return CommentedMap((k, p[k]) for k in key_order if k in p) + + +data["packages"] = [_reorder_keys(p) for p in data["packages"]] + +seen = set() +for p in 
data["packages"]: + if p["name"] in seen: + raise ValueError(f"Duplicate package: {p['name']}") + seen.add(p["name"]) + downloads_updated_at_str = p.get("downloads_updated_at") + downloads_updated_at = ( + datetime.fromisoformat(downloads_updated_at_str) + if downloads_updated_at_str + else None + ) + + if downloads_updated_at is not None and downloads_updated_at > yesterday: + print(f"done: {p['name']}: {p['downloads']}") + continue + + p["downloads"] = _get_downloads(p) + p["downloads_updated_at"] = current_datetime.isoformat() + with open(PACKAGE_YML, "w") as f: + yaml.dump(data, f) + print(f"{p['name']}: {p['downloads']}") + + +with open(PACKAGE_YML, "w") as f: + yaml.dump(data, f) diff --git a/docs/scripts/partner_pkg_table.py b/docs/scripts/partner_pkg_table.py index 6dfdcecbbf2bb..04a605235ff78 100644 --- a/docs/scripts/partner_pkg_table.py +++ b/docs/scripts/partner_pkg_table.py @@ -2,110 +2,106 @@ import sys from pathlib import Path +import requests import yaml +################# +# CONFIGURATION # +################# + +# packages to ignore / exclude from the table +IGNORE_PACKGAGES = { + # top-level packages + "langchain-core", + "langchain-text-splitters", + "langchain", + "langchain-community", + "langchain-experimental", + "langchain-cli", + "langchain-tests", + # integration packages that don't have a provider index + # do NOT add to these. 
These were merged before having a + # provider index was required + # can remove these once they have a provider index + "langchain-yt-dlp", +} + +##################### +# END CONFIGURATION # +##################### + DOCS_DIR = Path(__file__).parents[1] PACKAGE_YML = Path(__file__).parents[2] / "libs" / "packages.yml" -IGNORE_PACKGAGES = {"langchain-experimental"} # for now, only include packages that are in the langchain-ai org # because we don't have a policy for inclusion in this table yet, # and including all packages will make the list too long -with open(PACKAGE_YML) as f: - data = yaml.safe_load(f) - EXTERNAL_PACKAGES = set( - p["name"][10:] - for p in data["packages"] - if p["repo"].startswith("langchain-ai/") - and p["repo"] != "langchain-ai/langchain" - and p["name"] not in IGNORE_PACKGAGES + + +def _get_type(package: dict) -> str: + if package["name"] in IGNORE_PACKGAGES: + return "ignore" + if package["repo"] == "langchain-ai/langchain": + return "B" + if package["repo"].startswith("langchain-ai/"): + return "C" + return "D" + + +def _enrich_package(p: dict) -> dict | None: + p["name_short"] = ( + p["name"][10:] if p["name"].startswith("langchain-") else p["name"] + ) + p["name_title"] = p.get("name_title") or p["name_short"].title().replace( + "-", " " + ).replace("db", "DB").replace("Db", "DB").replace("ai", "AI").replace("Ai", "AI") + p["type"] = _get_type(p) + + if p["type"] == "ignore": + return None + + p["js_exists"] = bool(p.get("js")) + custom_provider_page = p.get("provider_page") + default_provider_page = f"/docs/integrations/providers/{p['name_short']}/" + default_provider_page_exists = bool( + glob.glob(str(DOCS_DIR / f"docs/integrations/providers/{p['name_short']}.*")) ) - IN_REPO_PACKAGES = set( - p["name"][10:] - for p in data["packages"] - if p["repo"] == "langchain-ai/langchain" - and p["path"].startswith("libs/partners") - and p["name"] not in IGNORE_PACKGAGES + p["provider_page"] = custom_provider_page or ( + default_provider_page if 
default_provider_page_exists else None ) + if p["provider_page"] is None: + msg = ( + f"Provider page not found for {p['name_short']}. " + f"Please add one at docs/integrations/providers/{p['name_short']}.{{mdx,ipynb}}" + ) + raise ValueError(msg) -JS_PACKAGES = { - "google-gauth", - "openai", - "anthropic", - "google-genai", - "pinecone", - "aws", - "google-vertexai", - "qdrant", - "azure-dynamic-sessions", - "google-vertexai-web", - "redis", - "azure-openai", - "google-webauth", - "baidu-qianfan", - "groq", - "standard-tests", - "cloudflare", - "mistralai", - "textsplitters", - "cohere", - "mixedbread-ai", - "weaviate", - "mongodb", - "yandex", - "exa", - "nomic", - "google-common", - "ollama", - "ibm", -} + return p -ALL_PACKAGES = IN_REPO_PACKAGES.union(EXTERNAL_PACKAGES) -CUSTOM_NAME = { - "google-genai": "Google Generative AI", - "aws": "AWS", - "ibm": "IBM", -} -CUSTOM_PROVIDER_PAGES = { - "azure-dynamic-sessions": "/docs/integrations/providers/microsoft/", - "prompty": "/docs/integrations/providers/microsoft/", - "sqlserver": "/docs/integrations/providers/microsoft/", - "google-community": "/docs/integrations/providers/google/", - "google-genai": "/docs/integrations/providers/google/", - "google-vertexai": "/docs/integrations/providers/google/", - "nvidia-ai-endpoints": "/docs/integrations/providers/nvidia/", - "exa": "/docs/integrations/providers/exa_search/", - "mongodb": "/docs/integrations/providers/mongodb_atlas/", - "sema4": "/docs/integrations/providers/robocorp/", - "postgres": "/docs/integrations/providers/pgvector/", -} -PROVIDER_PAGES = { - name: f"/docs/integrations/providers/{name}/" - for name in ALL_PACKAGES - if glob.glob(str(DOCS_DIR / f"docs/integrations/providers/{name}.*")) -} -PROVIDER_PAGES = { - **PROVIDER_PAGES, - **CUSTOM_PROVIDER_PAGES, -} +with open(PACKAGE_YML) as f: + data = yaml.safe_load(f) + +packages_n = [_enrich_package(p) for p in data["packages"]] +packages = [p for p in packages_n if p is not None] + +# sort by downloads 
+packages_sorted = sorted(packages, key=lambda p: p["downloads"], reverse=True) -def package_row(name: str) -> str: - js = "✅" if name in JS_PACKAGES else "❌" - link = PROVIDER_PAGES.get(name) - title = CUSTOM_NAME.get(name) or name.title().replace("-", " ").replace( - "db", "DB" - ).replace("Db", "DB").replace("ai", "AI").replace("Ai", "AI") +def package_row(p: dict) -> str: + js = "✅" if p["js_exists"] else "❌" + link = p["provider_page"] + title = p["name_title"] provider = f"[{title}]({link})" if link else title - return f"| {provider} | [langchain-{name}](https://python.langchain.com/api_reference/{name.replace('-', '_')}/) | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-{name}?style=flat-square&label=%20&color=blue) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-{name}?style=flat-square&label=%20&color=orange) | {js} |" + return f"| {provider} | [{p['name']}](https://python.langchain.com/api_reference/{p['name_short'].replace('-', '_')}/) | ![PyPI - Downloads](https://img.shields.io/pypi/dm/{p['name']}?style=flat-square&label=%20&color=blue) | ![PyPI - Version](https://img.shields.io/pypi/v/{p['name']}?style=flat-square&label=%20&color=orange) | {js} |" def table() -> str: header = """| Provider | Package | Downloads | Latest | [JS](https://js.langchain.com/docs/integrations/providers/) | | :--- | :---: | :---: | :---: | :---: | """ - return header + "\n".join(package_row(name) for name in sorted(ALL_PACKAGES)) + return header + "\n".join(package_row(p) for p in packages_sorted) def doc() -> str: diff --git a/docs/src/theme/DocItem/Layout/index.js b/docs/src/theme/DocItem/Layout/index.js new file mode 100644 index 0000000000000..3aa880b0d6b29 --- /dev/null +++ b/docs/src/theme/DocItem/Layout/index.js @@ -0,0 +1,82 @@ +import React from 'react'; +import clsx from 'clsx'; +import {useWindowSize} from '@docusaurus/theme-common'; +import {useDoc} from '@docusaurus/plugin-content-docs/client'; +import DocItemPaginator from 
'@theme/DocItem/Paginator'; +import DocVersionBanner from '@theme/DocVersionBanner'; +import DocVersionBadge from '@theme/DocVersionBadge'; +import DocItemFooter from '@theme/DocItem/Footer'; +import DocItemTOCMobile from '@theme/DocItem/TOC/Mobile'; +import DocItemTOCDesktop from '@theme/DocItem/TOC/Desktop'; +import DocItemContent from '@theme/DocItem/Content'; +import DocBreadcrumbs from '@theme/DocBreadcrumbs'; +import ContentVisibility from '@theme/ContentVisibility'; +import styles from './styles.module.css'; +/** + * Decide if the toc should be rendered, on mobile or desktop viewports + */ +function useDocTOC() { + const {frontMatter, toc} = useDoc(); + const windowSize = useWindowSize(); + const hidden = frontMatter.hide_table_of_contents; + const canRender = !hidden && toc.length > 0; + const mobile = canRender ? : undefined; + const desktop = + canRender && (windowSize === 'desktop' || windowSize === 'ssr') ? ( + + ) : undefined; + return { + hidden, + mobile, + desktop, + }; +} +export default function DocItemLayout({children}) { + const docTOC = useDocTOC(); + const {metadata, frontMatter} = useDoc(); + + "https://github.com/langchain-ai/langchain/blob/master/docs/docs/introduction.ipynb" + "https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/introduction.ipynb" + + const linkColab = frontMatter.link_colab || ( + metadata.editUrl?.endsWith(".ipynb") + ? metadata.editUrl?.replace("https://github.com/langchain-ai/langchain/edit/", "https://colab.research.google.com/github/langchain-ai/langchain/blob/") + : null + ); + const linkGithub = frontMatter.link_github || metadata.editUrl?.replace("/edit/", "/blob/"); + + return ( +
+
+ + +
+
+ + + {docTOC.mobile} +
+ {linkColab && ( + Open In Colab + )} + {linkGithub && ( + Open on GitHub + )} +
+ {children} + +
+ +
+
+ {docTOC.desktop &&
{docTOC.desktop}
} +
+ ); +} diff --git a/docs/src/theme/DocItem/Layout/styles.module.css b/docs/src/theme/DocItem/Layout/styles.module.css new file mode 100644 index 0000000000000..d5aaec1322c92 --- /dev/null +++ b/docs/src/theme/DocItem/Layout/styles.module.css @@ -0,0 +1,10 @@ +.docItemContainer header + *, +.docItemContainer article > *:first-child { + margin-top: 0; +} + +@media (min-width: 997px) { + .docItemCol { + max-width: 75% !important; + } +} diff --git a/libs/community/langchain_community/document_loaders/dataframe.py b/libs/community/langchain_community/document_loaders/dataframe.py index 1b508533f8d93..74ad56b53f783 100644 --- a/libs/community/langchain_community/document_loaders/dataframe.py +++ b/libs/community/langchain_community/document_loaders/dataframe.py @@ -21,9 +21,8 @@ def lazy_load(self) -> Iterator[Document]: """Lazy load records from dataframe.""" for _, row in self.data_frame.iterrows(): - text = row[self.page_content_column] metadata = row.to_dict() - metadata.pop(self.page_content_column) + text = metadata.pop(self.page_content_column) yield Document(page_content=text, metadata=metadata) diff --git a/libs/community/langchain_community/embeddings/llamacpp.py b/libs/community/langchain_community/embeddings/llamacpp.py index 6487312fd31d0..4adfeb0e52774 100644 --- a/libs/community/langchain_community/embeddings/llamacpp.py +++ b/libs/community/langchain_community/embeddings/llamacpp.py @@ -20,7 +20,7 @@ class LlamaCppEmbeddings(BaseModel, Embeddings): """ client: Any = None #: :meta private: - model_path: str + model_path: str = Field(default="") n_ctx: int = Field(512, alias="n_ctx") """Token context window.""" @@ -88,21 +88,22 @@ def validate_environment(self) -> Self: if self.n_gpu_layers is not None: model_params["n_gpu_layers"] = self.n_gpu_layers - try: - from llama_cpp import Llama - - self.client = Llama(model_path, embedding=True, **model_params) - except ImportError: - raise ImportError( - "Could not import llama-cpp-python library. 
" - "Please install the llama-cpp-python library to " - "use this embedding model: pip install llama-cpp-python" - ) - except Exception as e: - raise ValueError( - f"Could not load Llama model from path: {model_path}. " - f"Received error {e}" - ) + if not self.client: + try: + from llama_cpp import Llama + + self.client = Llama(model_path, embedding=True, **model_params) + except ImportError: + raise ImportError( + "Could not import llama-cpp-python library. " + "Please install the llama-cpp-python library to " + "use this embedding model: pip install llama-cpp-python" + ) + except Exception as e: + raise ValueError( + f"Could not load Llama model from path: {model_path}. " + f"Received error {e}" + ) return self @@ -116,7 +117,17 @@ def embed_documents(self, texts: List[str]) -> List[List[float]]: List of embeddings, one for each text. """ embeddings = self.client.create_embedding(texts) - return [list(map(float, e["embedding"])) for e in embeddings["data"]] + final_embeddings = [] + for e in embeddings["data"]: + try: + if isinstance(e["embedding"][0], list): + for data in e["embedding"]: + final_embeddings.append(list(map(float, data))) + else: + final_embeddings.append(list(map(float, e["embedding"]))) + except (IndexError, TypeError): + final_embeddings.append(list(map(float, e["embedding"]))) + return final_embeddings def embed_query(self, text: str) -> List[float]: """Embed a query using the Llama model. @@ -128,4 +139,7 @@ def embed_query(self, text: str) -> List[float]: Embeddings for the text. 
""" embedding = self.client.embed(text) - return list(map(float, embedding)) + if not isinstance(embedding, list): + return list(map(float, embedding)) + else: + return list(map(float, embedding[0])) diff --git a/libs/community/langchain_community/vectorstores/azuresearch.py b/libs/community/langchain_community/vectorstores/azuresearch.py index 6930c8319e4d9..d0aa15e2acbd1 100644 --- a/libs/community/langchain_community/vectorstores/azuresearch.py +++ b/libs/community/langchain_community/vectorstores/azuresearch.py @@ -42,6 +42,8 @@ logger = logging.getLogger() if TYPE_CHECKING: + from azure.core.credentials import TokenCredential + from azure.core.credentials_async import AsyncTokenCredential from azure.search.documents import SearchClient, SearchItemPaged from azure.search.documents.aio import ( AsyncSearchItemPaged, @@ -96,10 +98,13 @@ def _get_search_client( cors_options: Optional[CorsOptions] = None, async_: bool = False, additional_search_client_options: Optional[Dict[str, Any]] = None, + azure_credential: Optional[TokenCredential] = None, + azure_async_credential: Optional[AsyncTokenCredential] = None, ) -> Union[SearchClient, AsyncSearchClient]: from azure.core.credentials import AccessToken, AzureKeyCredential, TokenCredential from azure.core.exceptions import ResourceNotFoundError from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential + from azure.identity.aio import DefaultAzureCredential as AsyncDefaultAzureCredential from azure.search.documents import SearchClient from azure.search.documents.aio import SearchClient as AsyncSearchClient from azure.search.documents.indexes import SearchIndexClient @@ -143,12 +148,17 @@ def get_token( if key.upper() == "INTERACTIVE": credential = InteractiveBrowserCredential() credential.get_token("https://search.azure.com/.default") + async_credential = credential else: credential = AzureKeyCredential(key) + async_credential = credential elif azure_ad_access_token is not None: credential = 
AzureBearerTokenCredential(azure_ad_access_token) + async_credential = credential else: - credential = DefaultAzureCredential() + credential = azure_credential or DefaultAzureCredential() + async_credential = azure_async_credential or AsyncDefaultAzureCredential() + index_client: SearchIndexClient = SearchIndexClient( endpoint=endpoint, credential=credential, @@ -266,7 +276,7 @@ def fmt_err(x: str) -> str: return AsyncSearchClient( endpoint=endpoint, index_name=index_name, - credential=credential, + credential=async_credential, user_agent=user_agent, **additional_search_client_options, ) @@ -278,7 +288,7 @@ class AzureSearch(VectorStore): def __init__( self, azure_search_endpoint: str, - azure_search_key: str, + azure_search_key: Optional[str], index_name: str, embedding_function: Union[Callable, Embeddings], search_type: str = "hybrid", @@ -295,6 +305,8 @@ def __init__( vector_search_dimensions: Optional[int] = None, additional_search_client_options: Optional[Dict[str, Any]] = None, azure_ad_access_token: Optional[str] = None, + azure_credential: Optional[TokenCredential] = None, + azure_async_credential: Optional[AsyncTokenCredential] = None, **kwargs: Any, ): try: @@ -361,6 +373,7 @@ def __init__( user_agent=user_agent, cors_options=cors_options, additional_search_client_options=additional_search_client_options, + azure_credential=azure_credential, ) self.async_client = _get_search_client( azure_search_endpoint, @@ -377,6 +390,8 @@ def __init__( user_agent=user_agent, cors_options=cors_options, async_=True, + azure_credential=azure_credential, + azure_async_credential=azure_async_credential, ) self.search_type = search_type self.semantic_configuration_name = semantic_configuration_name diff --git a/libs/community/tests/unit_tests/embeddings/test_llamacpp.py b/libs/community/tests/unit_tests/embeddings/test_llamacpp.py new file mode 100644 index 0000000000000..ca2bd758216cf --- /dev/null +++ b/libs/community/tests/unit_tests/embeddings/test_llamacpp.py @@ -0,0 
+1,40 @@ +from typing import Generator +from unittest.mock import MagicMock, patch + +import pytest + +from langchain_community.embeddings.llamacpp import LlamaCppEmbeddings + + +@pytest.fixture +def mock_llama_client() -> Generator[MagicMock, None, None]: + with patch( + "langchain_community.embeddings.llamacpp.LlamaCppEmbeddings" + ) as MockLlama: + mock_client = MagicMock() + MockLlama.return_value = mock_client + yield mock_client + + +def test_initialization(mock_llama_client: MagicMock) -> None: + embeddings = LlamaCppEmbeddings(client=mock_llama_client) # type: ignore[call-arg] + assert embeddings.client is not None + + +def test_embed_documents(mock_llama_client: MagicMock) -> None: + mock_llama_client.create_embedding.return_value = { + "data": [{"embedding": [[0.1, 0.2, 0.3]]}, {"embedding": [[0.4, 0.5, 0.6]]}] + } + embeddings = LlamaCppEmbeddings(client=mock_llama_client) # type: ignore[call-arg] + texts = ["Hello world", "Test document"] + result = embeddings.embed_documents(texts) + expected = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]] + assert result == expected + + +def test_embed_query(mock_llama_client: MagicMock) -> None: + mock_llama_client.embed.return_value = [[0.1, 0.2, 0.3]] + embeddings = LlamaCppEmbeddings(client=mock_llama_client) # type: ignore[call-arg] + result = embeddings.embed_query("Sample query") + expected = [0.1, 0.2, 0.3] + assert result == expected diff --git a/libs/core/langchain_core/runnables/base.py b/libs/core/langchain_core/runnables/base.py index 3e43fb10a8e6b..893f393d8b174 100644 --- a/libs/core/langchain_core/runnables/base.py +++ b/libs/core/langchain_core/runnables/base.py @@ -534,8 +534,6 @@ def get_config_jsonschema( def get_graph(self, config: Optional[RunnableConfig] = None) -> Graph: """Return a graph representation of this Runnable.""" - from langchain_core.runnables.graph import Graph - graph = Graph() try: input_node = graph.add_node(self.get_input_schema(config)) diff --git 
a/libs/core/langchain_core/utils/pydantic.py b/libs/core/langchain_core/utils/pydantic.py index ae403574bb761..65f12232f9fc6 100644 --- a/libs/core/langchain_core/utils/pydantic.py +++ b/libs/core/langchain_core/utils/pydantic.py @@ -279,7 +279,7 @@ def _create_subset_model_v2( fn_description: Optional[str] = None, ) -> type[pydantic.BaseModel]: """Create a pydantic model with a subset of the model fields.""" - from pydantic import ConfigDict, create_model + from pydantic import create_model from pydantic.fields import FieldInfo descriptions_ = descriptions or {} diff --git a/libs/packages.yml b/libs/packages.yml index da26ed6f0cfb8..810e54c5a857b 100644 --- a/libs/packages.yml +++ b/libs/packages.yml @@ -1,169 +1,310 @@ -# this file is used to define the packages that are used in the project +# this file is used to define the packages that are used in the project # it is EXPERIMENTAL and may be removed in the future packages: - - name: langchain-core - repo: langchain-ai/langchain - path: libs/core - - name: langchain-text-splitters - repo: langchain-ai/langchain - path: libs/text-splitters - - name: langchain - repo: langchain-ai/langchain - path: libs/langchain - - name: langchain-community - repo: langchain-ai/langchain - path: libs/community - - name: langchain-experimental - repo: langchain-ai/langchain-experimental - path: libs/experimental - - name: langchain-cli - repo: langchain-ai/langchain - path: libs/cli - - name: langchain-ai21 - repo: langchain-ai/langchain-ai21 - path: libs/ai21 - - name: langchain-anthropic - repo: langchain-ai/langchain - path: libs/partners/anthropic - - name: langchain-chroma - repo: langchain-ai/langchain - path: libs/partners/chroma - - name: langchain-exa - repo: langchain-ai/langchain - path: libs/partners/exa - - name: langchain-fireworks - repo: langchain-ai/langchain - path: libs/partners/fireworks - - name: langchain-groq - repo: langchain-ai/langchain - path: libs/partners/groq - - name: langchain-huggingface - repo: 
langchain-ai/langchain - path: libs/partners/huggingface - - name: langchain-ibm - repo: langchain-ai/langchain-ibm - path: libs/ibm - - name: langchain-localai - repo: mkhludnev/langchain-localai - path: libs/localai - - name: langchain-milvus - repo: langchain-ai/langchain-milvus - path: libs/milvus - - name: langchain-mistralai - repo: langchain-ai/langchain - path: libs/partners/mistralai - - name: langchain-mongodb - repo: langchain-ai/langchain-mongodb - path: libs/mongodb - - name: langchain-nomic - repo: langchain-ai/langchain - path: libs/partners/nomic - - name: langchain-openai - repo: langchain-ai/langchain - path: libs/partners/openai - - name: langchain-pinecone - repo: langchain-ai/langchain - path: libs/partners/pinecone - - name: langchain-prompty - repo: langchain-ai/langchain - path: libs/partners/prompty - - name: langchain-qdrant - repo: langchain-ai/langchain - path: libs/partners/qdrant - - name: langchain-scrapegraph - repo: ScrapeGraphAI/langchain-scrapegraph - path: . 
- - name: langchain-sema4 - repo: langchain-ai/langchain-sema4 - path: libs/sema4 - - name: langchain-together - repo: langchain-ai/langchain-together - path: libs/together - - name: langchain-upstage - repo: langchain-ai/langchain-upstage - path: libs/upstage - - name: langchain-voyageai - repo: langchain-ai/langchain - path: libs/partners/voyageai - - name: langchain-aws - repo: langchain-ai/langchain-aws - path: libs/aws - - name: langchain-astradb - repo: langchain-ai/langchain-datastax - path: libs/astradb - - name: langchain-google-genai - repo: langchain-ai/langchain-google - path: libs/genai - - name: langchain-google-vertexai - repo: langchain-ai/langchain-google - path: libs/vertexai - - name: langchain-google-community - repo: langchain-ai/langchain-google - path: libs/community - - name: langchain-weaviate - repo: langchain-ai/langchain-weaviate - path: libs/weaviate - - name: langchain-cohere - repo: langchain-ai/langchain-cohere - path: libs/cohere - - name: langchain-elasticsearch - repo: langchain-ai/langchain-elastic - path: libs/elasticsearch - - name: langchain-nvidia-ai-endpoints - repo: langchain-ai/langchain-nvidia - path: libs/ai-endpoints - - name: langchain-postgres - repo: langchain-ai/langchain-postgres - path: . 
- - name: langchain-redis - repo: langchain-ai/langchain-redis - path: libs/redis - - name: langchain-unstructured - repo: langchain-ai/langchain-unstructured - path: libs/unstructured - - name: langchain-azure-dynamic-sessions - repo: langchain-ai/langchain-azure - path: libs/azure-dynamic-sessions - - name: langchain-sqlserver - repo: langchain-ai/langchain-azure - path: libs/sqlserver - - name: langchain-cerebras - repo: langchain-ai/langchain-cerebras - path: libs/cerebras - - name: langchain-snowflake - repo: langchain-ai/langchain-snowflake - path: libs/snowflake - - name: langchain-databricks - repo: langchain-ai/langchain-databricks - path: libs/databricks - - name: langchain-ibm - repo: langchain-ai/langchain-ibm - path: libs/ibm - - name: langchain-couchbase - repo: langchain-ai/langchain - path: libs/partners/couchbase - - name: langchain-ollama - repo: langchain-ai/langchain - path: libs/partners/ollama - - name: langchain-box - repo: langchain-ai/langchain-box - path: libs/box - - name: langchain-tests - repo: langchain-ai/langchain - path: libs/standard-tests - - name: langchain-neo4j - repo: langchain-ai/langchain-neo4j - path: libs/neo4j - - name: langchain-linkup - repo: LinkupPlatform/langchain-linkup - path: . - - name: langchain-yt-dlp - repo: aqib0770/langchain-yt-dlp - path: . - - name: langchain-oceanbase - repo: oceanbase/langchain-oceanbase - path: . - - name: langchain-predictionguard - repo: predictionguard/langchain-predictionguard - path: . 
\ No newline at end of file +- name: langchain-core + path: libs/core + repo: langchain-ai/langchain + downloads: 27728892 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-text-splitters + path: libs/text-splitters + repo: langchain-ai/langchain + downloads: 10343427 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain + path: libs/langchain + repo: langchain-ai/langchain + downloads: 27515102 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-community + path: libs/community + repo: langchain-ai/langchain + downloads: 17505668 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-experimental + path: libs/experimental + repo: langchain-ai/langchain-experimental + downloads: 1710421 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-cli + path: libs/cli + repo: langchain-ai/langchain + downloads: 55505 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-ai21 + path: libs/ai21 + repo: langchain-ai/langchain-ai21 + downloads: 3453 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-anthropic + path: libs/partners/anthropic + repo: langchain-ai/langchain + js: '@langchain/anthropic' + downloads: 1163020 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-chroma + path: libs/partners/chroma + repo: langchain-ai/langchain + downloads: 450092 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-exa + path: libs/partners/exa + repo: langchain-ai/langchain + provider_page: exa_search + js: '@langchain/exa' + downloads: 4560 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-fireworks + path: libs/partners/fireworks + repo: langchain-ai/langchain + downloads: 73179 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-groq + path: libs/partners/groq + repo: langchain-ai/langchain + js: 
'@langchain/groq' + downloads: 370373 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-huggingface + path: libs/partners/huggingface + repo: langchain-ai/langchain + downloads: 375151 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-ibm + path: libs/ibm + repo: langchain-ai/langchain-ibm + js: '@langchain/ibm' + downloads: 261091 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-localai + path: libs/localai + repo: mkhludnev/langchain-localai + downloads: 510 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-milvus + path: libs/milvus + repo: langchain-ai/langchain-milvus + downloads: 111126 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-mistralai + path: libs/partners/mistralai + repo: langchain-ai/langchain + js: '@langchain/mistralai' + downloads: 232463 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-mongodb + path: libs/mongodb + repo: langchain-ai/langchain-mongodb + provider_page: mongodb_atlas + js: '@langchain/mongodb' + downloads: 113328 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-nomic + path: libs/partners/nomic + repo: langchain-ai/langchain + js: '@langchain/nomic' + downloads: 10175 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-openai + path: libs/partners/openai + repo: langchain-ai/langchain + js: '@langchain/openai' + downloads: 7994138 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-pinecone + path: libs/partners/pinecone + repo: langchain-ai/langchain + js: '@langchain/pinecone' + downloads: 345657 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-prompty + path: libs/partners/prompty + repo: langchain-ai/langchain + provider_page: microsoft + downloads: 976 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-qdrant + path: 
libs/partners/qdrant + repo: langchain-ai/langchain + js: '@langchain/qdrant' + downloads: 77743 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-scrapegraph + path: . + repo: ScrapeGraphAI/langchain-scrapegraph + downloads: 907 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-sema4 + path: libs/sema4 + repo: langchain-ai/langchain-sema4 + provider_page: robocorp + downloads: 987 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-together + path: libs/together + repo: langchain-ai/langchain-together + downloads: 44887 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-upstage + path: libs/upstage + repo: langchain-ai/langchain-upstage + downloads: 20951 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-voyageai + path: libs/partners/voyageai + repo: langchain-ai/langchain + downloads: 11253 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-aws + name_title: AWS + path: libs/aws + repo: langchain-ai/langchain-aws + js: '@langchain/aws' + downloads: 1507701 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-astradb + path: libs/astradb + repo: langchain-ai/langchain-datastax + downloads: 64185 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-google-genai + name_title: Google Generative AI + path: libs/genai + repo: langchain-ai/langchain-google + provider_page: google + js: '@langchain/google-genai' + downloads: 732265 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-google-vertexai + path: libs/vertexai + repo: langchain-ai/langchain-google + provider_page: google + js: '@langchain/google-vertexai' + downloads: 7668881 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-google-community + path: libs/community + repo: langchain-ai/langchain-google + provider_page: google + downloads: 3055901 + 
downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-weaviate + path: libs/weaviate + repo: langchain-ai/langchain-weaviate + js: '@langchain/weaviate' + downloads: 26639 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-cohere + path: libs/cohere + repo: langchain-ai/langchain-cohere + js: '@langchain/cohere' + downloads: 513053 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-elasticsearch + path: libs/elasticsearch + repo: langchain-ai/langchain-elastic + downloads: 108874 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-nvidia-ai-endpoints + path: libs/ai-endpoints + repo: langchain-ai/langchain-nvidia + provider_page: nvidia + downloads: 129677 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-postgres + path: . + repo: langchain-ai/langchain-postgres + provider_page: pgvector + downloads: 293866 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-redis + path: libs/redis + repo: langchain-ai/langchain-redis + js: '@langchain/redis' + downloads: 17549 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-unstructured + path: libs/unstructured + repo: langchain-ai/langchain-unstructured + downloads: 88721 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-azure-dynamic-sessions + path: libs/azure-dynamic-sessions + repo: langchain-ai/langchain-azure + provider_page: microsoft + js: '@langchain/azure-dynamic-sessions' + downloads: 7285 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-sqlserver + path: libs/sqlserver + repo: langchain-ai/langchain-azure + provider_page: microsoft + downloads: 1489 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-cerebras + path: libs/cerebras + repo: langchain-ai/langchain-cerebras + downloads: 9426 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- 
name: langchain-snowflake + path: libs/snowflake + repo: langchain-ai/langchain-snowflake + downloads: 2374 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-databricks + path: libs/databricks + repo: langchain-ai/langchain-databricks + downloads: 35495 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-couchbase + path: libs/partners/couchbase + repo: langchain-ai/langchain + downloads: 347 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-ollama + path: libs/partners/ollama + repo: langchain-ai/langchain + js: '@langchain/ollama' + downloads: 310741 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-box + path: libs/box + repo: langchain-ai/langchain-box + downloads: 2749 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-tests + path: libs/standard-tests + repo: langchain-ai/langchain + downloads: 3691 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-neo4j + path: libs/neo4j + repo: langchain-ai/langchain-neo4j + downloads: 8871 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-linkup + path: . + repo: LinkupPlatform/langchain-linkup + downloads: 818 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-yt-dlp + path: . + repo: aqib0770/langchain-yt-dlp + downloads: 776 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-oceanbase + path: . + repo: oceanbase/langchain-oceanbase + downloads: 322 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-predictionguard + path: . + repo: predictionguard/langchain-predictionguard + downloads: 156 + downloads_updated_at: '2024-12-23T20:10:11.816059+00:00' +- name: langchain-cratedb + path: . + repo: crate/langchain-cratedb + downloads: 362 + downloads_updated_at: '2024-12-23T20:53:27.001852+00:00'