From 2a006e485d7e945e11460a1d1354971aaf981fbd Mon Sep 17 00:00:00 2001 From: leo-gan Date: Tue, 16 Jan 2024 17:14:33 -0800 Subject: [PATCH 1/2] refactor --- .../retrievers/self_query/__init__.py | 0 .../retrievers/self_query/astradb.py | 70 ++++ .../retrievers/self_query/base.py | 250 ++++++++++++ .../retrievers/self_query/chroma.py | 50 +++ .../retrievers/self_query/dashvector.py | 64 ++++ .../retrievers/self_query/deeplake.py | 88 +++++ .../retrievers/self_query/elasticsearch.py | 97 +++++ .../retrievers/self_query/milvus.py | 103 +++++ .../retrievers/self_query/mongodb_atlas.py | 74 ++++ .../retrievers/self_query/myscale.py | 125 ++++++ .../retrievers/self_query/opensearch.py | 104 +++++ .../retrievers/self_query/pgvector.py | 52 +++ .../retrievers/self_query/pinecone.py | 57 +++ .../retrievers/self_query/qdrant.py | 98 +++++ .../retrievers/self_query/redis.py | 103 +++++ .../retrievers/self_query/supabase.py | 97 +++++ .../retrievers/self_query/timescalevector.py | 84 ++++ .../retrievers/self_query/vectara.py | 70 ++++ .../retrievers/self_query/weaviate.py | 79 ++++ .../sql_constructor/__init__.py | 0 .../langchain_core/sql_constructor/base.py | 361 ++++++++++++++++++ .../core/langchain_core/sql_constructor/ir.py | 122 ++++++ .../langchain_core/sql_constructor/parser.py | 183 +++++++++ .../langchain_core/sql_constructor/prompt.py | 227 +++++++++++ .../langchain_core/sql_constructor/schema.py | 15 + libs/core/pyproject.toml | 1 + .../chains/query_constructor/base.py | 321 +--------------- .../langchain/chains/query_constructor/ir.py | 143 +------ .../chains/query_constructor/parser.py | 193 +--------- .../chains/query_constructor/prompt.py | 262 ++----------- .../chains/query_constructor/schema.py | 20 +- .../retrievers/self_query/astradb.py | 72 +--- .../langchain/retrievers/self_query/base.py | 246 +----------- .../langchain/retrievers/self_query/chroma.py | 51 +-- .../retrievers/self_query/dashvector.py | 65 +--- .../retrievers/self_query/deeplake.py | 97 
+---- .../retrievers/self_query/elasticsearch.py | 101 +---- .../langchain/retrievers/self_query/milvus.py | 107 +----- .../retrievers/self_query/mongodb_atlas.py | 76 +--- .../retrievers/self_query/myscale.py | 128 +------ .../retrievers/self_query/opensearch.py | 105 +---- .../retrievers/self_query/pgvector.py | 53 +-- .../retrievers/self_query/pinecone.py | 58 +-- .../langchain/retrievers/self_query/qdrant.py | 99 +---- .../langchain/retrievers/self_query/redis.py | 105 +---- .../retrievers/self_query/supabase.py | 98 +---- .../retrievers/self_query/timescalevector.py | 85 +---- .../retrievers/self_query/vectara.py | 72 +--- .../retrievers/self_query/weaviate.py | 80 +--- poetry.lock | 228 +++++++++-- 50 files changed, 2929 insertions(+), 2510 deletions(-) create mode 100644 libs/community/langchain_community/retrievers/self_query/__init__.py create mode 100644 libs/community/langchain_community/retrievers/self_query/astradb.py create mode 100644 libs/community/langchain_community/retrievers/self_query/base.py create mode 100644 libs/community/langchain_community/retrievers/self_query/chroma.py create mode 100644 libs/community/langchain_community/retrievers/self_query/dashvector.py create mode 100644 libs/community/langchain_community/retrievers/self_query/deeplake.py create mode 100644 libs/community/langchain_community/retrievers/self_query/elasticsearch.py create mode 100644 libs/community/langchain_community/retrievers/self_query/milvus.py create mode 100644 libs/community/langchain_community/retrievers/self_query/mongodb_atlas.py create mode 100644 libs/community/langchain_community/retrievers/self_query/myscale.py create mode 100644 libs/community/langchain_community/retrievers/self_query/opensearch.py create mode 100644 libs/community/langchain_community/retrievers/self_query/pgvector.py create mode 100644 libs/community/langchain_community/retrievers/self_query/pinecone.py create mode 100644 
libs/community/langchain_community/retrievers/self_query/qdrant.py create mode 100644 libs/community/langchain_community/retrievers/self_query/redis.py create mode 100644 libs/community/langchain_community/retrievers/self_query/supabase.py create mode 100644 libs/community/langchain_community/retrievers/self_query/timescalevector.py create mode 100644 libs/community/langchain_community/retrievers/self_query/vectara.py create mode 100644 libs/community/langchain_community/retrievers/self_query/weaviate.py create mode 100644 libs/core/langchain_core/sql_constructor/__init__.py create mode 100644 libs/core/langchain_core/sql_constructor/base.py create mode 100644 libs/core/langchain_core/sql_constructor/ir.py create mode 100644 libs/core/langchain_core/sql_constructor/parser.py create mode 100644 libs/core/langchain_core/sql_constructor/prompt.py create mode 100644 libs/core/langchain_core/sql_constructor/schema.py diff --git a/libs/community/langchain_community/retrievers/self_query/__init__.py b/libs/community/langchain_community/retrievers/self_query/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/community/langchain_community/retrievers/self_query/astradb.py b/libs/community/langchain_community/retrievers/self_query/astradb.py new file mode 100644 index 0000000000000..fa9439c380537 --- /dev/null +++ b/libs/community/langchain_community/retrievers/self_query/astradb.py @@ -0,0 +1,70 @@ +"""Logic for converting internal query language to a valid AstraDB query.""" +from typing import Dict, Tuple, Union + +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, + Visitor, +) + +MULTIPLE_ARITY_COMPARATORS = [Comparator.IN, Comparator.NIN] + + +class AstraDBTranslator(Visitor): + """Translate AstraDB internal query language elements to valid filters.""" + + """Subset of allowed logical comparators.""" + allowed_comparators = [ + Comparator.EQ, + Comparator.NE, + 
Comparator.GT, + Comparator.GTE, + Comparator.LT, + Comparator.LTE, + Comparator.IN, + Comparator.NIN, + ] + + """Subset of allowed logical operators.""" + allowed_operators = [Operator.AND, Operator.OR] + + def _format_func(self, func: Union[Operator, Comparator]) -> str: + self._validate_func(func) + map_dict = { + Operator.AND: "$and", + Operator.OR: "$or", + Comparator.EQ: "$eq", + Comparator.NE: "$ne", + Comparator.GTE: "$gte", + Comparator.LTE: "$lte", + Comparator.LT: "$lt", + Comparator.GT: "$gt", + Comparator.IN: "$in", + Comparator.NIN: "$nin", + } + return map_dict[func] + + def visit_operation(self, operation: Operation) -> Dict: + args = [arg.accept(self) for arg in operation.arguments] + return {self._format_func(operation.operator): args} + + def visit_comparison(self, comparison: Comparison) -> Dict: + if comparison.comparator in MULTIPLE_ARITY_COMPARATORS and not isinstance( + comparison.value, list + ): + comparison.value = [comparison.value] + + comparator = self._format_func(comparison.comparator) + return {comparison.attribute: {comparator: comparison.value}} + + def visit_structured_query( + self, structured_query: StructuredQuery + ) -> Tuple[str, dict]: + if structured_query.filter is None: + kwargs = {} + else: + kwargs = {"filter": structured_query.filter.accept(self)} + return structured_query.query, kwargs diff --git a/libs/community/langchain_community/retrievers/self_query/base.py b/libs/community/langchain_community/retrievers/self_query/base.py new file mode 100644 index 0000000000000..5eff3bb28d0d6 --- /dev/null +++ b/libs/community/langchain_community/retrievers/self_query/base.py @@ -0,0 +1,250 @@ +"""Retriever that generates and executes structured queries over its own data source.""" +import logging +from typing import Any, Dict, List, Optional, Sequence, Tuple, Type, Union + +from langchain_core.callbacks.manager import ( + AsyncCallbackManagerForRetrieverRun, + CallbackManagerForRetrieverRun, +) +from langchain_core.documents 
import Document +from langchain_core.language_models import BaseLanguageModel +from langchain_core.pydantic_v1 import Field, root_validator +from langchain_core.retrievers import BaseRetriever +from langchain_core.runnables import Runnable +from langchain_core.sql_constructor.base import load_query_constructor_runnable +from langchain_core.sql_constructor.ir import StructuredQuery, Visitor +from langchain_core.sql_constructor.schema import AttributeInfo +from langchain_core.vectorstores import VectorStore + +from langchain_community.retrievers.self_query.astradb import AstraDBTranslator +from langchain_community.retrievers.self_query.chroma import ChromaTranslator +from langchain_community.retrievers.self_query.dashvector import DashvectorTranslator +from langchain_community.retrievers.self_query.deeplake import DeepLakeTranslator +from langchain_community.retrievers.self_query.elasticsearch import ( + ElasticsearchTranslator, +) +from langchain_community.retrievers.self_query.milvus import MilvusTranslator +from langchain_community.retrievers.self_query.mongodb_atlas import ( + MongoDBAtlasTranslator, +) +from langchain_community.retrievers.self_query.myscale import MyScaleTranslator +from langchain_community.retrievers.self_query.opensearch import OpenSearchTranslator +from langchain_community.retrievers.self_query.pgvector import PGVectorTranslator +from langchain_community.retrievers.self_query.pinecone import PineconeTranslator +from langchain_community.retrievers.self_query.qdrant import QdrantTranslator +from langchain_community.retrievers.self_query.redis import RedisTranslator +from langchain_community.retrievers.self_query.supabase import SupabaseVectorTranslator +from langchain_community.retrievers.self_query.timescalevector import ( + TimescaleVectorTranslator, +) +from langchain_community.retrievers.self_query.vectara import VectaraTranslator +from langchain_community.retrievers.self_query.weaviate import WeaviateTranslator +from 
langchain_community.vectorstores import ( + AstraDB, + Chroma, + DashVector, + DeepLake, + ElasticsearchStore, + Milvus, + MongoDBAtlasVectorSearch, + MyScale, + OpenSearchVectorSearch, + PGVector, + Pinecone, + Qdrant, + Redis, + SupabaseVectorStore, + TimescaleVector, + Vectara, + Weaviate, +) + +logger = logging.getLogger(__name__) + + +def _get_builtin_translator(vectorstore: VectorStore) -> Visitor: + """Get the translator class corresponding to the vector store class.""" + BUILTIN_TRANSLATORS: Dict[Type[VectorStore], Type[Visitor]] = { + AstraDB: AstraDBTranslator, + Chroma: ChromaTranslator, + DashVector: DashvectorTranslator, + DeepLake: DeepLakeTranslator, + ElasticsearchStore: ElasticsearchTranslator, + Milvus: MilvusTranslator, + MongoDBAtlasVectorSearch: MongoDBAtlasTranslator, + MyScale: MyScaleTranslator, + OpenSearchVectorSearch: OpenSearchTranslator, + PGVector: PGVectorTranslator, + Pinecone: PineconeTranslator, + Qdrant: QdrantTranslator, + SupabaseVectorStore: SupabaseVectorTranslator, + TimescaleVector: TimescaleVectorTranslator, + Vectara: VectaraTranslator, + Weaviate: WeaviateTranslator, + } + if isinstance(vectorstore, Qdrant): + return QdrantTranslator(metadata_key=vectorstore.metadata_payload_key) + elif isinstance(vectorstore, MyScale): + return MyScaleTranslator(metadata_key=vectorstore.metadata_column) + elif isinstance(vectorstore, Redis): + return RedisTranslator.from_vectorstore(vectorstore) + elif vectorstore.__class__ in BUILTIN_TRANSLATORS: + return BUILTIN_TRANSLATORS[vectorstore.__class__]() + else: + raise ValueError( + f"Self query retriever with Vector Store type {vectorstore.__class__}" + f" not supported." 
+ ) + + +class SelfQueryRetriever(BaseRetriever): + """Retriever that uses a vector store and an LLM to generate + the vector store queries.""" + + vectorstore: VectorStore + """The underlying vector store from which documents will be retrieved.""" + query_constructor: Runnable[dict, StructuredQuery] = Field(alias="llm_chain") + """The query constructor chain for generating the vector store queries. + + llm_chain is legacy name kept for backwards compatibility.""" + search_type: str = "similarity" + """The search type to perform on the vector store.""" + search_kwargs: dict = Field(default_factory=dict) + """Keyword arguments to pass in to the vector store search.""" + structured_query_translator: Visitor + """Translator for turning internal query language into vectorstore search params.""" + verbose: bool = False + + use_original_query: bool = False + """Use original query instead of the revised new query from LLM""" + + class Config: + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + allow_population_by_field_name = True + + @root_validator(pre=True) + def validate_translator(cls, values: Dict) -> Dict: + """Validate translator.""" + if "structured_query_translator" not in values: + values["structured_query_translator"] = _get_builtin_translator( + values["vectorstore"] + ) + return values + + @property + def llm_chain(self) -> Runnable: + """llm_chain is legacy name kept for backwards compatibility.""" + return self.query_constructor + + def _prepare_query( + self, query: str, structured_query: StructuredQuery + ) -> Tuple[str, Dict[str, Any]]: + new_query, new_kwargs = self.structured_query_translator.visit_structured_query( + structured_query + ) + if structured_query.limit is not None: + new_kwargs["k"] = structured_query.limit + if self.use_original_query: + new_query = query + search_kwargs = {**self.search_kwargs, **new_kwargs} + return new_query, search_kwargs + + def _get_docs_with_query( + self, query: str, 
search_kwargs: Dict[str, Any] + ) -> List[Document]: + docs = self.vectorstore.search(query, self.search_type, **search_kwargs) + return docs + + async def _aget_docs_with_query( + self, query: str, search_kwargs: Dict[str, Any] + ) -> List[Document]: + docs = await self.vectorstore.asearch(query, self.search_type, **search_kwargs) + return docs + + def _get_relevant_documents( + self, query: str, *, run_manager: CallbackManagerForRetrieverRun + ) -> List[Document]: + """Get documents relevant for a query. + + Args: + query: string to find relevant documents for + + Returns: + List of relevant documents + """ + structured_query = self.query_constructor.invoke( + {"query": query}, config={"callbacks": run_manager.get_child()} + ) + if self.verbose: + logger.info(f"Generated Query: {structured_query}") + new_query, search_kwargs = self._prepare_query(query, structured_query) + docs = self._get_docs_with_query(new_query, search_kwargs) + return docs + + async def _aget_relevant_documents( + self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun + ) -> List[Document]: + """Get documents relevant for a query. 
+ + Args: + query: string to find relevant documents for + + Returns: + List of relevant documents + """ + structured_query = await self.query_constructor.ainvoke( + {"query": query}, config={"callbacks": run_manager.get_child()} + ) + if self.verbose: + logger.info(f"Generated Query: {structured_query}") + new_query, search_kwargs = self._prepare_query(query, structured_query) + docs = await self._aget_docs_with_query(new_query, search_kwargs) + return docs + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + vectorstore: VectorStore, + document_contents: str, + metadata_field_info: Sequence[Union[AttributeInfo, dict]], + structured_query_translator: Optional[Visitor] = None, + chain_kwargs: Optional[Dict] = None, + enable_limit: bool = False, + use_original_query: bool = False, + **kwargs: Any, + ) -> "SelfQueryRetriever": + if structured_query_translator is None: + structured_query_translator = _get_builtin_translator(vectorstore) + chain_kwargs = chain_kwargs or {} + + if ( + "allowed_comparators" not in chain_kwargs + and structured_query_translator.allowed_comparators is not None + ): + chain_kwargs[ + "allowed_comparators" + ] = structured_query_translator.allowed_comparators + if ( + "allowed_operators" not in chain_kwargs + and structured_query_translator.allowed_operators is not None + ): + chain_kwargs[ + "allowed_operators" + ] = structured_query_translator.allowed_operators + query_constructor = load_query_constructor_runnable( + llm, + document_contents, + metadata_field_info, + enable_limit=enable_limit, + **chain_kwargs, + ) + return cls( + query_constructor=query_constructor, + vectorstore=vectorstore, + use_original_query=use_original_query, + structured_query_translator=structured_query_translator, + **kwargs, + ) diff --git a/libs/community/langchain_community/retrievers/self_query/chroma.py b/libs/community/langchain_community/retrievers/self_query/chroma.py new file mode 100644 index 0000000000000..ea1d32623187f --- /dev/null 
+++ b/libs/community/langchain_community/retrievers/self_query/chroma.py @@ -0,0 +1,50 @@ +from typing import Dict, Tuple, Union + +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, + Visitor, +) + + +class ChromaTranslator(Visitor): + """Translate `Chroma` internal query language elements to valid filters.""" + + allowed_operators = [Operator.AND, Operator.OR] + """Subset of allowed logical operators.""" + allowed_comparators = [ + Comparator.EQ, + Comparator.NE, + Comparator.GT, + Comparator.GTE, + Comparator.LT, + Comparator.LTE, + ] + """Subset of allowed logical comparators.""" + + def _format_func(self, func: Union[Operator, Comparator]) -> str: + self._validate_func(func) + return f"${func.value}" + + def visit_operation(self, operation: Operation) -> Dict: + args = [arg.accept(self) for arg in operation.arguments] + return {self._format_func(operation.operator): args} + + def visit_comparison(self, comparison: Comparison) -> Dict: + return { + comparison.attribute: { + self._format_func(comparison.comparator): comparison.value + } + } + + def visit_structured_query( + self, structured_query: StructuredQuery + ) -> Tuple[str, dict]: + if structured_query.filter is None: + kwargs = {} + else: + kwargs = {"filter": structured_query.filter.accept(self)} + return structured_query.query, kwargs diff --git a/libs/community/langchain_community/retrievers/self_query/dashvector.py b/libs/community/langchain_community/retrievers/self_query/dashvector.py new file mode 100644 index 0000000000000..23dd9668759e2 --- /dev/null +++ b/libs/community/langchain_community/retrievers/self_query/dashvector.py @@ -0,0 +1,64 @@ +"""Logic for converting internal query language to a valid DashVector query.""" +from typing import Tuple, Union + +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, + Visitor, +) + + +class 
DashvectorTranslator(Visitor): + """Logic for converting internal query language elements to valid filters.""" + + allowed_operators = [Operator.AND, Operator.OR] + allowed_comparators = [ + Comparator.EQ, + Comparator.GT, + Comparator.GTE, + Comparator.LT, + Comparator.LTE, + Comparator.LIKE, + ] + + map_dict = { + Operator.AND: " AND ", + Operator.OR: " OR ", + Comparator.EQ: " = ", + Comparator.GT: " > ", + Comparator.GTE: " >= ", + Comparator.LT: " < ", + Comparator.LTE: " <= ", + Comparator.LIKE: " LIKE ", + } + + def _format_func(self, func: Union[Operator, Comparator]) -> str: + self._validate_func(func) + return self.map_dict[func] + + def visit_operation(self, operation: Operation) -> str: + args = [arg.accept(self) for arg in operation.arguments] + return self._format_func(operation.operator).join(args) + + def visit_comparison(self, comparison: Comparison) -> str: + value = comparison.value + if isinstance(value, str): + if comparison.comparator == Comparator.LIKE: + value = f"'%{value}%'" + else: + value = f"'{value}'" + return ( + f"{comparison.attribute}{self._format_func(comparison.comparator)}{value}" + ) + + def visit_structured_query( + self, structured_query: StructuredQuery + ) -> Tuple[str, dict]: + if structured_query.filter is None: + kwargs = {} + else: + kwargs = {"filter": structured_query.filter.accept(self)} + return structured_query.query, kwargs diff --git a/libs/community/langchain_community/retrievers/self_query/deeplake.py b/libs/community/langchain_community/retrievers/self_query/deeplake.py new file mode 100644 index 0000000000000..d76adae6fedf5 --- /dev/null +++ b/libs/community/langchain_community/retrievers/self_query/deeplake.py @@ -0,0 +1,88 @@ +"""Logic for converting internal query language to a valid DeepLake query.""" +from typing import Tuple, Union + +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, + Visitor, +) + +COMPARATOR_TO_TQL = { + 
Comparator.EQ: "==", + Comparator.GT: ">", + Comparator.GTE: ">=", + Comparator.LT: "<", + Comparator.LTE: "<=", +} + + +OPERATOR_TO_TQL = { + Operator.AND: "and", + Operator.OR: "or", + Operator.NOT: "NOT", +} + + +def can_cast_to_float(string: str) -> bool: + """Check if a string can be cast to a float.""" + try: + float(string) + return True + except ValueError: + return False + + +class DeepLakeTranslator(Visitor): + """Translate `DeepLake` internal query language elements to valid filters.""" + + allowed_operators = [Operator.AND, Operator.OR, Operator.NOT] + """Subset of allowed logical operators.""" + allowed_comparators = [ + Comparator.EQ, + Comparator.GT, + Comparator.GTE, + Comparator.LT, + Comparator.LTE, + ] + """Subset of allowed logical comparators.""" + + def _format_func(self, func: Union[Operator, Comparator]) -> str: + self._validate_func(func) + if isinstance(func, Operator): + value = OPERATOR_TO_TQL[func.value] # type: ignore + elif isinstance(func, Comparator): + value = COMPARATOR_TO_TQL[func.value] # type: ignore + return f"{value}" + + def visit_operation(self, operation: Operation) -> str: + args = [arg.accept(self) for arg in operation.arguments] + operator = self._format_func(operation.operator) + return "(" + (" " + operator + " ").join(args) + ")" + + def visit_comparison(self, comparison: Comparison) -> str: + comparator = self._format_func(comparison.comparator) + values = comparison.value + if isinstance(values, list): + tql = [] + for value in values: + comparison.value = value + tql.append(self.visit_comparison(comparison)) + + return "(" + (" or ").join(tql) + ")" + + if not can_cast_to_float(comparison.value): + values = f"'{values}'" + return f"metadata['{comparison.attribute}'] {comparator} {values}" + + def visit_structured_query( + self, structured_query: StructuredQuery + ) -> Tuple[str, dict]: + if structured_query.filter is None: + kwargs = {} + else: + tqL = f"SELECT * WHERE {structured_query.filter.accept(self)}" + 
kwargs = {"tql": tqL} + return structured_query.query, kwargs diff --git a/libs/community/langchain_community/retrievers/self_query/elasticsearch.py b/libs/community/langchain_community/retrievers/self_query/elasticsearch.py new file mode 100644 index 0000000000000..c4ea733ad6815 --- /dev/null +++ b/libs/community/langchain_community/retrievers/self_query/elasticsearch.py @@ -0,0 +1,97 @@ +from typing import Dict, Tuple, Union + +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, + Visitor, +) + + +class ElasticsearchTranslator(Visitor): + """Translate `Elasticsearch` internal query language elements to valid filters.""" + + allowed_comparators = [ + Comparator.EQ, + Comparator.GT, + Comparator.GTE, + Comparator.LT, + Comparator.LTE, + Comparator.CONTAIN, + Comparator.LIKE, + ] + """Subset of allowed logical comparators.""" + + allowed_operators = [Operator.AND, Operator.OR, Operator.NOT] + """Subset of allowed logical operators.""" + + def _format_func(self, func: Union[Operator, Comparator]) -> str: + self._validate_func(func) + map_dict = { + Operator.OR: "should", + Operator.NOT: "must_not", + Operator.AND: "must", + Comparator.EQ: "term", + Comparator.GT: "gt", + Comparator.GTE: "gte", + Comparator.LT: "lt", + Comparator.LTE: "lte", + Comparator.CONTAIN: "match", + Comparator.LIKE: "match", + } + return map_dict[func] + + def visit_operation(self, operation: Operation) -> Dict: + args = [arg.accept(self) for arg in operation.arguments] + + return {"bool": {self._format_func(operation.operator): args}} + + def visit_comparison(self, comparison: Comparison) -> Dict: + # ElasticsearchStore filters require to target + # the metadata object field + field = f"metadata.{comparison.attribute}" + + is_range_comparator = comparison.comparator in [ + Comparator.GT, + Comparator.GTE, + Comparator.LT, + Comparator.LTE, + ] + + if is_range_comparator: + return { + "range": { + field: 
{self._format_func(comparison.comparator): comparison.value} + } + } + + if comparison.comparator == Comparator.CONTAIN: + return { + self._format_func(comparison.comparator): { + field: {"query": comparison.value} + } + } + + if comparison.comparator == Comparator.LIKE: + return { + self._format_func(comparison.comparator): { + field: {"query": comparison.value, "fuzziness": "AUTO"} + } + } + + # we assume that if the value is a string, + # we want to use the keyword field + field = f"{field}.keyword" if isinstance(comparison.value, str) else field + + return {self._format_func(comparison.comparator): {field: comparison.value}} + + def visit_structured_query( + self, structured_query: StructuredQuery + ) -> Tuple[str, dict]: + if structured_query.filter is None: + kwargs = {} + else: + kwargs = {"filter": [structured_query.filter.accept(self)]} + return structured_query.query, kwargs diff --git a/libs/community/langchain_community/retrievers/self_query/milvus.py b/libs/community/langchain_community/retrievers/self_query/milvus.py new file mode 100644 index 0000000000000..fd7f24cd68bbd --- /dev/null +++ b/libs/community/langchain_community/retrievers/self_query/milvus.py @@ -0,0 +1,103 @@ +"""Logic for converting internal query language to a valid Milvus query.""" +from typing import Tuple, Union + +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, + Visitor, +) + +COMPARATOR_TO_BER = { + Comparator.EQ: "==", + Comparator.GT: ">", + Comparator.GTE: ">=", + Comparator.LT: "<", + Comparator.LTE: "<=", + Comparator.IN: "in", + Comparator.LIKE: "like", +} + +UNARY_OPERATORS = [Operator.NOT] + + +def process_value(value: Union[int, float, str], comparator: Comparator) -> str: + """Convert a value to a string and add double quotes if it is a string. + + It is required for comparators involving strings. + + Args: + value: The value to convert. + comparator: The comparator. 
+ + Returns: + The converted value as a string. + """ + # + if isinstance(value, str): + if comparator is Comparator.LIKE: + # If the comparator is LIKE, add a percent sign after it for prefix matching + # and add double quotes + return f'"{value}%"' + else: + # If the value is already a string, add double quotes + return f'"{value}"' + else: + # If the value is not a string, convert it to a string without double quotes + return str(value) + + +class MilvusTranslator(Visitor): + """Translate Milvus internal query language elements to valid filters.""" + + """Subset of allowed logical operators.""" + allowed_operators = [Operator.AND, Operator.NOT, Operator.OR] + + """Subset of allowed logical comparators.""" + allowed_comparators = [ + Comparator.EQ, + Comparator.GT, + Comparator.GTE, + Comparator.LT, + Comparator.LTE, + Comparator.IN, + Comparator.LIKE, + ] + + def _format_func(self, func: Union[Operator, Comparator]) -> str: + self._validate_func(func) + value = func.value + if isinstance(func, Comparator): + value = COMPARATOR_TO_BER[func] + return f"{value}" + + def visit_operation(self, operation: Operation) -> str: + if operation.operator in UNARY_OPERATORS and len(operation.arguments) == 1: + operator = self._format_func(operation.operator) + return operator + "(" + operation.arguments[0].accept(self) + ")" + elif operation.operator in UNARY_OPERATORS: + raise ValueError( + f'"{operation.operator.value}" can have only one argument in Milvus' + ) + else: + args = [arg.accept(self) for arg in operation.arguments] + operator = self._format_func(operation.operator) + return "(" + (" " + operator + " ").join(args) + ")" + + def visit_comparison(self, comparison: Comparison) -> str: + comparator = self._format_func(comparison.comparator) + processed_value = process_value(comparison.value, comparison.comparator) + attribute = comparison.attribute + + return "( " + attribute + " " + comparator + " " + processed_value + " )" + + def visit_structured_query( + self, 
structured_query: StructuredQuery + ) -> Tuple[str, dict]: + if structured_query.filter is None: + kwargs = {} + else: + kwargs = {"expr": structured_query.filter.accept(self)} + return structured_query.query, kwargs diff --git a/libs/community/langchain_community/retrievers/self_query/mongodb_atlas.py b/libs/community/langchain_community/retrievers/self_query/mongodb_atlas.py new file mode 100644 index 0000000000000..4df8b824fd724 --- /dev/null +++ b/libs/community/langchain_community/retrievers/self_query/mongodb_atlas.py @@ -0,0 +1,74 @@ +"""Logic for converting internal query language to a valid MongoDB Atlas query.""" +from typing import Dict, Tuple, Union + +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, + Visitor, +) + +MULTIPLE_ARITY_COMPARATORS = [Comparator.IN, Comparator.NIN] + + +class MongoDBAtlasTranslator(Visitor): + """Translate Mongo internal query language elements to valid filters.""" + + """Subset of allowed logical comparators.""" + allowed_comparators = [ + Comparator.EQ, + Comparator.NE, + Comparator.GT, + Comparator.GTE, + Comparator.LT, + Comparator.LTE, + Comparator.IN, + Comparator.NIN, + ] + + """Subset of allowed logical operators.""" + allowed_operators = [Operator.AND, Operator.OR] + + ## Convert an operator or a comparator to Mongo Query Format + def _format_func(self, func: Union[Operator, Comparator]) -> str: + self._validate_func(func) + map_dict = { + Operator.AND: "$and", + Operator.OR: "$or", + Comparator.EQ: "$eq", + Comparator.NE: "$ne", + Comparator.GTE: "$gte", + Comparator.LTE: "$lte", + Comparator.LT: "$lt", + Comparator.GT: "$gt", + Comparator.IN: "$in", + Comparator.NIN: "$nin", + } + return map_dict[func] + + def visit_operation(self, operation: Operation) -> Dict: + args = [arg.accept(self) for arg in operation.arguments] + return {self._format_func(operation.operator): args} + + def visit_comparison(self, comparison: Comparison) -> Dict: + if 
comparison.comparator in MULTIPLE_ARITY_COMPARATORS and not isinstance( + comparison.value, list + ): + comparison.value = [comparison.value] + + comparator = self._format_func(comparison.comparator) + + attribute = comparison.attribute + + return {attribute: {comparator: comparison.value}} + + def visit_structured_query( + self, structured_query: StructuredQuery + ) -> Tuple[str, dict]: + if structured_query.filter is None: + kwargs = {} + else: + kwargs = {"pre_filter": structured_query.filter.accept(self)} + return structured_query.query, kwargs diff --git a/libs/community/langchain_community/retrievers/self_query/myscale.py b/libs/community/langchain_community/retrievers/self_query/myscale.py new file mode 100644 index 0000000000000..642d0066f2f8b --- /dev/null +++ b/libs/community/langchain_community/retrievers/self_query/myscale.py @@ -0,0 +1,125 @@ +import re +from typing import Any, Callable, Dict, Tuple + +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, + Visitor, +) + + +def _DEFAULT_COMPOSER(op_name: str) -> Callable: + """ + Default composer for logical operators. + + Args: + op_name: Name of the operator. + + Returns: + Callable that takes a list of arguments and returns a string. + """ + + def f(*args: Any) -> str: + args_: map[str] = map(str, args) + return f" {op_name} ".join(args_) + + return f + + +def _FUNCTION_COMPOSER(op_name: str) -> Callable: + """ + Composer for functions. + + Args: + op_name: Name of the function. + + Returns: + Callable that takes a list of arguments and returns a string. 
+ """ + + def f(*args: Any) -> str: + args_: map[str] = map(str, args) + return f"{op_name}({','.join(args_)})" + + return f + + +class MyScaleTranslator(Visitor): + """Translate `MyScale` internal query language elements to valid filters.""" + + allowed_operators = [Operator.AND, Operator.OR, Operator.NOT] + """Subset of allowed logical operators.""" + + allowed_comparators = [ + Comparator.EQ, + Comparator.GT, + Comparator.GTE, + Comparator.LT, + Comparator.LTE, + Comparator.CONTAIN, + Comparator.LIKE, + ] + + map_dict = { + Operator.AND: _DEFAULT_COMPOSER("AND"), + Operator.OR: _DEFAULT_COMPOSER("OR"), + Operator.NOT: _DEFAULT_COMPOSER("NOT"), + Comparator.EQ: _DEFAULT_COMPOSER("="), + Comparator.GT: _DEFAULT_COMPOSER(">"), + Comparator.GTE: _DEFAULT_COMPOSER(">="), + Comparator.LT: _DEFAULT_COMPOSER("<"), + Comparator.LTE: _DEFAULT_COMPOSER("<="), + Comparator.CONTAIN: _FUNCTION_COMPOSER("has"), + Comparator.LIKE: _DEFAULT_COMPOSER("ILIKE"), + } + + def __init__(self, metadata_key: str = "metadata") -> None: + super().__init__() + self.metadata_key = metadata_key + + def visit_operation(self, operation: Operation) -> Dict: + args = [arg.accept(self) for arg in operation.arguments] + func = operation.operator + self._validate_func(func) + return self.map_dict[func](*args) + + def visit_comparison(self, comparison: Comparison) -> Dict: + regex = r"\((.*?)\)" + matched = re.search(r"\(\w+\)", comparison.attribute) + + # If arbitrary function is applied to an attribute + if matched: + attr = re.sub( + regex, + f"({self.metadata_key}.{matched.group(0)[1:-1]})", + comparison.attribute, + ) + else: + attr = f"{self.metadata_key}.{comparison.attribute}" + value = comparison.value + comp = comparison.comparator + + value = f"'{value}'" if isinstance(value, str) else value + + # convert timestamp for datetime objects + if isinstance(value, dict) and value.get("type") == "date": + attr = f"parseDateTime32BestEffort({attr})" + value = 
f"parseDateTime32BestEffort('{value['date']}')" + + # string pattern match + if comp is Comparator.LIKE: + value = f"'%{value[1:-1]}%'" + return self.map_dict[comp](attr, value) + + def visit_structured_query( + self, structured_query: StructuredQuery + ) -> Tuple[str, dict]: + print(structured_query) + if structured_query.filter is None: + kwargs = {} + else: + kwargs = {"where_str": structured_query.filter.accept(self)} + return structured_query.query, kwargs diff --git a/libs/community/langchain_community/retrievers/self_query/opensearch.py b/libs/community/langchain_community/retrievers/self_query/opensearch.py new file mode 100644 index 0000000000000..3552412393485 --- /dev/null +++ b/libs/community/langchain_community/retrievers/self_query/opensearch.py @@ -0,0 +1,104 @@ +from typing import Dict, Tuple, Union + +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, + Visitor, +) + + +class OpenSearchTranslator(Visitor): + """Translate `OpenSearch` internal query domain-specific + language elements to valid filters.""" + + allowed_comparators = [ + Comparator.EQ, + Comparator.LT, + Comparator.LTE, + Comparator.GT, + Comparator.GTE, + Comparator.CONTAIN, + Comparator.LIKE, + ] + """Subset of allowed logical comparators.""" + + allowed_operators = [Operator.AND, Operator.OR, Operator.NOT] + """Subset of allowed logical operators.""" + + def _format_func(self, func: Union[Operator, Comparator]) -> str: + self._validate_func(func) + comp_operator_map = { + Comparator.EQ: "term", + Comparator.LT: "lt", + Comparator.LTE: "lte", + Comparator.GT: "gt", + Comparator.GTE: "gte", + Comparator.CONTAIN: "match", + Comparator.LIKE: "fuzzy", + Operator.AND: "must", + Operator.OR: "should", + Operator.NOT: "must_not", + } + return comp_operator_map[func] + + def visit_operation(self, operation: Operation) -> Dict: + args = [arg.accept(self) for arg in operation.arguments] + + return {"bool": 
{self._format_func(operation.operator): args}} + + def visit_comparison(self, comparison: Comparison) -> Dict: + field = f"metadata.{comparison.attribute}" + + if comparison.comparator in [ + Comparator.LT, + Comparator.LTE, + Comparator.GT, + Comparator.GTE, + ]: + if isinstance(comparison.value, dict): + if "date" in comparison.value: + return { + "range": { + field: { + self._format_func( + comparison.comparator + ): comparison.value["date"] + } + } + } + else: + return { + "range": { + field: { + self._format_func(comparison.comparator): comparison.value + } + } + } + + if comparison.comparator == Comparator.LIKE: + return { + self._format_func(comparison.comparator): { + field: {"value": comparison.value} + } + } + + field = f"{field}.keyword" if isinstance(comparison.value, str) else field + + if isinstance(comparison.value, dict): + if "date" in comparison.value: + comparison.value = comparison.value["date"] + + return {self._format_func(comparison.comparator): {field: comparison.value}} + + def visit_structured_query( + self, structured_query: StructuredQuery + ) -> Tuple[str, dict]: + if structured_query.filter is None: + kwargs = {} + else: + kwargs = {"filter": structured_query.filter.accept(self)} + + return structured_query.query, kwargs diff --git a/libs/community/langchain_community/retrievers/self_query/pgvector.py b/libs/community/langchain_community/retrievers/self_query/pgvector.py new file mode 100644 index 0000000000000..15de6702a87bd --- /dev/null +++ b/libs/community/langchain_community/retrievers/self_query/pgvector.py @@ -0,0 +1,52 @@ +from typing import Dict, Tuple, Union + +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, + Visitor, +) + + +class PGVectorTranslator(Visitor): + """Translate `PGVector` internal query language elements to valid filters.""" + + allowed_operators = [Operator.AND, Operator.OR] + """Subset of allowed logical operators.""" + 
allowed_comparators = [ + Comparator.EQ, + Comparator.NE, + Comparator.GT, + Comparator.LT, + Comparator.IN, + Comparator.NIN, + Comparator.CONTAIN, + Comparator.LIKE, + ] + """Subset of allowed logical comparators.""" + + def _format_func(self, func: Union[Operator, Comparator]) -> str: + self._validate_func(func) + return f"{func.value}" + + def visit_operation(self, operation: Operation) -> Dict: + args = [arg.accept(self) for arg in operation.arguments] + return {self._format_func(operation.operator): args} + + def visit_comparison(self, comparison: Comparison) -> Dict: + return { + comparison.attribute: { + self._format_func(comparison.comparator): comparison.value + } + } + + def visit_structured_query( + self, structured_query: StructuredQuery + ) -> Tuple[str, dict]: + if structured_query.filter is None: + kwargs = {} + else: + kwargs = {"filter": structured_query.filter.accept(self)} + return structured_query.query, kwargs diff --git a/libs/community/langchain_community/retrievers/self_query/pinecone.py b/libs/community/langchain_community/retrievers/self_query/pinecone.py new file mode 100644 index 0000000000000..61c654abad26e --- /dev/null +++ b/libs/community/langchain_community/retrievers/self_query/pinecone.py @@ -0,0 +1,57 @@ +from typing import Dict, Tuple, Union + +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, + Visitor, +) + + +class PineconeTranslator(Visitor): + """Translate `Pinecone` internal query language elements to valid filters.""" + + allowed_comparators = ( + Comparator.EQ, + Comparator.NE, + Comparator.LT, + Comparator.LTE, + Comparator.GT, + Comparator.GTE, + Comparator.IN, + Comparator.NIN, + ) + """Subset of allowed logical comparators.""" + allowed_operators = (Operator.AND, Operator.OR) + """Subset of allowed logical operators.""" + + def _format_func(self, func: Union[Operator, Comparator]) -> str: + self._validate_func(func) + return f"${func.value}" + + 
def visit_operation(self, operation: Operation) -> Dict: + args = [arg.accept(self) for arg in operation.arguments] + return {self._format_func(operation.operator): args} + + def visit_comparison(self, comparison: Comparison) -> Dict: + if comparison.comparator in (Comparator.IN, Comparator.NIN) and not isinstance( + comparison.value, list + ): + comparison.value = [comparison.value] + + return { + comparison.attribute: { + self._format_func(comparison.comparator): comparison.value + } + } + + def visit_structured_query( + self, structured_query: StructuredQuery + ) -> Tuple[str, dict]: + if structured_query.filter is None: + kwargs = {} + else: + kwargs = {"filter": structured_query.filter.accept(self)} + return structured_query.query, kwargs diff --git a/libs/community/langchain_community/retrievers/self_query/qdrant.py b/libs/community/langchain_community/retrievers/self_query/qdrant.py new file mode 100644 index 0000000000000..b8d82af464d2b --- /dev/null +++ b/libs/community/langchain_community/retrievers/self_query/qdrant.py @@ -0,0 +1,98 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Tuple + +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, + Visitor, +) + +if TYPE_CHECKING: + from qdrant_client.http import models as rest + + +class QdrantTranslator(Visitor): + """Translate `Qdrant` internal query language elements to valid filters.""" + + allowed_operators = ( + Operator.AND, + Operator.OR, + Operator.NOT, + ) + """Subset of allowed logical operators.""" + + allowed_comparators = ( + Comparator.EQ, + Comparator.LT, + Comparator.LTE, + Comparator.GT, + Comparator.GTE, + Comparator.LIKE, + ) + """Subset of allowed logical comparators.""" + + def __init__(self, metadata_key: str): + self.metadata_key = metadata_key + + def visit_operation(self, operation: Operation) -> rest.Filter: + try: + from qdrant_client.http import models as rest + except ImportError as e: 
+ raise ImportError( + "Cannot import qdrant_client. Please install with `pip install " + "qdrant-client`." + ) from e + + args = [arg.accept(self) for arg in operation.arguments] + operator = { + Operator.AND: "must", + Operator.OR: "should", + Operator.NOT: "must_not", + }[operation.operator] + return rest.Filter(**{operator: args}) + + def visit_comparison(self, comparison: Comparison) -> rest.FieldCondition: + try: + from qdrant_client.http import models as rest + except ImportError as e: + raise ImportError( + "Cannot import qdrant_client. Please install with `pip install " + "qdrant-client`." + ) from e + + self._validate_func(comparison.comparator) + attribute = self.metadata_key + "." + comparison.attribute + if comparison.comparator == Comparator.EQ: + return rest.FieldCondition( + key=attribute, match=rest.MatchValue(value=comparison.value) + ) + if comparison.comparator == Comparator.LIKE: + return rest.FieldCondition( + key=attribute, match=rest.MatchText(text=comparison.value) + ) + kwargs = {comparison.comparator.value: comparison.value} + return rest.FieldCondition(key=attribute, range=rest.Range(**kwargs)) + + def visit_structured_query( + self, structured_query: StructuredQuery + ) -> Tuple[str, dict]: + try: + from qdrant_client.http import models as rest + except ImportError as e: + raise ImportError( + "Cannot import qdrant_client. Please install with `pip install " + "qdrant-client`." 
+ ) from e + + if structured_query.filter is None: + kwargs = {} + else: + filter = structured_query.filter.accept(self) + if isinstance(filter, rest.FieldCondition): + filter = rest.Filter(must=[filter]) + kwargs = {"filter": filter} + return structured_query.query, kwargs diff --git a/libs/community/langchain_community/retrievers/self_query/redis.py b/libs/community/langchain_community/retrievers/self_query/redis.py new file mode 100644 index 0000000000000..5d64f8a776f8d --- /dev/null +++ b/libs/community/langchain_community/retrievers/self_query/redis.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +from typing import Any, Tuple + +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, + Visitor, +) + +from langchain_community.vectorstores.redis import Redis +from langchain_community.vectorstores.redis.filters import ( + RedisFilterExpression, + RedisFilterField, + RedisFilterOperator, + RedisNum, + RedisTag, + RedisText, +) +from langchain_community.vectorstores.redis.schema import RedisModel + +_COMPARATOR_TO_BUILTIN_METHOD = { + Comparator.EQ: "__eq__", + Comparator.NE: "__ne__", + Comparator.LT: "__lt__", + Comparator.GT: "__gt__", + Comparator.LTE: "__le__", + Comparator.GTE: "__ge__", + Comparator.CONTAIN: "__eq__", + Comparator.LIKE: "__mod__", +} + + +class RedisTranslator(Visitor): + """Visitor for translating structured queries to Redis filter expressions.""" + + allowed_comparators = ( + Comparator.EQ, + Comparator.NE, + Comparator.LT, + Comparator.LTE, + Comparator.GT, + Comparator.GTE, + Comparator.CONTAIN, + Comparator.LIKE, + ) + """Subset of allowed logical comparators.""" + allowed_operators = (Operator.AND, Operator.OR) + """Subset of allowed logical operators.""" + + def __init__(self, schema: RedisModel) -> None: + self._schema = schema + + def _attribute_to_filter_field(self, attribute: str) -> RedisFilterField: + if attribute in [tf.name for tf in 
self._schema.text]: + return RedisText(attribute) + elif attribute in [tf.name for tf in self._schema.tag or []]: + return RedisTag(attribute) + elif attribute in [tf.name for tf in self._schema.numeric or []]: + return RedisNum(attribute) + else: + raise ValueError( + f"Invalid attribute {attribute} not in vector store schema. Schema is:" + f"\n{self._schema.as_dict()}" + ) + + def visit_comparison(self, comparison: Comparison) -> RedisFilterExpression: + filter_field = self._attribute_to_filter_field(comparison.attribute) + comparison_method = _COMPARATOR_TO_BUILTIN_METHOD[comparison.comparator] + return getattr(filter_field, comparison_method)(comparison.value) + + def visit_operation(self, operation: Operation) -> Any: + left = operation.arguments[0].accept(self) + if len(operation.arguments) > 2: + right = self.visit_operation( + Operation( + operator=operation.operator, arguments=operation.arguments[1:] + ) + ) + else: + right = operation.arguments[1].accept(self) + redis_operator = ( + RedisFilterOperator.OR + if operation.operator == Operator.OR + else RedisFilterOperator.AND + ) + return RedisFilterExpression(operator=redis_operator, left=left, right=right) + + def visit_structured_query( + self, structured_query: StructuredQuery + ) -> Tuple[str, dict]: + if structured_query.filter is None: + kwargs = {} + else: + kwargs = {"filter": structured_query.filter.accept(self)} + return structured_query.query, kwargs + + @classmethod + def from_vectorstore(cls, vectorstore: Redis) -> RedisTranslator: + return cls(vectorstore._schema) diff --git a/libs/community/langchain_community/retrievers/self_query/supabase.py b/libs/community/langchain_community/retrievers/self_query/supabase.py new file mode 100644 index 0000000000000..5f196971bd465 --- /dev/null +++ b/libs/community/langchain_community/retrievers/self_query/supabase.py @@ -0,0 +1,97 @@ +from typing import Any, Dict, Tuple + +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + 
Operation, + Operator, + StructuredQuery, + Visitor, +) + + +class SupabaseVectorTranslator(Visitor): + """Translate Langchain filters to Supabase PostgREST filters.""" + + allowed_operators = [Operator.AND, Operator.OR] + """Subset of allowed logical operators.""" + + allowed_comparators = [ + Comparator.EQ, + Comparator.NE, + Comparator.GT, + Comparator.GTE, + Comparator.LT, + Comparator.LTE, + Comparator.LIKE, + ] + """Subset of allowed logical comparators.""" + + metadata_column = "metadata" + + def _map_comparator(self, comparator: Comparator) -> str: + """ + Maps Langchain comparator to PostgREST comparator: + + https://postgrest.org/en/stable/references/api/tables_views.html#operators + """ + postgrest_comparator = { + Comparator.EQ: "eq", + Comparator.NE: "neq", + Comparator.GT: "gt", + Comparator.GTE: "gte", + Comparator.LT: "lt", + Comparator.LTE: "lte", + Comparator.LIKE: "like", + }.get(comparator) + + if postgrest_comparator is None: + raise Exception( + f"Comparator '{comparator}' is not currently " + "supported in Supabase Vector" + ) + + return postgrest_comparator + + def _get_json_operator(self, value: Any) -> str: + if isinstance(value, str): + return "->>" + else: + return "->" + + def visit_operation(self, operation: Operation) -> str: + args = [arg.accept(self) for arg in operation.arguments] + return f"{operation.operator.value}({','.join(args)})" + + def visit_comparison(self, comparison: Comparison) -> str: + if isinstance(comparison.value, list): + return self.visit_operation( + Operation( + operator=Operator.AND, + arguments=( + Comparison( + comparator=comparison.comparator, + attribute=comparison.attribute, + value=value, + ) + for value in comparison.value + ), + ) + ) + + return ".".join( + [ + f"{self.metadata_column}{self._get_json_operator(comparison.value)}{comparison.attribute}", + f"{self._map_comparator(comparison.comparator)}", + f"{comparison.value}", + ] + ) + + def visit_structured_query( + self, structured_query: 
StructuredQuery + ) -> Tuple[str, Dict[str, str]]: + if structured_query.filter is None: + kwargs = {} + else: + kwargs = {"postgrest_filter": structured_query.filter.accept(self)} + return structured_query.query, kwargs diff --git a/libs/community/langchain_community/retrievers/self_query/timescalevector.py b/libs/community/langchain_community/retrievers/self_query/timescalevector.py new file mode 100644 index 0000000000000..daf954d45ec7c --- /dev/null +++ b/libs/community/langchain_community/retrievers/self_query/timescalevector.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Tuple, Union + +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, + Visitor, +) + +if TYPE_CHECKING: + from timescale_vector import client + + +class TimescaleVectorTranslator(Visitor): + """Translate the internal query language elements to valid filters.""" + + allowed_operators = [Operator.AND, Operator.OR, Operator.NOT] + """Subset of allowed logical operators.""" + + allowed_comparators = [ + Comparator.EQ, + Comparator.GT, + Comparator.GTE, + Comparator.LT, + Comparator.LTE, + ] + + COMPARATOR_MAP = { + Comparator.EQ: "==", + Comparator.GT: ">", + Comparator.GTE: ">=", + Comparator.LT: "<", + Comparator.LTE: "<=", + } + + OPERATOR_MAP = {Operator.AND: "AND", Operator.OR: "OR", Operator.NOT: "NOT"} + + def _format_func(self, func: Union[Operator, Comparator]) -> str: + self._validate_func(func) + if isinstance(func, Operator): + value = self.OPERATOR_MAP[func.value] # type: ignore + elif isinstance(func, Comparator): + value = self.COMPARATOR_MAP[func.value] # type: ignore + return f"{value}" + + def visit_operation(self, operation: Operation) -> client.Predicates: + try: + from timescale_vector import client + except ImportError as e: + raise ImportError( + "Cannot import timescale-vector. Please install with `pip install " + "timescale-vector`." 
+ ) from e + args = [arg.accept(self) for arg in operation.arguments] + return client.Predicates(*args, operator=self._format_func(operation.operator)) + + def visit_comparison(self, comparison: Comparison) -> client.Predicates: + try: + from timescale_vector import client + except ImportError as e: + raise ImportError( + "Cannot import timescale-vector. Please install with `pip install " + "timescale-vector`." + ) from e + return client.Predicates( + ( + comparison.attribute, + self._format_func(comparison.comparator), + comparison.value, + ) + ) + + def visit_structured_query( + self, structured_query: StructuredQuery + ) -> Tuple[str, dict]: + if structured_query.filter is None: + kwargs = {} + else: + kwargs = {"predicates": structured_query.filter.accept(self)} + return structured_query.query, kwargs diff --git a/libs/community/langchain_community/retrievers/self_query/vectara.py b/libs/community/langchain_community/retrievers/self_query/vectara.py new file mode 100644 index 0000000000000..4854fb7a54760 --- /dev/null +++ b/libs/community/langchain_community/retrievers/self_query/vectara.py @@ -0,0 +1,70 @@ +from typing import Tuple, Union + +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, + Visitor, +) + + +def process_value(value: Union[int, float, str]) -> str: + """Convert a value to a string and add single quotes if it is a string.""" + if isinstance(value, str): + return f"'{value}'" + else: + return str(value) + + +class VectaraTranslator(Visitor): + """Translate `Vectara` internal query language elements to valid filters.""" + + allowed_operators = [Operator.AND, Operator.OR] + """Subset of allowed logical operators.""" + allowed_comparators = [ + Comparator.EQ, + Comparator.NE, + Comparator.GT, + Comparator.GTE, + Comparator.LT, + Comparator.LTE, + ] + """Subset of allowed logical comparators.""" + + def _format_func(self, func: Union[Operator, Comparator]) -> str: + map_dict = { + 
Operator.AND: " and ", + Operator.OR: " or ", + Comparator.EQ: "=", + Comparator.NE: "!=", + Comparator.GT: ">", + Comparator.GTE: ">=", + Comparator.LT: "<", + Comparator.LTE: "<=", + } + self._validate_func(func) + return map_dict[func] + + def visit_operation(self, operation: Operation) -> str: + args = [arg.accept(self) for arg in operation.arguments] + operator = self._format_func(operation.operator) + return "( " + operator.join(args) + " )" + + def visit_comparison(self, comparison: Comparison) -> str: + comparator = self._format_func(comparison.comparator) + processed_value = process_value(comparison.value) + attribute = comparison.attribute + return ( + "( " + "doc." + attribute + " " + comparator + " " + processed_value + " )" + ) + + def visit_structured_query( + self, structured_query: StructuredQuery + ) -> Tuple[str, dict]: + if structured_query.filter is None: + kwargs = {} + else: + kwargs = {"filter": structured_query.filter.accept(self)} + return structured_query.query, kwargs diff --git a/libs/community/langchain_community/retrievers/self_query/weaviate.py b/libs/community/langchain_community/retrievers/self_query/weaviate.py new file mode 100644 index 0000000000000..dd1f8e2475c32 --- /dev/null +++ b/libs/community/langchain_community/retrievers/self_query/weaviate.py @@ -0,0 +1,79 @@ +from datetime import datetime +from typing import Dict, Tuple, Union + +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, + Visitor, +) + + +class WeaviateTranslator(Visitor): + """Translate `Weaviate` internal query language elements to valid filters.""" + + allowed_operators = [Operator.AND, Operator.OR] + """Subset of allowed logical operators.""" + + allowed_comparators = [ + Comparator.EQ, + Comparator.NE, + Comparator.GTE, + Comparator.LTE, + Comparator.LT, + Comparator.GT, + ] + + def _format_func(self, func: Union[Operator, Comparator]) -> str: + self._validate_func(func) + # 
https://weaviate.io/developers/weaviate/api/graphql/filters + map_dict = { + Operator.AND: "And", + Operator.OR: "Or", + Comparator.EQ: "Equal", + Comparator.NE: "NotEqual", + Comparator.GTE: "GreaterThanEqual", + Comparator.LTE: "LessThanEqual", + Comparator.LT: "LessThan", + Comparator.GT: "GreaterThan", + } + return map_dict[func] + + def visit_operation(self, operation: Operation) -> Dict: + args = [arg.accept(self) for arg in operation.arguments] + return {"operator": self._format_func(operation.operator), "operands": args} + + def visit_comparison(self, comparison: Comparison) -> Dict: + value_type = "valueText" + value = comparison.value + if isinstance(comparison.value, bool): + value_type = "valueBoolean" + elif isinstance(comparison.value, float): + value_type = "valueNumber" + elif isinstance(comparison.value, int): + value_type = "valueInt" + elif ( + isinstance(comparison.value, dict) + and comparison.value.get("type") == "date" + ): + value_type = "valueDate" + # ISO 8601 timestamp, formatted as RFC3339 + date = datetime.strptime(comparison.value["date"], "%Y-%m-%d") + value = date.strftime("%Y-%m-%dT%H:%M:%SZ") + filter = { + "path": [comparison.attribute], + "operator": self._format_func(comparison.comparator), + value_type: value, + } + return filter + + def visit_structured_query( + self, structured_query: StructuredQuery + ) -> Tuple[str, dict]: + if structured_query.filter is None: + kwargs = {} + else: + kwargs = {"where_filter": structured_query.filter.accept(self)} + return structured_query.query, kwargs diff --git a/libs/core/langchain_core/sql_constructor/__init__.py b/libs/core/langchain_core/sql_constructor/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/core/langchain_core/sql_constructor/base.py b/libs/core/langchain_core/sql_constructor/base.py new file mode 100644 index 0000000000000..e4061ad8c38db --- /dev/null +++ b/libs/core/langchain_core/sql_constructor/base.py @@ -0,0 +1,361 @@ +"""LLM Chain 
for turning a user text query into a structured query.""" +from __future__ import annotations + +import json +from typing import Any, Callable, List, Optional, Sequence, Tuple, Union, cast + +from langchain_core.exceptions import OutputParserException +from langchain_core.language_models import BaseLanguageModel +from langchain_core.output_parsers import BaseOutputParser +from langchain_core.output_parsers.json import parse_and_check_json_markdown +from langchain_core.prompts import BasePromptTemplate +from langchain_core.prompts.few_shot import FewShotPromptTemplate +from langchain_core.runnables import Runnable +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + FilterDirective, + Operation, + Operator, + StructuredQuery, +) +from langchain_core.sql_constructor.parser import get_parser +from langchain_core.sql_constructor.prompt import ( + DEFAULT_EXAMPLES, + DEFAULT_PREFIX, + DEFAULT_SCHEMA_PROMPT, + DEFAULT_SUFFIX, + EXAMPLE_PROMPT, + EXAMPLES_WITH_LIMIT, + PREFIX_WITH_DATA_SOURCE, + SCHEMA_WITH_LIMIT_PROMPT, + SUFFIX_WITHOUT_DATA_SOURCE, + USER_SPECIFIED_EXAMPLE_PROMPT, +) +from langchain_core.sql_constructor.schema import AttributeInfo + + +class StructuredQueryOutputParser(BaseOutputParser[StructuredQuery]): + """Output parser that parses a structured query.""" + + ast_parse: Callable + """Callable that parses dict into internal representation of query language.""" + + def parse(self, text: str) -> StructuredQuery: + try: + expected_keys = ["query", "filter"] + allowed_keys = ["query", "filter", "limit"] + parsed = parse_and_check_json_markdown(text, expected_keys) + if parsed["query"] is None or len(parsed["query"]) == 0: + parsed["query"] = " " + if parsed["filter"] == "NO_FILTER" or not parsed["filter"]: + parsed["filter"] = None + else: + parsed["filter"] = self.ast_parse(parsed["filter"]) + if not parsed.get("limit"): + parsed.pop("limit", None) + return StructuredQuery( + **{k: v for k, v in parsed.items() if k in 
allowed_keys} + ) + except Exception as e: + raise OutputParserException( + f"Parsing text\n{text}\n raised following error:\n{e}" + ) + + @classmethod + def from_components( + cls, + allowed_comparators: Optional[Sequence[Comparator]] = None, + allowed_operators: Optional[Sequence[Operator]] = None, + allowed_attributes: Optional[Sequence[str]] = None, + fix_invalid: bool = False, + ) -> StructuredQueryOutputParser: + """ + Create a structured query output parser from components. + + Args: + allowed_comparators: allowed comparators + allowed_operators: allowed operators + + Returns: + a structured query output parser + """ + ast_parse: Callable + if fix_invalid: + + def ast_parse(raw_filter: str) -> Optional[FilterDirective]: + filter = cast(Optional[FilterDirective], get_parser().parse(raw_filter)) + fixed = fix_filter_directive( + filter, + allowed_comparators=allowed_comparators, + allowed_operators=allowed_operators, + allowed_attributes=allowed_attributes, + ) + return fixed + + else: + ast_parse = get_parser( + allowed_comparators=allowed_comparators, + allowed_operators=allowed_operators, + allowed_attributes=allowed_attributes, + ).parse + return cls(ast_parse=ast_parse) + + +def fix_filter_directive( + filter: Optional[FilterDirective], + *, + allowed_comparators: Optional[Sequence[Comparator]] = None, + allowed_operators: Optional[Sequence[Operator]] = None, + allowed_attributes: Optional[Sequence[str]] = None, +) -> Optional[FilterDirective]: + """Fix invalid filter directive. + + Args: + filter: Filter directive to fix. + allowed_comparators: allowed comparators. Defaults to all comparators. + allowed_operators: allowed operators. Defaults to all operators. + allowed_attributes: allowed attributes. Defaults to all attributes. + + Returns: + Fixed filter directive. 
+ """ + if ( + not (allowed_comparators or allowed_operators or allowed_attributes) + ) or not filter: + return filter + + elif isinstance(filter, Comparison): + if allowed_comparators and filter.comparator not in allowed_comparators: + return None + if allowed_attributes and filter.attribute not in allowed_attributes: + return None + return filter + elif isinstance(filter, Operation): + if allowed_operators and filter.operator not in allowed_operators: + return None + args = [ + fix_filter_directive( + arg, + allowed_comparators=allowed_comparators, + allowed_operators=allowed_operators, + allowed_attributes=allowed_attributes, + ) + for arg in filter.arguments + ] + args = [arg for arg in args if arg is not None] + if not args: + return None + elif len(args) == 1 and filter.operator in (Operator.AND, Operator.OR): + return args[0] + else: + return Operation( + operator=filter.operator, + arguments=args, + ) + else: + return filter + + +def _format_attribute_info(info: Sequence[Union[AttributeInfo, dict]]) -> str: + info_dicts = {} + for i in info: + i_dict = dict(i) + info_dicts[i_dict.pop("name")] = i_dict + return json.dumps(info_dicts, indent=4).replace("{", "{{").replace("}", "}}") + + +def construct_examples(input_output_pairs: Sequence[Tuple[str, dict]]) -> List[dict]: + """Construct examples from input-output pairs. + + Args: + input_output_pairs: Sequence of input-output pairs. + + Returns: + List of examples. 
+ """ + examples = [] + for i, (_input, output) in enumerate(input_output_pairs): + structured_request = ( + json.dumps(output, indent=4).replace("{", "{{").replace("}", "}}") + ) + example = { + "i": i + 1, + "user_query": _input, + "structured_request": structured_request, + } + examples.append(example) + return examples + + +def get_query_constructor_prompt( + document_contents: str, + attribute_info: Sequence[Union[AttributeInfo, dict]], + *, + examples: Optional[Sequence] = None, + allowed_comparators: Sequence[Comparator] = tuple(Comparator), + allowed_operators: Sequence[Operator] = tuple(Operator), + enable_limit: bool = False, + schema_prompt: Optional[BasePromptTemplate] = None, + **kwargs: Any, +) -> BasePromptTemplate: + """Create query construction prompt. + + Args: + document_contents: The contents of the document to be queried. + attribute_info: A list of AttributeInfo objects describing + the attributes of the document. + examples: Optional list of examples to use for the chain. + allowed_comparators: Sequence of allowed comparators. + allowed_operators: Sequence of allowed operators. + enable_limit: Whether to enable the limit operator. Defaults to False. + schema_prompt: Prompt for describing query schema. Should have string input + variables allowed_comparators and allowed_operators. + **kwargs: Additional named params to pass to FewShotPromptTemplate init. + + Returns: + A prompt template that can be used to construct queries. 
+ """ + default_schema_prompt = ( + SCHEMA_WITH_LIMIT_PROMPT if enable_limit else DEFAULT_SCHEMA_PROMPT + ) + schema_prompt = schema_prompt or default_schema_prompt + attribute_str = _format_attribute_info(attribute_info) + schema = schema_prompt.format( + allowed_comparators=" | ".join(allowed_comparators), + allowed_operators=" | ".join(allowed_operators), + ) + if examples and isinstance(examples[0], tuple): + examples = construct_examples(examples) + example_prompt = USER_SPECIFIED_EXAMPLE_PROMPT + prefix = PREFIX_WITH_DATA_SOURCE.format( + schema=schema, content=document_contents, attributes=attribute_str + ) + suffix = SUFFIX_WITHOUT_DATA_SOURCE.format(i=len(examples) + 1) + else: + examples = examples or ( + EXAMPLES_WITH_LIMIT if enable_limit else DEFAULT_EXAMPLES + ) + example_prompt = EXAMPLE_PROMPT + prefix = DEFAULT_PREFIX.format(schema=schema) + suffix = DEFAULT_SUFFIX.format( + i=len(examples) + 1, content=document_contents, attributes=attribute_str + ) + return FewShotPromptTemplate( + examples=list(examples), + example_prompt=example_prompt, + input_variables=["query"], + suffix=suffix, + prefix=prefix, + **kwargs, + ) + + +# def load_query_constructor_chain( +# llm: BaseLanguageModel, +# document_contents: str, +# attribute_info: Sequence[Union[AttributeInfo, dict]], +# examples: Optional[List] = None, +# allowed_comparators: Sequence[Comparator] = tuple(Comparator), +# allowed_operators: Sequence[Operator] = tuple(Operator), +# enable_limit: bool = False, +# schema_prompt: Optional[BasePromptTemplate] = None, +# **kwargs: Any, +# ) -> LLMChain: +# """Load a query constructor chain. +# +# Args: +# llm: BaseLanguageModel to use for the chain. +# document_contents: The contents of the document to be queried. +# attribute_info: Sequence of attributes in the document. +# examples: Optional list of examples to use for the chain. +# allowed_comparators: Sequence of allowed comparators. Defaults to all +# Comparators. 
+# allowed_operators: Sequence of allowed operators. Defaults to all Operators. +# enable_limit: Whether to enable the limit operator. Defaults to False. +# schema_prompt: Prompt for describing query schema. Should have string input +# variables allowed_comparators and allowed_operators. +# **kwargs: Arbitrary named params to pass to LLMChain. +# +# Returns: +# A LLMChain that can be used to construct queries. +# """ +# prompt = get_query_constructor_prompt( +# document_contents, +# attribute_info, +# examples=examples, +# allowed_comparators=allowed_comparators, +# allowed_operators=allowed_operators, +# enable_limit=enable_limit, +# schema_prompt=schema_prompt, +# ) +# allowed_attributes = [] +# for ainfo in attribute_info: +# allowed_attributes.append( +# ainfo.name if isinstance(ainfo, AttributeInfo) else ainfo["name"] +# ) +# output_parser = StructuredQueryOutputParser.from_components( +# allowed_comparators=allowed_comparators, +# allowed_operators=allowed_operators, +# allowed_attributes=allowed_attributes, +# ) +# # For backwards compatibility. +# prompt.output_parser = output_parser +# return LLMChain(llm=llm, prompt=prompt, output_parser=output_parser, **kwargs) + + +def load_query_constructor_runnable( + llm: BaseLanguageModel, + document_contents: str, + attribute_info: Sequence[Union[AttributeInfo, dict]], + *, + examples: Optional[Sequence] = None, + allowed_comparators: Sequence[Comparator] = tuple(Comparator), + allowed_operators: Sequence[Operator] = tuple(Operator), + enable_limit: bool = False, + schema_prompt: Optional[BasePromptTemplate] = None, + fix_invalid: bool = False, + **kwargs: Any, +) -> Runnable: + """Load a query constructor runnable chain. + + Args: + llm: BaseLanguageModel to use for the chain. + document_contents: The contents of the document to be queried. + attribute_info: Sequence of attributes in the document. + examples: Optional list of examples to use for the chain. + allowed_comparators: Sequence of allowed comparators. 
Defaults to all + Comparators. + allowed_operators: Sequence of allowed operators. Defaults to all Operators. + enable_limit: Whether to enable the limit operator. Defaults to False. + schema_prompt: Prompt for describing query schema. Should have string input + variables allowed_comparators and allowed_operators. + fix_invalid: Whether to fix invalid filter directives by ignoring invalid + operators, comparators and attributes. + **kwargs: Additional named params to pass to FewShotPromptTemplate init. + + Returns: + A Runnable that can be used to construct queries. + """ + prompt = get_query_constructor_prompt( + document_contents, + attribute_info, + examples=examples, + allowed_comparators=allowed_comparators, + allowed_operators=allowed_operators, + enable_limit=enable_limit, + schema_prompt=schema_prompt, + **kwargs, + ) + allowed_attributes = [] + for ainfo in attribute_info: + allowed_attributes.append( + ainfo.name if isinstance(ainfo, AttributeInfo) else ainfo["name"] + ) + output_parser = StructuredQueryOutputParser.from_components( + allowed_comparators=allowed_comparators, + allowed_operators=allowed_operators, + allowed_attributes=allowed_attributes, + fix_invalid=fix_invalid, + ) + return prompt | llm | output_parser diff --git a/libs/core/langchain_core/sql_constructor/ir.py b/libs/core/langchain_core/sql_constructor/ir.py new file mode 100644 index 0000000000000..8c8cfaa4563a7 --- /dev/null +++ b/libs/core/langchain_core/sql_constructor/ir.py @@ -0,0 +1,122 @@ +"""Internal representation of a structured query language.""" +from __future__ import annotations + +from abc import ABC, abstractmethod +from enum import Enum +from typing import Any, List, Optional, Sequence, Union + +from langchain_core.pydantic_v1 import BaseModel + + +class Visitor(ABC): + """Defines interface for IR translation using visitor pattern.""" + + allowed_comparators: Optional[Sequence[Comparator]] = None + allowed_operators: Optional[Sequence[Operator]] = None + + def 
_validate_func(self, func: Union[Operator, Comparator]) -> None: + if isinstance(func, Operator) and self.allowed_operators is not None: + if func not in self.allowed_operators: + raise ValueError( + f"Received disallowed operator {func}. Allowed " + f"comparators are {self.allowed_operators}" + ) + if isinstance(func, Comparator) and self.allowed_comparators is not None: + if func not in self.allowed_comparators: + raise ValueError( + f"Received disallowed comparator {func}. Allowed " + f"comparators are {self.allowed_comparators}" + ) + + @abstractmethod + def visit_operation(self, operation: Operation) -> Any: + """Translate an Operation.""" + + @abstractmethod + def visit_comparison(self, comparison: Comparison) -> Any: + """Translate a Comparison.""" + + @abstractmethod + def visit_structured_query(self, structured_query: StructuredQuery) -> Any: + """Translate a StructuredQuery.""" + + +def _to_snake_case(name: str) -> str: + """Convert a name into snake_case.""" + snake_case = "" + for i, char in enumerate(name): + if char.isupper() and i != 0: + snake_case += "_" + char.lower() + else: + snake_case += char.lower() + return snake_case + + +class Expr(BaseModel): + """Base class for all expressions.""" + + def accept(self, visitor: Visitor) -> Any: + """Accept a visitor. 
+ + Args: + visitor: visitor to accept + + Returns: + result of visiting + """ + return getattr(visitor, f"visit_{_to_snake_case(self.__class__.__name__)}")( + self + ) + + +class Operator(str, Enum): + """Enumerator of the operations.""" + + AND = "and" + OR = "or" + NOT = "not" + + +class Comparator(str, Enum): + """Enumerator of the comparison operators.""" + + EQ = "eq" + NE = "ne" + GT = "gt" + GTE = "gte" + LT = "lt" + LTE = "lte" + CONTAIN = "contain" + LIKE = "like" + IN = "in" + NIN = "nin" + + +class FilterDirective(Expr, ABC): + """A filtering expression.""" + + +class Comparison(FilterDirective): + """A comparison to a value.""" + + comparator: Comparator + attribute: str + value: Any + + +class Operation(FilterDirective): + """A logical operation over other directives.""" + + operator: Operator + arguments: List[FilterDirective] + + +class StructuredQuery(Expr): + """A structured query.""" + + query: str + """Query string.""" + filter: Optional[FilterDirective] + """Filtering expression.""" + limit: Optional[int] + """Limit on the number of results.""" diff --git a/libs/core/langchain_core/sql_constructor/parser.py b/libs/core/langchain_core/sql_constructor/parser.py new file mode 100644 index 0000000000000..7733f7cce82c9 --- /dev/null +++ b/libs/core/langchain_core/sql_constructor/parser.py @@ -0,0 +1,183 @@ +import datetime +import warnings +from typing import Any, Literal, Optional, Sequence, Union + +from typing_extensions import TypedDict + +from langchain_core.utils import check_package_version + +try: + check_package_version("lark", gte_version="1.1.5") + from lark import Lark, Transformer, v_args # type: ignore +except ImportError: + + def v_args(*args: Any, **kwargs: Any) -> Any: # type: ignore + """Dummy decorator for when lark is not installed.""" + return lambda _: None + + Transformer = object # type: ignore + Lark = object # type: ignore + +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + FilterDirective, + 
Operation, + Operator, +) + +GRAMMAR = r""" + ?program: func_call + ?expr: func_call + | value + + func_call: CNAME "(" [args] ")" + + ?value: SIGNED_INT -> int + | SIGNED_FLOAT -> float + | DATE -> date + | list + | string + | ("false" | "False" | "FALSE") -> false + | ("true" | "True" | "TRUE") -> true + + args: expr ("," expr)* + DATE.2: /["']?(\d{4}-[01]\d-[0-3]\d)["']?/ + string: /'[^']*'/ | ESCAPED_STRING + list: "[" [args] "]" + + %import common.CNAME + %import common.ESCAPED_STRING + %import common.SIGNED_FLOAT + %import common.SIGNED_INT + %import common.WS + %ignore WS +""" + + +class ISO8601Date(TypedDict): + """A date in ISO 8601 format (YYYY-MM-DD).""" + + date: str + type: Literal["date"] + + +@v_args(inline=True) +class QueryTransformer(Transformer): + """Transforms a query string into an intermediate representation.""" + + def __init__( + self, + *args: Any, + allowed_comparators: Optional[Sequence[Comparator]] = None, + allowed_operators: Optional[Sequence[Operator]] = None, + allowed_attributes: Optional[Sequence[str]] = None, + **kwargs: Any, + ): + super().__init__(*args, **kwargs) + self.allowed_comparators = allowed_comparators + self.allowed_operators = allowed_operators + self.allowed_attributes = allowed_attributes + + def program(self, *items: Any) -> tuple: + return items + + def func_call(self, func_name: Any, args: list) -> FilterDirective: + func = self._match_func_name(str(func_name)) + if isinstance(func, Comparator): + if self.allowed_attributes and args[0] not in self.allowed_attributes: + raise ValueError( + f"Received invalid attributes {args[0]}. 
Allowed attributes are " + f"{self.allowed_attributes}" + ) + return Comparison(comparator=func, attribute=args[0], value=args[1]) + elif len(args) == 1 and func in (Operator.AND, Operator.OR): + return args[0] + else: + return Operation(operator=func, arguments=args) + + def _match_func_name(self, func_name: str) -> Union[Operator, Comparator]: + if func_name in set(Comparator): + if self.allowed_comparators is not None: + if func_name not in self.allowed_comparators: + raise ValueError( + f"Received disallowed comparator {func_name}. Allowed " + f"comparators are {self.allowed_comparators}" + ) + return Comparator(func_name) + elif func_name in set(Operator): + if self.allowed_operators is not None: + if func_name not in self.allowed_operators: + raise ValueError( + f"Received disallowed operator {func_name}. Allowed operators" + f" are {self.allowed_operators}" + ) + return Operator(func_name) + else: + raise ValueError( + f"Received unrecognized function {func_name}. Valid functions are " + f"{list(Operator) + list(Comparator)}" + ) + + def args(self, *items: Any) -> tuple: + return items + + def false(self) -> bool: + return False + + def true(self) -> bool: + return True + + def list(self, item: Any) -> list: + if item is None: + return [] + return list(item) + + def int(self, item: Any) -> int: + return int(item) + + def float(self, item: Any) -> float: + return float(item) + + def date(self, item: Any) -> ISO8601Date: + item = str(item).strip("\"'") + try: + datetime.datetime.strptime(item, "%Y-%m-%d") + except ValueError: + warnings.warn( + "Dates are expected to be provided in ISO 8601 date format " + "(YYYY-MM-DD)." 
+ ) + return {"date": item, "type": "date"} + + def string(self, item: Any) -> str: + # Remove escaped quotes + return str(item).strip("\"'") + + +def get_parser( + allowed_comparators: Optional[Sequence[Comparator]] = None, + allowed_operators: Optional[Sequence[Operator]] = None, + allowed_attributes: Optional[Sequence[str]] = None, +) -> Lark: + """ + Returns a parser for the query language. + + Args: + allowed_comparators: Optional[Sequence[Comparator]] + allowed_operators: Optional[Sequence[Operator]] + + Returns: + Lark parser for the query language. + """ + # QueryTransformer is None when Lark cannot be imported. + if QueryTransformer is None: + raise ImportError( + "Cannot import lark, please install it with 'pip install lark'." + ) + transformer = QueryTransformer( + allowed_comparators=allowed_comparators, + allowed_operators=allowed_operators, + allowed_attributes=allowed_attributes, + ) + return Lark(GRAMMAR, parser="lalr", transformer=transformer, start="program") diff --git a/libs/core/langchain_core/sql_constructor/prompt.py b/libs/core/langchain_core/sql_constructor/prompt.py new file mode 100644 index 0000000000000..d1355b32663d5 --- /dev/null +++ b/libs/core/langchain_core/sql_constructor/prompt.py @@ -0,0 +1,227 @@ +# flake8: noqa +from langchain_core.prompts import PromptTemplate + +SONG_DATA_SOURCE = """\ +```json +{{ + "content": "Lyrics of a song", + "attributes": {{ + "artist": {{ + "type": "string", + "description": "Name of the song artist" + }}, + "length": {{ + "type": "integer", + "description": "Length of the song in seconds" + }}, + "genre": {{ + "type": "string", + "description": "The song genre, one of \"pop\", \"rock\" or \"rap\"" + }} + }} +}} +```\ +""" + +FULL_ANSWER = """\ +```json +{{ + "query": "teenager love", + "filter": "and(or(eq(\\"artist\\", \\"Taylor Swift\\"), eq(\\"artist\\", \\"Katy Perry\\")), lt(\\"length\\", 180), eq(\\"genre\\", \\"pop\\"))" +}} +```\ +""" + +NO_FILTER_ANSWER = """\ +```json +{{ + "query": "", + 
"filter": "NO_FILTER" +}} +```\ +""" + +WITH_LIMIT_ANSWER = """\ +```json +{{ + "query": "love", + "filter": "NO_FILTER", + "limit": 2 +}} +```\ +""" + +DEFAULT_EXAMPLES = [ + { + "i": 1, + "data_source": SONG_DATA_SOURCE, + "user_query": "What are songs by Taylor Swift or Katy Perry about teenage romance under 3 minutes long in the dance pop genre", + "structured_request": FULL_ANSWER, + }, + { + "i": 2, + "data_source": SONG_DATA_SOURCE, + "user_query": "What are songs that were not published on Spotify", + "structured_request": NO_FILTER_ANSWER, + }, +] + +EXAMPLES_WITH_LIMIT = [ + { + "i": 1, + "data_source": SONG_DATA_SOURCE, + "user_query": "What are songs by Taylor Swift or Katy Perry about teenage romance under 3 minutes long in the dance pop genre", + "structured_request": FULL_ANSWER, + }, + { + "i": 2, + "data_source": SONG_DATA_SOURCE, + "user_query": "What are songs that were not published on Spotify", + "structured_request": NO_FILTER_ANSWER, + }, + { + "i": 3, + "data_source": SONG_DATA_SOURCE, + "user_query": "What are three songs about love", + "structured_request": WITH_LIMIT_ANSWER, + }, +] + +EXAMPLE_PROMPT_TEMPLATE = """\ +<< Example {i}. >> +Data Source: +{data_source} + +User Query: +{user_query} + +Structured Request: +{structured_request} +""" + +EXAMPLE_PROMPT = PromptTemplate.from_template(EXAMPLE_PROMPT_TEMPLATE) + +USER_SPECIFIED_EXAMPLE_PROMPT = PromptTemplate.from_template( + """\ +<< Example {i}. >> +User Query: +{user_query} + +Structured Request: +```json +{structured_request} +``` +""" +) + +DEFAULT_SCHEMA = """\ +<< Structured Request Schema >> +When responding use a markdown code snippet with a JSON object formatted in the following schema: + +```json +{{{{ + "query": string \\ text string to compare to document contents + "filter": string \\ logical condition statement for filtering documents +}}}} +``` + +The query string should contain only text that is expected to match the contents of documents. 
Any conditions in the filter should not be mentioned in the query as well. + +A logical condition statement is composed of one or more comparison and logical operation statements. + +A comparison statement takes the form: `comp(attr, val)`: +- `comp` ({allowed_comparators}): comparator +- `attr` (string): name of attribute to apply the comparison to +- `val` (string): is the comparison value + +A logical operation statement takes the form `op(statement1, statement2, ...)`: +- `op` ({allowed_operators}): logical operator +- `statement1`, `statement2`, ... (comparison statements or logical operation statements): one or more statements to apply the operation to + +Make sure that you only use the comparators and logical operators listed above and no others. +Make sure that filters only refer to attributes that exist in the data source. +Make sure that filters only use the attributed names with its function names if there are functions applied on them. +Make sure that filters only use format `YYYY-MM-DD` when handling date data typed values. +Make sure that filters take into account the descriptions of attributes and only make comparisons that are feasible given the type of data being stored. +Make sure that filters are only used as needed. If there are no filters that should be applied return "NO_FILTER" for the filter value.\ +""" +DEFAULT_SCHEMA_PROMPT = PromptTemplate.from_template(DEFAULT_SCHEMA) + +SCHEMA_WITH_LIMIT = """\ +<< Structured Request Schema >> +When responding use a markdown code snippet with a JSON object formatted in the following schema: + +```json +{{{{ + "query": string \\ text string to compare to document contents + "filter": string \\ logical condition statement for filtering documents + "limit": int \\ the number of documents to retrieve +}}}} +``` + +The query string should contain only text that is expected to match the contents of documents. Any conditions in the filter should not be mentioned in the query as well. 
+ +A logical condition statement is composed of one or more comparison and logical operation statements. + +A comparison statement takes the form: `comp(attr, val)`: +- `comp` ({allowed_comparators}): comparator +- `attr` (string): name of attribute to apply the comparison to +- `val` (string): is the comparison value + +A logical operation statement takes the form `op(statement1, statement2, ...)`: +- `op` ({allowed_operators}): logical operator +- `statement1`, `statement2`, ... (comparison statements or logical operation statements): one or more statements to apply the operation to + +Make sure that you only use the comparators and logical operators listed above and no others. +Make sure that filters only refer to attributes that exist in the data source. +Make sure that filters only use the attributed names with its function names if there are functions applied on them. +Make sure that filters only use format `YYYY-MM-DD` when handling date data typed values. +Make sure that filters take into account the descriptions of attributes and only make comparisons that are feasible given the type of data being stored. +Make sure that filters are only used as needed. If there are no filters that should be applied return "NO_FILTER" for the filter value. +Make sure the `limit` is always an int value. It is an optional parameter so leave it blank if it does not make sense. +""" +SCHEMA_WITH_LIMIT_PROMPT = PromptTemplate.from_template(SCHEMA_WITH_LIMIT) + +DEFAULT_PREFIX = """\ +Your goal is to structure the user's query to match the request schema provided below. + +{schema}\ +""" + +PREFIX_WITH_DATA_SOURCE = ( + DEFAULT_PREFIX + + """ + +<< Data Source >> +```json +{{{{ + "content": "{content}", + "attributes": {attributes} +}}}} +``` +""" +) + +DEFAULT_SUFFIX = """\ +<< Example {i}. 
>> +Data Source: +```json +{{{{ + "content": "{content}", + "attributes": {attributes} +}}}} +``` + +User Query: +{{query}} + +Structured Request: +""" + +SUFFIX_WITHOUT_DATA_SOURCE = """\ +<< Example {i}. >> +User Query: +{{query}} + +Structured Request: +""" diff --git a/libs/core/langchain_core/sql_constructor/schema.py b/libs/core/langchain_core/sql_constructor/schema.py new file mode 100644 index 0000000000000..6171b3742f2ac --- /dev/null +++ b/libs/core/langchain_core/sql_constructor/schema.py @@ -0,0 +1,15 @@ +from langchain_core.pydantic_v1 import BaseModel + + +class AttributeInfo(BaseModel): + """Information about a data source attribute.""" + + name: str + description: str + type: str + + class Config: + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + frozen = True diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index b2e8a2f2673f9..a7872df72ebe7 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -57,6 +57,7 @@ syrupy = "^4.0.2" pytest-watcher = "^0.3.4" pytest-asyncio = "^0.21.1" grandalf = "^0.8" +lark = "^1.1.5" pytest-profiling = "^1.7.0" diff --git a/libs/langchain/langchain/chains/query_constructor/base.py b/libs/langchain/langchain/chains/query_constructor/base.py index c08e74f20da5f..06502cf5ffb1a 100644 --- a/libs/langchain/langchain/chains/query_constructor/base.py +++ b/libs/langchain/langchain/chains/query_constructor/base.py @@ -1,256 +1,29 @@ -"""LLM Chain for turning a user text query into a structured query.""" -from __future__ import annotations +from typing import Any, List, Optional, Sequence, Union -import json -from typing import Any, Callable, List, Optional, Sequence, Tuple, Union, cast - -from langchain_core.exceptions import OutputParserException from langchain_core.language_models import BaseLanguageModel -from langchain_core.output_parsers import BaseOutputParser -from langchain_core.output_parsers.json import parse_and_check_json_markdown from 
langchain_core.prompts import BasePromptTemplate -from langchain_core.prompts.few_shot import FewShotPromptTemplate -from langchain_core.runnables import Runnable - -from langchain.chains.llm import LLMChain -from langchain.chains.query_constructor.ir import ( +from langchain_core.sql_constructor.base import ( + StructuredQueryOutputParser, + construct_examples, + fix_filter_directive, + get_query_constructor_prompt, + load_query_constructor_runnable, +) +from langchain_core.sql_constructor.ir import ( Comparator, - Comparison, - FilterDirective, - Operation, Operator, - StructuredQuery, -) -from langchain.chains.query_constructor.parser import get_parser -from langchain.chains.query_constructor.prompt import ( - DEFAULT_EXAMPLES, - DEFAULT_PREFIX, - DEFAULT_SCHEMA_PROMPT, - DEFAULT_SUFFIX, - EXAMPLE_PROMPT, - EXAMPLES_WITH_LIMIT, - PREFIX_WITH_DATA_SOURCE, - SCHEMA_WITH_LIMIT_PROMPT, - SUFFIX_WITHOUT_DATA_SOURCE, - USER_SPECIFIED_EXAMPLE_PROMPT, ) -from langchain.chains.query_constructor.schema import AttributeInfo - - -class StructuredQueryOutputParser(BaseOutputParser[StructuredQuery]): - """Output parser that parses a structured query.""" - - ast_parse: Callable - """Callable that parses dict into internal representation of query language.""" +from langchain_core.sql_constructor.schema import AttributeInfo - def parse(self, text: str) -> StructuredQuery: - try: - expected_keys = ["query", "filter"] - allowed_keys = ["query", "filter", "limit"] - parsed = parse_and_check_json_markdown(text, expected_keys) - if parsed["query"] is None or len(parsed["query"]) == 0: - parsed["query"] = " " - if parsed["filter"] == "NO_FILTER" or not parsed["filter"]: - parsed["filter"] = None - else: - parsed["filter"] = self.ast_parse(parsed["filter"]) - if not parsed.get("limit"): - parsed.pop("limit", None) - return StructuredQuery( - **{k: v for k, v in parsed.items() if k in allowed_keys} - ) - except Exception as e: - raise OutputParserException( - f"Parsing text\n{text}\n 
raised following error:\n{e}" - ) +from langchain.chains import LLMChain - @classmethod - def from_components( - cls, - allowed_comparators: Optional[Sequence[Comparator]] = None, - allowed_operators: Optional[Sequence[Operator]] = None, - allowed_attributes: Optional[Sequence[str]] = None, - fix_invalid: bool = False, - ) -> StructuredQueryOutputParser: - """ - Create a structured query output parser from components. - - Args: - allowed_comparators: allowed comparators - allowed_operators: allowed operators - - Returns: - a structured query output parser - """ - ast_parse: Callable - if fix_invalid: - - def ast_parse(raw_filter: str) -> Optional[FilterDirective]: - filter = cast(Optional[FilterDirective], get_parser().parse(raw_filter)) - fixed = fix_filter_directive( - filter, - allowed_comparators=allowed_comparators, - allowed_operators=allowed_operators, - allowed_attributes=allowed_attributes, - ) - return fixed - - else: - ast_parse = get_parser( - allowed_comparators=allowed_comparators, - allowed_operators=allowed_operators, - allowed_attributes=allowed_attributes, - ).parse - return cls(ast_parse=ast_parse) - - -def fix_filter_directive( - filter: Optional[FilterDirective], - *, - allowed_comparators: Optional[Sequence[Comparator]] = None, - allowed_operators: Optional[Sequence[Operator]] = None, - allowed_attributes: Optional[Sequence[str]] = None, -) -> Optional[FilterDirective]: - """Fix invalid filter directive. - - Args: - filter: Filter directive to fix. - allowed_comparators: allowed comparators. Defaults to all comparators. - allowed_operators: allowed operators. Defaults to all operators. - allowed_attributes: allowed attributes. Defaults to all attributes. - - Returns: - Fixed filter directive. 
- """ - if ( - not (allowed_comparators or allowed_operators or allowed_attributes) - ) or not filter: - return filter - - elif isinstance(filter, Comparison): - if allowed_comparators and filter.comparator not in allowed_comparators: - return None - if allowed_attributes and filter.attribute not in allowed_attributes: - return None - return filter - elif isinstance(filter, Operation): - if allowed_operators and filter.operator not in allowed_operators: - return None - args = [ - fix_filter_directive( - arg, - allowed_comparators=allowed_comparators, - allowed_operators=allowed_operators, - allowed_attributes=allowed_attributes, - ) - for arg in filter.arguments - ] - args = [arg for arg in args if arg is not None] - if not args: - return None - elif len(args) == 1 and filter.operator in (Operator.AND, Operator.OR): - return args[0] - else: - return Operation( - operator=filter.operator, - arguments=args, - ) - else: - return filter - - -def _format_attribute_info(info: Sequence[Union[AttributeInfo, dict]]) -> str: - info_dicts = {} - for i in info: - i_dict = dict(i) - info_dicts[i_dict.pop("name")] = i_dict - return json.dumps(info_dicts, indent=4).replace("{", "{{").replace("}", "}}") - - -def construct_examples(input_output_pairs: Sequence[Tuple[str, dict]]) -> List[dict]: - """Construct examples from input-output pairs. - - Args: - input_output_pairs: Sequence of input-output pairs. - - Returns: - List of examples. 
- """ - examples = [] - for i, (_input, output) in enumerate(input_output_pairs): - structured_request = ( - json.dumps(output, indent=4).replace("{", "{{").replace("}", "}}") - ) - example = { - "i": i + 1, - "user_query": _input, - "structured_request": structured_request, - } - examples.append(example) - return examples - - -def get_query_constructor_prompt( - document_contents: str, - attribute_info: Sequence[Union[AttributeInfo, dict]], - *, - examples: Optional[Sequence] = None, - allowed_comparators: Sequence[Comparator] = tuple(Comparator), - allowed_operators: Sequence[Operator] = tuple(Operator), - enable_limit: bool = False, - schema_prompt: Optional[BasePromptTemplate] = None, - **kwargs: Any, -) -> BasePromptTemplate: - """Create query construction prompt. - - Args: - document_contents: The contents of the document to be queried. - attribute_info: A list of AttributeInfo objects describing - the attributes of the document. - examples: Optional list of examples to use for the chain. - allowed_comparators: Sequence of allowed comparators. - allowed_operators: Sequence of allowed operators. - enable_limit: Whether to enable the limit operator. Defaults to False. - schema_prompt: Prompt for describing query schema. Should have string input - variables allowed_comparators and allowed_operators. - **kwargs: Additional named params to pass to FewShotPromptTemplate init. - - Returns: - A prompt template that can be used to construct queries. 
- """ - default_schema_prompt = ( - SCHEMA_WITH_LIMIT_PROMPT if enable_limit else DEFAULT_SCHEMA_PROMPT - ) - schema_prompt = schema_prompt or default_schema_prompt - attribute_str = _format_attribute_info(attribute_info) - schema = schema_prompt.format( - allowed_comparators=" | ".join(allowed_comparators), - allowed_operators=" | ".join(allowed_operators), - ) - if examples and isinstance(examples[0], tuple): - examples = construct_examples(examples) - example_prompt = USER_SPECIFIED_EXAMPLE_PROMPT - prefix = PREFIX_WITH_DATA_SOURCE.format( - schema=schema, content=document_contents, attributes=attribute_str - ) - suffix = SUFFIX_WITHOUT_DATA_SOURCE.format(i=len(examples) + 1) - else: - examples = examples or ( - EXAMPLES_WITH_LIMIT if enable_limit else DEFAULT_EXAMPLES - ) - example_prompt = EXAMPLE_PROMPT - prefix = DEFAULT_PREFIX.format(schema=schema) - suffix = DEFAULT_SUFFIX.format( - i=len(examples) + 1, content=document_contents, attributes=attribute_str - ) - return FewShotPromptTemplate( - examples=list(examples), - example_prompt=example_prompt, - input_variables=["query"], - suffix=suffix, - prefix=prefix, - **kwargs, - ) +__all__ = [ + "StructuredQueryOutputParser", + "construct_examples", + "fix_filter_directive", + "get_query_constructor_prompt", + "load_query_constructor_runnable", +] def load_query_constructor_chain( @@ -304,61 +77,3 @@ def load_query_constructor_chain( # For backwards compatibility. 
prompt.output_parser = output_parser return LLMChain(llm=llm, prompt=prompt, output_parser=output_parser, **kwargs) - - -def load_query_constructor_runnable( - llm: BaseLanguageModel, - document_contents: str, - attribute_info: Sequence[Union[AttributeInfo, dict]], - *, - examples: Optional[Sequence] = None, - allowed_comparators: Sequence[Comparator] = tuple(Comparator), - allowed_operators: Sequence[Operator] = tuple(Operator), - enable_limit: bool = False, - schema_prompt: Optional[BasePromptTemplate] = None, - fix_invalid: bool = False, - **kwargs: Any, -) -> Runnable: - """Load a query constructor runnable chain. - - Args: - llm: BaseLanguageModel to use for the chain. - document_contents: Description of the page contents of the document to be - queried. - attribute_info: Sequence of attributes in the document. - examples: Optional list of examples to use for the chain. - allowed_comparators: Sequence of allowed comparators. Defaults to all - Comparators. - allowed_operators: Sequence of allowed operators. Defaults to all Operators. - enable_limit: Whether to enable the limit operator. Defaults to False. - schema_prompt: Prompt for describing query schema. Should have string input - variables allowed_comparators and allowed_operators. - fix_invalid: Whether to fix invalid filter directives by ignoring invalid - operators, comparators and attributes. - **kwargs: Additional named params to pass to FewShotPromptTemplate init. - - Returns: - A Runnable that can be used to construct queries. 
- """ - prompt = get_query_constructor_prompt( - document_contents, - attribute_info, - examples=examples, - allowed_comparators=allowed_comparators, - allowed_operators=allowed_operators, - enable_limit=enable_limit, - schema_prompt=schema_prompt, - **kwargs, - ) - allowed_attributes = [] - for ainfo in attribute_info: - allowed_attributes.append( - ainfo.name if isinstance(ainfo, AttributeInfo) else ainfo["name"] - ) - output_parser = StructuredQueryOutputParser.from_components( - allowed_comparators=allowed_comparators, - allowed_operators=allowed_operators, - allowed_attributes=allowed_attributes, - fix_invalid=fix_invalid, - ) - return prompt | llm | output_parser diff --git a/libs/langchain/langchain/chains/query_constructor/ir.py b/libs/langchain/langchain/chains/query_constructor/ir.py index 8c8cfaa4563a7..99becdb548f4b 100644 --- a/libs/langchain/langchain/chains/query_constructor/ir.py +++ b/libs/langchain/langchain/chains/query_constructor/ir.py @@ -1,122 +1,21 @@ -"""Internal representation of a structured query language.""" -from __future__ import annotations - -from abc import ABC, abstractmethod -from enum import Enum -from typing import Any, List, Optional, Sequence, Union - -from langchain_core.pydantic_v1 import BaseModel - - -class Visitor(ABC): - """Defines interface for IR translation using visitor pattern.""" - - allowed_comparators: Optional[Sequence[Comparator]] = None - allowed_operators: Optional[Sequence[Operator]] = None - - def _validate_func(self, func: Union[Operator, Comparator]) -> None: - if isinstance(func, Operator) and self.allowed_operators is not None: - if func not in self.allowed_operators: - raise ValueError( - f"Received disallowed operator {func}. Allowed " - f"comparators are {self.allowed_operators}" - ) - if isinstance(func, Comparator) and self.allowed_comparators is not None: - if func not in self.allowed_comparators: - raise ValueError( - f"Received disallowed comparator {func}. 
Allowed " - f"comparators are {self.allowed_comparators}" - ) - - @abstractmethod - def visit_operation(self, operation: Operation) -> Any: - """Translate an Operation.""" - - @abstractmethod - def visit_comparison(self, comparison: Comparison) -> Any: - """Translate a Comparison.""" - - @abstractmethod - def visit_structured_query(self, structured_query: StructuredQuery) -> Any: - """Translate a StructuredQuery.""" - - -def _to_snake_case(name: str) -> str: - """Convert a name into snake_case.""" - snake_case = "" - for i, char in enumerate(name): - if char.isupper() and i != 0: - snake_case += "_" + char.lower() - else: - snake_case += char.lower() - return snake_case - - -class Expr(BaseModel): - """Base class for all expressions.""" - - def accept(self, visitor: Visitor) -> Any: - """Accept a visitor. - - Args: - visitor: visitor to accept - - Returns: - result of visiting - """ - return getattr(visitor, f"visit_{_to_snake_case(self.__class__.__name__)}")( - self - ) - - -class Operator(str, Enum): - """Enumerator of the operations.""" - - AND = "and" - OR = "or" - NOT = "not" - - -class Comparator(str, Enum): - """Enumerator of the comparison operators.""" - - EQ = "eq" - NE = "ne" - GT = "gt" - GTE = "gte" - LT = "lt" - LTE = "lte" - CONTAIN = "contain" - LIKE = "like" - IN = "in" - NIN = "nin" - - -class FilterDirective(Expr, ABC): - """A filtering expression.""" - - -class Comparison(FilterDirective): - """A comparison to a value.""" - - comparator: Comparator - attribute: str - value: Any - - -class Operation(FilterDirective): - """A logical operation over other directives.""" - - operator: Operator - arguments: List[FilterDirective] - - -class StructuredQuery(Expr): - """A structured query.""" - - query: str - """Query string.""" - filter: Optional[FilterDirective] - """Filtering expression.""" - limit: Optional[int] - """Limit on the number of results.""" +from langchain_core.sql_constructor.ir import ( + Comparator, + Comparison, + Expr, + 
FilterDirective, + Operation, + Operator, + StructuredQuery, + Visitor, +) + +__all__ = [ + "Comparator", + "Comparison", + "Expr", + "FilterDirective", + "Operation", + "Operator", + "StructuredQuery", + "Visitor", +] diff --git a/libs/langchain/langchain/chains/query_constructor/parser.py b/libs/langchain/langchain/chains/query_constructor/parser.py index 26c5360d59feb..a8424b10ce742 100644 --- a/libs/langchain/langchain/chains/query_constructor/parser.py +++ b/libs/langchain/langchain/chains/query_constructor/parser.py @@ -1,182 +1,15 @@ -import datetime -import warnings -from typing import Any, Literal, Optional, Sequence, Union - -from langchain_core.utils import check_package_version -from typing_extensions import TypedDict - -try: - check_package_version("lark", gte_version="1.1.5") - from lark import Lark, Transformer, v_args -except ImportError: - - def v_args(*args: Any, **kwargs: Any) -> Any: # type: ignore - """Dummy decorator for when lark is not installed.""" - return lambda _: None - - Transformer = object # type: ignore - Lark = object # type: ignore - -from langchain.chains.query_constructor.ir import ( - Comparator, - Comparison, - FilterDirective, - Operation, - Operator, +from langchain_core.sql_constructor.parser import ( + GRAMMAR, + ISO8601Date, + QueryTransformer, + get_parser, + v_args, ) -GRAMMAR = r""" - ?program: func_call - ?expr: func_call - | value - - func_call: CNAME "(" [args] ")" - - ?value: SIGNED_INT -> int - | SIGNED_FLOAT -> float - | DATE -> date - | list - | string - | ("false" | "False" | "FALSE") -> false - | ("true" | "True" | "TRUE") -> true - - args: expr ("," expr)* - DATE.2: /["']?(\d{4}-[01]\d-[0-3]\d)["']?/ - string: /'[^']*'/ | ESCAPED_STRING - list: "[" [args] "]" - - %import common.CNAME - %import common.ESCAPED_STRING - %import common.SIGNED_FLOAT - %import common.SIGNED_INT - %import common.WS - %ignore WS -""" - - -class ISO8601Date(TypedDict): - """A date in ISO 8601 format (YYYY-MM-DD).""" - - date: str - 
type: Literal["date"] - - -@v_args(inline=True) -class QueryTransformer(Transformer): - """Transforms a query string into an intermediate representation.""" - - def __init__( - self, - *args: Any, - allowed_comparators: Optional[Sequence[Comparator]] = None, - allowed_operators: Optional[Sequence[Operator]] = None, - allowed_attributes: Optional[Sequence[str]] = None, - **kwargs: Any, - ): - super().__init__(*args, **kwargs) - self.allowed_comparators = allowed_comparators - self.allowed_operators = allowed_operators - self.allowed_attributes = allowed_attributes - - def program(self, *items: Any) -> tuple: - return items - - def func_call(self, func_name: Any, args: list) -> FilterDirective: - func = self._match_func_name(str(func_name)) - if isinstance(func, Comparator): - if self.allowed_attributes and args[0] not in self.allowed_attributes: - raise ValueError( - f"Received invalid attributes {args[0]}. Allowed attributes are " - f"{self.allowed_attributes}" - ) - return Comparison(comparator=func, attribute=args[0], value=args[1]) - elif len(args) == 1 and func in (Operator.AND, Operator.OR): - return args[0] - else: - return Operation(operator=func, arguments=args) - - def _match_func_name(self, func_name: str) -> Union[Operator, Comparator]: - if func_name in set(Comparator): - if self.allowed_comparators is not None: - if func_name not in self.allowed_comparators: - raise ValueError( - f"Received disallowed comparator {func_name}. Allowed " - f"comparators are {self.allowed_comparators}" - ) - return Comparator(func_name) - elif func_name in set(Operator): - if self.allowed_operators is not None: - if func_name not in self.allowed_operators: - raise ValueError( - f"Received disallowed operator {func_name}. Allowed operators" - f" are {self.allowed_operators}" - ) - return Operator(func_name) - else: - raise ValueError( - f"Received unrecognized function {func_name}. 
Valid functions are " - f"{list(Operator) + list(Comparator)}" - ) - - def args(self, *items: Any) -> tuple: - return items - - def false(self) -> bool: - return False - - def true(self) -> bool: - return True - - def list(self, item: Any) -> list: - if item is None: - return [] - return list(item) - - def int(self, item: Any) -> int: - return int(item) - - def float(self, item: Any) -> float: - return float(item) - - def date(self, item: Any) -> ISO8601Date: - item = str(item).strip("\"'") - try: - datetime.datetime.strptime(item, "%Y-%m-%d") - except ValueError: - warnings.warn( - "Dates are expected to be provided in ISO 8601 date format " - "(YYYY-MM-DD)." - ) - return {"date": item, "type": "date"} - - def string(self, item: Any) -> str: - # Remove escaped quotes - return str(item).strip("\"'") - - -def get_parser( - allowed_comparators: Optional[Sequence[Comparator]] = None, - allowed_operators: Optional[Sequence[Operator]] = None, - allowed_attributes: Optional[Sequence[str]] = None, -) -> Lark: - """ - Returns a parser for the query language. - - Args: - allowed_comparators: Optional[Sequence[Comparator]] - allowed_operators: Optional[Sequence[Operator]] - - Returns: - Lark parser for the query language. - """ - # QueryTransformer is None when Lark cannot be imported. - if QueryTransformer is None: - raise ImportError( - "Cannot import lark, please install it with 'pip install lark'." 
- ) - transformer = QueryTransformer( - allowed_comparators=allowed_comparators, - allowed_operators=allowed_operators, - allowed_attributes=allowed_attributes, - ) - return Lark(GRAMMAR, parser="lalr", transformer=transformer, start="program") +__all__ = [ + "GRAMMAR", + "ISO8601Date", + "QueryTransformer", + "get_parser", + "v_args", +] diff --git a/libs/langchain/langchain/chains/query_constructor/prompt.py b/libs/langchain/langchain/chains/query_constructor/prompt.py index d1355b32663d5..408409bdbf851 100644 --- a/libs/langchain/langchain/chains/query_constructor/prompt.py +++ b/libs/langchain/langchain/chains/query_constructor/prompt.py @@ -1,227 +1,39 @@ -# flake8: noqa -from langchain_core.prompts import PromptTemplate - -SONG_DATA_SOURCE = """\ -```json -{{ - "content": "Lyrics of a song", - "attributes": {{ - "artist": {{ - "type": "string", - "description": "Name of the song artist" - }}, - "length": {{ - "type": "integer", - "description": "Length of the song in seconds" - }}, - "genre": {{ - "type": "string", - "description": "The song genre, one of \"pop\", \"rock\" or \"rap\"" - }} - }} -}} -```\ -""" - -FULL_ANSWER = """\ -```json -{{ - "query": "teenager love", - "filter": "and(or(eq(\\"artist\\", \\"Taylor Swift\\"), eq(\\"artist\\", \\"Katy Perry\\")), lt(\\"length\\", 180), eq(\\"genre\\", \\"pop\\"))" -}} -```\ -""" - -NO_FILTER_ANSWER = """\ -```json -{{ - "query": "", - "filter": "NO_FILTER" -}} -```\ -""" - -WITH_LIMIT_ANSWER = """\ -```json -{{ - "query": "love", - "filter": "NO_FILTER", - "limit": 2 -}} -```\ -""" - -DEFAULT_EXAMPLES = [ - { - "i": 1, - "data_source": SONG_DATA_SOURCE, - "user_query": "What are songs by Taylor Swift or Katy Perry about teenage romance under 3 minutes long in the dance pop genre", - "structured_request": FULL_ANSWER, - }, - { - "i": 2, - "data_source": SONG_DATA_SOURCE, - "user_query": "What are songs that were not published on Spotify", - "structured_request": NO_FILTER_ANSWER, - }, -] - 
-EXAMPLES_WITH_LIMIT = [ - { - "i": 1, - "data_source": SONG_DATA_SOURCE, - "user_query": "What are songs by Taylor Swift or Katy Perry about teenage romance under 3 minutes long in the dance pop genre", - "structured_request": FULL_ANSWER, - }, - { - "i": 2, - "data_source": SONG_DATA_SOURCE, - "user_query": "What are songs that were not published on Spotify", - "structured_request": NO_FILTER_ANSWER, - }, - { - "i": 3, - "data_source": SONG_DATA_SOURCE, - "user_query": "What are three songs about love", - "structured_request": WITH_LIMIT_ANSWER, - }, -] - -EXAMPLE_PROMPT_TEMPLATE = """\ -<< Example {i}. >> -Data Source: -{data_source} - -User Query: -{user_query} - -Structured Request: -{structured_request} -""" - -EXAMPLE_PROMPT = PromptTemplate.from_template(EXAMPLE_PROMPT_TEMPLATE) - -USER_SPECIFIED_EXAMPLE_PROMPT = PromptTemplate.from_template( - """\ -<< Example {i}. >> -User Query: -{user_query} - -Structured Request: -```json -{structured_request} -``` -""" -) - -DEFAULT_SCHEMA = """\ -<< Structured Request Schema >> -When responding use a markdown code snippet with a JSON object formatted in the following schema: - -```json -{{{{ - "query": string \\ text string to compare to document contents - "filter": string \\ logical condition statement for filtering documents -}}}} -``` - -The query string should contain only text that is expected to match the contents of documents. Any conditions in the filter should not be mentioned in the query as well. - -A logical condition statement is composed of one or more comparison and logical operation statements. - -A comparison statement takes the form: `comp(attr, val)`: -- `comp` ({allowed_comparators}): comparator -- `attr` (string): name of attribute to apply the comparison to -- `val` (string): is the comparison value - -A logical operation statement takes the form `op(statement1, statement2, ...)`: -- `op` ({allowed_operators}): logical operator -- `statement1`, `statement2`, ... 
(comparison statements or logical operation statements): one or more statements to apply the operation to - -Make sure that you only use the comparators and logical operators listed above and no others. -Make sure that filters only refer to attributes that exist in the data source. -Make sure that filters only use the attributed names with its function names if there are functions applied on them. -Make sure that filters only use format `YYYY-MM-DD` when handling date data typed values. -Make sure that filters take into account the descriptions of attributes and only make comparisons that are feasible given the type of data being stored. -Make sure that filters are only used as needed. If there are no filters that should be applied return "NO_FILTER" for the filter value.\ -""" -DEFAULT_SCHEMA_PROMPT = PromptTemplate.from_template(DEFAULT_SCHEMA) - -SCHEMA_WITH_LIMIT = """\ -<< Structured Request Schema >> -When responding use a markdown code snippet with a JSON object formatted in the following schema: - -```json -{{{{ - "query": string \\ text string to compare to document contents - "filter": string \\ logical condition statement for filtering documents - "limit": int \\ the number of documents to retrieve -}}}} -``` - -The query string should contain only text that is expected to match the contents of documents. Any conditions in the filter should not be mentioned in the query as well. - -A logical condition statement is composed of one or more comparison and logical operation statements. - -A comparison statement takes the form: `comp(attr, val)`: -- `comp` ({allowed_comparators}): comparator -- `attr` (string): name of attribute to apply the comparison to -- `val` (string): is the comparison value - -A logical operation statement takes the form `op(statement1, statement2, ...)`: -- `op` ({allowed_operators}): logical operator -- `statement1`, `statement2`, ... 
(comparison statements or logical operation statements): one or more statements to apply the operation to - -Make sure that you only use the comparators and logical operators listed above and no others. -Make sure that filters only refer to attributes that exist in the data source. -Make sure that filters only use the attributed names with its function names if there are functions applied on them. -Make sure that filters only use format `YYYY-MM-DD` when handling date data typed values. -Make sure that filters take into account the descriptions of attributes and only make comparisons that are feasible given the type of data being stored. -Make sure that filters are only used as needed. If there are no filters that should be applied return "NO_FILTER" for the filter value. -Make sure the `limit` is always an int value. It is an optional parameter so leave it blank if it does not make sense. -""" -SCHEMA_WITH_LIMIT_PROMPT = PromptTemplate.from_template(SCHEMA_WITH_LIMIT) - -DEFAULT_PREFIX = """\ -Your goal is to structure the user's query to match the request schema provided below. - -{schema}\ -""" - -PREFIX_WITH_DATA_SOURCE = ( - DEFAULT_PREFIX - + """ - -<< Data Source >> -```json -{{{{ - "content": "{content}", - "attributes": {attributes} -}}}} -``` -""" +from langchain_core.sql_constructor.prompt import ( + DEFAULT_EXAMPLES, + DEFAULT_PREFIX, + DEFAULT_SCHEMA, + DEFAULT_SCHEMA_PROMPT, + DEFAULT_SUFFIX, + EXAMPLE_PROMPT, + EXAMPLE_PROMPT_TEMPLATE, + EXAMPLES_WITH_LIMIT, + FULL_ANSWER, + NO_FILTER_ANSWER, + PREFIX_WITH_DATA_SOURCE, + SCHEMA_WITH_LIMIT, + SCHEMA_WITH_LIMIT_PROMPT, + SONG_DATA_SOURCE, + SUFFIX_WITHOUT_DATA_SOURCE, + USER_SPECIFIED_EXAMPLE_PROMPT, + WITH_LIMIT_ANSWER, ) -DEFAULT_SUFFIX = """\ -<< Example {i}. >> -Data Source: -```json -{{{{ - "content": "{content}", - "attributes": {attributes} -}}}} -``` - -User Query: -{{query}} - -Structured Request: -""" - -SUFFIX_WITHOUT_DATA_SOURCE = """\ -<< Example {i}. 
>> -User Query: -{{query}} - -Structured Request: -""" +__all__ = [ + "DEFAULT_EXAMPLES", + "DEFAULT_PREFIX", + "DEFAULT_SCHEMA", + "DEFAULT_SCHEMA_PROMPT", + "DEFAULT_SUFFIX", + "EXAMPLES_WITH_LIMIT", + "EXAMPLE_PROMPT", + "EXAMPLE_PROMPT_TEMPLATE", + "FULL_ANSWER", + "NO_FILTER_ANSWER", + "PREFIX_WITH_DATA_SOURCE", + "SCHEMA_WITH_LIMIT", + "SCHEMA_WITH_LIMIT_PROMPT", + "SONG_DATA_SOURCE", + "SUFFIX_WITHOUT_DATA_SOURCE", + "USER_SPECIFIED_EXAMPLE_PROMPT", + "WITH_LIMIT_ANSWER", +] diff --git a/libs/langchain/langchain/chains/query_constructor/schema.py b/libs/langchain/langchain/chains/query_constructor/schema.py index 6171b3742f2ac..402d0d0788766 100644 --- a/libs/langchain/langchain/chains/query_constructor/schema.py +++ b/libs/langchain/langchain/chains/query_constructor/schema.py @@ -1,15 +1,7 @@ -from langchain_core.pydantic_v1 import BaseModel +from langchain_core.sql_constructor.schema import ( + AttributeInfo, +) - -class AttributeInfo(BaseModel): - """Information about a data source attribute.""" - - name: str - description: str - type: str - - class Config: - """Configuration for this pydantic object.""" - - arbitrary_types_allowed = True - frozen = True +__all__ = [ + "AttributeInfo", +] diff --git a/libs/langchain/langchain/retrievers/self_query/astradb.py b/libs/langchain/langchain/retrievers/self_query/astradb.py index 0b8d3ab800f7c..fbca9497354e8 100644 --- a/libs/langchain/langchain/retrievers/self_query/astradb.py +++ b/libs/langchain/langchain/retrievers/self_query/astradb.py @@ -1,70 +1,6 @@ -"""Logic for converting internal query language to a valid AstraDB query.""" -from typing import Dict, Tuple, Union - -from langchain.chains.query_constructor.ir import ( - Comparator, - Comparison, - Operation, - Operator, - StructuredQuery, - Visitor, +from langchain_community.retrievers.self_query.astradb import ( + MULTIPLE_ARITY_COMPARATORS, + AstraDBTranslator, ) -MULTIPLE_ARITY_COMPARATORS = [Comparator.IN, Comparator.NIN] - - -class 
AstraDBTranslator(Visitor): - """Translate AstraDB internal query language elements to valid filters.""" - - """Subset of allowed logical comparators.""" - allowed_comparators = [ - Comparator.EQ, - Comparator.NE, - Comparator.GT, - Comparator.GTE, - Comparator.LT, - Comparator.LTE, - Comparator.IN, - Comparator.NIN, - ] - - """Subset of allowed logical operators.""" - allowed_operators = [Operator.AND, Operator.OR] - - def _format_func(self, func: Union[Operator, Comparator]) -> str: - self._validate_func(func) - map_dict = { - Operator.AND: "$and", - Operator.OR: "$or", - Comparator.EQ: "$eq", - Comparator.NE: "$ne", - Comparator.GTE: "$gte", - Comparator.LTE: "$lte", - Comparator.LT: "$lt", - Comparator.GT: "$gt", - Comparator.IN: "$in", - Comparator.NIN: "$nin", - } - return map_dict[func] - - def visit_operation(self, operation: Operation) -> Dict: - args = [arg.accept(self) for arg in operation.arguments] - return {self._format_func(operation.operator): args} - - def visit_comparison(self, comparison: Comparison) -> Dict: - if comparison.comparator in MULTIPLE_ARITY_COMPARATORS and not isinstance( - comparison.value, list - ): - comparison.value = [comparison.value] - - comparator = self._format_func(comparison.comparator) - return {comparison.attribute: {comparator: comparison.value}} - - def visit_structured_query( - self, structured_query: StructuredQuery - ) -> Tuple[str, dict]: - if structured_query.filter is None: - kwargs = {} - else: - kwargs = {"filter": structured_query.filter.accept(self)} - return structured_query.query, kwargs +__all__ = ["AstraDBTranslator", "MULTIPLE_ARITY_COMPARATORS"] diff --git a/libs/langchain/langchain/retrievers/self_query/base.py b/libs/langchain/langchain/retrievers/self_query/base.py index d54120ccfc3d8..0cbf860846604 100644 --- a/libs/langchain/langchain/retrievers/self_query/base.py +++ b/libs/langchain/langchain/retrievers/self_query/base.py @@ -1,244 +1,6 @@ -"""Retriever that generates and executes structured 
queries over its own data source.""" -import logging -from typing import Any, Dict, List, Optional, Sequence, Tuple, Type, Union - -from langchain_community.vectorstores import ( - AstraDB, - Chroma, - DashVector, - DeepLake, - ElasticsearchStore, - Milvus, - MongoDBAtlasVectorSearch, - MyScale, - OpenSearchVectorSearch, - PGVector, - Pinecone, - Qdrant, - Redis, - SupabaseVectorStore, - TimescaleVector, - Vectara, - Weaviate, -) -from langchain_core.documents import Document -from langchain_core.language_models import BaseLanguageModel -from langchain_core.pydantic_v1 import Field, root_validator -from langchain_core.retrievers import BaseRetriever -from langchain_core.runnables import Runnable -from langchain_core.vectorstores import VectorStore - -from langchain.callbacks.manager import ( - AsyncCallbackManagerForRetrieverRun, - CallbackManagerForRetrieverRun, +from langchain_community.retrievers.self_query.base import ( + SelfQueryRetriever, + _get_builtin_translator, ) -from langchain.chains.query_constructor.base import load_query_constructor_runnable -from langchain.chains.query_constructor.ir import StructuredQuery, Visitor -from langchain.chains.query_constructor.schema import AttributeInfo -from langchain.retrievers.self_query.astradb import AstraDBTranslator -from langchain.retrievers.self_query.chroma import ChromaTranslator -from langchain.retrievers.self_query.dashvector import DashvectorTranslator -from langchain.retrievers.self_query.deeplake import DeepLakeTranslator -from langchain.retrievers.self_query.elasticsearch import ElasticsearchTranslator -from langchain.retrievers.self_query.milvus import MilvusTranslator -from langchain.retrievers.self_query.mongodb_atlas import MongoDBAtlasTranslator -from langchain.retrievers.self_query.myscale import MyScaleTranslator -from langchain.retrievers.self_query.opensearch import OpenSearchTranslator -from langchain.retrievers.self_query.pgvector import PGVectorTranslator -from 
langchain.retrievers.self_query.pinecone import PineconeTranslator -from langchain.retrievers.self_query.qdrant import QdrantTranslator -from langchain.retrievers.self_query.redis import RedisTranslator -from langchain.retrievers.self_query.supabase import SupabaseVectorTranslator -from langchain.retrievers.self_query.timescalevector import TimescaleVectorTranslator -from langchain.retrievers.self_query.vectara import VectaraTranslator -from langchain.retrievers.self_query.weaviate import WeaviateTranslator - -logger = logging.getLogger(__name__) - - -def _get_builtin_translator(vectorstore: VectorStore) -> Visitor: - """Get the translator class corresponding to the vector store class.""" - BUILTIN_TRANSLATORS: Dict[Type[VectorStore], Type[Visitor]] = { - AstraDB: AstraDBTranslator, - PGVector: PGVectorTranslator, - Pinecone: PineconeTranslator, - Chroma: ChromaTranslator, - DashVector: DashvectorTranslator, - Weaviate: WeaviateTranslator, - Vectara: VectaraTranslator, - Qdrant: QdrantTranslator, - MyScale: MyScaleTranslator, - DeepLake: DeepLakeTranslator, - ElasticsearchStore: ElasticsearchTranslator, - Milvus: MilvusTranslator, - SupabaseVectorStore: SupabaseVectorTranslator, - TimescaleVector: TimescaleVectorTranslator, - OpenSearchVectorSearch: OpenSearchTranslator, - MongoDBAtlasVectorSearch: MongoDBAtlasTranslator, - } - if isinstance(vectorstore, Qdrant): - return QdrantTranslator(metadata_key=vectorstore.metadata_payload_key) - elif isinstance(vectorstore, MyScale): - return MyScaleTranslator(metadata_key=vectorstore.metadata_column) - elif isinstance(vectorstore, Redis): - return RedisTranslator.from_vectorstore(vectorstore) - elif vectorstore.__class__ in BUILTIN_TRANSLATORS: - return BUILTIN_TRANSLATORS[vectorstore.__class__]() - else: - raise ValueError( - f"Self query retriever with Vector Store type {vectorstore.__class__}" - f" not supported." 
- ) - - -class SelfQueryRetriever(BaseRetriever): - """Retriever that uses a vector store and an LLM to generate - the vector store queries.""" - - vectorstore: VectorStore - """The underlying vector store from which documents will be retrieved.""" - query_constructor: Runnable[dict, StructuredQuery] = Field(alias="llm_chain") - """The query constructor chain for generating the vector store queries. - - llm_chain is legacy name kept for backwards compatibility.""" - search_type: str = "similarity" - """The search type to perform on the vector store.""" - search_kwargs: dict = Field(default_factory=dict) - """Keyword arguments to pass in to the vector store search.""" - structured_query_translator: Visitor - """Translator for turning internal query language into vectorstore search params.""" - verbose: bool = False - - use_original_query: bool = False - """Use original query instead of the revised new query from LLM""" - - class Config: - """Configuration for this pydantic object.""" - - arbitrary_types_allowed = True - allow_population_by_field_name = True - - @root_validator(pre=True) - def validate_translator(cls, values: Dict) -> Dict: - """Validate translator.""" - if "structured_query_translator" not in values: - values["structured_query_translator"] = _get_builtin_translator( - values["vectorstore"] - ) - return values - - @property - def llm_chain(self) -> Runnable: - """llm_chain is legacy name kept for backwards compatibility.""" - return self.query_constructor - - def _prepare_query( - self, query: str, structured_query: StructuredQuery - ) -> Tuple[str, Dict[str, Any]]: - new_query, new_kwargs = self.structured_query_translator.visit_structured_query( - structured_query - ) - if structured_query.limit is not None: - new_kwargs["k"] = structured_query.limit - if self.use_original_query: - new_query = query - search_kwargs = {**self.search_kwargs, **new_kwargs} - return new_query, search_kwargs - - def _get_docs_with_query( - self, query: str, 
search_kwargs: Dict[str, Any] - ) -> List[Document]: - docs = self.vectorstore.search(query, self.search_type, **search_kwargs) - return docs - - async def _aget_docs_with_query( - self, query: str, search_kwargs: Dict[str, Any] - ) -> List[Document]: - docs = await self.vectorstore.asearch(query, self.search_type, **search_kwargs) - return docs - - def _get_relevant_documents( - self, query: str, *, run_manager: CallbackManagerForRetrieverRun - ) -> List[Document]: - """Get documents relevant for a query. - - Args: - query: string to find relevant documents for - - Returns: - List of relevant documents - """ - structured_query = self.query_constructor.invoke( - {"query": query}, config={"callbacks": run_manager.get_child()} - ) - if self.verbose: - logger.info(f"Generated Query: {structured_query}") - new_query, search_kwargs = self._prepare_query(query, structured_query) - docs = self._get_docs_with_query(new_query, search_kwargs) - return docs - - async def _aget_relevant_documents( - self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun - ) -> List[Document]: - """Get documents relevant for a query. 
- - Args: - query: string to find relevant documents for - - Returns: - List of relevant documents - """ - structured_query = await self.query_constructor.ainvoke( - {"query": query}, config={"callbacks": run_manager.get_child()} - ) - if self.verbose: - logger.info(f"Generated Query: {structured_query}") - new_query, search_kwargs = self._prepare_query(query, structured_query) - docs = await self._aget_docs_with_query(new_query, search_kwargs) - return docs - - @classmethod - def from_llm( - cls, - llm: BaseLanguageModel, - vectorstore: VectorStore, - document_contents: str, - metadata_field_info: Sequence[Union[AttributeInfo, dict]], - structured_query_translator: Optional[Visitor] = None, - chain_kwargs: Optional[Dict] = None, - enable_limit: bool = False, - use_original_query: bool = False, - **kwargs: Any, - ) -> "SelfQueryRetriever": - if structured_query_translator is None: - structured_query_translator = _get_builtin_translator(vectorstore) - chain_kwargs = chain_kwargs or {} - if ( - "allowed_comparators" not in chain_kwargs - and structured_query_translator.allowed_comparators is not None - ): - chain_kwargs[ - "allowed_comparators" - ] = structured_query_translator.allowed_comparators - if ( - "allowed_operators" not in chain_kwargs - and structured_query_translator.allowed_operators is not None - ): - chain_kwargs[ - "allowed_operators" - ] = structured_query_translator.allowed_operators - query_constructor = load_query_constructor_runnable( - llm, - document_contents, - metadata_field_info, - enable_limit=enable_limit, - **chain_kwargs, - ) - return cls( - query_constructor=query_constructor, - vectorstore=vectorstore, - use_original_query=use_original_query, - structured_query_translator=structured_query_translator, - **kwargs, - ) +__all__ = ["SelfQueryRetriever", "_get_builtin_translator"] diff --git a/libs/langchain/langchain/retrievers/self_query/chroma.py b/libs/langchain/langchain/retrievers/self_query/chroma.py index 
8c9a79b12d35e..dd4dcbfb6de58 100644 --- a/libs/langchain/langchain/retrievers/self_query/chroma.py +++ b/libs/langchain/langchain/retrievers/self_query/chroma.py @@ -1,50 +1,3 @@ -from typing import Dict, Tuple, Union +from langchain_community.retrievers.self_query.chroma import ChromaTranslator -from langchain.chains.query_constructor.ir import ( - Comparator, - Comparison, - Operation, - Operator, - StructuredQuery, - Visitor, -) - - -class ChromaTranslator(Visitor): - """Translate `Chroma` internal query language elements to valid filters.""" - - allowed_operators = [Operator.AND, Operator.OR] - """Subset of allowed logical operators.""" - allowed_comparators = [ - Comparator.EQ, - Comparator.NE, - Comparator.GT, - Comparator.GTE, - Comparator.LT, - Comparator.LTE, - ] - """Subset of allowed logical comparators.""" - - def _format_func(self, func: Union[Operator, Comparator]) -> str: - self._validate_func(func) - return f"${func.value}" - - def visit_operation(self, operation: Operation) -> Dict: - args = [arg.accept(self) for arg in operation.arguments] - return {self._format_func(operation.operator): args} - - def visit_comparison(self, comparison: Comparison) -> Dict: - return { - comparison.attribute: { - self._format_func(comparison.comparator): comparison.value - } - } - - def visit_structured_query( - self, structured_query: StructuredQuery - ) -> Tuple[str, dict]: - if structured_query.filter is None: - kwargs = {} - else: - kwargs = {"filter": structured_query.filter.accept(self)} - return structured_query.query, kwargs +__all__ = ["ChromaTranslator"] diff --git a/libs/langchain/langchain/retrievers/self_query/dashvector.py b/libs/langchain/langchain/retrievers/self_query/dashvector.py index 24ae50239adf8..08c2c709ea42b 100644 --- a/libs/langchain/langchain/retrievers/self_query/dashvector.py +++ b/libs/langchain/langchain/retrievers/self_query/dashvector.py @@ -1,64 +1,3 @@ -"""Logic for converting internal query language to a valid DashVector 
query.""" -from typing import Tuple, Union +from langchain_community.retrievers.self_query.dashvector import DashvectorTranslator -from langchain.chains.query_constructor.ir import ( - Comparator, - Comparison, - Operation, - Operator, - StructuredQuery, - Visitor, -) - - -class DashvectorTranslator(Visitor): - """Logic for converting internal query language elements to valid filters.""" - - allowed_operators = [Operator.AND, Operator.OR] - allowed_comparators = [ - Comparator.EQ, - Comparator.GT, - Comparator.GTE, - Comparator.LT, - Comparator.LTE, - Comparator.LIKE, - ] - - map_dict = { - Operator.AND: " AND ", - Operator.OR: " OR ", - Comparator.EQ: " = ", - Comparator.GT: " > ", - Comparator.GTE: " >= ", - Comparator.LT: " < ", - Comparator.LTE: " <= ", - Comparator.LIKE: " LIKE ", - } - - def _format_func(self, func: Union[Operator, Comparator]) -> str: - self._validate_func(func) - return self.map_dict[func] - - def visit_operation(self, operation: Operation) -> str: - args = [arg.accept(self) for arg in operation.arguments] - return self._format_func(operation.operator).join(args) - - def visit_comparison(self, comparison: Comparison) -> str: - value = comparison.value - if isinstance(value, str): - if comparison.comparator == Comparator.LIKE: - value = f"'%{value}%'" - else: - value = f"'{value}'" - return ( - f"{comparison.attribute}{self._format_func(comparison.comparator)}{value}" - ) - - def visit_structured_query( - self, structured_query: StructuredQuery - ) -> Tuple[str, dict]: - if structured_query.filter is None: - kwargs = {} - else: - kwargs = {"filter": structured_query.filter.accept(self)} - return structured_query.query, kwargs +__all__ = ["DashvectorTranslator"] diff --git a/libs/langchain/langchain/retrievers/self_query/deeplake.py b/libs/langchain/langchain/retrievers/self_query/deeplake.py index 030933b32e8b0..b8c0d7c288fec 100644 --- a/libs/langchain/langchain/retrievers/self_query/deeplake.py +++ 
b/libs/langchain/langchain/retrievers/self_query/deeplake.py @@ -1,88 +1,13 @@ -"""Logic for converting internal query language to a valid Chroma query.""" -from typing import Tuple, Union - -from langchain.chains.query_constructor.ir import ( - Comparator, - Comparison, - Operation, - Operator, - StructuredQuery, - Visitor, +from langchain_community.retrievers.self_query.deeplake import ( + COMPARATOR_TO_TQL, + OPERATOR_TO_TQL, + DeepLakeTranslator, + can_cast_to_float, ) -COMPARATOR_TO_TQL = { - Comparator.EQ: "==", - Comparator.GT: ">", - Comparator.GTE: ">=", - Comparator.LT: "<", - Comparator.LTE: "<=", -} - - -OPERATOR_TO_TQL = { - Operator.AND: "and", - Operator.OR: "or", - Operator.NOT: "NOT", -} - - -def can_cast_to_float(string: str) -> bool: - """Check if a string can be cast to a float.""" - try: - float(string) - return True - except ValueError: - return False - - -class DeepLakeTranslator(Visitor): - """Translate `DeepLake` internal query language elements to valid filters.""" - - allowed_operators = [Operator.AND, Operator.OR, Operator.NOT] - """Subset of allowed logical operators.""" - allowed_comparators = [ - Comparator.EQ, - Comparator.GT, - Comparator.GTE, - Comparator.LT, - Comparator.LTE, - ] - """Subset of allowed logical comparators.""" - - def _format_func(self, func: Union[Operator, Comparator]) -> str: - self._validate_func(func) - if isinstance(func, Operator): - value = OPERATOR_TO_TQL[func.value] # type: ignore - elif isinstance(func, Comparator): - value = COMPARATOR_TO_TQL[func.value] # type: ignore - return f"{value}" - - def visit_operation(self, operation: Operation) -> str: - args = [arg.accept(self) for arg in operation.arguments] - operator = self._format_func(operation.operator) - return "(" + (" " + operator + " ").join(args) + ")" - - def visit_comparison(self, comparison: Comparison) -> str: - comparator = self._format_func(comparison.comparator) - values = comparison.value - if isinstance(values, list): - tql = [] - for 
value in values: - comparison.value = value - tql.append(self.visit_comparison(comparison)) - - return "(" + (" or ").join(tql) + ")" - - if not can_cast_to_float(comparison.value): - values = f"'{values}'" - return f"metadata['{comparison.attribute}'] {comparator} {values}" - - def visit_structured_query( - self, structured_query: StructuredQuery - ) -> Tuple[str, dict]: - if structured_query.filter is None: - kwargs = {} - else: - tqL = f"SELECT * WHERE {structured_query.filter.accept(self)}" - kwargs = {"tql": tqL} - return structured_query.query, kwargs +__all__ = [ + "DeepLakeTranslator", + "OPERATOR_TO_TQL", + "COMPARATOR_TO_TQL", + "can_cast_to_float", +] diff --git a/libs/langchain/langchain/retrievers/self_query/elasticsearch.py b/libs/langchain/langchain/retrievers/self_query/elasticsearch.py index 7c2f7671a5c9b..289d8578e82a5 100644 --- a/libs/langchain/langchain/retrievers/self_query/elasticsearch.py +++ b/libs/langchain/langchain/retrievers/self_query/elasticsearch.py @@ -1,100 +1,5 @@ -from typing import Dict, Tuple, Union - -from langchain.chains.query_constructor.ir import ( - Comparator, - Comparison, - Operation, - Operator, - StructuredQuery, - Visitor, +from langchain_community.retrievers.self_query.elasticsearch import ( + ElasticsearchTranslator, ) - -class ElasticsearchTranslator(Visitor): - """Translate `Elasticsearch` internal query language elements to valid filters.""" - - allowed_comparators = [ - Comparator.EQ, - Comparator.GT, - Comparator.GTE, - Comparator.LT, - Comparator.LTE, - Comparator.CONTAIN, - Comparator.LIKE, - ] - """Subset of allowed logical comparators.""" - - allowed_operators = [Operator.AND, Operator.OR, Operator.NOT] - """Subset of allowed logical operators.""" - - def _format_func(self, func: Union[Operator, Comparator]) -> str: - self._validate_func(func) - map_dict = { - Operator.OR: "should", - Operator.NOT: "must_not", - Operator.AND: "must", - Comparator.EQ: "term", - Comparator.GT: "gt", - Comparator.GTE: "gte", 
- Comparator.LT: "lt", - Comparator.LTE: "lte", - Comparator.CONTAIN: "match", - Comparator.LIKE: "match", - } - return map_dict[func] - - def visit_operation(self, operation: Operation) -> Dict: - args = [arg.accept(self) for arg in operation.arguments] - - return {"bool": {self._format_func(operation.operator): args}} - - def visit_comparison(self, comparison: Comparison) -> Dict: - # ElasticsearchStore filters require to target - # the metadata object field - field = f"metadata.{comparison.attribute}" - - is_range_comparator = comparison.comparator in [ - Comparator.GT, - Comparator.GTE, - Comparator.LT, - Comparator.LTE, - ] - - if is_range_comparator: - value = comparison.value - if isinstance(comparison.value, dict) and "date" in comparison.value: - value = comparison.value["date"] - return {"range": {field: {self._format_func(comparison.comparator): value}}} - - if comparison.comparator == Comparator.CONTAIN: - return { - self._format_func(comparison.comparator): { - field: {"query": comparison.value} - } - } - - if comparison.comparator == Comparator.LIKE: - return { - self._format_func(comparison.comparator): { - field: {"query": comparison.value, "fuzziness": "AUTO"} - } - } - - # we assume that if the value is a string, - # we want to use the keyword field - field = f"{field}.keyword" if isinstance(comparison.value, str) else field - - if isinstance(comparison.value, dict): - if "date" in comparison.value: - comparison.value = comparison.value["date"] - - return {self._format_func(comparison.comparator): {field: comparison.value}} - - def visit_structured_query( - self, structured_query: StructuredQuery - ) -> Tuple[str, dict]: - if structured_query.filter is None: - kwargs = {} - else: - kwargs = {"filter": [structured_query.filter.accept(self)]} - return structured_query.query, kwargs +__all__ = ["ElasticsearchTranslator"] diff --git a/libs/langchain/langchain/retrievers/self_query/milvus.py b/libs/langchain/langchain/retrievers/self_query/milvus.py 
index dbc61f6f71203..4cbc7390e65ac 100644 --- a/libs/langchain/langchain/retrievers/self_query/milvus.py +++ b/libs/langchain/langchain/retrievers/self_query/milvus.py @@ -1,103 +1,8 @@ -"""Logic for converting internal query language to a valid Milvus query.""" -from typing import Tuple, Union - -from langchain.chains.query_constructor.ir import ( - Comparator, - Comparison, - Operation, - Operator, - StructuredQuery, - Visitor, +from langchain_community.retrievers.self_query.milvus import ( + COMPARATOR_TO_BER, + UNARY_OPERATORS, + MilvusTranslator, + process_value, ) -COMPARATOR_TO_BER = { - Comparator.EQ: "==", - Comparator.GT: ">", - Comparator.GTE: ">=", - Comparator.LT: "<", - Comparator.LTE: "<=", - Comparator.IN: "in", - Comparator.LIKE: "like", -} - -UNARY_OPERATORS = [Operator.NOT] - - -def process_value(value: Union[int, float, str], comparator: Comparator) -> str: - """Convert a value to a string and add double quotes if it is a string. - - It required for comparators involving strings. - - Args: - value: The value to convert. - comparator: The comparator. - - Returns: - The converted value as a string. 
- """ - # - if isinstance(value, str): - if comparator is Comparator.LIKE: - # If the comparator is LIKE, add a percent sign after it for prefix matching - # and add double quotes - return f'"{value}%"' - else: - # If the value is already a string, add double quotes - return f'"{value}"' - else: - # If the value is not a string, convert it to a string without double quotes - return str(value) - - -class MilvusTranslator(Visitor): - """Translate Milvus internal query language elements to valid filters.""" - - """Subset of allowed logical operators.""" - allowed_operators = [Operator.AND, Operator.NOT, Operator.OR] - - """Subset of allowed logical comparators.""" - allowed_comparators = [ - Comparator.EQ, - Comparator.GT, - Comparator.GTE, - Comparator.LT, - Comparator.LTE, - Comparator.IN, - Comparator.LIKE, - ] - - def _format_func(self, func: Union[Operator, Comparator]) -> str: - self._validate_func(func) - value = func.value - if isinstance(func, Comparator): - value = COMPARATOR_TO_BER[func] - return f"{value}" - - def visit_operation(self, operation: Operation) -> str: - if operation.operator in UNARY_OPERATORS and len(operation.arguments) == 1: - operator = self._format_func(operation.operator) - return operator + "(" + operation.arguments[0].accept(self) + ")" - elif operation.operator in UNARY_OPERATORS: - raise ValueError( - f'"{operation.operator.value}" can have only one argument in Milvus' - ) - else: - args = [arg.accept(self) for arg in operation.arguments] - operator = self._format_func(operation.operator) - return "(" + (" " + operator + " ").join(args) + ")" - - def visit_comparison(self, comparison: Comparison) -> str: - comparator = self._format_func(comparison.comparator) - processed_value = process_value(comparison.value, comparison.comparator) - attribute = comparison.attribute - - return "( " + attribute + " " + comparator + " " + processed_value + " )" - - def visit_structured_query( - self, structured_query: StructuredQuery - ) -> 
Tuple[str, dict]: - if structured_query.filter is None: - kwargs = {} - else: - kwargs = {"expr": structured_query.filter.accept(self)} - return structured_query.query, kwargs +__all__ = ["MilvusTranslator", "COMPARATOR_TO_BER", "UNARY_OPERATORS", "process_value"] diff --git a/libs/langchain/langchain/retrievers/self_query/mongodb_atlas.py b/libs/langchain/langchain/retrievers/self_query/mongodb_atlas.py index a10e7b58fa933..cb5269987461f 100644 --- a/libs/langchain/langchain/retrievers/self_query/mongodb_atlas.py +++ b/libs/langchain/langchain/retrievers/self_query/mongodb_atlas.py @@ -1,74 +1,6 @@ -"""Logic for converting internal query language to a valid MongoDB Atlas query.""" -from typing import Dict, Tuple, Union - -from langchain.chains.query_constructor.ir import ( - Comparator, - Comparison, - Operation, - Operator, - StructuredQuery, - Visitor, +from langchain_community.retrievers.self_query.mongodb_atlas import ( + MULTIPLE_ARITY_COMPARATORS, + MongoDBAtlasTranslator, ) -MULTIPLE_ARITY_COMPARATORS = [Comparator.IN, Comparator.NIN] - - -class MongoDBAtlasTranslator(Visitor): - """Translate Mongo internal query language elements to valid filters.""" - - """Subset of allowed logical comparators.""" - allowed_comparators = [ - Comparator.EQ, - Comparator.NE, - Comparator.GT, - Comparator.GTE, - Comparator.LT, - Comparator.LTE, - Comparator.IN, - Comparator.NIN, - ] - - """Subset of allowed logical operators.""" - allowed_operators = [Operator.AND, Operator.OR] - - ## Convert a operator or a comparator to Mongo Query Format - def _format_func(self, func: Union[Operator, Comparator]) -> str: - self._validate_func(func) - map_dict = { - Operator.AND: "$and", - Operator.OR: "$or", - Comparator.EQ: "$eq", - Comparator.NE: "$ne", - Comparator.GTE: "$gte", - Comparator.LTE: "$lte", - Comparator.LT: "$lt", - Comparator.GT: "$gt", - Comparator.IN: "$in", - Comparator.NIN: "$nin", - } - return map_dict[func] - - def visit_operation(self, operation: Operation) -> 
Dict: - args = [arg.accept(self) for arg in operation.arguments] - return {self._format_func(operation.operator): args} - - def visit_comparison(self, comparison: Comparison) -> Dict: - if comparison.comparator in MULTIPLE_ARITY_COMPARATORS and not isinstance( - comparison.value, list - ): - comparison.value = [comparison.value] - - comparator = self._format_func(comparison.comparator) - - attribute = comparison.attribute - - return {attribute: {comparator: comparison.value}} - - def visit_structured_query( - self, structured_query: StructuredQuery - ) -> Tuple[str, dict]: - if structured_query.filter is None: - kwargs = {} - else: - kwargs = {"pre_filter": structured_query.filter.accept(self)} - return structured_query.query, kwargs +__all__ = ["MongoDBAtlasTranslator", "MULTIPLE_ARITY_COMPARATORS"] diff --git a/libs/langchain/langchain/retrievers/self_query/myscale.py b/libs/langchain/langchain/retrievers/self_query/myscale.py index 8b1afaf8b9de6..30af85b4ddf36 100644 --- a/libs/langchain/langchain/retrievers/self_query/myscale.py +++ b/libs/langchain/langchain/retrievers/self_query/myscale.py @@ -1,125 +1,7 @@ -import re -from typing import Any, Callable, Dict, Tuple - -from langchain.chains.query_constructor.ir import ( - Comparator, - Comparison, - Operation, - Operator, - StructuredQuery, - Visitor, +from langchain_community.retrievers.self_query.myscale import ( + _DEFAULT_COMPOSER, + _FUNCTION_COMPOSER, + MyScaleTranslator, ) - -def _DEFAULT_COMPOSER(op_name: str) -> Callable: - """ - Default composer for logical operators. - - Args: - op_name: Name of the operator. - - Returns: - Callable that takes a list of arguments and returns a string. - """ - - def f(*args: Any) -> str: - args_: map[str] = map(str, args) - return f" {op_name} ".join(args_) - - return f - - -def _FUNCTION_COMPOSER(op_name: str) -> Callable: - """ - Composer for functions. - - Args: - op_name: Name of the function. 
- - Returns: - Callable that takes a list of arguments and returns a string. - """ - - def f(*args: Any) -> str: - args_: map[str] = map(str, args) - return f"{op_name}({','.join(args_)})" - - return f - - -class MyScaleTranslator(Visitor): - """Translate `MyScale` internal query language elements to valid filters.""" - - allowed_operators = [Operator.AND, Operator.OR, Operator.NOT] - """Subset of allowed logical operators.""" - - allowed_comparators = [ - Comparator.EQ, - Comparator.GT, - Comparator.GTE, - Comparator.LT, - Comparator.LTE, - Comparator.CONTAIN, - Comparator.LIKE, - ] - - map_dict = { - Operator.AND: _DEFAULT_COMPOSER("AND"), - Operator.OR: _DEFAULT_COMPOSER("OR"), - Operator.NOT: _DEFAULT_COMPOSER("NOT"), - Comparator.EQ: _DEFAULT_COMPOSER("="), - Comparator.GT: _DEFAULT_COMPOSER(">"), - Comparator.GTE: _DEFAULT_COMPOSER(">="), - Comparator.LT: _DEFAULT_COMPOSER("<"), - Comparator.LTE: _DEFAULT_COMPOSER("<="), - Comparator.CONTAIN: _FUNCTION_COMPOSER("has"), - Comparator.LIKE: _DEFAULT_COMPOSER("ILIKE"), - } - - def __init__(self, metadata_key: str = "metadata") -> None: - super().__init__() - self.metadata_key = metadata_key - - def visit_operation(self, operation: Operation) -> Dict: - args = [arg.accept(self) for arg in operation.arguments] - func = operation.operator - self._validate_func(func) - return self.map_dict[func](*args) - - def visit_comparison(self, comparison: Comparison) -> Dict: - regex = r"\((.*?)\)" - matched = re.search(r"\(\w+\)", comparison.attribute) - - # If arbitrary function is applied to an attribute - if matched: - attr = re.sub( - regex, - f"({self.metadata_key}.{matched.group(0)[1:-1]})", - comparison.attribute, - ) - else: - attr = f"{self.metadata_key}.{comparison.attribute}" - value = comparison.value - comp = comparison.comparator - - value = f"'{value}'" if isinstance(value, str) else value - - # convert timestamp for datetime objects - if isinstance(value, dict) and value.get("type") == "date": - attr = 
f"parseDateTime32BestEffort({attr})" - value = f"parseDateTime32BestEffort('{value['date']}')" - - # string pattern match - if comp is Comparator.LIKE: - value = f"'%{value[1:-1]}%'" - return self.map_dict[comp](attr, value) - - def visit_structured_query( - self, structured_query: StructuredQuery - ) -> Tuple[str, dict]: - print(structured_query) # noqa: T201 - if structured_query.filter is None: - kwargs = {} - else: - kwargs = {"where_str": structured_query.filter.accept(self)} - return structured_query.query, kwargs +__all__ = ["MyScaleTranslator", "_DEFAULT_COMPOSER", "_FUNCTION_COMPOSER"] diff --git a/libs/langchain/langchain/retrievers/self_query/opensearch.py b/libs/langchain/langchain/retrievers/self_query/opensearch.py index bb27cddd0b481..6aa498a48d642 100644 --- a/libs/langchain/langchain/retrievers/self_query/opensearch.py +++ b/libs/langchain/langchain/retrievers/self_query/opensearch.py @@ -1,104 +1,3 @@ -from typing import Dict, Tuple, Union +from langchain_community.retrievers.self_query.opensearch import OpenSearchTranslator -from langchain.chains.query_constructor.ir import ( - Comparator, - Comparison, - Operation, - Operator, - StructuredQuery, - Visitor, -) - - -class OpenSearchTranslator(Visitor): - """Translate `OpenSearch` internal query domain-specific - language elements to valid filters.""" - - allowed_comparators = [ - Comparator.EQ, - Comparator.LT, - Comparator.LTE, - Comparator.GT, - Comparator.GTE, - Comparator.CONTAIN, - Comparator.LIKE, - ] - """Subset of allowed logical comparators.""" - - allowed_operators = [Operator.AND, Operator.OR, Operator.NOT] - """Subset of allowed logical operators.""" - - def _format_func(self, func: Union[Operator, Comparator]) -> str: - self._validate_func(func) - comp_operator_map = { - Comparator.EQ: "term", - Comparator.LT: "lt", - Comparator.LTE: "lte", - Comparator.GT: "gt", - Comparator.GTE: "gte", - Comparator.CONTAIN: "match", - Comparator.LIKE: "fuzzy", - Operator.AND: "must", - Operator.OR: 
"should", - Operator.NOT: "must_not", - } - return comp_operator_map[func] - - def visit_operation(self, operation: Operation) -> Dict: - args = [arg.accept(self) for arg in operation.arguments] - - return {"bool": {self._format_func(operation.operator): args}} - - def visit_comparison(self, comparison: Comparison) -> Dict: - field = f"metadata.{comparison.attribute}" - - if comparison.comparator in [ - Comparator.LT, - Comparator.LTE, - Comparator.GT, - Comparator.GTE, - ]: - if isinstance(comparison.value, dict): - if "date" in comparison.value: - return { - "range": { - field: { - self._format_func( - comparison.comparator - ): comparison.value["date"] - } - } - } - else: - return { - "range": { - field: { - self._format_func(comparison.comparator): comparison.value - } - } - } - - if comparison.comparator == Comparator.LIKE: - return { - self._format_func(comparison.comparator): { - field: {"value": comparison.value} - } - } - - field = f"{field}.keyword" if isinstance(comparison.value, str) else field - - if isinstance(comparison.value, dict): - if "date" in comparison.value: - comparison.value = comparison.value["date"] - - return {self._format_func(comparison.comparator): {field: comparison.value}} - - def visit_structured_query( - self, structured_query: StructuredQuery - ) -> Tuple[str, dict]: - if structured_query.filter is None: - kwargs = {} - else: - kwargs = {"filter": structured_query.filter.accept(self)} - - return structured_query.query, kwargs +__all__ = ["OpenSearchTranslator"] diff --git a/libs/langchain/langchain/retrievers/self_query/pgvector.py b/libs/langchain/langchain/retrievers/self_query/pgvector.py index ebe5bf42ffc43..653068f8cded2 100644 --- a/libs/langchain/langchain/retrievers/self_query/pgvector.py +++ b/libs/langchain/langchain/retrievers/self_query/pgvector.py @@ -1,52 +1,3 @@ -from typing import Dict, Tuple, Union +from langchain_community.retrievers.self_query.pgvector import PGVectorTranslator -from 
langchain.chains.query_constructor.ir import ( - Comparator, - Comparison, - Operation, - Operator, - StructuredQuery, - Visitor, -) - - -class PGVectorTranslator(Visitor): - """Translate `PGVector` internal query language elements to valid filters.""" - - allowed_operators = [Operator.AND, Operator.OR] - """Subset of allowed logical operators.""" - allowed_comparators = [ - Comparator.EQ, - Comparator.NE, - Comparator.GT, - Comparator.LT, - Comparator.IN, - Comparator.NIN, - Comparator.CONTAIN, - Comparator.LIKE, - ] - """Subset of allowed logical comparators.""" - - def _format_func(self, func: Union[Operator, Comparator]) -> str: - self._validate_func(func) - return f"{func.value}" - - def visit_operation(self, operation: Operation) -> Dict: - args = [arg.accept(self) for arg in operation.arguments] - return {self._format_func(operation.operator): args} - - def visit_comparison(self, comparison: Comparison) -> Dict: - return { - comparison.attribute: { - self._format_func(comparison.comparator): comparison.value - } - } - - def visit_structured_query( - self, structured_query: StructuredQuery - ) -> Tuple[str, dict]: - if structured_query.filter is None: - kwargs = {} - else: - kwargs = {"filter": structured_query.filter.accept(self)} - return structured_query.query, kwargs +__all__ = ["PGVectorTranslator"] diff --git a/libs/langchain/langchain/retrievers/self_query/pinecone.py b/libs/langchain/langchain/retrievers/self_query/pinecone.py index 80c401dd95fe1..56515a06e0c2c 100644 --- a/libs/langchain/langchain/retrievers/self_query/pinecone.py +++ b/libs/langchain/langchain/retrievers/self_query/pinecone.py @@ -1,57 +1,3 @@ -from typing import Dict, Tuple, Union +from langchain_community.retrievers.self_query.pinecone import PineconeTranslator -from langchain.chains.query_constructor.ir import ( - Comparator, - Comparison, - Operation, - Operator, - StructuredQuery, - Visitor, -) - - -class PineconeTranslator(Visitor): - """Translate `Pinecone` internal query 
language elements to valid filters.""" - - allowed_comparators = ( - Comparator.EQ, - Comparator.NE, - Comparator.LT, - Comparator.LTE, - Comparator.GT, - Comparator.GTE, - Comparator.IN, - Comparator.NIN, - ) - """Subset of allowed logical comparators.""" - allowed_operators = (Operator.AND, Operator.OR) - """Subset of allowed logical operators.""" - - def _format_func(self, func: Union[Operator, Comparator]) -> str: - self._validate_func(func) - return f"${func.value}" - - def visit_operation(self, operation: Operation) -> Dict: - args = [arg.accept(self) for arg in operation.arguments] - return {self._format_func(operation.operator): args} - - def visit_comparison(self, comparison: Comparison) -> Dict: - if comparison.comparator in (Comparator.IN, Comparator.NIN) and not isinstance( - comparison.value, list - ): - comparison.value = [comparison.value] - - return { - comparison.attribute: { - self._format_func(comparison.comparator): comparison.value - } - } - - def visit_structured_query( - self, structured_query: StructuredQuery - ) -> Tuple[str, dict]: - if structured_query.filter is None: - kwargs = {} - else: - kwargs = {"filter": structured_query.filter.accept(self)} - return structured_query.query, kwargs +__all__ = ["PineconeTranslator"] diff --git a/libs/langchain/langchain/retrievers/self_query/qdrant.py b/libs/langchain/langchain/retrievers/self_query/qdrant.py index c99287751a158..ab26957aebcba 100644 --- a/libs/langchain/langchain/retrievers/self_query/qdrant.py +++ b/libs/langchain/langchain/retrievers/self_query/qdrant.py @@ -1,98 +1,3 @@ -from __future__ import annotations +from langchain_community.retrievers.self_query.qdrant import QdrantTranslator -from typing import TYPE_CHECKING, Tuple - -from langchain.chains.query_constructor.ir import ( - Comparator, - Comparison, - Operation, - Operator, - StructuredQuery, - Visitor, -) - -if TYPE_CHECKING: - from qdrant_client.http import models as rest - - -class QdrantTranslator(Visitor): - 
"""Translate `Qdrant` internal query language elements to valid filters.""" - - allowed_operators = ( - Operator.AND, - Operator.OR, - Operator.NOT, - ) - """Subset of allowed logical operators.""" - - allowed_comparators = ( - Comparator.EQ, - Comparator.LT, - Comparator.LTE, - Comparator.GT, - Comparator.GTE, - Comparator.LIKE, - ) - """Subset of allowed logical comparators.""" - - def __init__(self, metadata_key: str): - self.metadata_key = metadata_key - - def visit_operation(self, operation: Operation) -> rest.Filter: - try: - from qdrant_client.http import models as rest - except ImportError as e: - raise ImportError( - "Cannot import qdrant_client. Please install with `pip install " - "qdrant-client`." - ) from e - - args = [arg.accept(self) for arg in operation.arguments] - operator = { - Operator.AND: "must", - Operator.OR: "should", - Operator.NOT: "must_not", - }[operation.operator] - return rest.Filter(**{operator: args}) - - def visit_comparison(self, comparison: Comparison) -> rest.FieldCondition: - try: - from qdrant_client.http import models as rest - except ImportError as e: - raise ImportError( - "Cannot import qdrant_client. Please install with `pip install " - "qdrant-client`." - ) from e - - self._validate_func(comparison.comparator) - attribute = self.metadata_key + "." + comparison.attribute - if comparison.comparator == Comparator.EQ: - return rest.FieldCondition( - key=attribute, match=rest.MatchValue(value=comparison.value) - ) - if comparison.comparator == Comparator.LIKE: - return rest.FieldCondition( - key=attribute, match=rest.MatchText(text=comparison.value) - ) - kwargs = {comparison.comparator.value: comparison.value} - return rest.FieldCondition(key=attribute, range=rest.Range(**kwargs)) - - def visit_structured_query( - self, structured_query: StructuredQuery - ) -> Tuple[str, dict]: - try: - from qdrant_client.http import models as rest - except ImportError as e: - raise ImportError( - "Cannot import qdrant_client. 
Please install with `pip install " - "qdrant-client`." - ) from e - - if structured_query.filter is None: - kwargs = {} - else: - filter = structured_query.filter.accept(self) - if isinstance(filter, rest.FieldCondition): - filter = rest.Filter(must=[filter]) - kwargs = {"filter": filter} - return structured_query.query, kwargs +__all__ = ["QdrantTranslator"] diff --git a/libs/langchain/langchain/retrievers/self_query/redis.py b/libs/langchain/langchain/retrievers/self_query/redis.py index cefe576182e4a..0b40b709a3130 100644 --- a/libs/langchain/langchain/retrievers/self_query/redis.py +++ b/libs/langchain/langchain/retrievers/self_query/redis.py @@ -1,103 +1,6 @@ -from __future__ import annotations - -from typing import Any, Tuple - -from langchain_community.vectorstores.redis import Redis -from langchain_community.vectorstores.redis.filters import ( - RedisFilterExpression, - RedisFilterField, - RedisFilterOperator, - RedisNum, - RedisTag, - RedisText, -) -from langchain_community.vectorstores.redis.schema import RedisModel - -from langchain.chains.query_constructor.ir import ( - Comparator, - Comparison, - Operation, - Operator, - StructuredQuery, - Visitor, +from langchain_community.retrievers.self_query.redis import ( + _COMPARATOR_TO_BUILTIN_METHOD, + RedisTranslator, ) -_COMPARATOR_TO_BUILTIN_METHOD = { - Comparator.EQ: "__eq__", - Comparator.NE: "__ne__", - Comparator.LT: "__lt__", - Comparator.GT: "__gt__", - Comparator.LTE: "__le__", - Comparator.GTE: "__ge__", - Comparator.CONTAIN: "__eq__", - Comparator.LIKE: "__mod__", -} - - -class RedisTranslator(Visitor): - """Visitor for translating structured queries to Redis filter expressions.""" - - allowed_comparators = ( - Comparator.EQ, - Comparator.NE, - Comparator.LT, - Comparator.LTE, - Comparator.GT, - Comparator.GTE, - Comparator.CONTAIN, - Comparator.LIKE, - ) - """Subset of allowed logical comparators.""" - allowed_operators = (Operator.AND, Operator.OR) - """Subset of allowed logical operators.""" - 
- def __init__(self, schema: RedisModel) -> None: - self._schema = schema - - def _attribute_to_filter_field(self, attribute: str) -> RedisFilterField: - if attribute in [tf.name for tf in self._schema.text]: - return RedisText(attribute) - elif attribute in [tf.name for tf in self._schema.tag or []]: - return RedisTag(attribute) - elif attribute in [tf.name for tf in self._schema.numeric or []]: - return RedisNum(attribute) - else: - raise ValueError( - f"Invalid attribute {attribute} not in vector store schema. Schema is:" - f"\n{self._schema.as_dict()}" - ) - - def visit_comparison(self, comparison: Comparison) -> RedisFilterExpression: - filter_field = self._attribute_to_filter_field(comparison.attribute) - comparison_method = _COMPARATOR_TO_BUILTIN_METHOD[comparison.comparator] - return getattr(filter_field, comparison_method)(comparison.value) - - def visit_operation(self, operation: Operation) -> Any: - left = operation.arguments[0].accept(self) - if len(operation.arguments) > 2: - right = self.visit_operation( - Operation( - operator=operation.operator, arguments=operation.arguments[1:] - ) - ) - else: - right = operation.arguments[1].accept(self) - redis_operator = ( - RedisFilterOperator.OR - if operation.operator == Operator.OR - else RedisFilterOperator.AND - ) - return RedisFilterExpression(operator=redis_operator, left=left, right=right) - - def visit_structured_query( - self, structured_query: StructuredQuery - ) -> Tuple[str, dict]: - if structured_query.filter is None: - kwargs = {} - else: - kwargs = {"filter": structured_query.filter.accept(self)} - return structured_query.query, kwargs - - @classmethod - def from_vectorstore(cls, vectorstore: Redis) -> RedisTranslator: - return cls(vectorstore._schema) +__all__ = ["RedisTranslator", "_COMPARATOR_TO_BUILTIN_METHOD"] diff --git a/libs/langchain/langchain/retrievers/self_query/supabase.py b/libs/langchain/langchain/retrievers/self_query/supabase.py index 267e228fcd98b..a8bf661b39b44 100644 --- 
a/libs/langchain/langchain/retrievers/self_query/supabase.py +++ b/libs/langchain/langchain/retrievers/self_query/supabase.py @@ -1,97 +1,3 @@ -from typing import Any, Dict, Tuple +from langchain_community.retrievers.self_query.supabase import SupabaseVectorTranslator -from langchain.chains.query_constructor.ir import ( - Comparator, - Comparison, - Operation, - Operator, - StructuredQuery, - Visitor, -) - - -class SupabaseVectorTranslator(Visitor): - """Translate Langchain filters to Supabase PostgREST filters.""" - - allowed_operators = [Operator.AND, Operator.OR] - """Subset of allowed logical operators.""" - - allowed_comparators = [ - Comparator.EQ, - Comparator.NE, - Comparator.GT, - Comparator.GTE, - Comparator.LT, - Comparator.LTE, - Comparator.LIKE, - ] - """Subset of allowed logical comparators.""" - - metadata_column = "metadata" - - def _map_comparator(self, comparator: Comparator) -> str: - """ - Maps Langchain comparator to PostgREST comparator: - - https://postgrest.org/en/stable/references/api/tables_views.html#operators - """ - postgrest_comparator = { - Comparator.EQ: "eq", - Comparator.NE: "neq", - Comparator.GT: "gt", - Comparator.GTE: "gte", - Comparator.LT: "lt", - Comparator.LTE: "lte", - Comparator.LIKE: "like", - }.get(comparator) - - if postgrest_comparator is None: - raise Exception( - f"Comparator '{comparator}' is not currently " - "supported in Supabase Vector" - ) - - return postgrest_comparator - - def _get_json_operator(self, value: Any) -> str: - if isinstance(value, str): - return "->>" - else: - return "->" - - def visit_operation(self, operation: Operation) -> str: - args = [arg.accept(self) for arg in operation.arguments] - return f"{operation.operator.value}({','.join(args)})" - - def visit_comparison(self, comparison: Comparison) -> str: - if isinstance(comparison.value, list): - return self.visit_operation( - Operation( - operator=Operator.AND, - arguments=( - Comparison( - comparator=comparison.comparator, - 
attribute=comparison.attribute, - value=value, - ) - for value in comparison.value - ), - ) - ) - - return ".".join( - [ - f"{self.metadata_column}{self._get_json_operator(comparison.value)}{comparison.attribute}", - f"{self._map_comparator(comparison.comparator)}", - f"{comparison.value}", - ] - ) - - def visit_structured_query( - self, structured_query: StructuredQuery - ) -> Tuple[str, Dict[str, str]]: - if structured_query.filter is None: - kwargs = {} - else: - kwargs = {"postgrest_filter": structured_query.filter.accept(self)} - return structured_query.query, kwargs +__all__ = ["SupabaseVectorTranslator"] diff --git a/libs/langchain/langchain/retrievers/self_query/timescalevector.py b/libs/langchain/langchain/retrievers/self_query/timescalevector.py index 3d417578fe577..69115595d7c19 100644 --- a/libs/langchain/langchain/retrievers/self_query/timescalevector.py +++ b/libs/langchain/langchain/retrievers/self_query/timescalevector.py @@ -1,84 +1,5 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Tuple, Union - -from langchain.chains.query_constructor.ir import ( - Comparator, - Comparison, - Operation, - Operator, - StructuredQuery, - Visitor, +from langchain_community.retrievers.self_query.timescalevector import ( + TimescaleVectorTranslator, ) -if TYPE_CHECKING: - from timescale_vector import client - - -class TimescaleVectorTranslator(Visitor): - """Translate the internal query language elements to valid filters.""" - - allowed_operators = [Operator.AND, Operator.OR, Operator.NOT] - """Subset of allowed logical operators.""" - - allowed_comparators = [ - Comparator.EQ, - Comparator.GT, - Comparator.GTE, - Comparator.LT, - Comparator.LTE, - ] - - COMPARATOR_MAP = { - Comparator.EQ: "==", - Comparator.GT: ">", - Comparator.GTE: ">=", - Comparator.LT: "<", - Comparator.LTE: "<=", - } - - OPERATOR_MAP = {Operator.AND: "AND", Operator.OR: "OR", Operator.NOT: "NOT"} - - def _format_func(self, func: Union[Operator, Comparator]) -> str: - 
self._validate_func(func) - if isinstance(func, Operator): - value = self.OPERATOR_MAP[func.value] # type: ignore - elif isinstance(func, Comparator): - value = self.COMPARATOR_MAP[func.value] # type: ignore - return f"{value}" - - def visit_operation(self, operation: Operation) -> client.Predicates: - try: - from timescale_vector import client - except ImportError as e: - raise ImportError( - "Cannot import timescale-vector. Please install with `pip install " - "timescale-vector`." - ) from e - args = [arg.accept(self) for arg in operation.arguments] - return client.Predicates(*args, operator=self._format_func(operation.operator)) - - def visit_comparison(self, comparison: Comparison) -> client.Predicates: - try: - from timescale_vector import client - except ImportError as e: - raise ImportError( - "Cannot import timescale-vector. Please install with `pip install " - "timescale-vector`." - ) from e - return client.Predicates( - ( - comparison.attribute, - self._format_func(comparison.comparator), - comparison.value, - ) - ) - - def visit_structured_query( - self, structured_query: StructuredQuery - ) -> Tuple[str, dict]: - if structured_query.filter is None: - kwargs = {} - else: - kwargs = {"predicates": structured_query.filter.accept(self)} - return structured_query.query, kwargs +__all__ = ["TimescaleVectorTranslator"] diff --git a/libs/langchain/langchain/retrievers/self_query/vectara.py b/libs/langchain/langchain/retrievers/self_query/vectara.py index 02d64f04708bb..4e46c2ed7ef0f 100644 --- a/libs/langchain/langchain/retrievers/self_query/vectara.py +++ b/libs/langchain/langchain/retrievers/self_query/vectara.py @@ -1,70 +1,6 @@ -from typing import Tuple, Union - -from langchain.chains.query_constructor.ir import ( - Comparator, - Comparison, - Operation, - Operator, - StructuredQuery, - Visitor, +from langchain_community.retrievers.self_query.vectara import ( + VectaraTranslator, + process_value, ) - -def process_value(value: Union[int, float, str]) -> str: 
- """Convert a value to a string and add single quotes if it is a string.""" - if isinstance(value, str): - return f"'{value}'" - else: - return str(value) - - -class VectaraTranslator(Visitor): - """Translate `Vectara` internal query language elements to valid filters.""" - - allowed_operators = [Operator.AND, Operator.OR] - """Subset of allowed logical operators.""" - allowed_comparators = [ - Comparator.EQ, - Comparator.NE, - Comparator.GT, - Comparator.GTE, - Comparator.LT, - Comparator.LTE, - ] - """Subset of allowed logical comparators.""" - - def _format_func(self, func: Union[Operator, Comparator]) -> str: - map_dict = { - Operator.AND: " and ", - Operator.OR: " or ", - Comparator.EQ: "=", - Comparator.NE: "!=", - Comparator.GT: ">", - Comparator.GTE: ">=", - Comparator.LT: "<", - Comparator.LTE: "<=", - } - self._validate_func(func) - return map_dict[func] - - def visit_operation(self, operation: Operation) -> str: - args = [arg.accept(self) for arg in operation.arguments] - operator = self._format_func(operation.operator) - return "( " + operator.join(args) + " )" - - def visit_comparison(self, comparison: Comparison) -> str: - comparator = self._format_func(comparison.comparator) - processed_value = process_value(comparison.value) - attribute = comparison.attribute - return ( - "( " + "doc." 
+ attribute + " " + comparator + " " + processed_value + " )" - ) - - def visit_structured_query( - self, structured_query: StructuredQuery - ) -> Tuple[str, dict]: - if structured_query.filter is None: - kwargs = {} - else: - kwargs = {"filter": structured_query.filter.accept(self)} - return structured_query.query, kwargs +__all__ = ["VectaraTranslator", "process_value"] diff --git a/libs/langchain/langchain/retrievers/self_query/weaviate.py b/libs/langchain/langchain/retrievers/self_query/weaviate.py index 1db80b9ba00f5..507c1a3af56a2 100644 --- a/libs/langchain/langchain/retrievers/self_query/weaviate.py +++ b/libs/langchain/langchain/retrievers/self_query/weaviate.py @@ -1,79 +1,3 @@ -from datetime import datetime -from typing import Dict, Tuple, Union +from langchain_community.retrievers.self_query.weaviate import WeaviateTranslator -from langchain.chains.query_constructor.ir import ( - Comparator, - Comparison, - Operation, - Operator, - StructuredQuery, - Visitor, -) - - -class WeaviateTranslator(Visitor): - """Translate `Weaviate` internal query language elements to valid filters.""" - - allowed_operators = [Operator.AND, Operator.OR] - """Subset of allowed logical operators.""" - - allowed_comparators = [ - Comparator.EQ, - Comparator.NE, - Comparator.GTE, - Comparator.LTE, - Comparator.LT, - Comparator.GT, - ] - - def _format_func(self, func: Union[Operator, Comparator]) -> str: - self._validate_func(func) - # https://weaviate.io/developers/weaviate/api/graphql/filters - map_dict = { - Operator.AND: "And", - Operator.OR: "Or", - Comparator.EQ: "Equal", - Comparator.NE: "NotEqual", - Comparator.GTE: "GreaterThanEqual", - Comparator.LTE: "LessThanEqual", - Comparator.LT: "LessThan", - Comparator.GT: "GreaterThan", - } - return map_dict[func] - - def visit_operation(self, operation: Operation) -> Dict: - args = [arg.accept(self) for arg in operation.arguments] - return {"operator": self._format_func(operation.operator), "operands": args} - - def 
visit_comparison(self, comparison: Comparison) -> Dict: - value_type = "valueText" - value = comparison.value - if isinstance(comparison.value, bool): - value_type = "valueBoolean" - elif isinstance(comparison.value, float): - value_type = "valueNumber" - elif isinstance(comparison.value, int): - value_type = "valueInt" - elif ( - isinstance(comparison.value, dict) - and comparison.value.get("type") == "date" - ): - value_type = "valueDate" - # ISO 8601 timestamp, formatted as RFC3339 - date = datetime.strptime(comparison.value["date"], "%Y-%m-%d") - value = date.strftime("%Y-%m-%dT%H:%M:%SZ") - filter = { - "path": [comparison.attribute], - "operator": self._format_func(comparison.comparator), - value_type: value, - } - return filter - - def visit_structured_query( - self, structured_query: StructuredQuery - ) -> Tuple[str, dict]: - if structured_query.filter is None: - kwargs = {} - else: - kwargs = {"where_filter": structured_query.filter.accept(self)} - return structured_query.query, kwargs +__all__ = ["WeaviateTranslator"] diff --git a/poetry.lock b/poetry.lock index d712c5ab7d230..3694204023427 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,9 +1,10 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand. 
[[package]] name = "aiohttp" version = "3.8.5" description = "Async http client/server framework (asyncio)" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -112,6 +113,7 @@ speedups = ["Brotli", "aiodns", "cchardet"] name = "aiosignal" version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -126,6 +128,7 @@ frozenlist = ">=1.1.0" name = "alabaster" version = "0.7.13" description = "A configurable sidebar-enabled Sphinx theme" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -137,6 +140,7 @@ files = [ name = "annotated-types" version = "0.5.0" description = "Reusable constraint types to use with typing.Annotated" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -151,6 +155,7 @@ typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""} name = "anyio" version = "3.7.1" description = "High level compatibility layer for multiple asynchronous event loop implementations" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -172,6 +177,7 @@ trio = ["trio (<0.22)"] name = "appnope" version = "0.1.3" description = "Disable App Nap on macOS >= 10.9" +category = "dev" optional = false python-versions = "*" files = [ @@ -183,6 +189,7 @@ files = [ name = "argon2-cffi" version = "23.1.0" description = "Argon2 for Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -203,6 +210,7 @@ typing = ["mypy"] name = "argon2-cffi-bindings" version = "21.2.0" description = "Low-level CFFI bindings for Argon2" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -240,6 +248,7 @@ tests = ["pytest"] name = "arrow" version = "1.3.0" description = "Better dates & times for Python" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -253,12 +262,13 @@ types-python-dateutil = ">=2.8.10" [package.extras] 
doc = ["doc8", "sphinx (>=7.0.0)", "sphinx-autobuild", "sphinx-autodoc-typehints", "sphinx_rtd_theme (>=1.3.0)"] -test = ["dateparser (==1.*)", "pre-commit", "pytest", "pytest-cov", "pytest-mock", "pytz (==2021.1)", "simplejson (==3.*)"] +test = ["dateparser (>=1.0.0,<2.0.0)", "pre-commit", "pytest", "pytest-cov", "pytest-mock", "pytz (==2021.1)", "simplejson (>=3.0.0,<4.0.0)"] [[package]] name = "asttokens" version = "2.4.0" description = "Annotate AST trees with source code positions" +category = "dev" optional = false python-versions = "*" files = [ @@ -276,6 +286,7 @@ test = ["astroid", "pytest"] name = "async-lru" version = "2.0.4" description = "Simple LRU cache for asyncio" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -290,6 +301,7 @@ typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} name = "async-timeout" version = "4.0.3" description = "Timeout context manager for asyncio programs" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -301,6 +313,7 @@ files = [ name = "attrs" version = "23.1.0" description = "Classes Without Boilerplate" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -319,6 +332,7 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte name = "autodoc-pydantic" version = "1.8.0" description = "Seamlessly integrate pydantic models in your Sphinx documentation." 
+category = "dev" optional = false python-versions = ">=3.6,<4.0.0" files = [ @@ -339,6 +353,7 @@ test = ["coverage (>=5,<6)", "pytest (>=6,<7)"] name = "babel" version = "2.13.0" description = "Internationalization utilities" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -356,6 +371,7 @@ dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] name = "backcall" version = "0.2.0" description = "Specifications for callback functions passed in to an API" +category = "dev" optional = false python-versions = "*" files = [ @@ -367,6 +383,7 @@ files = [ name = "beautifulsoup4" version = "4.12.2" description = "Screen-scraping library" +category = "dev" optional = false python-versions = ">=3.6.0" files = [ @@ -385,6 +402,7 @@ lxml = ["lxml"] name = "black" version = "23.10.1" description = "The uncompromising code formatter." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -429,6 +447,7 @@ uvloop = ["uvloop (>=0.15.2)"] name = "bleach" version = "6.0.0" description = "An easy safelist-based HTML-sanitizing tool." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -447,6 +466,7 @@ css = ["tinycss2 (>=1.1.0,<1.2)"] name = "certifi" version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -458,6 +478,7 @@ files = [ name = "cffi" version = "1.16.0" description = "Foreign Function Interface for Python calling C code." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -522,6 +543,7 @@ pycparser = "*" name = "charset-normalizer" version = "3.3.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+category = "dev" optional = false python-versions = ">=3.7.0" files = [ @@ -621,6 +643,7 @@ files = [ name = "click" version = "8.1.7" description = "Composable command line interface toolkit" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -635,6 +658,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "codespell" version = "2.2.6" description = "Codespell" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -652,6 +676,7 @@ types = ["chardet (>=5.1.0)", "mypy", "pytest", "pytest-cov", "pytest-dependency name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -663,6 +688,7 @@ files = [ name = "comm" version = "0.1.4" description = "Jupyter Python Comm implementation, for usage in ipykernel, xeus-python etc." +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -682,6 +708,7 @@ typing = ["mypy (>=0.990)"] name = "dataclasses-json" version = "0.6.1" description = "Easily serialize dataclasses to and from JSON." 
+category = "dev" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -697,6 +724,7 @@ typing-inspect = ">=0.4.0,<1" name = "debugpy" version = "1.8.0" description = "An implementation of the Debug Adapter Protocol for Python" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -724,6 +752,7 @@ files = [ name = "decorator" version = "5.1.1" description = "Decorators for Humans" +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -735,6 +764,7 @@ files = [ name = "defusedxml" version = "0.7.1" description = "XML bomb protection for Python stdlib modules" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -746,6 +776,7 @@ files = [ name = "distro" version = "1.9.0" description = "Distro - an OS platform information API" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -757,6 +788,7 @@ files = [ name = "dnspython" version = "2.4.2" description = "DNS toolkit" +category = "dev" optional = false python-versions = ">=3.8,<4.0" files = [ @@ -776,6 +808,7 @@ wmi = ["wmi (>=1.5.1,<2.0.0)"] name = "docutils" version = "0.17.1" description = "Docutils -- Python Documentation Utilities" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -787,6 +820,7 @@ files = [ name = "entrypoints" version = "0.4" description = "Discover and load entry points from installed packages." 
+category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -798,6 +832,7 @@ files = [ name = "exceptiongroup" version = "1.1.3" description = "Backport of PEP 654 (exception groups)" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -812,6 +847,7 @@ test = ["pytest (>=6)"] name = "executing" version = "2.0.0" description = "Get the currently executing AST node of a frame, and other information" +category = "dev" optional = false python-versions = "*" files = [ @@ -826,6 +862,7 @@ tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipyth name = "fastcore" version = "1.4.2" description = "Python supercharged for fastai development" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -844,6 +881,7 @@ dev = ["matplotlib", "nbdev (>=0.2.39)", "numpy", "pandas", "pillow", "torch"] name = "fastjsonschema" version = "2.18.1" description = "Fastest Python implementation of JSON schema" +category = "dev" optional = false python-versions = "*" files = [ @@ -858,6 +896,7 @@ devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benc name = "fastrelease" version = "0.1.17" description = "Simplified releases using GitHub Issues" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -876,6 +915,7 @@ pyyaml = "*" name = "fqdn" version = "1.5.1" description = "Validates fully-qualified domain names against RFC 1123, so that they are acceptable to modern bowsers" +category = "dev" optional = false python-versions = ">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4, <4" files = [ @@ -887,6 +927,7 @@ files = [ name = "frozenlist" version = "1.4.0" description = "A list-like structure which implements collections.abc.MutableSequence" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -957,6 +998,7 @@ files = [ name = "ghapi" version = "0.1.22" description = "A python client for the GitHub API" +category = "dev" optional = false python-versions = 
">=3.6" files = [ @@ -976,6 +1018,7 @@ dev = ["jsonref"] name = "greenlet" version = "3.0.0" description = "Lightweight in-process concurrent programming" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1051,6 +1094,7 @@ test = ["objgraph", "psutil"] name = "h11" version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1062,6 +1106,7 @@ files = [ name = "httpcore" version = "1.0.2" description = "A minimal low-level HTTP client." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1076,13 +1121,14 @@ h11 = ">=0.13,<0.15" [package.extras] asyncio = ["anyio (>=4.0,<5.0)"] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] trio = ["trio (>=0.22.0,<0.23.0)"] [[package]] name = "httpx" version = "0.26.0" description = "The next generation HTTP client." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1093,20 +1139,21 @@ files = [ [package.dependencies] anyio = "*" certifi = "*" -httpcore = "==1.*" +httpcore = ">=1.0.0,<2.0.0" idna = "*" sniffio = "*" [package.extras] brotli = ["brotli", "brotlicffi"] -cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +cli = ["click (>=8.0.0,<9.0.0)", "pygments (>=2.0.0,<3.0.0)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] [[package]] name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -1118,6 +1165,7 @@ files = [ name = "imagesize" version = "1.4.1" description = "Getting image size from png/jpeg/jpeg2000/gif file" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1129,6 +1177,7 @@ files = [ name = "importlib-metadata" version = "6.8.0" description = 
"Read metadata from Python packages" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1148,6 +1197,7 @@ testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs name = "importlib-resources" version = "6.1.0" description = "Read resources from Python packages" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1166,6 +1216,7 @@ testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", name = "ipykernel" version = "6.25.2" description = "IPython Kernel for Jupyter" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1179,7 +1230,7 @@ comm = ">=0.1.1" debugpy = ">=1.6.5" ipython = ">=7.23.1" jupyter-client = ">=6.1.12" -jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0" +jupyter-core = ">=4.12,<5.0.0 || >=5.1.0" matplotlib-inline = ">=0.1" nest-asyncio = "*" packaging = "*" @@ -1199,6 +1250,7 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio" name = "ipython" version = "8.12.3" description = "IPython: Productive Interactive Computing" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1238,6 +1290,7 @@ test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.21)", "pa name = "ipython-genutils" version = "0.2.0" description = "Vestigial utilities from IPython" +category = "dev" optional = false python-versions = "*" files = [ @@ -1249,6 +1302,7 @@ files = [ name = "ipywidgets" version = "8.1.1" description = "Jupyter interactive widgets" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1270,6 +1324,7 @@ test = ["ipykernel", "jsonschema", "pytest (>=3.6.0)", "pytest-cov", "pytz"] name = "isoduration" version = "20.11.0" description = "Operations with ISO 8601 durations" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1284,6 +1339,7 @@ arrow = ">=0.15.0" name = "jedi" version = "0.19.1" description = "An autocompletion tool for Python 
that can be used for text editors." +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1303,6 +1359,7 @@ testing = ["Django", "attrs", "colorama", "docopt", "pytest (<7.0.0)"] name = "jinja2" version = "3.1.2" description = "A very fast and expressive template engine." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1320,6 +1377,7 @@ i18n = ["Babel (>=2.7)"] name = "json5" version = "0.9.14" description = "A Python implementation of the JSON5 data format." +category = "dev" optional = false python-versions = "*" files = [ @@ -1334,6 +1392,7 @@ dev = ["hypothesis"] name = "jsonpatch" version = "1.33" description = "Apply JSON-Patches (RFC 6902)" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ @@ -1348,17 +1407,18 @@ jsonpointer = ">=1.9" name = "jsonpointer" version = "2.4" description = "Identify specific nodes in a JSON document (RFC 6901)" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, - {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, ] [[package]] name = "jsonschema" version = "4.19.1" description = "An implementation of JSON Schema validation for Python" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1390,6 +1450,7 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- name = "jsonschema-specifications" version = "2023.7.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1405,6 +1466,7 @@ referencing = ">=0.28.0" name = "jupyter" version = "1.0.0" description = "Jupyter 
metapackage. Install all the Jupyter components in one go." +category = "dev" optional = false python-versions = "*" files = [ @@ -1425,6 +1487,7 @@ qtconsole = "*" name = "jupyter-cache" version = "0.6.1" description = "A defined interface for working with a cache of jupyter notebooks." +category = "dev" optional = false python-versions = "~=3.8" files = [ @@ -1452,6 +1515,7 @@ testing = ["coverage", "ipykernel", "jupytext", "matplotlib", "nbdime", "nbforma name = "jupyter-client" version = "7.4.9" description = "Jupyter protocol implementation and client libraries" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1476,6 +1540,7 @@ test = ["codecov", "coverage", "ipykernel (>=6.12)", "ipython", "mypy", "pre-com name = "jupyter-console" version = "6.6.3" description = "Jupyter terminal console" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1487,7 +1552,7 @@ files = [ ipykernel = ">=6.14" ipython = "*" jupyter-client = ">=7.0.0" -jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0" +jupyter-core = ">=4.12,<5.0.0 || >=5.1.0" prompt-toolkit = ">=3.0.30" pygments = "*" pyzmq = ">=17" @@ -1500,6 +1565,7 @@ test = ["flaky", "pexpect", "pytest"] name = "jupyter-core" version = "5.3.2" description = "Jupyter core package. A base package on which Jupyter projects rely." 
+category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1520,6 +1586,7 @@ test = ["ipykernel", "pre-commit", "pytest", "pytest-cov", "pytest-timeout"] name = "jupyter-events" version = "0.7.0" description = "Jupyter Event System library" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1545,6 +1612,7 @@ test = ["click", "pre-commit", "pytest (>=7.0)", "pytest-asyncio (>=0.19.0)", "p name = "jupyter-lsp" version = "2.2.0" description = "Multi-Language Server WebSocket proxy for Jupyter Notebook/Lab server" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1560,6 +1628,7 @@ jupyter-server = ">=1.1.2" name = "jupyter-server" version = "2.7.3" description = "The backend—i.e. core services, APIs, and REST endpoints—to Jupyter web applications." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1572,7 +1641,7 @@ anyio = ">=3.1.0" argon2-cffi = "*" jinja2 = "*" jupyter-client = ">=7.4.4" -jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0" +jupyter-core = ">=4.12,<5.0.0 || >=5.1.0" jupyter-events = ">=0.6.0" jupyter-server-terminals = "*" nbconvert = ">=6.4.4" @@ -1596,6 +1665,7 @@ test = ["flaky", "ipykernel", "pre-commit", "pytest (>=7.0)", "pytest-console-sc name = "jupyter-server-terminals" version = "0.4.4" description = "A Jupyter Server Extension Providing Terminals." 
+category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1615,6 +1685,7 @@ test = ["coverage", "jupyter-server (>=2.0.0)", "pytest (>=7.0)", "pytest-cov", name = "jupyterlab" version = "4.0.6" description = "JupyterLab computational environment" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1648,6 +1719,7 @@ test = ["coverage", "pytest (>=7.0)", "pytest-check-links (>=0.7)", "pytest-cons name = "jupyterlab-pygments" version = "0.2.2" description = "Pygments theme using JupyterLab CSS variables" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1659,6 +1731,7 @@ files = [ name = "jupyterlab-server" version = "2.25.0" description = "A set of server components for JupyterLab and JupyterLab like applications." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1685,6 +1758,7 @@ test = ["hatch", "ipykernel", "openapi-core (>=0.18.0,<0.19.0)", "openapi-spec-v name = "jupyterlab-widgets" version = "3.0.9" description = "Jupyter interactive widgets for JupyterLab" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1696,6 +1770,7 @@ files = [ name = "langchain" version = "0.1.5" description = "Building applications with LLMs through composability" +category = "dev" optional = false python-versions = ">=3.8.1,<4.0" files = [] @@ -1739,6 +1814,7 @@ url = "libs/langchain" name = "langchain-community" version = "0.0.17" description = "Community contributed LangChain integrations." 
+category = "dev" optional = false python-versions = ">=3.8.1,<4.0" files = [] @@ -1767,6 +1843,7 @@ url = "libs/community" name = "langchain-core" version = "0.1.18" description = "Building applications with LLMs through composability" +category = "dev" optional = false python-versions = ">=3.8.1,<4.0" files = [] @@ -1793,6 +1870,7 @@ url = "libs/core" name = "langchain-experimental" version = "0.0.50" description = "Building applications with LLMs through composability" +category = "dev" optional = false python-versions = ">=3.8.1,<4.0" files = [] @@ -1813,6 +1891,7 @@ url = "libs/experimental" name = "langchain-openai" version = "0.0.5" description = "An integration package connecting OpenAI and LangChain" +category = "dev" optional = false python-versions = ">=3.8.1,<4.0" files = [] @@ -1832,6 +1911,7 @@ url = "libs/partners/openai" name = "langsmith" version = "0.0.86" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." +category = "dev" optional = false python-versions = ">=3.8.1,<4.0" files = [ @@ -1847,6 +1927,7 @@ requests = ">=2,<3" name = "linkchecker" version = "10.3.0" description = "check links in web documents or full websites" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1863,6 +1944,7 @@ requests = ">=2.20" name = "livereload" version = "2.6.3" description = "Python LiveReload is an awesome tool for web developers" +category = "dev" optional = false python-versions = "*" files = [ @@ -1878,6 +1960,7 @@ tornado = {version = "*", markers = "python_version > \"2.7\""} name = "markdown-it-py" version = "2.2.0" description = "Python port of markdown-it. Markdown parsing, done right!" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1902,6 +1985,7 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] name = "markupsafe" version = "2.1.3" description = "Safely add untrusted strings to HTML/XML markup." 
+category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1925,16 +2009,6 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, - {file = 
"MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -1971,6 +2045,7 @@ files = [ name = "marshmallow" version = "3.20.1" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1991,6 +2066,7 @@ tests = ["pytest", "pytz", "simplejson"] name = "matplotlib-inline" version = "0.1.6" description = "Inline Matplotlib backend for Jupyter" +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -2005,6 +2081,7 @@ traitlets = "*" name = "mdit-py-plugins" version = "0.3.5" description = "Collection of plugins for markdown-it-py" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2024,6 +2101,7 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] name = "mdurl" version = "0.1.2" description = "Markdown URL utilities" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2035,6 +2113,7 @@ files = [ name = "mistune" version = "3.0.2" description = "A sane and fast Markdown parser with useful plugins and renderers" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2046,6 +2125,7 @@ files = [ name = "multidict" version = "6.0.4" description = "multidict implementation" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2129,6 +2209,7 @@ files = [ name = "mypy-extensions" 
version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -2140,6 +2221,7 @@ files = [ name = "myst-nb" version = "0.17.2" description = "A Jupyter Notebook Sphinx reader built on top of the MyST markdown parser." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2168,6 +2250,7 @@ testing = ["beautifulsoup4", "coverage (>=6.4,<8.0)", "ipykernel (>=5.5,<6.0)", name = "myst-parser" version = "0.18.1" description = "An extended commonmark compliant parser, with bridges to docutils & sphinx." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2194,6 +2277,7 @@ testing = ["beautifulsoup4", "coverage[toml]", "pytest (>=6,<7)", "pytest-cov", name = "nbclient" version = "0.7.4" description = "A client library for executing notebooks. Formerly nbconvert's ExecutePreprocessor." +category = "dev" optional = false python-versions = ">=3.7.0" files = [ @@ -2203,7 +2287,7 @@ files = [ [package.dependencies] jupyter-client = ">=6.1.12" -jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0" +jupyter-core = ">=4.12,<5.0.0 || >=5.1.0" nbformat = ">=5.1" traitlets = ">=5.3" @@ -2216,6 +2300,7 @@ test = ["flaky", "ipykernel", "ipython", "ipywidgets", "nbconvert (>=7.0.0)", "p name = "nbconvert" version = "7.8.0" description = "Converting Jupyter Notebooks" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2254,6 +2339,7 @@ webpdf = ["playwright"] name = "nbdev" version = "1.2.0" description = "Writing a library entirely in notebooks" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2278,6 +2364,7 @@ pyyaml = "*" name = "nbdoc" version = "0.0.82" description = "Generate beautiful, testable documentation with Jupyter Notebooks" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2298,6 +2385,7 @@ pip = "*" name = "nbformat" version = "5.9.2" 
description = "The Jupyter Notebook format" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2319,6 +2407,7 @@ test = ["pep440", "pre-commit", "pytest", "testpath"] name = "nbsphinx" version = "0.8.12" description = "Jupyter Notebook Tools for Sphinx" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2338,6 +2427,7 @@ traitlets = ">=5" name = "nest-asyncio" version = "1.5.8" description = "Patch asyncio to allow nested event loops" +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -2349,6 +2439,7 @@ files = [ name = "notebook" version = "7.0.4" description = "Jupyter Notebook - A web-based notebook environment for interactive computing" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2372,6 +2463,7 @@ test = ["importlib-resources (>=5.0)", "ipykernel", "jupyter-server[test] (>=2.4 name = "notebook-shim" version = "0.2.3" description = "A shim layer for notebook traits and config" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2389,6 +2481,7 @@ test = ["pytest", "pytest-console-scripts", "pytest-jupyter", "pytest-tornasync" name = "numpy" version = "1.24.4" description = "Fundamental package for array computing in Python" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2426,6 +2519,7 @@ files = [ name = "numpydoc" version = "1.2" description = "Sphinx extension to support docstrings in Numpy format" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2444,6 +2538,7 @@ testing = ["matplotlib", "pytest", "pytest-cov"] name = "openai" version = "1.10.0" description = "The official Python library for the openai API" +category = "dev" optional = false python-versions = ">=3.7.1" files = [ @@ -2467,6 +2562,7 @@ datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] name = "overrides" version = "7.4.0" description = "A decorator to automatically detect mismatch when overriding 
a method." +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2478,6 +2574,7 @@ files = [ name = "packaging" version = "23.2" description = "Core utilities for Python packages" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2489,6 +2586,7 @@ files = [ name = "pandocfilters" version = "1.5.0" description = "Utilities for writing pandoc filters in python" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2500,6 +2598,7 @@ files = [ name = "parso" version = "0.8.3" description = "A Python Parser" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2515,6 +2614,7 @@ testing = ["docopt", "pytest (<6.0.0)"] name = "pathspec" version = "0.11.2" description = "Utility library for gitignore style pattern matching of file paths." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2526,6 +2626,7 @@ files = [ name = "pexpect" version = "4.8.0" description = "Pexpect allows easy control of interactive console applications." +category = "dev" optional = false python-versions = "*" files = [ @@ -2540,6 +2641,7 @@ ptyprocess = ">=0.5" name = "pickleshare" version = "0.7.5" description = "Tiny 'shelve'-like database with concurrency support" +category = "dev" optional = false python-versions = "*" files = [ @@ -2551,6 +2653,7 @@ files = [ name = "pip" version = "23.2.1" description = "The PyPA recommended tool for installing Python packages." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2562,6 +2665,7 @@ files = [ name = "pkgutil-resolve-name" version = "1.3.10" description = "Resolve a name to an object." +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2573,6 +2677,7 @@ files = [ name = "platformdirs" version = "3.11.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
+category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2588,6 +2693,7 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-co name = "prometheus-client" version = "0.17.1" description = "Python client for the Prometheus monitoring system." +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2602,6 +2708,7 @@ twisted = ["twisted"] name = "prompt-toolkit" version = "3.0.39" description = "Library for building powerful interactive command lines in Python" +category = "dev" optional = false python-versions = ">=3.7.0" files = [ @@ -2616,6 +2723,7 @@ wcwidth = "*" name = "psutil" version = "5.9.5" description = "Cross-platform lib for process and system monitoring in Python." +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2642,6 +2750,7 @@ test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] name = "ptyprocess" version = "0.7.0" description = "Run a subprocess in a pseudo terminal" +category = "dev" optional = false python-versions = "*" files = [ @@ -2653,6 +2762,7 @@ files = [ name = "pure-eval" version = "0.2.2" description = "Safely evaluate AST nodes without side effects" +category = "dev" optional = false python-versions = "*" files = [ @@ -2667,6 +2777,7 @@ tests = ["pytest"] name = "pycparser" version = "2.21" description = "C parser in Python" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2678,6 +2789,7 @@ files = [ name = "pydantic" version = "2.4.2" description = "Data validation using Python type hints" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2697,6 +2809,7 @@ email = ["email-validator (>=2.0.0)"] name = "pydantic-core" version = "2.10.1" description = "" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2815,6 +2928,7 @@ typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" name = "pydata-sphinx-theme" 
version = "0.8.1" description = "Bootstrap-based Sphinx theme from the PyData community" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2838,6 +2952,7 @@ test = ["pydata-sphinx-theme[doc]", "pytest"] name = "pygments" version = "2.16.1" description = "Pygments is a syntax highlighting package written in Python." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2852,6 +2967,7 @@ plugins = ["importlib-metadata"] name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -2866,6 +2982,7 @@ six = ">=1.5" name = "python-json-logger" version = "2.0.7" description = "A python library adding a json log formatter" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2877,6 +2994,7 @@ files = [ name = "pytz" version = "2023.3.post1" description = "World timezone definitions, modern and historical" +category = "dev" optional = false python-versions = "*" files = [ @@ -2888,6 +3006,7 @@ files = [ name = "pywin32" version = "306" description = "Python for Window Extensions" +category = "dev" optional = false python-versions = "*" files = [ @@ -2911,6 +3030,7 @@ files = [ name = "pywinpty" version = "2.0.11" description = "Pseudo terminal support for Windows from Python." 
+category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2925,6 +3045,7 @@ files = [ name = "pyyaml" version = "6.0.1" description = "YAML parser and emitter for Python" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2933,7 +3054,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -2941,16 +3061,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -2967,7 +3079,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -2975,7 +3086,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -2985,6 
+3095,7 @@ files = [ name = "pyzmq" version = "25.1.1" description = "Python bindings for 0MQ" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -3090,6 +3201,7 @@ cffi = {version = "*", markers = "implementation_name == \"pypy\""} name = "qtconsole" version = "5.4.4" description = "Jupyter Qt console" +category = "dev" optional = false python-versions = ">= 3.7" files = [ @@ -3116,6 +3228,7 @@ test = ["flaky", "pytest", "pytest-qt"] name = "qtpy" version = "2.4.0" description = "Provides an abstraction layer on top of the various Qt bindings (PyQt5/6 and PySide2/6)." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3133,6 +3246,7 @@ test = ["pytest (>=6,!=7.0.0,!=7.0.1)", "pytest-cov (>=3.0.0)", "pytest-qt"] name = "referencing" version = "0.30.2" description = "JSON Referencing + Python" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -3148,6 +3262,7 @@ rpds-py = ">=0.7.0" name = "regex" version = "2023.12.25" description = "Alternative regular expression module, to replace re." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3250,6 +3365,7 @@ files = [ name = "requests" version = "2.31.0" description = "Python HTTP for Humans." 
+category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3271,6 +3387,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "rfc3339-validator" version = "0.1.4" description = "A pure python RFC3339 validator" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3285,6 +3402,7 @@ six = "*" name = "rfc3986-validator" version = "0.1.1" description = "Pure python rfc3986 validator" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3296,6 +3414,7 @@ files = [ name = "rpds-py" version = "0.10.3" description = "Python bindings to Rust's persistent data structures (rpds)" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -3402,6 +3521,7 @@ files = [ name = "ruff" version = "0.1.5" description = "An extremely fast Python linter and code formatter, written in Rust." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3428,6 +3548,7 @@ files = [ name = "send2trash" version = "1.8.2" description = "Send file to trash natively under Mac OS X, Windows and Linux" +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -3444,6 +3565,7 @@ win32 = ["pywin32"] name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3455,6 +3577,7 @@ files = [ name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3466,6 +3589,7 @@ files = [ name = "snowballstemmer" version = "2.2.0" description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." 
+category = "dev" optional = false python-versions = "*" files = [ @@ -3477,6 +3601,7 @@ files = [ name = "soupsieve" version = "2.5" description = "A modern CSS selector implementation for Beautiful Soup." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -3488,6 +3613,7 @@ files = [ name = "sphinx" version = "4.5.0" description = "Python documentation generator" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -3523,6 +3649,7 @@ test = ["cython", "html5lib", "pytest", "pytest-cov", "typed-ast"] name = "sphinx-autobuild" version = "2021.3.14" description = "Rebuild Sphinx documentation on changes, with live-reload in the browser." +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -3542,6 +3669,7 @@ test = ["pytest", "pytest-cov"] name = "sphinx-book-theme" version = "0.3.3" description = "A clean book theme for scientific explanations and documentation with Sphinx" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3563,6 +3691,7 @@ test = ["beautifulsoup4 (>=4.6.1,<5)", "coverage", "myst-nb (>=0.13.2,<0.14.0)", name = "sphinx-copybutton" version = "0.5.2" description = "Add a copy button to each of your code cells." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3581,6 +3710,7 @@ rtd = ["ipython", "myst-nb", "sphinx", "sphinx-book-theme", "sphinx-examples"] name = "sphinx-panels" version = "0.6.0" description = "A sphinx extension for creating panels in a grid layout." 
+category = "dev" optional = false python-versions = "*" files = [ @@ -3602,6 +3732,7 @@ themes = ["myst-parser (>=0.12.9,<0.13.0)", "pydata-sphinx-theme (>=0.4.0,<0.5.0 name = "sphinx-rtd-theme" version = "1.3.0" description = "Read the Docs theme for Sphinx" +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -3621,6 +3752,7 @@ dev = ["bump2version", "sphinxcontrib-httpdomain", "transifex-client", "wheel"] name = "sphinx-typlog-theme" version = "0.8.0" description = "A typlog Sphinx theme" +category = "dev" optional = false python-versions = "*" files = [ @@ -3635,6 +3767,7 @@ dev = ["livereload", "sphinx"] name = "sphinxcontrib-applehelp" version = "1.0.4" description = "sphinxcontrib-applehelp is a Sphinx extension which outputs Apple help books" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -3650,6 +3783,7 @@ test = ["pytest"] name = "sphinxcontrib-devhelp" version = "1.0.2" description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp document." 
+category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -3665,6 +3799,7 @@ test = ["pytest"] name = "sphinxcontrib-htmlhelp" version = "2.0.1" description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -3680,6 +3815,7 @@ test = ["html5lib", "pytest"] name = "sphinxcontrib-jquery" version = "4.1" description = "Extension to include jQuery on newer Sphinx releases" +category = "dev" optional = false python-versions = ">=2.7" files = [ @@ -3694,6 +3830,7 @@ Sphinx = ">=1.8" name = "sphinxcontrib-jsmath" version = "1.0.1" description = "A sphinx extension which renders display math in HTML via JavaScript" +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -3708,6 +3845,7 @@ test = ["flake8", "mypy", "pytest"] name = "sphinxcontrib-qthelp" version = "1.0.3" description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp document." +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -3723,6 +3861,7 @@ test = ["pytest"] name = "sphinxcontrib-serializinghtml" version = "1.1.5" description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)." 
+category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -3738,6 +3877,7 @@ test = ["pytest"] name = "sqlalchemy" version = "2.0.21" description = "Database Abstraction Library" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3824,6 +3964,7 @@ sqlcipher = ["sqlcipher3-binary"] name = "stack-data" version = "0.6.3" description = "Extract data from python stack frames and tracebacks for informative displays" +category = "dev" optional = false python-versions = "*" files = [ @@ -3843,6 +3984,7 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] name = "tabulate" version = "0.9.0" description = "Pretty-print tabular data" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3857,6 +3999,7 @@ widechars = ["wcwidth"] name = "tenacity" version = "8.2.3" description = "Retry code until it succeeds" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3871,6 +4014,7 @@ doc = ["reno", "sphinx", "tornado (>=4.5)"] name = "terminado" version = "0.17.1" description = "Tornado websocket backend for the Xterm.js Javascript terminal emulator library." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3891,6 +4035,7 @@ test = ["pre-commit", "pytest (>=7.0)", "pytest-timeout"] name = "tiktoken" version = "0.5.2" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -3943,6 +4088,7 @@ blobfile = ["blobfile (>=2)"] name = "tinycss2" version = "1.2.1" description = "A tiny CSS parser" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3961,6 +4107,7 @@ test = ["flake8", "isort", "pytest"] name = "tokenize-rt" version = "5.2.0" description = "A wrapper around the stdlib `tokenize` which roundtrips." 
+category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -3972,6 +4119,7 @@ files = [ name = "toml" version = "0.10.2" description = "Python Library for Tom's Obvious, Minimal Language" +category = "dev" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3983,6 +4131,7 @@ files = [ name = "tomli" version = "2.0.1" description = "A lil' TOML parser" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3994,6 +4143,7 @@ files = [ name = "tornado" version = "6.3.3" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." +category = "dev" optional = false python-versions = ">= 3.8" files = [ @@ -4014,6 +4164,7 @@ files = [ name = "tqdm" version = "4.66.1" description = "Fast, Extensible Progress Meter" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -4034,6 +4185,7 @@ telegram = ["requests"] name = "traitlets" version = "5.11.1" description = "Traitlets Python configuration system" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -4049,6 +4201,7 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.5.1)", "pre-commit", "pytest (>=7.0, name = "types-python-dateutil" version = "2.8.19.14" description = "Typing stubs for python-dateutil" +category = "dev" optional = false python-versions = "*" files = [ @@ -4060,6 +4213,7 @@ files = [ name = "typing-extensions" version = "4.8.0" description = "Backported and Experimental Type Hints for Python 3.8+" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -4071,6 +4225,7 @@ files = [ name = "typing-inspect" version = "0.9.0" description = "Runtime inspection utilities for typing module." 
+category = "dev" optional = false python-versions = "*" files = [ @@ -4086,6 +4241,7 @@ typing-extensions = ">=3.7.4" name = "uri-template" version = "1.3.0" description = "RFC 6570 URI Template Processor" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -4100,6 +4256,7 @@ dev = ["flake8", "flake8-annotations", "flake8-bandit", "flake8-bugbear", "flake name = "urllib3" version = "2.0.6" description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -4117,6 +4274,7 @@ zstd = ["zstandard (>=0.18.0)"] name = "wcwidth" version = "0.2.8" description = "Measures the displayed width of unicode strings in a terminal" +category = "dev" optional = false python-versions = "*" files = [ @@ -4128,6 +4286,7 @@ files = [ name = "webcolors" version = "1.13" description = "A library for working with the color formats defined by HTML and CSS." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -4143,6 +4302,7 @@ tests = ["pytest", "pytest-cov"] name = "webencodings" version = "0.5.1" description = "Character encoding aliases for legacy web content" +category = "dev" optional = false python-versions = "*" files = [ @@ -4154,6 +4314,7 @@ files = [ name = "websocket-client" version = "1.6.3" description = "WebSocket client for Python with low level API options" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -4170,6 +4331,7 @@ test = ["websockets"] name = "widgetsnbextension" version = "4.0.9" description = "Jupyter interactive widgets for Jupyter Notebook" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -4181,6 +4343,7 @@ files = [ name = "yarl" version = "1.9.2" description = "Yet another URL library" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -4268,6 +4431,7 @@ multidict = ">=4.0" name = "zipp" version = "3.17.0" description = "Backport of 
pathlib-compatible object wrapper for zip files" +category = "dev" optional = false python-versions = ">=3.8" files = [ From 580cd704c975d036018223f789a0375fe5e1c3f8 Mon Sep 17 00:00:00 2001 From: leo-gan Date: Thu, 22 Feb 2024 10:43:12 -0800 Subject: [PATCH 2/2] fixed merging conflicts --- docs/docs/integrations/chat/groq.ipynb | 4 +--- .../retrievers/self_query/elasticsearch.py | 13 ++++++++----- .../retrievers/self_query/myscale.py | 2 +- libs/core/langchain_core/sql_constructor/base.py | 3 ++- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/docs/docs/integrations/chat/groq.ipynb b/docs/docs/integrations/chat/groq.ipynb index 15c967f553ddd..5ee3841207700 100644 --- a/docs/docs/integrations/chat/groq.ipynb +++ b/docs/docs/integrations/chat/groq.ipynb @@ -98,9 +98,7 @@ "prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\n", "\n", "chain = prompt | chat\n", - "chain.invoke({\n", - " \"text\": \"Explain the importance of low latency LLMs.\"\n", - "})" + "chain.invoke({\"text\": \"Explain the importance of low latency LLMs.\"})" ] }, { diff --git a/libs/community/langchain_community/retrievers/self_query/elasticsearch.py b/libs/community/langchain_community/retrievers/self_query/elasticsearch.py index c4ea733ad6815..036b12010c60b 100644 --- a/libs/community/langchain_community/retrievers/self_query/elasticsearch.py +++ b/libs/community/langchain_community/retrievers/self_query/elasticsearch.py @@ -61,11 +61,10 @@ def visit_comparison(self, comparison: Comparison) -> Dict: ] if is_range_comparator: - return { - "range": { - field: {self._format_func(comparison.comparator): comparison.value} - } - } + value = comparison.value + if isinstance(comparison.value, dict) and "date" in comparison.value: + value = comparison.value["date"] + return {"range": {field: {self._format_func(comparison.comparator): value}}} if comparison.comparator == Comparator.CONTAIN: return { @@ -85,6 +84,10 @@ def visit_comparison(self, 
comparison: Comparison) -> Dict: # we want to use the keyword field field = f"{field}.keyword" if isinstance(comparison.value, str) else field + if isinstance(comparison.value, dict): + if "date" in comparison.value: + comparison.value = comparison.value["date"] + return {self._format_func(comparison.comparator): {field: comparison.value}} def visit_structured_query( diff --git a/libs/community/langchain_community/retrievers/self_query/myscale.py b/libs/community/langchain_community/retrievers/self_query/myscale.py index 642d0066f2f8b..4a811764a4fba 100644 --- a/libs/community/langchain_community/retrievers/self_query/myscale.py +++ b/libs/community/langchain_community/retrievers/self_query/myscale.py @@ -117,7 +117,7 @@ def visit_comparison(self, comparison: Comparison) -> Dict: def visit_structured_query( self, structured_query: StructuredQuery ) -> Tuple[str, dict]: - print(structured_query) + print(structured_query) # noqa: T201 if structured_query.filter is None: kwargs = {} else: diff --git a/libs/core/langchain_core/sql_constructor/base.py b/libs/core/langchain_core/sql_constructor/base.py index e4061ad8c38db..5ff0c639b1915 100644 --- a/libs/core/langchain_core/sql_constructor/base.py +++ b/libs/core/langchain_core/sql_constructor/base.py @@ -321,7 +321,8 @@ def load_query_constructor_runnable( Args: llm: BaseLanguageModel to use for the chain. - document_contents: The contents of the document to be queried. + document_contents: Description of the page contents of the document to be + queried. attribute_info: Sequence of attributes in the document. examples: Optional list of examples to use for the chain. allowed_comparators: Sequence of allowed comparators. Defaults to all