diff --git a/module_text_llm/.env.example b/module_text_llm/.env.example index aedc5bdba..db957e6db 100644 --- a/module_text_llm/.env.example +++ b/module_text_llm/.env.example @@ -12,7 +12,7 @@ DATABASE_URL=sqlite:///../data/data.sqlite # Default model to use # See below for options, available models are also logged on startup -LLM_DEFAULT_MODEL="azure_openai_gpt-35" +LLM_DEFAULT_MODEL="azure_openai_gpt-35-turbo" # Enable LLM-as-a-judge approach 0 = disabled, 1 = enabled LLM_ENABLE_LLM_AS_A_JUDGE=1 @@ -23,13 +23,14 @@ LLM_EVALUATION_MODEL="azure_openai_gpt-4" # Standard OpenAI (Non-Azure) [leave blank if not used] # Model names prefixed with `openai_` followed by the model name, e.g. `openai_text-davinci-003` # A list of models can be found in `module_text_llm/helpers/models/openai.py` (openai_models) -LLM_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" # Azure OpenAI [leave blank if not used] # Model names prefixed with `azure_openai_` followed by the deployment id, e.g. `azure_openai_gpt-35` -LLM_AZURE_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" -LLM_AZURE_OPENAI_API_BASE="https://ase-eu01.openai.azure.com/" # change base if needed -LLM_AZURE_OPENAI_API_VERSION="2023-07-01-preview" # change base if needed +AZURE_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +AZURE_OPENAI_ENDPOINT="https://ase-eu01.openai.azure.com/" # change base if needed +OPENAI_API_VERSION="2023-07-01-preview" # change base if needed +AZURE_DEPLOYMENTS=gpt-35-turbo,gpt-4-turbo,gpt-4-vision,gpt-4o # Replicate [leave blank if not used] # See https://replicate.com and adjust model config options in `module_text_llm/helpers/models/replicate.py` @@ -40,4 +41,4 @@ REPLICATE_API_TOKEN= # LANGCHAIN_TRACING_V2=true # LANGCHAIN_ENDPOINT="https://api.smith.langchain.com" # LANGCHAIN_API_KEY="XXX" -# LANGCHAIN_PROJECT="XXX" \ No newline at end of file +# LANGCHAIN_PROJECT="XXX" diff --git a/module_text_llm/module_text_llm/helpers/models/__init__.py b/module_text_llm/module_text_llm/helpers/models/__init__.py index 144bcf923..85078d4d7 100644 --- a/module_text_llm/module_text_llm/helpers/models/__init__.py +++ b/module_text_llm/module_text_llm/helpers/models/__init__.py @@ -23,16 +23,6 @@ except AttributeError: pass -try: - import module_text_llm.helpers.models.replicate as replicate_config - types.append(replicate_config.ReplicateModelConfig) - if default_model_name in replicate_config.available_models: - DefaultModelConfig = replicate_config.ReplicateModelConfig - if evaluation_model_name in replicate_config.available_models: - evaluation_model = replicate_config.available_models[evaluation_model_name] -except AttributeError: - pass - if not types: raise EnvironmentError( "No model configurations available, please set up at least one provider in the environment variables.") diff --git a/module_text_llm/module_text_llm/helpers/models/openai.py b/module_text_llm/module_text_llm/helpers/models/openai.py index ab3044277..e6faaa01e 100644 --- a/module_text_llm/module_text_llm/helpers/models/openai.py +++ b/module_text_llm/module_text_llm/helpers/models/openai.py @@ -1,254 +1,44 @@ import os -from contextlib import contextmanager -from typing import Any, Callable, Dict, List +from typing import Any , Dict from pydantic import Field, validator, PositiveInt from enum import Enum - import openai -from langchain.chat_models import AzureChatOpenAI, ChatOpenAI -from langchain.llms import AzureOpenAI, OpenAI -from langchain.llms.openai import BaseOpenAI from langchain.base_language import BaseLanguageModel - +from langchain_openai import AzureChatOpenAI, ChatOpenAI from athena.logger import logger from .model_config import ModelConfig - OPENAI_PREFIX = "openai_" AZURE_OPENAI_PREFIX = "azure_openai_" - - -######################################################################### -# Monkey patching openai/langchain api # -# ===================================================================== # -# This allows us to have multiple api keys i.e. mixing # -# openai and azure openai api keys so we can use not only deployed # -# models but also models from the non-azure openai api. # -# This is mostly for testing purposes, in production we can just deploy # -# the models to azure that we want to use. # -######################################################################### - -# Prevent LangChain error, we will set the key later -os.environ["OPENAI_API_KEY"] = "" - -def _wrap(old: Any, new: Any) -> Callable: - def repl(*args: Any, **kwargs: Any) -> Any: - new(args[0]) # args[0] is self - return old(*args, **kwargs) - return repl - - -def _async_wrap(old: Any, new: Any): - async def repl(*args, **kwargs): - new(args[0]) # args[0] is self - return await old(*args, **kwargs) - return repl - - -def _set_credentials(self): - openai.api_key = self.openai_api_key - - api_type = "open_ai" - api_base = "https://api.openai.com/v1" - api_version = None - if hasattr(self, "openai_api_type"): - api_type = self.openai_api_type - - if api_type == "azure": - if hasattr(self, "openai_api_base"): - api_base = self.openai_api_base - if hasattr(self, "openai_api_version"): - api_version = self.openai_api_version - - openai.api_type = api_type - openai.api_base = api_base - openai.api_version = api_version - - -# Monkey patching langchain -# pylint: disable=protected-access -ChatOpenAI._generate = _wrap(ChatOpenAI._generate, _set_credentials) # type: ignore -ChatOpenAI._agenerate = _async_wrap(ChatOpenAI._agenerate, _set_credentials) # type: ignore -BaseOpenAI._generate = _wrap(BaseOpenAI._generate, _set_credentials) # type: ignore -BaseOpenAI._agenerate = _async_wrap(BaseOpenAI._agenerate, _set_credentials) # type: ignore -# pylint: enable=protected-access - -######################################################################### -# Monkey patching end # -######################################################################### - - -def _use_azure_credentials(): - openai.api_type = "azure" - openai.api_key = os.environ.get("LLM_AZURE_OPENAI_API_KEY") - openai.api_base = os.environ.get("LLM_AZURE_OPENAI_API_BASE") - # os.environ.get("LLM_AZURE_OPENAI_API_VERSION") - openai.api_version = "2023-03-15-preview" - - -def _use_openai_credentials(): - openai.api_type = "open_ai" - openai.api_key = os.environ.get("LLM_OPENAI_API_KEY") - openai.api_base = "https://api.openai.com/v1" - openai.api_version = None - - -openai_available = bool(os.environ.get("LLM_OPENAI_API_KEY")) -azure_openai_available = bool(os.environ.get("LLM_AZURE_OPENAI_API_KEY")) - - -# This is a hack to make sure that the openai api is set correctly -# Right now it is overkill, but it will be useful when the api gets fixed and we no longer -# hardcode the model names (i.e. OpenAI fixes their api) -@contextmanager -def _openai_client(use_azure_api: bool, is_preference: bool): - """Set the openai client to use the correct api type, if available - - Args: - use_azure_api (bool): If true, use the azure api, else use the openai api - is_preference (bool): If true, it can fall back to the other api if the preferred one is not available - """ - if use_azure_api: - if azure_openai_available: - _use_azure_credentials() - elif is_preference and openai_available: - _use_openai_credentials() - elif is_preference: - raise EnvironmentError( - "No OpenAI api available, please set LLM_AZURE_OPENAI_API_KEY, LLM_AZURE_OPENAI_API_BASE and " - "LLM_AZURE_OPENAI_API_VERSION environment variables or LLM_OPENAI_API_KEY environment variable" - ) - else: - raise EnvironmentError( - "Azure OpenAI api not available, please set LLM_AZURE_OPENAI_API_KEY, LLM_AZURE_OPENAI_API_BASE and " - "LLM_AZURE_OPENAI_API_VERSION environment variables" - ) - else: - if openai_available: - _use_openai_credentials() - elif is_preference and azure_openai_available: - _use_azure_credentials() - elif is_preference: - raise EnvironmentError( - "No OpenAI api available, please set LLM_OPENAI_API_KEY environment variable or LLM_AZURE_OPENAI_API_KEY, " - "LLM_AZURE_OPENAI_API_BASE and LLM_AZURE_OPENAI_API_VERSION environment variables" - ) - else: - raise EnvironmentError( - "OpenAI api not available, please set LLM_OPENAI_API_KEY environment variable" - ) - - # API client is setup correctly - yield - - -def _get_available_deployments(openai_models: Dict[str, List[str]], model_aliases: Dict[str, str]): - available_deployments: Dict[str, Dict[str, Any]] = { - "chat_completion": {}, - "completion": {}, - "fine_tuneing": {}, - } - - if azure_openai_available: - with _openai_client(use_azure_api=True, is_preference=False): - deployments = openai.Deployment.list().get("data") or [] # type: ignore - for deployment in deployments: - model_name = deployment.model - if model_name in model_aliases: - model_name = model_aliases[model_name] - if model_name in openai_models["chat_completion"]: - available_deployments["chat_completion"][deployment.id] = deployment - elif model_name in openai_models["completion"]: - available_deployments["completion"][deployment.id] = deployment - elif model_name in openai_models["fine_tuneing"]: - available_deployments["fine_tuneing"][deployment.id] = deployment - - return available_deployments - - -def _get_available_models(openai_models: Dict[str, List[str]], - available_deployments: Dict[str, Dict[str, Any]]): - available_models: Dict[str, BaseLanguageModel] = {} - - if openai_available: - openai_api_key = os.environ["LLM_OPENAI_API_KEY"] - for model_name in openai_models["chat_completion"]: - available_models[OPENAI_PREFIX + model_name] = ChatOpenAI( - model=model_name, - openai_api_key=openai_api_key, - client="", - temperature=0 - ) - for model_name in openai_models["completion"]: - available_models[OPENAI_PREFIX + model_name] = OpenAI( - model=model_name, - openai_api_key=openai_api_key, - client="", - temperature=0 - ) - - if azure_openai_available: - azure_openai_api_key = os.environ["LLM_AZURE_OPENAI_API_KEY"] - azure_openai_api_base = os.environ["LLM_AZURE_OPENAI_API_BASE"] - azure_openai_api_version = os.environ["LLM_AZURE_OPENAI_API_VERSION"] - - for model_type, Model in [("chat_completion", AzureChatOpenAI), ("completion", AzureOpenAI)]: - for deployment_name, deployment in available_deployments[model_type].items(): - available_models[AZURE_OPENAI_PREFIX + deployment_name] = Model( - model=deployment.model, - deployment_name=deployment_name, - openai_api_base=azure_openai_api_base, - openai_api_version=azure_openai_api_version, - openai_api_key=azure_openai_api_key, - client="", - temperature=0 - ) - - return available_models - - -_model_aliases = { - "gpt-35-turbo": "gpt-3.5-turbo", -} - -# Hardcoded because openai can't provide a trustworthly api to get the list of models and capabilities... -openai_models = { - "chat_completion": [ - "gpt-4", - # "gpt-4-32k", # Not publicly available - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k" - ], - "completion": [ - "text-davinci-003", - "text-curie-001", - "text-babbage-001", - "text-ada-001", - ], - "fine_tuneing": [ - "davinci", - "curie", - "babbage", - "ada", - ] -} -available_deployments = _get_available_deployments(openai_models, _model_aliases) -available_models = _get_available_models(openai_models, available_deployments) - +openai_available = bool(os.environ.get("OPENAI_API_KEY")) +azure_openai_available = bool(os.environ.get("AZURE_OPENAI_API_KEY")) + +available_models: Dict[str, BaseLanguageModel] = {} + +if openai_available: + openai.api_type= "openai" + for model in openai.models.list(): + if model.owned_by == "openai": + available_models[OPENAI_PREFIX + model.id] = ChatOpenAI(#type:ignore + model=model.id, + ) + +if azure_openai_available: + # Will be replaced in the future + for deployment in os.environ['AZURE_DEPLOYMENTS'].split(','): + available_models[AZURE_OPENAI_PREFIX + deployment] = AzureChatOpenAI(deployment_name=deployment, temperature=0, client="") + if available_models: logger.info("Available openai models: %s", ", ".join(available_models.keys())) OpenAIModel = Enum('OpenAIModel', {name: name for name in available_models}) # type: ignore - - - default_model_name = "gpt-3.5-turbo" + default_model_name = None if "LLM_DEFAULT_MODEL" in os.environ and os.environ["LLM_DEFAULT_MODEL"] in available_models: default_model_name = os.environ["LLM_DEFAULT_MODEL"] if default_model_name not in available_models: default_model_name = list(available_models.keys())[0] - default_openai_model = OpenAIModel[default_model_name] - + default_openai_model = OpenAIModel[default_model_name]#type:ignore # Long descriptions will be displayed in the playground UI and are copied from the OpenAI docs class OpenAIModelConfig(ModelConfig): @@ -307,7 +97,7 @@ def get_model(self) -> BaseLanguageModel: BaseLanguageModel: The model. """ model = available_models[self.model_name.value] - kwargs = model._lc_kwargs + kwargs = model.__dict__ secrets = {secret: getattr(model, secret) for secret in model.lc_secrets.keys()} kwargs.update(secrets) diff --git a/module_text_llm/module_text_llm/helpers/models/replicate.py b/module_text_llm/module_text_llm/helpers/models/replicate.py deleted file mode 100644 index 4941a46ac..000000000 --- a/module_text_llm/module_text_llm/helpers/models/replicate.py +++ /dev/null @@ -1,116 +0,0 @@ -import os -from pydantic import Field, PositiveInt -from enum import Enum - -from langchain.llms import Replicate -from langchain.base_language import BaseLanguageModel - -from athena.logger import logger -from .model_config import ModelConfig - - -# Hardcoded list of models -# If necessary, add more models from replicate here, the config below might need adjustments depending on the available -# parameters of the model -# -# To update the version of the models, go to the respective page on replicate.com and copy the (latest) version id -# from and paste it after the colon in the value of the dictionary. Ever so often a new version is released. -replicate_models = { - # LLAMA 2 70B Chat - # https://replicate.com/replicate/llama-2-70b-chat - "llama-2-70b-chat": "replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781", - # LLaMA 2 13B Chat - # https://replicate.com/a16z-infra/llama-2-13b-chat - "llama-2-13b-chat": "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52", - # LLaMA 2 7B Chat - # https://replicate.com/a16z-infra/llama-2-7b-chat - "llama-2-7b-chat": "a16z-infra/llama-2-7b-chat:7b0bfc9aff140d5b75bacbed23e91fd3c34b01a1e958d32132de6e0a19796e2c", -} - -available_models = {} -if os.environ.get("REPLICATE_API_TOKEN"): # If Replicate is available - available_models = { - name: Replicate( - model=model, - model_kwargs={ "temperature": 0.01 } - ) - for name, model in replicate_models.items() - } -else: - logger.warning("REPLICATE_API_TOKEN not found in environment variables. Replicate models are disabled.") - -if available_models: - logger.info("Available replicate models: %s", - ", ".join(available_models.keys())) - - ReplicateModel = Enum('ReplicateModel', {name: name for name in available_models}) # type: ignore - - - default_model_name = "llama-2-13b-chat" - if "LLM_DEFAULT_MODEL" in os.environ and os.environ["LLM_DEFAULT_MODEL"] in available_models: - default_model_name = os.environ["LLM_DEFAULT_MODEL"] - if default_model_name not in available_models: - default_model_name = list(available_models.keys())[0] - - default_replicate_model = ReplicateModel[default_model_name] - - - # Note: Config has been setup with LLaMA 2 chat models in mind, other models may not work as expected - class ReplicateModelConfig(ModelConfig): - """Replicate LLM configuration.""" - - model_name: ReplicateModel = Field(default=default_replicate_model, # type: ignore - description="The name of the model to use.") - max_new_tokens: PositiveInt = Field(1000, description="""\ -Maximum number of tokens to generate. A word is generally 2-3 tokens (minimum: 1)\ -""") - min_new_tokens: int = Field(-1, description="""\ -Minimum number of tokens to generate. To disable, set to -1. A word is generally 2-3 tokens. (minimum: -1)\ -""") - temperature: float = Field(default=0.01, ge=0.01, le=5, description="""\ -Adjusts randomness of outputs, greater than 1 is random and 0 is deterministic, 0.75 is a good starting value.\ -(minimum: 0.01; maximum: 5)\ -""") - top_p: float = Field(default=1, ge=0, le=1, description="""\ -When decoding text, samples from the top p percentage of most likely tokens; lower to ignore less likely tokens (maximum: 1)\ -""") - top_k: PositiveInt = Field(default=250, description="""\ -When decoding text, samples from the top k most likely tokens; lower to ignore less likely tokens\ -""") - repetition_penalty: float = Field(default=1, ge=0.01, le=5, description="""\ -Penalty for repeated words in generated text; 1 is no penalty, values greater than 1 discourage repetition, \ -less than 1 encourage it. (minimum: 0.01; maximum: 5)\ -""") - repetition_penalty_sustain: int = Field(default=-1, description=""" -Number of most recent tokens to apply repetition penalty to, -1 to apply to whole context (minimum: -1)\ -""") - token_repetition_penalty_decay: PositiveInt = Field(default=128, description="""\ -Gradually decrease penalty over this many tokens (minimum: 1)\ -""") - - def get_model(self) -> BaseLanguageModel: - """Get the model from the configuration. - - Returns: - BaseLanguageModel: The model. - """ - model = available_models[self.model_name.value] - kwargs = model._lc_kwargs - - model_kwargs = {} - for attr, value in self.dict().items(): - # Skip model_name - if attr == "model_name": - continue - model_kwargs[attr] = value - - # Set model parameters - kwargs["model_kwargs"] = model_kwargs - - # Initialize a copy of the model using the config - model = model.__class__(**kwargs) - return model - - - class Config: - title = 'Replicate' diff --git a/module_text_llm/poetry.lock b/module_text_llm/poetry.lock index 31187894c..4dd126d40 100644 --- a/module_text_llm/poetry.lock +++ b/module_text_llm/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "aiohttp" @@ -347,6 +347,17 @@ files = [ [package.extras] graph = ["objgraph (>=1.7.2)"] +[[package]] +name = "distro" +version = "1.9.0" +description = "Distro - an OS platform information API" +optional = false +python-versions = ">=3.6" +files = [ + {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, + {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, +] + [[package]] name = "dodgy" version = "0.2.1" @@ -699,106 +710,113 @@ files = [ [[package]] name = "langchain" -version = "0.0.350" +version = "0.2.11" description = "Building applications with LLMs through composability" optional = false -python-versions = ">=3.8.1,<4.0" +python-versions = "<4.0,>=3.8.1" files = [ - {file = "langchain-0.0.350-py3-none-any.whl", hash = "sha256:11b605f325a4271a7815baaec05bc7622e3ad1f10f26b05c752cafa27663ed38"}, - {file = "langchain-0.0.350.tar.gz", hash = "sha256:f0e68a92d200bb722586688ab7b411b2430bd98ad265ca03b264e7e7acbb6c01"}, + {file = "langchain-0.2.11-py3-none-any.whl", hash = "sha256:5a7a8b4918f3d3bebce9b4f23b92d050699e6f7fb97591e8941177cf07a260a2"}, + {file = "langchain-0.2.11.tar.gz", hash = "sha256:d7a9e4165f02dca0bd78addbc2319d5b9286b5d37c51d784124102b57e9fd297"}, ] [package.dependencies] aiohttp = ">=3.8.3,<4.0.0" -dataclasses-json = ">=0.5.7,<0.7" -jsonpatch = ">=1.33,<2.0" -langchain-community = ">=0.0.2,<0.1" -langchain-core = ">=0.1,<0.2" -langsmith = ">=0.0.63,<0.1.0" -numpy = ">=1,<2" +langchain-core = ">=0.2.23,<0.3.0" +langchain-text-splitters = ">=0.2.0,<0.3.0" +langsmith = ">=0.1.17,<0.2.0" +numpy = {version = ">=1,<2", markers = "python_version < \"3.12\""} pydantic = ">=1,<3" PyYAML = ">=5.3" requests = ">=2,<3" SQLAlchemy = ">=1.4,<3" -tenacity = ">=8.1.0,<9.0.0" - -[package.extras] -azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-textanalytics (>=5.3.0,<6.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (<2)"] -clarifai = ["clarifai (>=9.1.0)"] -cli = ["typer (>=0.9.0,<0.10.0)"] -cohere = ["cohere (>=4,<5)"] -docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"] -embeddings = ["sentence-transformers (>=2,<3)"] -extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<5)", "couchbase (>=4.1.9,<5.0.0)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"] -javascript = ["esprima (>=4.0.1,<5.0.0)"] -llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"] -openai = ["openai (<2)", "tiktoken (>=0.3.2,<0.6.0)"] -qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"] -text-helpers = ["chardet (>=5.1.0,<6.0.0)"] +tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0" [[package]] name = "langchain-community" -version = "0.0.13" +version = "0.2.10" description = "Community contributed LangChain integrations." optional = false -python-versions = ">=3.8.1,<4.0" +python-versions = "<4.0,>=3.8.1" files = [ - {file = "langchain_community-0.0.13-py3-none-any.whl", hash = "sha256:655196e446e7f37f4882221b6f3f791d6add28ea596d521ccf6f4507386b9a13"}, - {file = "langchain_community-0.0.13.tar.gz", hash = "sha256:cf66c6ff7fcbeb582f5e88ee00decea7fdeca5ccddda725046f28efc697c41a7"}, + {file = "langchain_community-0.2.10-py3-none-any.whl", hash = "sha256:9f4d1b5ab7f0b0a704f538e26e50fce45a461da6d2bf6b7b636d24f22fbc088a"}, + {file = "langchain_community-0.2.10.tar.gz", hash = "sha256:3a0404bad4bd07d6f86affdb62fb3d080a456c66191754d586a409d9d6024d62"}, ] [package.dependencies] aiohttp = ">=3.8.3,<4.0.0" dataclasses-json = ">=0.5.7,<0.7" -langchain-core = ">=0.1.9,<0.2" -langsmith = ">=0.0.63,<0.1.0" -numpy = ">=1,<2" +langchain = ">=0.2.9,<0.3.0" +langchain-core = ">=0.2.23,<0.3.0" +langsmith = ">=0.1.0,<0.2.0" +numpy = {version = ">=1,<2", markers = "python_version < \"3.12\""} PyYAML = ">=5.3" requests = ">=2,<3" SQLAlchemy = ">=1.4,<3" -tenacity = ">=8.1.0,<9.0.0" - -[package.extras] -cli = ["typer (>=0.9.0,<0.10.0)"] -extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "azure-ai-documentintelligence (>=1.0.0b1,<2.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<5)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "gradientai (>=1.4.0,<2.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "oracle-ads (>=2.9.1,<3.0.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)", "zhipuai (>=1.0.7,<2.0.0)"] +tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0" [[package]] name = "langchain-core" -version = "0.1.12" +version = "0.2.25" description = "Building applications with LLMs through composability" optional = false -python-versions = ">=3.8.1,<4.0" +python-versions = "<4.0,>=3.8.1" files = [ - {file = "langchain_core-0.1.12-py3-none-any.whl", hash = "sha256:d11c6262f7a9deff7de8fdf14498b8a951020dfed3a80f2358ab731ad04abef0"}, - {file = "langchain_core-0.1.12.tar.gz", hash = "sha256:f18e9300e9a07589b3e280e51befbc5a4513f535949406e55eb7a2dc40c3ce66"}, + {file = "langchain_core-0.2.25-py3-none-any.whl", hash = "sha256:03d61b2a7f4b5f98df248c1b1f0ccd95c9d5ef2269e174133724365cd2a7ee1e"}, + {file = "langchain_core-0.2.25.tar.gz", hash = "sha256:e64106a7d0e37e4d35b767f79e6c62b56e825f08f9e8cc4368bcea9955257a7e"}, ] [package.dependencies] -anyio = ">=3,<5" jsonpatch = ">=1.33,<2.0" -langsmith = ">=0.0.63,<0.1.0" -packaging = ">=23.2,<24.0" -pydantic = ">=1,<3" +langsmith = ">=0.1.75,<0.2.0" +packaging = ">=23.2,<25" +pydantic = {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""} PyYAML = ">=5.3" -requests = ">=2,<3" -tenacity = ">=8.1.0,<9.0.0" +tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0" -[package.extras] -extended-testing = ["jinja2 (>=3,<4)"] +[[package]] +name = "langchain-openai" +version = "0.1.19" +description = "An integration package connecting OpenAI and LangChain" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "langchain_openai-0.1.19-py3-none-any.whl", hash = "sha256:a7a739f1469d54cd988865420e7fc21b50fb93727b2e6da5ad30273fc61ecf19"}, + {file = "langchain_openai-0.1.19.tar.gz", hash = "sha256:3bf342bb302d1444f4abafdf01c467dbd9b248497e1133808c4bae70396c79b3"}, +] + +[package.dependencies] +langchain-core = ">=0.2.24,<0.3.0" +openai = ">=1.32.0,<2.0.0" +tiktoken = ">=0.7,<1" + +[[package]] +name = "langchain-text-splitters" +version = "0.2.2" +description = "LangChain text splitting utilities" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "langchain_text_splitters-0.2.2-py3-none-any.whl", hash = "sha256:1c80d4b11b55e2995f02d2a326c0323ee1eeff24507329bb22924e420c782dff"}, + {file = "langchain_text_splitters-0.2.2.tar.gz", hash = "sha256:a1e45de10919fa6fb080ef0525deab56557e9552083600455cb9fa4238076140"}, +] + +[package.dependencies] +langchain-core = ">=0.2.10,<0.3.0" [[package]] name = "langsmith" -version = "0.0.63" +version = "0.1.94" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." optional = false -python-versions = ">=3.8.1,<4.0" +python-versions = "<4.0,>=3.8.1" files = [ - {file = "langsmith-0.0.63-py3-none-any.whl", hash = "sha256:43a521dd10d8405ac21a0b959e3de33e2270e4abe6c73cc4036232a6990a0793"}, - {file = "langsmith-0.0.63.tar.gz", hash = "sha256:ddb2dfadfad3e05151ed8ba1643d1c516024b80fbd0c6263024400ced06a3768"}, + {file = "langsmith-0.1.94-py3-none-any.whl", hash = "sha256:0d01212086d58699f75814117b026784218042f7859877ce08a248a98d84aa8d"}, + {file = "langsmith-0.1.94.tar.gz", hash = "sha256:e44afcdc9eee6f238f6a87a02bba83111bd5fad376d881ae299834e06d39d712"}, ] [package.dependencies] -pydantic = ">=1,<3" +orjson = ">=3.9.14,<4.0.0" +pydantic = {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""} requests = ">=2,<3" [[package]] @@ -1085,25 +1103,86 @@ files = [ [[package]] name = "openai" -version = "0.27.10" -description = "Python client library for the OpenAI API" +version = "1.37.1" +description = "The official Python library for the openai API" optional = false python-versions = ">=3.7.1" files = [ - {file = "openai-0.27.10-py3-none-any.whl", hash = "sha256:beabd1757e3286fa166dde3b70ebb5ad8081af046876b47c14c41e203ed22a14"}, - {file = "openai-0.27.10.tar.gz", hash = "sha256:60e09edf7100080283688748c6803b7b3b52d5a55d21890f3815292a0552d83b"}, + {file = "openai-1.37.1-py3-none-any.whl", hash = "sha256:9a6adda0d6ae8fce02d235c5671c399cfa40d6a281b3628914c7ebf244888ee3"}, + {file = "openai-1.37.1.tar.gz", hash = "sha256:faf87206785a6b5d9e34555d6a3242482a6852bc802e453e2a891f68ee04ce55"}, ] [package.dependencies] -aiohttp = "*" -requests = ">=2.20" -tqdm = "*" +anyio = ">=3.5.0,<5" +distro = ">=1.7.0,<2" +httpx = ">=0.23.0,<1" +pydantic = ">=1.9.0,<3" +sniffio = "*" +tqdm = ">4" +typing-extensions = ">=4.7,<5" [package.extras] -datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] -dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-mock"] -embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] -wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] +datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] + +[[package]] +name = "orjson" +version = "3.10.6" +description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" +optional = false +python-versions = ">=3.8" +files = [ + {file = "orjson-3.10.6-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:fb0ee33124db6eaa517d00890fc1a55c3bfe1cf78ba4a8899d71a06f2d6ff5c7"}, + {file = "orjson-3.10.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c1c4b53b24a4c06547ce43e5fee6ec4e0d8fe2d597f4647fc033fd205707365"}, + {file = "orjson-3.10.6-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eadc8fd310edb4bdbd333374f2c8fec6794bbbae99b592f448d8214a5e4050c0"}, + {file = "orjson-3.10.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:61272a5aec2b2661f4fa2b37c907ce9701e821b2c1285d5c3ab0207ebd358d38"}, + {file = "orjson-3.10.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57985ee7e91d6214c837936dc1608f40f330a6b88bb13f5a57ce5257807da143"}, + {file = "orjson-3.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:633a3b31d9d7c9f02d49c4ab4d0a86065c4a6f6adc297d63d272e043472acab5"}, + {file = "orjson-3.10.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1c680b269d33ec444afe2bdc647c9eb73166fa47a16d9a75ee56a374f4a45f43"}, + {file = "orjson-3.10.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f759503a97a6ace19e55461395ab0d618b5a117e8d0fbb20e70cfd68a47327f2"}, + {file = "orjson-3.10.6-cp310-none-win32.whl", hash = "sha256:95a0cce17f969fb5391762e5719575217bd10ac5a189d1979442ee54456393f3"}, + {file = "orjson-3.10.6-cp310-none-win_amd64.whl", hash = "sha256:df25d9271270ba2133cc88ee83c318372bdc0f2cd6f32e7a450809a111efc45c"}, + {file = "orjson-3.10.6-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:b1ec490e10d2a77c345def52599311849fc063ae0e67cf4f84528073152bb2ba"}, + {file = "orjson-3.10.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d43d3feb8f19d07e9f01e5b9be4f28801cf7c60d0fa0d279951b18fae1932b"}, + {file = "orjson-3.10.6-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac3045267e98fe749408eee1593a142e02357c5c99be0802185ef2170086a863"}, + {file = "orjson-3.10.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c27bc6a28ae95923350ab382c57113abd38f3928af3c80be6f2ba7eb8d8db0b0"}, + {file = "orjson-3.10.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d27456491ca79532d11e507cadca37fb8c9324a3976294f68fb1eff2dc6ced5a"}, + {file = "orjson-3.10.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05ac3d3916023745aa3b3b388e91b9166be1ca02b7c7e41045da6d12985685f0"}, + {file = "orjson-3.10.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1335d4ef59ab85cab66fe73fd7a4e881c298ee7f63ede918b7faa1b27cbe5212"}, + {file = "orjson-3.10.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4bbc6d0af24c1575edc79994c20e1b29e6fb3c6a570371306db0993ecf144dc5"}, + {file = "orjson-3.10.6-cp311-none-win32.whl", hash = "sha256:450e39ab1f7694465060a0550b3f6d328d20297bf2e06aa947b97c21e5241fbd"}, + {file = "orjson-3.10.6-cp311-none-win_amd64.whl", hash = "sha256:227df19441372610b20e05bdb906e1742ec2ad7a66ac8350dcfd29a63014a83b"}, + {file = "orjson-3.10.6-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:ea2977b21f8d5d9b758bb3f344a75e55ca78e3ff85595d248eee813ae23ecdfb"}, + {file = "orjson-3.10.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b6f3d167d13a16ed263b52dbfedff52c962bfd3d270b46b7518365bcc2121eed"}, + {file = "orjson-3.10.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f710f346e4c44a4e8bdf23daa974faede58f83334289df80bc9cd12fe82573c7"}, + {file = "orjson-3.10.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7275664f84e027dcb1ad5200b8b18373e9c669b2a9ec33d410c40f5ccf4b257e"}, + {file = "orjson-3.10.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0943e4c701196b23c240b3d10ed8ecd674f03089198cf503105b474a4f77f21f"}, + {file = "orjson-3.10.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:446dee5a491b5bc7d8f825d80d9637e7af43f86a331207b9c9610e2f93fee22a"}, + {file = "orjson-3.10.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:64c81456d2a050d380786413786b057983892db105516639cb5d3ee3c7fd5148"}, + {file = "orjson-3.10.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:960db0e31c4e52fa0fc3ecbaea5b2d3b58f379e32a95ae6b0ebeaa25b93dfd34"}, + {file = "orjson-3.10.6-cp312-none-win32.whl", hash = "sha256:a6ea7afb5b30b2317e0bee03c8d34c8181bc5a36f2afd4d0952f378972c4efd5"}, + {file = "orjson-3.10.6-cp312-none-win_amd64.whl", hash = "sha256:874ce88264b7e655dde4aeaacdc8fd772a7962faadfb41abe63e2a4861abc3dc"}, + {file = "orjson-3.10.6-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:66680eae4c4e7fc193d91cfc1353ad6d01b4801ae9b5314f17e11ba55e934183"}, + {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caff75b425db5ef8e8f23af93c80f072f97b4fb3afd4af44482905c9f588da28"}, + {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3722fddb821b6036fd2a3c814f6bd9b57a89dc6337b9924ecd614ebce3271394"}, + {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2c116072a8533f2fec435fde4d134610f806bdac20188c7bd2081f3e9e0133f"}, + {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6eeb13218c8cf34c61912e9df2de2853f1d009de0e46ea09ccdf3d757896af0a"}, + {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:965a916373382674e323c957d560b953d81d7a8603fbeee26f7b8248638bd48b"}, + {file = "orjson-3.10.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:03c95484d53ed8e479cade8628c9cea00fd9d67f5554764a1110e0d5aa2de96e"}, + {file = "orjson-3.10.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:e060748a04cccf1e0a6f2358dffea9c080b849a4a68c28b1b907f272b5127e9b"}, + {file = "orjson-3.10.6-cp38-none-win32.whl", hash = "sha256:738dbe3ef909c4b019d69afc19caf6b5ed0e2f1c786b5d6215fbb7539246e4c6"}, + {file = "orjson-3.10.6-cp38-none-win_amd64.whl", hash = "sha256:d40f839dddf6a7d77114fe6b8a70218556408c71d4d6e29413bb5f150a692ff7"}, + {file = "orjson-3.10.6-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:697a35a083c4f834807a6232b3e62c8b280f7a44ad0b759fd4dce748951e70db"}, + {file = "orjson-3.10.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd502f96bf5ea9a61cbc0b2b5900d0dd68aa0da197179042bdd2be67e51a1e4b"}, + {file = "orjson-3.10.6-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f215789fb1667cdc874c1b8af6a84dc939fd802bf293a8334fce185c79cd359b"}, + {file = "orjson-3.10.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2debd8ddce948a8c0938c8c93ade191d2f4ba4649a54302a7da905a81f00b56"}, + {file = "orjson-3.10.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5410111d7b6681d4b0d65e0f58a13be588d01b473822483f77f513c7f93bd3b2"}, + {file = "orjson-3.10.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb1f28a137337fdc18384079fa5726810681055b32b92253fa15ae5656e1dddb"}, + {file = "orjson-3.10.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:bf2fbbce5fe7cd1aa177ea3eab2b8e6a6bc6e8592e4279ed3db2d62e57c0e1b2"}, + {file = "orjson-3.10.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:79b9b9e33bd4c517445a62b90ca0cc279b0f1f3970655c3df9e608bc3f91741a"}, + {file = "orjson-3.10.6-cp39-none-win32.whl", hash = "sha256:30b0a09a2014e621b1adf66a4f705f0809358350a757508ee80209b2d8dae219"}, + {file = "orjson-3.10.6-cp39-none-win_amd64.whl", hash = "sha256:49e3bc615652617d463069f91b867a4458114c5b104e13b7ae6872e5f79d0844"}, + {file = "orjson-3.10.6.tar.gz", hash = "sha256:e54b63d0a7c6c54a5f5f726bc93a2078111ef060fec4ecbf34c5db800ca3b3a7"}, +] [[package]] name = "packaging" @@ -1564,25 +1643,6 @@ files = [ {file = "regex-2023.10.3.tar.gz", hash = "sha256:3fef4f844d2290ee0ba57addcec17eec9e3df73f10a2748485dfd6a3a188cc0f"}, ] -[[package]] -name = "replicate" -version = "0.11.0" -description = "Python client for Replicate" -optional = false -python-versions = ">=3.8" -files = [ - {file = "replicate-0.11.0-py3-none-any.whl", hash = "sha256:fbb8815068864dc822cd4fa7b6103d6f4089d6ef122abd6c3441ca0f0f110c46"}, - {file = "replicate-0.11.0.tar.gz", hash = "sha256:4d54b5838c1552a6f76cc37c3af8d9a7998105382082d672acad31636ad443b5"}, -] - -[package.dependencies] -packaging = "*" -pydantic = ">1" -requests = ">2" - -[package.extras] -dev = ["black", "mypy", "pytest", "responses", "ruff"] - [[package]] name = "requests" version = "2.31.0" @@ -1800,40 +1860,47 @@ doc = ["reno", "sphinx", "tornado (>=4.5)"] [[package]] name = "tiktoken" -version = "0.4.0" +version = "0.7.0" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" optional = false python-versions = ">=3.8" files = [ - {file = "tiktoken-0.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:176cad7f053d2cc82ce7e2a7c883ccc6971840a4b5276740d0b732a2b2011f8a"}, - {file = "tiktoken-0.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:450d504892b3ac80207700266ee87c932df8efea54e05cefe8613edc963c1285"}, - {file = "tiktoken-0.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00d662de1e7986d129139faf15e6a6ee7665ee103440769b8dedf3e7ba6ac37f"}, - {file = "tiktoken-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5727d852ead18b7927b8adf558a6f913a15c7766725b23dbe21d22e243041b28"}, - {file = "tiktoken-0.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c06cd92b09eb0404cedce3702fa866bf0d00e399439dad3f10288ddc31045422"}, - {file = "tiktoken-0.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9ec161e40ed44e4210d3b31e2ff426b4a55e8254f1023e5d2595cb60044f8ea6"}, - {file = "tiktoken-0.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:1e8fa13cf9889d2c928b9e258e9dbbbf88ab02016e4236aae76e3b4f82dd8288"}, - {file = "tiktoken-0.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bb2341836b725c60d0ab3c84970b9b5f68d4b733a7bcb80fb25967e5addb9920"}, - {file = "tiktoken-0.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2ca30367ad750ee7d42fe80079d3092bd35bb266be7882b79c3bd159b39a17b0"}, - {file = "tiktoken-0.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3dc3df19ddec79435bb2a94ee46f4b9560d0299c23520803d851008445671197"}, - {file = "tiktoken-0.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d980fa066e962ef0f4dad0222e63a484c0c993c7a47c7dafda844ca5aded1f3"}, - {file = "tiktoken-0.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:329f548a821a2f339adc9fbcfd9fc12602e4b3f8598df5593cfc09839e9ae5e4"}, - {file = "tiktoken-0.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b1a038cee487931a5caaef0a2e8520e645508cde21717eacc9af3fbda097d8bb"}, - {file = "tiktoken-0.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:08efa59468dbe23ed038c28893e2a7158d8c211c3dd07f2bbc9a30e012512f1d"}, - {file = "tiktoken-0.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f3020350685e009053829c1168703c346fb32c70c57d828ca3742558e94827a9"}, - {file = "tiktoken-0.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ba16698c42aad8190e746cd82f6a06769ac7edd415d62ba027ea1d99d958ed93"}, - {file = "tiktoken-0.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c15d9955cc18d0d7ffcc9c03dc51167aedae98542238b54a2e659bd25fe77ed"}, - {file = "tiktoken-0.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64e1091c7103100d5e2c6ea706f0ec9cd6dc313e6fe7775ef777f40d8c20811e"}, - {file = "tiktoken-0.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e87751b54eb7bca580126353a9cf17a8a8eaadd44edaac0e01123e1513a33281"}, - {file = "tiktoken-0.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e063b988b8ba8b66d6cc2026d937557437e79258095f52eaecfafb18a0a10c03"}, - {file = "tiktoken-0.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:9c6dd439e878172dc163fced3bc7b19b9ab549c271b257599f55afc3a6a5edef"}, - {file = "tiktoken-0.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8d1d97f83697ff44466c6bef5d35b6bcdb51e0125829a9c0ed1e6e39fb9a08fb"}, - {file = "tiktoken-0.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1b6bce7c68aa765f666474c7c11a7aebda3816b58ecafb209afa59c799b0dd2d"}, - {file = "tiktoken-0.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a73286c35899ca51d8d764bc0b4d60838627ce193acb60cc88aea60bddec4fd"}, - {file = "tiktoken-0.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0394967d2236a60fd0aacef26646b53636423cc9c70c32f7c5124ebe86f3093"}, - {file = "tiktoken-0.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:dae2af6f03ecba5f679449fa66ed96585b2fa6accb7fd57d9649e9e398a94f44"}, - {file = "tiktoken-0.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:55e251b1da3c293432179cf7c452cfa35562da286786be5a8b1ee3405c2b0dd2"}, - {file = "tiktoken-0.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:c835d0ee1f84a5aa04921717754eadbc0f0a56cf613f78dfc1cf9ad35f6c3fea"}, - {file = "tiktoken-0.4.0.tar.gz", hash = "sha256:59b20a819969735b48161ced9b92f05dc4519c17be4015cfb73b65270a243620"}, + {file = "tiktoken-0.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:485f3cc6aba7c6b6ce388ba634fbba656d9ee27f766216f45146beb4ac18b25f"}, + {file = "tiktoken-0.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e54be9a2cd2f6d6ffa3517b064983fb695c9a9d8aa7d574d1ef3c3f931a99225"}, + {file = "tiktoken-0.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79383a6e2c654c6040e5f8506f3750db9ddd71b550c724e673203b4f6b4b4590"}, + {file = "tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d4511c52caacf3c4981d1ae2df85908bd31853f33d30b345c8b6830763f769c"}, + {file = "tiktoken-0.7.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:13c94efacdd3de9aff824a788353aa5749c0faee1fbe3816df365ea450b82311"}, + {file = "tiktoken-0.7.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8e58c7eb29d2ab35a7a8929cbeea60216a4ccdf42efa8974d8e176d50c9a3df5"}, + {file = "tiktoken-0.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:21a20c3bd1dd3e55b91c1331bf25f4af522c525e771691adbc9a69336fa7f702"}, + {file = "tiktoken-0.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:10c7674f81e6e350fcbed7c09a65bca9356eaab27fb2dac65a1e440f2bcfe30f"}, + {file = "tiktoken-0.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:084cec29713bc9d4189a937f8a35dbdfa785bd1235a34c1124fe2323821ee93f"}, + {file = "tiktoken-0.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:811229fde1652fedcca7c6dfe76724d0908775b353556d8a71ed74d866f73f7b"}, + {file = "tiktoken-0.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86b6e7dc2e7ad1b3757e8a24597415bafcfb454cebf9a33a01f2e6ba2e663992"}, + {file = "tiktoken-0.7.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1063c5748be36344c7e18c7913c53e2cca116764c2080177e57d62c7ad4576d1"}, + {file = "tiktoken-0.7.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:20295d21419bfcca092644f7e2f2138ff947a6eb8cfc732c09cc7d76988d4a89"}, + {file = "tiktoken-0.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:959d993749b083acc57a317cbc643fb85c014d055b2119b739487288f4e5d1cb"}, + {file = "tiktoken-0.7.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:71c55d066388c55a9c00f61d2c456a6086673ab7dec22dd739c23f77195b1908"}, + {file = "tiktoken-0.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:09ed925bccaa8043e34c519fbb2f99110bd07c6fd67714793c21ac298e449410"}, + {file = "tiktoken-0.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03c6c40ff1db0f48a7b4d2dafeae73a5607aacb472fa11f125e7baf9dce73704"}, + {file = "tiktoken-0.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d20b5c6af30e621b4aca094ee61777a44118f52d886dbe4f02b70dfe05c15350"}, + {file = "tiktoken-0.7.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d427614c3e074004efa2f2411e16c826f9df427d3c70a54725cae860f09e4bf4"}, + {file = "tiktoken-0.7.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8c46d7af7b8c6987fac9b9f61041b452afe92eb087d29c9ce54951280f899a97"}, + {file = "tiktoken-0.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:0bc603c30b9e371e7c4c7935aba02af5994a909fc3c0fe66e7004070858d3f8f"}, + {file = "tiktoken-0.7.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2398fecd38c921bcd68418675a6d155fad5f5e14c2e92fcf5fe566fa5485a858"}, + {file = "tiktoken-0.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8f5f6afb52fb8a7ea1c811e435e4188f2bef81b5e0f7a8635cc79b0eef0193d6"}, + {file = "tiktoken-0.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:861f9ee616766d736be4147abac500732b505bf7013cfaf019b85892637f235e"}, + {file = "tiktoken-0.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54031f95c6939f6b78122c0aa03a93273a96365103793a22e1793ee86da31685"}, + {file = "tiktoken-0.7.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:fffdcb319b614cf14f04d02a52e26b1d1ae14a570f90e9b55461a72672f7b13d"}, + {file = "tiktoken-0.7.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c72baaeaefa03ff9ba9688624143c858d1f6b755bb85d456d59e529e17234769"}, + {file = "tiktoken-0.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:131b8aeb043a8f112aad9f46011dced25d62629091e51d9dc1adbf4a1cc6aa98"}, + {file = "tiktoken-0.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cabc6dc77460df44ec5b879e68692c63551ae4fae7460dd4ff17181df75f1db7"}, + {file = "tiktoken-0.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8d57f29171255f74c0aeacd0651e29aa47dff6f070cb9f35ebc14c82278f3b25"}, + {file = "tiktoken-0.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ee92776fdbb3efa02a83f968c19d4997a55c8e9ce7be821ceee04a1d1ee149c"}, + {file = "tiktoken-0.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e215292e99cb41fbc96988ef62ea63bb0ce1e15f2c147a61acc319f8b4cbe5bf"}, + {file = "tiktoken-0.7.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8a81bac94769cab437dd3ab0b8a4bc4e0f9cf6835bcaa88de71f39af1791727a"}, + {file = "tiktoken-0.7.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d6d73ea93e91d5ca771256dfc9d1d29f5a554b83821a1dc0891987636e0ae226"}, + {file = "tiktoken-0.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:2bcb28ddf79ffa424f171dfeef9a4daff61a94c631ca6813f43967cb263b83b9"}, + {file = "tiktoken-0.7.0.tar.gz", hash = "sha256:1077266e949c24e0291f6c350433c6f0971365ece2b173a23bc3b9f9defef6b6"}, ] [package.dependencies] @@ -2115,4 +2182,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "3.11.*" -content-hash = "fc546e1b84d0435bf224a6a0d311171d6b40c44722c5c690c21f920a476976cb" +content-hash = "255d802caecee7e3b501e69124430d49a40fe1f7dfcad49f6a03921a317a4143" diff --git a/module_text_llm/pyproject.toml b/module_text_llm/pyproject.toml index 76fcab84e..b513b71e6 100644 --- a/module_text_llm/pyproject.toml +++ b/module_text_llm/pyproject.toml @@ -6,16 +6,16 @@ authors = ["Felix Dietrich "] license = "MIT" [tool.poetry.dependencies] -python = "3.11.*" athena = {path = "../athena", develop = true} -openai = "^0.27.8" -langchain = "^0.0.350" -python-dotenv = "^1.0.0" -nltk = "^3.8.1" -gitpython = "^3.1.41" -replicate = "^0.11.0" -tiktoken = "^0.4.0" -langsmith = "^0.0.63" +gitpython = "3.1.41" +langchain = "0.2.11" +langchain-community="0.2.10" +langchain-openai = "0.1.19" +nltk = "3.8.1" +openai = "1.37.1" +python = "3.11.*" +python-dotenv = "1.0.0" +tiktoken = "0.7.0" [tool.poetry.scripts] module = "athena:run_module"