Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Module_text_llm: Upgrade dependencies and openai model retrieval #323

Merged
merged 16 commits into from
Aug 5, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions module_text_llm/module_text_llm/helpers/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,6 @@
except AttributeError:
pass

try:
import module_text_llm.helpers.models.replicate as replicate_config
types.append(replicate_config.ReplicateModelConfig)
if default_model_name in replicate_config.available_models:
DefaultModelConfig = replicate_config.ReplicateModelConfig
if evaluation_model_name in replicate_config.available_models:
evaluation_model = replicate_config.available_models[evaluation_model_name]
except AttributeError:
pass

if not types:
raise EnvironmentError(
"No model configurations available, please set up at least one provider in the environment variables.")
Expand Down
254 changes: 50 additions & 204 deletions module_text_llm/module_text_llm/helpers/models/openai.py
Original file line number Diff line number Diff line change
@@ -1,254 +1,99 @@
import os
from contextlib import contextmanager
from typing import Any, Callable, Dict, List
from typing import Any , Dict
from pydantic import Field, validator, PositiveInt
from enum import Enum

import openai
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from langchain.llms import AzureOpenAI, OpenAI
from langchain.llms.openai import BaseOpenAI
from langchain.base_language import BaseLanguageModel

from langchain_openai import AzureChatOpenAI, AzureOpenAI, ChatOpenAI
FelixTJDietrich marked this conversation as resolved.
Show resolved Hide resolved
from athena.logger import logger
from .model_config import ModelConfig


OPENAI_PREFIX = "openai_"
AZURE_OPENAI_PREFIX = "azure_openai_"
############################################################################################
EneaGore marked this conversation as resolved.
Show resolved Hide resolved
# START Set Enviorment variables that are automatically used by ChatOpenAI/ChatAzureOpenAI#
############################################################################################
# Might be worth renaming them
openai_available = bool(os.environ.get("LLM_OPENAI_API_KEY"))
if openai_available:
os.environ["OPENAI_API_KEY"] = os.environ["LLM_OPENAI_API_KEY"]


#########################################################################
# Monkey patching openai/langchain api #
# ===================================================================== #
# This allows us to have multiple api keys i.e. mixing #
# openai and azure openai api keys so we can use not only deployed #
# models but also models from the non-azure openai api. #
# This is mostly for testing purposes, in production we can just deploy #
# the models to azure that we want to use. #
#########################################################################

# Prevent LangChain error, we will set the key later
os.environ["OPENAI_API_KEY"] = ""

def _wrap(old: Any, new: Any) -> Callable:
def repl(*args: Any, **kwargs: Any) -> Any:
new(args[0]) # args[0] is self
return old(*args, **kwargs)
return repl


def _async_wrap(old: Any, new: Any):
async def repl(*args, **kwargs):
new(args[0]) # args[0] is self
return await old(*args, **kwargs)
return repl


def _set_credentials(self):
openai.api_key = self.openai_api_key

api_type = "open_ai"
api_base = "https://api.openai.com/v1"
api_version = None
if hasattr(self, "openai_api_type"):
api_type = self.openai_api_type

if api_type == "azure":
if hasattr(self, "openai_api_base"):
api_base = self.openai_api_base
if hasattr(self, "openai_api_version"):
api_version = self.openai_api_version

openai.api_type = api_type
openai.api_base = api_base
openai.api_version = api_version


# Monkey patching langchain
# pylint: disable=protected-access
ChatOpenAI._generate = _wrap(ChatOpenAI._generate, _set_credentials) # type: ignore
ChatOpenAI._agenerate = _async_wrap(ChatOpenAI._agenerate, _set_credentials) # type: ignore
BaseOpenAI._generate = _wrap(BaseOpenAI._generate, _set_credentials) # type: ignore
BaseOpenAI._agenerate = _async_wrap(BaseOpenAI._agenerate, _set_credentials) # type: ignore
# pylint: enable=protected-access

#########################################################################
# Monkey patching end #
#########################################################################


def _use_azure_credentials():
openai.api_type = "azure"
openai.api_key = os.environ.get("LLM_AZURE_OPENAI_API_KEY")
openai.api_base = os.environ.get("LLM_AZURE_OPENAI_API_BASE")
# os.environ.get("LLM_AZURE_OPENAI_API_VERSION")
openai.api_version = "2023-03-15-preview"


def _use_openai_credentials():
openai.api_type = "open_ai"
openai.api_key = os.environ.get("LLM_OPENAI_API_KEY")
openai.api_base = "https://api.openai.com/v1"
openai.api_version = None


openai_available = bool(os.environ.get("LLM_OPENAI_API_KEY"))
azure_openai_available = bool(os.environ.get("LLM_AZURE_OPENAI_API_KEY"))


# This is a hack to make sure that the openai api is set correctly
# Right now it is overkill, but it will be useful when the api gets fixed and we no longer
# hardcode the model names (i.e. OpenAI fixes their api)
@contextmanager
def _openai_client(use_azure_api: bool, is_preference: bool):
"""Set the openai client to use the correct api type, if available

Args:
use_azure_api (bool): If true, use the azure api, else use the openai api
is_preference (bool): If true, it can fall back to the other api if the preferred one is not available
"""
if use_azure_api:
if azure_openai_available:
_use_azure_credentials()
elif is_preference and openai_available:
_use_openai_credentials()
elif is_preference:
raise EnvironmentError(
"No OpenAI api available, please set LLM_AZURE_OPENAI_API_KEY, LLM_AZURE_OPENAI_API_BASE and "
"LLM_AZURE_OPENAI_API_VERSION environment variables or LLM_OPENAI_API_KEY environment variable"
)
else:
raise EnvironmentError(
"Azure OpenAI api not available, please set LLM_AZURE_OPENAI_API_KEY, LLM_AZURE_OPENAI_API_BASE and "
"LLM_AZURE_OPENAI_API_VERSION environment variables"
)
else:
if openai_available:
_use_openai_credentials()
elif is_preference and azure_openai_available:
_use_azure_credentials()
elif is_preference:
raise EnvironmentError(
"No OpenAI api available, please set LLM_OPENAI_API_KEY environment variable or LLM_AZURE_OPENAI_API_KEY, "
"LLM_AZURE_OPENAI_API_BASE and LLM_AZURE_OPENAI_API_VERSION environment variables"
)
else:
raise EnvironmentError(
"OpenAI api not available, please set LLM_OPENAI_API_KEY environment variable"
)

# API client is setup correctly
yield


def _get_available_deployments(openai_models: Dict[str, List[str]], model_aliases: Dict[str, str]):
if azure_openai_available:
os.environ["AZURE_OPENAI_ENDPOINT"]=os.environ["LLM_AZURE_OPENAI_API_BASE"]
os.environ["AZURE_OPENAI_API_KEY"]=os.environ["LLM_AZURE_OPENAI_API_KEY"]
os.environ["OPENAI_API_VERSION"]=os.environ["LLM_AZURE_OPENAI_API_VERSION"]
EneaGore marked this conversation as resolved.
Show resolved Hide resolved
#########################################################################################
# END Set Enviorment variables that are automatically used by ChatOpenAI/ChatAzureOpenAI#
#########################################################################################

actually_deployed_azure= [
EneaGore marked this conversation as resolved.
Show resolved Hide resolved
"gpt-35-turbo",
"gpt-4-turbo",
"gpt-4-vision",
"gpt-4o"
]

new_openai_models = []

def _get_available_deployments():
EneaGore marked this conversation as resolved.
Show resolved Hide resolved
available_deployments: Dict[str, Dict[str, Any]] = {
EneaGore marked this conversation as resolved.
Show resolved Hide resolved
"chat_completion": {},
"completion": {},
"fine_tuneing": {},
"fine_tune": {},
"embeddings": {},
"inference": {}
}

if azure_openai_available:
with _openai_client(use_azure_api=True, is_preference=False):
deployments = openai.Deployment.list().get("data") or [] # type: ignore
for deployment in deployments:
model_name = deployment.model
if model_name in model_aliases:
model_name = model_aliases[model_name]
if model_name in openai_models["chat_completion"]:
available_deployments["chat_completion"][deployment.id] = deployment
elif model_name in openai_models["completion"]:
available_deployments["completion"][deployment.id] = deployment
elif model_name in openai_models["fine_tuneing"]:
available_deployments["fine_tuneing"][deployment.id] = deployment
for deployment in actually_deployed_azure:
available_deployments["chat_completion"][deployment] = deployment


if openai_available:
# This will return only the usable models
openai.api_type= "openai"
for model in openai.models.list():
if model.owned_by == "openai":
new_openai_models.append(model.id)

return available_deployments


def _get_available_models(openai_models: Dict[str, List[str]],
available_deployments: Dict[str, Dict[str, Any]]):
def _get_available_models(available_deployments: Dict[str, Dict[str, Any]]):
EneaGore marked this conversation as resolved.
Show resolved Hide resolved
available_models: Dict[str, BaseLanguageModel] = {}

if openai_available:
openai_api_key = os.environ["LLM_OPENAI_API_KEY"]
for model_name in openai_models["chat_completion"]:
available_models[OPENAI_PREFIX + model_name] = ChatOpenAI(
model=model_name,
openai_api_key=openai_api_key,
client="",
temperature=0
)
for model_name in openai_models["completion"]:
available_models[OPENAI_PREFIX + model_name] = OpenAI(
model=model_name,
openai_api_key=openai_api_key,
client="",
temperature=0
for model in new_openai_models:
available_models[OPENAI_PREFIX + model] = ChatOpenAI(#type:ignore
model=model,
)

if azure_openai_available:
azure_openai_api_key = os.environ["LLM_AZURE_OPENAI_API_KEY"]
azure_openai_api_base = os.environ["LLM_AZURE_OPENAI_API_BASE"]
azure_openai_api_version = os.environ["LLM_AZURE_OPENAI_API_VERSION"]

for model_type, Model in [("chat_completion", AzureChatOpenAI), ("completion", AzureOpenAI)]:
for deployment_name, deployment in available_deployments[model_type].items():
available_models[AZURE_OPENAI_PREFIX + deployment_name] = Model(
model=deployment.model,
deployment_name=deployment_name,
openai_api_base=azure_openai_api_base,
openai_api_version=azure_openai_api_version,
openai_api_key=azure_openai_api_key,
client="",
temperature=0
)

return available_models


_model_aliases = {
"gpt-35-turbo": "gpt-3.5-turbo",
}

# Hardcoded because openai can't provide a trustworthly api to get the list of models and capabilities...
openai_models = {
"chat_completion": [
"gpt-4",
# "gpt-4-32k", # Not publicly available
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k"
],
"completion": [
"text-davinci-003",
"text-curie-001",
"text-babbage-001",
"text-ada-001",
],
"fine_tuneing": [
"davinci",
"curie",
"babbage",
"ada",
]
}
available_deployments = _get_available_deployments(openai_models, _model_aliases)
available_models = _get_available_models(openai_models, available_deployments)
available_deployments = _get_available_deployments()
available_models = _get_available_models(available_deployments)

if available_models:
logger.info("Available openai models: %s", ", ".join(available_models.keys()))

OpenAIModel = Enum('OpenAIModel', {name: name for name in available_models}) # type: ignore


default_model_name = "gpt-3.5-turbo"
default_model_name = "gpt-35-turbo"
FelixTJDietrich marked this conversation as resolved.
Show resolved Hide resolved
if "LLM_DEFAULT_MODEL" in os.environ and os.environ["LLM_DEFAULT_MODEL"] in available_models:
default_model_name = os.environ["LLM_DEFAULT_MODEL"]
if default_model_name not in available_models:
default_model_name = list(available_models.keys())[0]

default_openai_model = OpenAIModel[default_model_name]

default_openai_model = OpenAIModel[default_model_name]#type:ignore

# Long descriptions will be displayed in the playground UI and are copied from the OpenAI docs
class OpenAIModelConfig(ModelConfig):
Expand Down Expand Up @@ -307,7 +152,8 @@ def get_model(self) -> BaseLanguageModel:
BaseLanguageModel: The model.
"""
model = available_models[self.model_name.value]
kwargs = model._lc_kwargs
kwargs = model.__dict__ #BaseLanguageModel type
# kw = model._lc_kwargs
EneaGore marked this conversation as resolved.
Show resolved Hide resolved
secrets = {secret: getattr(model, secret) for secret in model.lc_secrets.keys()}
kwargs.update(secrets)

Expand Down
Loading
Loading