Skip to content

Commit

Permalink
Dh 5776/add support for azure (#479)
Browse files Browse the repository at this point in the history
* DH-5776/support for Azure

* DH-5776/add support for Azure embedding
  • Loading branch information
MohammadrezaPourreza authored May 8, 2024
1 parent 581b6ff commit 2537ca5
Show file tree
Hide file tree
Showing 7 changed files with 69 additions and 54 deletions.
12 changes: 6 additions & 6 deletions dataherald/api/fastapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ def async_scanning(scanner, database, table_descriptions, storage):
)


def async_fine_tuning(storage, model):
openai_fine_tuning = OpenAIFineTuning(storage, model)
def async_fine_tuning(system, storage, model):
openai_fine_tuning = OpenAIFineTuning(system, storage, model)
openai_fine_tuning.create_fintuning_dataset()
openai_fine_tuning.create_fine_tuning_job()

Expand Down Expand Up @@ -626,7 +626,7 @@ def create_finetuning_job(
e, fine_tuning_request.dict(), "finetuning_not_created"
)

background_tasks.add_task(async_fine_tuning, self.storage, model)
background_tasks.add_task(async_fine_tuning, self.system, self.storage, model)

return model

Expand All @@ -652,7 +652,7 @@ def cancel_finetuning_job(
status_code=400, detail="Model has already been cancelled."
)

openai_fine_tuning = OpenAIFineTuning(self.storage, model)
openai_fine_tuning = OpenAIFineTuning(self.system, self.storage, model)

return openai_fine_tuning.cancel_finetuning_job()

Expand All @@ -665,7 +665,7 @@ def get_finetunings(self, db_connection_id: str | None = None) -> list[Finetunin
models = model_repository.find_by(query)
result = []
for model in models:
openai_fine_tuning = OpenAIFineTuning(self.storage, model)
openai_fine_tuning = OpenAIFineTuning(self.system, self.storage, model)
result.append(
Finetuning(**openai_fine_tuning.retrieve_finetuning_job().dict())
)
Expand All @@ -685,7 +685,7 @@ def get_finetuning_job(self, finetuning_job_id: str) -> Finetuning:
model = model_repository.find_by_id(finetuning_job_id)
if not model:
raise HTTPException(status_code=404, detail="Model not found")
openai_fine_tuning = OpenAIFineTuning(self.storage, model)
openai_fine_tuning = OpenAIFineTuning(self.system, self.storage, model)
return openai_fine_tuning.retrieve_finetuning_job()

@override
Expand Down
2 changes: 1 addition & 1 deletion dataherald/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class Settings(BaseSettings):
encrypt_key: str = os.environ.get("ENCRYPT_KEY")
s3_aws_access_key_id: str | None = os.environ.get("S3_AWS_ACCESS_KEY_ID")
s3_aws_secret_access_key: str | None = os.environ.get("S3_AWS_SECRET_ACCESS_KEY")
#Needed for Azure OpenAI integration:
# Needed for Azure OpenAI integration:
azure_api_key: str | None = os.environ.get("AZURE_API_KEY")
embedding_model: str | None = os.environ.get("EMBEDDING_MODEL")
azure_api_version: str | None = os.environ.get("AZURE_API_VERSION")
Expand Down
20 changes: 14 additions & 6 deletions dataherald/finetuning/openai_finetuning.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@

import numpy as np
import tiktoken
from langchain_openai import OpenAIEmbeddings
from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
from openai import OpenAI
from overrides import override
from sql_metadata import Parser
from tiktoken import Encoding

from dataherald.config import System
from dataherald.db_scanner.models.types import TableDescription, TableDescriptionStatus
from dataherald.db_scanner.repository.base import TableDescriptionRepository
from dataherald.finetuning import FinetuningModel
Expand All @@ -36,17 +37,24 @@ class OpenAIFineTuning(FinetuningModel):
storage: Any
client: OpenAI

def __init__(self, storage: Any, fine_tuning_model: Finetuning):
def __init__(self, system: System, storage: Any, fine_tuning_model: Finetuning):
self.storage = storage
self.system = system
self.fine_tuning_model = fine_tuning_model
db_connection_repository = DatabaseConnectionRepository(storage)
db_connection = db_connection_repository.find_by_id(
fine_tuning_model.db_connection_id
)
self.embedding = OpenAIEmbeddings( #TODO AzureOpenAIEmbeddings when Azure
openai_api_key=db_connection.decrypt_api_key(),
model=EMBEDDING_MODEL,
)
if self.system.settings["azure_api_key"] is not None:
self.embedding = AzureOpenAIEmbeddings(
azure_api_key=db_connection.decrypt_api_key(),
model=EMBEDDING_MODEL,
)
else:
self.embedding = OpenAIEmbeddings(
openai_api_key=db_connection.decrypt_api_key(),
model=EMBEDDING_MODEL,
)
self.encoding = tiktoken.encoding_for_model(
fine_tuning_model.base_llm.model_name
)
Expand Down
10 changes: 5 additions & 5 deletions dataherald/model/base_model.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
from typing import Any

from langchain.llms import AlephAlpha, Anthropic, Cohere, OpenAI
from langchain.llms import AlephAlpha, Anthropic, AzureOpenAI, Cohere, OpenAI
from overrides import override

from dataherald.model import LLMModel
Expand All @@ -19,16 +19,16 @@ def __init__(self, system):
self.azure_api_key = os.environ.get("AZURE_API_KEY")

@override
def get_model(
def get_model( # noqa: C901
self,
database_connection: DatabaseConnection,
model_family="openai",
model_name="davinci-003",
api_base: str | None = None, # noqa: ARG002
**kwargs: Any
) -> Any:
if self.system.settings['azure_api_key'] != None:
model_family = 'azure'
if self.system.settings["azure_api_key"] is not None:
model_family = "azure"
if database_connection.llm_api_key is not None:
fernet_encrypt = FernetEncrypt()
api_key = fernet_encrypt.decrypt(database_connection.llm_api_key)
Expand All @@ -39,7 +39,7 @@ def get_model(
elif model_family == "google":
self.google_api_key = api_key
elif model_family == "azure":
self.azure_api_key == api_key
self.azure_api_key = api_key
if self.openai_api_key:
self.model = OpenAI(model_name=model_name, **kwargs)
elif self.aleph_alpha_api_key:
Expand Down
12 changes: 6 additions & 6 deletions dataherald/model/chat_model.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Any

from langchain_community.chat_models import ChatAnthropic, ChatCohere, ChatGooglePalm
from langchain_openai import ChatOpenAI, AzureChatOpenAI
from langchain_openai import AzureChatOpenAI, ChatOpenAI
from overrides import override

from dataherald.model import LLMModel
Expand All @@ -22,16 +22,16 @@ def get_model(
**kwargs: Any
) -> Any:
api_key = database_connection.decrypt_api_key()
if self.system.settings['azure_api_key'] != None:
model_family = 'azure'
if self.system.settings["azure_api_key"] is not None:
model_family = "azure"
if model_family == "azure":
if api_base.endswith("/"): #TODO check where final "/" is added to api_base
if api_base.endswith("/"): # check where final "/" is added to api_base
api_base = api_base[:-1]
return AzureChatOpenAI(
deployment_name=model_name,
openai_api_key=api_key,
azure_endpoint= api_base,
api_version=self.system.settings['azure_api_version'],
azure_endpoint=api_base,
api_version=self.system.settings["azure_api_version"],
**kwargs
)
if model_family == "openai":
Expand Down
36 changes: 25 additions & 11 deletions dataherald/sql_generator/dataherald_finetuning_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from langchain.chains.llm import LLMChain
from langchain.tools.base import BaseTool
from langchain_community.callbacks import get_openai_callback
from langchain_openai import OpenAIEmbeddings
from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
from openai import OpenAI
from overrides import override
from pydantic import BaseModel, Field
Expand Down Expand Up @@ -587,14 +587,24 @@ def generate_response(
)
finetunings_repository = FinetuningsRepository(storage)
finetuning = finetunings_repository.find_by_id(self.finetuning_id)
openai_fine_tuning = OpenAIFineTuning(storage, finetuning)
openai_fine_tuning = OpenAIFineTuning(self.system, storage, finetuning)
finetuning = openai_fine_tuning.retrieve_finetuning_job()
if finetuning.status != FineTuningStatus.SUCCEEDED.value:
raise FinetuningNotAvailableError(
f"Finetuning({self.finetuning_id}) has the status {finetuning.status}."
f"Finetuning should have the status {FineTuningStatus.SUCCEEDED.value} to generate SQL queries."
)
self.database = SQLDatabase.get_sql_engine(database_connection)
if self.llm.openai_api_type == "azure":
embedding = AzureOpenAIEmbeddings(
openai_api_key=database_connection.decrypt_api_key(),
model=EMBEDDING_MODEL,
)
else:
embedding = OpenAIEmbeddings(
openai_api_key=database_connection.decrypt_api_key(),
model=EMBEDDING_MODEL,
)
toolkit = SQLDatabaseToolkit(
db=self.database,
instructions=instructions,
Expand All @@ -605,10 +615,7 @@ def generate_response(
use_finetuned_model_only=self.use_fintuned_model_only,
model_name=finetuning.base_llm.model_name,
openai_fine_tuning=openai_fine_tuning,
embedding=OpenAIEmbeddings( #TODO AzureOpenAIEmbeddings when Azure
openai_api_key=database_connection.decrypt_api_key(),
model=EMBEDDING_MODEL,
),
embedding=embedding,
)
agent_executor = self.create_sql_agent(
toolkit=toolkit,
Expand Down Expand Up @@ -693,14 +700,24 @@ def stream_response(
)
finetunings_repository = FinetuningsRepository(storage)
finetuning = finetunings_repository.find_by_id(self.finetuning_id)
openai_fine_tuning = OpenAIFineTuning(storage, finetuning)
openai_fine_tuning = OpenAIFineTuning(self.system, storage, finetuning)
finetuning = openai_fine_tuning.retrieve_finetuning_job()
if finetuning.status != FineTuningStatus.SUCCEEDED.value:
raise FinetuningNotAvailableError(
f"Finetuning({self.finetuning_id}) has the status {finetuning.status}."
f"Finetuning should have the status {FineTuningStatus.SUCCEEDED.value} to generate SQL queries."
)
self.database = SQLDatabase.get_sql_engine(database_connection)
if self.llm.openai_api_type == "azure":
embedding = AzureOpenAIEmbeddings(
openai_api_key=database_connection.decrypt_api_key(),
model=EMBEDDING_MODEL,
)
else:
embedding = OpenAIEmbeddings(
openai_api_key=database_connection.decrypt_api_key(),
model=EMBEDDING_MODEL,
)
toolkit = SQLDatabaseToolkit(
db=self.database,
instructions=instructions,
Expand All @@ -710,10 +727,7 @@ def stream_response(
use_finetuned_model_only=self.use_fintuned_model_only,
model_name=finetuning.base_llm.model_name,
openai_fine_tuning=openai_fine_tuning,
embedding=OpenAIEmbeddings( #TODO AzureOpenAIEmbeddings when Azure
openai_api_key=database_connection.decrypt_api_key(),
model=EMBEDDING_MODEL,
),
embedding=embedding,
)
agent_executor = self.create_sql_agent(
toolkit=toolkit,
Expand Down
31 changes: 12 additions & 19 deletions dataherald/sql_generator/dataherald_sqlagent.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from langchain.chains.llm import LLMChain
from langchain.tools.base import BaseTool
from langchain_community.callbacks import get_openai_callback
from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings
from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
from overrides import override
from pydantic import BaseModel, Field
from sql_metadata import Parser
Expand Down Expand Up @@ -753,7 +753,7 @@ def generate_response(
number_of_samples = 0
logger.info(f"Generating SQL response to question: {str(user_prompt.dict())}")
self.database = SQLDatabase.get_sql_engine(database_connection)
#Set Embeddings class depending on azure / not azure
# Set Embeddings class depending on azure / not azure
if self.llm.openai_api_type == "azure":
toolkit = SQLDatabaseToolkit(
db=self.database,
Expand Down Expand Up @@ -873,21 +873,17 @@ def stream_response(
new_fewshot_examples = None
number_of_samples = 0
self.database = SQLDatabase.get_sql_engine(database_connection)
#Set Embeddings class depending on azure / not azure
# Set Embeddings class depending on azure / not azure
if self.llm.openai_api_type == "azure":
toolkit = SQLDatabaseToolkit(
db=self.database,
context=context,
few_shot_examples=new_fewshot_examples,
instructions=instructions,
is_multiple_schema=True if user_prompt.schemas else False,
db_scan=db_scan,
embedding=AzureOpenAIEmbeddings(
openai_api_key=database_connection.decrypt_api_key(),
model=EMBEDDING_MODEL,
),
embedding = AzureOpenAIEmbeddings(
openai_api_key=database_connection.decrypt_api_key(),
model=EMBEDDING_MODEL,
)
else:
embedding = OpenAIEmbeddings(
openai_api_key=database_connection.decrypt_api_key(),
model=EMBEDDING_MODEL,
)
else:
toolkit = SQLDatabaseToolkit(
queuer=queue,
db=self.database,
Expand All @@ -896,10 +892,7 @@ def stream_response(
instructions=instructions,
is_multiple_schema=True if user_prompt.schemas else False,
db_scan=db_scan,
embedding=OpenAIEmbeddings(
openai_api_key=database_connection.decrypt_api_key(),
model=EMBEDDING_MODEL,
),
embedding=embedding,
)
agent_executor = self.create_sql_agent(
toolkit=toolkit,
Expand Down

0 comments on commit 2537ca5

Please sign in to comment.