feat: Added max_tokens and timeout to all LLMs and a few embedding adapters (#93)

* Added max_tokens and timeout to all LLMs and a few embedding adapters

* Updated Open AI to OpenAI
chandrasekharan-zipstack authored Aug 29, 2024
1 parent 88e27d7 commit 7613bec
Showing 41 changed files with 302 additions and 171 deletions.
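
A minimal sketch (not part of the commit) of how the two new optional keys could be supplied in an adapter config dict; the key names and defaults follow the JSON schemas changed below, while the adapter names and credential values are placeholders:

# Hypothetical LLM adapter config using the keys touched by this commit.
llm_config = {
    "adapter_name": "my-anthropic-llm",  # placeholder name
    "model": "claude-instant-1.2",
    "api_key": "<redacted>",             # placeholder credential
    "max_tokens": 512,                   # new: caps the length of LLM replies
    "timeout": 900,                      # per-request timeout in seconds
    "max_retries": 5,
}

# Hypothetical embedding adapter config; "timeout" is the newly added key.
embedding_config = {
    "adapter_name": "my-openai-embedding",  # placeholder name
    "api_key": "<redacted>",                # placeholder credential
    "embed_batch_size": 10,
    "timeout": 60,                          # per-request timeout in seconds
}
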
@@ -1,4 +1,4 @@
# Unstract Azure Open AI Embedding Adapter
# Unstract Azure OpenAI Embedding Adapter

This package consists of the functionalities required to adapt with Azure OpenAI Embedding
Version supported
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
[project]
name = "unstract-azure-open-ai-embedding"
version = "0.0.1"
description = "Azure Open AI Embedding"
description = "Azure OpenAI Embedding"
authors = [
{name = "Zipstack Inc.", email = "[email protected]"},
]
@@ -17,6 +17,8 @@ class Constants:
AZURE_ENDPOINT = "azure_endpoint"
DEPLOYMENT_NAME = "deployment_name"
API_TYPE = "azure"
TIMEOUT = "timeout"
DEFAULT_TIMEOUT = 60


class AzureOpenAI(EmbeddingAdapter):
@@ -56,6 +58,7 @@ def get_embedding_instance(self) -> BaseEmbedding:
embedding_batch_size = EmbeddingHelper.get_embedding_batch_size(
config=self.config
)
timeout = int(self.config.get(Constants.TIMEOUT, Constants.DEFAULT_TIMEOUT))
embedding: BaseEmbedding = AzureOpenAIEmbedding(
model=str(self.config.get(Constants.MODEL)),
deployment_name=str(self.config.get(Constants.DEPLOYMENT_NAME)),
@@ -64,6 +67,7 @@ def get_embedding_instance(self) -> BaseEmbedding:
azure_endpoint=str(self.config.get(Constants.AZURE_ENDPOINT)),
embed_batch_size=embedding_batch_size,
api_type=Constants.API_TYPE,
timeout=timeout,
)
return embedding
except Exception as e:
@@ -1,5 +1,5 @@
{
"title": "Azure Open AI Embedding",
"title": "Azure OpenAI Embedding",
"type": "object",
"required": [
"adapter_name",
@@ -28,13 +28,6 @@
"default": "",
"description": "Provide the name of the deployment you defined in Azure console"
},
"embed_batch_size": {
"type": "number",
"minimum": 0,
"multipleOf": 1,
"title": "Embedding Batch Size",
"default": 5
},
"api_key": {
"type": "string",
"title": "API Key",
@@ -53,6 +46,21 @@
"default": "",
"format": "uri",
"description": "Provide the Azure endpoint. Example: https://<your-deployment>.openai.azure.com/"
},
"embed_batch_size": {
"type": "number",
"minimum": 0,
"multipleOf": 1,
"title": "Embedding Batch Size",
"default": 5
},
"timeout": {
"type": "number",
"minimum": 0,
"multipleOf": 1,
"title": "Timeout",
"default": 60,
"description": "Timeout for each request in seconds"
}
}
}
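
A small sketch of how the adapter resolves the new timeout, mirroring the code change above; the config dict and its values are hypothetical, and the fallback matches Constants.DEFAULT_TIMEOUT:

# Hypothetical config for the Azure OpenAI embedding adapter; "timeout" is
# optional and falls back to the 60-second default when omitted.
DEFAULT_TIMEOUT = 60
config = {
    "model": "text-embedding-ada-002",       # placeholder model
    "deployment_name": "my-deployment",      # placeholder deployment
    "azure_endpoint": "https://my-deployment.openai.azure.com/",
}
timeout = int(config.get("timeout", DEFAULT_TIMEOUT))
print(timeout)  # -> 60
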
2 changes: 1 addition & 1 deletion src/unstract/sdk/adapters/embedding/open_ai/README.md
@@ -1 +1 @@
# Unstract Open AI Embeddings
# Unstract OpenAI Embeddings
2 changes: 1 addition & 1 deletion src/unstract/sdk/adapters/embedding/open_ai/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
[project]
name = "unstract-open-ai-embedding"
version = "0.0.1"
description = "Open AI Embedding"
description = "OpenAI Embedding"
authors = [
{name = "Zipstack Inc.", email = "[email protected]"},
]
4 changes: 4 additions & 0 deletions src/unstract/sdk/adapters/embedding/open_ai/src/open_ai.py
@@ -15,6 +15,8 @@ class Constants:
API_BASE_KEY = "api_base"
ADAPTER_NAME = "adapter_name"
API_TYPE = "openai"
TIMEOUT = "timeout"
DEFAULT_TIMEOUT = 60


class OpenAI(EmbeddingAdapter):
@@ -51,12 +53,14 @@ def get_json_schema() -> str:

def get_embedding_instance(self) -> BaseEmbedding:
try:
timeout = int(self.config.get(Constants.TIMEOUT, Constants.DEFAULT_TIMEOUT))
embedding: BaseEmbedding = OpenAIEmbedding(
api_key=str(self.config.get(Constants.API_KEY)),
api_base=str(
self.config.get(Constants.API_BASE_KEY, Constants.API_BASE_VALUE)
),
api_type=Constants.API_TYPE,
timeout=timeout,
)
return embedding
except Exception as e:
@@ -1,5 +1,5 @@
{
"title": "Open AI Embedding",
"title": "OpenAI Embedding",
"type": "object",
"required": [
"adapter_name",
@@ -18,18 +18,26 @@
"default": "",
"format": "password"
},
"api_base": {
"type": "string",
"title": "API Base",
"format": "uri",
"default": "https://api.openai.com/v1/"
},
"embed_batch_size": {
"type": "number",
"minimum": 0,
"multipleOf": 1,
"title": "Embed Batch Size",
"default": 10
},
"api_base": {
"type": "string",
"title": "API Base",
"format": "uri",
"default": "https://api.openai.com/v1/"
"timeout": {
"type": "number",
"minimum": 0,
"multipleOf": 1,
"title": "Timeout",
"default": 60,
"description": "Timeout in seconds"
}
}
}
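
As a quick illustration of the defaults above, a sketch of how the optional api_base and the new timeout resolve from a minimal, hypothetical config:

# Minimal hypothetical config: only the API key is set, so the schema defaults apply.
config = {"api_key": "<redacted>"}  # placeholder credential
api_base = str(config.get("api_base", "https://api.openai.com/v1/"))
timeout = int(config.get("timeout", 60))
print(api_base, timeout)  # -> https://api.openai.com/v1/ 60
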
2 changes: 2 additions & 0 deletions src/unstract/sdk/adapters/llm/anthropic/src/__init__.py
@@ -7,3 +7,5 @@
"description": "Anthropic LLM adapter",
"is_active": True,
}

__all__ = ["AnthropicLLM"]
38 changes: 30 additions & 8 deletions src/unstract/sdk/adapters/llm/anthropic/src/anthropic.py
@@ -1,20 +1,22 @@
import os
from typing import Any

from anthropic import APIError
from llama_index.core.llms import LLM
from llama_index.llms.anthropic import Anthropic
from llama_index.llms.anthropic.base import DEFAULT_ANTHROPIC_MAX_TOKENS

from unstract.sdk.adapters.exceptions import AdapterError
from unstract.sdk.adapters.exceptions import AdapterError, LLMError
from unstract.sdk.adapters.llm.constants import LLMKeys
from unstract.sdk.adapters.llm.helper import LLMHelper
from unstract.sdk.adapters.llm.llm_adapter import LLMAdapter


class Constants:
MODEL = "model"
API_KEY = "api_key"
TIMEOUT = "timeout"
MAX_RETIRES = "max_retries"
MAX_RETRIES = "max_retries"
MAX_TOKENS = "max_tokens"


class AnthropicLLM(LLMAdapter):
@@ -50,6 +52,9 @@ def get_json_schema() -> str:
return schema

def get_llm_instance(self) -> LLM:
max_tokens = int(
self.config.get(Constants.MAX_TOKENS, DEFAULT_ANTHROPIC_MAX_TOKENS)
)
try:
llm: LLM = Anthropic(
model=str(self.config.get(Constants.MODEL)),
@@ -58,15 +63,32 @@ def get_llm_instance(self) -> LLM:
self.config.get(Constants.TIMEOUT, LLMKeys.DEFAULT_TIMEOUT)
),
max_retries=int(
self.config.get(Constants.MAX_RETIRES, LLMKeys.DEFAULT_MAX_RETRIES)
self.config.get(Constants.MAX_RETRIES, LLMKeys.DEFAULT_MAX_RETRIES)
),
temperature=0,
max_tokens=max_tokens,
)
return llm
except Exception as e:
raise AdapterError(str(e))

def test_connection(self) -> bool:
llm = self.get_llm_instance()
test_result: bool = LLMHelper.test_llm_instance(llm=llm)
return test_result
@staticmethod
def parse_llm_err(e: APIError) -> LLMError:
"""Parse the error from Anthropic.
Helps parse errors from Anthropic and wraps with custom exception.
Args:
e (AnthropicAPIError): Exception from Anthropic
Returns:
LLMError: Error to be sent to the user
"""
msg = "Error from Anthropic. "
if hasattr(e, "body"):
if isinstance(e.body, dict) and "error" in e.body:
err = e.body["error"]
msg += err.get("message", e.message)
else:
msg += e.message
return LLMError(msg)
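
A hedged usage sketch for the new parse_llm_err helper; the wrapper function and prompt are illustrative and not part of the SDK, and ``adapter`` is assumed to be an AnthropicLLM instance built elsewhere:

from anthropic import APIError

def run_completion(adapter, prompt: str):
    """Run a completion and surface Anthropic failures as LLMError.

    ``adapter`` is assumed to be an AnthropicLLM adapter (see the diff above);
    construction and imports for the adapter class are omitted in this sketch.
    """
    llm = adapter.get_llm_instance()
    try:
        return llm.complete(prompt)
    except APIError as e:
        # parse_llm_err prefers the message in e.body["error"] when present,
        # otherwise it falls back to e.message.
        raise type(adapter).parse_llm_err(e) from e
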
28 changes: 18 additions & 10 deletions src/unstract/sdk/adapters/llm/anthropic/src/static/json_schema.json
@@ -19,13 +19,20 @@
"default": "claude-instant-1.2",
"description": "Model name. Refer to Anthropic's documentation for the list of available models."
},
"timeout": {
"api_key": {
"type": "string",
"title": "API Key",
"default": "",
"description": "API Key",
"format": "password"
},
"max_tokens": {
"type": "number",
"minimum": 0,
"multipleOf": 1,
"title": "Timeout",
"default": 900,
"description": "Timeout in seconds"
"default": 512,
"title": "Maximum Output Tokens",
"description": "Maximum number of output tokens to limit LLM replies, the maximum possible differs from model to model."
},
"max_retries": {
"type": "number",
@@ -36,12 +43,13 @@
"format": "number",
"description": "Maximum number of retries"
},
"api_key": {
"type": "string",
"title": "API Key",
"default": "",
"description": "API Key",
"format": "password"
"timeout": {
"type": "number",
"minimum": 0,
"multipleOf": 1,
"title": "Timeout",
"default": 900,
"description": "Timeout in seconds"
}
}
}
2 changes: 2 additions & 0 deletions src/unstract/sdk/adapters/llm/any_scale/src/__init__.py
@@ -7,3 +7,5 @@
"description": "AnyScale LLM adapter",
"is_active": True,
}

__all__ = ["AnyScaleLLM"]
14 changes: 6 additions & 8 deletions src/unstract/sdk/adapters/llm/any_scale/src/anyscale.py
@@ -1,21 +1,22 @@
import os
from typing import Any

from llama_index.core.constants import DEFAULT_NUM_OUTPUTS
from llama_index.core.llms import LLM
from llama_index.llms.anyscale import Anyscale

from unstract.sdk.adapters.exceptions import AdapterError
from unstract.sdk.adapters.llm.constants import LLMKeys
from unstract.sdk.adapters.llm.helper import LLMHelper
from unstract.sdk.adapters.llm.llm_adapter import LLMAdapter


class Constants:
MODEL = "model"
API_KEY = "api_key"
API_BASE = "api_base"
MAX_RETIRES = "max_retries"
MAX_RETRIES = "max_retries"
ADDITIONAL_KWARGS = "additional_kwargs"
MAX_TOKENS = "max_tokens"


class AnyScaleLLM(LLMAdapter):
@@ -52,21 +53,18 @@ def get_json_schema() -> str:

def get_llm_instance(self) -> LLM:
try:
max_tokens = int(self.config.get(Constants.MAX_TOKENS, DEFAULT_NUM_OUTPUTS))
llm: LLM = Anyscale(
model=str(self.config.get(Constants.MODEL)),
api_key=str(self.config.get(Constants.API_KEY)),
api_base=str(self.config.get(Constants.API_BASE)),
additional_kwargs=self.config.get(Constants.ADDITIONAL_KWARGS),
max_retries=int(
self.config.get(Constants.MAX_RETIRES, LLMKeys.DEFAULT_MAX_RETRIES)
self.config.get(Constants.MAX_RETRIES, LLMKeys.DEFAULT_MAX_RETRIES)
),
temperature=0,
max_tokens=max_tokens,
)
return llm
except Exception as e:
raise AdapterError(str(e))

def test_connection(self) -> bool:
llm = self.get_llm_instance()
test_result: bool = LLMHelper.test_llm_instance(llm=llm)
return test_result
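
A short sketch of the max_tokens fallback the AnyScale adapter now uses; the config dict and model name are hypothetical, and DEFAULT_NUM_OUTPUTS (256) matches the schema default added below:

from llama_index.core.constants import DEFAULT_NUM_OUTPUTS

config = {"model": "meta-llama/Llama-2-70b-chat-hf"}  # hypothetical config without max_tokens
max_tokens = int(config.get("max_tokens", DEFAULT_NUM_OUTPUTS))
print(max_tokens)  # -> 256
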
@@ -24,13 +24,6 @@
"default": "",
"description": "Additional kwargs to pass to the model."
},
"max_retries": {
"type": "number",
"minimum": 0,
"title": "Max Retries",
"default": 5,
"description": "Maximum number of retries to make when a request fails."
},
"api_base": {
"type": "string",
"title": "API Base",
@@ -43,6 +36,22 @@
"title": "API Key",
"format": "password",
"description": "API Key for the AnyScale API."
},
"max_tokens": {
"type": "number",
"minimum": 0,
"multipleOf": 1,
"title": "Maximum Output Tokens",
"default": 256,
"description": "Maximum number of output tokens to limit LLM replies, maximum possible varies from model to model."
},
"max_retries": {
"type": "number",
"minimum": 0,
"multipleOf": 1,
"title": "Max Retries",
"default": 5,
"description": "Maximum number of retries to attempt when a request fails."
}
}
}
2 changes: 1 addition & 1 deletion src/unstract/sdk/adapters/llm/azure_open_ai/README.md
@@ -1 +1 @@
# Unstract Azure Open AI LLM Adapter
# Unstract Azure OpenAI LLM Adapter
2 changes: 1 addition & 1 deletion src/unstract/sdk/adapters/llm/azure_open_ai/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
[project]
name = "unstract-azure-open-ai-llm"
version = "0.0.1"
description = "Azure Open AI LLM"
description = "Azure OpenAI LLM"
authors = [
{name = "Zipstack Inc.", email = "[email protected]"},
]
2 changes: 2 additions & 0 deletions src/unstract/sdk/adapters/llm/azure_open_ai/src/__init__.py
@@ -7,3 +7,5 @@
"description": "AzureOpenAI LLM adapter",
"is_active": True,
}

__all__ = ["AzureOpenAILLM"]