From f5d734fa05c5714032660ff8d6dca5df2f23fac5 Mon Sep 17 00:00:00 2001 From: Anton Dubovik Date: Wed, 31 Jul 2024 14:46:38 +0100 Subject: [PATCH] fix: supported reporting of image tokens from amazon image embeddings --- .gitignore | 3 +-- .../embedding/amazon/titan_image.py | 20 ++++++++++++++----- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index da8669c..ba9a087 100644 --- a/.gitignore +++ b/.gitignore @@ -8,5 +8,4 @@ __pycache__ .nox dist .vscode/launch.json -~* -leftovers \ No newline at end of file +~* \ No newline at end of file diff --git a/aidial_adapter_bedrock/embedding/amazon/titan_image.py b/aidial_adapter_bedrock/embedding/amazon/titan_image.py index 3d14569..a13f5eb 100644 --- a/aidial_adapter_bedrock/embedding/amazon/titan_image.py +++ b/aidial_adapter_bedrock/embedding/amazon/titan_image.py @@ -42,6 +42,13 @@ class AmazonRequest(BaseModel): inputText: str | None = None inputImage: str | None = None + def get_image_tokens(self) -> int: + # According to https://aws.amazon.com/bedrock/pricing/: + # Price per 1000 input (text) tokens = $0.0008 + # Price per input image = $0.00006 + # Therefore, cost of input image = $0.00006 / ($0.0008 / 1000) = 75 tokens + return 0 if self.inputImage is None else 75 + def create_titan_request( request: AmazonRequest, dimensions: int | None @@ -146,12 +153,15 @@ async def embeddings( token_count = 0 # NOTE: Amazon Titan doesn't support batched inputs - async for text_input in get_requests(request, self.storage): - sub_request = create_titan_request(text_input, request.dimensions) - embedding, tokens = await call_embedding_model( - self.client, self.model, sub_request + async for sub_request in get_requests(request, self.storage): + embedding, text_tokens = await call_embedding_model( + self.client, + self.model, + create_titan_request(sub_request, request.dimensions), ) + image_tokens = sub_request.get_image_tokens() + vector = ( vector_to_base64(embedding) if request.encoding_format == "base64" @@ -159,7 +169,7 @@ async def embeddings( ) vectors.append(vector) - token_count += tokens + token_count += text_tokens + image_tokens return make_embeddings_response( model=self.model,