Skip to content

Commit 3417308

Browse files
authored
feat(bedrock): add support for ARN and cross region endpoint (#2785)
1 parent 04811b5 commit 3417308

File tree

5 files changed

+256
-9
lines changed

5 files changed

+256
-9
lines changed

packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py

+35-9
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ def with_instrumentation(*args, **kwargs):
261261
@dont_throw
262262
def _handle_stream_call(span, kwargs, response, metric_params):
263263

264-
(vendor, model) = kwargs.get("modelId").split(".")
264+
(vendor, model) = _get_vendor_model(kwargs.get("modelId"))
265265
request_body = json.loads(kwargs.get("body"))
266266

267267
headers = {}
@@ -299,12 +299,7 @@ def _handle_call(span, kwargs, response, metric_params):
299299
if "ResponseMetadata" in response:
300300
headers = response.get("ResponseMetadata").get("HTTPHeaders", {})
301301

302-
modelId = kwargs.get("modelId")
303-
if modelId is not None and "." in modelId:
304-
(vendor, model) = modelId.split(".")
305-
else:
306-
vendor = "imported_model"
307-
model = kwargs.get("modelId")
302+
(vendor, model) = _get_vendor_model(kwargs.get("modelId"))
308303
metric_params.vendor = vendor
309304
metric_params.model = model
310305
metric_params.is_stream = False
@@ -318,7 +313,7 @@ def _handle_call(span, kwargs, response, metric_params):
318313

319314
@dont_throw
320315
def _handle_converse(span, kwargs, response, metric_params):
321-
(vendor, model) = kwargs.get("modelId").split(".")
316+
(vendor, model) = _get_vendor_model(kwargs.get("modelId"))
322317
guardrail_converse(response, vendor, model, metric_params)
323318

324319
_set_span_attribute(span, SpanAttributes.LLM_SYSTEM, vendor)
@@ -359,7 +354,7 @@ def _handle_converse(span, kwargs, response, metric_params):
359354

360355
@dont_throw
361356
def _handle_converse_stream(span, kwargs, response, metric_params):
362-
(vendor, model) = kwargs.get("modelId").split(".")
357+
(vendor, model) = _get_vendor_model(kwargs.get("modelId"))
363358

364359
_set_span_attribute(span, SpanAttributes.LLM_SYSTEM, vendor)
365360
_set_span_attribute(span, SpanAttributes.LLM_REQUEST_MODEL, model)
@@ -417,6 +412,37 @@ def wrap(*args, **kwargs):
417412
stream._parse_event = handler(stream._parse_event)
418413

419414

415+
def _get_vendor_model(modelId):
    """Resolve a Bedrock ``modelId`` into a ``(vendor, model)`` pair.

    Handles three shapes of identifier:
    * inference-profile ARNs (``arn:aws:bedrock:...:inference-profile/<id>``),
    * cross-region profile ids (``us.anthropic.claude-...``),
    * plain model ids (``anthropic.claude-v2``).

    Anything unrecognized is reported as vendor ``"imported_model"`` with the
    raw identifier as the model. Docs:
    https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html#inference-profiles-support-system
    """
    vendor = "imported_model"
    model = modelId

    if modelId is not None and modelId.startswith("arn"):
        # ARN layout: arn:aws:bedrock:<region>:<account>:inference-profile/<profile-id>
        arn_fields = modelId.split(":")
        if len(arn_fields) > 5:
            resource = arn_fields[5].split("/")
            if len(resource) == 2 and "." in resource[1]:
                vendor, model = _cross_region_check(resource[1])
    elif modelId is not None and "." in modelId:
        vendor, model = _cross_region_check(modelId)

    return vendor, model
432+
433+
434+
def _cross_region_check(value):
435+
prefixes = ["us", "us-gov", "eu", "apac"]
436+
if any(value.startswith(prefix + ".") for prefix in prefixes):
437+
parts = value.split(".")
438+
if len(parts) > 2:
439+
parts.pop(0)
440+
return parts[0], parts[1]
441+
else:
442+
(vendor, model) = value.split(".")
443+
return vendor, model
444+
445+
420446
def _report_converse_input_prompt(kwargs, span):
421447
prompt_idx = 0
422448
if "system" in kwargs:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
interactions:
2+
- request:
3+
body: '{"messages": [{"role": "user", "content": [{"text": "Human: Tell me a joke
4+
about opentelemetry Assistant:"}]}], "guardrailConfig": {"guardrailIdentifier":
5+
"v9kpg6yrwhs2", "guardrailVersion": "DRAFT", "trace": "enabled"}, "inferenceConfig":
6+
{"temperature": 0.5}}'
7+
headers:
8+
Content-Length:
9+
- '261'
10+
Content-Type:
11+
- !!binary |
12+
YXBwbGljYXRpb24vanNvbg==
13+
User-Agent:
14+
- !!binary |
15+
Qm90bzMvMS4zNy4xMyBtZC9Cb3RvY29yZSMxLjM3LjEzIHVhLzIuMSBvcy9tYWNvcyMyNC4zLjAg
16+
bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjkuNiBtZC9weWltcGwjQ1B5dGhvbiBjZmcvcmV0
17+
cnktbW9kZSNsZWdhY3kgQm90b2NvcmUvMS4zNy4xMw==
18+
X-Amz-Date:
19+
- !!binary |
20+
MjAyNTAzMTdUMTMzMjAzWg==
21+
amz-sdk-invocation-id:
22+
- !!binary |
23+
YWRkNDMwM2EtMDhmZS00MjdjLWE0OWEtMzU0NzkzYzE5OWIz
24+
amz-sdk-request:
25+
- !!binary |
26+
YXR0ZW1wdD0x
27+
method: POST
28+
uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/arn%3Aaws%3Abedrock%3Aus-east-1%3A012345678901%3Ainference-profile%2Fus.anthropic.claude-3-7-sonnet-20250219-v1%3A0/converse
29+
response:
30+
body:
31+
string: '{"metrics":{"latencyMs":2900},"output":{"message":{"content":[{"text":"Why
32+
did the developer start using OpenTelemetry?\n\nBecause they wanted to \"trace\"
33+
their steps when their application was running slow, but got lost in a \"span\"
34+
of metrics, logs, and traces!\n\nTurns out debugging is much easier when you''re
35+
not completely in the dark about what your distributed system is doing!"}],"role":"assistant"}},"stopReason":"end_turn","trace":{"guardrail":{"inputAssessment":{"v9kpg6yrwhs2":{"invocationMetrics":{"guardrailCoverage":{"textCharacters":{"guarded":52,"total":52}},"guardrailProcessingLatency":296,"usage":{"contentPolicyUnits":1,"contextualGroundingPolicyUnits":0,"sensitiveInformationPolicyFreeUnits":0,"sensitiveInformationPolicyUnits":1,"topicPolicyUnits":1,"wordPolicyUnits":1}}}},"outputAssessments":{"v9kpg6yrwhs2":[{"invocationMetrics":{"guardrailCoverage":{"textCharacters":{"guarded":308,"total":308}},"guardrailProcessingLatency":236,"usage":{"contentPolicyUnits":1,"contextualGroundingPolicyUnits":0,"sensitiveInformationPolicyFreeUnits":1,"sensitiveInformationPolicyUnits":1,"topicPolicyUnits":1,"wordPolicyUnits":1}}}]}}},"usage":{"cacheReadInputTokenCount":0,"cacheReadInputTokens":0,"cacheWriteInputTokenCount":0,"cacheWriteInputTokens":0,"inputTokens":20,"outputTokens":72,"totalTokens":92}}'
36+
headers:
37+
Connection:
38+
- keep-alive
39+
Content-Length:
40+
- '1322'
41+
Content-Type:
42+
- application/json
43+
Date:
44+
- Mon, 17 Mar 2025 13:32:07 GMT
45+
x-amzn-RequestId:
46+
- 48a1e323-e366-4401-93a3-eb326790a4db
47+
status:
48+
code: 200
49+
message: OK
50+
version: 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
interactions:
2+
- request:
3+
body: '{"messages": [{"role": "user", "content": [{"text": "Tell me a joke about
4+
OpenTelemetry"}]}], "inferenceConfig": {"maxTokens": 500, "topP": 0.9, "topK":
5+
20, "temperature": 0.7}}'
6+
headers:
7+
Content-Length:
8+
- '177'
9+
User-Agent:
10+
- !!binary |
11+
Qm90bzMvMS4zNy4xMyBtZC9Cb3RvY29yZSMxLjM3LjEzIHVhLzIuMSBvcy9tYWNvcyMyNC4zLjAg
12+
bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjkuNiBtZC9weWltcGwjQ1B5dGhvbiBjZmcvcmV0
13+
cnktbW9kZSNsZWdhY3kgQm90b2NvcmUvMS4zNy4xMw==
14+
X-Amz-Date:
15+
- !!binary |
16+
MjAyNTAzMTdUMTQxOTIyWg==
17+
amz-sdk-invocation-id:
18+
- !!binary |
19+
MTQ1ZTJiMTktYzRiMC00OTQzLTkxNzEtNjdiZjA3NDRhYmFj
20+
amz-sdk-request:
21+
- !!binary |
22+
YXR0ZW1wdD0x
23+
method: POST
24+
uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/us.amazon.nova-lite-v1%3A0/invoke
25+
response:
26+
body:
27+
string: '{"output":{"message":{"content":[{"text":"Sure, here''s a joke for
28+
you:\n\nWhy did the OpenTelemetry developer bring a ladder to work?\n\nBecause
29+
they heard the company was going to great lengths to trace their performance
30+
issues to the \"root cause\"!\n\nHope that brought a smile to your face!"}],"role":"assistant"}},"stopReason":"end_turn","usage":{"inputTokens":7,"outputTokens":53,"totalTokens":60,"cacheReadInputTokenCount":0,"cacheWriteInputTokenCount":0}}'
31+
headers:
32+
Connection:
33+
- keep-alive
34+
Content-Length:
35+
- '463'
36+
Content-Type:
37+
- application/json
38+
Date:
39+
- Mon, 17 Mar 2025 14:19:23 GMT
40+
X-Amzn-Bedrock-Cache-Read-Input-Token-Count:
41+
- '0'
42+
X-Amzn-Bedrock-Cache-Write-Input-Token-Count:
43+
- '0'
44+
X-Amzn-Bedrock-Input-Token-Count:
45+
- '7'
46+
X-Amzn-Bedrock-Invocation-Latency:
47+
- '596'
48+
X-Amzn-Bedrock-Output-Token-Count:
49+
- '53'
50+
x-amzn-RequestId:
51+
- ae14e85a-2f22-46b0-8540-277b810fab77
52+
status:
53+
code: 200
54+
message: OK
55+
version: 1

packages/opentelemetry-instrumentation-bedrock/tests/traces/test_anthropic.py

+52
Original file line numberDiff line numberDiff line change
@@ -232,3 +232,55 @@ def test_anthropic_3_completion_string_content(test_context, brt):
232232
anthropic_span.attributes.get("gen_ai.response.id")
233233
== "msg_bdrk_01WR9VHqpyBzBhzgwCDapaQD"
234234
)
235+
236+
237+
@pytest.mark.vcr
def test_anthropic_cross_region(test_context, brt):
    """Converse via an inference-profile ARN: the span must report the
    vendor/model parsed from the embedded cross-region profile id, not the
    raw ARN."""
    messages = [
        {
            "role": "user",
            "content": [
                {"text": "Human: Tell me a joke about opentelemetry Assistant:"},
            ],
        },
    ]

    response = brt.converse(
        modelId=(
            "arn:aws:bedrock:us-east-1:012345678901:"
            "inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0"
        ),
        messages=messages,
        inferenceConfig={"temperature": 0.5},
    )
    completion = response["output"]["message"]["content"][0]["text"]

    exporter, _, _ = test_context
    spans = exporter.get_finished_spans()
    assert len(spans) == 1

    span = spans[0]
    assert span.name == "bedrock.converse"

    # Vendor and model come from the profile id inside the ARN.
    assert span.attributes[SpanAttributes.LLM_SYSTEM] == "anthropic"
    assert (
        span.attributes[SpanAttributes.LLM_REQUEST_MODEL]
        == "claude-3-7-sonnet-20250219-v1"
    )

    # Prompt and completion are mirrored onto the span.
    assert span.attributes[f"{SpanAttributes.LLM_PROMPTS}.0.content"] == json.dumps(
        messages[0]["content"]
    )
    assert (
        span.attributes.get(f"{SpanAttributes.LLM_COMPLETIONS}.0.content")
        == completion
    )

    # Token accounting recorded from the cassette response.
    prompt_tokens = span.attributes.get(SpanAttributes.LLM_USAGE_PROMPT_TOKENS)
    completion_tokens = span.attributes.get(SpanAttributes.LLM_USAGE_COMPLETION_TOKENS)
    assert prompt_tokens == 20
    assert prompt_tokens + completion_tokens == span.attributes.get(
        SpanAttributes.LLM_USAGE_TOTAL_TOKENS
    )

    # No gen_ai.response.id is emitted for this recorded interaction.
    assert span.attributes.get("gen_ai.response.id") is None

packages/opentelemetry-instrumentation-bedrock/tests/traces/test_nova.py

+64
Original file line numberDiff line numberDiff line change
@@ -374,3 +374,67 @@ def test_nova_converse_stream(test_context, brt):
374374
bedrock_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
375375
== inputTokens + outputTokens
376376
)
377+
378+
379+
@pytest.mark.vcr
def test_nova_cross_region_invoke(test_context, brt):
    """invoke_model with a cross-region profile id (``us.amazon.nova-...``)
    must report vendor ``"amazon"`` and the bare model name on the span.

    Fix: the original asserted ``LLM_SYSTEM == "amazon"`` twice (a
    copy-paste duplicate); the redundant assertion block is removed.
    """
    message_list = [
        {"role": "user", "content": [{"text": "Tell me a joke about OpenTelemetry"}]}
    ]
    inf_params = {"maxTokens": 500, "topP": 0.9, "topK": 20, "temperature": 0.7}
    request_body = {
        "messages": message_list,
        "inferenceConfig": inf_params,
    }

    response = brt.invoke_model(
        body=json.dumps(request_body),
        modelId="us.amazon.nova-lite-v1:0",
        accept="application/json",
        contentType="application/json",
    )

    response_body = json.loads(response.get("body").read())

    exporter, _, _ = test_context
    spans = exporter.get_finished_spans()
    assert len(spans) == 1
    assert spans[0].name == "bedrock.completion"

    bedrock_span = spans[0]

    # Vendor/model parsed from the cross-region profile id (region prefix dropped).
    assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_MODEL] == "nova-lite-v1:0"
    assert bedrock_span.attributes[SpanAttributes.LLM_SYSTEM] == "amazon"

    # Request type
    assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion"

    # Prompt attributes
    assert bedrock_span.attributes[f"{SpanAttributes.LLM_PROMPTS}.0.role"] == "user"
    assert bedrock_span.attributes[
        f"{SpanAttributes.LLM_PROMPTS}.0.content"
    ] == json.dumps(message_list[0].get("content"), default=str)

    # Each returned content item is mirrored into a completion attribute.
    generated_text = response_body["output"]["message"]["content"]
    for i, item in enumerate(generated_text):
        assert (
            bedrock_span.attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{i}.content"]
            == item["text"]
        )

    # Other request parameters
    assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_MAX_TOKENS] == 500
    assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TEMPERATURE] == 0.7
    assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TOP_P] == 0.9
    # There is no response id for Amazon Titan/Nova models in the response body,
    # only a request id in the response metadata.
    assert bedrock_span.attributes.get("gen_ai.response.id") is None

0 commit comments

Comments
 (0)