LLM TextGen Bedrock Support (#811)
* Update values and configmap to support Bedrock backend

Signed-off-by: Jonathan Minkin <[email protected]>

* Add access keys to bedrock values, fix readme example

Signed-off-by: Jonathan Minkin <[email protected]>

* Add Bedrock instructions to llm-uservice readme

Signed-off-by: Jonathan Minkin <[email protected]>

---------

Signed-off-by: Jonathan Minkin <[email protected]>
jonminkin97 authored Feb 20, 2025
1 parent 235f22f commit da37b9f
Showing 4 changed files with 71 additions and 12 deletions.
37 changes: 26 additions & 11 deletions helm-charts/common/llm-uservice/README.md
@@ -35,6 +35,18 @@ helm install llm-uservice . --set TEXTGEN_BACKEND="TGI" --set LLM_ENDPOINT=${LLM_ENDPOINT} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --wait
# install llm-textgen with vLLM backend
# helm install llm-uservice . --set TEXTGEN_BACKEND="vLLM" --set LLM_ENDPOINT=${LLM_ENDPOINT} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --wait

# install llm-textgen with BEDROCK backend
export LLM_MODEL_ID="insert-bedrock-model-id-here"

# If you plan to use an IAM User to provide AWS access
export AWS_ACCESS_KEY_ID="insert-your-aws-access-key-here"
export AWS_SECRET_ACCESS_KEY="insert-your-aws-secret-key-here"
helm install llm-uservice . --set TEXTGEN_BACKEND="BEDROCK" --set LLM_MODEL_ID=${LLM_MODEL_ID} --set bedrock.AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} --set bedrock.AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} --wait

# If you plan to use EKS Pod Identity or IAM Roles for Service Accounts (IRSA) to provide AWS access (see the policy sketch after this code block)
export SERVICE_ACCOUNT_NAME="insert-service-account-name"
helm install llm-uservice . --set TEXTGEN_BACKEND="BEDROCK" --set LLM_MODEL_ID=${LLM_MODEL_ID} --set serviceAccount.create=true --set serviceAccount.name=${SERVICE_ACCOUNT_NAME} --wait

# install llm-docsum with TGI backend
# helm install llm-uservice . --set image.repository="opea/llm-docsum" --set DOCSUM_BACKEND="TGI" --set LLM_ENDPOINT=${LLM_ENDPOINT} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set MAX_INPUT_TOKENS=2048 --set MAX_TOTAL_TOKENS=4096 --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --wait

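For the EKS Pod Identity / IRSA path, the IAM role bound to the service account must be allowed to invoke Bedrock. A minimal identity-policy sketch (illustrative only; in practice, scope `Resource` to the specific model ARNs you use):

```json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "bedrock:InvokeModel",
        "bedrock:InvokeModelWithResponseStream"
      ],
      "Resource": "*"
    }
  ]
}
```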
@@ -60,7 +72,7 @@ Open another terminal and run the following command to verify the service is working properly.
# for llm-textgen service
curl http://localhost:9000/v1/chat/completions \
  -X POST \
-  -d d '{"model": "${LLM_MODEL_ID}", "messages": "What is Deep Learning?", "max_tokens":17}' \
+  -d '{"model": "'${LLM_MODEL_ID}'", "messages": "What is Deep Learning?", "max_tokens":17}' \
  -H 'Content-Type: application/json'

# for llm-docsum service
@@ -78,13 +90,16 @@ curl http://localhost:9000/v1/faqgen \

## Values

-| Key                             | Type   | Default                       | Description                                                                        |
-| ------------------------------- | ------ | ----------------------------- | ---------------------------------------------------------------------------------- |
-| global.HUGGINGFACEHUB_API_TOKEN | string | `""`                          | Your own Hugging Face API token                                                    |
-| image.repository                | string | `"opea/llm-textgen"`          | one of "opea/llm-textgen", "opea/llm-docsum", "opea/llm-faqgen"                    |
-| LLM_ENDPOINT                    | string | `""`                          | backend inference service endpoint                                                 |
-| LLM_MODEL_ID                    | string | `"Intel/neural-chat-7b-v3-3"` | model used by the inference backend                                                |
-| TEXTGEN_BACKEND                 | string | `"TGI"`                       | backend inference engine, only valid for llm-textgen image, one of "TGI", "vLLM"   |
-| DOCSUM_BACKEND                  | string | `"TGI"`                       | backend inference engine, only valid for llm-docsum image, one of "TGI", "vLLM"    |
-| FAQGEN_BACKEND                  | string | `"TGI"`                       | backend inference engine, only valid for llm-faqgen image, one of "TGi", "vLLM"    |
-| global.monitoring               | bool   | `false`                       | Service usage metrics                                                              |
+| Key                             | Type   | Default                       | Description                                                                                                                        |
+| ------------------------------- | ------ | ----------------------------- | ---------------------------------------------------------------------------------------------------------------------------------- |
+| global.HUGGINGFACEHUB_API_TOKEN | string | `""`                          | Your own Hugging Face API token                                                                                                    |
+| image.repository                | string | `"opea/llm-textgen"`          | one of "opea/llm-textgen", "opea/llm-docsum", "opea/llm-faqgen"                                                                    |
+| LLM_ENDPOINT                    | string | `""`                          | backend inference service endpoint                                                                                                 |
+| LLM_MODEL_ID                    | string | `"Intel/neural-chat-7b-v3-3"` | model used by the inference backend                                                                                                |
+| TEXTGEN_BACKEND                 | string | `"TGI"`                       | backend inference engine, only valid for llm-textgen image, one of "TGI", "vLLM", "BEDROCK"                                        |
+| DOCSUM_BACKEND                  | string | `"TGI"`                       | backend inference engine, only valid for llm-docsum image, one of "TGI", "vLLM"                                                    |
+| FAQGEN_BACKEND                  | string | `"TGI"`                       | backend inference engine, only valid for llm-faqgen image, one of "TGI", "vLLM"                                                    |
+| global.monitoring               | bool   | `false`                       | Service usage metrics                                                                                                              |
+| bedrock.BEDROCK_REGION          | string | `"us-east-1"`                 | The AWS Region to use when accessing the Bedrock service                                                                           |
+| bedrock.AWS_ACCESS_KEY_ID       | string | `""`                          | The AWS Access Key to use when authenticating with the Bedrock service. If set, bedrock.AWS_SECRET_ACCESS_KEY must also be set     |
+| bedrock.AWS_SECRET_ACCESS_KEY   | string | `""`                          | The AWS Secret Access Key to use when authenticating with the Bedrock service. If set, bedrock.AWS_ACCESS_KEY_ID must also be set  |
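For the Bedrock backend, assuming the example Claude model id and the same port-forward used in the verification step, the llm-textgen check looks the same — only the model id changes:

```console
export LLM_MODEL_ID="anthropic.claude-3-5-sonnet-20240620-v1:0"
curl http://localhost:9000/v1/chat/completions \
  -X POST \
  -d '{"model": "'${LLM_MODEL_ID}'", "messages": "What is Deep Learning?", "max_tokens":17}' \
  -H 'Content-Type: application/json'
```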
11 changes: 11 additions & 0 deletions helm-charts/common/llm-uservice/templates/configmap.yaml
@@ -19,6 +19,17 @@ data:
  {{- if not .Values.LLM_ENDPOINT }}
  LLM_ENDPOINT: "http://{{ .Release.Name }}-vllm"
  {{- end }}
  {{- else if eq "BEDROCK" .Values.TEXTGEN_BACKEND }}
  LLM_COMPONENT_NAME: "OpeaTextGenBedrock"
  BEDROCK_REGION: {{ .Values.bedrock.BEDROCK_REGION | quote }}
  LLM_ENDPOINT: "https://bedrock.{{ .Values.bedrock.BEDROCK_REGION }}.amazonaws.com"
  {{- if .Values.LLM_MODEL_ID }}
  MODEL_ID: {{ .Values.LLM_MODEL_ID | quote }}
  {{- end }}
  {{- if and .Values.bedrock.AWS_ACCESS_KEY_ID .Values.bedrock.AWS_SECRET_ACCESS_KEY }}
  AWS_ACCESS_KEY_ID: {{ .Values.bedrock.AWS_ACCESS_KEY_ID | quote }}
  AWS_SECRET_ACCESS_KEY: {{ .Values.bedrock.AWS_SECRET_ACCESS_KEY | quote }}
  {{- end }}
  {{- else }}
  {{- cat "Invalid TEXTGEN_BACKEND:" .Values.TEXTGEN_BACKEND | fail }}
  {{- end }}
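With the example Bedrock values (region `us-east-1`, the Claude model id, no static keys), this branch renders roughly to the following — a sketch, since the surrounding ConfigMap boilerplate is not shown in this diff:

```yaml
data:
  LLM_COMPONENT_NAME: "OpeaTextGenBedrock"
  BEDROCK_REGION: "us-east-1"
  LLM_ENDPOINT: "https://bedrock.us-east-1.amazonaws.com"
  MODEL_ID: "anthropic.claude-3-5-sonnet-20240620-v1:0"
  # AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY appear only when both are set
```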
11 changes: 10 additions & 1 deletion helm-charts/common/llm-uservice/values.yaml
@@ -10,7 +10,7 @@
LOGFLAG: ""

# settings for llm-textgen service
-# backend inference engine to use, i.e. TGI, vLLM
+# backend inference engine to use, i.e. TGI, vLLM, BEDROCK
TEXTGEN_BACKEND: "TGI"

# settings for llm-docsum service
@@ -29,6 +29,15 @@ LLM_MODEL_ID: "Intel/neural-chat-7b-v3-3"
# inference backend URL, e.g. http://tgi:80
LLM_ENDPOINT: ""

# settings for llm-textgen using Bedrock
bedrock:
  BEDROCK_REGION: "us-east-1"
  # Used for providing access to AWS
  # If using EKS Pod Identity or IAM Roles for Service Accounts (IRSA),
  # do not set these values and ensure a service account is created
  AWS_ACCESS_KEY_ID: ""
  AWS_SECRET_ACCESS_KEY: ""

replicaCount: 1

image:
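As a usage sketch for these values (the filename `my-bedrock-values.yaml` and region `us-west-2` are illustrative, and the `serviceAccount` block assumes the chart's standard service-account support), an override file can switch the backend without editing the chart:

```console
cat > my-bedrock-values.yaml <<EOF
TEXTGEN_BACKEND: "BEDROCK"
LLM_MODEL_ID: "anthropic.claude-3-5-sonnet-20240620-v1:0"
bedrock:
  BEDROCK_REGION: "us-west-2"
serviceAccount:
  create: true
EOF
helm install llm-uservice . -f my-bedrock-values.yaml --wait
```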
24 changes: 24 additions & 0 deletions (new file)
@@ -0,0 +1,24 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

image:
  repository: opea/llm-textgen
  tag: "latest"

TEXTGEN_BACKEND: "BEDROCK"
LLM_MODEL_ID: "anthropic.claude-3-5-sonnet-20240620-v1:0"

bedrock:
  BEDROCK_REGION: "us-east-1"
  # Uncomment if using an IAM User
  # AWS_ACCESS_KEY_ID: ""
  # AWS_SECRET_ACCESS_KEY: ""

serviceAccount:
  create: true
  automount: true

tgi:
  enabled: false
vllm:
  enabled: false
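One way to exercise this preset (the new values file's path is not visible in this view, so `bedrock-values.yaml` stands in for it):

```console
# hypothetical filename standing in for this commit's new values file
helm template llm-uservice . -f bedrock-values.yaml | grep -E 'BEDROCK|OpeaTextGenBedrock'
helm install llm-uservice . -f bedrock-values.yaml --wait
```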
