LLM TextGen Bedrock Support (#811)
* Update values and configmap to support Bedrock backend

Signed-off-by: Jonathan Minkin <[email protected]>

* Add access keys to bedrock values, fix readme example

Signed-off-by: Jonathan Minkin <[email protected]>

* Add Bedrock instructions to llm-uservice readme

Signed-off-by: Jonathan Minkin <[email protected]>

---------

Signed-off-by: Jonathan Minkin <[email protected]>
jonminkin97 authored Feb 20, 2025
1 parent 235f22f commit da37b9f
Showing 4 changed files with 71 additions and 12 deletions.
37 changes: 26 additions & 11 deletions helm-charts/common/llm-uservice/README.md
@@ -35,6 +35,18 @@ helm install llm-uservice . --set TEXTGEN_BACKEND="TGI" --set LLM_ENDPOINT=${LLM_ENDPOINT} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --wait
# install llm-textgen with vLLM backend
# helm install llm-uservice . --set TEXTGEN_BACKEND="vLLM" --set LLM_ENDPOINT=${LLM_ENDPOINT} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --wait

# install llm-textgen with BEDROCK backend
export LLM_MODEL_ID="insert-bedrock-model-id-here"

# If you plan to use an IAM User to provide AWS access
export AWS_ACCESS_KEY_ID="insert-your-aws-access-key-here"
export AWS_SECRET_ACCESS_KEY="insert-your-aws-secret-key-here"
helm install llm-uservice . --set TEXTGEN_BACKEND="BEDROCK" --set LLM_MODEL_ID=${LLM_MODEL_ID} --set bedrock.AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} --set bedrock.AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} --wait

# If you plan to use EKS Pod Identity or IAM Roles for Service Accounts (IRSA) to provide AWS access (see the policy sketch after this code block)
export SERVICE_ACCOUNT_NAME="insert-service-account-name"
helm install llm-uservice . --set TEXTGEN_BACKEND="BEDROCK" --set LLM_MODEL_ID=${LLM_MODEL_ID} --set serviceAccount.create=true --set serviceAccount.name=${SERVICE_ACCOUNT_NAME} --wait

# install llm-docsum with TGI backend
# helm install llm-uservice . --set image.repository="opea/llm-docsum" --set DOCSUM_BACKEND="TGI" --set LLM_ENDPOINT=${LLM_ENDPOINT} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set MAX_INPUT_TOKENS=2048 --set MAX_TOTAL_TOKENS=4096 --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --wait

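For the EKS Pod Identity / IRSA path, the IAM role bound to the service account must be allowed to invoke Bedrock. A minimal identity-policy sketch (illustrative only; in practice, scope `Resource` to the specific model ARNs you use):

```json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "bedrock:InvokeModel",
        "bedrock:InvokeModelWithResponseStream"
      ],
      "Resource": "*"
    }
  ]
}
```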
@@ -60,7 +72,7 @@ Open another terminal and run the following command to verify the service is working properly.
# for llm-textgen service
curl http://localhost:9000/v1/chat/completions \
  -X POST \
-  -d d '{"model": "${LLM_MODEL_ID}", "messages": "What is Deep Learning?", "max_tokens":17}' \
+  -d '{"model": "'${LLM_MODEL_ID}'", "messages": "What is Deep Learning?", "max_tokens":17}' \
  -H 'Content-Type: application/json'

# for llm-docsum service
@@ -78,13 +90,16 @@ curl http://localhost:9000/v1/faqgen \

## Values

-| Key                             | Type   | Default                       | Description                                                                        |
-| ------------------------------- | ------ | ----------------------------- | ---------------------------------------------------------------------------------- |
-| global.HUGGINGFACEHUB_API_TOKEN | string | `""`                          | Your own Hugging Face API token                                                    |
-| image.repository                | string | `"opea/llm-textgen"`          | one of "opea/llm-textgen", "opea/llm-docsum", "opea/llm-faqgen"                    |
-| LLM_ENDPOINT                    | string | `""`                          | backend inference service endpoint                                                 |
-| LLM_MODEL_ID                    | string | `"Intel/neural-chat-7b-v3-3"` | model used by the inference backend                                                |
-| TEXTGEN_BACKEND                 | string | `"TGI"`                       | backend inference engine, only valid for llm-textgen image, one of "TGI", "vLLM"   |
-| DOCSUM_BACKEND                  | string | `"TGI"`                       | backend inference engine, only valid for llm-docsum image, one of "TGI", "vLLM"    |
-| FAQGEN_BACKEND                  | string | `"TGI"`                       | backend inference engine, only valid for llm-faqgen image, one of "TGi", "vLLM"    |
-| global.monitoring               | bool   | `false`                       | Service usage metrics                                                              |
+| Key                             | Type   | Default                       | Description                                                                                                                        |
+| ------------------------------- | ------ | ----------------------------- | ---------------------------------------------------------------------------------------------------------------------------------- |
+| global.HUGGINGFACEHUB_API_TOKEN | string | `""`                          | Your own Hugging Face API token                                                                                                    |
+| image.repository                | string | `"opea/llm-textgen"`          | one of "opea/llm-textgen", "opea/llm-docsum", "opea/llm-faqgen"                                                                    |
+| LLM_ENDPOINT                    | string | `""`                          | backend inference service endpoint                                                                                                 |
+| LLM_MODEL_ID                    | string | `"Intel/neural-chat-7b-v3-3"` | model used by the inference backend                                                                                                |
+| TEXTGEN_BACKEND                 | string | `"TGI"`                       | backend inference engine, only valid for llm-textgen image, one of "TGI", "vLLM", "BEDROCK"                                        |
+| DOCSUM_BACKEND                  | string | `"TGI"`                       | backend inference engine, only valid for llm-docsum image, one of "TGI", "vLLM"                                                    |
+| FAQGEN_BACKEND                  | string | `"TGI"`                       | backend inference engine, only valid for llm-faqgen image, one of "TGI", "vLLM"                                                    |
+| global.monitoring               | bool   | `false`                       | Service usage metrics                                                                                                              |
+| bedrock.BEDROCK_REGION          | string | `"us-east-1"`                 | The AWS Region to use when accessing the Bedrock service                                                                           |
+| bedrock.AWS_ACCESS_KEY_ID       | string | `""`                          | The AWS Access Key to use when authenticating with the Bedrock service. If set, bedrock.AWS_SECRET_ACCESS_KEY must also be set     |
+| bedrock.AWS_SECRET_ACCESS_KEY   | string | `""`                          | The AWS Secret Access Key to use when authenticating with the Bedrock service. If set, bedrock.AWS_ACCESS_KEY_ID must also be set  |
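For the Bedrock backend, assuming the example Claude model id and the same port-forward used in the verification step, the llm-textgen check looks the same — only the model id changes:

```console
export LLM_MODEL_ID="anthropic.claude-3-5-sonnet-20240620-v1:0"
curl http://localhost:9000/v1/chat/completions \
  -X POST \
  -d '{"model": "'${LLM_MODEL_ID}'", "messages": "What is Deep Learning?", "max_tokens":17}' \
  -H 'Content-Type: application/json'
```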
11 changes: 11 additions & 0 deletions helm-charts/common/llm-uservice/templates/configmap.yaml
@@ -19,6 +19,17 @@ data:
  {{- if not .Values.LLM_ENDPOINT }}
  LLM_ENDPOINT: "http://{{ .Release.Name }}-vllm"
  {{- end }}
  {{- else if eq "BEDROCK" .Values.TEXTGEN_BACKEND }}
  LLM_COMPONENT_NAME: "OpeaTextGenBedrock"
  BEDROCK_REGION: {{ .Values.bedrock.BEDROCK_REGION | quote }}
  LLM_ENDPOINT: "https://bedrock.{{ .Values.bedrock.BEDROCK_REGION }}.amazonaws.com"
  {{- if .Values.LLM_MODEL_ID }}
  MODEL_ID: {{ .Values.LLM_MODEL_ID | quote }}
  {{- end }}
  {{- if and .Values.bedrock.AWS_ACCESS_KEY_ID .Values.bedrock.AWS_SECRET_ACCESS_KEY }}
  AWS_ACCESS_KEY_ID: {{ .Values.bedrock.AWS_ACCESS_KEY_ID | quote }}
  AWS_SECRET_ACCESS_KEY: {{ .Values.bedrock.AWS_SECRET_ACCESS_KEY | quote }}
  {{- end }}
  {{- else }}
  {{- cat "Invalid TEXTGEN_BACKEND:" .Values.TEXTGEN_BACKEND | fail }}
  {{- end }}
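With the example Bedrock values (region `us-east-1`, the Claude model id, no static keys), this branch renders roughly to the following — a sketch, since the surrounding ConfigMap boilerplate is not shown in this diff:

```yaml
data:
  LLM_COMPONENT_NAME: "OpeaTextGenBedrock"
  BEDROCK_REGION: "us-east-1"
  LLM_ENDPOINT: "https://bedrock.us-east-1.amazonaws.com"
  MODEL_ID: "anthropic.claude-3-5-sonnet-20240620-v1:0"
  # AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY appear only when both are set
```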
11 changes: 10 additions & 1 deletion helm-charts/common/llm-uservice/values.yaml
@@ -10,7 +10,7 @@
LOGFLAG: ""

# settings for llm-textgen service
-# backend inference engine to use, i.e. TGI, vLLM
+# backend inference engine to use, i.e. TGI, vLLM, BEDROCK
TEXTGEN_BACKEND: "TGI"

# settings for llm-docsum service
@@ -29,6 +29,15 @@ LLM_MODEL_ID: "Intel/neural-chat-7b-v3-3"
# inference backend URL, e.g. http://tgi:80
LLM_ENDPOINT: ""

# settings for llm-textgen using Bedrock
bedrock:
  BEDROCK_REGION: "us-east-1"
  # Used for providing access to AWS
  # If using EKS Pod Identity or IAM Roles for Service Accounts (IRSA),
  # do not set these values and ensure a service account is created
  AWS_ACCESS_KEY_ID: ""
  AWS_SECRET_ACCESS_KEY: ""

replicaCount: 1

image:
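As a usage sketch for these values (the filename `my-bedrock-values.yaml` and region `us-west-2` are illustrative, and the `serviceAccount` block assumes the chart's standard service-account support), an override file can switch the backend without editing the chart:

```console
cat > my-bedrock-values.yaml <<EOF
TEXTGEN_BACKEND: "BEDROCK"
LLM_MODEL_ID: "anthropic.claude-3-5-sonnet-20240620-v1:0"
bedrock:
  BEDROCK_REGION: "us-west-2"
serviceAccount:
  create: true
EOF
helm install llm-uservice . -f my-bedrock-values.yaml --wait
```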
24 changes: 24 additions & 0 deletions (new file)
@@ -0,0 +1,24 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

image:
  repository: opea/llm-textgen
  tag: "latest"

TEXTGEN_BACKEND: "BEDROCK"
LLM_MODEL_ID: "anthropic.claude-3-5-sonnet-20240620-v1:0"

bedrock:
  BEDROCK_REGION: "us-east-1"
  # Uncomment if using an IAM User
  # AWS_ACCESS_KEY_ID: ""
  # AWS_SECRET_ACCESS_KEY: ""

serviceAccount:
  create: true
  automount: true

tgi:
  enabled: false
vllm:
  enabled: false
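One way to exercise this preset (the new values file's path is not visible in this view, so `bedrock-values.yaml` stands in for it):

```console
# hypothetical filename standing in for this commit's new values file
helm template llm-uservice . -f bedrock-values.yaml | grep -E 'BEDROCK|OpeaTextGenBedrock'
helm install llm-uservice . -f bedrock-values.yaml --wait
```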
