From a838022ff268f5ecf57ceda3bbe465975f61094c Mon Sep 17 00:00:00 2001
From: John Collier
Date: Thu, 1 Aug 2024 18:49:10 -0400
Subject: [PATCH] DEVHAS-731: Allow existing model servers to be passed into
 templates (#23)

* DEVHAS-731: Allow existing model servers to be passed into templates

Signed-off-by: John Collier

* Revert initcontainercommand

Signed-off-by: John Collier

* Remove modelPath param

Signed-off-by: John Collier

* Update skeleton/template.yaml

Co-authored-by: Stephanie Cao

* Regenerate templates

Signed-off-by: John Collier

* Allow model to be passed in for audio-to-text

Signed-off-by: John Collier

* Allow model to be selected for all samples

Signed-off-by: John Collier

* Fix bug preventing proper defaults from being shown

Signed-off-by: John Collier

* Fix typo

Signed-off-by: John Collier

* Pull in latest from generate.sh

Signed-off-by: John Collier

---------

Signed-off-by: John Collier
Co-authored-by: Stephanie Cao
---
 scripts/envs/audio-to-text                   |  6 +-
 scripts/envs/chatbot                         |  7 +-
 scripts/envs/codegen                         |  7 +-
 scripts/envs/object-detection                |  7 +-
 scripts/import-ai-lab-samples                | 10 ++-
 .../.tekton/gitops-on-pull-request.yaml      |  5 ++
 .../http/base/deployment-model-server.yaml   |  2 +-
 .../components/http/base/kustomization.yaml  |  2 +
 .../components/http/base/model-config.yaml   |  8 +-
 skeleton/template.yaml                       | 79 ++++++++++++++++---
 templates/audio-to-text/template.yaml        | 31 ++++++++
 templates/chatbot/template.yaml              | 53 ++++++++++---
 templates/codegen/template.yaml              | 53 ++++++++++---
 templates/object-detection/template.yaml     | 31 ++++++++
 14 files changed, 254 insertions(+), 47 deletions(-)

diff --git a/scripts/envs/audio-to-text b/scripts/envs/audio-to-text
index f975db3f..cfda4af0 100644
--- a/scripts/envs/audio-to-text
+++ b/scripts/envs/audio-to-text
@@ -9,8 +9,10 @@ export APP_PORT=8501
 export MODEL_SERVICE_CONTAINER="quay.io/redhat-ai-dev/whispercpp:latest"
 export MODEL_SERVICE_PORT=8001
 
-# vllm configurations
-export SUPPORT_VLLM=false
+# model configurations
+export SUPPORT_LLM=false
+export SUPPORT_ASR=true
+export SUPPORT_DETR=false
 
 # for gitlab case, since gitlab does not have pipeline webhook pre-set to trigger the initial build
 export APP_INTERFACE_CONTAINER="quay.io/redhat-ai-dev/audio-to-text:latest"
diff --git a/scripts/envs/chatbot b/scripts/envs/chatbot
index 16fd5384..e2adf4b8 100755
--- a/scripts/envs/chatbot
+++ b/scripts/envs/chatbot
@@ -9,8 +9,11 @@ export APP_PORT=8501
 export MODEL_SERVICE_CONTAINER="quay.io/ai-lab/llamacpp_python:latest"
 export MODEL_SERVICE_PORT=8001
 
-# vllm configurations
-export SUPPORT_VLLM=true
+# model configurations
+export SUPPORT_LLM=true
+export SUPPORT_ASR=false
+export SUPPORT_DETR=false
+
 export VLLM_CONTAINER="quay.io/rh-aiservices-bu/vllm-openai-ubi9:0.4.2"
 export VLLM_MODEL_NAME="instructlab/granite-7b-lab"
 export VLLM_MAX_MODEL_LEN=4096
diff --git a/scripts/envs/codegen b/scripts/envs/codegen
index a8a71681..79317724 100755
--- a/scripts/envs/codegen
+++ b/scripts/envs/codegen
@@ -9,8 +9,11 @@ export APP_PORT=8501
 export MODEL_SERVICE_CONTAINER="quay.io/ai-lab/llamacpp_python:latest"
 export MODEL_SERVICE_PORT=8001
 
-# vllm configurations
-export SUPPORT_VLLM=true
+# model configurations
+export SUPPORT_LLM=true
+export SUPPORT_ASR=false
+export SUPPORT_DETR=false
+
 export VLLM_CONTAINER="quay.io/rh-aiservices-bu/vllm-openai-ubi9:0.4.2"
 export VLLM_MODEL_NAME="Nondzu/Mistral-7B-code-16k-qlora"
 export VLLM_MAX_MODEL_LEN=6144
diff --git a/scripts/envs/object-detection b/scripts/envs/object-detection
index 880bd099..02655499 100755
--- a/scripts/envs/object-detection
+++ b/scripts/envs/object-detection
@@ -9,9 +9,10 @@ export APP_PORT=8501
 export MODEL_SERVICE_CONTAINER="quay.io/redhat-ai-dev/object_detection_python:latest"
 export MODEL_SERVICE_PORT=8000
 
-# vllm configurations
-export SUPPORT_VLLM=false
-
+# model configurations
+export SUPPORT_LLM=false
+export SUPPORT_ASR=false
+export SUPPORT_DETR=true
 
 # for gitlab case, since gitlab does not have pipeline webhook pre-set to trigger the initial build
 export APP_INTERFACE_CONTAINER="quay.io/redhat-ai-dev/object_detection:latest"
diff --git a/scripts/import-ai-lab-samples b/scripts/import-ai-lab-samples
index 86f8b486..12e028eb 100755
--- a/scripts/import-ai-lab-samples
+++ b/scripts/import-ai-lab-samples
@@ -53,8 +53,14 @@ for f in */; do
     sed -i "s!sed.edit.APPTAGS!$APP_TAGS!g" $DEST/template.yaml
     sed -i "s!sed.edit.CATALOG_DESCRIPTION!Secure Supply Chain Example for $APP_DESC!g" $DEST/template.yaml
 
-    if [ $SUPPORT_VLLM == false ]; then
-      sed -i '/# SED_MODEL_SERVER_START/,/# SED_MODEL_SERVER_END/d' $DEST/template.yaml
+    if [ $SUPPORT_LLM == false ]; then
+      sed -i '/# SED_LLM_SERVER_START/,/# SED_LLM_SERVER_END/d' $DEST/template.yaml
+    fi
+    if [ $SUPPORT_ASR == false ]; then
+      sed -i '/# SED_ASR_MODEL_SERVER_START/,/# SED_ASR_MODEL_SERVER_END/d' $DEST/template.yaml
+    fi
+    if [ $SUPPORT_DETR == false ]; then
+      sed -i '/# SED_DETR_MODEL_SERVER_START/,/# SED_DETR_MODEL_SERVER_END/d' $DEST/template.yaml
     fi
 
     source $ROOT_DIR/properties
diff --git a/skeleton/gitops-template/.tekton/gitops-on-pull-request.yaml b/skeleton/gitops-template/.tekton/gitops-on-pull-request.yaml
index fa41c30f..4edf1929 100644
--- a/skeleton/gitops-template/.tekton/gitops-on-pull-request.yaml
+++ b/skeleton/gitops-template/.tekton/gitops-on-pull-request.yaml
@@ -9,6 +9,9 @@ metadata:
     pipelinesascode.tekton.dev/task-0: "https://raw.githubusercontent.com/redhat-appstudio/tssc-sample-pipelines/main/pac/tasks/git-clone.yaml"
     pipelinesascode.tekton.dev/task-1: "https://raw.githubusercontent.com/redhat-appstudio/tssc-sample-pipelines/main/pac/tasks/gather-deploy-images.yaml"
     pipelinesascode.tekton.dev/task-2: "https://raw.githubusercontent.com/redhat-appstudio/tssc-sample-pipelines/main/pac/tasks/verify-enterprise-contract.yaml"
+    pipelinesascode.tekton.dev/task-3: "https://raw.githubusercontent.com/redhat-appstudio/tssc-sample-pipelines/main/pac/tasks/gather-deploy-images.yaml"
+    pipelinesascode.tekton.dev/task-4: "https://raw.githubusercontent.com/redhat-appstudio/tssc-sample-pipelines/main/pac/tasks/download-sbom-from-url-in-attestation.yaml"
+    pipelinesascode.tekton.dev/task-5: "https://raw.githubusercontent.com/redhat-appstudio/tssc-sample-pipelines/main/pac/tasks/upload-sbom-to-trustification.yaml"
   labels:
     argocd/app-name: ${{ values.name }}
     janus-idp.io/tekton: ${{ values.name }}
@@ -23,6 +26,8 @@ spec:
       value: '{{revision}}'
     - name: target-branch
       value: '{{target_branch}}'
+    - name: fail-if-trustification-not-configured
+      value: 'false'
   pipelineRef:
     name: gitops-pull-request
   workspaces:
diff --git a/skeleton/gitops-template/components/http/base/deployment-model-server.yaml b/skeleton/gitops-template/components/http/base/deployment-model-server.yaml
index 0687ab73..d47f6589 100644
--- a/skeleton/gitops-template/components/http/base/deployment-model-server.yaml
+++ b/skeleton/gitops-template/components/http/base/deployment-model-server.yaml
@@ -36,7 +36,7 @@ spec:
       - image: ${{ values.vllmModelServiceContainer }}
         args: [
           "--model",
-          "${{ values.vllmModelName }}",
+          "${{ values.modelName }}",
           "--port",
"${{ values.modelServicePort }}", "--download-dir", diff --git a/skeleton/gitops-template/components/http/base/kustomization.yaml b/skeleton/gitops-template/components/http/base/kustomization.yaml index 9b30acb9..4418fd99 100644 --- a/skeleton/gitops-template/components/http/base/kustomization.yaml +++ b/skeleton/gitops-template/components/http/base/kustomization.yaml @@ -11,8 +11,10 @@ resources: {%- if values.vllmSelected %} - pvc.yaml {%- endif %} +{%- if not values.existingModelServer %} - deployment-model-server.yaml - service-model-server.yaml +{%- endif %} - deployment.yaml - route.yaml - service.yaml diff --git a/skeleton/gitops-template/components/http/base/model-config.yaml b/skeleton/gitops-template/components/http/base/model-config.yaml index 092839ab..7b8185ac 100644 --- a/skeleton/gitops-template/components/http/base/model-config.yaml +++ b/skeleton/gitops-template/components/http/base/model-config.yaml @@ -3,7 +3,11 @@ kind: ConfigMap metadata: name: ${{ values.name }}-model-config data: + {%- if values.existingModelServer %} + MODEL_ENDPOINT: "${{ values.modelEndpoint }}" + {%- else %} MODEL_ENDPOINT: "http://${{ values.name }}-model-server:${{ values.modelServicePort }}" - {%- if values.vllmSelected %} - MODEL_NAME: "${{ values.vllmModelName }}" + {%- endif %} + {%- if values.vllmSelected or values.existingModelServer %} + MODEL_NAME: "${{ values.modelName }}" {%- endif %} \ No newline at end of file diff --git a/skeleton/template.yaml b/skeleton/template.yaml index 60c79466..23b263ce 100644 --- a/skeleton/template.yaml +++ b/skeleton/template.yaml @@ -15,9 +15,7 @@ spec: required: - name - owner - # SED_MODEL_SERVER_START - modelServer - # SED_MODEL_SERVER_END properties: name: title: Name @@ -37,17 +35,74 @@ spec: ui:options: catalogFilter: kind: [Group, User] - # SED_MODEL_SERVER_START modelServer: - title: Large Language Model(LLM) Server + # SED_LLM_SERVER_START + title: Large Language Model(LLM) Server description: The model server container to use + ui:help: If you choose vLLM, ensure your cluster has Nvidia GPU supported, and is with enough cpu and memory + default: llama.cpp type: string enum: - - llama.cpp - vLLM - default: llama.cpp - ui:help: If you choose vLLM, ensure your cluster has Nvidia GPU supported, and is with enough cpu and memory - # SED_MODEL_SERVER_END + - llama.cpp + # SED_LLM_SERVER_END + # SED_ASR_MODEL_SERVER_START + title: ASR Model Server + description: The model server container to use. The deployed model on the server must support automatic speech recognition (ASR). + type: string + default: whisper.cpp + enum: + - whisper.cpp + # SED_ASR_MODEL_SERVER_END + # SED_DETR_MODEL_SERVER_START + title: DETR Model Server + description: The model server container to use. The deployed model on the server must support object detection. + type: string + default: detr-resnet-101 + enum: + - detr-resnet-101 + # SED_DETR_MODEL_SERVER_END + - Existing model server + dependencies: + modelServer: + oneOf: + - required: + - modelEndpoint + # SED_LLM_SERVER_START + - modelName + # SED_LLM_SERVER_END + properties: + modelServer: + const: Existing model server + modelEndpoint: + title: Model Server Endpoint + type: string + description: "The endpoint for an existing model server." + # SED_LLM_SERVER_START + modelName: + title: Model Name + type: string + ui:help: "The name of the model deployed on the model server you would like to use." 
+                # SED_LLM_SERVER_END
+            # SED_LLM_SERVER_START
+            - properties:
+                modelServer:
+                  const: vLLM
+            - properties:
+                modelServer:
+                  const: llama.cpp
+            # SED_LLM_SERVER_END
+            # SED_ASR_MODEL_SERVER_START
+            - properties:
+                modelServer:
+                  const: whisper.cpp
+            # SED_ASR_MODEL_SERVER_END
+            # SED_DETR_MODEL_SERVER_START
+            - properties:
+                modelServer:
+                  const: detr-resnet-101
+            # SED_DETR_MODEL_SERVER_END
+
     - title: Application Repository Information
       required:
         - hostType
@@ -245,13 +300,15 @@ spec:
           appPort: ${APP_PORT}
           modelServiceContainer: ${MODEL_SERVICE_CONTAINER}
           modelServicePort: ${MODEL_SERVICE_PORT}
-          # SED_MODEL_SERVER_START
+          # SED_LLM_SERVER_START
           # for vllm
           vllmSelected: ${{ parameters.modelServer === 'vLLM' }}
           vllmModelServiceContainer: ${VLLM_CONTAINER}
-          vllmModelName: ${VLLM_MODEL_NAME}
+          modelName: ${{ parameters.modelName if parameters.modelServer === 'Existing model server' else '${VLLM_MODEL_NAME}' }}
           maxModelLength: ${VLLM_MAX_MODEL_LEN}
-          # SED_MODEL_SERVER_END
+          # SED_LLM_SERVER_END
+          existingModelServer: ${{ parameters.modelServer === 'Existing model server' }}
+          modelEndpoint: ${{ parameters.modelEndpoint }}
           # for RHOAI
           rhoaiSelected: ${{ parameters.rhoaiSelected }}
     - action: fs:rename
diff --git a/templates/audio-to-text/template.yaml b/templates/audio-to-text/template.yaml
index a1868181..3fa011f7 100644
--- a/templates/audio-to-text/template.yaml
+++ b/templates/audio-to-text/template.yaml
@@ -15,6 +15,7 @@ spec:
       required:
         - name
         - owner
+        - modelServer
       properties:
         name:
           title: Name
@@ -34,6 +35,34 @@ spec:
           ui:options:
             catalogFilter:
               kind: [Group, User]
+        modelServer:
+          # SED_ASR_MODEL_SERVER_START
+          title: ASR Model Server
+          description: The model server container to use. The deployed model on the server must support automatic speech recognition (ASR).
+          type: string
+          default: whisper.cpp
+          enum:
+            - whisper.cpp
+          # SED_ASR_MODEL_SERVER_END
+            - Existing model server
+      dependencies:
+        modelServer:
+          oneOf:
+            - required:
+                - modelEndpoint
+              properties:
+                modelServer:
+                  const: Existing model server
+                modelEndpoint:
+                  title: Model Server Endpoint
+                  type: string
+                  description: "The endpoint for an existing model server."
+            # SED_ASR_MODEL_SERVER_START
+            - properties:
+                modelServer:
+                  const: whisper.cpp
+            # SED_ASR_MODEL_SERVER_END
+
     - title: Application Repository Information
       required:
         - hostType
@@ -231,6 +260,8 @@ spec:
           appPort: 8501
           modelServiceContainer: quay.io/redhat-ai-dev/whispercpp:latest
           modelServicePort: 8001
+          existingModelServer: ${{ parameters.modelServer === 'Existing model server' }}
+          modelEndpoint: ${{ parameters.modelEndpoint }}
           # for RHOAI
           rhoaiSelected: ${{ parameters.rhoaiSelected }}
     - action: fs:rename
diff --git a/templates/chatbot/template.yaml b/templates/chatbot/template.yaml
index f76e7ed3..8abb9ffd 100644
--- a/templates/chatbot/template.yaml
+++ b/templates/chatbot/template.yaml
@@ -15,9 +15,7 @@ spec:
       required:
         - name
         - owner
-        # SED_MODEL_SERVER_START
         - modelServer
-        # SED_MODEL_SERVER_END
       properties:
         name:
           title: Name
@@ -37,17 +35,48 @@ spec:
           ui:options:
             catalogFilter:
               kind: [Group, User]
-        # SED_MODEL_SERVER_START
         modelServer:
-          title: Large Language Model(LLM) Server
+          # SED_LLM_SERVER_START
+          title: Large Language Model(LLM) Server
           description: The model server container to use
+          ui:help: If you choose vLLM, ensure your cluster has Nvidia GPU supported, and is with enough cpu and memory
+          default: llama.cpp
           type: string
           enum:
-            - llama.cpp
             - vLLM
-          default: llama.cpp
-          ui:help: If you choose vLLM, ensure your cluster has Nvidia GPU supported, and is with enough cpu and memory
-        # SED_MODEL_SERVER_END
+            - llama.cpp
+          # SED_LLM_SERVER_END
+            - Existing model server
+      dependencies:
+        modelServer:
+          oneOf:
+            - required:
+                - modelEndpoint
+                # SED_LLM_SERVER_START
+                - modelName
+                # SED_LLM_SERVER_END
+              properties:
+                modelServer:
+                  const: Existing model server
+                modelEndpoint:
+                  title: Model Server Endpoint
+                  type: string
+                  description: "The endpoint for an existing model server."
+                # SED_LLM_SERVER_START
+                modelName:
+                  title: Model Name
+                  type: string
+                  ui:help: "The name of the model deployed on the model server you would like to use."
+                # SED_LLM_SERVER_END
+            # SED_LLM_SERVER_START
+            - properties:
+                modelServer:
+                  const: vLLM
+            - properties:
+                modelServer:
+                  const: llama.cpp
+            # SED_LLM_SERVER_END
+
     - title: Application Repository Information
       required:
         - hostType
@@ -245,13 +274,15 @@ spec:
           appPort: 8501
           modelServiceContainer: quay.io/ai-lab/llamacpp_python:latest
           modelServicePort: 8001
-          # SED_MODEL_SERVER_START
+          # SED_LLM_SERVER_START
           # for vllm
           vllmSelected: ${{ parameters.modelServer === 'vLLM' }}
           vllmModelServiceContainer: quay.io/rh-aiservices-bu/vllm-openai-ubi9:0.4.2
-          vllmModelName: instructlab/granite-7b-lab
+          modelName: ${{ parameters.modelName if parameters.modelServer === 'Existing model server' else 'instructlab/granite-7b-lab' }}
           maxModelLength: 4096
-          # SED_MODEL_SERVER_END
+          # SED_LLM_SERVER_END
+          existingModelServer: ${{ parameters.modelServer === 'Existing model server' }}
+          modelEndpoint: ${{ parameters.modelEndpoint }}
           # for RHOAI
           rhoaiSelected: ${{ parameters.rhoaiSelected }}
     - action: fs:rename
diff --git a/templates/codegen/template.yaml b/templates/codegen/template.yaml
index 147ce396..89b2d70d 100644
--- a/templates/codegen/template.yaml
+++ b/templates/codegen/template.yaml
@@ -15,9 +15,7 @@ spec:
       required:
         - name
         - owner
-        # SED_MODEL_SERVER_START
         - modelServer
-        # SED_MODEL_SERVER_END
       properties:
         name:
           title: Name
@@ -37,17 +35,48 @@ spec:
           ui:options:
             catalogFilter:
               kind: [Group, User]
-        # SED_MODEL_SERVER_START
         modelServer:
-          title: Large Language Model(LLM) Server
+          # SED_LLM_SERVER_START
+          title: Large Language Model(LLM) Server
           description: The model server container to use
+          ui:help: If you choose vLLM, ensure your cluster has Nvidia GPU supported, and is with enough cpu and memory
+          default: llama.cpp
           type: string
           enum:
-            - llama.cpp
             - vLLM
-          default: llama.cpp
-          ui:help: If you choose vLLM, ensure your cluster has Nvidia GPU supported, and is with enough cpu and memory
-        # SED_MODEL_SERVER_END
+            - llama.cpp
+          # SED_LLM_SERVER_END
+            - Existing model server
+      dependencies:
+        modelServer:
+          oneOf:
+            - required:
+                - modelEndpoint
+                # SED_LLM_SERVER_START
+                - modelName
+                # SED_LLM_SERVER_END
+              properties:
+                modelServer:
+                  const: Existing model server
+                modelEndpoint:
+                  title: Model Server Endpoint
+                  type: string
+                  description: "The endpoint for an existing model server."
+                # SED_LLM_SERVER_START
+                modelName:
+                  title: Model Name
+                  type: string
+                  ui:help: "The name of the model deployed on the model server you would like to use."
+                # SED_LLM_SERVER_END
+            # SED_LLM_SERVER_START
+            - properties:
+                modelServer:
+                  const: vLLM
+            - properties:
+                modelServer:
+                  const: llama.cpp
+            # SED_LLM_SERVER_END
+
     - title: Application Repository Information
       required:
         - hostType
@@ -245,13 +274,15 @@ spec:
           appPort: 8501
           modelServiceContainer: quay.io/ai-lab/llamacpp_python:latest
           modelServicePort: 8001
-          # SED_MODEL_SERVER_START
+          # SED_LLM_SERVER_START
           # for vllm
           vllmSelected: ${{ parameters.modelServer === 'vLLM' }}
           vllmModelServiceContainer: quay.io/rh-aiservices-bu/vllm-openai-ubi9:0.4.2
-          vllmModelName: Nondzu/Mistral-7B-code-16k-qlora
+          modelName: ${{ parameters.modelName if parameters.modelServer === 'Existing model server' else 'Nondzu/Mistral-7B-code-16k-qlora' }}
           maxModelLength: 6144
-          # SED_MODEL_SERVER_END
+          # SED_LLM_SERVER_END
+          existingModelServer: ${{ parameters.modelServer === 'Existing model server' }}
+          modelEndpoint: ${{ parameters.modelEndpoint }}
           # for RHOAI
           rhoaiSelected: ${{ parameters.rhoaiSelected }}
     - action: fs:rename
diff --git a/templates/object-detection/template.yaml b/templates/object-detection/template.yaml
index bd9ea292..21bee624 100644
--- a/templates/object-detection/template.yaml
+++ b/templates/object-detection/template.yaml
@@ -15,6 +15,7 @@ spec:
      required:
         - name
         - owner
+        - modelServer
       properties:
         name:
           title: Name
@@ -34,6 +35,34 @@ spec:
           ui:options:
             catalogFilter:
               kind: [Group, User]
+        modelServer:
+          # SED_DETR_MODEL_SERVER_START
+          title: DETR Model Server
+          description: The model server container to use. The deployed model on the server must support object detection.
+          type: string
+          default: detr-resnet-101
+          enum:
+            - detr-resnet-101
+          # SED_DETR_MODEL_SERVER_END
+            - Existing model server
+      dependencies:
+        modelServer:
+          oneOf:
+            - required:
+                - modelEndpoint
+              properties:
+                modelServer:
+                  const: Existing model server
+                modelEndpoint:
+                  title: Model Server Endpoint
+                  type: string
+                  description: "The endpoint for an existing model server."
+            # SED_DETR_MODEL_SERVER_START
+            - properties:
+                modelServer:
+                  const: detr-resnet-101
+            # SED_DETR_MODEL_SERVER_END
+
     - title: Application Repository Information
       required:
         - hostType
@@ -231,6 +260,8 @@ spec:
           appPort: 8501
           modelServiceContainer: quay.io/redhat-ai-dev/object_detection_python:latest
           modelServicePort: 8000
+          existingModelServer: ${{ parameters.modelServer === 'Existing model server' }}
+          modelEndpoint: ${{ parameters.modelEndpoint }}
           # for RHOAI
           rhoaiSelected: ${{ parameters.rhoaiSelected }}
     - action: fs:rename
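
Below is a minimal sketch of what the templated model-config.yaml renders once
a user picks "Existing model server". It is an illustration, not part of the
patch: the component name, endpoint URL, and model name are hypothetical values.

  apiVersion: v1
  kind: ConfigMap
  metadata:
    # ${{ values.name }}-model-config, assuming the component is named my-app
    name: my-app-model-config
  data:
    # taken from the new modelEndpoint parameter (hypothetical URL)
    MODEL_ENDPOINT: "http://models.internal.example.com:8001"
    # taken from the new modelName parameter (hypothetical model)
    MODEL_NAME: "granite-7b-lab"

Because existingModelServer is true in this case, kustomization.yaml also drops
deployment-model-server.yaml and service-model-server.yaml from the resources;
with a bundled server, MODEL_ENDPOINT instead falls back to
"http://my-app-model-server:8001" (8001 being the chatbot sample's model service port).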