DEVHAS-731: Allow existing model servers to be passed into templates (#23)

* DEVHAS-731: Allow existing model servers to be passed into templates

Signed-off-by: John Collier <[email protected]>

* Revert initcontainercommand

Signed-off-by: John Collier <[email protected]>

* Remove modelPath param

Signed-off-by: John Collier <[email protected]>

* Update skeleton/template.yaml

Co-authored-by: Stephanie Cao <[email protected]>

* Regenerate templates

Signed-off-by: John Collier <[email protected]>

* Allow model to be passed in for audio-to-text

Signed-off-by: John Collier <[email protected]>

* Allow model to be selected for all samples

Signed-off-by: John Collier <[email protected]>

* Fix bug preventing proper defaults from being shown

Signed-off-by: John Collier <[email protected]>

* Fix typo

Signed-off-by: John Collier <[email protected]>

* Pull in latest from generate.sh

Signed-off-by: John Collier <[email protected]>

---------

Signed-off-by: John Collier <[email protected]>
Co-authored-by: Stephanie Cao <[email protected]>
johnmcollier and yangcao77 authored Aug 1, 2024
1 parent 6478242 commit a838022
Showing 14 changed files with 254 additions and 47 deletions.
6 changes: 4 additions & 2 deletions scripts/envs/audio-to-text
@@ -9,8 +9,10 @@ export APP_PORT=8501
 export MODEL_SERVICE_CONTAINER="quay.io/redhat-ai-dev/whispercpp:latest"
 export MODEL_SERVICE_PORT=8001
 
-# vllm configurations
-export SUPPORT_VLLM=false
+# model configurations
+export SUPPORT_LLM=false
+export SUPPORT_ASR=true
+export SUPPORT_DETR=false
 
 # for gitlab case, since gitlab does not have pipeline webhook pre-set to trigger the initial build
 export APP_INTERFACE_CONTAINER="quay.io/redhat-ai-dev/audio-to-text:latest"
7 changes: 5 additions & 2 deletions scripts/envs/chatbot
@@ -9,8 +9,11 @@ export APP_PORT=8501
 export MODEL_SERVICE_CONTAINER="quay.io/ai-lab/llamacpp_python:latest"
 export MODEL_SERVICE_PORT=8001
 
-# vllm configurations
-export SUPPORT_VLLM=true
+# model configurations
+export SUPPORT_LLM=true
+export SUPPORT_ASR=false
+export SUPPORT_DETR=false
 
 export VLLM_CONTAINER="quay.io/rh-aiservices-bu/vllm-openai-ubi9:0.4.2"
 export VLLM_MODEL_NAME="instructlab/granite-7b-lab"
 export VLLM_MAX_MODEL_LEN=4096
7 changes: 5 additions & 2 deletions scripts/envs/codegen
@@ -9,8 +9,11 @@ export APP_PORT=8501
 export MODEL_SERVICE_CONTAINER="quay.io/ai-lab/llamacpp_python:latest"
 export MODEL_SERVICE_PORT=8001
 
-# vllm configurations
-export SUPPORT_VLLM=true
+# model configurations
+export SUPPORT_LLM=true
+export SUPPORT_ASR=false
+export SUPPORT_DETR=false
 
 export VLLM_CONTAINER="quay.io/rh-aiservices-bu/vllm-openai-ubi9:0.4.2"
 export VLLM_MODEL_NAME="Nondzu/Mistral-7B-code-16k-qlora"
 export VLLM_MAX_MODEL_LEN=6144
7 changes: 4 additions & 3 deletions scripts/envs/object-detection
@@ -9,9 +9,10 @@ export APP_PORT=8501
 export MODEL_SERVICE_CONTAINER="quay.io/redhat-ai-dev/object_detection_python:latest"
 export MODEL_SERVICE_PORT=8000
 
-# vllm configurations
-export SUPPORT_VLLM=false
-
+# model configurations
+export SUPPORT_LLM=false
+export SUPPORT_ASR=false
+export SUPPORT_DETR=true
 
 # for gitlab case, since gitlab does not have pipeline webhook pre-set to trigger the initial build
 export APP_INTERFACE_CONTAINER="quay.io/redhat-ai-dev/object_detection:latest"
10 changes: 8 additions & 2 deletions scripts/import-ai-lab-samples
@@ -53,8 +53,14 @@ for f in */; do
     sed -i "s!sed.edit.APPTAGS!$APP_TAGS!g" $DEST/template.yaml
     sed -i "s!sed.edit.CATALOG_DESCRIPTION!Secure Supply Chain Example for $APP_DESC!g" $DEST/template.yaml
 
-    if [ $SUPPORT_VLLM == false ]; then
-        sed -i '/# SED_MODEL_SERVER_START/,/# SED_MODEL_SERVER_END/d' $DEST/template.yaml
+    if [ $SUPPORT_LLM == false ]; then
+        sed -i '/# SED_LLM_SERVER_START/,/# SED_LLM_SERVER_END/d' $DEST/template.yaml
+    fi
+    if [ $SUPPORT_ASR == false ]; then
+        sed -i '/# SED_ASR_MODEL_SERVER_START/,/# SED_ASR_MODEL_SERVER_END/d' $DEST/template.yaml
+    fi
+    if [ $SUPPORT_DETR == false ]; then
+        sed -i '/# SED_DETR_MODEL_SERVER_START/,/# SED_DETR_MODEL_SERVER_END/d' $DEST/template.yaml
     fi
 
     source $ROOT_DIR/properties
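A note on the mechanism: each `sed '/START/,/END/d'` call above is a range delete, removing every line from the START marker through the END marker inclusive, so a skeleton carrying all three model-server sections is trimmed down to just the one a given sample supports. A minimal sketch of the behavior on a hypothetical marker-delimited file (file name and contents invented for illustration):

    #!/bin/bash
    # Build a tiny stand-in for the generated template.yaml.
    cat > /tmp/marker-demo.yaml <<'EOF'
    # SED_ASR_MODEL_SERVER_START
    title: ASR Model Server
    # SED_ASR_MODEL_SERVER_END
    title: surviving content
    EOF

    # The same range delete used above: drops the START line, the END line,
    # and everything between them.
    sed -i '/# SED_ASR_MODEL_SERVER_START/,/# SED_ASR_MODEL_SERVER_END/d' /tmp/marker-demo.yaml

    cat /tmp/marker-demo.yaml   # prints only: title: surviving content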
5 changes: 5 additions & 0 deletions skeleton/gitops-template/.tekton/gitops-on-pull-request.yaml
@@ -9,6 +9,9 @@ metadata:
     pipelinesascode.tekton.dev/task-0: "https://raw.githubusercontent.com/redhat-appstudio/tssc-sample-pipelines/main/pac/tasks/git-clone.yaml"
     pipelinesascode.tekton.dev/task-1: "https://raw.githubusercontent.com/redhat-appstudio/tssc-sample-pipelines/main/pac/tasks/gather-deploy-images.yaml"
     pipelinesascode.tekton.dev/task-2: "https://raw.githubusercontent.com/redhat-appstudio/tssc-sample-pipelines/main/pac/tasks/verify-enterprise-contract.yaml"
+    pipelinesascode.tekton.dev/task-3: "https://raw.githubusercontent.com/redhat-appstudio/tssc-sample-pipelines/main/pac/tasks/gather-deploy-images.yaml"
+    pipelinesascode.tekton.dev/task-4: "https://raw.githubusercontent.com/redhat-appstudio/tssc-sample-pipelines/main/pac/tasks/download-sbom-from-url-in-attestation.yaml"
+    pipelinesascode.tekton.dev/task-5: "https://raw.githubusercontent.com/redhat-appstudio/tssc-sample-pipelines/main/pac/tasks/upload-sbom-to-trustification.yaml"
   labels:
     argocd/app-name: ${{ values.name }}
     janus-idp.io/tekton: ${{ values.name }}
@@ -23,6 +26,8 @@ spec:
       value: '{{revision}}'
     - name: target-branch
       value: '{{target_branch}}'
+    - name: fail-if-trustification-not-configured
+      value: 'false'
   pipelineRef:
     name: gitops-pull-request
   workspaces:
[file path not captured in this view — a model-server Deployment template using the vLLM container values]
@@ -36,7 +36,7 @@ spec:
       - image: ${{ values.vllmModelServiceContainer }}
         args: [
           "--model",
-          "${{ values.vllmModelName }}",
+          "${{ values.modelName }}",
           "--port",
           "${{ values.modelServicePort }}",
           "--download-dir",
[file path not captured in this view — a kustomization.yaml resource list template]
@@ -11,8 +11,10 @@ resources:
 {%- if values.vllmSelected %}
 - pvc.yaml
 {%- endif %}
+{%- if not values.existingModelServer %}
 - deployment-model-server.yaml
 - service-model-server.yaml
+{%- endif %}
 - deployment.yaml
 - route.yaml
 - service.yaml
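For context, the new `{%- if not values.existingModelServer %}` guard means the scaffolder only emits the bundled model-server manifests when the user did not point at an existing server. A sketch of the rendered resource list when `existingModelServer` is true and `vllmSelected` is false (showing only the entries visible in this hunk):

    # Hypothetical rendered kustomization.yaml: the model-server
    # Deployment and Service are skipped entirely.
    resources:
    - deployment.yaml
    - route.yaml
    - service.yaml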
[file path not captured in this view — the model-config ConfigMap template]
@@ -3,7 +3,11 @@ kind: ConfigMap
 metadata:
   name: ${{ values.name }}-model-config
 data:
+{%- if values.existingModelServer %}
+  MODEL_ENDPOINT: "${{ values.modelEndpoint }}"
+{%- else %}
   MODEL_ENDPOINT: "http://${{ values.name }}-model-server:${{ values.modelServicePort }}"
-{%- if values.vllmSelected %}
-  MODEL_NAME: "${{ values.vllmModelName }}"
+{%- endif %}
+{%- if values.vllmSelected or values.existingModelServer %}
+  MODEL_NAME: "${{ values.modelName }}"
 {%- endif %}
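To make the two branches concrete, here is a sketch of how this ConfigMap could render for an app named my-chat (the endpoint and model name are invented examples). With an existing model server, MODEL_ENDPOINT comes straight from user input and MODEL_NAME is now emitted for that case too; otherwise the endpoint points at the in-cluster model-server Service:

    # existingModelServer = true (hypothetical user inputs):
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: my-chat-model-config
    data:
      MODEL_ENDPOINT: "https://models.example.com:8443"
      MODEL_NAME: "granite-7b-lab"

    # existingModelServer = false, vllmSelected = false:
    # MODEL_ENDPOINT: "http://my-chat-model-server:8001", and no MODEL_NAME key.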
79 changes: 68 additions & 11 deletions skeleton/template.yaml
@@ -15,9 +15,7 @@ spec:
       required:
         - name
         - owner
-        # SED_MODEL_SERVER_START
         - modelServer
-        # SED_MODEL_SERVER_END
       properties:
         name:
           title: Name
@@ -37,17 +35,74 @@ spec:
         ui:options:
           catalogFilter:
             kind: [Group, User]
-        # SED_MODEL_SERVER_START
         modelServer:
-          title: Large Language Model(LLM) Server
+          # SED_LLM_SERVER_START
+          title: Large Language Model(LLM) Server
           description: The model server container to use
+          ui:help: If you choose vLLM, ensure your cluster has Nvidia GPU supported, and is with enough cpu and memory
+          default: llama.cpp
           type: string
           enum:
-            - llama.cpp
             - vLLM
-          default: llama.cpp
-          ui:help: If you choose vLLM, ensure your cluster has Nvidia GPU supported, and is with enough cpu and memory
-        # SED_MODEL_SERVER_END
+            - llama.cpp
+          # SED_LLM_SERVER_END
+          # SED_ASR_MODEL_SERVER_START
+          title: ASR Model Server
+          description: The model server container to use. The deployed model on the server must support automatic speech recognition (ASR).
+          type: string
+          default: whisper.cpp
+          enum:
+            - whisper.cpp
+          # SED_ASR_MODEL_SERVER_END
+          # SED_DETR_MODEL_SERVER_START
+          title: DETR Model Server
+          description: The model server container to use. The deployed model on the server must support object detection.
+          type: string
+          default: detr-resnet-101
+          enum:
+            - detr-resnet-101
+          # SED_DETR_MODEL_SERVER_END
+            - Existing model server
+      dependencies:
+        modelServer:
+          oneOf:
+            - required:
+                - modelEndpoint
+                # SED_LLM_SERVER_START
+                - modelName
+                # SED_LLM_SERVER_END
+              properties:
+                modelServer:
+                  const: Existing model server
+                modelEndpoint:
+                  title: Model Server Endpoint
+                  type: string
+                  description: "The endpoint for an existing model server."
+                # SED_LLM_SERVER_START
+                modelName:
+                  title: Model Name
+                  type: string
+                  ui:help: "The name of the model deployed on the model server you would like to use."
+                # SED_LLM_SERVER_END
+            # SED_LLM_SERVER_START
+            - properties:
+                modelServer:
+                  const: vLLM
+            - properties:
+                modelServer:
+                  const: llama.cpp
+            # SED_LLM_SERVER_END
+            # SED_ASR_MODEL_SERVER_START
+            - properties:
+                modelServer:
+                  const: whisper.cpp
+            # SED_ASR_MODEL_SERVER_END
+            # SED_DETR_MODEL_SERVER_START
+            - properties:
+                modelServer:
+                  const: detr-resnet-101
+            # SED_DETR_MODEL_SERVER_END
+
     - title: Application Repository Information
       required:
         - hostType
@@ -245,13 +300,15 @@
           appPort: ${APP_PORT}
           modelServiceContainer: ${MODEL_SERVICE_CONTAINER}
           modelServicePort: ${MODEL_SERVICE_PORT}
-          # SED_MODEL_SERVER_START
+          # SED_LLM_SERVER_START
           # for vllm
           vllmSelected: ${{ parameters.modelServer === 'vLLM' }}
           vllmModelServiceContainer: ${VLLM_CONTAINER}
-          vllmModelName: ${VLLM_MODEL_NAME}
+          modelName: ${{ parameters.modelName if parameters.modelServer === 'Existing model server' else '${VLLM_MODEL_NAME}' }}
           maxModelLength: ${VLLM_MAX_MODEL_LEN}
-          # SED_MODEL_SERVER_END
+          # SED_LLM_SERVER_END
+          existingModelServer: ${{ parameters.modelServer === 'Existing model server' }}
+          modelEndpoint: ${{ parameters.modelEndpoint }}
           # for RHOAI
           rhoaiSelected: ${{ parameters.rhoaiSelected }}
     - action: fs:rename
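The `dependencies.modelServer.oneOf` block added here is the standard JSON Schema way to make form fields conditional: `modelEndpoint` (and, for LLM samples, `modelName`) only appears and becomes required when the user selects `Existing model server`. A minimal self-contained sketch of the same pattern, with invented field names:

    # Hypothetical schema illustrating the oneOf/dependencies pattern:
    # "endpoint" is only shown and required when "kind" is "external".
    properties:
      kind:
        type: string
        enum: [bundled, external]
    dependencies:
      kind:
        oneOf:
          - properties:
              kind:
                const: bundled
          - required:
              - endpoint
            properties:
              kind:
                const: external
              endpoint:
                type: string

The new `modelName` value line pairs with this: the inline conditional `${{ parameters.modelName if ... else '${VLLM_MODEL_NAME}' }}` passes the user-supplied name through when an existing server was chosen and otherwise falls back to the sample's default vLLM model.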
31 changes: 31 additions & 0 deletions templates/audio-to-text/template.yaml
@@ -15,6 +15,7 @@ spec:
       required:
         - name
         - owner
+        - modelServer
       properties:
         name:
           title: Name
@@ -34,6 +35,34 @@ spec:
         ui:options:
           catalogFilter:
             kind: [Group, User]
+        modelServer:
+          # SED_ASR_MODEL_SERVER_START
+          title: ASR Model Server
+          description: The model server container to use. The deployed model on the server must support automatic speech recognition (ASR).
+          type: string
+          default: whisper.cpp
+          enum:
+            - whisper.cpp
+          # SED_ASR_MODEL_SERVER_END
+            - Existing model server
+      dependencies:
+        modelServer:
+          oneOf:
+            - required:
+                - modelEndpoint
+              properties:
+                modelServer:
+                  const: Existing model server
+                modelEndpoint:
+                  title: Model Server Endpoint
+                  type: string
+                  description: "The endpoint for an existing model server."
+            # SED_ASR_MODEL_SERVER_START
+            - properties:
+                modelServer:
+                  const: whisper.cpp
+            # SED_ASR_MODEL_SERVER_END
+
     - title: Application Repository Information
       required:
         - hostType
@@ -231,6 +260,8 @@
           appPort: 8501
           modelServiceContainer: quay.io/redhat-ai-dev/whispercpp:latest
           modelServicePort: 8001
+          existingModelServer: ${{ parameters.modelServer === 'Existing model server' }}
+          modelEndpoint: ${{ parameters.modelEndpoint }}
           # for RHOAI
           rhoaiSelected: ${{ parameters.rhoaiSelected }}
     - action: fs:rename
53 changes: 42 additions & 11 deletions templates/chatbot/template.yaml
@@ -15,9 +15,7 @@ spec:
       required:
         - name
         - owner
-        # SED_MODEL_SERVER_START
         - modelServer
-        # SED_MODEL_SERVER_END
       properties:
         name:
           title: Name
@@ -37,17 +35,48 @@ spec:
         ui:options:
           catalogFilter:
             kind: [Group, User]
-        # SED_MODEL_SERVER_START
         modelServer:
-          title: Large Language Model(LLM) Server
+          # SED_LLM_SERVER_START
+          title: Large Language Model(LLM) Server
           description: The model server container to use
+          ui:help: If you choose vLLM, ensure your cluster has Nvidia GPU supported, and is with enough cpu and memory
+          default: llama.cpp
           type: string
           enum:
-            - llama.cpp
             - vLLM
-          default: llama.cpp
-          ui:help: If you choose vLLM, ensure your cluster has Nvidia GPU supported, and is with enough cpu and memory
-        # SED_MODEL_SERVER_END
+            - llama.cpp
+          # SED_LLM_SERVER_END
+            - Existing model server
+      dependencies:
+        modelServer:
+          oneOf:
+            - required:
+                - modelEndpoint
+                # SED_LLM_SERVER_START
+                - modelName
+                # SED_LLM_SERVER_END
+              properties:
+                modelServer:
+                  const: Existing model server
+                modelEndpoint:
+                  title: Model Server Endpoint
+                  type: string
+                  description: "The endpoint for an existing model server."
+                # SED_LLM_SERVER_START
+                modelName:
+                  title: Model Name
+                  type: string
+                  ui:help: "The name of the model deployed on the model server you would like to use."
+                # SED_LLM_SERVER_END
+            # SED_LLM_SERVER_START
+            - properties:
+                modelServer:
+                  const: vLLM
+            - properties:
+                modelServer:
+                  const: llama.cpp
+            # SED_LLM_SERVER_END
+
     - title: Application Repository Information
       required:
         - hostType
@@ -245,13 +274,15 @@
           appPort: 8501
           modelServiceContainer: quay.io/ai-lab/llamacpp_python:latest
           modelServicePort: 8001
-          # SED_MODEL_SERVER_START
+          # SED_LLM_SERVER_START
           # for vllm
           vllmSelected: ${{ parameters.modelServer === 'vLLM' }}
           vllmModelServiceContainer: quay.io/rh-aiservices-bu/vllm-openai-ubi9:0.4.2
-          vllmModelName: instructlab/granite-7b-lab
+          modelName: ${{ parameters.modelName if parameters.modelServer === 'Existing model server' else 'instructlab/granite-7b-lab' }}
           maxModelLength: 4096
-          # SED_MODEL_SERVER_END
+          # SED_LLM_SERVER_END
+          existingModelServer: ${{ parameters.modelServer === 'Existing model server' }}
+          modelEndpoint: ${{ parameters.modelEndpoint }}
           # for RHOAI
           rhoaiSelected: ${{ parameters.rhoaiSelected }}
     - action: fs:rename