DEVHAS-731: Allow existing model servers to be passed into templates (#23)

* DEVHAS-731: Allow existing model servers to be passed into templates

Signed-off-by: John Collier <[email protected]>

* Revert initcontainercommand

Signed-off-by: John Collier <[email protected]>

* Remove modelPath param

Signed-off-by: John Collier <[email protected]>

* Update skeleton/template.yaml

Co-authored-by: Stephanie Cao <[email protected]>

* Regenerate templates

Signed-off-by: John Collier <[email protected]>

* Allow model to be passed in for audio-to-text

Signed-off-by: John Collier <[email protected]>

* Allow model to be selected for all samples

Signed-off-by: John Collier <[email protected]>

* Fix bug preventing proper defaults from being shown

Signed-off-by: John Collier <[email protected]>

* Fix typo

Signed-off-by: John Collier <[email protected]>

* Pull in latest from generate.sh

Signed-off-by: John Collier <[email protected]>

---------

Signed-off-by: John Collier <[email protected]>
Co-authored-by: Stephanie Cao <[email protected]>
johnmcollier and yangcao77 authored Aug 1, 2024
1 parent 6478242 commit a838022
Showing 14 changed files with 254 additions and 47 deletions.
6 changes: 4 additions & 2 deletions scripts/envs/audio-to-text
@@ -9,8 +9,10 @@ export APP_PORT=8501
 export MODEL_SERVICE_CONTAINER="quay.io/redhat-ai-dev/whispercpp:latest"
 export MODEL_SERVICE_PORT=8001
 
-# vllm configurations
-export SUPPORT_VLLM=false
+# model configurations
+export SUPPORT_LLM=false
+export SUPPORT_ASR=true
+export SUPPORT_DETR=false
 
 # for gitlab case, since gitlab does not have pipeline webhook pre-set to trigger the initial build
 export APP_INTERFACE_CONTAINER="quay.io/redhat-ai-dev/audio-to-text:latest"
7 changes: 5 additions & 2 deletions scripts/envs/chatbot
@@ -9,8 +9,11 @@ export APP_PORT=8501
 export MODEL_SERVICE_CONTAINER="quay.io/ai-lab/llamacpp_python:latest"
 export MODEL_SERVICE_PORT=8001
 
-# vllm configurations
-export SUPPORT_VLLM=true
+# model configurations
+export SUPPORT_LLM=true
+export SUPPORT_ASR=false
+export SUPPORT_DETR=false
 
 export VLLM_CONTAINER="quay.io/rh-aiservices-bu/vllm-openai-ubi9:0.4.2"
 export VLLM_MODEL_NAME="instructlab/granite-7b-lab"
 export VLLM_MAX_MODEL_LEN=4096
7 changes: 5 additions & 2 deletions scripts/envs/codegen
@@ -9,8 +9,11 @@ export APP_PORT=8501
 export MODEL_SERVICE_CONTAINER="quay.io/ai-lab/llamacpp_python:latest"
 export MODEL_SERVICE_PORT=8001
 
-# vllm configurations
-export SUPPORT_VLLM=true
+# model configurations
+export SUPPORT_LLM=true
+export SUPPORT_ASR=false
+export SUPPORT_DETR=false
 
 export VLLM_CONTAINER="quay.io/rh-aiservices-bu/vllm-openai-ubi9:0.4.2"
 export VLLM_MODEL_NAME="Nondzu/Mistral-7B-code-16k-qlora"
 export VLLM_MAX_MODEL_LEN=6144
7 changes: 4 additions & 3 deletions scripts/envs/object-detection
@@ -9,9 +9,10 @@ export APP_PORT=8501
 export MODEL_SERVICE_CONTAINER="quay.io/redhat-ai-dev/object_detection_python:latest"
 export MODEL_SERVICE_PORT=8000
 
-# vllm configurations
-export SUPPORT_VLLM=false
-
+# model configurations
+export SUPPORT_LLM=false
+export SUPPORT_ASR=false
+export SUPPORT_DETR=true
 
 # for gitlab case, since gitlab does not have pipeline webhook pre-set to trigger the initial build
 export APP_INTERFACE_CONTAINER="quay.io/redhat-ai-dev/object_detection:latest"
10 changes: 8 additions & 2 deletions scripts/import-ai-lab-samples
@@ -53,8 +53,14 @@ for f in */; do
     sed -i "s!sed.edit.APPTAGS!$APP_TAGS!g" $DEST/template.yaml
     sed -i "s!sed.edit.CATALOG_DESCRIPTION!Secure Supply Chain Example for $APP_DESC!g" $DEST/template.yaml
 
-    if [ $SUPPORT_VLLM == false ]; then
-        sed -i '/# SED_MODEL_SERVER_START/,/# SED_MODEL_SERVER_END/d' $DEST/template.yaml
+    if [ $SUPPORT_LLM == false ]; then
+        sed -i '/# SED_LLM_SERVER_START/,/# SED_LLM_SERVER_END/d' $DEST/template.yaml
+    fi
+    if [ $SUPPORT_ASR == false ]; then
+        sed -i '/# SED_ASR_MODEL_SERVER_START/,/# SED_ASR_MODEL_SERVER_END/d' $DEST/template.yaml
+    fi
+    if [ $SUPPORT_DETR == false ]; then
+        sed -i '/# SED_DETR_MODEL_SERVER_START/,/# SED_DETR_MODEL_SERVER_END/d' $DEST/template.yaml
     fi
 
     source $ROOT_DIR/properties
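A note on the mechanism: each `sed '/START/,/END/d'` call above is a range delete, removing every line from the START marker through the END marker inclusive, so a skeleton carrying all three model-server sections is trimmed down to just the one a given sample supports. A minimal sketch of the behavior on a hypothetical marker-delimited file (file name and contents invented for illustration):

    #!/bin/bash
    # Build a tiny stand-in for the generated template.yaml.
    cat > /tmp/marker-demo.yaml <<'EOF'
    # SED_ASR_MODEL_SERVER_START
    title: ASR Model Server
    # SED_ASR_MODEL_SERVER_END
    title: surviving content
    EOF

    # The same range delete used above: drops the START line, the END line,
    # and everything between them.
    sed -i '/# SED_ASR_MODEL_SERVER_START/,/# SED_ASR_MODEL_SERVER_END/d' /tmp/marker-demo.yaml

    cat /tmp/marker-demo.yaml   # prints only: title: surviving content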
5 changes: 5 additions & 0 deletions skeleton/gitops-template/.tekton/gitops-on-pull-request.yaml
@@ -9,6 +9,9 @@ metadata:
     pipelinesascode.tekton.dev/task-0: "https://raw.githubusercontent.com/redhat-appstudio/tssc-sample-pipelines/main/pac/tasks/git-clone.yaml"
     pipelinesascode.tekton.dev/task-1: "https://raw.githubusercontent.com/redhat-appstudio/tssc-sample-pipelines/main/pac/tasks/gather-deploy-images.yaml"
     pipelinesascode.tekton.dev/task-2: "https://raw.githubusercontent.com/redhat-appstudio/tssc-sample-pipelines/main/pac/tasks/verify-enterprise-contract.yaml"
+    pipelinesascode.tekton.dev/task-3: "https://raw.githubusercontent.com/redhat-appstudio/tssc-sample-pipelines/main/pac/tasks/gather-deploy-images.yaml"
+    pipelinesascode.tekton.dev/task-4: "https://raw.githubusercontent.com/redhat-appstudio/tssc-sample-pipelines/main/pac/tasks/download-sbom-from-url-in-attestation.yaml"
+    pipelinesascode.tekton.dev/task-5: "https://raw.githubusercontent.com/redhat-appstudio/tssc-sample-pipelines/main/pac/tasks/upload-sbom-to-trustification.yaml"
   labels:
     argocd/app-name: ${{ values.name }}
     janus-idp.io/tekton: ${{ values.name }}
@@ -23,6 +26,8 @@ spec:
       value: '{{revision}}'
     - name: target-branch
       value: '{{target_branch}}'
+    - name: fail-if-trustification-not-configured
+      value: 'false'
   pipelineRef:
     name: gitops-pull-request
   workspaces:
[file path not captured in this view — a model-server Deployment template using the vLLM container values]
@@ -36,7 +36,7 @@ spec:
       - image: ${{ values.vllmModelServiceContainer }}
         args: [
           "--model",
-          "${{ values.vllmModelName }}",
+          "${{ values.modelName }}",
           "--port",
           "${{ values.modelServicePort }}",
           "--download-dir",
[file path not captured in this view — a kustomization.yaml resource list template]
@@ -11,8 +11,10 @@ resources:
 {%- if values.vllmSelected %}
 - pvc.yaml
 {%- endif %}
+{%- if not values.existingModelServer %}
 - deployment-model-server.yaml
 - service-model-server.yaml
+{%- endif %}
 - deployment.yaml
 - route.yaml
 - service.yaml
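For context, the new `{%- if not values.existingModelServer %}` guard means the scaffolder only emits the bundled model-server manifests when the user did not point at an existing server. A sketch of the rendered resource list when `existingModelServer` is true and `vllmSelected` is false (showing only the entries visible in this hunk):

    # Hypothetical rendered kustomization.yaml: the model-server
    # Deployment and Service are skipped entirely.
    resources:
    - deployment.yaml
    - route.yaml
    - service.yaml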
[file path not captured in this view — the model-config ConfigMap template]
@@ -3,7 +3,11 @@ kind: ConfigMap
 metadata:
   name: ${{ values.name }}-model-config
 data:
+{%- if values.existingModelServer %}
+  MODEL_ENDPOINT: "${{ values.modelEndpoint }}"
+{%- else %}
   MODEL_ENDPOINT: "http://${{ values.name }}-model-server:${{ values.modelServicePort }}"
-{%- if values.vllmSelected %}
-  MODEL_NAME: "${{ values.vllmModelName }}"
+{%- endif %}
+{%- if values.vllmSelected or values.existingModelServer %}
+  MODEL_NAME: "${{ values.modelName }}"
 {%- endif %}
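To make the two branches concrete, here is a sketch of how this ConfigMap could render for an app named my-chat (the endpoint and model name are invented examples). With an existing model server, MODEL_ENDPOINT comes straight from user input and MODEL_NAME is now emitted for that case too; otherwise the endpoint points at the in-cluster model-server Service:

    # existingModelServer = true (hypothetical user inputs):
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: my-chat-model-config
    data:
      MODEL_ENDPOINT: "https://models.example.com:8443"
      MODEL_NAME: "granite-7b-lab"

    # existingModelServer = false, vllmSelected = false:
    # MODEL_ENDPOINT: "http://my-chat-model-server:8001", and no MODEL_NAME key.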
79 changes: 68 additions & 11 deletions skeleton/template.yaml
@@ -15,9 +15,7 @@ spec:
       required:
         - name
         - owner
-        # SED_MODEL_SERVER_START
         - modelServer
-        # SED_MODEL_SERVER_END
       properties:
         name:
           title: Name
@@ -37,17 +35,74 @@ spec:
         ui:options:
           catalogFilter:
             kind: [Group, User]
-        # SED_MODEL_SERVER_START
         modelServer:
-          title: Large Language Model(LLM) Server
+          # SED_LLM_SERVER_START
+          title: Large Language Model(LLM) Server
           description: The model server container to use
+          ui:help: If you choose vLLM, ensure your cluster has Nvidia GPU supported, and is with enough cpu and memory
+          default: llama.cpp
           type: string
           enum:
-            - llama.cpp
             - vLLM
-          default: llama.cpp
-          ui:help: If you choose vLLM, ensure your cluster has Nvidia GPU supported, and is with enough cpu and memory
-        # SED_MODEL_SERVER_END
+            - llama.cpp
+          # SED_LLM_SERVER_END
+          # SED_ASR_MODEL_SERVER_START
+          title: ASR Model Server
+          description: The model server container to use. The deployed model on the server must support automatic speech recognition (ASR).
+          type: string
+          default: whisper.cpp
+          enum:
+            - whisper.cpp
+          # SED_ASR_MODEL_SERVER_END
+          # SED_DETR_MODEL_SERVER_START
+          title: DETR Model Server
+          description: The model server container to use. The deployed model on the server must support object detection.
+          type: string
+          default: detr-resnet-101
+          enum:
+            - detr-resnet-101
+          # SED_DETR_MODEL_SERVER_END
+            - Existing model server
+      dependencies:
+        modelServer:
+          oneOf:
+            - required:
+                - modelEndpoint
+                # SED_LLM_SERVER_START
+                - modelName
+                # SED_LLM_SERVER_END
+              properties:
+                modelServer:
+                  const: Existing model server
+                modelEndpoint:
+                  title: Model Server Endpoint
+                  type: string
+                  description: "The endpoint for an existing model server."
+                # SED_LLM_SERVER_START
+                modelName:
+                  title: Model Name
+                  type: string
+                  ui:help: "The name of the model deployed on the model server you would like to use."
+                # SED_LLM_SERVER_END
+            # SED_LLM_SERVER_START
+            - properties:
+                modelServer:
+                  const: vLLM
+            - properties:
+                modelServer:
+                  const: llama.cpp
+            # SED_LLM_SERVER_END
+            # SED_ASR_MODEL_SERVER_START
+            - properties:
+                modelServer:
+                  const: whisper.cpp
+            # SED_ASR_MODEL_SERVER_END
+            # SED_DETR_MODEL_SERVER_START
+            - properties:
+                modelServer:
+                  const: detr-resnet-101
+            # SED_DETR_MODEL_SERVER_END
+
     - title: Application Repository Information
       required:
         - hostType
@@ -245,13 +300,15 @@
           appPort: ${APP_PORT}
           modelServiceContainer: ${MODEL_SERVICE_CONTAINER}
           modelServicePort: ${MODEL_SERVICE_PORT}
-          # SED_MODEL_SERVER_START
+          # SED_LLM_SERVER_START
           # for vllm
           vllmSelected: ${{ parameters.modelServer === 'vLLM' }}
           vllmModelServiceContainer: ${VLLM_CONTAINER}
-          vllmModelName: ${VLLM_MODEL_NAME}
+          modelName: ${{ parameters.modelName if parameters.modelServer === 'Existing model server' else '${VLLM_MODEL_NAME}' }}
           maxModelLength: ${VLLM_MAX_MODEL_LEN}
-          # SED_MODEL_SERVER_END
+          # SED_LLM_SERVER_END
+          existingModelServer: ${{ parameters.modelServer === 'Existing model server' }}
+          modelEndpoint: ${{ parameters.modelEndpoint }}
           # for RHOAI
           rhoaiSelected: ${{ parameters.rhoaiSelected }}
     - action: fs:rename
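The `dependencies.modelServer.oneOf` block added here is the standard JSON Schema way to make form fields conditional: `modelEndpoint` (and, for LLM samples, `modelName`) only appears and becomes required when the user selects `Existing model server`. A minimal self-contained sketch of the same pattern, with invented field names:

    # Hypothetical schema illustrating the oneOf/dependencies pattern:
    # "endpoint" is only shown and required when "kind" is "external".
    properties:
      kind:
        type: string
        enum: [bundled, external]
    dependencies:
      kind:
        oneOf:
          - properties:
              kind:
                const: bundled
          - required:
              - endpoint
            properties:
              kind:
                const: external
              endpoint:
                type: string

The new `modelName` value line pairs with this: the inline conditional `${{ parameters.modelName if ... else '${VLLM_MODEL_NAME}' }}` passes the user-supplied name through when an existing server was chosen and otherwise falls back to the sample's default vLLM model.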
31 changes: 31 additions & 0 deletions templates/audio-to-text/template.yaml
@@ -15,6 +15,7 @@ spec:
       required:
         - name
         - owner
+        - modelServer
       properties:
         name:
           title: Name
@@ -34,6 +35,34 @@ spec:
         ui:options:
           catalogFilter:
             kind: [Group, User]
+        modelServer:
+          # SED_ASR_MODEL_SERVER_START
+          title: ASR Model Server
+          description: The model server container to use. The deployed model on the server must support automatic speech recognition (ASR).
+          type: string
+          default: whisper.cpp
+          enum:
+            - whisper.cpp
+          # SED_ASR_MODEL_SERVER_END
+            - Existing model server
+      dependencies:
+        modelServer:
+          oneOf:
+            - required:
+                - modelEndpoint
+              properties:
+                modelServer:
+                  const: Existing model server
+                modelEndpoint:
+                  title: Model Server Endpoint
+                  type: string
+                  description: "The endpoint for an existing model server."
+            # SED_ASR_MODEL_SERVER_START
+            - properties:
+                modelServer:
+                  const: whisper.cpp
+            # SED_ASR_MODEL_SERVER_END
+
     - title: Application Repository Information
       required:
         - hostType
@@ -231,6 +260,8 @@
           appPort: 8501
           modelServiceContainer: quay.io/redhat-ai-dev/whispercpp:latest
           modelServicePort: 8001
+          existingModelServer: ${{ parameters.modelServer === 'Existing model server' }}
+          modelEndpoint: ${{ parameters.modelEndpoint }}
           # for RHOAI
           rhoaiSelected: ${{ parameters.rhoaiSelected }}
     - action: fs:rename
53 changes: 42 additions & 11 deletions templates/chatbot/template.yaml
@@ -15,9 +15,7 @@ spec:
       required:
         - name
         - owner
-        # SED_MODEL_SERVER_START
         - modelServer
-        # SED_MODEL_SERVER_END
       properties:
         name:
           title: Name
@@ -37,17 +35,48 @@ spec:
         ui:options:
           catalogFilter:
             kind: [Group, User]
-        # SED_MODEL_SERVER_START
         modelServer:
-          title: Large Language Model(LLM) Server
+          # SED_LLM_SERVER_START
+          title: Large Language Model(LLM) Server
           description: The model server container to use
+          ui:help: If you choose vLLM, ensure your cluster has Nvidia GPU supported, and is with enough cpu and memory
+          default: llama.cpp
           type: string
           enum:
-            - llama.cpp
             - vLLM
-          default: llama.cpp
-          ui:help: If you choose vLLM, ensure your cluster has Nvidia GPU supported, and is with enough cpu and memory
-        # SED_MODEL_SERVER_END
+            - llama.cpp
+          # SED_LLM_SERVER_END
+            - Existing model server
+      dependencies:
+        modelServer:
+          oneOf:
+            - required:
+                - modelEndpoint
+                # SED_LLM_SERVER_START
+                - modelName
+                # SED_LLM_SERVER_END
+              properties:
+                modelServer:
+                  const: Existing model server
+                modelEndpoint:
+                  title: Model Server Endpoint
+                  type: string
+                  description: "The endpoint for an existing model server."
+                # SED_LLM_SERVER_START
+                modelName:
+                  title: Model Name
+                  type: string
+                  ui:help: "The name of the model deployed on the model server you would like to use."
+                # SED_LLM_SERVER_END
+            # SED_LLM_SERVER_START
+            - properties:
+                modelServer:
+                  const: vLLM
+            - properties:
+                modelServer:
+                  const: llama.cpp
+            # SED_LLM_SERVER_END
+
     - title: Application Repository Information
       required:
         - hostType
@@ -245,13 +274,15 @@
           appPort: 8501
           modelServiceContainer: quay.io/ai-lab/llamacpp_python:latest
           modelServicePort: 8001
-          # SED_MODEL_SERVER_START
+          # SED_LLM_SERVER_START
           # for vllm
           vllmSelected: ${{ parameters.modelServer === 'vLLM' }}
           vllmModelServiceContainer: quay.io/rh-aiservices-bu/vllm-openai-ubi9:0.4.2
-          vllmModelName: instructlab/granite-7b-lab
+          modelName: ${{ parameters.modelName if parameters.modelServer === 'Existing model server' else 'instructlab/granite-7b-lab' }}
           maxModelLength: 4096
-          # SED_MODEL_SERVER_END
+          # SED_LLM_SERVER_END
+          existingModelServer: ${{ parameters.modelServer === 'Existing model server' }}
+          modelEndpoint: ${{ parameters.modelEndpoint }}
           # for RHOAI
           rhoaiSelected: ${{ parameters.rhoaiSelected }}
     - action: fs:rename