# Workflow run #409: Run performance test on a Kubernetes cluster

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Run performance test on a Kubernetes cluster

# Manually triggered only; every knob of the benchmark run is a dispatch input.
on:
  workflow_dispatch:
    inputs:
      # Label of the runner that executes the benchmark job.
      runner:
        description: "Runner label 'aise-perf'"
        type: string
        required: true
        default: gaudi3
      # Gates the uninstall and unlabel steps at the end of the benchmark job.
      cleanup:
        description: 'Whether to clean up the pods and the labels'
        type: boolean
        required: true
        default: true
      # Comma-separated list; each entry becomes one matrix job.
      node_nums:
        description: 'List of test node numbers, e.g., 1,2,4,8'
        type: string
        required: true
        default: '1'
      registry:
        description: 'Registry to store images, empty string means docker.io, default is empty'
        type: string
        required: false
        default: ''
      tag:
        description: 'Tag to apply to images'
        type: string
        required: true
        default: latest
      # Git refs checked out for each of the three companion repositories.
      example_branch:
        description: 'GenAIExamples branch to test manifests'
        type: string
        required: true
        default: suyue/perf
      infra_branch:
        description: 'GenAIInfra branch to test manifests'
        type: string
        required: true
        default: main
      eval_branch:
        description: 'GenAIEval branch to test manifests'
        type: string
        required: true
        default: main
      mode:
        description: 'The mode of the test, e.g., with_rerank:tuned'
        type: string
        required: true
        default: 'with_rerank:oob'
      test_cases:
        description: 'The test cases of chatqna, e.g., e2e, llmserve, embedserve, rerankserve'
        type: string
        required: false
        default: 'e2e'
      # Disabled inputs. The run-benchmark job still reads `inputs.user_queries`
      # and `inputs.load_config`, which evaluate to empty while these stay
      # commented out.
      # NOTE(review): presumably disabled to stay within the workflow_dispatch
      # input-count limit - confirm before re-enabling.
      # user_queries:
      #   default: ''
      #   description: "The user query list, e.g., '4, 8, 16, 640', empty input means the default setting."
      #   required: false
      #   type: string
      # load_config:
      #   default: "constant:5"
      #   description: "configuration for load test in format load_type:value, constant:5 or poisson:1.0"
      #   required: false
      #   type: string
jobs:
  # Converts the comma-separated `node_nums` input into a JSON array of strings
  # that drives the matrix of the run-benchmark job.
  get-build-matrix:
    runs-on: ubuntu-latest
    outputs:
      node_nums: ${{ steps.get-services.outputs.node_nums }}
    steps:
      - name: Get test Services
        id: get-services
        env:
          # Handed over via the environment instead of being interpolated into
          # the script, so the input text is never parsed as shell code.
          NODE_NUMS: ${{ github.event.inputs.node_nums }}
        run: |
          set -x
          # Commas become spaces, then default word splitting builds the list.
          read -r -a node_num_list <<< "${NODE_NUMS//,/ }"
          # Numeric sort so that e.g. 16 does not land between 1 and 2.
          node_nums=$(printf '%s\n' "${node_num_list[@]}" | sort -n | jq -R '.' | jq -sc '.')
          echo "node_nums=$node_nums" >> "$GITHUB_OUTPUT"

  # Runs once per requested node count: labels nodes, installs the workload,
  # runs the benchmark, uploads the raw results, then optionally cleans up.
  run-benchmark:
    needs: [get-build-matrix]
    runs-on: "${{ inputs.runner }}"
    env:
      conda_env_name: "OPEA_perf"
    strategy:
      matrix:
        node_num: ${{ fromJSON(needs.get-build-matrix.outputs.node_nums) }}
    steps:
      - name: Clean Up Working Directory
        run: |
          # `:?` aborts instead of expanding to `/*` if the variable is ever empty.
          sudo rm -rf "${GITHUB_WORKSPACE:?}"/*
          export PATH=${HOME}/miniforge3/bin/:$PATH
          # Create the conda environment only on the first run on this runner.
          if conda info --envs | grep -q "$conda_env_name"; then
            echo "$conda_env_name exist!"
          else
            conda create -n ${conda_env_name} python=3.12 -y
          fi

      - name: Checkout out Validation
        uses: actions/checkout@v4
        with:
          path: Validation

      - name: Checkout out GenAIExamples
        uses: actions/checkout@v4
        with:
          repository: opea-project/GenAIExamples
          ref: ${{ inputs.example_branch }}
          path: GenAIExamples

      - name: Checkout out GenAIEval
        uses: actions/checkout@v4
        with:
          repository: opea-project/GenAIEval
          ref: ${{ inputs.eval_branch }}
          path: GenAIEval

      - name: Checkout out GenAIInfra
        uses: actions/checkout@v4
        with:
          repository: opea-project/GenAIInfra
          ref: ${{ inputs.infra_branch }}
          path: GenAIInfra

      - name: Set up stress tool
        env:
          # `load_config` is not an active dispatch input in this workflow, so
          # this is empty unless that input is re-enabled.
          LOAD_CONFIG: ${{ inputs.load_config }}
        run: |
          export PATH=${HOME}/miniforge3/bin/:$PATH
          source activate ${conda_env_name}
          # Install yq system-wide only when the runner does not have it yet.
          if ! command -v yq &> /dev/null; then
            sudo wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/bin/yq
            sudo chmod +x /usr/bin/yq
          fi
          pip install pandas # install python lib pandas for csv processing
          cp Validation/.github/scripts/process_csv.py GenAIEval/evals/benchmark # copy process_csv.py to GenAIEval folder
          cd GenAIEval
          pip install -r requirements.txt
          # config env values: LOAD_CONFIG has the form load_type:value
          if [ -z "$LOAD_CONFIG" ]; then
            echo "No load_config given; LOAD_SHAPE is left unset."
          else
            LOAD_SHAPE=$(echo "$LOAD_CONFIG" | cut -d':' -f1 | xargs)
            second_part=$(echo "$LOAD_CONFIG" | cut -d':' -f2 | xargs)
            if [ "$LOAD_SHAPE" == "constant" ]; then
              echo "LOAD_SHAPE=$LOAD_SHAPE" >> "$GITHUB_ENV"
              echo "CONCURRENT_LEVEL=$second_part" >> "$GITHUB_ENV"
            elif [ "$LOAD_SHAPE" == "poisson" ]; then
              echo "LOAD_SHAPE=$LOAD_SHAPE" >> "$GITHUB_ENV"
              echo "ARRIVAL_RATE=$second_part" >> "$GITHUB_ENV"
            else
              # Deliberately non-fatal, as before: nothing is exported.
              echo "Unknown LOAD_SHAPE: $LOAD_SHAPE"
            fi
          fi

      - name: K8s Label Nodes
        working-directory: ./Validation
        env:
          NODE_NUM: ${{ matrix.node_num }}
        run: |
          # Recorded before labelling so the unlabel step at the end also runs
          # when the label script fails part-way.
          echo "uncordon=true" >> "$GITHUB_ENV"
          .github/scripts/perf_test.sh --label

      - name: Prepare benchmark configuration
        id: prepare_benchmark
        working-directory: ./Validation
        env:
          TEST_OUTPUT_DIR: /home/sdp/benchmark_output/node_${{ matrix.node_num }}
          TEST_CASES: ${{ inputs.test_cases }}
          # `user_queries` is not an active dispatch input in this workflow, so
          # this is empty unless that input is re-enabled.
          USER_QUERIES: ${{ inputs.user_queries }}
          NODE_NUM: ${{ matrix.node_num }}
        run: |
          # Start from a clean output directory for this node count.
          rm -rf "${TEST_OUTPUT_DIR:?}"
          .github/scripts/perf_test.sh --generate_config "$NODE_NUM"
          echo "uninstall=false" >> "$GITHUB_ENV"
          # Random suffix used in the uploaded artifact name.
          echo "randomstr=$RANDOM" >> "$GITHUB_OUTPUT"
          echo "TEST_OUTPUT_DIR=$TEST_OUTPUT_DIR" >> "$GITHUB_ENV"

      - name: Install Workload
        working-directory: ./Validation
        env:
          IMAGE_REPO: ${{ inputs.registry }}
          IMAGE_TAG: ${{ inputs.tag }}
          HF_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
          MODE: ${{ inputs.mode }}
          NODE_NUM: ${{ matrix.node_num }}
        run: |
          export PATH=${HOME}/miniforge3/bin/:$PATH
          source activate ${conda_env_name}
          # Flag is set before installing so a partial install is still removed
          # by the uninstall step.
          echo "uninstall=true" >> "$GITHUB_ENV"
          .github/scripts/perf_test.sh --installChatQnA

      - name: Stress Test
        working-directory: ./GenAIEval
        env:
          TEST_OUTPUT_DIR: /home/sdp/benchmark_output/node_${{ matrix.node_num }}
          TEST_CASES: ${{ inputs.test_cases }}
          NODE_NUM: ${{ matrix.node_num }}
        run: |
          export PATH=${HOME}/miniforge3/bin/:$PATH
          source activate ${conda_env_name}
          cd evals/benchmark
          python benchmark.py
          "${{ github.workspace }}/Validation/.github/scripts/perf_test.sh" --process_result_data
          # Copy results to the workspace for printing, and bundle them for upload.
          cp "$TEST_OUTPUT_DIR"/*_result.csv "${{ github.workspace }}"
          cp "$TEST_OUTPUT_DIR"/*_testspec.yaml "${{ github.workspace }}"
          tar -cvf "${{ github.workspace }}/node_${NODE_NUM}.tar" "$TEST_OUTPUT_DIR"/*_result.csv "$TEST_OUTPUT_DIR"/*_testspec.yaml

      - name: Print Test Result
        run: |
          for file in "${{ github.workspace }}"/*_result.csv; do
            if [[ -f "$file" ]]; then
              echo "Printing contents of: $file"
              cat "$file"
              echo "-----------------------------------"
            fi
          done
          # Only the first test specification found is dumped.
          spec_file=$(find "${{ github.workspace }}" -maxdepth 1 -name '*_testspec.yaml' | head -n 1)
          echo "Dump test specification: $spec_file"
          cat "$spec_file"

      # NOTE(review): the original reference pinned a specific v4.x.y release
      # that is no longer readable here; confirm the exact version to pin.
      - uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.node_num }}node_raw_data_tar_${{ steps.prepare_benchmark.outputs.randomstr }}
          path: node_${{ matrix.node_num }}.tar
          overwrite: true

      - name: Uninstall Workload
        if: always()
        working-directory: ./Validation
        env:
          MODE: ${{ inputs.mode }}
          NODE_NUM: ${{ matrix.node_num }}
          CLEANUP: ${{ inputs.cleanup }}
        run: |
          # `uninstall` is exported by earlier steps through GITHUB_ENV.
          if [[ "$uninstall" == true && "$CLEANUP" == true ]]; then
            .github/scripts/perf_test.sh --uninstallChatQnA
            sleep 200s
          fi

      - name: K8s Unlabel Nodes
        if: always()
        working-directory: ./Validation
        env:
          CLEANUP: ${{ inputs.cleanup }}
        run: |
          # `uncordon` is exported by the label step through GITHUB_ENV.
          if [[ "$uncordon" == true && "$CLEANUP" == true ]]; then
            .github/scripts/perf_test.sh --unlabel
            sleep 10s
          fi