# Run performance test on a Kubernetes cluster (workflow file captured from run #409)
# NOTE: the hosting page flagged this file as containing bidirectional Unicode text that may be
# interpreted or compiled differently than it appears. Review it in an editor that reveals
# hidden Unicode characters.
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Run performance test on a Kubernetes cluster

# Manual trigger only: every parameter of a run is supplied as a dispatch input.
on:
  workflow_dispatch:
    inputs:
      # Label of the runner that executes the benchmark job.
      runner:
        default: "gaudi3"
        description: "Runner label 'aise-perf'"
        required: true
        type: string
      # Gates the uninstall and unlabel steps at the end of the benchmark job.
      cleanup:
        default: true
        description: "Whether to clean up the pods and the labels"
        required: true
        type: boolean
      # Comma-separated list; each entry becomes one matrix job.
      node_nums:
        default: "1"
        description: "List of test node numbers, e.g., 1,2,4,8"
        required: true
        type: string
      registry:
        default: ""
        description: "Registry to store images, empty string means docker.io, default is empty"
        required: false
        type: string
      tag:
        default: "latest"
        description: "Tag to apply to images"
        required: true
        type: string
      # Branches checked out from the three companion repositories.
      example_branch:
        default: "suyue/perf"
        description: "GenAIExamples branch to test manifests"
        required: true
        type: string
      infra_branch:
        default: "main"
        description: "GenAIInfra branch to test manifests"
        required: true
        type: string
      eval_branch:
        default: "main"
        description: "GenAIEval branch to test manifests"
        required: true
        type: string
      mode:
        default: "with_rerank:oob"
        description: "The mode of the test, e.g., with_rerank:tuned"
        required: true
        type: string
      test_cases:
        default: "e2e"
        description: "The test cases of chatqna, e.g., e2e, llmserve, embedserve, rerankserve"
        required: false
        type: string
      # The two inputs below are disabled. The jobs still read `inputs.user_queries` and
      # `inputs.load_config`; while these stay commented out those expressions are expected
      # to resolve to an empty value (NOTE(review): confirm before re-enabling or removing).
      # user_queries:
      #   default: ''
      #   description: "The user query list, e.g., '4, 8, 16, 640', empty input means the default setting."
      #   required: false
      #   type: string
      # load_config:
      #   default: "constant:5"
      #   description: "configuration for load test in format load_type:value, constant:5 or poisson:1.0"
      #   required: false
      #   type: string
jobs:
  # Converts the comma-separated `node_nums` input into a JSON array that drives the
  # benchmark matrix.
  get-build-matrix:
    runs-on: ubuntu-latest
    outputs:
      node_nums: ${{ steps.get-services.outputs.node_nums }}
    steps:
      - name: Get test Services
        id: get-services
        env:
          # Handed over through the environment instead of being interpolated into the
          # script, so the input text is never parsed as shell code.
          NODE_NUMS: ${{ inputs.node_nums }}
        run: |
          set -x
          # Split on commas/spaces, drop empty entries and sort numerically
          # (a plain `sort` would order 16 before 2).
          node_nums=$(printf '%s\n' "$NODE_NUMS" | tr -s ', ' '\n' | sed '/^$/d' | sort -n | jq -R '.' | jq -sc '.')
          if [[ "$node_nums" == "[]" ]]; then
            echo "::error::node_nums must contain at least one node count"
            exit 1
          fi
          echo "node_nums=$node_nums" >> "$GITHUB_OUTPUT"

  # Runs one benchmark per requested node count on the selected runner:
  # label nodes -> generate config -> install workload -> stress test -> publish -> clean up.
  run-benchmark:
    needs: [get-build-matrix]
    runs-on: "${{ inputs.runner }}"
    env:
      conda_env_name: "OPEA_perf"
    strategy:
      matrix:
        node_num: ${{ fromJSON(needs.get-build-matrix.outputs.node_nums) }}
    steps:
      - name: Clean Up Working Directory
        run: |
          sudo rm -rf "${{ github.workspace }}"/*
          export PATH=${HOME}/miniforge3/bin/:$PATH
          # Reuse the conda environment when it already exists on the runner.
          if conda info --envs | grep -q "$conda_env_name"; then
            echo "$conda_env_name exist!"
          else
            conda create -n ${conda_env_name} python=3.12 -y
          fi

      - name: Checkout out Validation
        uses: actions/checkout@v4
        with:
          path: Validation

      - name: Checkout out GenAIExamples
        uses: actions/checkout@v4
        with:
          repository: opea-project/GenAIExamples
          ref: ${{ inputs.example_branch }}
          path: GenAIExamples

      - name: Checkout out GenAIEval
        uses: actions/checkout@v4
        with:
          repository: opea-project/GenAIEval
          ref: ${{ inputs.eval_branch }}
          path: GenAIEval

      - name: Checkout out GenAIInfra
        uses: actions/checkout@v4
        with:
          repository: opea-project/GenAIInfra
          ref: ${{ inputs.infra_branch }}
          path: GenAIInfra

      - name: Set up stress tool
        env:
          # `load_config` is not currently declared as a dispatch input, so this is
          # expected to be empty unless that input is re-enabled.
          LOAD_CONFIG: ${{ inputs.load_config }}
        run: |
          export PATH=${HOME}/miniforge3/bin/:$PATH
          source activate ${conda_env_name}
          if ! command -v yq &> /dev/null; then
            sudo wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/bin/yq
            sudo chmod +x /usr/bin/yq
          fi
          pip install pandas  # install python lib pandas for csv processing
          cp Validation/.github/scripts/process_csv.py GenAIEval/evals/benchmark  # copy process_csv.py to GenAIEval folder
          cd GenAIEval
          pip install -r requirements.txt
          # config env values: LOAD_CONFIG has the form load_type:value
          if [[ -z "$LOAD_CONFIG" ]]; then
            echo "No load_config provided; LOAD_SHAPE is left unset."
          else
            LOAD_SHAPE=$(echo "$LOAD_CONFIG" | cut -d':' -f1 | xargs)
            second_part=$(echo "$LOAD_CONFIG" | cut -d':' -f2 | xargs)
            if [ "$LOAD_SHAPE" == "constant" ]; then
              echo "LOAD_SHAPE=$LOAD_SHAPE" >> $GITHUB_ENV
              echo "CONCURRENT_LEVEL=$second_part" >> $GITHUB_ENV
            elif [ "$LOAD_SHAPE" == "poisson" ]; then
              echo "LOAD_SHAPE=$LOAD_SHAPE" >> $GITHUB_ENV
              echo "ARRIVAL_RATE=$second_part" >> $GITHUB_ENV
            else
              echo "Unknown LOAD_SHAPE: $LOAD_SHAPE"
            fi
          fi

      - name: K8s Label Nodes
        working-directory: ./Validation
        env:
          NODE_NUM: ${{ matrix.node_num }}
        run: |
          # Flag read by the "K8s Unlabel Nodes" step; set before labelling so cleanup
          # still runs if the script fails part-way.
          echo "uncordon=true" >> $GITHUB_ENV
          .github/scripts/perf_test.sh --label

      - name: Prepare benchmark configuration
        id: prepare_benchmark
        working-directory: ./Validation
        env:
          TEST_OUTPUT_DIR: /home/sdp/benchmark_output/node_${{ matrix.node_num }}
          TEST_CASES: ${{ inputs.test_cases }}
          USER_QUERIES: ${{ inputs.user_queries }}
          NODE_NUM: ${{ matrix.node_num }}
        run: |
          rm -rf "$TEST_OUTPUT_DIR"
          .github/scripts/perf_test.sh --generate_config "$NODE_NUM"
          echo "uninstall=false" >> $GITHUB_ENV
          # Random suffix used in the uploaded artifact's name.
          echo "randomstr=$RANDOM" >> $GITHUB_OUTPUT
          echo "TEST_OUTPUT_DIR=$TEST_OUTPUT_DIR" >> $GITHUB_ENV

      - name: Install Workload
        working-directory: ./Validation
        env:
          IMAGE_REPO: ${{ inputs.registry }}
          IMAGE_TAG: ${{ inputs.tag }}
          HF_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
          MODE: ${{ inputs.mode }}
          NODE_NUM: ${{ matrix.node_num }}
        run: |
          export PATH=${HOME}/miniforge3/bin/:$PATH
          source activate ${conda_env_name}
          # Flag read by the "Uninstall Workload" step; set before installing so a
          # partial install is still cleaned up.
          echo "uninstall=true" >> $GITHUB_ENV
          .github/scripts/perf_test.sh --installChatQnA

      - name: Stress Test
        working-directory: ./GenAIEval
        env:
          TEST_OUTPUT_DIR: /home/sdp/benchmark_output/node_${{ matrix.node_num }}
          TEST_CASES: ${{ inputs.test_cases }}
          NODE_NUM: ${{ matrix.node_num }}
        run: |
          export PATH=${HOME}/miniforge3/bin/:$PATH
          source activate ${conda_env_name}
          cd evals/benchmark
          python benchmark.py
          ${{ github.workspace }}/Validation/.github/scripts/perf_test.sh --process_result_data
          # Copy the results into the workspace for printing and bundle them for upload.
          cp "$TEST_OUTPUT_DIR"/*_result.csv "${{ github.workspace }}"
          cp "$TEST_OUTPUT_DIR"/*_testspec.yaml "${{ github.workspace }}"
          tar -cvf "${{ github.workspace }}/node_${NODE_NUM}.tar" "$TEST_OUTPUT_DIR"/*_result.csv "$TEST_OUTPUT_DIR"/*_testspec.yaml

      - name: Print Test Result
        run: |
          for file in "${{ github.workspace }}"/*_result.csv; do
            if [[ -f "$file" ]]; then
              echo "Printing contents of: $file"
              cat "$file"
              echo "-----------------------------------"
            fi
          done
          spec_file=$(find "${{ github.workspace }}" -maxdepth 1 -name '*_testspec.yaml' | head -n 1)
          echo "Dump test specification: $spec_file"
          cat "$spec_file"

      # NOTE(review): the action version was garbled in the captured copy of this file; it is
      # restored here to the v4 major tag. Confirm the exact pinned version against the repo.
      - name: Upload Raw Data
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.node_num }}node_raw_data_tar_${{ steps.prepare_benchmark.outputs.randomstr }}
          path: node_${{ matrix.node_num }}.tar
          overwrite: true

      - name: Uninstall Workload
        if: always()
        working-directory: ./Validation
        env:
          MODE: ${{ inputs.mode }}
          NODE_NUM: ${{ matrix.node_num }}
        run: |
          # Only uninstall when the install step was reached and cleanup was requested.
          if [[ "$uninstall" == true && "${{ inputs.cleanup }}" == true ]]; then
            .github/scripts/perf_test.sh --uninstallChatQnA
            # presumably gives the workload time to terminate — TODO confirm
            sleep 200s
          fi

      - name: K8s Unlabel Nodes
        if: always()
        working-directory: ./Validation
        run: |
          # Only unlabel when the label step was reached and cleanup was requested.
          if [[ "$uncordon" == true && "${{ inputs.cleanup }}" == true ]]; then
            .github/scripts/perf_test.sh --unlabel
            sleep 10s
          fi