CI - Model Regression on schedule #57
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The docs: https://www.notion.so/rasa/The-CI-for-model-regression-tests-92af7185e08e4fb2a0c764770a8e9095 | |
name: CI - Model Regression on schedule | |
on: | |
schedule: | |
# Run once a week | |
- cron: "1 23 * * */7" | |
env: | |
GKE_ZONE: us-central1 | |
GCLOUD_VERSION: "318.0.0" | |
TF_FORCE_GPU_ALLOW_GROWTH: true | |
GITHUB_ISSUE_LABELS: '["type:bug :bug:", "tool:model-regression-tests"]' | |
PERFORMANCE_DROP_THRESHOLD: -0.05 | |
NVML_INTERVAL_IN_SEC: 1 | |
jobs: | |
read_test_configuration: | |
name: Reads tests configuration | |
runs-on: ubuntu-22.04 | |
outputs: | |
matrix: ${{ steps.set-matrix.outputs.matrix }} | |
matrix_length: ${{ steps.set-matrix.outputs.matrix_length }} | |
steps: | |
- name: Checkout main | |
uses: actions/checkout@v3 | |
- name: Checkout dataset | |
uses: actions/checkout@v3 | |
with: | |
repository: ${{ secrets.DATASET_REPOSITORY }} | |
token: ${{ secrets.ML_TEST_SA_PAT }} | |
path: "dataset" | |
ref: "main" | |
- name: Download gomplate | |
run: |- | |
sudo curl -o /usr/local/bin/gomplate -sSL https://github.com/hairyhenderson/gomplate/releases/download/v3.9.0/gomplate_linux-amd64 | |
sudo chmod +x /usr/local/bin/gomplate | |
- name: Check if a configuration file exists | |
run: test -f .github/configs/mr-test-schedule.json | |
- name: Set matrix values | |
id: set-matrix | |
shell: bash | |
run: |- | |
matrix=$(gomplate -d mapping=./dataset/dataset_config_mapping.json -d github=.github/configs/mr-test-schedule.json -f .github/templates/model_regression_test_config_to_json.tmpl) | |
matrix_length=$(echo $matrix | jq '.[] | length') | |
echo "::set-output name=matrix_length::$matrix_length" | |
echo "::set-output name=matrix::$matrix" | |
deploy_runner_gpu: | |
name: Deploy Github Runner - GPU | |
needs: read_test_configuration | |
runs-on: ubuntu-22.04 | |
steps: | |
- name: Checkout | |
uses: actions/checkout@v3 | |
- name: Setup Python | |
id: python | |
uses: actions/setup-python@c4e89fac7e8767b327bbad6cb4d859eda999cf08 | |
with: | |
python-version: '3.8' | |
- name: Export CLOUDSDK_PYTHON env variable | |
run: | | |
echo "CLOUDSDK_PYTHON=${{ steps.python.outputs.python-path }}" >> $GITHUB_OUTPUT | |
export CLOUDSDK_PYTHON=${{ steps.python.outputs.python-path }} | |
- name: Download gomplate | |
run: |- | |
curl -o gomplate -sSL https://github.com/hairyhenderson/gomplate/releases/download/v3.9.0/gomplate_linux-amd64 | |
chmod 755 gomplate | |
- name: Get TensorFlow version | |
run: |- | |
# Read TF version from poetry.lock file | |
pip install toml | |
TF_VERSION=$(scripts/read_tensorflow_version.sh) | |
echo "TensorFlow version: $TF_VERSION" | |
echo TF_VERSION=$TF_VERSION >> $GITHUB_ENV | |
# Use compatible CUDA/cuDNN with the given TF version | |
- name: Prepare GitHub runner image tag | |
run: |- | |
GH_RUNNER_IMAGE_TAG=$(jq -r 'if (.config | any(.TF == "${{ env.TF_VERSION }}" )) then (.config[] | select(.TF == "${{ env.TF_VERSION }}") | .IMAGE_TAG) else .default_image_tag end' .github/configs/tf-cuda.json) | |
echo "GitHub runner image tag for TensorFlow ${{ env.TF_VERSION }} is ${GH_RUNNER_IMAGE_TAG}" | |
echo GH_RUNNER_IMAGE_TAG=$GH_RUNNER_IMAGE_TAG >> $GITHUB_ENV | |
num_max_replicas=3 | |
matrix_length=${{ needs.read_test_configuration.outputs.matrix_length }} | |
if [[ $matrix_length -gt $num_max_replicas ]]; then | |
NUM_REPLICAS=$num_max_replicas | |
else | |
NUM_REPLICAS=$matrix_length | |
fi | |
echo NUM_REPLICAS=$NUM_REPLICAS >> $GITHUB_ENV | |
- name: Send warning if the current TF version does not have CUDA image tags configured | |
if: env.GH_RUNNER_IMAGE_TAG == 'latest' | |
env: | |
TF_CUDA_FILE: ./github/config/tf-cuda.json | |
run: |- | |
echo "::warning file=${TF_CUDA_FILE},line=3,col=1,endColumn=3::Missing cuda config for tf ${{ env.TF_VERSION }}. If you are not sure how to config CUDA, please reach out to infrastructure." | |
- name: Notify slack on tf-cuda config updates | |
if: env.GH_RUNNER_IMAGE_TAG == 'latest' | |
env: | |
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} | |
uses: voxmedia/github-action-slack-notify-build@212e9f7a9ca33368c8dd879d6053972128258985 | |
with: | |
channel_id: ${{ secrets.SLACK_ALERTS_CHANNEL_ID }} | |
status: WARNING | |
color: warning | |
- name: Render deployment template | |
run: |- | |
export GH_RUNNER_IMAGE_TAG=${{ env.GH_RUNNER_IMAGE_TAG }} | |
export GH_RUNNER_IMAGE=${{ secrets.GH_RUNNER_IMAGE }} | |
./gomplate -f .github/runner/github-runner-deployment.yaml.tmpl -o runner_deployment.yaml | |
# Setup gcloud auth | |
- uses: google-github-actions/auth@ef5d53e30bbcd8d0836f4288f5e50ff3e086997d | |
with: | |
service_account: ${{ secrets.GKE_RASA_CI_GPU_SA_NAME_RASA_CI_CD }} | |
credentials_json: ${{ secrets.GKE_SA_RASA_CI_CD_GPU_RASA_CI_CD }} | |
# Get the GKE credentials for the cluster | |
- name: Get GKE Cluster Credentials | |
uses: google-github-actions/get-gke-credentials@894c221960ab1bc16a69902f29f090638cca753f | |
with: | |
cluster_name: ${{ secrets.GKE_GPU_CLUSTER_RASA_CI_CD }} | |
location: ${{ env.GKE_ZONE }} | |
project_id: ${{ secrets.GKE_SA_RASA_CI_GPU_PROJECT_RASA_CI_CD }} | |
- name: Deploy Github Runner | |
run: |- | |
kubectl apply -f runner_deployment.yaml | |
kubectl -n github-runner rollout status --timeout=15m deployment/github-runner-$GITHUB_RUN_ID | |
- name: Notify slack on failure | |
if: failure() | |
env: | |
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} | |
uses: voxmedia/github-action-slack-notify-build@212e9f7a9ca33368c8dd879d6053972128258985 | |
with: | |
channel_id: ${{ secrets.SLACK_ALERTS_CHANNEL_ID }} | |
status: FAILED | |
color: danger | |
model_regression_test_gpu: | |
name: Model Regression Tests - GPU | |
continue-on-error: true | |
needs: | |
- deploy_runner_gpu | |
- read_test_configuration | |
env: | |
# Determine where CUDA and Nvidia libraries are located. TensorFlow looks for libraries in the given paths | |
LD_LIBRARY_PATH: "/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64" | |
ACCELERATOR_TYPE: "GPU" | |
GITHUB_ISSUE_TITLE: "Scheduled Model Regression Test Failed" | |
runs-on: [self-hosted, gpu, "${{ github.run_id }}"] | |
strategy: | |
# max-parallel: By default, GitHub will maximize the number of jobs run in parallel depending on the available runners on GitHub-hosted virtual machines. | |
matrix: ${{fromJson(needs.read_test_configuration.outputs.matrix)}} | |
steps: | |
- name: Checkout | |
uses: actions/checkout@v3 | |
- name: Checkout dataset | |
uses: actions/checkout@v3 | |
with: | |
repository: ${{ secrets.DATASET_REPOSITORY }} | |
token: ${{ secrets.ML_TEST_SA_PAT }} | |
path: "dataset" | |
ref: "main" | |
- name: Set env variables | |
id: set_dataset_config_vars | |
env: | |
DATASET_NAME: "${{ matrix.dataset }}" | |
CONFIG_NAME: "${{ matrix.config }}" | |
run: |- | |
# determine extra environment variables | |
# - CONFIG | |
# - DATASET | |
# - IS_EXTERNAL | |
# - EXTERNAL_DATASET_REPOSITORY_BRANCH | |
# - TRAIN_DIR | |
# - TEST_DIR | |
# - DOMAIN_FILE | |
source <(gomplate -d mapping=./dataset/dataset_config_mapping.json -f .github/templates/configuration_variables.tmpl) | |
# Not all configurations are available for all datasets. | |
# The job will fail and the workflow continues, if the configuration file doesn't exist | |
# for a given dataset | |
echo "::set-output name=is_dataset_exists::true" | |
echo "::set-output name=is_config_exists::true" | |
echo "::set-output name=is_external::${IS_EXTERNAL}" | |
if [[ "${IS_EXTERNAL}" == "true" ]]; then | |
echo "DATASET_DIR=dataset_external" >> $GITHUB_ENV | |
else | |
echo "DATASET_DIR=dataset" >> $GITHUB_ENV | |
test -d dataset/$DATASET || (echo "::warning::The ${{ matrix.dataset }} dataset doesn't exist. Skipping the job." \ | |
&& echo "::set-output name=is_config_exists::false" && exit 0) | |
fi | |
# Skip job if dataset is Hermit and config is BERT + DIET(seq) + ResponseSelector(t2t) or Sparse + BERT + DIET(seq) + ResponseSelector(t2t) | |
if [[ "${{ matrix.dataset }}" == "Hermit" && "${{ matrix.config }}" =~ "BERT + DIET(seq) + ResponseSelector(t2t)" ]]; then | |
echo "::warning::This ${{ matrix.dataset }} dataset / ${{ matrix.config }} config is currently being skipped due to OOM associated with the upgrade to TF 2.6." \ | |
&& echo "::set-output name=is_config_exists::false" && exit 0 | |
fi | |
# Skip job if a given type is not available for a given dataset | |
if [[ -z "${DOMAIN_FILE}" && "${{ matrix.type }}" == "core" ]]; then | |
echo "::warning::The ${{ matrix.dataset }} dataset doesn't include core type. Skipping the job." \ | |
&& echo "::set-output name=is_config_exists::false" && exit 0 | |
fi | |
test -f dataset/configs/$CONFIG || (echo "::warning::The ${{ matrix.config }} configuration file doesn't exist. Skipping the job." \ | |
&& echo "::set-output name=is_dataset_exists::false" && exit 0) | |
echo "DATASET=${DATASET}" >> $GITHUB_ENV | |
echo "CONFIG=${CONFIG}" >> $GITHUB_ENV | |
echo "DOMAIN_FILE=${DOMAIN_FILE}" >> $GITHUB_ENV | |
echo "EXTERNAL_DATASET_REPOSITORY_BRANCH=${EXTERNAL_DATASET_REPOSITORY_BRANCH}" >> $GITHUB_ENV | |
echo "IS_EXTERNAL=${IS_EXTERNAL}" >> $GITHUB_ENV | |
if [[ -z "${TRAIN_DIR}" ]]; then | |
echo "TRAIN_DIR=train" >> $GITHUB_ENV | |
else | |
echo "TRAIN_DIR=${TRAIN_DIR}" >> $GITHUB_ENV | |
fi | |
if [[ -z "${TEST_DIR}" ]]; then | |
echo "TEST_DIR=test" >> $GITHUB_ENV | |
else | |
echo "TEST_DIR=${TEST_DIR}" >> $GITHUB_ENV | |
fi | |
HOST_NAME=`hostname` | |
echo "HOST_NAME=${HOST_NAME}" >> $GITHUB_ENV | |
- name: Checkout dataset - external | |
uses: actions/checkout@v3 | |
if: steps.set_dataset_config_vars.outputs.is_external == 'true' | |
with: | |
repository: ${{ env.DATASET }} | |
token: ${{ secrets.ML_TEST_SA_PAT }} | |
path: "dataset_external" | |
ref: ${{ env.EXTERNAL_DATASET_REPOSITORY_BRANCH }} | |
- name: Set dataset commit | |
id: set-dataset-commit | |
working-directory: ${{ env.DATASET_DIR }} | |
run: | | |
DATASET_COMMIT=$(git rev-parse HEAD) | |
echo $DATASET_COMMIT | |
echo "::set-output name=dataset_commit::$DATASET_COMMIT" | |
- name: Start Datadog Agent | |
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' | |
env: | |
DATASET_NAME: "${{ matrix.dataset }}" | |
CONFIG: "${{ matrix.config }}" | |
DATASET_COMMIT: "${{ steps.set-dataset-commit.outputs.dataset_commit }}" | |
BRANCH: ${{ github.ref }} | |
GITHUB_SHA: "${{ github.sha }}" | |
TYPE: "${{ matrix.type }}" | |
DATASET_REPOSITORY_BRANCH: "main" | |
INDEX_REPETITION: "${{ matrix.index_repetition }}" | |
run: | | |
.github/scripts/start_dd_agent.sh "${{ secrets.DD_API_KEY }}" "${{ env.ACCELERATOR_TYPE }}" ${{ env.NVML_INTERVAL_IN_SEC }} | |
- name: Set up Python 3.10 🐍 | |
uses: actions/setup-python@b55428b1882923874294fa556849718a1d7f2ca5 | |
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' | |
with: | |
python-version: '3.10' | |
- name: Read Poetry Version 🔢 | |
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' | |
run: | | |
echo "POETRY_VERSION=$(scripts/poetry-version.sh)" >> $GITHUB_ENV | |
shell: bash | |
- name: Install poetry 🦄 | |
uses: Gr1N/setup-poetry@09236184f6c1ab47c0dc9c1001c7fe200cf2afb0 # v7 | |
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' | |
with: | |
poetry-version: ${{ env.POETRY_VERSION }} | |
- name: Load Poetry Cached Libraries ⬇ | |
uses: actions/cache@v3 | |
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' | |
with: | |
path: ~/.cache/pypoetry/virtualenvs | |
key: ${{ runner.os }}-poetry-${{ env.POETRY_VERSION }}-3.9-${{ hashFiles('**/poetry.lock') }}-${{ secrets.POETRY_CACHE_VERSION }} | |
- name: Install Dependencies 📦 | |
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' | |
run: | | |
make install-full | |
poetry run python -m spacy download de_core_news_md | |
- name: Install datadog-api-client | |
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' | |
run: poetry run pip install -U datadog-api-client | |
- name: Validate that GPUs are working | |
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' | |
run: |- | |
poetry run python .github/scripts/validate_gpus.py | |
- name: Download pretrained models 💪 | |
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' | |
run: |- | |
poetry run python .github/scripts/download_pretrained.py --config dataset/configs/${CONFIG} | |
- name: Run test | |
id: run_test | |
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' | |
env: | |
TFHUB_CACHE_DIR: ~/.tfhub_cache/ | |
OMP_NUM_THREADS: 1 | |
run: |- | |
poetry run rasa --version | |
export NOW_TRAIN=$(gomplate -i '{{ (time.Now).Format time.RFC3339}}'); | |
cd ${{ github.workspace }} | |
if [[ "${{ steps.set_dataset_config_vars.outputs.is_external }}" == "true" ]]; then | |
export DATASET=. | |
fi | |
if [[ "${{ matrix.type }}" == "nlu" ]]; then | |
poetry run rasa train nlu --quiet -u "${DATASET_DIR}/${DATASET}/${TRAIN_DIR}" -c "dataset/configs/${CONFIG}" --out "${DATASET_DIR}/models/${DATASET}/${CONFIG}" | |
echo "::set-output name=train_run_time::$(gomplate -i '{{ $t := time.Parse time.RFC3339 (getenv "NOW_TRAIN") }}{{ (time.Since $t).Round (time.Second 1) }}')" | |
export NOW_TEST=$(gomplate -i '{{ (time.Now).Format time.RFC3339}}'); | |
poetry run rasa test nlu --quiet -u "${DATASET_DIR}/$DATASET/${TEST_DIR}" -m "${DATASET_DIR}/models/$DATASET/$CONFIG" --out "${{ github.workspace }}/results/$DATASET/$CONFIG" | |
echo "::set-output name=test_run_time::$(gomplate -i '{{ $t := time.Parse time.RFC3339 (getenv "NOW_TEST") }}{{ (time.Since $t).Round (time.Second 1) }}')" | |
echo "::set-output name=total_run_time::$(gomplate -i '{{ $t := time.Parse time.RFC3339 (getenv "NOW_TRAIN") }}{{ (time.Since $t).Round (time.Second 1) }}')" | |
elif [[ "${{ matrix.type }}" == "core" ]]; then | |
poetry run rasa train core --quiet -s ${DATASET_DIR}/$DATASET/$TRAIN_DIR -c dataset/configs/$CONFIG -d ${DATASET_DIR}/${DATASET}/${DOMAIN_FILE} | |
echo "::set-output name=train_run_time::$(gomplate -i '{{ $t := time.Parse time.RFC3339 (getenv "NOW_TRAIN") }}{{ (time.Since $t).Round (time.Second 1) }}')" | |
export NOW_TEST=$(gomplate -i '{{ (time.Now).Format time.RFC3339}}'); | |
poetry run rasa test core -s "${DATASET_DIR}/${DATASET}/${TEST_DIR}" --out "${{ github.workspace }}/results/${{ matrix.dataset }}/${{ matrix.config }}" | |
echo "::set-output name=test_run_time::$(gomplate -i '{{ $t := time.Parse time.RFC3339 (getenv "NOW_TEST") }}{{ (time.Since $t).Round (time.Second 1) }}')" | |
echo "::set-output name=total_run_time::$(gomplate -i '{{ $t := time.Parse time.RFC3339 (getenv "NOW_TRAIN") }}{{ (time.Since $t).Round (time.Second 1) }}')" | |
fi | |
- name: Generate a JSON file with a report / Publish results to Datadog | |
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' | |
env: | |
SUMMARY_FILE: "./report.json" | |
DATASET_NAME: ${{ matrix.dataset }} | |
RESULT_DIR: "${{ github.workspace }}/results" | |
CONFIG: ${{ matrix.config }} | |
TEST_RUN_TIME: ${{ steps.run_test.outputs.test_run_time }} | |
TRAIN_RUN_TIME: ${{ steps.run_test.outputs.train_run_time }} | |
TOTAL_RUN_TIME: ${{ steps.run_test.outputs.total_run_time }} | |
DATASET_REPOSITORY_BRANCH: "main" | |
TYPE: ${{ matrix.type }} | |
INDEX_REPETITION: ${{ matrix.index_repetition }} | |
DATASET_COMMIT: ${{ steps.set-dataset-commit.outputs.dataset_commit }} | |
BRANCH: ${{ github.ref }} | |
PR_ID: "${{ github.event.number }}" | |
PR_URL: "" | |
DD_APP_KEY: ${{ secrets.DD_APP_KEY_PERF_TEST }} | |
DD_API_KEY: ${{ secrets.DD_API_KEY }} | |
DD_SITE: datadoghq.eu | |
run: |- | |
poetry run pip install analytics-python | |
poetry run python .github/scripts/mr_publish_results.py | |
cat $SUMMARY_FILE | |
- name: Upload an artifact with the report | |
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' | |
uses: actions/upload-artifact@v3 | |
with: | |
name: report-${{ matrix.dataset }}-${{ matrix.config }}-${{ matrix.index_repetition }} | |
path: report.json | |
# Prepare diagnostic data for the configs which evaluate dialog policy performance | |
- name: Prepare diagnostic data | |
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' && matrix.type == 'core' | |
run: | | |
# Create dummy files to preserve directory structure when uploading artifacts | |
# See: https://github.com/actions/upload-artifact/issues/174 | |
touch "results/${{ matrix.dataset }}/.keep" | |
touch "results/${{ matrix.dataset }}/${{ matrix.config }}/.keep" | |
- name: Upload an artifact with diagnostic data | |
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' && matrix.type == 'core' | |
uses: actions/upload-artifact@v3 | |
with: | |
name: diagnostic_data | |
path: | | |
results/**/.keep | |
results/${{ matrix.dataset }}/${{ matrix.config }}/failed_test_stories.yml | |
results/${{ matrix.dataset }}/${{ matrix.config }}/story_confusion_matrix.png | |
- name: Stop Datadog Agent | |
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' | |
run: | | |
sudo service datadog-agent stop | |
- name: Check duplicate issue | |
if: failure() && github.event_name == 'schedule' | |
uses: actions/github-script@d556feaca394842dc55e4734bf3bb9f685482fa0 | |
id: issue-exists | |
with: | |
result-encoding: string | |
github-token: ${{ github.token }} | |
script: | | |
// Get all open issues | |
const opts = await github.issues.listForRepo.endpoint.merge({ | |
owner: context.repo.owner, | |
repo: context.repo.repo, | |
state: 'open', | |
labels: ${{ env.GITHUB_ISSUE_LABELS }} | |
}); | |
const issues = await github.paginate(opts) | |
// Check if issue exist by comparing title and body | |
for (const issue of issues) { | |
if (issue.title.includes('${{ env.GITHUB_ISSUE_TITLE }}') && | |
issue.body.includes('${{ matrix.dataset }}') && | |
issue.body.includes('${{ matrix.config }}')) { | |
console.log(`Found an exist issue \#${issue.number}. Skip the following steps.`); | |
return 'true' | |
} | |
} | |
return 'false' | |
- name: Create GitHub Issue 📬 | |
id: create-issue | |
if: failure() && steps.issue-exists.outputs.result == 'false' && github.event_name == 'schedule' | |
uses: actions/github-script@d556feaca394842dc55e4734bf3bb9f685482fa0 | |
with: | |
# do not use GITHUB_TOKEN here because it wouldn't trigger subsequent workflows | |
github-token: ${{ secrets.RASABOT_GITHUB_TOKEN }} | |
script: | | |
var issue = await github.issues.create({ | |
owner: context.repo.owner, | |
repo: context.repo.repo, | |
title: '${{ env.GITHUB_ISSUE_TITLE }}', | |
labels: ${{ env.GITHUB_ISSUE_LABELS }}, | |
body: '*This PR is automatically created by the Scheduled Model Regression Test workflow. Checkout the Github Action Run [here](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}).* <br> --- <br> **Description of Problem:** <br> Scheduled Model Regression Test failed. <br> **Configuration**: `${{ matrix.config }}` <br> **Dataset**: `${{ matrix.dataset}}`' | |
}) | |
return issue.data.number | |
- name: Notify Slack of Failure 😱 | |
if: failure() && steps.issue-exists.outputs.result == 'false' && github.event_name == 'schedule' | |
uses: 8398a7/action-slack@a189acbf0b7ea434558662ae25a0de71df69a435 # v3 | |
with: | |
status: custom | |
fields: workflow,job,commit,repo,ref,author,took | |
custom_payload: | | |
{ | |
attachments: [{ | |
fallback: 'fallback', | |
color: '${{ job.status }}' === 'success' ? 'good' : '${{ job.status }}' === 'failure' ? 'danger' : 'warning', | |
title: `${process.env.AS_WORKFLOW}`, | |
text: 'Scheduled model regression test failed :no_entry:️', | |
fields: [{ | |
title: 'Configuration', | |
value: '${{ matrix.config }}', | |
short: false | |
}, | |
{ | |
title: 'Dataset', | |
value: '${{ matrix.dataset }}', | |
short: false | |
}, | |
{ | |
title: 'GitHub Issue', | |
value: `https://github.com/${{ github.repository }}/issues/${{ steps.create-issue.outputs.result }}`, | |
short: false | |
}, | |
{ | |
title: 'GitHub Action', | |
value: `https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}`, | |
short: false | |
}], | |
actions: [{ | |
}] | |
}] | |
} | |
env: | |
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_CI_MODEL_REGRESSION_TEST }} | |
combine_reports: | |
name: Combine reports | |
runs-on: ubuntu-22.04 | |
needs: | |
- model_regression_test_gpu | |
if: always() && needs.model_regression_test_gpu.result == 'success' | |
steps: | |
- name: Checkout git repository 🕝 | |
uses: actions/checkout@v3 | |
- name: Set up Python 3.10 🐍 | |
uses: actions/setup-python@b55428b1882923874294fa556849718a1d7f2ca5 | |
with: | |
python-version: '3.10' | |
- name: Get reports | |
uses: actions/download-artifact@v3 | |
with: | |
path: reports/ | |
- name: Display structure of downloaded files | |
continue-on-error: true | |
run: ls -R | |
working-directory: reports/ | |
- name: Merge all reports | |
env: | |
SUMMARY_FILE: "./report.json" | |
REPORTS_DIR: "reports/" | |
run: | | |
python .github/scripts/mr_generate_summary.py | |
cat $SUMMARY_FILE | |
- name: Upload an artifact with the overall report | |
uses: actions/upload-artifact@v3 | |
with: | |
name: report.json | |
path: ./report.json | |
analyse_performance: | |
name: Analyse Performance | |
runs-on: ubuntu-22.04 | |
if: always() && github.event_name == 'schedule' | |
needs: | |
- model_regression_test_gpu | |
- combine_reports | |
env: | |
GITHUB_ISSUE_TITLE: "Scheduled Model Regression Test Performance Drops" | |
steps: | |
- name: Checkout | |
uses: actions/checkout@v3 | |
- name: Download report from last on-schedule regression test | |
run: | | |
# Get ID of last on-schedule workflow | |
SCHEDULE_ID=$(curl -X GET -s -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -H "Accept: application/vnd.github.v3+json" \ | |
"https://api.github.com/repos/${{ github.repository }}/actions/workflows" \ | |
| jq -r '.workflows[] | select(.name == "${{ github.workflow }}") | select(.path | test("schedule")) | .id') | |
ARTIFACT_URL=$(curl -s -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -H "Accept: application/vnd.github.v3+json" \ | |
"https://api.github.com/repos/${{ github.repository }}/actions/workflows/${SCHEDULE_ID}/runs?event=schedule&status=completed&branch=main&per_page=1" | jq -r .workflow_runs[0].artifacts_url) | |
DOWNLOAD_URL=$(curl -s -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -H "Accept: application/vnd.github.v3+json" "${ARTIFACT_URL}" \ | |
| jq -r '.artifacts[] | select(.name == "report.json") | .archive_download_url') | |
# Download the artifact | |
curl -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -LJO -H "Accept: application/vnd.github.v3+json" $DOWNLOAD_URL | |
# Unzip and change name | |
unzip report.json.zip && mv report.json report_main.json | |
- name: Download the report | |
uses: actions/download-artifact@v3 | |
with: | |
name: report.json | |
- name: Download gomplate | |
run: |- | |
sudo curl -o /usr/local/bin/gomplate -sSL https://github.com/hairyhenderson/gomplate/releases/download/v3.9.0/gomplate_linux-amd64 | |
sudo chmod +x /usr/local/bin/gomplate | |
- name: Analyse Performance 🔍 | |
id: performance | |
run: | | |
OUTPUT="$(gomplate -d data=report.json -d results_main=report_main.json -f .github/templates/model_regression_test_results.tmpl)" | |
OUTPUT="${OUTPUT//$'\n'/'%0A'}" | |
OUTPUT="${OUTPUT//$'\r'/'%0D'}" | |
OUTPUT="$(echo $OUTPUT | sed 's|`|\\`|g')" | |
echo "::set-output name=report_description::${OUTPUT}" | |
IS_DROPPED=false | |
# Loop through all negative values within parentheses | |
# Set IS_DROPPED to true if there is any value lower | |
# than the threshold | |
for x in $(grep -o '\(-[0-9.]\+\)' <<< $OUTPUT); do | |
if (( $(bc -l <<< "${{ env.PERFORMANCE_DROP_THRESHOLD }} > $x") )); then | |
IS_DROPPED=true | |
echo "The decrease of some test performance is > ${{ env.PERFORMANCE_DROP_THRESHOLD }}. Executing the following steps..." | |
break | |
fi | |
done | |
echo "::set-output name=is_dropped::$IS_DROPPED" | |
- name: Check duplicate issue | |
if: steps.performance.outputs.is_dropped == 'true' | |
uses: actions/github-script@d556feaca394842dc55e4734bf3bb9f685482fa0 | |
id: issue-exists | |
with: | |
result-encoding: string | |
github-token: ${{ github.token }} | |
script: | | |
// Get all open issues based on labels | |
const opts = await github.issues.listForRepo.endpoint.merge({ | |
owner: context.repo.owner, | |
repo: context.repo.repo, | |
state: 'open', | |
labels: ${{ env.GITHUB_ISSUE_LABELS }} | |
}); | |
const issues = await github.paginate(opts) | |
// Check if issue exist by comparing title | |
for (const issue of issues) { | |
if (issue.title.includes('${{ env.GITHUB_ISSUE_TITLE }}') ) { | |
console.log(`Found an exist issue \#${issue.number}. Skip the following steps.`); | |
return 'true' | |
} | |
} | |
return 'false' | |
- name: Create GitHub Issue 📬 | |
id: create-issue | |
if: steps.performance.outputs.is_dropped == 'true' && steps.issue-exists.outputs.result == 'false' | |
uses: actions/github-script@d556feaca394842dc55e4734bf3bb9f685482fa0 | |
with: | |
# do not use GITHUB_TOKEN here because it wouldn't trigger subsequent workflows | |
github-token: ${{ secrets.RASABOT_GITHUB_TOKEN }} | |
script: | | |
// Prepare issue body | |
let issue_body = '*This PR is automatically created by the Scheduled Model Regression Test workflow. Checkout the Github Action Run [here](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}).* <br> --- <br> **Description of Problem:** <br> Some test performance scores **decreased**. Please look at the following table for more details. <br>' | |
issue_body += `${{ steps.performance.outputs.report_description }}` | |
// Open issue | |
var issue = await github.issues.create({ | |
owner: context.repo.owner, | |
repo: context.repo.repo, | |
title: '${{ env.GITHUB_ISSUE_TITLE }}', | |
labels: ${{ env.GITHUB_ISSUE_LABELS }}, | |
body: issue_body | |
}) | |
return issue.data.number | |
- name: Notify Slack when Performance Drops 💬 | |
if: steps.performance.outputs.is_dropped == 'true' && steps.issue-exists.outputs.result == 'false' | |
uses: 8398a7/action-slack@a189acbf0b7ea434558662ae25a0de71df69a435 #v3 | |
with: | |
status: custom | |
fields: workflow,job,commit,repo,ref,author,took | |
custom_payload: | | |
{ | |
attachments: [{ | |
fallback: 'fallback', | |
color: 'danger', | |
title: `${process.env.AS_WORKFLOW}`, | |
text: 'Scheduled model regression test performance drops :chart_with_downwards_trend:', | |
fields: [{ | |
title: 'GitHub Issue', | |
value: `https://github.com/${{ github.repository }}/issues/${{ steps.create-issue.outputs.result }}`, | |
short: false | |
}, | |
{ | |
title: 'GitHub Action', | |
value: `https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}`, | |
short: false | |
}], | |
actions: [{ | |
}] | |
}] | |
} | |
env: | |
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_CI_MODEL_REGRESSION_TEST }} | |
remove_runner_gpu: | |
name: Delete Github Runner - GPU | |
if: always() | |
needs: | |
- deploy_runner_gpu | |
- model_regression_test_gpu | |
runs-on: ubuntu-22.04 | |
steps: | |
- name: Setup Python | |
id: python | |
uses: actions/setup-python@c4e89fac7e8767b327bbad6cb4d859eda999cf08 | |
with: | |
python-version: '3.8' | |
- name: Export CLOUDSDK_PYTHON env variable | |
run: | | |
echo "CLOUDSDK_PYTHON=${{ steps.python.outputs.python-path }}" >> $GITHUB_OUTPUT | |
export CLOUDSDK_PYTHON=${{ steps.python.outputs.python-path }} | |
# Setup gcloud auth | |
- uses: google-github-actions/auth@ef5d53e30bbcd8d0836f4288f5e50ff3e086997d | |
with: | |
service_account: ${{ secrets.GKE_RASA_CI_GPU_SA_NAME_RASA_CI_CD }} | |
credentials_json: ${{ secrets.GKE_SA_RASA_CI_CD_GPU_RASA_CI_CD }} | |
# Get the GKE credentials for the cluster | |
- name: Get GKE Cluster Credentials | |
uses: google-github-actions/get-gke-credentials@894c221960ab1bc16a69902f29f090638cca753f | |
with: | |
cluster_name: ${{ secrets.GKE_GPU_CLUSTER_RASA_CI_CD }} | |
location: ${{ env.GKE_ZONE }} | |
project_id: ${{ secrets.GKE_SA_RASA_CI_GPU_PROJECT_RASA_CI_CD }} | |
- name: Remove Github Runner | |
run: kubectl -n github-runner delete deployments github-runner-${GITHUB_RUN_ID} --grace-period=30 |