From cf2574b2c8b2e8074684e67195a556b3b5809edb Mon Sep 17 00:00:00 2001 From: Roman Bredehoft Date: Wed, 12 Jun 2024 11:51:06 +0200 Subject: [PATCH 1/3] chore: add slack alert when stopping EC2 instances fails in CI --- .github/workflows/continuous-integration.yaml | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/.github/workflows/continuous-integration.yaml b/.github/workflows/continuous-integration.yaml index 9bc1372b3..947883e85 100644 --- a/.github/workflows/continuous-integration.yaml +++ b/.github/workflows/continuous-integration.yaml @@ -1112,8 +1112,8 @@ jobs: # basically when the `matrix-preparation` has not been skipped # Side note: environmental variables cannot be used for jobs conditions, so we need to determine # if the job should be run or not in an previous job and store it in its output - send-report: - name: Send Slack report + slack-report: + name: Slack report runs-on: ubuntu-20.04 if: | always() @@ -1198,7 +1198,7 @@ jobs: echo "EOF" >> "$GITHUB_ENV" - - name: Slack report + - name: Send Slack report uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 env: SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} @@ -1210,3 +1210,29 @@ jobs: SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} SLACKIFY_MARKDOWN: true + + stop-ec2-failure-slack-alert: + name: Stop EC2 failure Slack alert + runs-on: ubuntu-20.04 + if: | + always() + && needs.matrix-preparation.result != 'skipped' + && needs.stop-runner-linux.result == 'failure' + timeout-minutes: 2 + needs: + [ + matrix-preparation, + stop-runner-linux, + ] + + steps: + - name: Send stop EC2 failure Slack alert + uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 + env: + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} + SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png + SLACK_COLOR: 'failure' + SLACK_TITLE: 'Stopping EC2 instances (linux) failed ❌' + SLACK_MESSAGE: '@machinelearning EC2 instances must be terminated manually as soon as possible' + SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} From 34c7059c8d38228f2574c447388b304129d193bf Mon Sep 17 00:00:00 2001 From: Roman Bredehoft Date: Wed, 12 Jun 2024 11:57:35 +0200 Subject: [PATCH 2/3] chore: trigger error message --- .github/workflows/continuous-integration.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/continuous-integration.yaml b/.github/workflows/continuous-integration.yaml index 947883e85..c6c330e84 100644 --- a/.github/workflows/continuous-integration.yaml +++ b/.github/workflows/continuous-integration.yaml @@ -718,6 +718,7 @@ jobs: ) && steps.conformance.outcome == 'success' && !cancelled() + && false shell: bash +e {0} run: | if [[ "${{ env.IS_WEEKLY }}" == "true" ]]; then @@ -1001,6 +1002,9 @@ jobs: label: ${{ needs.start-runner-linux.outputs.label-310 }} ec2-instance-id: ${{ needs.start-runner-linux.outputs.ec2-instance-id-310 }} mode: stop + + - name: trigger error + run: exit 1 build-macos-intel: name: Python ${{ matrix.python_version }} (macOS, intel) @@ -1233,6 +1237,6 @@ jobs: SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png SLACK_COLOR: 'failure' SLACK_TITLE: 'Stopping EC2 instances (linux) failed ❌' - SLACK_MESSAGE: '@machinelearning EC2 instances must be terminated manually as soon as possible' + SLACK_MESSAGE: ' EC2 instances must be terminated manually as soon as possible' SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} From cc32d3ffbacd5505a0f1c4daaf6c3c9e41b48832 Mon Sep 17 00:00:00 2001 From: Roman Bredehoft Date: Thu, 13 Jun 2024 11:14:42 +0200 Subject: [PATCH 3/3] chore: revert trigger error step --- .github/workflows/continuous-integration.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/continuous-integration.yaml b/.github/workflows/continuous-integration.yaml index c6c330e84..ec8899340 100644 --- a/.github/workflows/continuous-integration.yaml +++ b/.github/workflows/continuous-integration.yaml @@ -718,7 +718,7 @@ jobs: ) && steps.conformance.outcome == 'success' && !cancelled() - && false + shell: bash +e {0} run: | if [[ "${{ env.IS_WEEKLY }}" == "true" ]]; then @@ -1002,9 +1002,6 @@ jobs: label: ${{ needs.start-runner-linux.outputs.label-310 }} ec2-instance-id: ${{ needs.start-runner-linux.outputs.ec2-instance-id-310 }} mode: stop - - - name: trigger error - run: exit 1 build-macos-intel: name: Python ${{ matrix.python_version }} (macOS, intel)