# Periodic pagebench performance test on dedicated EC2 machine in eu-central-1 region #89
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: Periodic pagebench performance test on dedicated EC2 machine in eu-central-1 region

# Two triggers: a daily schedule and a manual dispatch with an optional commit override.
on:
  schedule:
    # The cron expression contains `*`, a YAML indicator character, so it must stay quoted.
    #        ┌───────────── minute (0 - 59)
    #        │ ┌───────────── hour (0 - 23)
    #        │ │  ┌───────────── day of the month (1 - 31)
    #        │ │  │ ┌───────────── month (1 - 12 or JAN-DEC)
    #        │ │  │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
    - cron: "0 18 * * *"  # every day at 18:00 UTC
  workflow_dispatch:  # manual runs from the Actions UI / API
    inputs:
      # Optional; an empty value lets the job pick the commit itself.
      commit_hash:
        description: "The long neon repo commit hash for the system under test (pageserver) to be tested."
        type: string
        required: false
        default: ""
# Every `run:` step is executed by bash in strict mode: -e aborts on the first
# failing command, -u rejects unset variables, and pipefail makes a pipeline
# fail when any of its stages fails.
defaults:
  run:
    shell: 'bash -euo pipefail {0}'

# All runs of this workflow share a single concurrency group, keyed by the
# workflow name; a run that is already in progress is never cancelled by a
# newer one.
concurrency:
  group: "${{ github.workflow }}"
  cancel-in-progress: false
jobs:
  # Single job: starts the dedicated benchmark EC2 instance, drives the
  # benchmark through the HTTPS API reachable on the instance's public IP
  # (port 8443), prints the test log, generates the Allure report, notifies
  # Slack on scheduled failures and finally stops the instance again.
  trigger_bench_on_ec2_machine_in_eu_central_1:
    permissions:
      id-token: write  # aws-actions/configure-aws-credentials
      statuses: write
      contents: write
      pull-requests: write
    runs-on: [ self-hosted, small ]
    container:
      image: neondatabase/build-tools:pinned-bookworm
      credentials:
        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
      options: --init
    timeout-minutes: 360  # Set the timeout to 6 hours
    env:
      # Bearer token sent with every request to the benchmark API below.
      API_KEY: ${{ secrets.PERIODIC_PAGEBENCH_EC2_RUNNER_API_KEY }}
      RUN_ID: ${{ github.run_id }}
      AWS_DEFAULT_REGION: "eu-central-1"
      # The EC2 instance that is started before and stopped after the benchmark.
      AWS_INSTANCE_ID: "i-02a59a3bf86bc7e74"
    steps:
      # we don't need the neon source code because we run everything remotely
      # however we still need the local github actions to run the allure step below
      - uses: actions/checkout@v4

      - name: Show my own (github runner) external IP address - usefull for IP allowlisting
        run: curl https://ifconfig.me

      - name: Assume AWS OIDC role that allows to manage (start/stop/describe... EC machine)
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-region: eu-central-1
          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN }}
          role-duration-seconds: 3600

      - name: Start EC2 instance and wait for the instance to boot up
        run: |
          aws ec2 start-instances --instance-ids "$AWS_INSTANCE_ID"
          aws ec2 wait instance-running --instance-ids "$AWS_INSTANCE_ID"
          sleep 60 # sleep some time to allow cloudinit and our API server to start up

      - name: Determine public IP of the EC2 instance and set env variable EC2_MACHINE_URL_US
        run: |
          public_ip=$(aws ec2 describe-instances --instance-ids "$AWS_INSTANCE_ID" --query 'Reservations[*].Instances[*].PublicIpAddress' --output text)
          echo "Public IP of the EC2 instance: $public_ip"
          # Exported through GITHUB_ENV so that all later steps can reach the API.
          echo "EC2_MACHINE_URL_US=https://${public_ip}:8443" >> "$GITHUB_ENV"

      - name: Determine commit hash
        env:
          INPUT_COMMIT_HASH: ${{ github.event.inputs.commit_hash }}
        run: |
          if [ -z "$INPUT_COMMIT_HASH" ]; then
            # No commit supplied (scheduled run or empty input): use the tip of main.
            # The lookup is a separate assignment so that a failed request aborts
            # the step (errexit + pipefail) instead of being hidden inside an
            # argument of echo; -f makes curl fail on HTTP error responses.
            commit_hash=$(curl -sSf https://api.github.com/repos/neondatabase/neon/commits/main | jq -r '.sha')
            # jq prints the literal string "null" when the field is missing.
            if [ -z "$commit_hash" ] || [ "$commit_hash" = "null" ]; then
              echo "Could not determine the latest commit hash on main" >&2
              exit 1
            fi
          else
            commit_hash="$INPUT_COMMIT_HASH"
          fi
          echo "COMMIT_HASH=${commit_hash}" >> "$GITHUB_ENV"

      - name: Start Bench with run_id
        run: |
          curl -k -X 'POST' \
          "${EC2_MACHINE_URL_US}/start_test/${GITHUB_RUN_ID}" \
          -H 'accept: application/json' \
          -H 'Content-Type: application/json' \
          -H "Authorization: Bearer $API_KEY" \
          -d "{\"neonRepoCommitHash\": \"${COMMIT_HASH}\"}"

      # Polls the status endpoint once a minute until a terminal status is seen.
      # Sets the step output `too_many_runs=true` when the API reports
      # too_many_runs; several later steps are skipped in that case.
      - name: Poll Test Status
        id: poll_step
        run: |
          status=""
          while [[ "$status" != "failure" && "$status" != "success" ]]; do
            response=$(curl -k -X 'GET' \
            "${EC2_MACHINE_URL_US}/test_status/${GITHUB_RUN_ID}" \
            -H 'accept: application/json' \
            -H "Authorization: Bearer $API_KEY")
            echo "Response: $response"
            set +x
            # Quoted so the JSON reaches jq unmodified (no word splitting or globbing).
            status=$(echo "$response" | jq -r '.status')
            echo "Test status: $status"
            if [[ "$status" == "failure" ]]; then
              echo "Test failed"
              exit 1 # Fail the job step if status is failure
            # NOTE(review): a response without a .status field (jq prints "null")
            # also ends polling with exit code 0 - confirm this is intended.
            elif [[ "$status" == "success" || "$status" == "null" ]]; then
              break
            elif [[ "$status" == "too_many_runs" ]]; then
              echo "Too many runs already running"
              echo "too_many_runs=true" >> "$GITHUB_OUTPUT"
              exit 1
            fi

            sleep 60 # Poll every 60 seconds
          done

      - name: Retrieve Test Logs
        if: always() && steps.poll_step.outputs.too_many_runs != 'true'
        run: |
          curl -k -X 'GET' \
          "${EC2_MACHINE_URL_US}/test_log/${GITHUB_RUN_ID}" \
          -H 'accept: application/gzip' \
          -H "Authorization: Bearer $API_KEY" \
          --output "test_log_${GITHUB_RUN_ID}.gz"

      - name: Unzip Test Log and Print it into this job's log
        if: always() && steps.poll_step.outputs.too_many_runs != 'true'
        run: |
          gzip -d "test_log_${GITHUB_RUN_ID}.gz"
          cat "test_log_${GITHUB_RUN_ID}"

      - name: Create Allure report
        if: ${{ !cancelled() }}
        uses: ./.github/actions/allure-report-generate
        with:
          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

      # Only scheduled runs that failed are reported to Slack.
      - name: Post to a Slack channel
        if: ${{ github.event.schedule && failure() }}
        uses: slackapi/slack-github-action@v1
        with:
          channel-id: "C06KHQVQ7U3"  # on-call-qa-staging-stream
          slack-message: "Periodic pagebench testing on dedicated hardware: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
        env:
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

      # Runs regardless of the outcome of the earlier steps.
      - name: Cleanup Test Resources
        if: always()
        run: |
          curl -k -X 'POST' \
          "${EC2_MACHINE_URL_US}/cleanup_test/${GITHUB_RUN_ID}" \
          -H 'accept: application/json' \
          -H "Authorization: Bearer $API_KEY" \
          -d ''

      # The role is assumed a second time before stopping the instance;
      # presumably because the credentials requested at the start are limited
      # to 3600 s while the job may run for up to 360 minutes - confirm.
      - name: Assume AWS OIDC role that allows to manage (start/stop/describe... EC machine)
        if: always() && steps.poll_step.outputs.too_many_runs != 'true'
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-region: eu-central-1
          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN }}
          role-duration-seconds: 3600

      # Skipped when the poll step reported too_many_runs.
      - name: Stop EC2 instance and wait for the instance to be stopped
        if: always() && steps.poll_step.outputs.too_many_runs != 'true'
        run: |
          aws ec2 stop-instances --instance-ids "$AWS_INSTANCE_ID"
          aws ec2 wait instance-stopped --instance-ids "$AWS_INSTANCE_ID"