From 8558d0b4baf09c9c8e680b791d523b6f3dd14844 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pavel=20Mac=C3=ADk?=
Date: Wed, 13 Dec 2023 17:15:04 +0100
Subject: [PATCH] feat(RHIDP-857): Add scalability test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Pavel Macík
---
 Makefile                                   |  31 +++--
 ci-scripts/collect-results.sh              |  35 +++--
 ci-scripts/rhdh-setup/deploy.sh            |   2 +
 .../chart-values.image-override.yaml       |   2 +-
 .../template/backstage/chart-values.yaml   |   2 +-
 ci-scripts/scalability/collect-results.sh  |  10 ++
 ci-scripts/scalability/setup.sh            |   7 +
 ci-scripts/scalability/test-scalability.sh | 120 ++++++++++++++++++
 ci-scripts/scalability/test.sh             |  10 ++
 ci-scripts/setup.sh                        |  19 +--
 ci-scripts/test.sh                         |   2 +-
 config/cluster_read_config.yaml            |  26 +++-
 12 files changed, 234 insertions(+), 32 deletions(-)
 create mode 100755 ci-scripts/scalability/collect-results.sh
 create mode 100755 ci-scripts/scalability/setup.sh
 create mode 100755 ci-scripts/scalability/test-scalability.sh
 create mode 100755 ci-scripts/scalability/test.sh

diff --git a/Makefile b/Makefile
index f36300e..1104a0f 100644
--- a/Makefile
+++ b/Makefile
@@ -31,6 +31,7 @@ export RHDH_HELM_RELEASE_NAME ?= rhdh
 # RHDH horizontal scaling
 export RHDH_DEPLOYMENT_REPLICAS ?= 1
 export RHDH_DB_REPLICAS ?= 1
+export RHDH_DB_STORAGE ?= 1Gi
 export RHDH_KEYCLOAK_REPLICAS ?= 1
 
 # python's venv base dir relative to the root of the repository
@@ -39,6 +40,9 @@ PYTHON_VENV=.venv
 # Local directory to store temporary files
 export TMP_DIR=$(shell readlink -m .tmp)
 
+# Local directory to store artifacts
+export ARTIFACT_DIR ?= $(shell readlink -m .artifacts)
+
 # Name of the namespace to install locust operator as well as to run Pods of master and workers.
 LOCUST_NAMESPACE=locust-operator
 
@@ -105,9 +109,9 @@ undeploy-locust: clean
 
 ## === Testing ===
 
 ## Remove test related resources from cluster
-## Run `make clean-test SCENARIO=...` to clean a specific scenario from cluster
-.PHONY: clean-test
-clean-test:
+## Run `make clean SCENARIO=...` to clean a specific scenario from cluster
+.PHONY: clean
+clean:
 	kubectl delete --namespace $(LOCUST_NAMESPACE) cm locust.$(SCENARIO) --ignore-not-found --wait
 	kubectl delete --namespace $(LOCUST_NAMESPACE) locusttests.locust.io $(SCENARIO).test --ignore-not-found --wait || true
@@ -115,17 +119,24 @@ clean-test:
 ## Run `make test SCENARIO=...` to run a specific scenario
 .PHONY: test
 test:
-	echo $(SCENARIO)>benchmark-scenario
+	mkdir -p $(ARTIFACT_DIR)
+	echo $(SCENARIO)>$(ARTIFACT_DIR)/benchmark-scenario
 	cat locust-test-template.yaml | envsubst | kubectl apply --namespace $(LOCUST_NAMESPACE) -f -
 	kubectl create --namespace $(LOCUST_NAMESPACE) configmap locust.$(SCENARIO) --from-file scenarios/$(SCENARIO).py --dry-run=client -o yaml | kubectl apply --namespace $(LOCUST_NAMESPACE) -f -
-	date --utc -Ins>benchmark-before
+	date --utc -Ins>$(ARTIFACT_DIR)/benchmark-before
 	timeout=$$(date -d "30 seconds" "+%s"); while [ -z "$$(kubectl get --namespace $(LOCUST_NAMESPACE) pod -l performance-test-pod-name=$(SCENARIO)-test-master -o name)" ]; do if [ "$$(date "+%s")" -gt "$$timeout" ]; then echo "ERROR: Timeout waiting for locust master pod to start"; exit 1; else echo "Waiting for locust master pod to start..."; sleep 5s; fi; done
 	kubectl wait --namespace $(LOCUST_NAMESPACE) --for=condition=Ready=true $$(kubectl get --namespace $(LOCUST_NAMESPACE) pod -l performance-test-pod-name=$(SCENARIO)-test-master -o name)
 	@echo "Getting locust master log:"
 	kubectl logs --namespace $(LOCUST_NAMESPACE) -f -l performance-test-pod-name=$(SCENARIO)-test-master | tee load-test.log
-	date --utc -Ins>benchmark-after
+	date --utc -Ins>$(ARTIFACT_DIR)/benchmark-after
 	@echo "All done!!!"
 
+## Run the scalability test
+## Run `make test-scalability SCENARIO=...` to run a specific scenario
+.PHONY: test-scalability
+test-scalability:
+	cd ./ci-scripts/scalability; ./test-scalability.sh
+
 ## Run shellcheck on all of the shell scripts
 .PHONY: shellcheck
 shellcheck:
@@ -145,7 +156,7 @@ ci-run: setup-venv deploy-locust test
 
 ## Deploy and populate RHDH in CI end to end
 .PHONY: ci-deploy
-ci-deploy: clean namespace deploy-rhdh
+ci-deploy: namespace deploy-rhdh
 
 
 ## === Maintanence ===
@@ -158,9 +169,9 @@ update-locust-images:
 	skopeo copy --src-no-creds docker://docker.io/lotest/locust-k8s-operator:latest docker://quay.io/backstage-performance/locust-k8s-operator:latest
 
 ## Clean local resources
-.PHONY: clean
-clean:
-	rm -rvf *.log benchmark-* shellcheck ci-scripts/rhdh-setup/.tmp $(TMP_DIR)
+.PHONY: clean-local
+clean-local:
+	rm -rvf *.log shellcheck $(TMP_DIR) $(ARTIFACT_DIR)
 
 
 ## === Help ===
diff --git a/ci-scripts/collect-results.sh b/ci-scripts/collect-results.sh
index 917c8da..313ffe7 100755
--- a/ci-scripts/collect-results.sh
+++ b/ci-scripts/collect-results.sh
@@ -6,9 +6,26 @@ set -o pipefail
 
 echo -e "\n === Collecting test results and metrics ===\n"
 
-ARTIFACT_DIR=${ARTIFACT_DIR:-artifacts}
+ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
 mkdir -p "${ARTIFACT_DIR}"
 
+RHDH_NAMESPACE=${RHDH_NAMESPACE:-rhdh-performance}
+
+cli="oc"
+clin="$cli -n $RHDH_NAMESPACE"
+
+for label in app.kubernetes.io/name=developer-hub app.kubernetes.io/name=postgresql; do
+  echo -e "\nCollecting logs from pods in '$RHDH_NAMESPACE' namespace with label '$label':"
+  for pod in $($clin get pods -l "$label" -o name); do
+    echo "$pod"
+    logfile="${ARTIFACT_DIR}/${pod##*/}"
+    echo -e " -> $logfile.log"
+    $clin logs "$pod" --tail=-1 >&"$logfile.log" || true
+    echo -e " -> $logfile.previous.log"
+    $clin logs "$pod" --tail=-1 --previous=true >&"$logfile.previous.log" || true
+  done
+done
+
 monitoring_collection_data=$ARTIFACT_DIR/benchmark.json
 monitoring_collection_log=$ARTIFACT_DIR/monitoring-collection.log
 monitoring_collection_dir=$ARTIFACT_DIR/monitoring-collection-raw-data-dir
@@ -54,17 +71,17 @@ set +u
 # shellcheck disable=SC1090,SC1091
 source $PYTHON_VENV_DIR/bin/activate
 set -u
-mstart=$(date --utc --date "$(cat benchmark-before)" --iso-8601=seconds)
-mend=$(date --utc --date "$(cat benchmark-after)" --iso-8601=seconds)
+mstart=$(date --utc --date "$(cat "${ARTIFACT_DIR}/benchmark-before")" --iso-8601=seconds)
+mend=$(date --utc --date "$(cat "${ARTIFACT_DIR}/benchmark-after")" --iso-8601=seconds)
 mhost=$(kubectl -n openshift-monitoring get route -l app.kubernetes.io/name=thanos-query -o json | jq --raw-output '.items[0].spec.host')
-mversion=$(sed -n 's/^__version__ = "\(.*\)"/\1/p' "scenarios/$(cat benchmark-scenario).py")
+mversion=$(sed -n 's/^__version__ = "\(.*\)"/\1/p' "scenarios/$(cat "${ARTIFACT_DIR}/benchmark-scenario").py")
 status_data.py \
   --status-data-file "$monitoring_collection_data" \
   --set \
-  results.started="$(cat benchmark-before)" \
-  results.ended="$(cat benchmark-after)" \
-  name="RHDH load test $(cat benchmark-scenario)" \
-  metadata.scenario.name="$(cat benchmark-scenario)" \
+  results.started="$(cat "${ARTIFACT_DIR}/benchmark-before")" \
+  results.ended="$(cat "${ARTIFACT_DIR}/benchmark-after")" \
+  name="RHDH load test $(cat "${ARTIFACT_DIR}/benchmark-scenario")" \
+  metadata.scenario.name="$(cat "${ARTIFACT_DIR}/benchmark-scenario")" \
   metadata.scenario.version="$mversion" \
   -d &>"$monitoring_collection_log"
 status_data.py \
@@ -75,7 +92,7 @@ status_data.py \
   --monitoring-raw-data-dir "$monitoring_collection_dir" \
   --prometheus-host "https://$mhost" \
   --prometheus-port 443 \
-  --prometheus-token "$(oc whoami -t)" \
+  --prometheus-token "$($cli whoami -t)" \
   -d &>>"$monitoring_collection_log"
 set +u
 deactivate
diff --git a/ci-scripts/rhdh-setup/deploy.sh b/ci-scripts/rhdh-setup/deploy.sh
index 0b40185..e3a18b2 100755
--- a/ci-scripts/rhdh-setup/deploy.sh
+++ b/ci-scripts/rhdh-setup/deploy.sh
@@ -19,6 +19,7 @@ repo_name="$RHDH_NAMESPACE-helm-repo"
 
 export RHDH_DEPLOYMENT_REPLICAS=${RHDH_DEPLOYMENT_REPLICAS:-1}
 export RHDH_DB_REPLICAS=${RHDH_DB_REPLICAS:-1}
+export RHDH_DB_STORAGE=${RHDH_DB_STORAGE:-1Gi}
 export RHDH_KEYCLOAK_REPLICAS=${RHDH_KEYCLOAK_REPLICAS:-1}
 
 export RHDH_IMAGE_REGISTRY=${RHDH_IMAGE_REGISTRY:-}
@@ -113,6 +114,7 @@ backstage_install() {
     ${RHDH_HELM_RELEASE_NAME} \
     ${RHDH_DEPLOYMENT_REPLICAS} \
     ${RHDH_DB_REPLICAS} \
+    ${RHDH_DB_STORAGE} \
     ${RHDH_IMAGE_REGISTRY} \
     ${RHDH_IMAGE_REPO} \
     ${RHDH_IMAGE_TAG} \
diff --git a/ci-scripts/rhdh-setup/template/backstage/chart-values.image-override.yaml b/ci-scripts/rhdh-setup/template/backstage/chart-values.image-override.yaml
index 2648341..74c8de8 100644
--- a/ci-scripts/rhdh-setup/template/backstage/chart-values.image-override.yaml
+++ b/ci-scripts/rhdh-setup/template/backstage/chart-values.image-override.yaml
@@ -119,7 +119,7 @@ upstream:
     persistence:
       enabled: true
       mountPath: /var/lib/pgsql/data
-      size: 1Gi
+      size: "${RHDH_DB_STORAGE}"
     podSecurityContext:
       enabled: false
     securityContext:
diff --git a/ci-scripts/rhdh-setup/template/backstage/chart-values.yaml b/ci-scripts/rhdh-setup/template/backstage/chart-values.yaml
index 15f1d78..18500ee 100644
--- a/ci-scripts/rhdh-setup/template/backstage/chart-values.yaml
+++ b/ci-scripts/rhdh-setup/template/backstage/chart-values.yaml
@@ -113,7 +113,7 @@ upstream:
     persistence:
       enabled: true
       mountPath: /var/lib/pgsql/data
-      size: 1Gi
+      size: "${RHDH_DB_STORAGE}"
     podSecurityContext:
       enabled: false
     securityContext:
diff --git a/ci-scripts/scalability/collect-results.sh b/ci-scripts/scalability/collect-results.sh
new file mode 100755
index 0000000..13d3e1d
--- /dev/null
+++ b/ci-scripts/scalability/collect-results.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+set -o nounset
+set -o errexit
+set -o pipefail
+
+echo -e "\n === Collecting test results and metrics for RHDH scalability test ===\n"
+
+ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
+mkdir -p "$ARTIFACT_DIR"
diff --git a/ci-scripts/scalability/setup.sh b/ci-scripts/scalability/setup.sh
new file mode 100755
index 0000000..61d7104
--- /dev/null
+++ b/ci-scripts/scalability/setup.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+set -o nounset
+set -o errexit
+set -o pipefail
+
+echo -e "\n === Setting up RHDH scalability test ===\n"
diff --git a/ci-scripts/scalability/test-scalability.sh b/ci-scripts/scalability/test-scalability.sh
new file mode 100755
index 0000000..573339e
--- /dev/null
+++ b/ci-scripts/scalability/test-scalability.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+
+export PRE_LOAD_DB=${PRE_LOAD_DB:-true}
+export RHDH_HELM_REPO=${RHDH_HELM_REPO:-https://gist.githubusercontent.com/rhdh-bot/63cef5cb6285889527bd6a67c0e1c2a9/raw}
+export RHDH_HELM_CHART=${RHDH_HELM_CHART:-developer-hub}
+export RHDH_NAMESPACE=${RHDH_NAMESPACE:-rhdh-performance}
+
+export WAIT_FOR_SEARCH_INDEX=${WAIT_FOR_SEARCH_INDEX:-true}
+
+export GITHUB_TOKEN GITHUB_USER GITHUB_REPO QUAY_TOKEN
+GITHUB_TOKEN="$(cat /usr/local/ci-secrets/backstage-performance/github.token)"
+GITHUB_USER="$(cat /usr/local/ci-secrets/backstage-performance/github.user)"
+GITHUB_REPO="$(cat /usr/local/ci-secrets/backstage-performance/github.repo)"
+QUAY_TOKEN="$(cat /usr/local/ci-secrets/backstage-performance/quay.token)"
+
+read -ra workers <<<"${SCALE_WORKERS:-5}"
+
+read -ra active_users_spawn_rate <<<"${SCALE_ACTIVE_USERS_SPAWN_RATES:-1:1 200:40}"
+
+read -ra bs_users_groups <<<"${SCALE_BS_USERS_GROUPS:-1:1 15000:5000}"
+
+read -ra catalog_sizes <<<"${SCALE_CATALOG_SIZES:-1 10000}"
+
+read -ra replicas <<<"${SCALE_REPLICAS:-5}"
+
+read -ra db_storages <<<"${SCALE_DB_STORAGES:-1Gi 2Gi}"
+
+echo
+echo "////// RHDH scalability test //////"
+echo "Number of scalability matrix iterations: $((${#workers[*]} * ${#active_users_spawn_rate[*]} * ${#bs_users_groups[*]} * ${#catalog_sizes[*]} * ${#replicas[*]} * ${#db_storages[*]}))"
+echo
+
+wait_for_indexing() {
+  if [ "$WAIT_FOR_SEARCH_INDEX" == "true" ]; then
+    HOST="https://$(oc get routes rhdh-developer-hub -n "${RHDH_NAMESPACE:-rhdh-performance}" -o jsonpath='{.spec.host}')"
+    start=$(date +%s)
+    timeout_timestamp=$(date -d "3600 seconds" "+%s")
+    while true; do
+      echo "Waiting for the search indexing to finish..."
+      if [ "$(date "+%s")" -gt "$timeout_timestamp" ]; then
+        echo "ERROR: Timeout waiting"
+        exit 1
+      else
+        count="$(curl -sk "$HOST/api/search/query?term=&types%5B0%5D=software-catalog" | jq -rc '.numberOfResults')"
+        if [ "$count" != "null" ]; then
+          finish=$(date +%s)
+          echo "Search query returned non-empty set ($count) - indexing has finished in $((finish - start))s"
+          break
+        fi
+      fi
+      sleep 10s
+    done
+  else
+    echo "WAIT_FOR_SEARCH_INDEX is set to $WAIT_FOR_SEARCH_INDEX, skipping waiting for search indexing!"
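+    # NOTE: when this wait is skipped, test iterations may run against a search
+    # index that is still being built, so early measurements can be skewed.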
+  fi
+}
+
+pushd ../../
+ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
+mkdir -p "${ARTIFACT_DIR}"
+
+SCALABILITY_ARTIFACTS="$ARTIFACT_DIR/scalability"
+rm -rvf "${SCALABILITY_ARTIFACTS}"
+mkdir -p "${SCALABILITY_ARTIFACTS}"
+
+for w in "${workers[@]}"; do
+  for bu_bg in "${bs_users_groups[@]}"; do
+    IFS=":" read -ra tokens <<<"${bu_bg}"
+    bu="${tokens[0]}"
+    bg="${tokens[1]}"
+    for c in "${catalog_sizes[@]}"; do
+      for r in "${replicas[@]}"; do
+        for s in "${db_storages[@]}"; do
+          echo
+          echo "/// Setting up RHDH for scalability test ///"
+          echo
+          set -x
+          export RHDH_DEPLOYMENT_REPLICAS="$r"
+          export RHDH_DB_REPLICAS="$r"
+          export RHDH_DB_STORAGE="$s"
+          export RHDH_KEYCLOAK_REPLICAS=$r
+          export BACKSTAGE_USER_COUNT=$bu
+          export GROUP_COUNT=$bg
+          export WORKERS=$w
+          export API_COUNT=$c
+          export COMPONENT_COUNT=$c
+          index=${r}r-db_${s}-${bu}bu-${bg}bg-${w}w-${c}c
+          set +x
+          oc login "$OPENSHIFT_API" -u "$OPENSHIFT_USERNAME" -p "$OPENSHIFT_PASSWORD" --insecure-skip-tls-verify=true
+          make undeploy-rhdh
+          setup_artifacts="$SCALABILITY_ARTIFACTS/$index/setup"
+          mkdir -p "$setup_artifacts"
+          ARTIFACT_DIR=$setup_artifacts ./ci-scripts/setup.sh |& tee "$setup_artifacts/setup.log"
+          wait_for_indexing |& tee "$setup_artifacts/after-setup-search.log"
+          for au_sr in "${active_users_spawn_rate[@]}"; do
+            IFS=":" read -ra tokens <<<"${au_sr}"
+            active_users=${tokens[0]}
+            spawn_rate=${tokens[1]}
+            echo
+            echo "/// Running the scalability test ///"
+            echo
+            set -x
+            export SCENARIO=${SCENARIO:-search-catalog}
+            export USERS="${active_users}"
+            export DURATION=${DURATION:-5m}
+            export SPAWN_RATE="${spawn_rate}"
+            set +x
+            make clean
+            test_artifacts="$SCALABILITY_ARTIFACTS/$index/test/${active_users}u"
+            mkdir -p "$test_artifacts"
+            wait_for_indexing |& tee "$test_artifacts/before-test-search.log"
+            ARTIFACT_DIR=$test_artifacts ./ci-scripts/test.sh |& tee "$test_artifacts/test.log"
+            ARTIFACT_DIR=$test_artifacts ./ci-scripts/collect-results.sh |& tee "$test_artifacts/collect-results.log"
+          done
+        done
+      done
+    done
+  done
+done
+popd || exit
diff --git a/ci-scripts/scalability/test.sh b/ci-scripts/scalability/test.sh
new file mode 100755
index 0000000..dc51d7f
--- /dev/null
+++ b/ci-scripts/scalability/test.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+set -o nounset
+set -o errexit
+set -o pipefail
+
+export SCENARIO DURATION WAIT_FOR_SEARCH_INDEX PRE_LOAD_DB SCALE_WORKERS SCALE_ACTIVE_USERS_SPAWN_RATES SCALE_BS_USERS_GROUPS SCALE_CATALOG_SIZES SCALE_REPLICAS SCALE_DB_STORAGES
+
+echo -e "\n === Running RHDH scalability test ===\n"
+make test-scalability
diff --git a/ci-scripts/setup.sh b/ci-scripts/setup.sh
index d68271f..c01090c 100755
--- a/ci-scripts/setup.sh
+++ b/ci-scripts/setup.sh
@@ -13,17 +13,18 @@
 GITHUB_USER=$(cat /usr/local/ci-secrets/backstage-performance/github.user)
 GITHUB_REPO=$(cat /usr/local/ci-secrets/backstage-performance/github.repo)
 QUAY_TOKEN=$(cat /usr/local/ci-secrets/backstage-performance/quay.token)
 
-export RHDH_DEPLOYMENT_REPLICAS=5
-export RHDH_DB_REPLICAS=5
-export RHDH_KEYCLOAK_REPLICAS=5
+export RHDH_DEPLOYMENT_REPLICAS=${RHDH_DEPLOYMENT_REPLICAS:-5}
+export RHDH_DB_REPLICAS=${RHDH_DB_REPLICAS:-5}
+export RHDH_DB_STORAGE=${RHDH_DB_STORAGE:-1Gi}
+export RHDH_KEYCLOAK_REPLICAS=${RHDH_KEYCLOAK_REPLICAS:-5}
 
-export API_COUNT=1000
-export COMPONENT_COUNT=1000
-export BACKSTAGE_USER_COUNT=1000
-export GROUP_COUNT=250
+export API_COUNT=${API_COUNT:-1000}
+export COMPONENT_COUNT=${COMPONENT_COUNT:-1000}
+export BACKSTAGE_USER_COUNT=${BACKSTAGE_USER_COUNT:-1000}
+export GROUP_COUNT=${GROUP_COUNT:-250}
 
-ARTIFACT_DIR=${ARTIFACT_DIR:-artifacts}
-mkdir -p "${ARTIFACT_DIR}"
+ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
+mkdir -p "$ARTIFACT_DIR"
 
 rate_limits_csv="${ARTIFACT_DIR}/gh-rate-limits-remaining.setup.csv"
diff --git a/ci-scripts/test.sh b/ci-scripts/test.sh
index 318372c..2c80e4f 100755
--- a/ci-scripts/test.sh
+++ b/ci-scripts/test.sh
@@ -13,7 +13,7 @@ export HOST
 HOST="https://$(oc get routes rhdh-developer-hub -n "${RHDH_NAMESPACE:-rhdh-performance}" -o jsonpath='{.spec.host}')"
 
 # end-of testing env
-ARTIFACT_DIR=${ARTIFACT_DIR:-artifacts}
+ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
 mkdir -p "${ARTIFACT_DIR}"
 
 rate_limits_csv="${ARTIFACT_DIR}/gh-rate-limits-remaining.test.csv"
diff --git a/config/cluster_read_config.yaml b/config/cluster_read_config.yaml
index fbb6f30..fb7f884 100644
--- a/config/cluster_read_config.yaml
+++ b/config/cluster_read_config.yaml
@@ -62,7 +62,31 @@
     'USERS',
     'WORKERS',
     'DURATION',
+    'SPAWN_RATE',
     'SCENARIO',
+    'PRE_LOAD_DB',
+    'RHDH_DEPLOYMENT_REPLICAS',
+    'RHDH_DB_REPLICAS',
+    'RHDH_DB_STORAGE',
+    'RHDH_KEYCLOAK_REPLICAS',
+    'RHDH_HELM_REPO',
+    'RHDH_HELM_CHART',
+    'RHDH_HELM_CHART_VERSION',
+    'RHDH_HELM_RELEASE_NAME',
+    'RHDH_IMAGE_REGISTRY',
+    'RHDH_IMAGE_REPO',
+    'RHDH_IMAGE_TAG',
+    'API_COUNT',
+    'COMPONENT_COUNT',
+    'BACKSTAGE_USER_COUNT',
+    'GROUP_COUNT',
+    'WAIT_FOR_SEARCH_INDEX',
+    'SCALE_WORKERS',
+    'SCALE_ACTIVE_USERS_SPAWN_RATES',
+    'SCALE_BS_USERS_GROUPS',
+    'SCALE_CATALOG_SIZES',
+    'SCALE_REPLICAS',
+    'SCALE_DB_STORAGES'
 ] %}
 - name: metadata.env.{{ var }}
   env_variable: {{ var }}
@@ -127,7 +151,7 @@
 {% macro pod_info(namespace, deployment, container) -%}
 # Gather info about pod configuration
 - name: metadata.cluster.pods.{{ deployment }}-{{ container }}.count
-  command: oc -n {{ namespace }} get deployment/{{ deployment }} -o json | jq '.spec.template.spec | if has("replicas") then .replicas else 1 end'
+  command: oc -n {{ namespace }} get deployment/{{ deployment }} -o json | jq '.spec | if has("replicas") then .replicas else 1 end'
 - name: metadata.cluster.pods.{{ deployment }}-{{ container }}.resources
   command: oc -n {{ namespace }} get deployment/{{ deployment }} -o json | jq '.spec.template.spec.containers | map(select(.name == "{{ container }}"))[0].resources'
   output: json
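
Usage sketch: driving the scalability matrix by hand. This is a minimal sketch, assuming an OpenShift cluster reachable through the OPENSHIFT_* variables that test-scalability.sh passes to `oc login`, and the CI secrets mounted under /usr/local/ci-secrets/backstage-performance; all values below are illustrative placeholders, not project defaults beyond those encoded in the script:

    # hypothetical cluster coordinates - substitute your own
    export OPENSHIFT_API="https://api.example.test:6443"
    export OPENSHIFT_USERNAME="kubeadmin"
    export OPENSHIFT_PASSWORD="changeme"

    # 1 worker count x 2 user:spawn-rate pairs x 1 users:groups pair
    # x 1 catalog size x 2 replica counts x 2 storage sizes = 8 iterations
    SCALE_WORKERS="5" \
    SCALE_ACTIVE_USERS_SPAWN_RATES="1:1 200:40" \
    SCALE_BS_USERS_GROUPS="1:1" \
    SCALE_CATALOG_SIZES="1" \
    SCALE_REPLICAS="1 5" \
    SCALE_DB_STORAGES="1Gi 2Gi" \
    make test-scalability

Each matrix iteration redeploys RHDH with the chosen replica count and DB storage size, waits for search indexing, runs the load-test scenario (search-catalog by default), and collects results under $ARTIFACT_DIR/scalability/<index>/.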