From 313b1339c469e74a2cb5630ff6505cec0b866118 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pavel=20Mac=C3=ADk?=
Date: Wed, 10 Jan 2024 16:12:10 +0100
Subject: [PATCH] feat(RHIDP-893): Collect DB scalability metrics and results
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Pavel Macík
---
 Makefile                                      | 10 ++--
 ci-scripts/collect-results.sh                 | 51 +++++++++++++++----
 ci-scripts/scalability/collect-results.sh     | 51 +++++++++++++++++++
 config/cluster_read_config.populate.yaml      | 14 +++++
 ...fig.yaml => cluster_read_config.test.yaml} | 13 +++++
 5 files changed, 125 insertions(+), 14 deletions(-)
 create mode 100644 config/cluster_read_config.populate.yaml
 rename config/{cluster_read_config.yaml => cluster_read_config.test.yaml} (93%)

diff --git a/Makefile b/Makefile
index 1104a0f..59a1e62 100644
--- a/Makefile
+++ b/Makefile
@@ -72,12 +72,16 @@ namespace:
 ## Deploy RHDH
 .PHONY: deploy-rhdh
 deploy-rhdh:
+	date --utc -Ins>$(TMP_DIR)/deploy-before
 	cd ./ci-scripts/rhdh-setup/; ./deploy.sh -i
+	date --utc -Ins>$(TMP_DIR)/deploy-after
 
 ## Create users, groups and objects such as components and APIs in RHDH
 .PHONY: populate-rhdh
 populate-rhdh:
+	date --utc -Ins>$(TMP_DIR)/populate-before
 	cd ./ci-scripts/rhdh-setup/; ./deploy.sh -c
+	date --utc -Ins>$(TMP_DIR)/populate-after
 
 ## Undeploy RHDH
 .PHONY: undeploy-rhdh
@@ -120,15 +124,15 @@ clean:
 .PHONY: test
 test:
 	mkdir -p $(ARTIFACT_DIR)
-	echo $(SCENARIO)>$(ARTIFACT_DIR)/benchmark-scenario
+	echo $(SCENARIO)>$(TMP_DIR)/benchmark-scenario
 	cat locust-test-template.yaml | envsubst | kubectl apply --namespace $(LOCUST_NAMESPACE) -f -
 	kubectl create --namespace $(LOCUST_NAMESPACE) configmap locust.$(SCENARIO) --from-file scenarios/$(SCENARIO).py --dry-run=client -o yaml | kubectl apply --namespace $(LOCUST_NAMESPACE) -f -
-	date --utc -Ins>$(ARTIFACT_DIR)/benchmark-before
+	date --utc -Ins>$(TMP_DIR)/benchmark-before
 	timeout=$$(date -d "30 seconds" "+%s"); while [ -z "$$(kubectl get --namespace $(LOCUST_NAMESPACE) pod -l performance-test-pod-name=$(SCENARIO)-test-master -o name)" ]; do if [ "$$(date "+%s")" -gt "$$timeout" ]; then echo "ERROR: Timeout waiting for locust master pod to start"; exit 1; else echo "Waiting for locust master pod to start..."; sleep 5s; fi; done
 	kubectl wait --namespace $(LOCUST_NAMESPACE) --for=condition=Ready=true $$(kubectl get --namespace $(LOCUST_NAMESPACE) pod -l performance-test-pod-name=$(SCENARIO)-test-master -o name)
 	@echo "Getting locust master log:"
 	kubectl logs --namespace $(LOCUST_NAMESPACE) -f -l performance-test-pod-name=$(SCENARIO)-test-master | tee load-test.log
-	date --utc -Ins>$(ARTIFACT_DIR)/benchmark-after
+	date --utc -Ins>$(TMP_DIR)/benchmark-after
 	@echo "All done!!!"
 
 ## Run the scalability test
diff --git a/ci-scripts/collect-results.sh b/ci-scripts/collect-results.sh
index 313ffe7..026f693 100755
--- a/ci-scripts/collect-results.sh
+++ b/ci-scripts/collect-results.sh
@@ -9,6 +9,11 @@ echo -e "\n === Collecting test results and metrics ===\n"
 ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
 mkdir -p "${ARTIFACT_DIR}"
 
+export TMP_DIR
+
+TMP_DIR=$(readlink -m "${TMP_DIR:-.tmp}")
+mkdir -p "${TMP_DIR}"
+
 RHDH_NAMESPACE=${RHDH_NAMESPACE:-rhdh-performance}
 
 cli="oc"
@@ -47,9 +52,16 @@ try_gather_dir() {
     fi
 }
 
-try_gather_file ./.tmp/backstage.url
-try_gather_file ./.tmp/keycloak.url
-try_gather_file ./.tmp/chart-values.yaml
+try_gather_file "${TMP_DIR}/backstage.url"
+try_gather_file "${TMP_DIR}/keycloak.url"
+try_gather_file "${TMP_DIR}/chart-values.yaml"
+try_gather_file "${TMP_DIR}/deploy-before"
+try_gather_file "${TMP_DIR}/deploy-after"
+try_gather_file "${TMP_DIR}/populate-before"
+try_gather_file "${TMP_DIR}/populate-after"
+try_gather_file "${TMP_DIR}/benchmark-before"
+try_gather_file "${TMP_DIR}/benchmark-after"
+try_gather_file "${TMP_DIR}/benchmark-scenario"
 try_gather_file load-test.log
 
 PYTHON_VENV_DIR=.venv
@@ -71,22 +83,39 @@ set +u
 # shellcheck disable=SC1090,SC1091
 source $PYTHON_VENV_DIR/bin/activate
 set -u
-mstart=$(date --utc --date "$(cat "${ARTIFACT_DIR}/benchmark-before")" --iso-8601=seconds)
-mend=$(date --utc --date "$(cat "${ARTIFACT_DIR}/benchmark-after")" --iso-8601=seconds)
+# populate phase
+if [ "$PRE_LOAD_DB" == "true" ]; then
+    mstart=$(date --utc --date "$(cat "${TMP_DIR}/populate-before")" --iso-8601=seconds)
+    mend=$(date --utc --date "$(cat "${TMP_DIR}/populate-after")" --iso-8601=seconds)
+    mhost=$(kubectl -n openshift-monitoring get route -l app.kubernetes.io/name=thanos-query -o json | jq --raw-output '.items[0].spec.host')
+    status_data.py \
+        --status-data-file "$monitoring_collection_data" \
+        --additional config/cluster_read_config.populate.yaml \
+        --monitoring-start "$mstart" \
+        --monitoring-end "$mend" \
+        --monitoring-raw-data-dir "$monitoring_collection_dir" \
+        --prometheus-host "https://$mhost" \
+        --prometheus-port 443 \
+        --prometheus-token "$($cli whoami -t)" \
+        -d &>>"$monitoring_collection_log"
+fi
+# test phase
+mstart=$(date --utc --date "$(cat "${TMP_DIR}/benchmark-before")" --iso-8601=seconds)
+mend=$(date --utc --date "$(cat "${TMP_DIR}/benchmark-after")" --iso-8601=seconds)
 mhost=$(kubectl -n openshift-monitoring get route -l app.kubernetes.io/name=thanos-query -o json | jq --raw-output '.items[0].spec.host')
-mversion=$(sed -n 's/^__version__ = "\(.*\)"/\1/p' "scenarios/$(cat "${ARTIFACT_DIR}/benchmark-scenario").py")
+mversion=$(sed -n 's/^__version__ = "\(.*\)"/\1/p' "scenarios/$(cat "${TMP_DIR}/benchmark-scenario").py")
 status_data.py \
     --status-data-file "$monitoring_collection_data" \
     --set \
-    results.started="$(cat "${ARTIFACT_DIR}/benchmark-before")" \
-    results.ended="$(cat "${ARTIFACT_DIR}/benchmark-after")" \
-    name="RHDH load test $(cat "${ARTIFACT_DIR}/benchmark-scenario")" \
-    metadata.scenario.name="$(cat "${ARTIFACT_DIR}/benchmark-scenario")" \
+    results.started="$(cat "${TMP_DIR}/benchmark-before")" \
+    results.ended="$(cat "${TMP_DIR}/benchmark-after")" \
+    name="RHDH load test $(cat "${TMP_DIR}/benchmark-scenario")" \
+    metadata.scenario.name="$(cat "${TMP_DIR}/benchmark-scenario")" \
     metadata.scenario.version="$mversion" \
     -d &>"$monitoring_collection_log"
 status_data.py \
     --status-data-file "$monitoring_collection_data" \
-    --additional config/cluster_read_config.yaml \
+    --additional config/cluster_read_config.test.yaml \
     --monitoring-start "$mstart" \
     --monitoring-end "$mend" \
     --monitoring-raw-data-dir "$monitoring_collection_dir" \
diff --git a/ci-scripts/scalability/collect-results.sh b/ci-scripts/scalability/collect-results.sh
index 13d3e1d..67dc938 100755
--- a/ci-scripts/scalability/collect-results.sh
+++ b/ci-scripts/scalability/collect-results.sh
@@ -8,3 +8,54 @@ echo -e "\n === Collecting test results and metrics for RHDH scalability test ===\n"
 
 ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
 mkdir -p "$ARTIFACT_DIR"
+
+read -ra workers <<<"${SCALE_WORKERS:-5}"
+
+read -ra active_users_spawn_rate <<<"${SCALE_ACTIVE_USERS_SPAWN_RATES:-1:1 200:40}"
+
+read -ra bs_users_groups <<<"${SCALE_BS_USERS_GROUPS:-1:1 15000:5000}"
+
+read -ra catalog_sizes <<<"${SCALE_CATALOG_SIZES:-1 10000}"
+
+read -ra replicas <<<"${SCALE_REPLICAS:-5}"
+
+read -ra db_storages <<<"${SCALE_DB_STORAGES:-1Gi 2Gi}"
+
+csv_delim=";"
+csv_delim_quoted="\"$csv_delim\""
+
+for w in "${workers[@]}"; do
+    for r in "${replicas[@]}"; do
+        for bu_bg in "${bs_users_groups[@]}"; do
+            IFS=":" read -ra tokens <<<"${bu_bg}"
+            bu="${tokens[0]}"
+            bg="${tokens[1]}"
+            for s in "${db_storages[@]}"; do
+                for au_sr in "${active_users_spawn_rate[@]}"; do
+                    IFS=":" read -ra tokens <<<"${au_sr}"
+                    active_users=${tokens[0]}
+                    output="$ARTIFACT_DIR/scalability_c-${r}r-db_${s}-${bu}bu-${bg}bg-${w}w-${active_users}u.csv"
+                    echo "CatalogSize${csv_delim}AverageRPS${csv_delim}MaxRPS${csv_delim}AverageRT${csv_delim}MaxRT${csv_delim}FailRate${csv_delim}DBStorageUsed${csv_delim}DBStorageAvailable${csv_delim}DBStorageCapacity" >"$output"
+                    for c in "${catalog_sizes[@]}"; do
+                        index="${r}r-db_${s}-${bu}bu-${bg}bg-${w}w-${c}c"
+                        benchmark_json="$(find . -name benchmark.json | grep "$index" || true)"
+                        echo -n "$c${csv_delim}" >>"$output"
+                        if [ -n "$benchmark_json" ]; then
+                            jq_cmd="(.results.\"locust-operator\".locust_requests_current_rps_Aggregated.mean | tostring) \
+                                + $csv_delim_quoted + (.results.\"locust-operator\".locust_requests_current_rps_Aggregated.max | tostring) \
+                                + $csv_delim_quoted + (.results.\"locust-operator\".locust_requests_avg_response_time_Aggregated.mean | tostring) \
+                                + $csv_delim_quoted + (.results.\"locust-operator\".locust_requests_avg_response_time_Aggregated.max | tostring) \
+                                + $csv_delim_quoted + (.results.\"locust-operator\".locust_requests_fail_ratio_Aggregated.mean | tostring) \
+                                + $csv_delim_quoted + (.measurements.cluster.pv_stats.populate.\"data-rhdh-postgresql-primary-0\".used_bytes.max | tostring) \
+                                + $csv_delim_quoted + (.measurements.cluster.pv_stats.populate.\"data-rhdh-postgresql-primary-0\".available_bytes.min | tostring) \
+                                + $csv_delim_quoted + (.measurements.cluster.pv_stats.populate.\"data-rhdh-postgresql-primary-0\".capacity_bytes.max | tostring)"
+                            sed -Ee 's/: ([0-9]+\.[0-9]*[X]+[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9]*X+[0-9e\+-]+),/: "\1",/g' "$benchmark_json" | jq -rc "$jq_cmd" >>"$output"
+                        else
+                            echo "${csv_delim}${csv_delim}${csv_delim}${csv_delim}${csv_delim}${csv_delim}${csv_delim}" >>"$output"
+                        fi
+                    done
+                done
+            done
+        done
+    done
+done
diff --git a/config/cluster_read_config.populate.yaml b/config/cluster_read_config.populate.yaml
new file mode 100644
index 0000000..e72a163
--- /dev/null
+++ b/config/cluster_read_config.populate.yaml
@@ -0,0 +1,14 @@
+{% macro pv_stats(pvc) -%}
+# Collect data for PV stats
+- name: measurements.cluster.pv_stats.populate.{{pvc}}.capacity_bytes
+  monitoring_query: kubelet_volume_stats_capacity_bytes{persistentvolumeclaim="{{ pvc }}"}
+  monitoring_step: 15
+- name: measurements.cluster.pv_stats.populate.{{pvc}}.used_bytes
+  monitoring_query: kubelet_volume_stats_used_bytes{persistentvolumeclaim="{{ pvc }}"}
+  monitoring_step: 15
+- name: measurements.cluster.pv_stats.populate.{{pvc}}.available_bytes
+  monitoring_query: kubelet_volume_stats_available_bytes{persistentvolumeclaim="{{ pvc }}"}
+  monitoring_step: 15
+{%- endmacro %}
+
+{{ pv_stats('data-rhdh-postgresql-primary-0') }}
diff --git a/config/cluster_read_config.yaml b/config/cluster_read_config.test.yaml
similarity index 93%
rename from config/cluster_read_config.yaml
rename to config/cluster_read_config.test.yaml
index ddfbc69..3ea8af0 100644
--- a/config/cluster_read_config.yaml
+++ b/config/cluster_read_config.test.yaml
@@ -174,7 +174,20 @@
 {{ monitor_pod('openshift-apiserver', 'apiserver', 15) }}
 {{ monitor_pod('openshift-kube-apiserver', 'kube-apiserver', 15, pod_suffix_regex='-ip-.+') }}
 
+{% macro pv_stats(pvc) -%}
+# Collect data for PV stats
+- name: measurements.cluster.pv_stats.test.{{pvc}}.capacity_bytes
+  monitoring_query: kubelet_volume_stats_capacity_bytes{persistentvolumeclaim="{{ pvc }}"}
+  monitoring_step: 15
+- name: measurements.cluster.pv_stats.test.{{pvc}}.used_bytes
+  monitoring_query: kubelet_volume_stats_used_bytes{persistentvolumeclaim="{{ pvc }}"}
+  monitoring_step: 15
+- name: measurements.cluster.pv_stats.test.{{pvc}}.available_bytes
+  monitoring_query: kubelet_volume_stats_available_bytes{persistentvolumeclaim="{{ pvc }}"}
+  monitoring_step: 15
+{%- endmacro %}
+{{ pv_stats('data-rhdh-postgresql-primary-0') }}
 # Results
 - name: results.locust-operator.locust_requests_avg_response_time_Aggregated
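
The scalability matrix in ci-scripts/scalability/collect-results.sh is driven
entirely by the SCALE_* environment variables. Below is a minimal sketch of a
local run, assuming the defaults from the patch and the default .artifacts
output directory; the file name and all data values are illustrative, not
taken from a real run:

    export SCALE_WORKERS="5"
    export SCALE_ACTIVE_USERS_SPAWN_RATES="1:1 200:40"
    export SCALE_BS_USERS_GROUPS="1:1 15000:5000"
    export SCALE_CATALOG_SIZES="1 10000"
    export SCALE_REPLICAS="5"
    export SCALE_DB_STORAGES="1Gi 2Gi"
    ./ci-scripts/scalability/collect-results.sh

    # One CSV per (workers, replicas, users:groups, storage, active users)
    # combination, e.g.:
    #   .artifacts/scalability_c-5r-db_1Gi-1bu-1bg-5w-1u.csv
    # with a header row plus one data row per catalog size (values made up):
    #   CatalogSize;AverageRPS;MaxRPS;AverageRT;MaxRT;FailRate;DBStorageUsed;DBStorageAvailable;DBStorageCapacity
    #   1;12.3;20.1;150.2;310.0;0.01;123456789;876543210;1073741824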
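
The sed expression placed in front of jq appears to be there to quote
malformed numeric literals (digits polluted with X characters) so that a
benchmark.json containing them still parses. A stand-alone sketch of that
behavior, using a made-up path and made-up values:

    echo '{"results": {"rps": 42.5, "rt": 12.3X4, "ok": true}}' >/tmp/benchmark.json
    sed -Ee 's/: ([0-9]+\.[0-9]*[X]+[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9]*X+[0-9e\+-]+),/: "\1",/g' \
        /tmp/benchmark.json | jq -r '.results.rt'
    # prints 12.3X4 -- the malformed value has been turned into a JSON string,
    # so jq can parse the file instead of failing on it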