feat(RHIDP-893): Collect DB scalability metrics and results
Signed-off-by: Pavel Macík <[email protected]>
pmacik committed Jan 12, 2024
1 parent fe848fe commit 313b133
Showing 5 changed files with 125 additions and 14 deletions.
10 changes: 7 additions & 3 deletions Makefile
@@ -72,12 +72,16 @@ namespace:
## Deploy RHDH
.PHONY: deploy-rhdh
deploy-rhdh:
date --utc -Ins>$(TMP_DIR)/deploy-before
cd ./ci-scripts/rhdh-setup/; ./deploy.sh -i
date --utc -Ins>$(TMP_DIR)/deploy-after

## Create users, groups and objects such as components and APIs in RHDH
.PHONY: populate-rhdh
populate-rhdh:
date --utc -Ins>$(TMP_DIR)/populate-before
cd ./ci-scripts/rhdh-setup/; ./deploy.sh -c
date --utc -Ins>$(TMP_DIR)/populate-after

## Undeploy RHDH
.PHONY: undeploy-rhdh
@@ -120,15 +124,15 @@ clean:
.PHONY: test
test:
mkdir -p $(ARTIFACT_DIR)
echo $(SCENARIO)>$(ARTIFACT_DIR)/benchmark-scenario
echo $(SCENARIO)>$(TMP_DIR)/benchmark-scenario
cat locust-test-template.yaml | envsubst | kubectl apply --namespace $(LOCUST_NAMESPACE) -f -
kubectl create --namespace $(LOCUST_NAMESPACE) configmap locust.$(SCENARIO) --from-file scenarios/$(SCENARIO).py --dry-run=client -o yaml | kubectl apply --namespace $(LOCUST_NAMESPACE) -f -
date --utc -Ins>$(ARTIFACT_DIR)/benchmark-before
date --utc -Ins>$(TMP_DIR)/benchmark-before
timeout=$$(date -d "30 seconds" "+%s"); while [ -z "$$(kubectl get --namespace $(LOCUST_NAMESPACE) pod -l performance-test-pod-name=$(SCENARIO)-test-master -o name)" ]; do if [ "$$(date "+%s")" -gt "$$timeout" ]; then echo "ERROR: Timeout waiting for locust master pod to start"; exit 1; else echo "Waiting for locust master pod to start..."; sleep 5s; fi; done
kubectl wait --namespace $(LOCUST_NAMESPACE) --for=condition=Ready=true $$(kubectl get --namespace $(LOCUST_NAMESPACE) pod -l performance-test-pod-name=$(SCENARIO)-test-master -o name)
@echo "Getting locust master log:"
kubectl logs --namespace $(LOCUST_NAMESPACE) -f -l performance-test-pod-name=$(SCENARIO)-test-master | tee load-test.log
date --utc -Ins>$(ARTIFACT_DIR)/benchmark-after
date --utc -Ins>$(TMP_DIR)/benchmark-after
@echo "All done!!!"

## Run the scalability test
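The deploy, populate, and benchmark steps now bracket each phase with timestamp markers written by "date --utc -Ins", which collect-results.sh later converts into the start/end boundaries of the monitoring queries. A minimal round-trip sketch of that mechanism (assuming GNU coreutils date and the default .tmp directory used by the Makefile):

TMP_DIR=${TMP_DIR:-.tmp}
mkdir -p "$TMP_DIR"
# Written by the Makefile targets, e.g. 2024-01-12T10:15:30,123456789+00:00
date --utc -Ins >"$TMP_DIR/benchmark-before"
# Read back by collect-results.sh and normalized to second precision,
# e.g. 2024-01-12T10:15:30+00:00
mstart=$(date --utc --date "$(cat "$TMP_DIR/benchmark-before")" --iso-8601=seconds)
echo "$mstart"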
51 changes: 40 additions & 11 deletions ci-scripts/collect-results.sh
@@ -9,6 +9,11 @@ echo -e "\n === Collecting test results and metrics ===\n"
ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
mkdir -p "${ARTIFACT_DIR}"

export TMP_DIR

TMP_DIR=$(readlink -m "${TMP_DIR:-.tmp}")
mkdir -p "${TMP_DIR}"

RHDH_NAMESPACE=${RHDH_NAMESPACE:-rhdh-performance}

cli="oc"
@@ -47,9 +52,16 @@ try_gather_dir() {
fi
}

try_gather_file ./.tmp/backstage.url
try_gather_file ./.tmp/keycloak.url
try_gather_file ./.tmp/chart-values.yaml
try_gather_file "${TMP_DIR}/backstage.url"
try_gather_file "${TMP_DIR}/keycloak.url"
try_gather_file "${TMP_DIR}/chart-values.yaml"
try_gather_file "${TMP_DIR}/deploy-before"
try_gather_file "${TMP_DIR}/deploy-after"
try_gather_file "${TMP_DIR}/populate-before"
try_gather_file "${TMP_DIR}/populate-after"
try_gather_file "${TMP_DIR}/benchmark-before"
try_gather_file "${TMP_DIR}/benchmark-after"
try_gather_file "${TMP_DIR}/benchmark-scenario"
try_gather_file load-test.log

PYTHON_VENV_DIR=.venv
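The try_gather_file and try_gather_dir helpers called above are defined earlier in collect-results.sh and are outside this hunk. A hypothetical minimal equivalent of try_gather_file, for orientation only (the real helper may differ):

try_gather_file() {
    # Copy the given file into the artifact directory if it exists,
    # otherwise warn and keep going.
    if [ -f "$1" ]; then
        cp "$1" "${ARTIFACT_DIR}/"
    else
        echo "WARNING: $1 not found, skipping"
    fi
}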
@@ -71,22 +83,39 @@ set +u
# shellcheck disable=SC1090,SC1091
source $PYTHON_VENV_DIR/bin/activate
set -u
mstart=$(date --utc --date "$(cat "${ARTIFACT_DIR}/benchmark-before")" --iso-8601=seconds)
mend=$(date --utc --date "$(cat "${ARTIFACT_DIR}/benchmark-after")" --iso-8601=seconds)
# populate phase
if [ "$PRE_LOAD_DB" == "true" ]; then
mstart=$(date --utc --date "$(cat "${TMP_DIR}/populate-before")" --iso-8601=seconds)
mend=$(date --utc --date "$(cat "${TMP_DIR}/populate-after")" --iso-8601=seconds)
mhost=$(kubectl -n openshift-monitoring get route -l app.kubernetes.io/name=thanos-query -o json | jq --raw-output '.items[0].spec.host')
status_data.py \
--status-data-file "$monitoring_collection_data" \
--additional config/cluster_read_config.populate.yaml \
--monitoring-start "$mstart" \
--monitoring-end "$mend" \
--monitoring-raw-data-dir "$monitoring_collection_dir" \
--prometheus-host "https://$mhost" \
--prometheus-port 443 \
--prometheus-token "$($cli whoami -t)" \
-d &>>"$monitoring_collection_log"
fi
# test phase
mstart=$(date --utc --date "$(cat "${TMP_DIR}/benchmark-before")" --iso-8601=seconds)
mend=$(date --utc --date "$(cat "${TMP_DIR}/benchmark-after")" --iso-8601=seconds)
mhost=$(kubectl -n openshift-monitoring get route -l app.kubernetes.io/name=thanos-query -o json | jq --raw-output '.items[0].spec.host')
mversion=$(sed -n 's/^__version__ = "\(.*\)"/\1/p' "scenarios/$(cat "${ARTIFACT_DIR}/benchmark-scenario").py")
mversion=$(sed -n 's/^__version__ = "\(.*\)"/\1/p' "scenarios/$(cat "${TMP_DIR}/benchmark-scenario").py")
status_data.py \
--status-data-file "$monitoring_collection_data" \
--set \
results.started="$(cat "${ARTIFACT_DIR}/benchmark-before")" \
results.ended="$(cat "${ARTIFACT_DIR}/benchmark-after")" \
name="RHDH load test $(cat "${ARTIFACT_DIR}/benchmark-scenario")" \
metadata.scenario.name="$(cat "${ARTIFACT_DIR}/benchmark-scenario")" \
results.started="$(cat "${TMP_DIR}/benchmark-before")" \
results.ended="$(cat "${TMP_DIR}/benchmark-after")" \
name="RHDH load test $(cat "${TMP_DIR}/benchmark-scenario")" \
metadata.scenario.name="$(cat "${TMP_DIR}/benchmark-scenario")" \
metadata.scenario.version="$mversion" \
-d &>"$monitoring_collection_log"
status_data.py \
--status-data-file "$monitoring_collection_data" \
--additional config/cluster_read_config.yaml \
--additional config/cluster_read_config.test.yaml \
--monitoring-start "$mstart" \
--monitoring-end "$mend" \
--monitoring-raw-data-dir "$monitoring_collection_dir" \
51 changes: 51 additions & 0 deletions ci-scripts/scalability/collect-results.sh
@@ -8,3 +8,54 @@ echo -e "\n === Collecting test results and metrics for RHDH scalability test ===\n"

ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
mkdir -p "$ARTIFACT_DIR"

read -ra workers <<<"${SCALE_WORKERS:-5}"

read -ra active_users_spawn_rate <<<"${SCALE_ACTIVE_USERS_SPAWN_RATES:-1:1 200:40}"

read -ra bs_users_groups <<<"${SCALE_BS_USERS_GROUPS:-1:1 15000:5000}"

read -ra catalog_sizes <<<"${SCALE_CATALOG_SIZES:-1 10000}"

read -ra replicas <<<"${SCALE_REPLICAS:-5}"

read -ra db_storages <<<"${SCALE_DB_STORAGES:-1Gi 2Gi}"

csv_delim=";"
csv_delim_quoted="\"$csv_delim\""

for w in "${workers[@]}"; do
for r in "${replicas[@]}"; do
for bu_bg in "${bs_users_groups[@]}"; do
IFS=":" read -ra tokens <<<"${bu_bg}"
bu="${tokens[0]}"
bg="${tokens[1]}"
for s in "${db_storages[@]}"; do
for au_sr in "${active_users_spawn_rate[@]}"; do
IFS=":" read -ra tokens <<<"${au_sr}"
active_users=${tokens[0]}
output="$ARTIFACT_DIR/scalability_c-${r}r-db_${s}-${bu}bu-${bg}bg-${w}w-${active_users}u.csv"
echo "CatalogSize${csv_delim}AverateRPS${csv_delim}MaxRPS${csv_delim}AverageRT${csv_delim}MaxRT${csv_delim}FailRate${csv_delim}DBStorageUsed${csv_delim}DBStorageAvailable${csv_delim}DBStorageCapacity" >"$output"
for c in "${catalog_sizes[@]}"; do
index="${r}r-db_${s}-${bu}bu-${bg}bg-${w}w-${c}c"
benchmark_json="$(find . -name benchmark.json | grep "$index" || true)"
echo -n "$c;" >>"$output"
if [ -n "$benchmark_json" ]; then
jq_cmd="(.results.\"locust-operator\".locust_requests_current_rps_Aggregated.mean | tostring) \
+ $csv_delim_quoted + (.results.\"locust-operator\".locust_requests_current_rps_Aggregated.max | tostring) \
+ $csv_delim_quoted + (.results.\"locust-operator\".locust_requests_avg_response_time_Aggregated.mean | tostring) \
+ $csv_delim_quoted + (.results.\"locust-operator\".locust_requests_avg_response_time_Aggregated.max | tostring) \
+ $csv_delim_quoted + (.results.\"locust-operator\".locust_requests_fail_ratio_Aggregated.mean | tostring) \
+ $csv_delim_quoted + (.measurements.cluster.pv_stats.populate.\"data-rhdh-postgresql-primary-0\".used_bytes.max | tostring) \
+ $csv_delim_quoted + (.measurements.cluster.pv_stats.populate.\"data-rhdh-postgresql-primary-0\".available_bytes.min | tostring) \
+ $csv_delim_quoted + (.measurements.cluster.pv_stats.populate.\"data-rhdh-postgresql-primary-0\".capacity_bytes.max | tostring)"
sed -Ee 's/: ([0-9]+\.[0-9]*[X]+[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9]*X+[0-9e\+-]+),/: "\1",/g' "$benchmark_json" | jq -rc "$jq_cmd" >>"$output"
else
echo ";" >>"$output"
fi
done
done
done
done
done
done
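The sed expression above appears to guard against benchmark.json files in which some numeric fields contain placeholder 'X' characters and are therefore not valid JSON numbers: it quotes such values so jq can still parse the file. A standalone illustration with a made-up input value:

json='{"mean": 12.3X4e+05, "max": 42.0}'   # 12.3X4e+05 is not a valid JSON number
echo "$json" \
  | sed -Ee 's/: ([0-9]+\.[0-9]*[X]+[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9]*X+[0-9e\+-]+),/: "\1",/g' \
  | jq -r '.mean'   # the value is now a quoted string; prints 12.3X4e+05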
14 changes: 14 additions & 0 deletions config/cluster_read_config.populate.yaml
@@ -0,0 +1,14 @@
{% macro pv_stats(pvc) -%}
# Collect data for PV stats
- name: measurements.cluster.pv_stats.populate.{{pvc}}.capacity_bytes
monitoring_query: kubelet_volume_stats_capacity_bytes{persistentvolumeclaim="{{ pvc }}"}
monitoring_step: 15
- name: measurements.cluster.pv_stats.populate.{{pvc}}.used_bytes
monitoring_query: kubelet_volume_stats_used_bytes{persistentvolumeclaim="{{ pvc }}"}
monitoring_step: 15
- name: measurements.cluster.pv_stats.populate.{{pvc}}.available_bytes
monitoring_query: kubelet_volume_stats_available_bytes{persistentvolumeclaim="{{ pvc }}"}
monitoring_step: 15
{%- endmacro %}

{{ pv_stats('data-rhdh-postgresql-primary-0') }}
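Not part of this commit, but the queries registered by the macro above can be spot-checked by hand against the same Thanos query route that collect-results.sh uses, assuming an OpenShift session where "oc whoami -t" returns a token and the route serves the standard Prometheus HTTP API:

mhost=$(kubectl -n openshift-monitoring get route -l app.kubernetes.io/name=thanos-query \
  -o jsonpath='{.items[0].spec.host}')
mstart=$(date --utc --date "30 minutes ago" --iso-8601=seconds)
mend=$(date --utc --iso-8601=seconds)
# Range query for the PV used-bytes series collected during the populate phase;
# prints the number of samples returned over the window.
curl -ksS -H "Authorization: Bearer $(oc whoami -t)" \
  "https://${mhost}/api/v1/query_range" \
  --data-urlencode 'query=kubelet_volume_stats_used_bytes{persistentvolumeclaim="data-rhdh-postgresql-primary-0"}' \
  --data-urlencode "start=${mstart}" \
  --data-urlencode "end=${mend}" \
  --data-urlencode 'step=15' \
  | jq '.data.result[0].values | length'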
13 changes: 13 additions & 0 deletions config/cluster_read_config.test.yaml
@@ -174,7 +174,20 @@
{{ monitor_pod('openshift-apiserver', 'apiserver', 15) }}
{{ monitor_pod('openshift-kube-apiserver', 'kube-apiserver', 15, pod_suffix_regex='-ip-.+') }}

{% macro pv_stats(pvc) -%}
# Collect data for PV stats
- name: measurements.cluster.pv_stats.test.{{pvc}}.capacity_bytes
monitoring_query: kubelet_volume_stats_capacity_bytes{persistentvolumeclaim="{{ pvc }}"}
monitoring_step: 15
- name: measurements.cluster.pv_stats.test.{{pvc}}.used_bytes
monitoring_query: kubelet_volume_stats_used_bytes{persistentvolumeclaim="{{ pvc }}"}
monitoring_step: 15
- name: measurements.cluster.pv_stats.test.{{pvc}}.available_bytes
monitoring_query: kubelet_volume_stats_available_bytes{persistentvolumeclaim="{{ pvc }}"}
monitoring_step: 15
{%- endmacro %}

{{ pv_stats('data-rhdh-postgresql-primary-0') }}

# Results
- name: results.locust-operator.locust_requests_avg_response_time_Aggregated