From c39729a1ba7abdeb28e195167636267421505551 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Mac=C3=ADk?= Date: Tue, 16 Jan 2024 15:28:22 +0100 Subject: [PATCH] feat(RHIDP-1001): Add scalability test dimensions for RHDH CPU and memory resources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pavel Macík --- Makefile | 4 + ci-scripts/rhdh-setup/create_resource.sh | 38 ++++---- ci-scripts/rhdh-setup/deploy.sh | 20 +++- ci-scripts/scalability/collect-results.sh | 42 ++++++--- ci-scripts/scalability/test-scalability.sh | 104 ++++++++++++--------- config/cluster_read_config.test.yaml | 4 + 6 files changed, 134 insertions(+), 78 deletions(-) diff --git a/Makefile b/Makefile index 59a1e62..80c5138 100644 --- a/Makefile +++ b/Makefile @@ -32,6 +32,10 @@ export RHDH_HELM_RELEASE_NAME ?= rhdh export RHDH_DEPLOYMENT_REPLICAS ?= 1 export RHDH_DB_REPLICAS ?= 1 export RHDH_DB_STORAGE ?= 1Gi +export RHDH_RESOURCES_CPU_REQUESTS ?= 500m +export RHDH_RESOURCES_CPU_LIMITS ?= 500m +export RHDH_RESOURCES_MEMORY_REQUESTS ?= 1Gi +export RHDH_RESOURCES_MEMORY_LIMITS ?= 1Gi export RHDH_KEYCLOAK_REPLICAS ?= 1 # python's venv base dir relative to the root of the repository diff --git a/ci-scripts/rhdh-setup/create_resource.sh b/ci-scripts/rhdh-setup/create_resource.sh index c1478db..11b43e4 100755 --- a/ci-scripts/rhdh-setup/create_resource.sh +++ b/ci-scripts/rhdh-setup/create_resource.sh @@ -44,7 +44,7 @@ backstage_url() { } create_per_grp() { - echo "Creating entity YAML files" + echo "[INFO][$(date --utc -Ins)] Creating entity YAML files" varname=$2 obj_count=${!varname} if [[ -z ${!varname} ]]; then @@ -73,7 +73,7 @@ create_per_grp() { } clone_and_upload() { - echo "Uploading entities to GitHub" + echo "[INFO][$(date --utc -Ins)] Uploading entities to GitHub" git_str="${GITHUB_USER}:${GITHUB_TOKEN}@github.com" base_name=$(basename "$GITHUB_REPO") git_dir=$TMP_DIR/${base_name} @@ -123,7 +123,7 @@ create_group() { -H 
'Content-Type: application/json' \ -H 'Authorization: Bearer '"$token" \ --data-raw '{"name": "'"${groupname}"'"}' |& tee -a "$TMP_DIR/create_group.log" - echo "Group $groupname created" >>"$TMP_DIR/create_group.log" + echo "[INFO][$(date --utc -Ins)] Group $groupname created" >>"$TMP_DIR/create_group.log" } create_groups() { @@ -142,7 +142,7 @@ create_user() { -H 'Content-Type: application/json' \ -H 'Authorization: Bearer '"$token" \ --data-raw '{"firstName":"'"${username}"'","lastName":"tester", "email":"'"${username}"'@test.com", "enabled":"true", "username":"'"${username}"'","groups":["/'"${groupname}"'"]}' |& tee -a "$TMP_DIR/create_user.log" - echo "User $username ($groupname) created" >>"$TMP_DIR/create_user.log" + echo "[INFO][$(date --utc -Ins)] User $username ($groupname) created" >>"$TMP_DIR/create_user.log" } create_users() { @@ -154,12 +154,18 @@ create_users() { token_lockfile="$TMP_DIR/token.lockfile" log_token() { - token_log="$TMP_DIR/get_token.log" - echo "[$(date --utc -Ins)] $1" >>"$token_log" + echo "[${2:-INFO}][$(date --utc -Ins)] $1" >>"$TMP_DIR/get_token.log" +} + +log_token_info() { + log_token "$1" "INFO" +} + +log_token_err() { + log_token "$1" "ERROR" } get_token() { - token_log="$TMP_DIR/get_token.log" token_file=$TMP_DIR/token.json while ! mkdir "$token_lockfile" 2>/dev/null; do sleep 0.5s @@ -169,21 +175,21 @@ get_token() { timeout_timestamp=$(date -d "60 seconds" "+%s") while [ ! -f "$token_file" ] || [ ! 
-s "$token_file" ] || [ "$(date +%s)" -gt "$(jq -rc '.expires_in_timestamp' "$token_file")" ]; do - log_token "refreshing keycloak token" - keycloak_pass=$(oc -n "${RHDH_NAMESPACE}" get secret credential-example-sso -o template --template='{{.data.ADMIN_PASSWORD}}' | base64 -d) - curl -s -k "$(keycloak_url)/auth/realms/master/protocol/openid-connect/token" -d username=admin -d "password=${keycloak_pass}" -d 'grant_type=password' -d 'client_id=admin-cli' | jq -r ".expires_in_timestamp = $(date -d '30 seconds' +%s)" >"$token_file" + log_token_info "Refreshing keycloak token" if [ "$(date "+%s")" -gt "$timeout_timestamp" ]; then - log_token "ERROR: Timeout getting keycloak token" + log_token_err "Timeout getting keycloak token" exit 1 - else - log_token "Re-attempting to get keycloak token" - sleep 5s fi + keycloak_pass=$(oc -n "${RHDH_NAMESPACE}" get secret credential-example-sso -o template --template='{{.data.ADMIN_PASSWORD}}' | base64 -d) + if ! curl -s -k "$(keycloak_url)/auth/realms/master/protocol/openid-connect/token" -d username=admin -d "password=${keycloak_pass}" -d 'grant_type=password' -d 'client_id=admin-cli' | jq -r ".expires_in_timestamp = $(date -d '30 seconds' +%s)" >"$token_file"; then + log_token_err "Unable to get token, re-attempting" + fi + sleep 5s done - rm -rf "$token_lockfile" jq -rc '.access_token' "$token_file" + rm -rf "$token_lockfile" } -export -f keycloak_url backstage_url backstage_url get_token create_group create_user log_token +export -f keycloak_url backstage_url backstage_url get_token create_group create_user log_token log_token_info log_token_err export kc_lockfile bs_lockfile token_lockfile diff --git a/ci-scripts/rhdh-setup/deploy.sh b/ci-scripts/rhdh-setup/deploy.sh index d0aa82c..aae5b53 100755 --- a/ci-scripts/rhdh-setup/deploy.sh +++ b/ci-scripts/rhdh-setup/deploy.sh @@ -20,6 +20,10 @@ repo_name="$RHDH_NAMESPACE-helm-repo" export RHDH_DEPLOYMENT_REPLICAS=${RHDH_DEPLOYMENT_REPLICAS:-1} export 
RHDH_DB_REPLICAS=${RHDH_DB_REPLICAS:-1} export RHDH_DB_STORAGE=${RHDH_DB_STORAGE:-1Gi} +export RHDH_RESOURCES_CPU_REQUESTS=${RHDH_RESOURCES_CPU_REQUESTS:-500m} +export RHDH_RESOURCES_CPU_LIMITS=${RHDH_RESOURCES_CPU_LIMITS:-500m} +export RHDH_RESOURCES_MEMORY_REQUESTS=${RHDH_RESOURCES_MEMORY_REQUESTS:-1Gi} +export RHDH_RESOURCES_MEMORY_LIMITS=${RHDH_RESOURCES_MEMORY_LIMITS:-1Gi} export RHDH_KEYCLOAK_REPLICAS=${RHDH_KEYCLOAK_REPLICAS:-1} export RHDH_IMAGE_REGISTRY=${RHDH_IMAGE_REGISTRY:-} @@ -44,13 +48,14 @@ wait_to_start() { rn=$resource/$name description=${5:-$rn} timeout_timestamp=$(date -d "$initial_timeout seconds" "+%s") + interval=10s while ! /bin/bash -c "$clin get $rn -o name"; do if [ "$(date "+%s")" -gt "$timeout_timestamp" ]; then - echo "ERROR: Timeout waiting for $description to start" + echo "[ERROR][$(date --utc -Ins)] Timeout waiting for $description to start" exit 1 else - echo "Waiting for $description to start..." - sleep 5s + echo "[INFO][$(date --utc -Ins)] Waiting $interval for $description to start..." 
+ sleep "$interval" fi done $clin rollout status "$rn" --timeout="${wait_timeout}s" @@ -107,7 +112,6 @@ backstage_install() { chart_origin="$chart_origin@$RHDH_HELM_CHART_VERSION" fi echo "Installing RHDH Helm chart $RHDH_HELM_RELEASE_NAME from $chart_origin in $RHDH_NAMESPACE namespace" - #shellcheck disable=SC2086 envsubst \ '${OPENSHIFT_APP_DOMAIN} \ ${RHDH_HELM_RELEASE_NAME} \ @@ -118,7 +122,13 @@ backstage_install() { ${RHDH_IMAGE_REPO} \ ${RHDH_IMAGE_TAG} \ ${RHDH_NAMESPACE} \ - ' <"$chart_values" | tee "$TMP_DIR/chart-values.yaml" | helm upgrade --install "${RHDH_HELM_RELEASE_NAME}" --devel "${repo_name}/${RHDH_HELM_CHART}" ${version_arg} -n "${RHDH_NAMESPACE}" --values - + ' <"$chart_values" >"$TMP_DIR/chart-values.yaml" + if [ -n "${RHDH_RESOURCES_CPU_REQUESTS}" ]; then yq -i '.upstream.backstage.resources.requests.cpu = "'"${RHDH_RESOURCES_CPU_REQUESTS}"'"' "$TMP_DIR/chart-values.yaml"; fi + if [ -n "${RHDH_RESOURCES_CPU_LIMITS}" ]; then yq -i '.upstream.backstage.resources.limits.cpu = "'"${RHDH_RESOURCES_CPU_LIMITS}"'"' "$TMP_DIR/chart-values.yaml"; fi + if [ -n "${RHDH_RESOURCES_MEMORY_REQUESTS}" ]; then yq -i '.upstream.backstage.resources.requests.memory = "'"${RHDH_RESOURCES_MEMORY_REQUESTS}"'"' "$TMP_DIR/chart-values.yaml"; fi + if [ -n "${RHDH_RESOURCES_MEMORY_LIMITS}" ]; then yq -i '.upstream.backstage.resources.limits.memory = "'"${RHDH_RESOURCES_MEMORY_LIMITS}"'"' "$TMP_DIR/chart-values.yaml"; fi + #shellcheck disable=SC2086 + helm upgrade --install "${RHDH_HELM_RELEASE_NAME}" --devel "${repo_name}/${RHDH_HELM_CHART}" ${version_arg} -n "${RHDH_NAMESPACE}" --values "$TMP_DIR/chart-values.yaml" wait_to_start statefulset "${RHDH_HELM_RELEASE_NAME}-postgresql-read" 300 300 wait_to_start deployment "${RHDH_HELM_RELEASE_NAME}-developer-hub" 300 300 } diff --git a/ci-scripts/scalability/collect-results.sh b/ci-scripts/scalability/collect-results.sh index b0ec9bf..e4e2fed 100755 --- a/ci-scripts/scalability/collect-results.sh +++ 
b/ci-scripts/scalability/collect-results.sh @@ -21,6 +21,10 @@ read -ra replicas <<<"${SCALE_REPLICAS:-5}" read -ra db_storages <<<"${SCALE_DB_STORAGES:-1Gi 2Gi}" +read -ra cpu_requests_limits <<<"${SCALE_CPU_REQUESTS_LIMITS:-500m:500m}" + +read -ra memory_requests_limits <<<"${SCALE_MEMORY_REQUESTS_LIMITS:-1Gi:1Gi}" + csv_delim=";" csv_delim_quoted="\"$csv_delim\"" @@ -36,14 +40,22 @@ for w in "${workers[@]}"; do active_users=${tokens[0]} output="$ARTIFACT_DIR/scalability_c-${r}r-db_${s}-${bu}bu-${bg}bg-${w}w-${active_users}u.csv" header="CatalogSize${csv_delim}AverateRPS${csv_delim}MaxRPS${csv_delim}AverageRT${csv_delim}MaxRT${csv_delim}FailRate${csv_delim}DBStorageUsed${csv_delim}DBStorageAvailable${csv_delim}DBStorageCapacity" - echo "$header" >"$output" - for c in "${catalog_sizes[@]}"; do - index="${r}r-db_${s}-${bu}bu-${bg}bg-${w}w-${c}c" - benchmark_json="$(find "${ARTIFACT_DIR}" -name benchmark.json | grep "$index" || true)" - echo -n "$c" >>"$output" - if [ -n "$benchmark_json" ]; then - echo "Gathering data from $benchmark_json" - jq_cmd="$csv_delim_quoted + (.results.\"locust-operator\".locust_requests_current_rps_Aggregated.mean | tostring) \ + for cr_cl in "${cpu_requests_limits[@]}"; do + IFS=":" read -ra tokens <<<"${cr_cl}" + cr="${tokens[0]}" + cl="${tokens[1]}" + for mr_ml in "${memory_requests_limits[@]}"; do + IFS=":" read -ra tokens <<<"${mr_ml}" + mr="${tokens[0]}" + ml="${tokens[1]}" + output="$ARTIFACT_DIR/scalability_c-${r}r-db_${s}-${bu}bu-${bg}bg-${w}w-${cr}cr-${cl}cl-${mr}mr-${ml}ml-${active_users}u.csv"; echo "$header" >"$output" + for c in "${catalog_sizes[@]}"; do + index="${r}r-db_${s}-${bu}bu-${bg}bg-${w}w-${cr}cr-${cl}cl-${mr}mr-${ml}ml-${c}c" + benchmark_json="$(find "${ARTIFACT_DIR}" -name benchmark.json | grep "$index" || true)" + echo -n "$c" >>"$output" + if [ -n "$benchmark_json" ]; then + echo "Gathering data from $benchmark_json" + jq_cmd="$csv_delim_quoted + (.results.\"locust-operator\".locust_requests_current_rps_Aggregated.mean | tostring) \ + $csv_delim_quoted + (.results.\"locust-operator\".locust_requests_current_rps_Aggregated.max | 
tostring) \ + $csv_delim_quoted + (.results.\"locust-operator\".locust_requests_avg_response_time_Aggregated.mean | tostring) \ + $csv_delim_quoted + (.results.\"locust-operator\".locust_requests_avg_response_time_Aggregated.max | tostring) \ @@ -51,13 +63,15 @@ for w in "${workers[@]}"; do + $csv_delim_quoted + (.measurements.cluster.pv_stats.populate.\"data-rhdh-postgresql-primary-0\".used_bytes.max | tostring) \ + $csv_delim_quoted + (.measurements.cluster.pv_stats.populate.\"data-rhdh-postgresql-primary-0\".available_bytes.min | tostring) \ + $csv_delim_quoted + (.measurements.cluster.pv_stats.populate.\"data-rhdh-postgresql-primary-0\".capacity_bytes.max | tostring)" - sed -Ee 's/: ([0-9]+\.[0-9]*[X]+[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9]*X+[0-9e\+-]+)/: "\1"/g' "$benchmark_json" | jq -rc "$jq_cmd" >>"$output" - else - for _ in $(seq 1 "$(echo "$header" | tr -cd "$csv_delim" | wc -c)"); do - echo -n ";" >>"$output" + sed -Ee 's/: ([0-9]+\.[0-9]*[X]+[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9]*X+[0-9e\+-]+)/: "\1"/g' "$benchmark_json" | jq -rc "$jq_cmd" >>"$output" + else + for _ in $(seq 1 "$(echo "$header" | tr -cd "$csv_delim" | wc -c)"); do + echo -n ";" >>"$output" + done + echo >>"$output" + fi done - echo >>"$output" - fi + done done done done diff --git a/ci-scripts/scalability/test-scalability.sh b/ci-scripts/scalability/test-scalability.sh index 573339e..b057d6d 100755 --- a/ci-scripts/scalability/test-scalability.sh +++ b/ci-scripts/scalability/test-scalability.sh @@ -25,6 +25,10 @@ read -ra replicas <<<"${SCALE_REPLICAS:-5}" read -ra db_storages <<<"${SCALE_DB_STORAGES:-1Gi 2Gi}" +read -ra cpu_requests_limits <<<"${SCALE_CPU_REQUESTS_LIMITS:-500m:500m}" + +read -ra memory_requests_limits <<<"${SCALE_MEMORY_REQUESTS_LIMITS:-1Gi:1Gi}" + echo echo "////// RHDH scalability test //////" echo "Number of scalability matrix iterations: $((${#workers[*]} * ${#active_users_spawn_rate[*]} * ${#bs_users_groups[*]} * 
${#catalog_sizes[*]} * ${#replicas[*]} * ${#db_storages[*]}))" @@ -68,49 +72,63 @@ for w in "${workers[@]}"; do IFS=":" read -ra tokens <<<"${bu_bg}" bu="${tokens[0]}" bg="${tokens[1]}" - for c in "${catalog_sizes[@]}"; do - for r in "${replicas[@]}"; do - for s in "${db_storages[@]}"; do - echo - echo "/// Setting up RHDH for scalability test ///" - echo - set -x - export RHDH_DEPLOYMENT_REPLICAS="$r" - export RHDH_DB_REPLICAS="$r" - export RHDH_DB_STORAGE="$s" - export RHDH_KEYCLOAK_REPLICAS=$r - export BACKSTAGE_USER_COUNT=$bu - export GROUP_COUNT=$bg - export WORKERS=$w - export API_COUNT=$c - export COMPONENT_COUNT=$c - index=${r}r-db_${s}-${bu}bu-${bg}bg-${w}w-${c}c - set +x - oc login "$OPENSHIFT_API" -u "$OPENSHIFT_USERNAME" -p "$OPENSHIFT_PASSWORD" --insecure-skip-tls-verify=true - make undeploy-rhdh - setup_artifacts="$SCALABILITY_ARTIFACTS/$index/setup" - mkdir -p "$setup_artifacts" - ARTIFACT_DIR=$setup_artifacts ./ci-scripts/setup.sh |& tee "$setup_artifacts/setup.log" - wait_for_indexing |& tee "$setup_artifacts/after-setup-search.log" - for au_sr in "${active_users_spawn_rate[@]}"; do - IFS=":" read -ra tokens <<<"${au_sr}" - active_users=${tokens[0]} - spawn_rate=${tokens[1]} - echo - echo "/// Running the scalability test ///" - echo - set -x - export SCENARIO=${SCENARIO:-search-catalog} - export USERS="${active_users}" - export DURATION=${DURATION:-5m} - export SPAWN_RATE="${spawn_rate}" - set +x - make clean - test_artifacts="$SCALABILITY_ARTIFACTS/$index/test/${active_users}u" - mkdir -p "$test_artifacts" - wait_for_indexing |& tee "$test_artifacts/before-test-search.log" - ARTIFACT_DIR=$test_artifacts ./ci-scripts/test.sh |& tee "$test_artifacts/test.log" - ARTIFACT_DIR=$test_artifacts ./ci-scripts/collect-results.sh |& tee "$test_artifacts/collect-results.log" + for cr_cl in "${cpu_requests_limits[@]}"; do + IFS=":" read -ra tokens <<<"${cr_cl}" + cr="${tokens[0]}" + cl="${tokens[1]}" + for mr_ml in "${memory_requests_limits[@]}"; do + IFS=":" 
read -ra tokens <<<"${mr_ml}" + mr="${tokens[0]}" + ml="${tokens[1]}" + for c in "${catalog_sizes[@]}"; do + for r in "${replicas[@]}"; do + for s in "${db_storages[@]}"; do + echo + echo "/// Setting up RHDH for scalability test ///" + echo + set -x + export RHDH_DEPLOYMENT_REPLICAS="$r" + export RHDH_DB_REPLICAS="$r" + export RHDH_DB_STORAGE="$s" + export RHDH_RESOURCES_CPU_REQUESTS="$cr" + export RHDH_RESOURCES_CPU_LIMITS="$cl" + export RHDH_RESOURCES_MEMORY_REQUESTS="$mr" + export RHDH_RESOURCES_MEMORY_LIMITS="$ml" + export RHDH_KEYCLOAK_REPLICAS=$r + export BACKSTAGE_USER_COUNT=$bu + export GROUP_COUNT=$bg + export WORKERS=$w + export API_COUNT=$c + export COMPONENT_COUNT=$c + index="${r}r-db_${s}-${bu}bu-${bg}bg-${w}w-${cr}cr-${cl}cl-${mr}mr-${ml}ml-${c}c" + set +x + oc login "$OPENSHIFT_API" -u "$OPENSHIFT_USERNAME" -p "$OPENSHIFT_PASSWORD" --insecure-skip-tls-verify=true + make undeploy-rhdh + setup_artifacts="$SCALABILITY_ARTIFACTS/$index/setup" + mkdir -p "$setup_artifacts" + ARTIFACT_DIR=$setup_artifacts ./ci-scripts/setup.sh |& tee "$setup_artifacts/setup.log" + wait_for_indexing |& tee "$setup_artifacts/after-setup-search.log" + for au_sr in "${active_users_spawn_rate[@]}"; do + IFS=":" read -ra tokens <<<"${au_sr}" + active_users=${tokens[0]} + spawn_rate=${tokens[1]} + echo + echo "/// Running the scalability test ///" + echo + set -x + export SCENARIO=${SCENARIO:-search-catalog} + export USERS="${active_users}" + export DURATION=${DURATION:-5m} + export SPAWN_RATE="${spawn_rate}" + set +x + make clean + test_artifacts="$SCALABILITY_ARTIFACTS/$index/test/${active_users}u" + mkdir -p "$test_artifacts" + wait_for_indexing |& tee "$test_artifacts/before-test-search.log" + ARTIFACT_DIR=$test_artifacts ./ci-scripts/test.sh |& tee "$test_artifacts/test.log" + ARTIFACT_DIR=$test_artifacts ./ci-scripts/collect-results.sh |& tee "$test_artifacts/collect-results.log" + done + done done done done diff --git a/config/cluster_read_config.test.yaml 
b/config/cluster_read_config.test.yaml index 3ea8af0..0a5c0ab 100644 --- a/config/cluster_read_config.test.yaml +++ b/config/cluster_read_config.test.yaml @@ -68,6 +68,10 @@ 'RHDH_DEPLOYMENT_REPLICAS', 'RHDH_DB_REPLICAS', 'RHDH_DB_STORAGE', + 'RHDH_RESOURCES_CPU_REQUESTS', + 'RHDH_RESOURCES_CPU_LIMITS', + 'RHDH_RESOURCES_MEMORY_REQUESTS', + 'RHDH_RESOURCES_MEMORY_LIMITS', 'RHDH_KEYCLOAK_REPLICAS', 'RHDH_HELM_REPO', 'RHDH_HELM_CHART',