Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(RHIDP-1001): Add scalability test dimensions for RHDH CPU and memory resources #27

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ export RHDH_HELM_RELEASE_NAME ?= rhdh
export RHDH_DEPLOYMENT_REPLICAS ?= 1
export RHDH_DB_REPLICAS ?= 1
export RHDH_DB_STORAGE ?= 1Gi
# Optional resource tuning for the RHDH deployment; empty means "use chart defaults".
export RHDH_RESOURCES_CPU_REQUESTS ?=
export RHDH_RESOURCES_CPU_LIMITS ?=
# Fix: was RHDH_RESOURCES_MEMORY_RESOURCES, a name nothing reads —
# deploy.sh and config/cluster_read_config.test.yaml use RHDH_RESOURCES_MEMORY_REQUESTS.
export RHDH_RESOURCES_MEMORY_REQUESTS ?=
export RHDH_RESOURCES_MEMORY_LIMITS ?=
export RHDH_KEYCLOAK_REPLICAS ?= 1

# python's venv base dir relative to the root of the repository
Expand Down
38 changes: 22 additions & 16 deletions ci-scripts/rhdh-setup/create_resource.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ backstage_url() {
}

create_per_grp() {
echo "Creating entity YAML files"
echo "[INFO][$(date --utc -Ins)] Creating entity YAML files"
varname=$2
obj_count=${!varname}
if [[ -z ${!varname} ]]; then
Expand Down Expand Up @@ -73,7 +73,7 @@ create_per_grp() {
}

clone_and_upload() {
echo "Uploading entities to GitHub"
echo "[INFO][$(date --utc -Ins)] Uploading entities to GitHub"
git_str="${GITHUB_USER}:${GITHUB_TOKEN}@github.com"
base_name=$(basename "$GITHUB_REPO")
git_dir=$TMP_DIR/${base_name}
Expand Down Expand Up @@ -123,7 +123,7 @@ create_group() {
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer '"$token" \
--data-raw '{"name": "'"${groupname}"'"}' |& tee -a "$TMP_DIR/create_group.log"
echo "Group $groupname created" >>"$TMP_DIR/create_group.log"
echo "[INFO][$(date --utc -Ins)] Group $groupname created" >>"$TMP_DIR/create_group.log"
}

create_groups() {
Expand All @@ -142,7 +142,7 @@ create_user() {
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer '"$token" \
--data-raw '{"firstName":"'"${username}"'","lastName":"tester", "email":"'"${username}"'@test.com", "enabled":"true", "username":"'"${username}"'","groups":["/'"${groupname}"'"]}' |& tee -a "$TMP_DIR/create_user.log"
echo "User $username ($groupname) created" >>"$TMP_DIR/create_user.log"
echo "[INFO][$(date --utc -Ins)] User $username ($groupname) created" >>"$TMP_DIR/create_user.log"
}

create_users() {
Expand All @@ -154,12 +154,18 @@ create_users() {

token_lockfile="$TMP_DIR/token.lockfile"

# log_token MESSAGE [LEVEL]
# Append a timestamped entry to the shared keycloak-token log.
# LEVEL defaults to INFO; timestamp is UTC ISO-8601 with nanoseconds.
# Fix: merge residue kept both the old (unleveled) and new (leveled) echo
# lines, so every message was written twice; only the leveled form remains.
log_token() {
  echo "[${2:-INFO}][$(date --utc -Ins)] $1" >>"$TMP_DIR/get_token.log"
}

# Shorthand for writing an INFO-level entry to the token log.
log_token_info() {
  local message="$1"
  log_token "$message" "INFO"
}

# Shorthand for writing an ERROR-level entry to the token log.
log_token_err() {
  local message="$1"
  log_token "$message" "ERROR"
}

get_token() {
token_log="$TMP_DIR/get_token.log"
token_file=$TMP_DIR/token.json
while ! mkdir "$token_lockfile" 2>/dev/null; do
sleep 0.5s
Expand All @@ -169,21 +175,21 @@ get_token() {

timeout_timestamp=$(date -d "60 seconds" "+%s")
while [ ! -f "$token_file" ] || [ ! -s "$token_file" ] || [ "$(date +%s)" -gt "$(jq -rc '.expires_in_timestamp' "$token_file")" ]; do
log_token "refreshing keycloak token"
keycloak_pass=$(oc -n "${RHDH_NAMESPACE}" get secret credential-example-sso -o template --template='{{.data.ADMIN_PASSWORD}}' | base64 -d)
curl -s -k "$(keycloak_url)/auth/realms/master/protocol/openid-connect/token" -d username=admin -d "password=${keycloak_pass}" -d 'grant_type=password' -d 'client_id=admin-cli' | jq -r ".expires_in_timestamp = $(date -d '30 seconds' +%s)" >"$token_file"
log_token_info "Refreshing keycloak token"
if [ "$(date "+%s")" -gt "$timeout_timestamp" ]; then
log_token "ERROR: Timeout getting keycloak token"
log_token_err "Timeout getting keycloak token"
exit 1
else
log_token "Re-attempting to get keycloak token"
sleep 5s
fi
keycloak_pass=$(oc -n "${RHDH_NAMESPACE}" get secret credential-example-sso -o template --template='{{.data.ADMIN_PASSWORD}}' | base64 -d)
if ! curl -s -k "$(keycloak_url)/auth/realms/master/protocol/openid-connect/token" -d username=admin -d "password=${keycloak_pass}" -d 'grant_type=password' -d 'client_id=admin-cli' | jq -r ".expires_in_timestamp = $(date -d '30 seconds' +%s)" >"$token_file"; then
log_token_err "Unable to get token, re-attempting"
fi
sleep 5s
done

rm -rf "$token_lockfile"
jq -rc '.access_token' "$token_file"
rm -rf "$token_lockfile"
}

export -f keycloak_url backstage_url backstage_url get_token create_group create_user log_token
export -f keycloak_url backstage_url backstage_url get_token create_group create_user log_token log_token_info log_token_err
export kc_lockfile bs_lockfile token_lockfile
20 changes: 15 additions & 5 deletions ci-scripts/rhdh-setup/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ repo_name="$RHDH_NAMESPACE-helm-repo"
export RHDH_DEPLOYMENT_REPLICAS=${RHDH_DEPLOYMENT_REPLICAS:-1}
export RHDH_DB_REPLICAS=${RHDH_DB_REPLICAS:-1}
export RHDH_DB_STORAGE=${RHDH_DB_STORAGE:-1Gi}
export RHDH_RESOURCES_CPU_REQUESTS=${RHDH_RESOURCES_CPU_REQUESTS:-}
export RHDH_RESOURCES_CPU_LIMITS=${RHDH_RESOURCES_CPU_LIMITS:-}
export RHDH_RESOURCES_MEMORY_REQUESTS=${RHDH_RESOURCES_MEMORY_REQUESTS:-}
export RHDH_RESOURCES_MEMORY_LIMITS=${RHDH_RESOURCES_MEMORY_LIMITS:-}
export RHDH_KEYCLOAK_REPLICAS=${RHDH_KEYCLOAK_REPLICAS:-1}

export RHDH_IMAGE_REGISTRY=${RHDH_IMAGE_REGISTRY:-}
Expand All @@ -44,13 +48,14 @@ wait_to_start() {
rn=$resource/$name
description=${5:-$rn}
timeout_timestamp=$(date -d "$initial_timeout seconds" "+%s")
interval=10s
while ! /bin/bash -c "$clin get $rn -o name"; do
if [ "$(date "+%s")" -gt "$timeout_timestamp" ]; then
echo "ERROR: Timeout waiting for $description to start"
echo "[ERROR][$(date --utc -Ins)] Timeout waiting for $description to start"
exit 1
else
echo "Waiting for $description to start..."
sleep 5s
echo "[INFO][$(date --utc -Ins)] Waiting $interval for $description to start..."
sleep "$interval"
fi
done
$clin rollout status "$rn" --timeout="${wait_timeout}s"
Expand Down Expand Up @@ -107,7 +112,6 @@ backstage_install() {
chart_origin="$chart_origin@$RHDH_HELM_CHART_VERSION"
fi
echo "Installing RHDH Helm chart $RHDH_HELM_RELEASE_NAME from $chart_origin in $RHDH_NAMESPACE namespace"
#shellcheck disable=SC2086
envsubst \
'${OPENSHIFT_APP_DOMAIN} \
${RHDH_HELM_RELEASE_NAME} \
Expand All @@ -118,7 +122,13 @@ backstage_install() {
${RHDH_IMAGE_REPO} \
${RHDH_IMAGE_TAG} \
${RHDH_NAMESPACE} \
' <"$chart_values" | tee "$TMP_DIR/chart-values.yaml" | helm upgrade --install "${RHDH_HELM_RELEASE_NAME}" --devel "${repo_name}/${RHDH_HELM_CHART}" ${version_arg} -n "${RHDH_NAMESPACE}" --values -
' <"$chart_values" >"$TMP_DIR/chart-values.yaml"
if [ -n "${RHDH_RESOURCES_CPU_REQUESTS}" ]; then yq -i '.upstream.backstage.resources.requests.cpu = "'"${RHDH_RESOURCES_CPU_REQUESTS}"'"' "$TMP_DIR/chart-values.yaml"; fi
if [ -n "${RHDH_RESOURCES_CPU_LIMITS}" ]; then yq -i '.upstream.backstage.resources.limits.cpu = "'"${RHDH_RESOURCES_CPU_LIMITS}"'"' "$TMP_DIR/chart-values.yaml"; fi
if [ -n "${RHDH_RESOURCES_MEMORY_REQUESTS}" ]; then yq -i '.upstream.backstage.resources.requests.memory = "'"${RHDH_RESOURCES_MEMORY_REQUESTS}"'"' "$TMP_DIR/chart-values.yaml"; fi
if [ -n "${RHDH_RESOURCES_MEMORY_LIMITS}" ]; then yq -i '.upstream.backstage.resources.limits.memory = "'"${RHDH_RESOURCES_MEMORY_LIMITS}"'"' "$TMP_DIR/chart-values.yaml"; fi
#shellcheck disable=SC2086
helm upgrade --install "${RHDH_HELM_RELEASE_NAME}" --devel "${repo_name}/${RHDH_HELM_CHART}" ${version_arg} -n "${RHDH_NAMESPACE}" --values "$TMP_DIR/chart-values.yaml"
wait_to_start statefulset "${RHDH_HELM_RELEASE_NAME}-postgresql-read" 300 300
wait_to_start deployment "${RHDH_HELM_RELEASE_NAME}-developer-hub" 300 300
}
Expand Down
42 changes: 28 additions & 14 deletions ci-scripts/scalability/collect-results.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ read -ra replicas <<<"${SCALE_REPLICAS:-5}"

read -ra db_storages <<<"${SCALE_DB_STORAGES:-1Gi 2Gi}"

read -ra cpu_requests_limits <<<"${SCALE_CPU_REQUESTS_LIMITS:-:}"

read -ra memory_requests_limits <<<"${SCALE_MEMORY_REQUESTS_LIMITS:-:}"

csv_delim=";"
csv_delim_quoted="\"$csv_delim\""

Expand All @@ -36,28 +40,38 @@ for w in "${workers[@]}"; do
active_users=${tokens[0]}
output="$ARTIFACT_DIR/scalability_c-${r}r-db_${s}-${bu}bu-${bg}bg-${w}w-${active_users}u.csv"
header="CatalogSize${csv_delim}AverateRPS${csv_delim}MaxRPS${csv_delim}AverageRT${csv_delim}MaxRT${csv_delim}FailRate${csv_delim}DBStorageUsed${csv_delim}DBStorageAvailable${csv_delim}DBStorageCapacity"
echo "$header" >"$output"
for c in "${catalog_sizes[@]}"; do
index="${r}r-db_${s}-${bu}bu-${bg}bg-${w}w-${c}c"
benchmark_json="$(find "${ARTIFACT_DIR}" -name benchmark.json | grep "$index" || true)"
echo -n "$c" >>"$output"
if [ -n "$benchmark_json" ]; then
echo "Gathering data from $benchmark_json"
jq_cmd="$csv_delim_quoted + (.results.\"locust-operator\".locust_requests_current_rps_Aggregated.mean | tostring) \
for cr_cl in "${cpu_requests_limits[@]}"; do
IFS=":" read -ra tokens <<<"${cr_cl}"
cr="${tokens[0]}"
cl="${tokens[1]}"
for mr_ml in "${memory_requests_limits[@]}"; do
IFS=":" read -ra tokens <<<"${mr_ml}"
mr="${tokens[0]}"
ml="${tokens[1]}"
echo "$header" >"$output"
for c in "${catalog_sizes[@]}"; do
index="${r}r-db_${s}-${bu}bu-${bg}bg-${w}w-${cr}cr-${cl}cl-${mr}mr-${ml}ml-${c}c"
benchmark_json="$(find "${ARTIFACT_DIR}" -name benchmark.json | grep "$index" || true)"
echo -n "$c" >>"$output"
if [ -n "$benchmark_json" ]; then
echo "Gathering data from $benchmark_json"
jq_cmd="$csv_delim_quoted + (.results.\"locust-operator\".locust_requests_current_rps_Aggregated.mean | tostring) \
+ $csv_delim_quoted + (.results.\"locust-operator\".locust_requests_current_rps_Aggregated.max | tostring) \
+ $csv_delim_quoted + (.results.\"locust-operator\".locust_requests_avg_response_time_Aggregated.mean | tostring) \
+ $csv_delim_quoted + (.results.\"locust-operator\".locust_requests_avg_response_time_Aggregated.max | tostring) \
+ $csv_delim_quoted + (.results.\"locust-operator\".locust_requests_fail_ratio_Aggregated.mean | tostring) \
+ $csv_delim_quoted + (.measurements.cluster.pv_stats.populate.\"data-rhdh-postgresql-primary-0\".used_bytes.max | tostring) \
+ $csv_delim_quoted + (.measurements.cluster.pv_stats.populate.\"data-rhdh-postgresql-primary-0\".available_bytes.min | tostring) \
+ $csv_delim_quoted + (.measurements.cluster.pv_stats.populate.\"data-rhdh-postgresql-primary-0\".capacity_bytes.max | tostring)"
sed -Ee 's/: ([0-9]+\.[0-9]*[X]+[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9]*X+[0-9e\+-]+)/: "\1"/g' "$benchmark_json" | jq -rc "$jq_cmd" >>"$output"
else
for _ in $(seq 1 "$(echo "$header" | tr -cd "$csv_delim" | wc -c)"); do
echo -n ";" >>"$output"
sed -Ee 's/: ([0-9]+\.[0-9]*[X]+[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9e\+-]*|[0-9]*X+[0-9]*\.[0-9]*X+[0-9e\+-]+)/: "\1"/g' "$benchmark_json" | jq -rc "$jq_cmd" >>"$output"
else
for _ in $(seq 1 "$(echo "$header" | tr -cd "$csv_delim" | wc -c)"); do
echo -n ";" >>"$output"
done
echo >>"$output"
fi
done
echo >>"$output"
fi
done
done
done
done
Expand Down
104 changes: 61 additions & 43 deletions ci-scripts/scalability/test-scalability.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ read -ra replicas <<<"${SCALE_REPLICAS:-5}"

read -ra db_storages <<<"${SCALE_DB_STORAGES:-1Gi 2Gi}"

read -ra cpu_requests_limits <<<"${SCALE_CPU_REQUESTS_LIMITS:-:}"

read -ra memory_requests_limits <<<"${SCALE_MEMORY_REQUESTS_LIMITS:-:}"

echo
echo "////// RHDH scalability test //////"
echo "Number of scalability matrix iterations: $((${#workers[*]} * ${#active_users_spawn_rate[*]} * ${#bs_users_groups[*]} * ${#catalog_sizes[*]} * ${#replicas[*]} * ${#db_storages[*]}))"
Expand Down Expand Up @@ -68,49 +72,63 @@ for w in "${workers[@]}"; do
IFS=":" read -ra tokens <<<"${bu_bg}"
bu="${tokens[0]}"
bg="${tokens[1]}"
for c in "${catalog_sizes[@]}"; do
for r in "${replicas[@]}"; do
for s in "${db_storages[@]}"; do
echo
echo "/// Setting up RHDH for scalability test ///"
echo
set -x
export RHDH_DEPLOYMENT_REPLICAS="$r"
export RHDH_DB_REPLICAS="$r"
export RHDH_DB_STORAGE="$s"
export RHDH_KEYCLOAK_REPLICAS=$r
export BACKSTAGE_USER_COUNT=$bu
export GROUP_COUNT=$bg
export WORKERS=$w
export API_COUNT=$c
export COMPONENT_COUNT=$c
index=${r}r-db_${s}-${bu}bu-${bg}bg-${w}w-${c}c
set +x
oc login "$OPENSHIFT_API" -u "$OPENSHIFT_USERNAME" -p "$OPENSHIFT_PASSWORD" --insecure-skip-tls-verify=true
make undeploy-rhdh
setup_artifacts="$SCALABILITY_ARTIFACTS/$index/setup"
mkdir -p "$setup_artifacts"
ARTIFACT_DIR=$setup_artifacts ./ci-scripts/setup.sh |& tee "$setup_artifacts/setup.log"
wait_for_indexing |& tee "$setup_artifacts/after-setup-search.log"
for au_sr in "${active_users_spawn_rate[@]}"; do
IFS=":" read -ra tokens <<<"${au_sr}"
active_users=${tokens[0]}
spawn_rate=${tokens[1]}
echo
echo "/// Running the scalability test ///"
echo
set -x
export SCENARIO=${SCENARIO:-search-catalog}
export USERS="${active_users}"
export DURATION=${DURATION:-5m}
export SPAWN_RATE="${spawn_rate}"
set +x
make clean
test_artifacts="$SCALABILITY_ARTIFACTS/$index/test/${active_users}u"
mkdir -p "$test_artifacts"
wait_for_indexing |& tee "$test_artifacts/before-test-search.log"
ARTIFACT_DIR=$test_artifacts ./ci-scripts/test.sh |& tee "$test_artifacts/test.log"
ARTIFACT_DIR=$test_artifacts ./ci-scripts/collect-results.sh |& tee "$test_artifacts/collect-results.log"
for cr_cl in "${cpu_requests_limits[@]}"; do
IFS=":" read -ra tokens <<<"${cr_cl}"
cr="${tokens[0]}"
cl="${tokens[1]}"
for mr_ml in "${memory_requests_limits[@]}"; do
IFS=":" read -ra tokens <<<"${mr_ml}"
mr="${tokens[0]}"
ml="${tokens[1]}"
for c in "${catalog_sizes[@]}"; do
for r in "${replicas[@]}"; do
for s in "${db_storages[@]}"; do
echo
echo "/// Setting up RHDH for scalability test ///"
echo
set -x
export RHDH_DEPLOYMENT_REPLICAS="$r"
export RHDH_DB_REPLICAS="$r"
export RHDH_DB_STORAGE="$s"
export RHDH_RESOURCES_CPU_REQUESTS="$cr"
export RHDH_RESOURCES_CPU_LIMITS="$cl"
export RHDH_RESOURCES_MEMORY_REQUESTS="$mr"
export RHDH_RESOURCES_MEMORY_LIMITS="$ml"
export RHDH_KEYCLOAK_REPLICAS=$r
export BACKSTAGE_USER_COUNT=$bu
export GROUP_COUNT=$bg
export WORKERS=$w
export API_COUNT=$c
export COMPONENT_COUNT=$c
index="${r}r-db_${s}-${bu}bu-${bg}bg-${w}w-${cr}cr-${cl}cl-${mr}mr-${ml}ml-${c}c"
set +x
oc login "$OPENSHIFT_API" -u "$OPENSHIFT_USERNAME" -p "$OPENSHIFT_PASSWORD" --insecure-skip-tls-verify=true
make undeploy-rhdh
setup_artifacts="$SCALABILITY_ARTIFACTS/$index/setup"
mkdir -p "$setup_artifacts"
ARTIFACT_DIR=$setup_artifacts ./ci-scripts/setup.sh |& tee "$setup_artifacts/setup.log"
wait_for_indexing |& tee "$setup_artifacts/after-setup-search.log"
for au_sr in "${active_users_spawn_rate[@]}"; do
IFS=":" read -ra tokens <<<"${au_sr}"
active_users=${tokens[0]}
spawn_rate=${tokens[1]}
echo
echo "/// Running the scalability test ///"
echo
set -x
export SCENARIO=${SCENARIO:-search-catalog}
export USERS="${active_users}"
export DURATION=${DURATION:-5m}
export SPAWN_RATE="${spawn_rate}"
set +x
make clean
test_artifacts="$SCALABILITY_ARTIFACTS/$index/test/${active_users}u"
mkdir -p "$test_artifacts"
wait_for_indexing |& tee "$test_artifacts/before-test-search.log"
ARTIFACT_DIR=$test_artifacts ./ci-scripts/test.sh |& tee "$test_artifacts/test.log"
ARTIFACT_DIR=$test_artifacts ./ci-scripts/collect-results.sh |& tee "$test_artifacts/collect-results.log"
done
done
done
done
done
Expand Down
4 changes: 4 additions & 0 deletions config/cluster_read_config.test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@
'RHDH_DEPLOYMENT_REPLICAS',
'RHDH_DB_REPLICAS',
'RHDH_DB_STORAGE',
'RHDH_RESOURCES_CPU_REQUESTS',
'RHDH_RESOURCES_CPU_LIMITS',
'RHDH_RESOURCES_MEMORY_REQUESTS',
'RHDH_RESOURCES_MEMORY_LIMITS',
'RHDH_KEYCLOAK_REPLICAS',
'RHDH_HELM_REPO',
'RHDH_HELM_CHART',
Expand Down