Skip to content

Commit

Permalink
feat(RHIDP-857): Add scalability test
Browse files Browse the repository at this point in the history
Signed-off-by: Pavel Macík <[email protected]>
  • Loading branch information
pmacik committed Dec 20, 2023
1 parent 468ce13 commit 391b32a
Show file tree
Hide file tree
Showing 12 changed files with 228 additions and 32 deletions.
31 changes: 21 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export RHDH_HELM_RELEASE_NAME ?= rhdh
# RHDH horizontal scaling
export RHDH_DEPLOYMENT_REPLICAS ?= 1
export RHDH_DB_REPLICAS ?= 1
export RHDH_DB_STORAGE ?= 1Gi
export RHDH_KEYCLOAK_REPLICAS ?= 1

# python's venv base dir relative to the root of the repository
Expand All @@ -39,6 +40,9 @@ PYTHON_VENV=.venv
# Local directory to store temporary files
export TMP_DIR=$(shell readlink -m .tmp)

# Local directory to store artifacts
export ARTIFACT_DIR ?= $(shell readlink -m .artifacts)

# Name of the namespace to install locust operator as well as to run Pods of master and workers.
LOCUST_NAMESPACE=locust-operator

Expand Down Expand Up @@ -105,27 +109,34 @@ undeploy-locust: clean
## === Testing ===

## Remove test related resources from cluster
## Run `make clean-test SCENARIO=...` to clean a specific scenario from cluster
.PHONY: clean-test
clean-test:
## Run `make clean SCENARIO=...` to clean a specific scenario from cluster
.PHONY: clean
clean:
kubectl delete --namespace $(LOCUST_NAMESPACE) cm locust.$(SCENARIO) --ignore-not-found --wait
kubectl delete --namespace $(LOCUST_NAMESPACE) locusttests.locust.io $(SCENARIO).test --ignore-not-found --wait || true

## Deploy and run the locust test
## Run `make test SCENARIO=...` to run a specific scenario
.PHONY: test
test:
echo $(SCENARIO)>benchmark-scenario
mkdir -p $(ARTIFACT_DIR)
echo $(SCENARIO)>$(ARTIFACT_DIR)/benchmark-scenario
cat locust-test-template.yaml | envsubst | kubectl apply --namespace $(LOCUST_NAMESPACE) -f -
kubectl create --namespace $(LOCUST_NAMESPACE) configmap locust.$(SCENARIO) --from-file scenarios/$(SCENARIO).py --dry-run=client -o yaml | kubectl apply --namespace $(LOCUST_NAMESPACE) -f -
date --utc -Ins>benchmark-before
date --utc -Ins>$(ARTIFACT_DIR)/benchmark-before
timeout=$$(date -d "30 seconds" "+%s"); while [ -z "$$(kubectl get --namespace $(LOCUST_NAMESPACE) pod -l performance-test-pod-name=$(SCENARIO)-test-master -o name)" ]; do if [ "$$(date "+%s")" -gt "$$timeout" ]; then echo "ERROR: Timeout waiting for locust master pod to start"; exit 1; else echo "Waiting for locust master pod to start..."; sleep 5s; fi; done
kubectl wait --namespace $(LOCUST_NAMESPACE) --for=condition=Ready=true $$(kubectl get --namespace $(LOCUST_NAMESPACE) pod -l performance-test-pod-name=$(SCENARIO)-test-master -o name)
@echo "Getting locust master log:"
kubectl logs --namespace $(LOCUST_NAMESPACE) -f -l performance-test-pod-name=$(SCENARIO)-test-master | tee load-test.log
date --utc -Ins>benchmark-after
date --utc -Ins>$(ARTIFACT_DIR)/benchmark-after
@echo "All done!!!"

## Run the scalability test
## Run `make test-scalability SCENARIO=...` to run a specific scenario
.PHONY: test-scalability
test-scalability:
# Chain with `&&` so the script is only started when the directory change succeeded.
	cd ./ci-scripts/scalability && ./test-scalability.sh

## Run shellcheck on all of the shell scripts
.PHONY: shellcheck
shellcheck:
Expand All @@ -145,7 +156,7 @@ ci-run: setup-venv deploy-locust test

## Deploy and populate RHDH in CI end to end
.PHONY: ci-deploy
ci-deploy: clean namespace deploy-rhdh
ci-deploy: namespace deploy-rhdh

## === Maintenance ===

Expand All @@ -158,9 +169,9 @@ update-locust-images:
skopeo copy --src-no-creds docker://docker.io/lotest/locust-k8s-operator:latest docker://quay.io/backstage-performance/locust-k8s-operator:latest

## Clean local resources
.PHONY: clean
clean:
rm -rvf *.log benchmark-* shellcheck ci-scripts/rhdh-setup/.tmp $(TMP_DIR)
.PHONY: clean-local
clean-local:
rm -rvf *.log shellcheck $(TMP_DIR) $(ARTIFACT_DIR)

## === Help ===

Expand Down
35 changes: 26 additions & 9 deletions ci-scripts/collect-results.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,26 @@ set -o pipefail

echo -e "\n === Collecting test results and metrics ===\n"

ARTIFACT_DIR=${ARTIFACT_DIR:-artifacts}
ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
mkdir -p "${ARTIFACT_DIR}"

RHDH_NAMESPACE=${RHDH_NAMESPACE:-rhdh-performance}

cli="oc"
clin="$cli -n $RHDH_NAMESPACE"

for label in app.kubernetes.io/name=developer-hub app.kubernetes.io/name=postgresql; do
echo -e "\nCollecting logs from pods in '$RHDH_NAMESPACE' namespace with label '$label':"
for pod in $($clin get pods -l "$label" -o name); do
echo "$pod"
logfile="${ARTIFACT_DIR}/${pod##*/}"
echo -e " -> $logfile.log"
$clin logs "$pod" --tail=-1 >&"$logfile.log" || true
echo -e " -> $logfile.previous.log"
$clin logs "$pod" --tail=-1 --previous=true >&"$logfile.previous.log" || true
done
done

monitoring_collection_data=$ARTIFACT_DIR/benchmark.json
monitoring_collection_log=$ARTIFACT_DIR/monitoring-collection.log
monitoring_collection_dir=$ARTIFACT_DIR/monitoring-collection-raw-data-dir
Expand Down Expand Up @@ -54,17 +71,17 @@ set +u
# shellcheck disable=SC1090,SC1091
source $PYTHON_VENV_DIR/bin/activate
set -u
mstart=$(date --utc --date "$(cat benchmark-before)" --iso-8601=seconds)
mend=$(date --utc --date "$(cat benchmark-after)" --iso-8601=seconds)
mstart=$(date --utc --date "$(cat "${ARTIFACT_DIR}/benchmark-before")" --iso-8601=seconds)
mend=$(date --utc --date "$(cat "${ARTIFACT_DIR}/benchmark-after")" --iso-8601=seconds)
mhost=$(kubectl -n openshift-monitoring get route -l app.kubernetes.io/name=thanos-query -o json | jq --raw-output '.items[0].spec.host')
mversion=$(sed -n 's/^__version__ = "\(.*\)"/\1/p' "scenarios/$(cat benchmark-scenario).py")
mversion=$(sed -n 's/^__version__ = "\(.*\)"/\1/p' "scenarios/$(cat "${ARTIFACT_DIR}/benchmark-scenario").py")
status_data.py \
--status-data-file "$monitoring_collection_data" \
--set \
results.started="$(cat benchmark-before)" \
results.ended="$(cat benchmark-after)" \
name="RHDH load test $(cat benchmark-scenario)" \
metadata.scenario.name="$(cat benchmark-scenario)" \
results.started="$(cat "${ARTIFACT_DIR}/benchmark-before")" \
results.ended="$(cat "${ARTIFACT_DIR}/benchmark-after")" \
name="RHDH load test $(cat "${ARTIFACT_DIR}/benchmark-scenario")" \
metadata.scenario.name="$(cat "${ARTIFACT_DIR}/benchmark-scenario")" \
metadata.scenario.version="$mversion" \
-d &>"$monitoring_collection_log"
status_data.py \
Expand All @@ -75,7 +92,7 @@ status_data.py \
--monitoring-raw-data-dir "$monitoring_collection_dir" \
--prometheus-host "https://$mhost" \
--prometheus-port 443 \
--prometheus-token "$(oc whoami -t)" \
--prometheus-token "$($cli whoami -t)" \
-d &>>"$monitoring_collection_log"
set +u
deactivate
Expand Down
2 changes: 2 additions & 0 deletions ci-scripts/rhdh-setup/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ repo_name="$RHDH_NAMESPACE-helm-repo"

export RHDH_DEPLOYMENT_REPLICAS=${RHDH_DEPLOYMENT_REPLICAS:-1}
export RHDH_DB_REPLICAS=${RHDH_DB_REPLICAS:-1}
export RHDH_DB_STORAGE=${RHDH_DB_STORAGE:-1Gi}
export RHDH_KEYCLOAK_REPLICAS=${RHDH_KEYCLOAK_REPLICAS:-1}

export RHDH_IMAGE_REGISTRY=${RHDH_IMAGE_REGISTRY:-}
Expand Down Expand Up @@ -113,6 +114,7 @@ backstage_install() {
${RHDH_HELM_RELEASE_NAME} \
${RHDH_DEPLOYMENT_REPLICAS} \
${RHDH_DB_REPLICAS} \
${RHDH_DB_STORAGE} \
${RHDH_IMAGE_REGISTRY} \
${RHDH_IMAGE_REPO} \
${RHDH_IMAGE_TAG} \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ upstream:
persistence:
enabled: true
mountPath: /var/lib/pgsql/data
size: 1Gi
size: "${RHDH_DB_STORAGE}"
podSecurityContext:
enabled: false
securityContext:
Expand Down
2 changes: 1 addition & 1 deletion ci-scripts/rhdh-setup/template/backstage/chart-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ upstream:
persistence:
enabled: true
mountPath: /var/lib/pgsql/data
size: 1Gi
size: "${RHDH_DB_STORAGE}"
podSecurityContext:
enabled: false
securityContext:
Expand Down
10 changes: 10 additions & 0 deletions ci-scripts/scalability/collect-results.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash

# Fail on unset variables, on any failing command and on failures inside pipelines.
set -euo pipefail

printf '\n === Collecting test results and metrics for RHDH scalability test ===\n\n'

# Resolve the artifact directory to an absolute path (defaults to ./.artifacts)
# and make sure it exists.
ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
mkdir -p "$ARTIFACT_DIR"
7 changes: 7 additions & 0 deletions ci-scripts/scalability/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash

# Fail on unset variables, on any failing command and on failures inside pipelines.
set -euo pipefail

# This script currently only announces the setup phase.
printf '\n === Setting up RHDH scalability test ===\n\n'
120 changes: 120 additions & 0 deletions ci-scripts/scalability/test-scalability.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/bin/bash
#
# RHDH scalability test driver: reads the scalability matrix from the SCALE_*
# environment variables and prints how many combinations will be executed.

# Deployment defaults; a non-empty value already present in the environment wins.
: "${PRE_LOAD_DB:=true}"
: "${RHDH_HELM_REPO:=https://gist.githubusercontent.com/rhdh-bot/63cef5cb6285889527bd6a67c0e1c2a9/raw}"
: "${RHDH_HELM_CHART:=developer-hub}"
: "${RHDH_NAMESPACE:=rhdh-performance}"
: "${WAIT_FOR_SEARCH_INDEX:=true}"
export PRE_LOAD_DB RHDH_HELM_REPO RHDH_HELM_CHART RHDH_NAMESPACE WAIT_FOR_SEARCH_INDEX

# Credentials are read from the CI secret mount and exported for the child scripts.
GITHUB_TOKEN="$(cat /usr/local/ci-secrets/backstage-performance/github.token)"
GITHUB_USER="$(cat /usr/local/ci-secrets/backstage-performance/github.user)"
GITHUB_REPO="$(cat /usr/local/ci-secrets/backstage-performance/github.repo)"
QUAY_TOKEN="$(cat /usr/local/ci-secrets/backstage-performance/quay.token)"
export GITHUB_TOKEN GITHUB_USER GITHUB_REPO QUAY_TOKEN

# Scalability matrix dimensions. Each variable is a whitespace-separated list;
# every element becomes one value of the corresponding loop further below.

# Values exported as WORKERS.
read -ra workers <<<"${SCALE_WORKERS:-5}"

# "<active users>:<spawn rate>" pairs, exported as USERS and SPAWN_RATE.
read -ra active_users_spawn_rate <<<"${SCALE_ACTIVE_USERS_SPAWN_RATES:-1:1 200:40}"

# "<users>:<groups>" pairs, exported as BACKSTAGE_USER_COUNT and GROUP_COUNT.
read -ra bs_users_groups <<<"${SCALE_BS_USERS_GROUPS:-1:1 15000:5000}"

# Catalog sizes, exported as both API_COUNT and COMPONENT_COUNT.
read -ra catalog_sizes <<<"${SCALE_CATALOG_SIZES:-1 10000}"

# Replica counts, applied to the RHDH deployment, its DB and Keycloak alike.
read -ra replicas <<<"${SCALE_REPLICAS:-5}"

# DB storage sizes, exported as RHDH_DB_STORAGE.
read -ra db_storages <<<"${SCALE_DB_STORAGES:-1Gi 2Gi}"

# The matrix is the cartesian product of all dimensions.
matrix_size=$((${#workers[@]} * ${#active_users_spawn_rate[@]} * ${#bs_users_groups[@]} * ${#catalog_sizes[@]} * ${#replicas[@]} * ${#db_storages[@]}))

printf '\n'
echo "////// RHDH scalability test //////"
echo "Number of scalability matrix iterations: ${matrix_size}"
printf '\n'

# Wait until the RHDH search API reports a result count for the software catalog.
#
# Reads:
#   WAIT_FOR_SEARCH_INDEX - the wait is performed only when this equals "true".
#   RHDH_NAMESPACE        - namespace of the rhdh-developer-hub route
#                           (falls back to rhdh-performance).
# Sets HOST to the https:// URL of that route.
# Polls every 10 seconds and calls `exit 1` once 3600 seconds have elapsed.
wait_for_indexing() {
    if [ "$WAIT_FOR_SEARCH_INDEX" == "true" ]; then
        HOST="https://$(oc get routes rhdh-developer-hub -n "${RHDH_NAMESPACE:-rhdh-performance}" -o jsonpath='{.spec.host}')"

        start=$(date +%s)
        timeout_timestamp=$(date -d "3600 seconds" "+%s")
        while true; do
            echo "Waiting for the search indexing to finish..."
            if [ "$(date "+%s")" -gt "$timeout_timestamp" ]; then
                echo "ERROR: Timeout waiting"
                exit 1
            else
                count="$(curl -sk "$HOST/api/search/query?term=&types%5B0%5D=software-catalog" | jq -rc '.numberOfResults')"
                # An empty value means curl failed or the reply was not JSON (jq printed
                # nothing); treat it like "null" and keep polling instead of reporting
                # success while the endpoint is still unreachable.
                if [ -n "$count" ] && [ "$count" != "null" ]; then
                    finish=$(date +%s)
                    echo "Search query returned non-empty set ($count) - indexing has finished in $((finish - start))s"
                    break
                fi
            fi
            sleep 10s
        done
    else
        echo "WAIT_FOR_SEARCH_INDEX is set to $WAIT_FOR_SEARCH_INDEX, skipping waiting for search indexing!"
    fi
}
# Everything below uses paths relative to the directory two levels up (`make`,
# ./ci-scripts/*). Abort if it cannot be entered so the `rm -rvf` further down
# never runs against a path resolved from the wrong directory.
pushd ../../ || exit 1
ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
mkdir -p "${ARTIFACT_DIR}"

# Every run starts with an empty scalability artifact directory.
SCALABILITY_ARTIFACTS="$ARTIFACT_DIR/scalability"
rm -rvf "${SCALABILITY_ARTIFACTS}"
mkdir -p "${SCALABILITY_ARTIFACTS}"

# Walk the scalability matrix. RHDH is redeployed once per combination of
# workers / users:groups / catalog size / replicas / DB storage; the load test
# is then executed once per active-users:spawn-rate pair against that deployment.
for w in "${workers[@]}"; do
    for bu_bg in "${bs_users_groups[@]}"; do
        # Split "<users>:<groups>".
        IFS=":" read -ra tokens <<<"${bu_bg}"
        bu="${tokens[0]}"
        bg="${tokens[1]}"
        for c in "${catalog_sizes[@]}"; do
            for r in "${replicas[@]}"; do
                for s in "${db_storages[@]}"; do
                    echo
                    echo "/// Setting up RHDH for scalability test ///"
                    echo
                    set -x
                    export RHDH_DEPLOYMENT_REPLICAS="$r"
                    export RHDH_DB_REPLICAS="$r"
                    export RHDH_DB_STORAGE="$s"
                    export RHDH_KEYCLOAK_REPLICAS=$r
                    export BACKSTAGE_USER_COUNT=$bu
                    export GROUP_COUNT=$bg
                    export WORKERS=$w
                    export API_COUNT=$c
                    export COMPONENT_COUNT=$c
                    # Directory name identifying this matrix combination.
                    index=${r}r-db_${s}-${bu}bu-${bg}bg-${w}w-${c}c
                    set +x
                    # Tracing is off here so the credentials are not echoed.
                    oc login "$OPENSHIFT_API" -u "$OPENSHIFT_USERNAME" -p "$OPENSHIFT_PASSWORD" --insecure-skip-tls-verify=true
                    make undeploy-rhdh
                    setup_artifacts="$SCALABILITY_ARTIFACTS/$index/setup"
                    mkdir -p "$setup_artifacts"
                    ARTIFACT_DIR=$setup_artifacts ./ci-scripts/setup.sh |& tee "$setup_artifacts/setup.log"
                    wait_for_indexing |& tee "$setup_artifacts/after-setup-search.log"
                    # wait_for_indexing runs in a pipeline subshell, so its `exit 1` on
                    # timeout only ends that subshell; propagate it explicitly.
                    [ "${PIPESTATUS[0]}" -eq 0 ] || exit 1
                    for au_sr in "${active_users_spawn_rate[@]}"; do
                        # Split "<active users>:<spawn rate>".
                        IFS=":" read -ra tokens <<<"${au_sr}"
                        active_users=${tokens[0]}
                        spawn_rate=${tokens[1]}
                        echo
                        echo "/// Running the scalability test ///"
                        echo
                        set -x
                        export SCENARIO=${SCENARIO:-search-catalog}
                        export USERS="${active_users}"
                        export DURATION=${DURATION:-5m}
                        export SPAWN_RATE="${spawn_rate}"
                        set +x
                        make clean
                        test_artifacts="$SCALABILITY_ARTIFACTS/$index/test/${active_users}u"
                        mkdir -p "$test_artifacts"
                        # Keep this log next to the test it belongs to; writing it into the
                        # shared setup directory would overwrite it on every iteration.
                        wait_for_indexing |& tee "$test_artifacts/before-test-search.log"
                        [ "${PIPESTATUS[0]}" -eq 0 ] || exit 1
                        ARTIFACT_DIR=$test_artifacts ./ci-scripts/test.sh |& tee "$test_artifacts/test.log"
                        ARTIFACT_DIR=$test_artifacts ./ci-scripts/collect-results.sh |& tee "$test_artifacts/collect-results.log"
                    done
                done
            done
        done
    done
done
popd || exit
10 changes: 10 additions & 0 deletions ci-scripts/scalability/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash

# Fail on unset variables, on any failing command and on failures inside pipelines.
set -euo pipefail

# Mark the scalability test knobs for export so the `make` invocation below
# (and the processes it starts) can see them.
export SCENARIO DURATION WAIT_FOR_SEARCH_INDEX PRE_LOAD_DB SCALE_WORKERS SCALE_ACTIVE_USERS_SPAWN_RATES SCALE_BS_USERS_GROUPS SCALE_CATALOG_SIZES SCALE_REPLICAS SCALE_DB_STORAGES

printf '\n === Running RHDH scalability test ===\n\n'
make test-scalability
19 changes: 10 additions & 9 deletions ci-scripts/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,18 @@ GITHUB_USER=$(cat /usr/local/ci-secrets/backstage-performance/github.user)
GITHUB_REPO=$(cat /usr/local/ci-secrets/backstage-performance/github.repo)
QUAY_TOKEN=$(cat /usr/local/ci-secrets/backstage-performance/quay.token)

export RHDH_DEPLOYMENT_REPLICAS=5
export RHDH_DB_REPLICAS=5
export RHDH_KEYCLOAK_REPLICAS=5
export RHDH_DEPLOYMENT_REPLICAS=${RHDH_DEPLOYMENT_REPLICAS:-5}
export RHDH_DB_REPLICAS=${RHDH_DB_REPLICAS:-5}
export RHDH_DB_STORAGE=${RHDH_DB_STORAGE:-1Gi}
export RHDH_KEYCLOAK_REPLICAS=${RHDH_KEYCLOAK_REPLICAS:-5}

export API_COUNT=1000
export COMPONENT_COUNT=1000
export BACKSTAGE_USER_COUNT=1000
export GROUP_COUNT=250
export API_COUNT=${API_COUNT:-1000}
export COMPONENT_COUNT=${COMPONENT_COUNT:-1000}
export BACKSTAGE_USER_COUNT=${BACKSTAGE_USER_COUNT:-1000}
export GROUP_COUNT=${GROUP_COUNT:-250}

ARTIFACT_DIR=${ARTIFACT_DIR:-artifacts}
mkdir -p "${ARTIFACT_DIR}"
ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
mkdir -p "$ARTIFACT_DIR"

rate_limits_csv="${ARTIFACT_DIR}/gh-rate-limits-remaining.setup.csv"

Expand Down
2 changes: 1 addition & 1 deletion ci-scripts/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export HOST
HOST="https://$(oc get routes rhdh-developer-hub -n "${RHDH_NAMESPACE:-rhdh-performance}" -o jsonpath='{.spec.host}')"
# end-of testing env

ARTIFACT_DIR=${ARTIFACT_DIR:-artifacts}
ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
mkdir -p "${ARTIFACT_DIR}"

rate_limits_csv="${ARTIFACT_DIR}/gh-rate-limits-remaining.test.csv"
Expand Down
20 changes: 19 additions & 1 deletion config/cluster_read_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,25 @@
'USERS',
'WORKERS',
'DURATION',
'SPAWN_RATE
'SCENARIO',
'PRE_LOAD_DB',
'RHDH_DEPLOYMENT_REPLICAS',
'RHDH_DB_REPLICAS',
'RHDH_DB_STORAGE',
'RHDH_KEYCLOAK_REPLICAS',
'RHDH_HELM_REPO',
'RHDH_HELM_CHART',
'RHDH_HELM_CHART_VERSION',
'RHDH_HELM_RELEASE_NAME',
'RHDH_IMAGE_REGISTRY',
'RHDH_IMAGE_REPO',
'RHDH_IMAGE_TAG',
'API_COUNT',
'COMPONENT_COUNT',
'BACKSTAGE_USER_COUNT',
'GROUP_COUNT',
'WAIT_FOR_SEARCH_INDEX',
] %}
- name: metadata.env.{{ var }}
env_variable: {{ var }}
Expand Down Expand Up @@ -127,7 +145,7 @@
{% macro pod_info(namespace, deployment, container) -%}
# Gather info about pod configuration
- name: metadata.cluster.pods.{{ deployment }}-{{ container }}.count
command: oc -n {{ namespace }} get deployment/{{ deployment }} -o json | jq '.spec.template.spec | if has("replicas") then .replicas else 1 end'
command: oc -n {{ namespace }} get deployment/{{ deployment }} -o json | jq '.spec | if has("replicas") then .replicas else 1 end'
- name: metadata.cluster.pods.{{ deployment }}-{{ container }}.resources
command: oc -n {{ namespace }} get deployment/{{ deployment }} -o json | jq '.spec.template.spec.containers | map(select(.name == "{{ container }}"))[0].resources'
output: json
Expand Down

0 comments on commit 391b32a

Please sign in to comment.