Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(RHIDP-857): Add scalability test #22

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 21 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export RHDH_HELM_RELEASE_NAME ?= rhdh
# RHDH horizontal scaling
export RHDH_DEPLOYMENT_REPLICAS ?= 1
export RHDH_DB_REPLICAS ?= 1
export RHDH_DB_STORAGE ?= 1Gi
export RHDH_KEYCLOAK_REPLICAS ?= 1

# python's venv base dir relative to the root of the repository
Expand All @@ -39,6 +40,9 @@ PYTHON_VENV=.venv
# Local directory to store temporary files
export TMP_DIR=$(shell readlink -m .tmp)

# Local directory to store artifacts
export ARTIFACT_DIR ?= $(shell readlink -m .artifacts)

# Name of the namespace to install locust operator as well as to run Pods of master and workers.
LOCUST_NAMESPACE=locust-operator

Expand Down Expand Up @@ -105,27 +109,34 @@ undeploy-locust: clean
## === Testing ===

## Remove test related resources from cluster
## Run `make clean-test SCENARIO=...` to clean a specific scenario from cluster
.PHONY: clean-test
clean-test:
## Run `make clean SCENARIO=...` to clean a specific scenario from cluster
.PHONY: clean
clean:
kubectl delete --namespace $(LOCUST_NAMESPACE) cm locust.$(SCENARIO) --ignore-not-found --wait
kubectl delete --namespace $(LOCUST_NAMESPACE) locusttests.locust.io $(SCENARIO).test --ignore-not-found --wait || true

## Deploy and run the locust test
## Run `make test SCENARIO=...` to run a specific scenario
.PHONY: test
test:
echo $(SCENARIO)>benchmark-scenario
mkdir -p $(ARTIFACT_DIR)
echo $(SCENARIO)>$(ARTIFACT_DIR)/benchmark-scenario
cat locust-test-template.yaml | envsubst | kubectl apply --namespace $(LOCUST_NAMESPACE) -f -
kubectl create --namespace $(LOCUST_NAMESPACE) configmap locust.$(SCENARIO) --from-file scenarios/$(SCENARIO).py --dry-run=client -o yaml | kubectl apply --namespace $(LOCUST_NAMESPACE) -f -
date --utc -Ins>benchmark-before
date --utc -Ins>$(ARTIFACT_DIR)/benchmark-before
timeout=$$(date -d "30 seconds" "+%s"); while [ -z "$$(kubectl get --namespace $(LOCUST_NAMESPACE) pod -l performance-test-pod-name=$(SCENARIO)-test-master -o name)" ]; do if [ "$$(date "+%s")" -gt "$$timeout" ]; then echo "ERROR: Timeout waiting for locust master pod to start"; exit 1; else echo "Waiting for locust master pod to start..."; sleep 5s; fi; done
kubectl wait --namespace $(LOCUST_NAMESPACE) --for=condition=Ready=true $$(kubectl get --namespace $(LOCUST_NAMESPACE) pod -l performance-test-pod-name=$(SCENARIO)-test-master -o name)
@echo "Getting locust master log:"
kubectl logs --namespace $(LOCUST_NAMESPACE) -f -l performance-test-pod-name=$(SCENARIO)-test-master | tee load-test.log
date --utc -Ins>benchmark-after
date --utc -Ins>$(ARTIFACT_DIR)/benchmark-after
@echo "All done!!!"

## Run the scalability test
## Run `make test-scalability SCENARIO=...` to run a specific scenario
.PHONY: test-scalability
test-scalability:
cd ./ci-scripts/scalability; ./test-scalability.sh

## Run shellcheck on all of the shell scripts
.PHONY: shellcheck
shellcheck:
Expand All @@ -145,7 +156,7 @@ ci-run: setup-venv deploy-locust test

## Deploy and populate RHDH in CI end to end
.PHONY: ci-deploy
ci-deploy: clean namespace deploy-rhdh
ci-deploy: namespace deploy-rhdh

## === Maintenance ===

Expand All @@ -158,9 +169,9 @@ update-locust-images:
skopeo copy --src-no-creds docker://docker.io/lotest/locust-k8s-operator:latest docker://quay.io/backstage-performance/locust-k8s-operator:latest

## Clean local resources
.PHONY: clean
clean:
rm -rvf *.log benchmark-* shellcheck ci-scripts/rhdh-setup/.tmp $(TMP_DIR)
.PHONY: clean-local
clean-local:
rm -rvf *.log shellcheck $(TMP_DIR) $(ARTIFACT_DIR)

## === Help ===

Expand Down
35 changes: 26 additions & 9 deletions ci-scripts/collect-results.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,26 @@ set -o pipefail

echo -e "\n === Collecting test results and metrics ===\n"

ARTIFACT_DIR=${ARTIFACT_DIR:-artifacts}
ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
mkdir -p "${ARTIFACT_DIR}"

RHDH_NAMESPACE=${RHDH_NAMESPACE:-rhdh-performance}

cli="oc"
clin="$cli -n $RHDH_NAMESPACE"

for label in app.kubernetes.io/name=developer-hub app.kubernetes.io/name=postgresql; do
echo -e "\nCollecting logs from pods in '$RHDH_NAMESPACE' namespace with label '$label':"
for pod in $($clin get pods -l "$label" -o name); do
echo "$pod"
logfile="${ARTIFACT_DIR}/${pod##*/}"
echo -e " -> $logfile.log"
$clin logs "$pod" --tail=-1 >&"$logfile.log" || true
echo -e " -> $logfile.previous.log"
$clin logs "$pod" --tail=-1 --previous=true >&"$logfile.previous.log" || true
done
done

monitoring_collection_data=$ARTIFACT_DIR/benchmark.json
monitoring_collection_log=$ARTIFACT_DIR/monitoring-collection.log
monitoring_collection_dir=$ARTIFACT_DIR/monitoring-collection-raw-data-dir
Expand Down Expand Up @@ -54,17 +71,17 @@ set +u
# shellcheck disable=SC1090,SC1091
source $PYTHON_VENV_DIR/bin/activate
set -u
mstart=$(date --utc --date "$(cat benchmark-before)" --iso-8601=seconds)
mend=$(date --utc --date "$(cat benchmark-after)" --iso-8601=seconds)
mstart=$(date --utc --date "$(cat "${ARTIFACT_DIR}/benchmark-before")" --iso-8601=seconds)
mend=$(date --utc --date "$(cat "${ARTIFACT_DIR}/benchmark-after")" --iso-8601=seconds)
mhost=$(kubectl -n openshift-monitoring get route -l app.kubernetes.io/name=thanos-query -o json | jq --raw-output '.items[0].spec.host')
mversion=$(sed -n 's/^__version__ = "\(.*\)"/\1/p' "scenarios/$(cat benchmark-scenario).py")
mversion=$(sed -n 's/^__version__ = "\(.*\)"/\1/p' "scenarios/$(cat "${ARTIFACT_DIR}/benchmark-scenario").py")
status_data.py \
--status-data-file "$monitoring_collection_data" \
--set \
results.started="$(cat benchmark-before)" \
results.ended="$(cat benchmark-after)" \
name="RHDH load test $(cat benchmark-scenario)" \
metadata.scenario.name="$(cat benchmark-scenario)" \
results.started="$(cat "${ARTIFACT_DIR}/benchmark-before")" \
results.ended="$(cat "${ARTIFACT_DIR}/benchmark-after")" \
name="RHDH load test $(cat "${ARTIFACT_DIR}/benchmark-scenario")" \
metadata.scenario.name="$(cat "${ARTIFACT_DIR}/benchmark-scenario")" \
metadata.scenario.version="$mversion" \
-d &>"$monitoring_collection_log"
status_data.py \
Expand All @@ -75,7 +92,7 @@ status_data.py \
--monitoring-raw-data-dir "$monitoring_collection_dir" \
--prometheus-host "https://$mhost" \
--prometheus-port 443 \
--prometheus-token "$(oc whoami -t)" \
--prometheus-token "$($cli whoami -t)" \
-d &>>"$monitoring_collection_log"
set +u
deactivate
Expand Down
2 changes: 2 additions & 0 deletions ci-scripts/rhdh-setup/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ repo_name="$RHDH_NAMESPACE-helm-repo"

export RHDH_DEPLOYMENT_REPLICAS=${RHDH_DEPLOYMENT_REPLICAS:-1}
export RHDH_DB_REPLICAS=${RHDH_DB_REPLICAS:-1}
export RHDH_DB_STORAGE=${RHDH_DB_STORAGE:-1Gi}
export RHDH_KEYCLOAK_REPLICAS=${RHDH_KEYCLOAK_REPLICAS:-1}

export RHDH_IMAGE_REGISTRY=${RHDH_IMAGE_REGISTRY:-}
Expand Down Expand Up @@ -113,6 +114,7 @@ backstage_install() {
${RHDH_HELM_RELEASE_NAME} \
${RHDH_DEPLOYMENT_REPLICAS} \
${RHDH_DB_REPLICAS} \
${RHDH_DB_STORAGE} \
${RHDH_IMAGE_REGISTRY} \
${RHDH_IMAGE_REPO} \
${RHDH_IMAGE_TAG} \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ upstream:
persistence:
enabled: true
mountPath: /var/lib/pgsql/data
size: 1Gi
size: "${RHDH_DB_STORAGE}"
podSecurityContext:
enabled: false
securityContext:
Expand Down
2 changes: 1 addition & 1 deletion ci-scripts/rhdh-setup/template/backstage/chart-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ upstream:
persistence:
enabled: true
mountPath: /var/lib/pgsql/data
size: 1Gi
size: "${RHDH_DB_STORAGE}"
podSecurityContext:
enabled: false
securityContext:
Expand Down
10 changes: 10 additions & 0 deletions ci-scripts/scalability/collect-results.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash

# Result collection entry point for the RHDH scalability test.
# At present it only prints a banner and makes sure the artifact directory exists.

# Fail on unset variables, on any failing command and on failures inside pipelines.
set -o nounset
set -o errexit
set -o pipefail

echo -e "\n === Collecting test results and metrics for RHDH scalability test ===\n"

# Resolve the artifact directory (default: .artifacts) to an absolute path;
# `readlink -m` canonicalizes even when the path does not exist yet.
ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
mkdir -p "$ARTIFACT_DIR"
7 changes: 7 additions & 0 deletions ci-scripts/scalability/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash

# Setup entry point for the RHDH scalability test.
# Currently a placeholder: it only prints a banner.

# Fail on unset variables, on any failing command and on failures inside pipelines.
set -o nounset
set -o errexit
set -o pipefail

echo -e "\n === Setting up RHDH scalability test ===\n"
120 changes: 120 additions & 0 deletions ci-scripts/scalability/test-scalability.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/bin/bash

# RHDH scalability test driver: configuration prologue.
# Every setting below can be overridden from the environment; the value after
# `:-` is the default used when the variable is unset or empty.

export PRE_LOAD_DB=${PRE_LOAD_DB:-true}
export RHDH_HELM_REPO=${RHDH_HELM_REPO:-https://gist.githubusercontent.com/rhdh-bot/63cef5cb6285889527bd6a67c0e1c2a9/raw}
export RHDH_HELM_CHART=${RHDH_HELM_CHART:-developer-hub}
export RHDH_NAMESPACE=${RHDH_NAMESPACE:-rhdh-performance}

# When "true", wait_for_indexing polls the search API before continuing.
export WAIT_FOR_SEARCH_INDEX=${WAIT_FOR_SEARCH_INDEX:-true}

# Credentials are read from secret files mounted in the CI environment and
# exported for the child scripts.
export GITHUB_TOKEN GITHUB_USER GITHUB_REPO QUAY_TOKEN
GITHUB_TOKEN="$(cat /usr/local/ci-secrets/backstage-performance/github.token)"
GITHUB_USER="$(cat /usr/local/ci-secrets/backstage-performance/github.user)"
GITHUB_REPO="$(cat /usr/local/ci-secrets/backstage-performance/github.repo)"
QUAY_TOKEN="$(cat /usr/local/ci-secrets/backstage-performance/quay.token)"

# Scalability matrix dimensions. Each SCALE_* variable is a whitespace-separated
# list that `read -ra` splits into a bash array.

# Values exported as WORKERS for each iteration.
read -ra workers <<<"${SCALE_WORKERS:-5}"

# "<active users>:<spawn rate>" pairs.
read -ra active_users_spawn_rate <<<"${SCALE_ACTIVE_USERS_SPAWN_RATES:-1:1 200:40}"

# "<backstage users>:<groups>" pairs.
read -ra bs_users_groups <<<"${SCALE_BS_USERS_GROUPS:-1:1 15000:5000}"

# Catalog sizes; each value is used for both API_COUNT and COMPONENT_COUNT.
read -ra catalog_sizes <<<"${SCALE_CATALOG_SIZES:-1 10000}"

# Replica counts; each value is applied to RHDH, its database and Keycloak.
read -ra replicas <<<"${SCALE_REPLICAS:-5}"

# Database storage sizes, exported as RHDH_DB_STORAGE.
read -ra db_storages <<<"${SCALE_DB_STORAGES:-1Gi 2Gi}"

echo
echo "////// RHDH scalability test //////"
# Total iterations = product of the lengths of all matrix dimensions.
echo "Number of scalability matrix iterations: $((${#workers[*]} * ${#active_users_spawn_rate[*]} * ${#bs_users_groups[*]} * ${#catalog_sizes[*]} * ${#replicas[*]} * ${#db_storages[*]}))"
echo

# Block until the RHDH search index answers software-catalog queries.
#
# When WAIT_FOR_SEARCH_INDEX is "true", polls the search API behind the
# `rhdh-developer-hub` route every 10 seconds until the response carries a
# numeric `numberOfResults`. Gives up with `exit 1` once 3600 seconds have
# passed. For any other value of WAIT_FOR_SEARCH_INDEX the wait is skipped
# and a notice is printed.
#
# Globals read: WAIT_FOR_SEARCH_INDEX, RHDH_NAMESPACE (default: rhdh-performance)
# Globals set:  HOST, start, timeout_timestamp, count, finish
#
# NOTE: on timeout this calls `exit`, not `return`. The callers in this script
# invoke the function on the left-hand side of a pipeline (`|& tee ...`), so
# the exit terminates only that pipeline's subshell.
wait_for_indexing() {
    if [ "$WAIT_FOR_SEARCH_INDEX" == "true" ]; then
        HOST="https://$(oc get routes rhdh-developer-hub -n "${RHDH_NAMESPACE:-rhdh-performance}" -o jsonpath='{.spec.host}')"

        start=$(date +%s)
        timeout_timestamp=$(date -d "3600 seconds" "+%s")
        while true; do
            echo "Waiting for the search indexing to finish..."
            if [ "$(date "+%s")" -gt "$timeout_timestamp" ]; then
                echo "ERROR: Timeout waiting"
                exit 1
            else
                # `|| true` keeps a failing curl/jq from aborting the caller
                # should it ever run with errexit; the result is validated below.
                count="$(curl -sk "$HOST/api/search/query?term=&types%5B0%5D=software-catalog" | jq -rc '.numberOfResults' || true)"
                # Only a numeric result proves the search API answered. An empty
                # string (curl failed, route not ready, non-JSON body) or "null"
                # (field missing) means indexing is not done yet, so keep polling.
                if [[ "$count" =~ ^[0-9]+$ ]]; then
                    finish=$(date +%s)
                    echo "Search query returned non-empty set ($count) - indexing has finished in $((finish - start))s"
                    break
                fi
            fi
            sleep 10s
        done
    else
        echo "WAIT_FOR_SEARCH_INDEX is set to $WAIT_FOR_SEARCH_INDEX, skipping waiting for search indexing!"
    fi
}
# Main driver: iterate over the scalability matrix. For every combination of
# workers, users/groups, catalog size, replicas and DB storage, RHDH is
# redeployed once; the load test then runs once per active-users/spawn-rate pair.
#
# Move two directories up; the Makefile target starts this script from
# ./ci-scripts/scalability, so this is presumably the repository root.
pushd ../../
# Resolve the artifact directory (default: .artifacts) to an absolute path.
ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
mkdir -p "${ARTIFACT_DIR}"

# Start from an empty scalability artifact tree on every run.
SCALABILITY_ARTIFACTS="$ARTIFACT_DIR/scalability"
rm -rvf "${SCALABILITY_ARTIFACTS}"
mkdir -p "${SCALABILITY_ARTIFACTS}"

for w in "${workers[@]}"; do
for bu_bg in "${bs_users_groups[@]}"; do
# Split "<backstage users>:<groups>" into its two parts.
IFS=":" read -ra tokens <<<"${bu_bg}"
bu="${tokens[0]}"
bg="${tokens[1]}"
for c in "${catalog_sizes[@]}"; do
for r in "${replicas[@]}"; do
for s in "${db_storages[@]}"; do
echo
echo "/// Setting up RHDH for scalability test ///"
echo
# Trace the exports so the log shows the configuration of this iteration.
set -x
# The same replica count is used for RHDH, its database and Keycloak.
export RHDH_DEPLOYMENT_REPLICAS="$r"
export RHDH_DB_REPLICAS="$r"
export RHDH_DB_STORAGE="$s"
export RHDH_KEYCLOAK_REPLICAS=$r
export BACKSTAGE_USER_COUNT=$bu
export GROUP_COUNT=$bg
export WORKERS=$w
# The catalog size applies to both APIs and components.
export API_COUNT=$c
export COMPONENT_COUNT=$c
# Directory name that identifies this matrix combination in the artifacts.
index=${r}r-db_${s}-${bu}bu-${bg}bg-${w}w-${c}c
set +x
# Log in again on every iteration and remove the previous RHDH deployment
# before setting up with the new configuration.
oc login "$OPENSHIFT_API" -u "$OPENSHIFT_USERNAME" -p "$OPENSHIFT_PASSWORD" --insecure-skip-tls-verify=true
make undeploy-rhdh
setup_artifacts="$SCALABILITY_ARTIFACTS/$index/setup"
mkdir -p "$setup_artifacts"
# `|& tee` captures stdout and stderr. This script does not enable pipefail,
# so each pipeline's status is that of `tee`, not of the script on the left.
ARTIFACT_DIR=$setup_artifacts ./ci-scripts/setup.sh |& tee "$setup_artifacts/setup.log"
wait_for_indexing |& tee "$setup_artifacts/after-setup-search.log"
for au_sr in "${active_users_spawn_rate[@]}"; do
# Split "<active users>:<spawn rate>" into its two parts.
IFS=":" read -ra tokens <<<"${au_sr}"
active_users=${tokens[0]}
spawn_rate=${tokens[1]}
echo
echo "/// Running the scalability test ///"
echo
set -x
# SCENARIO and DURATION keep any value from the environment; the defaults
# are search-catalog and 5m.
export SCENARIO=${SCENARIO:-search-catalog}
export USERS="${active_users}"
export DURATION=${DURATION:-5m}
export SPAWN_RATE="${spawn_rate}"
set +x
# Run the Makefile `clean` target before starting the next test run.
make clean
test_artifacts="$SCALABILITY_ARTIFACTS/$index/test/${active_users}u"
mkdir -p "$test_artifacts"
wait_for_indexing |& tee "$test_artifacts/before-test-search.log"
# Run the test and collect results into this run's own artifact directory.
ARTIFACT_DIR=$test_artifacts ./ci-scripts/test.sh |& tee "$test_artifacts/test.log"
ARTIFACT_DIR=$test_artifacts ./ci-scripts/collect-results.sh |& tee "$test_artifacts/collect-results.log"
done
done
done
done
done
done
# Return to the directory the script was started from.
popd || exit
10 changes: 10 additions & 0 deletions ci-scripts/scalability/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash

# Entry point that runs the RHDH scalability test through `make test-scalability`.

# Fail on unset variables, on any failing command and on failures inside pipelines.
set -o nounset
set -o errexit
set -o pipefail

# Mark the tuning variables for export so the make target's child processes see
# any values set by the caller. Naming a variable in `export` does not expand
# it, so unset ones do not trip nounset.
export SCENARIO DURATION WAIT_FOR_SEARCH_INDEX PRE_LOAD_DB SCALE_WORKERS SCALE_ACTIVE_USERS_SPAWN_RATES SCALE_BS_USERS_GROUPS SCALE_CATALOG_SIZES SCALE_REPLICAS SCALE_DB_STORAGES

echo -e "\n === Running RHDH scalability test ===\n"
make test-scalability
19 changes: 10 additions & 9 deletions ci-scripts/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,18 @@ GITHUB_USER=$(cat /usr/local/ci-secrets/backstage-performance/github.user)
GITHUB_REPO=$(cat /usr/local/ci-secrets/backstage-performance/github.repo)
QUAY_TOKEN=$(cat /usr/local/ci-secrets/backstage-performance/quay.token)

export RHDH_DEPLOYMENT_REPLICAS=5
export RHDH_DB_REPLICAS=5
export RHDH_KEYCLOAK_REPLICAS=5
export RHDH_DEPLOYMENT_REPLICAS=${RHDH_DEPLOYMENT_REPLICAS:-5}
export RHDH_DB_REPLICAS=${RHDH_DB_REPLICAS:-5}
export RHDH_DB_STORAGE=${RHDH_DB_STORAGE:-1Gi}
export RHDH_KEYCLOAK_REPLICAS=${RHDH_KEYCLOAK_REPLICAS:-5}

export API_COUNT=1000
export COMPONENT_COUNT=1000
export BACKSTAGE_USER_COUNT=1000
export GROUP_COUNT=250
export API_COUNT=${API_COUNT:-1000}
export COMPONENT_COUNT=${COMPONENT_COUNT:-1000}
export BACKSTAGE_USER_COUNT=${BACKSTAGE_USER_COUNT:-1000}
export GROUP_COUNT=${GROUP_COUNT:-250}

ARTIFACT_DIR=${ARTIFACT_DIR:-artifacts}
mkdir -p "${ARTIFACT_DIR}"
ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
mkdir -p "$ARTIFACT_DIR"

rate_limits_csv="${ARTIFACT_DIR}/gh-rate-limits-remaining.setup.csv"

Expand Down
2 changes: 1 addition & 1 deletion ci-scripts/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export HOST
HOST="https://$(oc get routes rhdh-developer-hub -n "${RHDH_NAMESPACE:-rhdh-performance}" -o jsonpath='{.spec.host}')"
# end-of testing env

ARTIFACT_DIR=${ARTIFACT_DIR:-artifacts}
ARTIFACT_DIR=$(readlink -m "${ARTIFACT_DIR:-.artifacts}")
mkdir -p "${ARTIFACT_DIR}"

rate_limits_csv="${ARTIFACT_DIR}/gh-rate-limits-remaining.test.csv"
Expand Down
Loading