Skip to content

Commit

Permalink
Add nodevertical-heavy workload
Browse files Browse the repository at this point in the history
  • Loading branch information
rsevilla87 authored and chaitanyaenr committed Apr 30, 2020
1 parent 98f3da9 commit 3580895
Show file tree
Hide file tree
Showing 4 changed files with 294 additions and 1 deletion.
6 changes: 5 additions & 1 deletion workloads/nodevertical.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@
when: cluster_name is succeeded
when: snafu_cluster_name == ""

- name: Set NodeVertical template
  set_fact:
    # Selects which script ConfigMap template the templating task renders.
    # nodevertical_heavy is produced with `|bool|lower`, so it can reach this
    # point as the string "true"/"false". A bare Jinja `if` treats every
    # non-empty string (including "false") as truthy, which would always pick
    # the heavy template, so cast it back to a real boolean before testing.
    nodevertical_template: "{% if nodevertical_heavy | bool %}workload-nodevertical-heavy-script-cm.yml.j2{% else %}workload-nodevertical-script-cm.yml.j2{% endif %}"

- name: Template workload templates
template:
src: "{{item.src}}"
Expand All @@ -105,7 +109,7 @@
dest: "{{ansible_user_dir}}/scale-ci-tooling/workload-job.yml"
- src: workload-env.yml.j2
dest: "{{ansible_user_dir}}/scale-ci-tooling/workload-nodevertical-env.yml"
- src: workload-nodevertical-script-cm.yml.j2
- src: "{{ nodevertical_template }}"
dest: "{{ansible_user_dir}}/scale-ci-tooling/workload-nodevertical-script-cm.yml"


Expand Down
2 changes: 2 additions & 0 deletions workloads/templates/workload-env.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ data:
NODEVERTICAL_TS_TIMEOUT: "{{nodevertical_ts_timeout}}"
EXPECTED_NODEVERTICAL_DURATION: "{{expected_nodevertical_duration}}"
AZURE_AUTH: "{{azure_auth|bool|lower}}"
NODEVERTICAL_HEAVY_PROBE_ENDPOINT: "{{ nodevertical_heavy_probe_endpoint }}"
NODEVERTICAL_HEAVY_PROBE_PERIOD: "{{ nodevertical_heavy_probe_period }}"
{% elif workload_job == "podvertical" %}
PBENCH_INSTRUMENTATION: "{{pbench_instrumentation|bool|lower}}"
ENABLE_PBENCH_COPY: "{{enable_pbench_copy|bool|lower}}"
Expand Down
284 changes: 284 additions & 0 deletions workloads/templates/workload-nodevertical-heavy-script-cm.yml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,284 @@
# ConfigMap holding the scripts and Cluster Loader inputs for the heavy
# NodeVertical workload. This file is a Jinja2 template: double-brace
# expressions are resolved when the template is rendered, while shell-style
# dollar-brace references are left for the shell, envsubst, or the OpenShift
# template processor.
apiVersion: v1
kind: ConfigMap
metadata:
  name: scale-ci-workload-script
data:
  # Entry point: configures pbench, renders the Cluster Loader config, runs
  # workload.sh (optionally under pbench) and evaluates pass/fail.
  run.sh: |
    #!/bin/sh
    set -eo pipefail
    workload_log() { echo "$(date -u) $@" >&2; }
    export -f workload_log
    # Each perf-app template instance below defines two single-replica
    # DeploymentConfigs (postgres + perfapp), so the number of template
    # instances is half of the requested pod count.
    export TOTAL_POD_COUNT=$((TOTAL_POD_COUNT / 2))
    workload_log "Configuring pbench for NodeVertical"
    mkdir -p /var/lib/pbench-agent/tools-default/
    echo "${USER_NAME:-default}:x:$(id -u):0:${USER_NAME:-default} user:${HOME}:/sbin/nologin" >> /etc/passwd
    if [ "${ENABLE_PBENCH_AGENTS}" = true ]; then
      echo "" > /var/lib/pbench-agent/tools-default/disk
      echo "" > /var/lib/pbench-agent/tools-default/iostat
      echo "workload" > /var/lib/pbench-agent/tools-default/label
      echo "" > /var/lib/pbench-agent/tools-default/mpstat
      echo "" > /var/lib/pbench-agent/tools-default/oc
      echo "" > /var/lib/pbench-agent/tools-default/perf
      echo "" > /var/lib/pbench-agent/tools-default/pidstat
      echo "" > /var/lib/pbench-agent/tools-default/sar
      # Register every node labelled pbench_agent=true as a remote, tagged by role.
      master_nodes=`oc get nodes -l pbench_agent=true,node-role.kubernetes.io/master= --no-headers | awk '{print $1}'`
      for node in $master_nodes; do
        echo "master" > /var/lib/pbench-agent/tools-default/remote@$node
      done
      infra_nodes=`oc get nodes -l pbench_agent=true,node-role.kubernetes.io/infra= --no-headers | awk '{print $1}'`
      for node in $infra_nodes; do
        echo "infra" > /var/lib/pbench-agent/tools-default/remote@$node
      done
      worker_nodes=`oc get nodes -l pbench_agent=true,node-role.kubernetes.io/worker= --no-headers | awk '{print $1}'`
      for node in $worker_nodes; do
        echo "worker" > /var/lib/pbench-agent/tools-default/remote@$node
      done
    fi
    source /opt/pbench-agent/profile
    workload_log "Done configuring pbench NodeVertical"

    workload_log "Configuring Heavy NodeVertical test"
    # Only the Cluster Loader config goes through envsubst; the perf-app
    # template is copied verbatim because its dollar-brace references are
    # OpenShift template parameters, not environment variables.
    envsubst < /root/workload/nodevertical-heavy.yaml.template > /tmp/nodevertical-heavy.yaml
    cp /root/workload/nodevert-perf-app.yaml.template /tmp/nodevert-perf-app.yaml
    workload_log "Done configuring Heavy NodeVertical test"

    workload_log "Running Heavy NodeVertical workload"
    if [ "${PBENCH_INSTRUMENTATION}" = "true" ]; then
      pbench-user-benchmark -- sh /root/workload/workload.sh
      # Most recently modified pbench-user* directory is taken as this run's result.
      result_dir="/var/lib/pbench-agent/$(ls -t /var/lib/pbench-agent/ | grep "pbench-user" | head -1)"/1/sample1
      if [ "${ENABLE_PBENCH_COPY}" = "true" ]; then
        pbench-copy-results --prefix "${NODEVERTICAL_TEST_PREFIX}"
      fi
    else
      sh /root/workload/workload.sh
      result_dir=/tmp
    fi
    workload_log "Completed Heavy NodeVertical workload run"

    workload_log "Checking Test Results"
    # Fail closed: without a readable exit.json the jq comparisons below print
    # nothing, and the run would otherwise be reported as passed.
    if [ ! -s "${result_dir}/exit.json" ]; then
      workload_log "Missing or empty ${result_dir}/exit.json"
      workload_log "Test Analysis: Failed"
      exit 1
    fi
    workload_log "Checking Cluster Loader Exit Code"
    # Anything other than an explicit "true" (non-zero exit code, invalid JSON,
    # jq failure) counts as a failure.
    if [ "$(jq '.exit_code==0' "${result_dir}/exit.json")" != "true" ]; then
      workload_log "Cluster Loader Failure"
      workload_log "Test Analysis: Failed"
      exit 1
    fi
    workload_log "Comparing Heavy NodeVertical duration to expected duration"
    workload_log "Heavy NodeVertical Duration: $(jq '.duration' "${result_dir}/exit.json")"
    if [ "$(jq '.duration>'${EXPECTED_NODEVERTICAL_DURATION}'' "${result_dir}/exit.json")" = "true" ]; then
      workload_log "EXPECTED_NODEVERTICAL_DURATION (${EXPECTED_NODEVERTICAL_DURATION}) exceeded ($(jq '.duration' "${result_dir}/exit.json"))"
      workload_log "Test Analysis: Failed"
      exit 1
    fi
    workload_log "Cluster Loader Metrics: $(cat "${result_dir}/clusterloader.json" | jq '.')"
    # TODO: Check pbench-agent collected metrics for Pass/Fail
    # TODO: Check prometheus collected metrics for Pass/Fail
    workload_log "Test Analysis: Passed"
  # Runs Cluster Loader through snafu and records exit code, duration and
  # metrics under result_dir for run.sh to evaluate. Relies on workload_log
  # being exported by run.sh.
  workload.sh: |
    #!/bin/sh
    set -o pipefail

    result_dir=/tmp
    if [ "${PBENCH_INSTRUMENTATION}" = "true" ]; then
      result_dir=${benchmark_results_dir}
    fi
    start_time=$(date +%s)
    # POSIX test, consistent with the check above (the script runs under sh).
    if [ "${AZURE_AUTH}" = "true" ]; then
      export AZURE_AUTH_LOCATION=/tmp/azure_auth
    fi
    # Values are substituted at template-render time; quoted so that empty
    # values or values containing spaces do not break the export.
    export cluster_name="{{ snafu_cluster_name }}"
    export test_user="{{ snafu_user }}"
    export es="{{ snafu_es_host }}"
    export es_port="{{ snafu_es_port }}"
    export es_index="{{ snafu_es_index_prefix }}"
    VIPERCONFIG=/tmp/nodevertical-heavy.yaml python3 /tmp/snafu/run_snafu.py -t cl scale-ci --cl-output True --dir "${result_dir}" -p openshift-tests | tee "${result_dir}/clusterloader.txt"
    # With pipefail set, this is the status of the failing pipeline stage (if any).
    exit_code=$?
    end_time=$(date +%s)
    duration=$((end_time-start_time))

    workload_log "Writing Cluster Loader Exit Code"
    jq -n '. | ."exit_code"='${exit_code}' | ."duration"='${duration}'' > "${result_dir}/exit.json"
    workload_log "Writing Cluster Loader Metrics to clusterloader.json"
    grep "cluster_loader_marker" "${result_dir}/clusterloader.txt" > "${result_dir}/clusterloader.json"

    workload_log "Finished workload script"
  # Cluster Loader configuration; environment references are filled in by
  # envsubst in run.sh.
  nodevertical-heavy.yaml.template: |
    provider: local
    ClusterLoader:
      cleanup: ${NODEVERTICAL_CLEANUP}
      projects:
        - num: 1
          basename: ${NODEVERTICAL_BASENAME}
          ifexists: delete
          tuning: default
          nodeselector: "node-role.kubernetes.io/worker="
          templates:
            - num: ${TOTAL_POD_COUNT}
              file: nodevert-perf-app.yaml
              parameters:
                READINESS_ENDPOINT: ${NODEVERTICAL_HEAVY_PROBE_ENDPOINT}
                READINESS_PERIOD: ${NODEVERTICAL_HEAVY_PROBE_PERIOD}
      tuningsets:
        - name: default
          templates:
            stepping:
              stepsize: ${NODEVERTICAL_STEPSIZE}
              pause: ${NODEVERTICAL_PAUSE}
              timeout: ${NODEVERTICAL_TS_TIMEOUT}
            rate_limit:
              delay: 0
  # OpenShift Template instantiated by Cluster Loader: one postgres and one
  # perfapp DeploymentConfig plus a Service for each.
  nodevert-perf-app.yaml.template: |
    kind: Template
    apiVersion: template.openshift.io/v1
    labels:
      template: perf-app
    metadata:
      name: perf-app
    objects:
      - kind: DeploymentConfig
        apiVersion: v1
        metadata:
          name: postgres-${IDENTIFIER}
        spec:
          template:
            metadata:
              labels:
                name: postgres-${IDENTIFIER}
            spec:
              nodeSelector:
                nodevertical: 'true'
              containers:
                - name: postgresql
                  image: registry.redhat.io/rhscl/postgresql-10-rhel7:latest
                  ports:
                    - containerPort: 5432
                      protocol: TCP
                  env:
                    - name: POSTGRESQL_USER
                      value: ${POSTGRESQL_USER}
                    - name: POSTGRESQL_PASSWORD
                      value: ${POSTGRESQL_PASSWORD}
                    - name: POSTGRESQL_DATABASE
                      value: ${POSTGRESQL_DATABASE}
                  resources: {}
                  imagePullPolicy: Always
                  capabilities: {}
                  securityContext:
                    capabilities: {}
                    privileged: false
              restartPolicy: Always
              serviceAccount: ''
          replicas: 1
          selector:
            name: postgres-${IDENTIFIER}
          triggers:
            - type: ConfigChange
          strategy:
            type: Rolling
      - kind: DeploymentConfig
        apiVersion: v1
        metadata:
          name: perfapp-${IDENTIFIER}
        spec:
          template:
            metadata:
              labels:
                name: perfapp-${IDENTIFIER}
            spec:
              nodeSelector:
                nodevertical: 'true'
              containers:
                - name: perfapp
                  image: quay.io/rsevilla/perfapp:latest
                  readinessProbe:
                    httpGet:
                      path: ${READINESS_ENDPOINT}
                      port: 8080
                    # The quoted Jinja literal keeps the renderer from
                    # evaluating the double-brace form, which must reach
                    # OpenShift unchanged so the parameter is substituted as a
                    # non-string (integer) value.
                    periodSeconds: {{ '${{READINESS_PERIOD}}' }}
                    failureThreshold: 1
                    timeoutSeconds: 60
                    initialDelaySeconds: 30
                  ports:
                    - containerPort: 8080
                      protocol: TCP
                  env:
                    - name: POSTGRESQL_USER
                      value: ${POSTGRESQL_USER}
                    - name: POSTGRESQL_PASSWORD
                      value: ${POSTGRESQL_PASSWORD}
                    - name: POSTGRESQL_DATABASE
                      value: ${POSTGRESQL_DATABASE}
                    # Must match the name of the postgresql Service defined below.
                    - name: POSTGRESQL_HOSTNAME
                      value: postgresql-${IDENTIFIER}
                    - name: POSTGRESQL_PORT
                      value: '5432'
                    - name: POSTGRESQL_RETRY_INTERVAL
                      value: ${POSTGRESQL_RETRY_INTERVAL}
                  resources: {}
                  imagePullPolicy: Always
                  capabilities: {}
                  securityContext:
                    capabilities: {}
                    privileged: false
              restartPolicy: Always
              serviceAccount: ''
          replicas: 1
          selector:
            name: perfapp-${IDENTIFIER}
          triggers:
            - type: ConfigChange
          strategy:
            type: Rolling
      - kind: Service
        apiVersion: v1
        metadata:
          name: postgresql-${IDENTIFIER}
        spec:
          selector:
            name: postgres-${IDENTIFIER}
          ports:
            - protocol: TCP
              port: 5432
              targetPort: 5432
          portalIP: ''
          type: ClusterIP
          sessionAffinity: None
        status:
          loadBalancer: {}
      - kind: Service
        apiVersion: v1
        metadata:
          name: perfapp-${IDENTIFIER}
        spec:
          selector:
            name: perfapp-${IDENTIFIER}
          ports:
            - protocol: TCP
              port: 8080
              targetPort: 8080
          portalIP: ''
          type: ClusterIP
          sessionAffinity: None
        status:
          loadBalancer: {}
    parameters:
      - name: IDENTIFIER
        description: Number to append to the name of resources
        value: '1'
      - name: POSTGRESQL_USER
        description: Postgresql database username
        value: 'admin'
      - name: POSTGRESQL_PASSWORD
        description: Postgresql database password
        value: 'secret'
      - name: POSTGRESQL_DATABASE
        description: Postgresql database name
        value: 'mydb'
      - name: POSTGRESQL_RETRY_INTERVAL
        description: Postgresql connection retry interval
        value: '5'
      - name: READINESS_ENDPOINT
        description: Readiness probe endpoint
        value: '/ready'
      - name: READINESS_PERIOD
        description: Readiness probe period
        value: '30'
3 changes: 3 additions & 0 deletions workloads/vars/nodevertical.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ nodevertical_pod_image: "{{ lookup('env', 'NODEVERTICAL_POD_IMAGE')|default('gcr
# Each value below is read from the like-named environment variable; the
# second argument `true` to default() makes an empty value fall back to the
# default as well as an unset one.

# Stepping controls; presumably passed through to the Cluster Loader tuningset
# (stepsize / pause / timeout) - confirm against workload-env.yml.j2.
nodevertical_stepsize: "{{ lookup('env', 'NODEVERTICAL_STEPSIZE')|default(50, true)|int }}"
nodevertical_pause: "{{ lookup('env', 'NODEVERTICAL_PAUSE')|default(60, true)|int }}"
nodevertical_ts_timeout: "{{ lookup('env', 'NODEVERTICAL_TS_TIMEOUT')|default(180, true)|int }}"
# Switch for the heavy NodeVertical variant (defaults to false).
# NOTE(review): `|bool|lower` renders a lowercase string ("true"/"false"), not
# a boolean; consumers that test it in a Jinja `if` should cast with `|bool`.
nodevertical_heavy: "{{ lookup('env', 'NODEVERTICAL_HEAVY')|default(false, true)|bool|lower }}"
# HTTP path used for the heavy workload's readiness probe (default '/ready').
nodevertical_heavy_probe_endpoint: "{{ lookup('env', 'NODEVERTICAL_HEAVY_PROBE_ENDPOINT')|default('/ready', true) }}"
# Readiness probe period for the heavy workload (default 30); presumably
# seconds, since it feeds periodSeconds - confirm.
nodevertical_heavy_probe_period: "{{ lookup('env', 'NODEVERTICAL_HEAVY_PROBE_PERIOD')|default(30, true)|int }}"

# Pass/fail criteria
# Upper bound for the measured run duration (default 600); presumably seconds,
# matching the epoch-second arithmetic in the workload script - confirm.
expected_nodevertical_duration: "{{ lookup('env', 'EXPECTED_NODEVERTICAL_DURATION')|default(600, true)|int }}"

0 comments on commit 3580895

Please sign in to comment.