Skip to content

Commit

Permalink
K8SPSMDB-1080 - Use trap to catch exit status
Browse files Browse the repository at this point in the history
  • Loading branch information
tplavcic committed Jun 27, 2024
1 parent 31091a0 commit 7a70e0d
Show file tree
Hide file tree
Showing 27 changed files with 14 additions and 75 deletions.
1 change: 0 additions & 1 deletion e2e-tests/arbiter/run
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ check_cr_config() {
if [[ $(kubectl_bin get pod \
--selector=statefulset.kubernetes.io/pod-name="${cluster}-arbiter-0" \
-o jsonpath='{.items[*].status.containerStatuses[?(@.name == "mongod-arbiter")].restartCount}') -gt 0 ]]; then
collect_k8s_logs
echo "Something went wrong with arbiter. Exiting..."
exit 1
fi
Expand Down
1 change: 0 additions & 1 deletion e2e-tests/balancer/run
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ check_balancer() {
| grep -E -v "Percona Server for MongoDB|connecting to:|Implicit session:|versions do not match|Error saving history file:|bye")

if [[ $balancer_running != "$expected" ]]; then
collect_k8s_logs
echo "Unexpected output from \"db.adminCommand({balancerStatus: 1}).mode\": $balancer_running"
echo "Expected $expected"
exit 1
Expand Down
1 change: 0 additions & 1 deletion e2e-tests/cross-site-sharded/run
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ for i in "rs0" "rs1"; do
done

if [[ $shards -lt 2 ]]; then
collect_k8s_logs
echo "data is only on some of the shards, maybe sharding is not working"
exit 1
fi
Expand Down
2 changes: 0 additions & 2 deletions e2e-tests/data-at-rest-encryption/run
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ encrypted_cluster_log=$(kubectl_bin logs some-name-rs0-0 -c mongod -n $namespace

echo "$encrypted_cluster_log"
if [ -z "$encrypted_cluster_log" ]; then
collect_k8s_logs
echo "Cluster is not encrypted"
exit 1
fi
Expand All @@ -100,7 +99,6 @@ until [ "$retry" -ge 10 ]; do
echo "Cluster is not encrypted already"
break
elif [ $retry == 15 ]; then
collect_k8s_logs
echo "Max retry count $retry reached. Cluster is still encrypted"
exit 1
else
Expand Down
3 changes: 0 additions & 3 deletions e2e-tests/data-sharded/run
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ check_rs_proper_component_deletion() {
until [[ $(kubectl_bin get sts -l app.kubernetes.io/instance=${cluster},app.kubernetes.io/replset=${rs_name} -ojson | jq '.items | length') -eq 0 ]]; do
let retry+=1
if [ $retry -ge 70 ]; then
collect_k8s_logs
sts_count=$(kubectl_bin get sts -l app.kubernetes.io/instance=${cluster},app.kubernetes.io/replset=${rs_name} -ojson | jq '.items | length')
echo "Replset $rs_name not properly removed, expected sts count of 0 but got $sts_count. Exiting after $retry tries..."
exit 1
Expand Down Expand Up @@ -116,7 +115,6 @@ main() {
done

if [[ $shards -lt 3 ]]; then
collect_k8s_logs
echo "data is only on some of the shards, maybe sharding is not working"
exit 1
fi
Expand All @@ -127,7 +125,6 @@ main() {
"clusterAdmin:clusterAdmin123456@$cluster-mongos.$namespace" "mongodb" ".svc.cluster.local" \
"--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls")
if ! echo $res | grep -q '"ok" : 1'; then
collect_k8s_logs
echo "app database not dropped. Exiting.."
exit 1
fi
Expand Down
1 change: 0 additions & 1 deletion e2e-tests/default-cr/run
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ function stop_cluster() {
let passed_time="${passed_time}+${sleep_time}"
sleep ${sleep_time}
if [[ ${passed_time} -gt ${max_wait_time} ]]; then
collect_k8s_logs
echo "We've been waiting for cluster stop for too long. Exiting..."
exit 1
fi
Expand Down
3 changes: 0 additions & 3 deletions e2e-tests/demand-backup-physical-sharded/run
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ run_recovery_check() {
wait_restore "${backup_name}" "${cluster}" "ready" "0" "1800"
kubectl_bin get psmdb ${cluster} -o yaml
if [ $(kubectl_bin get psmdb ${cluster} -o yaml | yq '.metadata.annotations."percona.com/resync-pbm"') == null ]; then
collect_k8s_logs
echo "psmdb/${cluster} should be annotated with percona.com/resync-pbm after a physical restore"
exit 1
fi
Expand All @@ -53,7 +52,6 @@ check_exported_mongos_service_endpoint() {
local host=$1

if [ "$host" != "$(kubectl_bin get psmdb $cluster -o=jsonpath='{.status.host}')" ]; then
collect_k8s_logs
echo "Exported host is not correct after the restore"
exit 1
fi
Expand Down Expand Up @@ -82,7 +80,6 @@ wait_cluster_consistency ${cluster}
lbEndpoint=$(kubectl_bin get svc $cluster-mongos -o=jsonpath='{.status}' |
jq -r 'select(.loadBalancer != null and .loadBalancer.ingress != null and .loadBalancer.ingress != []) | .loadBalancer.ingress[0][]')
if [ -z $lbEndpoint ]; then
collect_k8s_logs
echo "mongos service not exported correctly"
exit 1
fi
Expand Down
1 change: 0 additions & 1 deletion e2e-tests/demand-backup-physical/run
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ run_recovery_check() {
wait_restore "${backup_name}" "${cluster}" "ready" "0" "1800"
kubectl_bin get psmdb ${cluster} -o yaml
if [ $(kubectl_bin get psmdb ${cluster} -o yaml | yq '.metadata.annotations."percona.com/resync-pbm"') == null ]; then
collect_k8s_logs
echo "psmdb/${cluster} should be annotated with percona.com/resync-pbm after a physical restore"
exit 1
fi
Expand Down
1 change: 0 additions & 1 deletion e2e-tests/demand-backup-sharded/run
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,6 @@ backup_exists=$(kubectl_bin run -i --rm aws-cli --image=perconalab/awscli --rest
/usr/bin/aws --endpoint-url http://minio-service:9000 s3 ls s3://operator-testing/ \
| grep -c ${backup_dest_minio}_ | cat)
if [[ $backup_exists -eq 1 ]]; then
collect_k8s_logs
echo "Backup was not removed from bucket -- minio"
exit 1
fi
Expand Down
2 changes: 0 additions & 2 deletions e2e-tests/demand-backup/run
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@ backup_exists=$(kubectl_bin run -i --rm aws-cli --image=perconalab/awscli --rest
/usr/bin/aws --endpoint-url http://minio-service:9000 s3 ls s3://operator-testing/ \
| grep -c ${backup_dest_minio} | cat)
if [[ $backup_exists -eq 1 ]]; then
collect_k8s_logs
echo "Backup was not removed from bucket -- minio"
exit 1
fi
Expand Down Expand Up @@ -171,7 +170,6 @@ backup_exists=$(kubectl_bin run -i --rm aws-cli --image=perconalab/awscli --rest
/usr/bin/aws --endpoint-url http://minio-service:9000 s3 ls s3://operator-testing/ \
| grep -c ${backup_dest_minio} | cat)
if [[ $backup_exists -eq 1 ]]; then
collect_k8s_logs
echo "Backup was not removed from bucket -- minio"
exit 1
fi
Expand Down
2 changes: 0 additions & 2 deletions e2e-tests/expose-sharded/run
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ function stop_cluster() {
let passed_time="${passed_time}+${sleep_time}"
sleep ${passed_time}
if [[ ${passed_time} -gt ${max_wait_time} ]]; then
collect_k8s_logs
echo "We've been waiting for cluster stop for too long. Exiting..."
exit 1
fi
Expand Down Expand Up @@ -53,7 +52,6 @@ function compare_mongo_config() {
rs0_0_endpoint_actual=$(run_mongo 'var host;var x=0;rs.conf().members.forEach(function(d){ if(d.tags.podName=="some-name-rs0-0"){ host=rs.conf().members[x].host;print(host)};x=x+1; })' "clusterAdmin:clusterAdmin123456@${cluster}-rs0.${namespace}" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history file:|bye')

if [[ $rs0_0_endpoint_actual != "$rs0_0_endpoint:27017" || $cfg_0_endpoint_actual != "$cfg_0_endpoint:27017" ]]; then
collect_k8s_logs
desc "Actual values rs $rs0_0_endpoint_actual and cfg $cfg_0_endpoint_actual do not match expected rs $rs0_0_endpoint:27017 and cfg $cfg_0_endpoint:27017"
exit 1
fi
Expand Down
28 changes: 9 additions & 19 deletions e2e-tests/functions
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,15 @@ conf_dir=$(realpath $test_dir/../conf || :)
src_dir=$(realpath $test_dir/../..)
logs_dir=$(realpath $test_dir/../logs)

# Collect Kubernetes diagnostic logs whenever the test script terminates
# abnormally — i.e. exits with a non-zero status or is killed by a fatal
# signal — then propagate the original exit status to the caller.
cleanup() {
	exit_code=$?
	# Disarm the traps before doing anything else. Without this, a signal
	# (HUP/INT/QUIT/TERM) would run cleanup, whose 'exit' below would then
	# fire the still-armed EXIT trap and run collect_k8s_logs a second time.
	trap - EXIT HUP INT QUIT TERM
	if [[ ${exit_code} -ne 0 ]]; then
		collect_k8s_logs
	fi
	exit ${exit_code}
}
# Register after the definition so a reader sees the handler first; bash
# resolves the function name at fire time either way.
trap cleanup EXIT HUP INT QUIT TERM

if [[ ${ENABLE_LOGGING} == "true" ]]; then
if [ ! -d "${logs_dir}" ]; then
mkdir "${logs_dir}"
Expand Down Expand Up @@ -150,7 +159,6 @@ wait_pod() {
echo -n .
let retry+=1
if [ $retry -ge 360 ]; then
collect_k8s_logs
kubectl_bin describe pod/$pod
kubectl_bin logs $pod
kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
Expand Down Expand Up @@ -179,7 +187,6 @@ wait_cron() {
echo -n .
let retry+=1
if [ $retry -ge 360 ]; then
collect_k8s_logs
kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
| grep -v 'level=info' \
| grep -v 'level=debug' \
Expand All @@ -205,7 +212,6 @@ wait_backup_agent() {
echo -n .
let retry+=1
if [ $retry -ge 360 ]; then
collect_k8s_logs
kubectl_bin logs $agent_pod -c backup-agent \
| tail -100

Expand All @@ -230,7 +236,6 @@ wait_backup() {
let retry+=1
current_status=$(kubectl_bin get psmdb-backup $backup_name -o jsonpath='{.status.state}')
if [[ $retry -ge 360 || ${current_status} == 'error' ]]; then
collect_k8s_logs
kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
| grep -v 'level=info' \
| grep -v 'level=debug' \
Expand Down Expand Up @@ -291,7 +296,6 @@ wait_deployment() {
echo -n .
let retry+=1
if [ $retry -ge 360 ]; then
collect_k8s_logs
kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
| grep -v 'level=info' \
| grep -v 'level=debug' \
Expand Down Expand Up @@ -339,7 +343,6 @@ wait_restore() {
let retry+=1
current_state=$(kubectl_bin get psmdb-restore restore-$backup_name -o jsonpath='{.status.state}')
if [[ $retry -ge $wait_time || ${current_state} == 'error' ]]; then
collect_k8s_logs
kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
| grep -v 'level=info' \
| grep -v 'level=debug' \
Expand Down Expand Up @@ -553,7 +556,6 @@ retry() {

until "$@"; do
if [[ $n -ge $max ]]; then
collect_k8s_logs
echo "The command '$@' has failed after $n attempts."
exit 1
fi
Expand Down Expand Up @@ -593,7 +595,6 @@ wait_for_running() {
timeout=$((timeout + 1))
echo -n '.'
if [[ ${timeout} -gt 1500 ]]; then
collect_k8s_logs
echo
echo "Waiting timeout has been reached. Exiting..."
exit 1
Expand All @@ -616,7 +617,6 @@ wait_for_delete() {
echo -n .
let retry+=1
if [ $retry -ge $wait_time ]; then
collect_k8s_logs
kubectl logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
| grep -v 'level=info' \
| grep -v 'level=debug' \
Expand All @@ -639,8 +639,6 @@ compare_generation() {

current_generation="$(kubectl_bin get ${resource_type} "${resource_name}" -o jsonpath='{.metadata.generation}')"
if [[ ${generation} != "${current_generation}" ]]; then
collect_k8s_logs

echo "Generation for ${resource_type}/${resource_name} is: ${current_generation}, but should be: ${generation}"
exit 1
fi
Expand Down Expand Up @@ -1011,7 +1009,6 @@ get_service_endpoint() {
return
fi

collect_k8s_logs
exit 1
}

Expand Down Expand Up @@ -1150,9 +1147,6 @@ kubectl_bin() {
cat "$LAST_OUT"
cat "$LAST_ERR" >&2
rm "$LAST_OUT" "$LAST_ERR"
if [ ${exit_status} != 0 ]; then
collect_k8s_logs
fi
return ${exit_status}
}

Expand Down Expand Up @@ -1191,7 +1185,6 @@ wait_cluster_consistency() {
until [[ "$(kubectl_bin get psmdb "${cluster_name}" -o jsonpath='{.status.state}')" == "ready" ]]; do
let retry+=1
if [ $retry -ge $wait_time ]; then
collect_k8s_logs
echo max retry count $retry reached. something went wrong with operator or kubernetes cluster
exit 1
fi
Expand All @@ -1218,7 +1211,6 @@ check_backup_deletion() {
retry=0
until [[ $(curl -sw '%{http_code}' -o /dev/null $path) -eq 403 ]] || [[ $(curl -sw '%{http_code}' -o /dev/null $path) -eq 404 ]]; do
if [ $retry -ge 10 ]; then
collect_k8s_logs
echo max retry count $retry reached. something went wrong with operator or kubernetes cluster
echo "Backup was not removed from bucket -- $storage_name"
exit 1
Expand Down Expand Up @@ -1280,7 +1272,6 @@ function get_mongod_ver_from_image() {
version_info=$(run_simple_cli_inside_image ${image} 'mongod --version' | $sed -r 's/^.*db version v(([0-9]+\.){2}[0-9]+-[0-9]+).*$/\1/g')

if [[ ! ${version_info} =~ ^([0-9]+\.){2}[0-9]+-[0-9]+$ ]]; then
collect_k8s_logs
printf "No mongod version obtained from %s. Exiting" ${image}
exit 1
fi
Expand All @@ -1293,7 +1284,6 @@ function get_pbm_version() {
local version_info=$(run_simple_cli_inside_image ${image} 'pbm-agent version' | $sed -r 's/^Version:\ (([0-9]+\.){2}[0-9]+)\ .*/\1/g')

if [[ ! ${version_info} =~ ^([0-9]+\.){2}[0-9]+$ ]]; then
collect_k8s_logs
printf "No pbm version obtained from %s. Exiting" ${image}
exit 1
fi
Expand Down
1 change: 0 additions & 1 deletion e2e-tests/init-deploy/run
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ compare_mongo_cmd "find" "myApp:myPass@$cluster-2.$cluster.$namespace"
desc 'check number of connections'
conn_count=$(run_mongo 'db.serverStatus().connections.current' "clusterAdmin:clusterAdmin123456@$cluster.$namespace" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|bye')
if [ ${conn_count} -gt ${max_conn} ]; then
collect_k8s_logs
echo "Mongo connection count ${conn_count} is greater than maximum connection count limit: ${max_conn}"
exit 1
fi
Expand Down
1 change: 0 additions & 1 deletion e2e-tests/mongod-major-upgrade-sharded/run
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ function main() {
| grep -E '^\{.*\}$' | jq -r '.featureCompatibilityVersion.version')

if [[ ${currentFCV} != ${version} ]]; then
collect_k8s_logs
echo "FCV at the moment is ${currentFCV} and is not set to ${version} as it should. Exiting..."
exit 1
fi
Expand Down
1 change: 0 additions & 1 deletion e2e-tests/mongod-major-upgrade/run
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ function main() {
| grep -E '^\{.*\}$' | jq -r '.featureCompatibilityVersion.version')

if [[ ${currentFCV} != ${version} ]]; then
collect_k8s_logs
echo "FCV at the moment is ${currentFCV} and is not set to ${version} as it should. Exiting..."
exit 1
fi
Expand Down
2 changes: 0 additions & 2 deletions e2e-tests/monitoring-2-0/run
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ until kubectl_bin exec monitoring-0 -- bash -c "ls -l /proc/*/exe 2>/dev/null| g
sleep 5
let retry+=1
if [ $retry -ge 20 ]; then
collect_k8s_logs
echo "Max retry count $retry reached. Pmm-server can't start"
exit 1
fi
Expand Down Expand Up @@ -151,7 +150,6 @@ if [[ -n ${OPENSHIFT} ]]; then
fi

if [[ $(kubectl_bin logs monitoring-rs0-0 pmm-client | grep -c 'cannot auto discover databases and collections') != 0 ]]; then
collect_k8s_logs
echo "error: cannot auto discover databases and collections"
exit 1
fi
Expand Down
3 changes: 0 additions & 3 deletions e2e-tests/multi-cluster-service/run
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ wait_mcs_api() {
until [[ $(kubectl_bin api-resources | grep ServiceExport | wc -l) -eq 1 ]]; do
let retry+=1
if [ $retry -ge 64 ]; then
collect_k8s_logs
echo max retry count $retry reached. Something went wrong with MCS, probably a problem on GCP side.
exit 1
fi
Expand All @@ -41,7 +40,6 @@ wait_service_import() {
until [[ "$(kubectl_bin get serviceimport --ignore-not-found | grep -v 'NAME' | wc -l)" -eq "9" ]]; do
let retry+=1
if [ $retry -ge 64 ]; then
collect_k8s_logs
echo max retry count $retry reached. Something went wrong with MCS, probably a problem in gke-mcs-importer.
exit 1
fi
Expand All @@ -60,7 +58,6 @@ wait_service_export() {
until [[ "$(kubectl_bin get serviceexport --ignore-not-found | grep -v 'NAME' | wc -l)" -eq "9" ]]; do
let retry+=1
if [ $retry -ge 64 ]; then
collect_k8s_logs
echo max retry count $retry reached. Something went wrong with MCS, probably a problem in gke-mcs-exporter.
exit 1
fi
Expand Down
2 changes: 1 addition & 1 deletion e2e-tests/one-pod/compare/statefulset_one-pod-rs0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ metadata:
name: one-pod
spec:
podManagementPolicy: OrderedReady
replicas: 1
replicas: 2
revisionHistoryLimit: 10
selector:
matchLabels:
Expand Down
4 changes: 0 additions & 4 deletions e2e-tests/rs-shard-migration/run
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,10 @@ function main() {
wait_cluster_consistency "${cluster}"

if [[ $(kubectl_bin get statefulset/${cluster}-mongos -o jsonpath='{.status.readyReplicas}') -lt 1 ]]; then
collect_k8s_logs
echo "Mongos hasn't been properly started. Exiting..."
exit 1
fi
if [[ "$(kubectl_bin get sts/${cluster}-cfg -o jsonpath='{.status.replicas}')" != "$(kubectl_bin get sts/${cluster}-cfg -o jsonpath='{.status.readyReplicas}')" ]]; then
collect_k8s_logs
echo "Cfg pods haven't been properly started. Exiting..."
exit 1
fi
Expand All @@ -56,7 +54,6 @@ function main() {

if [[ -z "$(get_shard_parameter ${cluster} ${namespace} lastCommitedOpTime)" ]] \
&& [[ -z "$(get_shard_parameter ${cluster} ${namespace} '$configServerState.opTime.ts')" ]]; then # for mongo 3.6
collect_k8s_logs
echo "Sharded cluster does not work properly"
exit 1
fi
Expand All @@ -73,7 +70,6 @@ function main() {
|| [[ -n "$(kubectl_bin get service -o jsonpath='{.items[?(@.metadata.name == "'"${cluster}-mongos"'")].metadata.name}')" ]] \
|| [[ -n "$(kubectl_bin get service -o jsonpath='{.items[?(@.metadata.name == "'"${cluster}-cfg"'")].metadata.name}')" ]] \
|| [[ -n "$(kubectl_bin get statefulset -o jsonpath='{.items[?(@.metadata.name == "'"${cluster}-cfg"'")].metadata.name}')" ]]; then
collect_k8s_logs
echo "Transition to replicaset cluster has not been done well. Cluster does not work properly or some leftovers still exist"
exit 1
fi
Expand Down
Loading

0 comments on commit 7a70e0d

Please sign in to comment.