diff --git a/Jenkinsfile b/Jenkinsfile
index 973c1bbe21..6d6adf3fb9 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -97,6 +97,26 @@ void pushLogFile(String FILE_NAME) {
     }
 }
 
+void pushK8SLogs(String TEST_NAME) {
+    def LOG_FILE_PATH="e2e-tests/logs"
+    echo "Push k8s logs to S3!"
+
+    withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', accessKeyVariable: 'AWS_ACCESS_KEY_ID', credentialsId: 'AMI/OVF', secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']]) {
+        sh """
+            if [ -d "${LOG_FILE_PATH}/${TEST_NAME}" ]; then
+                env GZIP=-9 tar -zcvf ${TEST_NAME}.tar.gz -C ${LOG_FILE_PATH} ${TEST_NAME}
+                rm -rf ${LOG_FILE_PATH}/${TEST_NAME}
+
+                S3_PATH=s3://percona-jenkins-artifactory/\$JOB_NAME/\$(git rev-parse --short HEAD)/logs
+                aws s3 ls \$S3_PATH/ || :
+                aws s3 rm \$S3_PATH/${TEST_NAME}.tar.gz || :
+                aws s3 cp --quiet ${TEST_NAME}.tar.gz \$S3_PATH/ || :
+                rm -f ${TEST_NAME}.tar.gz
+            fi
+        """
+    }
+}
+
 void popArtifactFile(String FILE_NAME) {
     echo "Try to get $FILE_NAME file from S3!"
 
@@ -211,6 +231,7 @@ void runTest(Integer TEST_ID) {
 
         return true
     } catch (exc) {
+        pushK8SLogs("$testName")
         if (retryCount >= 1 || currentBuild.nextBuild != null) {
             currentBuild.result = 'FAILURE'
             return true
@@ -243,6 +264,7 @@ pipeline {
         CLUSTER_NAME = sh(script: "echo jen-psmdb-${env.CHANGE_ID}-${GIT_SHORT_COMMIT}-${env.BUILD_NUMBER} | tr '[:upper:]' '[:lower:]'", returnStdout: true).trim()
         AUTHOR_NAME = sh(script: "echo ${CHANGE_AUTHOR_EMAIL} | awk -F'@' '{print \$1}'", returnStdout: true).trim()
         ENABLE_LOGGING = "true"
+        ENABLE_K8S_LOGGING = "true"
     }
     agent {
         label 'docker'
@@ -483,7 +505,7 @@ EOF
                     unstash 'IMAGE'
                     def IMAGE = sh(returnStdout: true, script: "cat results/docker/TAG").trim()
 
-                    TestsReport = TestsReport + "\r\n\r\ncommit: ${env.CHANGE_URL}/commits/${env.GIT_COMMIT}\r\nimage: `${IMAGE}`\r\n"
+                    TestsReport = TestsReport + "\r\n\r\ncommit: ${env.CHANGE_URL}/commits/${env.GIT_COMMIT}\r\nimage: `${IMAGE}`\r\nlogs: `s3://percona-jenkins-artifactory/cloud-psmdb-operator/PR-${env.CHANGE_ID}/${GIT_SHORT_COMMIT}/logs/`"
                     pullRequest.comment(TestsReport)
                 }
             }
diff --git a/e2e-tests/arbiter/run b/e2e-tests/arbiter/run
index 2721feae74..fa853cdb56 100755
--- a/e2e-tests/arbiter/run
+++ b/e2e-tests/arbiter/run
@@ -25,7 +25,7 @@ check_cr_config() {
 	desc 'wait for convergence'
 	local arbiter_ip=$(get_service_ip $cluster-arbiter-0)
 	local URI="$(get_service_ip $cluster-0),$(get_service_ip $cluster-1),$(get_service_ip $cluster-arbiter-0)"
-	sleep 240
+	wait_cluster_consistency "${cluster/-rs0/}"
 
 	# check arbiter liveness
 	if [[ $(kubectl_bin get pod \
diff --git a/e2e-tests/functions b/e2e-tests/functions
index 00469b3a0c..52cb34414f 100755
--- a/e2e-tests/functions
+++ b/e2e-tests/functions
@@ -28,6 +28,15 @@ conf_dir=$(realpath $test_dir/../conf || :)
 src_dir=$(realpath $test_dir/../..)
 logs_dir=$(realpath $test_dir/../logs)
 
+trap cleanup EXIT HUP INT QUIT TERM
+cleanup() {
+	exit_code=$?
+	if [[ ${exit_code} -ne 0 ]]; then
+		collect_k8s_logs
+	fi
+	exit ${exit_code}
+}
+
 if [[ ${ENABLE_LOGGING} == "true" ]]; then
 	if [ ! -d "${logs_dir}" ]; then
 		mkdir "${logs_dir}"
@@ -603,7 +612,7 @@ wait_for_delete() {
 		echo -n .
 		let retry+=1
 		if [ $retry -ge $wait_time ]; then
-			kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
+			kubectl logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
 				| grep -v 'level=info' \
 				| grep -v 'level=debug' \
 				| grep -v 'Getting tasks for pod' \
@@ -703,10 +712,15 @@ run_mongo() {
 	local suffix=${4:-.svc.cluster.local}
 	local client_container=$(kubectl_bin get pods --selector=name=psmdb-client -o 'jsonpath={.items[].metadata.name}')
 	local mongo_flag="$5"
 
-	[[ $uri == *cfg* ]] && replica_set='cfg' || replica_set='rs0'
-	kubectl_bin exec ${client_container} -- \
-		bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?ssl=false\&replicaSet=$replica_set $mongo_flag"
+	local replica_set=$(echo "$uri" | grep -oE '\-(rs[0-9]|cfg)(\.|-)' | sed 's/^.//;s/.$//' | head -n1)
+	if [[ ${FUNCNAME[1]} == "collect_k8s_logs" ]]; then
+		kubectl exec ${client_container} -- \
+			bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?ssl=false\&replicaSet=$replica_set $mongo_flag"
+	else
+		kubectl_bin exec ${client_container} -- \
+			bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?ssl=false\&replicaSet=$replica_set $mongo_flag"
+	fi
 }
 
 run_mongo_tls() {
@@ -716,10 +730,15 @@ run_mongo_tls() {
 	local suffix=${4:-.svc.cluster.local}
 	local client_container=$(kubectl_bin get pods --selector=name=psmdb-client -o 'jsonpath={.items[].metadata.name}')
 	local mongo_flag="$5"
 
-	[[ $uri == *cfg* ]] && replica_set='cfg' || replica_set='rs0'
-	kubectl_bin exec ${client_container} -- \
-		bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?replicaSet=$replica_set --tls --tlsCAFile /etc/mongodb-ssl/ca.crt --tlsCertificateKeyFile /tmp/tls.pem --tlsAllowInvalidHostnames $mongo_flag"
+	local replica_set=$(echo "$uri" | grep -oE '\-(rs[0-9]|cfg)(\.|-)' | sed 's/^.//;s/.$//' | head -n1)
+	if [[ ${FUNCNAME[1]} == "collect_k8s_logs" ]]; then
+		kubectl exec ${client_container} -- \
+			bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?replicaSet=$replica_set --tls --tlsCAFile /etc/mongodb-ssl/ca.crt --tlsCertificateKeyFile /tmp/tls.pem --tlsAllowInvalidHostnames $mongo_flag"
+	else
+		kubectl_bin exec ${client_container} -- \
+			bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?replicaSet=$replica_set --tls --tlsCAFile /etc/mongodb-ssl/ca.crt --tlsCertificateKeyFile /tmp/tls.pem --tlsAllowInvalidHostnames $mongo_flag"
+	fi
 }
 
 run_mongos() {
@@ -887,13 +906,13 @@ deploy_cert_manager() {
 delete_crd() {
 	desc 'get and delete old CRDs and RBAC'
 
-	kubectl_bin delete -f "${src_dir}/deploy/crd.yaml" --ignore-not-found --wait=false || :
+	kubectl delete -f "${src_dir}/deploy/crd.yaml" --ignore-not-found --wait=false || :
 	for crd_name in $(yq eval '.metadata.name' "${src_dir}/deploy/crd.yaml" | grep -v '\-\-\-'); do
 		kubectl get ${crd_name} --all-namespaces -o wide \
 			| grep -v 'NAMESPACE' \
 			| xargs -L 1 sh -xc 'kubectl patch '${crd_name}' -n $0 $1 --type=merge -p "{\"metadata\":{\"finalizers\":[]}}"' \
 			|| :
-		kubectl_bin wait --for=delete crd ${crd_name} || :
+		kubectl wait --for=delete crd ${crd_name} || :
 	done
 
 	local rbac_yaml='rbac.yaml'
@@ -901,7 +920,7 @@ delete_crd() {
 		rbac_yaml='cw-rbac.yaml'
 	fi
 
-	kubectl_bin delete -f "${src_dir}/deploy/$rbac_yaml" --ignore-not-found || true
+	kubectl delete -f "${src_dir}/deploy/$rbac_yaml" --ignore-not-found || true
 }
 
 destroy() {
@@ -910,7 +929,7 @@ destroy() {
 
 	desc 'destroy cluster/operator and all other resources'
 	if [ ${ignore_logs} == "false" ] && [ "${DEBUG_TESTS}" == 1 ]; then
-		kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
+		kubectl logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
 			| grep -v 'level=info' \
 			| grep -v 'level=debug' \
 			| grep -v 'Getting tasks for pod' \
@@ -933,9 +952,9 @@ destroy() {
 			oc delete --grace-period=0 --force=true project "$OPERATOR_NS" &
 		fi
 	else
-		kubectl_bin delete --grace-period=0 --force=true namespace "$namespace" &
+		kubectl delete --grace-period=0 --force=true namespace "$namespace" &
 		if [ -n "$OPERATOR_NS" ]; then
-			kubectl_bin delete --grace-period=0 --force=true namespace "$OPERATOR_NS" &
+			kubectl delete --grace-period=0 --force=true namespace "$OPERATOR_NS" &
 		fi
 	fi
 	rm -rf ${tmp_dir}
@@ -1232,7 +1251,7 @@ check_crd_for_deletion() {
 	local git_tag="$1"
 
 	for crd_name in $(curl -s https://raw.githubusercontent.com/percona/percona-server-mongodb-operator/${git_tag}/deploy/crd.yaml | yq eval '.metadata.name' | $sed 's/---//g' | $sed ':a;N;$!ba;s/\n/ /g'); do
-		if [[ $(kubectl_bin get crd/${crd_name} -o jsonpath='{.status.conditions[-1].type}') == "Terminating" ]]; then
+		if [[ $(kubectl get crd/${crd_name} -o jsonpath='{.status.conditions[-1].type}') == "Terminating" ]]; then
 			kubectl get ${crd_name} --all-namespaces -o wide \
 				| grep -v 'NAMESPACE' \
 				| xargs -L 1 sh -xc 'kubectl patch '${crd_name}' -n $0 $1 --type=merge -p "{\"metadata\":{\"finalizers\":[]}}"' \
@@ -1299,6 +1318,59 @@ function generate_vs_json() {
 	echo ${version_service_source} | jq '.' >${target_path}
 }
 
+collect_k8s_logs() {
+	if [[ ${ENABLE_K8S_LOGGING} == "true" ]]; then
+		local check_namespaces="${namespace}${OPERATOR_NS:+ $OPERATOR_NS}"
+		local logs_path="${logs_dir}/${test_name}"
+
+		rm -rf ${logs_path} || :
+		mkdir -p ${logs_path}
+
+		for ns in ${check_namespaces}; do
+			local pods=$(kubectl get pods -n "${ns}" -o name | awk -F "/" '{print $2}')
+			for p in ${pods}; do
+				kubectl -n "${ns}" describe pod ${p} >${logs_path}/pod_${ns}_${p}.dsc || :
+				local containers=$(kubectl -n "${ns}" get pod ${p} -o jsonpath='{.spec.containers[*].name}')
+				for c in ${containers}; do
+					kubectl -n "${ns}" logs ${p} -c ${c} >${logs_path}/container_${p}_${c}.log || :
+					echo "logs saved in: ${logs_path}/container_${p}_${c}.log"
+				done
+			done
+		done
+		for object in psmdb psmdb-backup psmdb-restore pods deployments replicasets services sts configmaps persistentvolumeclaims persistentvolumes secrets roles issuer certificate; do
+			echo "##### START: ${object} NS: ${namespace} #####" >>${logs_path}/_overview_${namespace}.txt
+			kubectl get ${object} -n "${namespace}" >>${logs_path}/_overview_${namespace}.txt || :
+			echo -e "##### END: ${object} NS: ${namespace} #####\n" >>${logs_path}/_overview_${namespace}.txt
+			kubectl get ${object} -n "${namespace}" -oyaml >${logs_path}/${object}_${namespace}.yaml || :
+			kubectl describe ${object} -n "${namespace}" >${logs_path}/${object}_${namespace}.dsc || :
+		done
+		kubectl get events --all-namespaces >${logs_path}/_events.log || :
+		kubectl get nodes >${logs_path}/_nodes.log || :
+		kubectl get clusterroles >${logs_path}/_clusterroles.log || :
+
+		local secret psmdb_secret psmdb_user psmdb_pass
+		for psmdb_name in $(kubectl get psmdb -n ${namespace} -o custom-columns=NAME:.metadata.name --no-headers=true); do
+			psmdb_secret="$(kubectl get psmdb ${psmdb_name} -n ${namespace} -ojsonpath='{.spec.secrets.users}')"
+			if [[ ${psmdb_secret} ]]; then secret="${psmdb_secret}"; else secret="${psmdb_name}-secrets"; fi
+			psmdb_user="$(kubectl get secrets ${secret} -n ${namespace} -ojsonpath='{.data.MONGODB_BACKUP_USER}' | base64 --decode)"
psmdb_pass="$(kubectl get secrets ${psmdb_secret} -ojsonpath='{.data.MONGODB_BACKUP_PASSWORD}' | base64 --decode)" + if [[ "$(kubectl get psmdb ${psmdb_name} -n ${namespace} -ojsonpath='{.spec.sharding.enabled}')" == "true" ]]; then + local cfg_replica="cfg" + echo "##### sh.status() #####" >${logs_path}/mongos_${psmdb_name}.mongo + run_mongos 'sh.status()' "${psmdb_user}:${psmdb_pass}@${psmdb_name}-mongos.${namespace}" >>${logs_path}/mongos_${psmdb_name}.mongo + fi + for psmdb_replset in $(kubectl get psmdb ${psmdb_name} -n ${namespace} -ojsonpath='{.spec.replsets[*].name}' | awk '{print $0" '${cfg_replica}'"}'); do + local command=("rs.status()" "rs.config()" "db.printSlaveReplicationInfo()" "db.serverCmdLineOpts()" "db.getRoles()" "db.getUsers()") + for com in "${command[@]}"; do + echo "##### START: ${com} #####" >>${logs_path}/mongodb_${psmdb_name}_${psmdb_replset}.mongo + run_mongo "${com}" "${psmdb_user}:${psmdb_pass}@${psmdb_name}-${psmdb_replset}.${namespace}" >>${logs_path}/mongodb_${psmdb_name}_${psmdb_replset}.mongo + echo -e "##### END: ${com} #####\n" >>${logs_path}/mongodb_${psmdb_name}_${psmdb_replset}.mongo + done + done + done + fi +} + check_passwords_leak() { local secrets local passwords diff --git a/e2e-tests/init-deploy/run b/e2e-tests/init-deploy/run index 7520d936c3..cf6e6cea7f 100755 --- a/e2e-tests/init-deploy/run +++ b/e2e-tests/init-deploy/run @@ -61,6 +61,7 @@ compare_mongo_cmd "find" "myApp:myPass@$cluster-2.$cluster.$namespace" desc 'check number of connections' conn_count=$(run_mongo 'db.serverStatus().connections.current' "clusterAdmin:clusterAdmin123456@$cluster.$namespace" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|bye') if [ ${conn_count} -gt ${max_conn} ]; then + echo "Mongo connection count ${conn_count} is greater than maximum connection count limit: ${max_conn}" exit 1 fi diff --git a/e2e-tests/split-horizon/run b/e2e-tests/split-horizon/run index 420202dee3..1519d67c14 100755 --- a/e2e-tests/split-horizon/run +++ b/e2e-tests/split-horizon/run @@ -16,7 +16,7 @@ configure_client_hostAliases() { hostAliasesJson=$(echo $hostAliasesJson | jq --argjson newAlias "$hostAlias" '. 
 	done
 
-	kubectl_bin patch deployment psmdb-client --type='json' -p="[{'op': 'replace', 'path': '/spec/template/spec/hostAliases', 'value': $hostAliasesJson}]"
+	kubectl_bin patch deployment psmdb-client --type='json' -p="[{'op': 'replace', 'path': '/spec/template/spec/hostAliases', 'value': $hostAliasesJson}]"
 
 	wait_pod $(kubectl_bin get pods --selector=name=psmdb-client -o 'jsonpath={.items[].metadata.name}')
 }
 
@@ -50,21 +50,21 @@ diff $test_dir/compare/horizons-3.json $tmp_dir/horizons-3.json
 
 isMaster=$(run_mongo_tls "db.isMaster().ismaster" "clusterAdmin:clusterAdmin123456@some-name-rs0-0.clouddemo.xyz,some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" mongodb "" "--quiet" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history file:' | grep -v certificateNames)
 if [ "${isMaster}" != "true" ]; then
-    echo "mongo client should've redirect the connection to primary"
-    exit 1
+	echo "mongo client should have redirected the connection to the primary"
+	exit 1
 fi
 
 # stepping down to ensure we haven't redirected to primary just because primary is pod-0
 run_mongo_tls "rs.stepDown()" \
-    "clusterAdmin:clusterAdmin123456@some-name-rs0-0.clouddemo.xyz,some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" \
-    mongodb "" "--quiet"
+	"clusterAdmin:clusterAdmin123456@some-name-rs0-0.clouddemo.xyz,some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" \
+	mongodb "" "--quiet"
 
 sleep 10 # give some time for re-election
 
 isMaster=$(run_mongo_tls "db.isMaster().ismaster" "clusterAdmin:clusterAdmin123456@some-name-rs0-0.clouddemo.xyz,some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" mongodb "" "--quiet" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history file:' | grep -v certificateNames)
 if [ "${isMaster}" != "true" ]; then
-    echo "mongo client should've redirect the connection to primary"
-    exit 1
+	echo "mongo client should have redirected the connection to the primary"
+	exit 1
 fi
 
 apply_cluster ${test_dir}/conf/${cluster}-5horizons.yml
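
Note for reviewers: the core of this change is the failure-path wiring — the EXIT trap in e2e-tests/functions calls collect_k8s_logs when a test exits non-zero, and the Jenkinsfile catch block then ships the resulting directory to S3 via pushK8SLogs. Below is a minimal, self-contained sketch of the same trap-driven collection pattern for anyone reusing it outside this harness; NAMESPACE, LOGS_DIR, and collect_logs are illustrative stand-ins, not names from this repo:

#!/bin/bash
# Sketch: collect pod descriptions and per-container logs on test failure.
# Assumes kubectl is configured; NAMESPACE/LOGS_DIR are placeholders
# (the real harness derives them from $namespace, $logs_dir, $test_name).

NAMESPACE=${NAMESPACE:-default}
LOGS_DIR=${LOGS_DIR:-/tmp/k8s-logs}

collect_logs() {
	mkdir -p "${LOGS_DIR}"
	for pod in $(kubectl -n "${NAMESPACE}" get pods -o name | cut -d/ -f2); do
		# pod description plus one log file per container; "|| :" keeps
		# a single failed kubectl call from aborting the whole collection
		kubectl -n "${NAMESPACE}" describe pod "${pod}" >"${LOGS_DIR}/pod_${pod}.dsc" || :
		for c in $(kubectl -n "${NAMESPACE}" get pod "${pod}" -o jsonpath='{.spec.containers[*].name}'); do
			kubectl -n "${NAMESPACE}" logs "${pod}" -c "${c}" >"${LOGS_DIR}/container_${pod}_${c}.log" || :
		done
	done
}

cleanup() {
	exit_code=$?
	# collect only on failure, mirroring cleanup() in e2e-tests/functions
	if [[ ${exit_code} -ne 0 ]]; then
		collect_logs
	fi
	exit ${exit_code}
}
trap cleanup EXIT HUP INT QUIT TERM

Because the trap fires on EXIT, every early `exit 1` in a test script (such as the connection-count and isMaster checks above) triggers collection without any per-test changes.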