K8SPSMDB-1080 - Push failed test k8s logs to S3 bucket #1324

Draft: wants to merge 15 commits into base: main
24 changes: 23 additions & 1 deletion Jenkinsfile
@@ -97,6 +97,26 @@ void pushLogFile(String FILE_NAME) {
}
}

void pushK8SLogs(String TEST_NAME) {
def LOG_FILE_PATH="e2e-tests/logs"
echo "Push k8s logs to S3!"

withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', accessKeyVariable: 'AWS_ACCESS_KEY_ID', credentialsId: 'AMI/OVF', secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']]) {
sh """
if [ -d "${LOG_FILE_PATH}/${TEST_NAME}" ]; then
env GZIP=-9 tar -zcvf ${TEST_NAME}.tar.gz -C ${LOG_FILE_PATH} ${TEST_NAME}
rm -rf ${LOG_FILE_PATH}/${TEST_NAME}

S3_PATH=s3://percona-jenkins-artifactory/\$JOB_NAME/\$(git rev-parse --short HEAD)/logs
aws s3 ls \$S3_PATH/ || :
aws s3 rm \$S3_PATH/${TEST_NAME}.tar.gz || :
aws s3 cp --quiet ${TEST_NAME}.tar.gz \$S3_PATH/ || :
rm -f ${TEST_NAME}.tar.gz
fi
"""
}
}

void popArtifactFile(String FILE_NAME) {
echo "Try to get $FILE_NAME file from S3!"

@@ -211,6 +231,7 @@ void runTest(Integer TEST_ID) {
return true
}
catch (exc) {
pushK8SLogs("$testName")
if (retryCount >= 1 || currentBuild.nextBuild != null) {
currentBuild.result = 'FAILURE'
return true
@@ -243,6 +264,7 @@ pipeline {
CLUSTER_NAME = sh(script: "echo jen-psmdb-${env.CHANGE_ID}-${GIT_SHORT_COMMIT}-${env.BUILD_NUMBER} | tr '[:upper:]' '[:lower:]'", returnStdout: true).trim()
AUTHOR_NAME = sh(script: "echo ${CHANGE_AUTHOR_EMAIL} | awk -F'@' '{print \$1}'", returnStdout: true).trim()
ENABLE_LOGGING = "true"
ENABLE_K8S_LOGGING = "true"
}
agent {
label 'docker'
@@ -483,7 +505,7 @@

unstash 'IMAGE'
def IMAGE = sh(returnStdout: true, script: "cat results/docker/TAG").trim()
TestsReport = TestsReport + "\r\n\r\ncommit: ${env.CHANGE_URL}/commits/${env.GIT_COMMIT}\r\nimage: `${IMAGE}`\r\n"
TestsReport = TestsReport + "\r\n\r\ncommit: ${env.CHANGE_URL}/commits/${env.GIT_COMMIT}\r\nimage: `${IMAGE}`\r\nlogs: `s3://percona-jenkins-artifactory/cloud-psmdb-operator/PR-${env.CHANGE_ID}/${GIT_SHORT_COMMIT}/logs/`"
pullRequest.comment(TestsReport)
}
}
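For reference, an archive uploaded by pushK8SLogs can be pulled back for local inspection with the stock AWS CLI. The path layout below mirrors the helper and the PR comment above; the bracketed parts stand in for the PR number, short commit and test name:

# assumes read access to the percona-jenkins-artifactory bucket
aws s3 ls s3://percona-jenkins-artifactory/cloud-psmdb-operator/PR-<change-id>/<short-commit>/logs/
aws s3 cp s3://percona-jenkins-artifactory/cloud-psmdb-operator/PR-<change-id>/<short-commit>/logs/<test-name>.tar.gz .
tar -xzf <test-name>.tar.gz    # unpacks into <test-name>/ with the files collected by collect_k8s_logs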
2 changes: 1 addition & 1 deletion e2e-tests/arbiter/run
@@ -25,7 +25,7 @@ check_cr_config() {
desc 'wait for convergence'
local arbiter_ip=$(get_service_ip $cluster-arbiter-0)
local URI="$(get_service_ip $cluster-0),$(get_service_ip $cluster-1),$(get_service_ip $cluster-arbiter-0)"
sleep 240
wait_cluster_consistency "${cluster/-rs0/}"

# check arbiter liveness
if [[ $(kubectl_bin get pod \
100 changes: 86 additions & 14 deletions e2e-tests/functions
@@ -28,6 +28,15 @@ conf_dir=$(realpath $test_dir/../conf || :)
src_dir=$(realpath $test_dir/../..)
logs_dir=$(realpath $test_dir/../logs)

trap cleanup EXIT HUP INT QUIT TERM
cleanup() {
exit_code=$?
if [[ ${exit_code} -ne 0 ]]; then
collect_k8s_logs
fi
exit ${exit_code}
}

if [[ ${ENABLE_LOGGING} == "true" ]]; then
if [ ! -d "${logs_dir}" ]; then
mkdir "${logs_dir}"
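A note on the trap added above: an EXIT trap must read $? before running anything else, otherwise the failing status that gates collect_k8s_logs is lost, and the final exit ${exit_code} re-raises that status so the test still reports failure. A minimal standalone illustration of the pattern (not part of the test suite):

#!/bin/bash
trap cleanup EXIT
cleanup() {
    rc=$?                        # status of the command that triggered the exit
    if [[ ${rc} -ne 0 ]]; then
        echo "would collect k8s logs here (rc=${rc})"
    fi
    exit ${rc}                   # preserve the original exit status
}
false                            # this failing command makes the trap report rc=1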
@@ -603,7 +612,7 @@ wait_for_delete() {
echo -n .
let retry+=1
if [ $retry -ge $wait_time ]; then
kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
kubectl logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
| grep -v 'level=info' \
| grep -v 'level=debug' \
| grep -v 'Getting tasks for pod' \
@@ -703,10 +712,15 @@ run_mongo() {
local suffix=${4:-.svc.cluster.local}
local client_container=$(kubectl_bin get pods --selector=name=psmdb-client -o 'jsonpath={.items[].metadata.name}')
local mongo_flag="$5"
[[ $uri == *cfg* ]] && replica_set='cfg' || replica_set='rs0'
kubectl_bin exec ${client_container} -- \
bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?ssl=false\&replicaSet=$replica_set $mongo_flag"
local replica_set=$(echo "$uri" | grep -oE '\-(rs[0-9]|cfg)(\.|-)' | sed 's/^.//;s/.$//' | head -n1)

if [[ ${FUNCNAME[1]} == "collect_k8s_logs" ]]; then
kubectl exec ${client_container} -- \
bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?ssl=false\&replicaSet=$replica_set $mongo_flag"
else
kubectl_bin exec ${client_container} -- \
bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?ssl=false\&replicaSet=$replica_set $mongo_flag"
fi
}

run_mongo_tls() {
@@ -716,10 +730,15 @@ run_mongo_tls() {
local suffix=${4:-.svc.cluster.local}
local client_container=$(kubectl_bin get pods --selector=name=psmdb-client -o 'jsonpath={.items[].metadata.name}')
local mongo_flag="$5"
[[ $uri == *cfg* ]] && replica_set='cfg' || replica_set='rs0'
kubectl_bin exec ${client_container} -- \
bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?replicaSet=$replica_set --tls --tlsCAFile /etc/mongodb-ssl/ca.crt --tlsCertificateKeyFile /tmp/tls.pem --tlsAllowInvalidHostnames $mongo_flag"
local replica_set=$(echo "$uri" | grep -oE '\-(rs[0-9]|cfg)(\.|-)' | sed 's/^.//;s/.$//' | head -n1)

if [[ ${FUNCNAME[1]} == "collect_k8s_logs" ]]; then
kubectl exec ${client_container} -- \
bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?replicaSet=$replica_set --tls --tlsCAFile /etc/mongodb-ssl/ca.crt --tlsCertificateKeyFile /tmp/tls.pem --tlsAllowInvalidHostnames $mongo_flag"
else
kubectl_bin exec ${client_container} -- \
bash -c "printf '$command\n' | mongo $driver://$uri$suffix/admin?replicaSet=$replica_set --tls --tlsCAFile /etc/mongodb-ssl/ca.crt --tlsCertificateKeyFile /tmp/tls.pem --tlsAllowInvalidHostnames $mongo_flag"
fi
}

run_mongos() {
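Since run_mongo and run_mongo_tls now derive the replica-set name from the URI instead of the old cfg/rs0 toggle, a quick standalone check of that extraction may help; the sample URIs below are made up:

# expected output: rs0, cfg and rs1, one value per sample URI
for uri in \
    "clusterAdmin:pass@some-name-rs0.test-ns" \
    "clusterAdmin:pass@some-name-cfg.test-ns" \
    "backup:pass@some-name-rs1-0.some-name-rs1.test-ns"; do
    echo "$uri" | grep -oE '\-(rs[0-9]|cfg)(\.|-)' | sed 's/^.//;s/.$//' | head -n1
done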
@@ -887,21 +906,21 @@ deploy_cert_manager() {
delete_crd() {
desc 'get and delete old CRDs and RBAC'

kubectl_bin delete -f "${src_dir}/deploy/crd.yaml" --ignore-not-found --wait=false || :
kubectl delete -f "${src_dir}/deploy/crd.yaml" --ignore-not-found --wait=false || :
for crd_name in $(yq eval '.metadata.name' "${src_dir}/deploy/crd.yaml" | grep -v '\-\-\-'); do
kubectl get ${crd_name} --all-namespaces -o wide \
| grep -v 'NAMESPACE' \
| xargs -L 1 sh -xc 'kubectl patch '${crd_name}' -n $0 $1 --type=merge -p "{\"metadata\":{\"finalizers\":[]}}"' \
|| :
kubectl_bin wait --for=delete crd ${crd_name} || :
kubectl wait --for=delete crd ${crd_name} || :
done

local rbac_yaml='rbac.yaml'
if [ -n "${OPERATOR_NS}" ]; then
rbac_yaml='cw-rbac.yaml'
fi

kubectl_bin delete -f "${src_dir}/deploy/$rbac_yaml" --ignore-not-found || true
kubectl delete -f "${src_dir}/deploy/$rbac_yaml" --ignore-not-found || true
}

destroy() {
@@ -910,7 +929,7 @@ destroy() {

desc 'destroy cluster/operator and all other resources'
if [ ${ignore_logs} == "false" ] && [ "${DEBUG_TESTS}" == 1 ]; then
kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
kubectl logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
| grep -v 'level=info' \
| grep -v 'level=debug' \
| grep -v 'Getting tasks for pod' \
Expand All @@ -933,9 +952,9 @@ destroy() {
oc delete --grace-period=0 --force=true project "$OPERATOR_NS" &
fi
else
kubectl_bin delete --grace-period=0 --force=true namespace "$namespace" &
kubectl delete --grace-period=0 --force=true namespace "$namespace" &
if [ -n "$OPERATOR_NS" ]; then
kubectl_bin delete --grace-period=0 --force=true namespace "$OPERATOR_NS" &
kubectl delete --grace-period=0 --force=true namespace "$OPERATOR_NS" &
fi
fi
rm -rf ${tmp_dir}
@@ -1232,7 +1251,7 @@ check_crd_for_deletion() {
local git_tag="$1"

for crd_name in $(curl -s https://raw.githubusercontent.com/percona/percona-server-mongodb-operator/${git_tag}/deploy/crd.yaml | yq eval '.metadata.name' | $sed 's/---//g' | $sed ':a;N;$!ba;s/\n/ /g'); do
if [[ $(kubectl_bin get crd/${crd_name} -o jsonpath='{.status.conditions[-1].type}') == "Terminating" ]]; then
if [[ $(kubectl get crd/${crd_name} -o jsonpath='{.status.conditions[-1].type}') == "Terminating" ]]; then
kubectl get ${crd_name} --all-namespaces -o wide \
| grep -v 'NAMESPACE' \
| xargs -L 1 sh -xc 'kubectl patch '${crd_name}' -n $0 $1 --type=merge -p "{\"metadata\":{\"finalizers\":[]}}"' \
@@ -1299,6 +1318,59 @@ function generate_vs_json() {
echo ${version_service_source} | jq '.' >${target_path}
}

collect_k8s_logs() {
if [[ ${ENABLE_K8S_LOGGING} == "true" ]]; then
local check_namespaces="${namespace}${OPERATOR_NS:+ $OPERATOR_NS}"
local logs_path="${logs_dir}/${test_name}"

rm -rf ${logs_path} || :
mkdir -p ${logs_path}

for ns in ${check_namespaces}; do
local pods=$(kubectl get pods -n "${ns}" -o name | awk -F "/" '{print $2}')
for p in ${pods}; do
kubectl -n "${ns}" describe pod ${p} >${logs_path}/pod_${ns}_${p}.dsc || :
local containers=$(kubectl -n "${ns}" get pod ${p} -o jsonpath='{.spec.containers[*].name}')
for c in ${containers}; do
kubectl -n "${ns}" logs ${p} -c ${c} >${logs_path}/container_${p}_${c}.log || :
echo "logs saved in: ${logs_path}/${ns}_${p}_${c}.log"
done
done
done
for object in psmdb psmdb-backup psmdb-restore pods deployments replicasets services sts configmaps persistentvolumeclaims persistentvolumes secrets roles issuer certificate; do
echo "##### START: ${object} NS: ${namespace} #####" >>${logs_path}/_overview_${namespace}.txt
kubectl get ${object} -n "${namespace}" >>${logs_path}/_overview_${namespace}.txt || :
echo -e "##### END: ${object} NS: ${namespace} #####\n" >>${logs_path}/_overview_${namespace}.txt
kubectl get ${object} -n "${namespace}" -oyaml >${logs_path}/${object}_${namespace}.yaml || :
kubectl describe ${object} -n "${namespace}" >${logs_path}/${object}_${namespace}.dsc || :
done
kubectl get events --all-namespaces >${logs_path}/_events.log || :
kubectl get nodes >${logs_path}/_nodes.log || :
kubectl get clusterroles >${logs_path}/_clusterroles.log || :

local secret psmdb_secret psmdb_user psmdb_pass
for psmdb_name in $(kubectl get psmdb -n ${namespace} -o custom-columns=NAME:.metadata.name --no-headers=true); do
psmdb_secret="$(kubectl get psmdb ${psmdb_name} -n ${namespace} -ojsonpath='{.spec.secrets.users}')"
if [[ ${psmdb_secret} ]]; then secret="${psmdb_secret}"; else secret="${psmdb_name}-secrets"; fi
psmdb_user="$(kubectl get secrets ${secret} -ojsonpath='{.data.MONGODB_BACKUP_USER}' | base64 --decode)"
psmdb_pass="$(kubectl get secrets ${secret} -ojsonpath='{.data.MONGODB_BACKUP_PASSWORD}' | base64 --decode)"
if [[ "$(kubectl get psmdb ${psmdb_name} -n ${namespace} -ojsonpath='{.spec.sharding.enabled}')" == "true" ]]; then
local cfg_replica="cfg"
echo "##### sh.status() #####" >${logs_path}/mongos_${psmdb_name}.mongo
run_mongos 'sh.status()' "${psmdb_user}:${psmdb_pass}@${psmdb_name}-mongos.${namespace}" >>${logs_path}/mongos_${psmdb_name}.mongo
fi
for psmdb_replset in $(kubectl get psmdb ${psmdb_name} -n ${namespace} -ojsonpath='{.spec.replsets[*].name}' | awk '{print $0" '${cfg_replica}'"}'); do
local command=("rs.status()" "rs.config()" "db.printSlaveReplicationInfo()" "db.serverCmdLineOpts()" "db.getRoles()" "db.getUsers()")
for com in "${command[@]}"; do
echo "##### START: ${com} #####" >>${logs_path}/mongodb_${psmdb_name}_${psmdb_replset}.mongo
run_mongo "${com}" "${psmdb_user}:${psmdb_pass}@${psmdb_name}-${psmdb_replset}.${namespace}" >>${logs_path}/mongodb_${psmdb_name}_${psmdb_replset}.mongo
echo -e "##### END: ${com} #####\n" >>${logs_path}/mongodb_${psmdb_name}_${psmdb_replset}.mongo
done
done
done
fi
}

check_passwords_leak() {
local secrets
local passwords
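Taken together with pushK8SLogs in the Jenkinsfile, a failed test should leave behind (and upload) a per-test directory shaped roughly as follows; the bracketed names are illustrative and assume a single unsharded cluster:

$ find e2e-tests/logs/<test-name> -maxdepth 1 -type f
e2e-tests/logs/<test-name>/_overview_<namespace>.txt
e2e-tests/logs/<test-name>/_events.log
e2e-tests/logs/<test-name>/_nodes.log
e2e-tests/logs/<test-name>/_clusterroles.log
e2e-tests/logs/<test-name>/pod_<namespace>_<pod>.dsc
e2e-tests/logs/<test-name>/container_<pod>_<container>.log
e2e-tests/logs/<test-name>/psmdb_<namespace>.yaml
e2e-tests/logs/<test-name>/psmdb_<namespace>.dsc
e2e-tests/logs/<test-name>/mongodb_<psmdb-name>_<replset>.mongo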
1 change: 1 addition & 0 deletions e2e-tests/init-deploy/run
@@ -61,6 +61,7 @@ compare_mongo_cmd "find" "myApp:myPass@$cluster-2.$cluster.$namespace"
desc 'check number of connections'
conn_count=$(run_mongo 'db.serverStatus().connections.current' "clusterAdmin:clusterAdmin123456@$cluster.$namespace" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|bye')
if [ ${conn_count} -gt ${max_conn} ]; then
echo "Mongo connection count ${conn_count} is greater than maximum connection count limit: ${max_conn}"
exit 1
fi

14 changes: 7 additions & 7 deletions e2e-tests/split-horizon/run
@@ -16,7 +16,7 @@ configure_client_hostAliases() {
hostAliasesJson=$(echo $hostAliasesJson | jq --argjson newAlias "$hostAlias" '. += [$newAlias]')
done

kubectl_bin patch deployment psmdb-client --type='json' -p="[{'op': 'replace', 'path': '/spec/template/spec/hostAliases', 'value': $hostAliasesJson}]"

wait_pod $(kubectl_bin get pods --selector=name=psmdb-client -o 'jsonpath={.items[].metadata.name}')
}
@@ -50,21 +50,21 @@ diff $test_dir/compare/horizons-3.json $tmp_dir/horizons-3.json

isMaster=$(run_mongo_tls "db.isMaster().ismaster" "clusterAdmin:[email protected],some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" mongodb "" "--quiet" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history file:' | grep -v certificateNames)
if [ "${isMaster}" != "true" ]; then
echo "mongo client should've redirect the connection to primary"
exit 1
echo "mongo client should've redirect the connection to primary"
exit 1
fi

# stepping down to ensure we haven't redirected to primary just because primary is pod-0
run_mongo_tls "rs.stepDown()" \
"clusterAdmin:[email protected],some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" \
mongodb "" "--quiet"
"clusterAdmin:[email protected],some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" \
mongodb "" "--quiet"

sleep 10 # give some time for re-election

isMaster=$(run_mongo_tls "db.isMaster().ismaster" "clusterAdmin:[email protected],some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" mongodb "" "--quiet" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history file:' | grep -v certificateNames)
if [ "${isMaster}" != "true" ]; then
echo "mongo client should've redirect the connection to primary"
exit 1
echo "mongo client should've redirect the connection to primary"
exit 1
fi

apply_cluster ${test_dir}/conf/${cluster}-5horizons.yml