K8SPSMDB-778: Delete repset and its components properly (#1306)
* Update getting replset pods.

* Logs

* Improve getting stateful sets for removal.

* logs

* Fix getting removed sts.

* fix component check

* Cleanup

* Fix cr.yaml

* WIP: update data-sharded test

* Fix removing.

* Update test.

* Update e2e-tests/data-sharded/run

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* Updated data-sharded test.

* Update e2e-tests/data-sharded/run

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* Update e2e-tests/data-sharded/run

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* Update e2e-tests/data-sharded/run

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* Use replset label rather than extracting it from the name.

* Increase timeout

* Refactor and fix lint error.

* fix split-horizon test

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Viacheslav Sarzhan <[email protected]>
3 people authored Sep 19, 2023
1 parent 9340bef commit 5b31035
Showing 13 changed files with 229 additions and 196 deletions.
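
A recurring detail in this change, per the commit message, is selecting a replset's objects by label instead of parsing statefulset names. As a minimal illustration of that label-based selection (the cluster and replset names here are hypothetical; the labels themselves appear verbatim in the test diff below):

    # List every statefulset that belongs to one replset (mongod, arbiter,
    # nonvoting) via the operator-managed labels, instead of name parsing.
    kubectl get sts \
        -l app.kubernetes.io/instance=some-name,app.kubernetes.io/replset=rs1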
25 changes: 7 additions & 18 deletions deploy/cr.yaml
@@ -297,24 +297,13 @@ spec:
# nodeSelector:
# disktype: ssd
# schedulerName: "default"
resources:
limits:
cpu: "300m"
memory: "0.5G"
requests:
cpu: "300m"
memory: "0.5G"
volumeSpec:
# emptyDir: {}
# hostPath:
# path: /data
# type: Directory
persistentVolumeClaim:
# storageClassName: standard
# accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: 3Gi
resources:
limits:
cpu: "300m"
memory: "0.5G"
requests:
cpu: "300m"
memory: "0.5G"
# hostAliases:
# - ip: "10.10.0.2"
# hostnames:
27 changes: 26 additions & 1 deletion e2e-tests/data-sharded/conf/some-name.yml
@@ -94,7 +94,7 @@ spec:
requests:
storage: 1Gi

size: 3
size: 4
configuration: |
net:
tls:
@@ -119,6 +119,9 @@ spec:
journalCompressor: snappy
indexConfig:
prefixCompression: true
arbiter:
enabled: true
size: 1
- name: rs2
affinity:
antiAffinityTopologyKey: none
@@ -159,6 +162,28 @@ spec:
journalCompressor: snappy
indexConfig:
prefixCompression: true
nonvoting:
enabled: true
size: 3
affinity:
antiAffinityTopologyKey: "kubernetes.io/hostname"
resources:
limits:
cpu: 500m
memory: 1G
requests:
cpu: 100m
memory: 0.1G
volumeSpec:
persistentVolumeClaim:
resources:
requests:
storage: 2Gi
volumeSpec:
persistentVolumeClaim:
resources:
requests:
storage: 1Gi

secrets:
users: some-users
214 changes: 128 additions & 86 deletions e2e-tests/data-sharded/run
@@ -6,96 +6,138 @@ test_dir=$(realpath "$(dirname "$0")")
. "${test_dir}/../functions"
set_debug

if [[ ${IMAGE_MONGOD} == *"percona-server-mongodb-operator"* ]]; then
MONGO_VER=$(echo -n "${IMAGE_MONGOD}" | $sed -r 's/.*([0-9].[0-9])$/\1/')
else
MONGO_VER=$(echo -n "${IMAGE_MONGOD}" | $sed -r 's/.*:([0-9]+\.[0-9]+).*$/\1/')
fi

deploy_cert_manager
create_infra "$namespace"

desc 'create secrets and start client'
kubectl_bin apply -f "$conf_dir/secrets.yml"
kubectl_bin apply -f "$conf_dir/client_with_tls.yml"

cluster="some-name"
desc "create first PSMDB cluster $cluster"
apply_cluster "$test_dir/conf/$cluster.yml"

desc 'check if all Pods started'
wait_for_running $cluster-cfg 3
wait_for_running $cluster-rs0 3
wait_for_running $cluster-rs1 3
wait_for_running $cluster-rs2 3
wait_for_running $cluster-mongos 3

desc 'create user'
run_mongos \
'db.createUser({user:"user",pwd:"pass",roles:[{db:"app",role:"readWrite"}]})' \
"userAdmin:userAdmin123456@$cluster-mongos.$namespace" "mongodb" ".svc.cluster.local" \
"--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls"
sleep 2

desc 'set chunk size to 32 MB'
run_mongos \
"use config\n db.settings.save( { _id:\"chunksize\", value: 32 } )" \
"clusterAdmin:clusterAdmin123456@$cluster-mongos.$namespace" "mongodb" ".svc.cluster.local" \
"--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls"
sleep 2

desc 'write data'
run_script_mongos "${test_dir}/data.js" "user:pass@$cluster-mongos.$namespace" "mongodb" ".svc.cluster.local" \
"--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls"

desc 'shard collection'
run_mongos \
'sh.enableSharding("app")' \
"clusterAdmin:clusterAdmin123456@$cluster-mongos.$namespace" "mongodb" ".svc.cluster.local" \
"--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls"
sleep 2

run_mongos \
'sh.shardCollection("app.city", { _id: 1 } )' \
"clusterAdmin:clusterAdmin123456@$cluster-mongos.$namespace" "mongodb" ".svc.cluster.local" \
"--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls"
sleep 120

desc 'check chunks'
chunks_param1="ns"
chunks_param2='"app.city"'

if [[ ${MONGO_VER} == "6.0" || ${MONGO_VER} == "5.0" ]]; then
chunks_param1="uuid"
chunks_param2=$(run_mongos \
"use app\n db.getCollectionInfos({ \"name\": \"city\" })[0].info.uuid" \
"user:pass@$cluster-mongos.$namespace" \
'' \
'' \
"--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls" \
| grep "switched to db app" -A 1 | grep -v "switched to db app")
fi

shards=0
for i in "rs0" "rs1" "rs2"; do
out=$(run_mongos \
"use config\n db.chunks.count({\"${chunks_param1}\": ${chunks_param2}, \"shard\": \"$i\"})" \
check_rs_proper_component_deletion() {
local cluster="$1"
local rs_name="$2"

rs_idx=$(kubectl_bin get psmdb ${cluster} -ojson | jq --arg RS $rs_name '.spec.replsets | map(.name == $RS) | index(true)')
kubectl_bin patch psmdb ${cluster} --type=json -p="[{'op': 'remove', 'path': '/spec/replsets/$rs_idx'}]"

echo -n "Deleting replset $rs_name"
until [[ $(kubectl_bin get sts -l app.kubernetes.io/instance=${cluster},app.kubernetes.io/replset=${rs_name} -ojson | jq '.items | length') -eq 0 ]]; do
let retry+=1
if [ $retry -ge 70 ]; then
sts_count=$(kubectl_bin get sts -l app.kubernetes.io/instance=${cluster},app.kubernetes.io/replset=${rs_name} -ojson | jq '.items | length')
echo "Replset $rs_name not properly removed, expected sts count of 0 but got $sts_count. Exiting after $retry tries..."
exit 1
fi
echo -n .
sleep 30
done

echo "OK"
}

main() {
if [[ ${IMAGE_MONGOD} == *"percona-server-mongodb-operator"* ]]; then
MONGO_VER=$(echo -n "${IMAGE_MONGOD}" | $sed -r 's/.*([0-9].[0-9])$/\1/')
else
MONGO_VER=$(echo -n "${IMAGE_MONGOD}" | $sed -r 's/.*:([0-9]+\.[0-9]+).*$/\1/')
fi

deploy_cert_manager
create_infra "$namespace"

desc 'create secrets and start client'
kubectl_bin apply -f "$conf_dir/secrets.yml"
kubectl_bin apply -f "$conf_dir/client_with_tls.yml"

cluster="some-name"
desc "create first PSMDB cluster $cluster"
apply_cluster "$test_dir/conf/$cluster.yml"

desc 'check if all Pods started'
wait_for_running $cluster-cfg 3
wait_for_running $cluster-rs0 3
wait_for_running $cluster-rs1 3
wait_for_running $cluster-rs2 3
wait_for_running $cluster-mongos 3

desc 'create user'
run_mongos \
'db.createUser({user:"user",pwd:"pass",roles:[{db:"app",role:"readWrite"}]})' \
"userAdmin:userAdmin123456@$cluster-mongos.$namespace" "mongodb" ".svc.cluster.local" \
"--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls"
sleep 2

desc 'set chunk size to 32 MB'
run_mongos \
"use config\n db.settings.save( { _id:\"chunksize\", value: 32 } )" \
"clusterAdmin:clusterAdmin123456@$cluster-mongos.$namespace" "mongodb" ".svc.cluster.local" \
"--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls" \
| grep "switched to db config" -A 1 | grep -v "switched to db config")
"--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls"
sleep 2

desc "$i has $out chunks"
desc 'write data'
run_script_mongos "${test_dir}/data.js" "user:pass@$cluster-mongos.$namespace" "mongodb" ".svc.cluster.local" \
"--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls"

desc 'shard collection'
run_mongos \
'sh.enableSharding("app")' \
"clusterAdmin:clusterAdmin123456@$cluster-mongos.$namespace" "mongodb" ".svc.cluster.local" \
"--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls"
sleep 2

if [[ $out -ne 0 ]]; then
((shards = shards + 1))
run_mongos \
'sh.shardCollection("app.city", { _id: 1 } )' \
"clusterAdmin:clusterAdmin123456@$cluster-mongos.$namespace" "mongodb" ".svc.cluster.local" \
"--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls"
sleep 120

desc 'check chunks'
chunks_param1="ns"
chunks_param2='"app.city"'

if [[ ${MONGO_VER} == "6.0" || ${MONGO_VER} == "5.0" ]]; then
chunks_param1="uuid"
chunks_param2=$(run_mongos \
"use app\n db.getCollectionInfos({ \"name\": \"city\" })[0].info.uuid" \
"user:pass@$cluster-mongos.$namespace" \
'' \
'' \
"--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls" \
| grep "switched to db app" -A 1 | grep -v "switched to db app")
fi
done

if [[ $shards -lt 3 ]]; then
echo "data is only on some of the shards, maybe sharding is not working"
exit 1
fi
shards=0
for i in "rs0" "rs1" "rs2"; do
out=$(run_mongos \
"use config\n db.chunks.count({\"${chunks_param1}\": ${chunks_param2}, \"shard\": \"$i\"})" \
"clusterAdmin:clusterAdmin123456@$cluster-mongos.$namespace" "mongodb" ".svc.cluster.local" \
"--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls" \
| grep "switched to db config" -A 1 | grep -v "switched to db config")

desc "$i has $out chunks"

if [[ $out -ne 0 ]]; then
((shards = shards + 1))
fi
done

if [[ $shards -lt 3 ]]; then
echo "data is only on some of the shards, maybe sharding is not working"
exit 1
fi

# Drop non system database so we can remove shards
res=$(run_mongos \
"use app\n db.dropDatabase()" \
"clusterAdmin:clusterAdmin123456@$cluster-mongos.$namespace" "mongodb" ".svc.cluster.local" \
"--tlsCertificateKeyFile /tmp/tls.pem --tlsCAFile /etc/mongodb-ssl/ca.crt --tls")
if ! echo $res | grep -q '"ok" : 1'; then
echo "app database not dropped. Exiting.."
exit 1
fi

desc 'check if rs1 and all its related stateful sets are properly removed'
check_rs_proper_component_deletion $cluster rs1

desc 'check if rs2 and all its related stateful sets are properly removed'
check_rs_proper_component_deletion $cluster rs2

destroy "$namespace"

destroy "$namespace"
desc 'test passed'
}

desc 'test passed'
main
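
The new check_rs_proper_component_deletion helper above does two things: it patches the replset entry out of the custom resource by its array index, then polls until no statefulsets carrying that replset's labels remain. Distilled outside the test harness, a sketch of the same sequence, assuming kubectl and jq are on the PATH and using hypothetical names:

    # Remove replset "rs1" from cluster "some-name", then wait for cleanup.
    idx=$(kubectl get psmdb some-name -o json \
        | jq --arg RS rs1 '.spec.replsets | map(.name == $RS) | index(true)')
    kubectl patch psmdb some-name --type=json \
        -p="[{\"op\": \"remove\", \"path\": \"/spec/replsets/$idx\"}]"
    until [[ $(kubectl get sts \
        -l app.kubernetes.io/instance=some-name,app.kubernetes.io/replset=rs1 \
        -o json | jq '.items | length') -eq 0 ]]; do
        sleep 10
    done

The index lookup matters because RFC 6902 JSON Patch addresses array elements by position, not by name, which is why the helper resolves the replset name to an index with jq first.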
8 changes: 4 additions & 4 deletions e2e-tests/split-horizon/run
@@ -43,7 +43,7 @@ sleep 15

set -o xtrace

run_mongo "rs.conf().members.map(function(member) { return member.horizons })" \
run_mongo "rs.conf().members.map(function(member) { return member.horizons }).sort((a, b) => a.external.localeCompare(b.external))" \
"clusterAdmin:[email protected],some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" \
mongodb "" "--quiet" >${tmp_dir}/horizons-3.json
diff $test_dir/compare/horizons-3.json $tmp_dir/horizons-3.json
@@ -78,7 +78,7 @@ kubectl_bin patch psmdb ${cluster} \
wait_for_running "${cluster}-rs0" 5
wait_cluster_consistency ${cluster}

run_mongo "rs.conf().members.map(function(member) { return member.horizons })" \
run_mongo "rs.conf().members.map(function(member) { return member.horizons }).sort((a, b) => a.external.localeCompare(b.external))" \
"clusterAdmin:[email protected],some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" \
mongodb "" "--quiet" >${tmp_dir}/horizons-5.json
diff $test_dir/compare/horizons-5.json $tmp_dir/horizons-5.json
@@ -89,7 +89,7 @@ kubectl_bin patch psmdb ${cluster} \
wait_for_running "${cluster}-rs0" 3
wait_cluster_consistency ${cluster}

run_mongo "rs.conf().members.map(function(member) { return member.horizons })" \
run_mongo "rs.conf().members.map(function(member) { return member.horizons }).sort((a, b) => a.external.localeCompare(b.external))" \
"clusterAdmin:[email protected],some-name-rs0-1.clouddemo.xyz,some-name-rs0-2.clouddemo.xyz" \
mongodb "" "--quiet" >${tmp_dir}/horizons.json
diff $test_dir/compare/horizons-3.json $tmp_dir/horizons-3.json
@@ -98,4 +98,4 @@ apply_cluster ${test_dir}/conf/${cluster}.yml
wait_for_running "${cluster}-rs0" 3
wait_cluster_consistency ${cluster}

destroy ${namespace}
destroy ${namespace}
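
The split-horizon change above is a determinism fix: the order of rs.conf().members is not guaranteed to be stable across reconfigurations, so the mapped horizons array is now sorted by its external hostname before being diffed against the golden files. The same stabilization could be applied to the saved output instead; a sketch, assuming the captured file is valid JSON and jq is available:

    # Sort an array of horizon objects by .external so diffs are order-stable.
    jq 'sort_by(.external)' horizons.json > horizons.sorted.json
    diff expected-horizons.json horizons.sorted.json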
4 changes: 2 additions & 2 deletions pkg/controller/perconaservermongodb/mgo.go
@@ -50,7 +50,7 @@ func (r *ReconcilePerconaServerMongoDB) reconcileCluster(ctx context.Context, cr
return api.AppStateReady, nil
}

pods, err := psmdb.GetRSPods(ctx, r.client, cr, replset.Name, false)
pods, err := psmdb.GetRSPods(ctx, r.client, cr, replset.Name)
if err != nil {
return api.AppStateInit, errors.Wrap(err, "failed to get replset pods")
}
@@ -238,7 +238,7 @@ func (r *ReconcilePerconaServerMongoDB) updateConfigMembers(ctx context.Context,
// Primary with a Secondary and an Arbiter (PSA)
unsafePSA := cr.Spec.UnsafeConf && rs.Arbiter.Enabled && rs.Arbiter.Size == 1 && !rs.NonVoting.Enabled && rs.Size == 2

pods, err := psmdb.GetRSPods(ctx, r.client, cr, rs.Name, false)
pods, err := psmdb.GetRSPods(ctx, r.client, cr, rs.Name)
if err != nil {
return 0, errors.Wrap(err, "get rs pods")
}
(Diffs for the remaining 8 changed files are not shown.)
