K8SPSMDB-1003: Kubernetes node zone/region tag (#1360)
* K8SPSMDB-1003 - kubernetes node tags zone/region

Add Kubernetes node tags zone/region to the mongo nodes.

* Remove warning message if we do not have the special permission.

* fix test

* fix cross-site test

* fix image

* update test

* delete unused

* update cross-site test

* fix PR comments

* fix

* fix

* fix

* fix

---------

Co-authored-by: Viacheslav Sarzhan <[email protected]>
Co-authored-by: Natalia Marukovich <[email protected]>
Co-authored-by: Natalia Marukovich <[email protected]>
Co-authored-by: Inel Pandzic <[email protected]>
5 people authored Apr 24, 2024
1 parent 282394a commit 028e8c3
Showing 6 changed files with 88 additions and 14 deletions.
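
For context, the net effect of this commit: every replset member the operator configures now carries the Kubernetes node's topology tags alongside the existing pod/service tags. A minimal sketch of the resulting tag set for a "mongod" or "cfg" member, assuming the node carries the standard topology labels (all values below are illustrative, mirroring the map built in mgo.go further down):

    // Sketch only: the tags a "mongod"/"cfg" member ends up with after this
    // change; the values are examples, not output from a real cluster.
    tags := mongo.ReplsetTags{
        "nodeName":    "gke-node-1",       // pod.Spec.NodeName
        "podName":     "my-cluster-rs0-0", // pod.Name
        "serviceName": "my-cluster",       // cr.Name
        "region":      "us-central1",      // node label topology.kubernetes.io/region
        "zone":        "us-central1-a",    // node label topology.kubernetes.io/zone
    }
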
8 changes: 8 additions & 0 deletions deploy/cw-bundle.yaml
@@ -18336,6 +18336,14 @@ rules:
- update
- patch
- delete
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
8 changes: 8 additions & 0 deletions deploy/cw-rbac.yaml
@@ -35,6 +35,14 @@ rules:
- update
- patch
- delete
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
36 changes: 32 additions & 4 deletions e2e-tests/cross-site-sharded/run
@@ -13,6 +13,26 @@ unset OPERATOR_NS
main_cluster="cross-site-sharded-main"
replica_cluster="cross-site-sharded-replica"

wait_for_members() {
local endpoint="$1"
local rsName="$2"
local nodes_amount=0
local retry=0
until [[ ${nodes_amount} == 6 ]]; do
nodes_amount=$(run_mongos 'rs.conf().members.length' "clusterAdmin:clusterAdmin123456@$endpoint" "mongodb" ":27017" \
| egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history file:|bye' \
| $sed -re 's/ObjectId\("[0-9a-f]+"\)//; s/-[0-9]+.svc/-xxx.svc/')

echo "waiting for all members to be configured in ${rsName}"
let retry+=1
if [ $retry -ge 15 ]; then
echo "Max retry count $retry reached. something went wrong with mongo cluster. Config for endpoint $endpoint has $nodes_amount but expected 6."
exit 1
fi
echo -n .
sleep 10
done
}

desc "create main cluster"
create_infra "$namespace"

@@ -118,7 +138,10 @@ sleep 30

desc "create replica PSMDB cluster $cluster"
apply_cluster "$test_dir/conf/${replica_cluster}.yml"
sleep 300

wait_for_running $replica_cluster-rs0 3 "false"
wait_for_running $replica_cluster-rs1 3 "false"
wait_for_running $replica_cluster-cfg 3 "false"

replica_cfg_0_endpoint=$(get_service_ip cross-site-sharded-replica-cfg-0 'cfg')
replica_cfg_1_endpoint=$(get_service_ip cross-site-sharded-replica-cfg-1 'cfg')
@@ -141,7 +164,10 @@ kubectl_bin patch psmdb ${main_cluster} --type=merge --patch '{
}
}'

sleep 60
wait_for_members $replica_cfg_0_endpoint cfg
wait_for_members $replica_rs0_0_endpoint rs0
wait_for_members $replica_rs1_0_endpoint rs1

kubectl_bin config set-context $(kubectl_bin config current-context) --namespace="$replica_namespace"

desc 'check if all 3 Pods started'
@@ -165,8 +191,8 @@ compare_mongos_cmd "find" "myApp:myPass@$main_cluster-mongos.$namespace"

desc 'test failover'
kubectl_bin config set-context $(kubectl_bin config current-context) --namespace="$namespace"

kubectl_bin delete psmdb $main_cluster
sleep 60

desc 'run disaster recovery script for replset: cfg'
run_script_mongos "${test_dir}/disaster_recovery.js" "clusterAdmin:clusterAdmin123456@$replica_cfg_0_endpoint" "mongodb" ":27017"
@@ -180,7 +206,9 @@ run_script_mongos "${test_dir}/disaster_recovery.js" "clusterAdmin:clusterAdmin1
desc 'make replica cluster managed'
kubectl_bin config set-context $(kubectl_bin config current-context) --namespace="$replica_namespace"
kubectl_bin patch psmdb ${replica_cluster} --type=merge --patch '{"spec":{"unmanaged": false}}'
sleep 120

wait_for_running $replica_cluster-rs0 3
wait_for_running $replica_cluster-cfg 3

desc "check failover status"
compare_mongos_cmd "find" "myApp:myPass@$replica_cluster-mongos.$replica_namespace"
2 changes: 2 additions & 0 deletions e2e-tests/serviceless-external-nodes/run
@@ -46,6 +46,8 @@ cat $tmp_dir/psmdb.yaml \

wait_cluster_consistency ${cluster}

# wait until the replset config is ready
sleep 30
run_mongo 'rs.status().members.forEach(function(z){printjson(z.name);printjson(z.stateStr); })' "clusterAdmin:clusterAdmin123456@${cluster}-rs0-0.${cluster}-rs0.${namespace}" "mongodb" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history file:|bye' >"$tmp_dir/rs.txt"

cat "${test_dir}/compare/rs.txt" \
30 changes: 20 additions & 10 deletions pkg/controller/perconaservermongodb/mgo.go
@@ -19,6 +19,7 @@ import (
api "github.com/percona/percona-server-mongodb-operator/pkg/apis/psmdb/v1"
"github.com/percona/percona-server-mongodb-operator/pkg/psmdb"
"github.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo"
"github.com/percona/percona-server-mongodb-operator/pkg/util"
)

var errReplsetLimit = fmt.Errorf("maximum replset member (%d) count reached", mongo.MaxMembers)
@@ -267,6 +268,20 @@ func (r *ReconcilePerconaServerMongoDB) updateConfigMembers(ctx context.Context,
return 0, fmt.Errorf("get host for pod %s: %v", pod.Name, err)
}

nodeLabels := mongo.ReplsetTags{
"nodeName": pod.Spec.NodeName,
"podName": pod.Name,
"serviceName": cr.Name,
}

labels, err := psmdb.GetNodeLabels(ctx, r.client, cr, pod)
if err == nil {
nodeLabels = util.MapMerge(nodeLabels, mongo.ReplsetTags{
"region": labels[corev1.LabelTopologyRegion],
"zone": labels[corev1.LabelTopologyZone],
})
}

member := mongo.ConfigMember{
ID: key,
Host: host,
@@ -293,16 +308,11 @@ func (r *ReconcilePerconaServerMongoDB) updateConfigMembers(ctx context.Context,
member.ArbiterOnly = true
member.Priority = 0
case "mongod", "cfg":
member.Tags = mongo.ReplsetTags{
"podName": pod.Name,
"serviceName": cr.Name,
}
member.Tags = nodeLabels
case "nonVoting":
member.Tags = mongo.ReplsetTags{
"podName": pod.Name,
"serviceName": cr.Name,
"nonVoting": "true",
}
member.Tags = util.MapMerge(mongo.ReplsetTags{
"nonVoting": "true",
}, nodeLabels)
member.Priority = 0
member.Votes = 0
}
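
The tag merging above uses util.MapMerge from the repo's pkg/util package, whose implementation is not part of this diff. A minimal sketch under the assumed semantics (copy the first map, overlay the second) would be:

    // Hypothetical sketch of util.MapMerge, not the actual pkg/util code:
    // keys from m2 overwrite keys from m1 on conflict.
    func MapMerge[M ~map[K]V, K comparable, V any](m1, m2 M) M {
        out := make(M, len(m1)+len(m2))
        for k, v := range m1 {
            out[k] = v
        }
        for k, v := range m2 {
            out[k] = v
        }
        return out
    }

The overwrite direction is an assumption; in both call sites above the key sets are disjoint ("region"/"zone" vs. the base tags, "nonVoting" vs. the node labels), so either order yields the same result.
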
@@ -597,7 +607,7 @@ func (r *ReconcilePerconaServerMongoDB) handleReplsetInit(ctx context.Context, c
"sh", "-c",
fmt.Sprintf(
`
cat <<-EOF | %s
rs.initiate(
{
_id: '%s',
18 changes: 18 additions & 0 deletions pkg/psmdb/getters.go
@@ -3,6 +3,7 @@ package psmdb
import (
"context"
"sort"
"time"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
@@ -165,3 +166,20 @@ func GetExportedServices(ctx context.Context, cl client.Client, cr *api.PerconaS

return seList, nil
}

func GetNodeLabels(ctx context.Context, cl client.Client, cr *api.PerconaServerMongoDB, pod corev1.Pod) (map[string]string, error) {
// Set a timeout for the request, to avoid hanging forever
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()

node := &corev1.Node{}

err := cl.Get(ctx, client.ObjectKey{
Name: pod.Spec.NodeName,
}, node)
if err != nil {
return nil, errors.Wrapf(err, "failed to get node %s", pod.Spec.NodeName)
}

return node.Labels, nil
}
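
The label keys read in mgo.go are the well-known topology labels that k8s.io/api/core/v1 exposes as constants; their per-node values are set by the kubelet/cloud provider, which is why GetNodeLabels only needs read access to Node objects (the RBAC rule added above). A short illustration, where nodeLabels stands for the map returned by GetNodeLabels:

    // Well-known topology label keys from k8s.io/api/core/v1:
    region := nodeLabels[corev1.LabelTopologyRegion] // "topology.kubernetes.io/region"
    zone := nodeLabels[corev1.LabelTopologyZone]     // "topology.kubernetes.io/zone"

If the ServiceAccount lacks that rule, the Get fails and, per the commit message, the controller now skips the region/zone tags silently instead of logging a warning.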
