From 4c2d2caf9fa60f14ae6d7fcd4cc4254b76013362 Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Thu, 18 Apr 2024 14:02:29 +0200 Subject: [PATCH] util: add support to configure mirror daemon count Currently we are assuming that only one rbd mirror daemon is running on the ceph cluster, but that is not true in many cases and there can be more than one; this PR makes this a configurable parameter. fixes: #4312 Signed-off-by: Madhu Rajanna --- charts/ceph-csi-rbd/values.yaml | 1 + deploy/csi-config-map-sample.yaml | 3 + internal/rbd/rbd_util.go | 6 +- internal/util/csiconfig.go | 17 ++++ internal/util/csiconfig_test.go | 88 +++++++++++++++++++ .../api/deploy/kubernetes/csi-config-map.go | 2 + 6 files changed, 116 insertions(+), 1 deletion(-) diff --git a/charts/ceph-csi-rbd/values.yaml b/charts/ceph-csi-rbd/values.yaml index 0a8195e7402..a3821ff7bb1 100644 --- a/charts/ceph-csi-rbd/values.yaml +++ b/charts/ceph-csi-rbd/values.yaml @@ -27,6 +27,7 @@ serviceAccounts: # - "" # rbd: # netNamespaceFilePath: "{{ .kubeletDir }}/plugins/{{ .driverName }}/net" +# mirrorDaemonCount: 1 # readAffinity: # enabled: true # crushLocationLabels: diff --git a/deploy/csi-config-map-sample.yaml b/deploy/csi-config-map-sample.yaml index d44f9690506..e0263a0d8c9 100644 --- a/deploy/csi-config-map-sample.yaml +++ b/deploy/csi-config-map-sample.yaml @@ -19,6 +19,8 @@ kind: ConfigMap # NOTE: The given radosNamespace must already exists in the pool. # NOTE: Make sure you don't add radosNamespace option to a currently in use # configuration as it will cause issues. +# The "rbd.mirrorDaemonCount" is optional and represents the total number of +# RBD mirror daemons running on the ceph cluster. # The field "cephFS.subvolumeGroup" is optional and defaults to "csi". # NOTE: The given subvolumeGroup must already exist in the filesystem. 
# The "cephFS.netNamespaceFilePath" fields are the various network namespace @@ -64,6 +66,7 @@ data: "rbd": { "netNamespaceFilePath": "/plugins/rbd.csi.ceph.com/net", "radosNamespace": "", + "mirrorDaemonCount": 1, }, "monitors": [ "", diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index e3852893295..fa3f68a7749 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -553,9 +553,13 @@ func (ri *rbdImage) isInUse() (bool, error) { // because we opened the image, there is at least one watcher defaultWatchers := 1 if mirrorInfo.Primary { + count, err := util.GetRBDMirrorDaemonCount(util.CsiConfigFile, ri.ClusterID) + if err != nil { + return false, err + } // if rbd mirror daemon is running, a watcher will be added by the rbd // mirror daemon for mirrored images. - defaultWatchers++ + defaultWatchers += count } return len(watchers) > defaultWatchers, nil diff --git a/internal/util/csiconfig.go b/internal/util/csiconfig.go index 0982172d867..6d3e7109ae0 100644 --- a/internal/util/csiconfig.go +++ b/internal/util/csiconfig.go @@ -45,6 +45,7 @@ const ( "clusterID": "", "rbd": { "radosNamespace": "" + "mirrorDaemonCount": 1 }, "monitors": [ "", @@ -105,6 +106,22 @@ func GetRadosNamespace(pathToConfig, clusterID string) (string, error) { return cluster.RBD.RadosNamespace, nil } +// GetRBDMirrorDaemonCount returns the number of RBD mirror daemons configured +// for the given clusterID. If not set, it returns the default value 1. +func GetRBDMirrorDaemonCount(pathToConfig, clusterID string) (int, error) { + cluster, err := readClusterInfo(pathToConfig, clusterID) + if err != nil { + return 0, err + } + + // if it is empty, set the default to 1 which is most common in a cluster. + if cluster.RBD.MirrorDaemonCount == 0 { + return 1, nil + } + + return cluster.RBD.MirrorDaemonCount, nil +} + // CephFSSubvolumeGroup returns the subvolumeGroup for CephFS volumes. If not set, it returns the default value "csi". 
func CephFSSubvolumeGroup(pathToConfig, clusterID string) (string, error) { cluster, err := readClusterInfo(pathToConfig, clusterID) diff --git a/internal/util/csiconfig_test.go b/internal/util/csiconfig_test.go index 8da09907689..c616d4c3adb 100644 --- a/internal/util/csiconfig_test.go +++ b/internal/util/csiconfig_test.go @@ -17,11 +17,14 @@ limitations under the License. package util import ( + "bytes" "encoding/json" "os" "testing" cephcsi "github.com/ceph/ceph-csi/api/deploy/kubernetes" + + "github.com/stretchr/testify/require" ) var ( @@ -530,3 +533,88 @@ func TestGetCephFSMountOptions(t *testing.T) { }) } } + +func TestGetRBDMirrorDaemonCount(t *testing.T) { + t.Parallel() + tests := []struct { + name string + clusterID string + want int + }{ + { + name: "get rbd mirror daemon count for cluster-1", + clusterID: "cluster-1", + want: 2, + }, + { + name: "get rbd mirror daemon count for cluster-2", + clusterID: "cluster-2", + want: 4, + }, + { + name: "when rbd mirror daemon count is empty", + clusterID: "cluster-3", + want: 1, // default mirror daemon count + }, + } + + csiConfig := []cephcsi.ClusterInfo{ + { + ClusterID: "cluster-1", + Monitors: []string{"ip-1", "ip-2"}, + RBD: cephcsi.RBD{ + MirrorDaemonCount: 2, + }, + }, + { + ClusterID: "cluster-2", + Monitors: []string{"ip-3", "ip-4"}, + RBD: cephcsi.RBD{ + MirrorDaemonCount: 4, + }, + }, + { + ClusterID: "cluster-3", + Monitors: []string{"ip-5", "ip-6"}, + }, + } + csiConfigFileContent, err := json.Marshal(csiConfig) + if err != nil { + t.Errorf("failed to marshal csi config info %v", err) + } + tmpConfPath := t.TempDir() + "/ceph-csi.json" + err = os.WriteFile(tmpConfPath, csiConfigFileContent, 0o600) + if err != nil { + t.Errorf("failed to write %s file content: %v", CsiConfigFile, err) + } + for _, tt := range tests { + ts := tt + t.Run(ts.name, func(t *testing.T) { + t.Parallel() + var got int + got, err = GetRBDMirrorDaemonCount(tmpConfPath, ts.clusterID) + if err != nil { + 
t.Errorf("GetRBDMirrorDaemonCount() error = %v", err) + + return + } + if got != ts.want { + t.Errorf("GetRBDMirrorDaemonCount() = %v, want %v", got, ts.want) + } + }) + } + + // when mirrorDaemonCount is set as string + csiConfigFileContent = bytes.Replace( + csiConfigFileContent, + []byte(`"mirrorDaemonCount":2`), + []byte(`"mirrorDaemonCount":"2"`), + 1) + tmpCSIConfPath := t.TempDir() + "/ceph-csi.json" + err = os.WriteFile(tmpCSIConfPath, csiConfigFileContent, 0o600) + if err != nil { + t.Errorf("failed to write %s file content: %v", CsiConfigFile, err) + } + _, err = GetRBDMirrorDaemonCount(tmpCSIConfPath, "test") + require.Error(t, err) +} diff --git a/vendor/github.com/ceph/ceph-csi/api/deploy/kubernetes/csi-config-map.go b/vendor/github.com/ceph/ceph-csi/api/deploy/kubernetes/csi-config-map.go index 4d237b3c51f..0c418e23d11 100644 --- a/vendor/github.com/ceph/ceph-csi/api/deploy/kubernetes/csi-config-map.go +++ b/vendor/github.com/ceph/ceph-csi/api/deploy/kubernetes/csi-config-map.go @@ -46,6 +46,8 @@ type RBD struct { NetNamespaceFilePath string `json:"netNamespaceFilePath"` // RadosNamespace is a rados namespace in the pool RadosNamespace string `json:"radosNamespace"` + // RBD mirror daemons running in the ceph cluster. + MirrorDaemonCount int `json:"mirrorDaemonCount"` } type NFS struct {