diff --git a/go.mod b/go.mod index 5d62a3ebbd9..e1e7a3c3dd5 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/aws/aws-sdk-go v1.55.5 github.com/aws/aws-sdk-go-v2/service/sts v1.31.3 github.com/ceph/ceph-csi/api v0.0.0-00010101000000-000000000000 - github.com/ceph/go-ceph v0.29.0 + github.com/ceph/go-ceph v0.29.1-0.20240925141413-065319c78733 github.com/container-storage-interface/spec v1.10.0 github.com/csi-addons/spec v0.2.1-0.20240730084235-3958a5b17d24 github.com/gemalto/kmip-go v0.0.10 diff --git a/go.sum b/go.sum index 66757015b9c..a214bae6b35 100644 --- a/go.sum +++ b/go.sum @@ -1440,8 +1440,8 @@ github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyY github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= -github.com/ceph/go-ceph v0.29.0 h1:pJQY+++PyY2FMP0ffVaE7FbIdivemBPCu4MWr4S8CtI= -github.com/ceph/go-ceph v0.29.0/go.mod h1:U/l216/AzIWrFe7ny+9cJ5Yjzyb5otrEGfdrU5VtCME= +github.com/ceph/go-ceph v0.29.1-0.20240925141413-065319c78733 h1:vX8mfbKwE24CbO5t1Xc02u53pjWAsyvjgAMJk7o7nsQ= +github.com/ceph/go-ceph v0.29.1-0.20240925141413-065319c78733/go.mod h1:OJFju/Xmtb7ihHo/aXOayw6RhVOUGNke5EwTipwaf6A= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= @@ -1647,8 +1647,8 @@ github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MG github.com/goccy/go-yaml v1.9.8/go.mod h1:JubOolP3gh0HpiBc4BLRD4YmjEjHAmIIB2aaXKkTfoE= github.com/goccy/go-yaml v1.11.0/go.mod h1:H+mJrWtjPTJAHvRbV09MCK9xYwODM+wRTVFFTWckfng= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gofrs/uuid v4.4.0+incompatible h1:3qXRTX8/NbyulANqlc0lchS1gqAVxRgsuW1YrTJupqA= -github.com/gofrs/uuid v4.4.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM= +github.com/gofrs/uuid/v5 v5.3.0 h1:m0mUMr+oVYUdxpMLgSYCZiXe7PuVPnI94+OMeVBNedk= +github.com/gofrs/uuid/v5 v5.3.0/go.mod h1:CDOjlDMVAtN56jqyRUZh58JT31Tiw7/oQyEXZV+9bD8= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= diff --git a/internal/rbd/controllerserver.go b/internal/rbd/controllerserver.go index f0ac0a47956..1745dbc3793 100644 --- a/internal/rbd/controllerserver.go +++ b/internal/rbd/controllerserver.go @@ -881,9 +881,9 @@ func (cs *ControllerServer) checkErrAndUndoReserve( } if errors.Is(err, ErrImageNotFound) { - err = rbdVol.ensureImageCleanup(ctx) - if err != nil { - return nil, status.Error(codes.Internal, err.Error()) + notFoundErr := rbdVol.ensureImageCleanup(ctx) + if notFoundErr != nil { + return nil, status.Errorf(codes.Internal, "failed to cleanup image %q: %v", rbdVol, notFoundErr) } } else { // All errors other than ErrImageNotFound should return an error back to the caller @@ -1538,11 +1538,6 @@ func cleanUpImageAndSnapReservation(ctx context.Context, rbdSnap *rbdSnapshot, c } defer rbdVol.Destroy(ctx) - err = rbdVol.openIoctx() - if err != nil { - return status.Error(codes.Internal, err.Error()) - } - // cleanup the image from trash if the error is image not found. err = rbdVol.ensureImageCleanup(ctx) if err != nil { diff --git a/internal/rbd/group/volume_group.go b/internal/rbd/group/volume_group.go index 7de46131a3a..d23b26d99b3 100644 --- a/internal/rbd/group/volume_group.go +++ b/internal/rbd/group/volume_group.go @@ -20,7 +20,6 @@ import ( "context" "errors" "fmt" - "strings" "github.com/ceph/go-ceph/rados" librbd "github.com/ceph/go-ceph/rbd" @@ -164,7 +163,7 @@ func (vg *volumeGroup) Create(ctx context.Context) error { err = librbd.GroupCreate(ioctx, name) if err != nil { - if !errors.Is(rados.ErrObjectExists, err) && !strings.Contains(err.Error(), "rbd: ret=-17, File exists") { + if !errors.Is(err, librbd.ErrExist) { return fmt.Errorf("failed to create volume group %q: %w", name, err) } diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index 1beb4f5318f..3b72b7c4c86 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -621,6 +621,11 @@ func isCephMgrSupported(ctx context.Context, clusterID string, err error) bool { // ensureImageCleanup finds image in trash and if found removes it // from trash. func (ri *rbdImage) ensureImageCleanup(ctx context.Context) error { + err := ri.openIoctx() + if err != nil { + return err + } + trashInfoList, err := librbd.GetTrashList(ri.ioctx) if err != nil { log.ErrorLog(ctx, "failed to list images in trash: %v", err) @@ -1214,8 +1219,7 @@ func GenVolFromVolID( } vol, err = generateVolumeFromVolumeID(ctx, volumeID, vi, cr, secrets) - if !errors.Is(err, util.ErrKeyNotFound) && !errors.Is(err, util.ErrPoolNotFound) && - !errors.Is(err, ErrImageNotFound) { + if !shouldRetryVolumeGeneration(err) { return vol, err } @@ -1226,8 +1230,7 @@ func GenVolFromVolID( } if mapping != nil { rbdVol, vErr := generateVolumeFromMapping(ctx, mapping, volumeID, vi, cr, secrets) - if !errors.Is(vErr, util.ErrKeyNotFound) && !errors.Is(vErr, util.ErrPoolNotFound) && - !errors.Is(vErr, ErrImageNotFound) { + if !shouldRetryVolumeGeneration(vErr) { return rbdVol, vErr } } @@ -1280,8 +1283,7 @@ func generateVolumeFromMapping( // Add mapping poolID to Identifier nvi.LocationID = pID vol, err = generateVolumeFromVolumeID(ctx, volumeID, nvi, cr, secrets) - if !errors.Is(err, util.ErrKeyNotFound) && !errors.Is(err, util.ErrPoolNotFound) && - !errors.Is(err, ErrImageNotFound) { + if !shouldRetryVolumeGeneration(err) { return vol, err } } @@ -1292,6 +1294,33 @@ func generateVolumeFromMapping( return vol, util.ErrPoolNotFound } +// shouldRetryVolumeGeneration determines whether the process of finding or generating +// volumes should continue based on the type of error encountered. +// +// It checks if the given error matches any of the following known errors: +// - util.ErrKeyNotFound: The key required to locate the volume is missing in Rados omap. +// - util.ErrPoolNotFound: The rbd pool where the volume/omap is expected doesn't exist. +// - ErrImageNotFound: The image doesn't exist in the rbd pool. +// - rados.ErrPermissionDenied: Permissions to access the pool is denied. +// +// If any of these errors are encountered, the function returns `true`, indicating +// that the volume search should continue because of known error. Otherwise, it +// returns `false`, meaning the search should stop. +// +// This helper function is used in scenarios where multiple attempts may be made +// to retrieve or generate volume information, and we want to gracefully handle +// specific failure cases while retrying for others. +func shouldRetryVolumeGeneration(err error) bool { + if err == nil { + return false // No error, do not retry + } + // Continue searching for specific known errors + return (errors.Is(err, util.ErrKeyNotFound) || + errors.Is(err, util.ErrPoolNotFound) || + errors.Is(err, ErrImageNotFound) || + errors.Is(err, rados.ErrPermissionDenied)) +} + func genVolFromVolumeOptions( ctx context.Context, volOptions map[string]string, diff --git a/internal/rbd/rbd_util_test.go b/internal/rbd/rbd_util_test.go index 905e977079c..5d14ed844a7 100644 --- a/internal/rbd/rbd_util_test.go +++ b/internal/rbd/rbd_util_test.go @@ -23,8 +23,11 @@ import ( "strings" "testing" + "github.com/ceph/go-ceph/rados" librbd "github.com/ceph/go-ceph/rbd" "github.com/stretchr/testify/require" + + "github.com/ceph/ceph-csi/internal/util" ) func TestHasSnapshotFeature(t *testing.T) { @@ -387,3 +390,54 @@ func Test_checkValidImageFeatures(t *testing.T) { }) } } + +func Test_shouldRetryVolumeGeneration(t *testing.T) { + t.Parallel() + type args struct { + err error + } + tests := []struct { + name string + args args + want bool + }{ + { + name: "No error (stop searching)", + args: args{err: nil}, + want: false, // No error, stop searching + }, + { + name: "ErrKeyNotFound (continue searching)", + args: args{err: util.ErrKeyNotFound}, + want: true, // Known error, continue searching + }, + { + name: "ErrPoolNotFound (continue searching)", + args: args{err: util.ErrPoolNotFound}, + want: true, // Known error, continue searching + }, + { + name: "ErrImageNotFound (continue searching)", + args: args{err: ErrImageNotFound}, + want: true, // Known error, continue searching + }, + { + name: "ErrPermissionDenied (continue searching)", + args: args{err: rados.ErrPermissionDenied}, + want: true, // Known error, continue searching + }, + { + name: "Different error (stop searching)", + args: args{err: errors.New("unknown error")}, + want: false, // Unknown error, stop searching + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + if got := shouldRetryVolumeGeneration(tt.args.err); got != tt.want { + t.Errorf("shouldRetryVolumeGeneration() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/vendor/github.com/ceph/go-ceph/internal/dlsym/dlsym.go b/vendor/github.com/ceph/go-ceph/internal/dlsym/dlsym.go index 9e5782c4d50..0c1ef1ef7bc 100644 --- a/vendor/github.com/ceph/go-ceph/internal/dlsym/dlsym.go +++ b/vendor/github.com/ceph/go-ceph/internal/dlsym/dlsym.go @@ -2,12 +2,10 @@ package dlsym // #cgo LDFLAGS: -ldl // +// #define _GNU_SOURCE +// // #include // #include -// -// #ifndef RTLD_DEFAULT /* from dlfcn.h */ -// #define RTLD_DEFAULT ((void *) 0) -// #endif import "C" import ( diff --git a/vendor/github.com/ceph/go-ceph/rados/snapshot.go b/vendor/github.com/ceph/go-ceph/rados/snapshot.go index 183a119d2cc..46b87aa6522 100644 --- a/vendor/github.com/ceph/go-ceph/rados/snapshot.go +++ b/vendor/github.com/ceph/go-ceph/rados/snapshot.go @@ -85,8 +85,8 @@ func (ioctx *IOContext) GetSnapName(snapID SnapID) (string, error) { err error ) // range from 1k to 64KiB - retry.WithSizes(1024, 1<<16, func(len int) retry.Hint { - cLen := C.int(len) + retry.WithSizes(1024, 1<<16, func(length int) retry.Hint { + cLen := C.int(length) buf = make([]byte, cLen) ret := C.rados_ioctx_snap_get_name( ioctx.ioctx, diff --git a/vendor/github.com/ceph/go-ceph/rados/watcher.go b/vendor/github.com/ceph/go-ceph/rados/watcher.go index 7cd7e90f3d8..a99c77bc00a 100644 --- a/vendor/github.com/ceph/go-ceph/rados/watcher.go +++ b/vendor/github.com/ceph/go-ceph/rados/watcher.go @@ -298,11 +298,11 @@ func (c *Conn) WatcherFlush() error { // // NOTE: starting with pacific this is implemented as a C function and this can // be replaced later -func decodeNotifyResponse(response *C.char, len C.size_t) ([]NotifyAck, []NotifyTimeout) { - if len == 0 || response == nil { +func decodeNotifyResponse(response *C.char, length C.size_t) ([]NotifyAck, []NotifyTimeout) { + if length == 0 || response == nil { return nil, nil } - b := (*[math.MaxInt32]byte)(unsafe.Pointer(response))[:len:len] + b := (*[math.MaxInt32]byte)(unsafe.Pointer(response))[:length:length] pos := 0 num := binary.LittleEndian.Uint32(b[pos:]) diff --git a/vendor/github.com/ceph/go-ceph/rbd/errors.go b/vendor/github.com/ceph/go-ceph/rbd/errors.go index 4d98cce7c4e..e26e9d1356a 100644 --- a/vendor/github.com/ceph/go-ceph/rbd/errors.go +++ b/vendor/github.com/ceph/go-ceph/rbd/errors.go @@ -73,6 +73,8 @@ var ( // Public general error const ( + // ErrExist indicates a non-specific already existing resource. + ErrExist = rbdError(-C.EEXIST) // ErrNotExist indicates a non-specific missing resource. ErrNotExist = rbdError(-C.ENOENT) // ErrNotImplemented indicates a function is not implemented in by librbd. diff --git a/vendor/github.com/ceph/go-ceph/rbd/snapshot_octopus.go b/vendor/github.com/ceph/go-ceph/rbd/snapshot_octopus.go index 86b8e77d3f3..4bd008a7405 100644 --- a/vendor/github.com/ceph/go-ceph/rbd/snapshot_octopus.go +++ b/vendor/github.com/ceph/go-ceph/rbd/snapshot_octopus.go @@ -50,8 +50,8 @@ func (image *Image) GetSnapByID(snapID uint64) (string, error) { err error ) // range from 1k to 64KiB - retry.WithSizes(1024, 1<<16, func(len int) retry.Hint { - cLen := C.size_t(len) + retry.WithSizes(1024, 1<<16, func(length int) retry.Hint { + cLen := C.size_t(length) buf = make([]byte, cLen) ret := C.rbd_snap_get_name( image.image, diff --git a/vendor/modules.txt b/vendor/modules.txt index f2ca75b2e13..6a24b1fb5e5 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -205,7 +205,7 @@ github.com/ceph/ceph-csi/api/deploy/kubernetes/cephfs github.com/ceph/ceph-csi/api/deploy/kubernetes/nfs github.com/ceph/ceph-csi/api/deploy/kubernetes/rbd github.com/ceph/ceph-csi/api/deploy/ocp -# github.com/ceph/go-ceph v0.29.0 +# github.com/ceph/go-ceph v0.29.1-0.20240925141413-065319c78733 ## explicit; go 1.19 github.com/ceph/go-ceph/cephfs github.com/ceph/go-ceph/cephfs/admin