From d6a4659329ab93cfb9e0337a0f1838c057e1cd69 Mon Sep 17 00:00:00 2001 From: Shivanjan Chakravorty Date: Mon, 19 Feb 2024 02:13:20 -0700 Subject: [PATCH] feat: add delete cloud backup fix: create cloud backup ready status update Signed-off-by: Shivanjan Chakravorty --- api/server/sdk/errors.go | 5 + csi/controller.go | 122 +++++++++++++++++---- csi/controller_test.go | 229 +++++++++++++++++++++++++++++++-------- 3 files changed, 294 insertions(+), 62 deletions(-) diff --git a/api/server/sdk/errors.go b/api/server/sdk/errors.go index 713ed280b..5b6b55ad3 100644 --- a/api/server/sdk/errors.go +++ b/api/server/sdk/errors.go @@ -22,6 +22,11 @@ import ( "google.golang.org/grpc/status" ) +// IsErrorUnavailable returns if the given error is due to unavailable +func IsErrorUnavailable(err error) bool { + return FromError(err).Code() == codes.Unavailable +} + // IsErrorNotFound returns if the given error is due to not found func IsErrorNotFound(err error) bool { return FromError(err).Code() == codes.NotFound diff --git a/csi/controller.go b/csi/controller.go index 17f9dae77..1495d437a 100644 --- a/csi/controller.go +++ b/csi/controller.go @@ -28,6 +28,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" "github.com/libopenstorage/openstorage/api" + "github.com/libopenstorage/openstorage/api/server/sdk" "github.com/libopenstorage/openstorage/pkg/grpcutil" "github.com/libopenstorage/openstorage/pkg/units" "github.com/libopenstorage/openstorage/pkg/util" @@ -1016,6 +1017,7 @@ func (s *OsdCsiServer) createLocalSnapshot( }, }, nil } + func (s *OsdCsiServer) getCloudBackupClient(ctx context.Context) (api.OpenStorageCloudBackupClient, error) { // Get grpc connection conn, err := s.getRemoteConn(ctx) @@ -1036,6 +1038,9 @@ func (s *OsdCsiServer) createCloudBackup( return nil, err } + // In the incoming request the snapshot is denoted by `snapshot-` + csiSnapshotID := req.GetName() + // Get any labels passed in by the CO _, locator, _, err := s.specHandler.SpecFromOpts(req.GetParameters()) if err != nil { @@ -1043,11 +1048,35 @@ func (s *OsdCsiServer) createCloudBackup( } credentialID := locator.VolumeLabels[osdSnapshotCredentialIDKey] - backupID := req.GetName() + + // Check if the snapshot with this name already exists + backupStatus, err := cloudBackupClient.Status(ctx, &api.SdkCloudBackupStatusRequest{ + TaskId: csiSnapshotID, + }) + if err == nil { + + // Verify the parent is the same + if req.GetSourceVolumeId() != backupStatus.Statuses[csiSnapshotID].GetSrcVolumeId() { + return nil, status.Error(codes.AlreadyExists, "Requested snapshot already exists for another source volume id") + } + + isBackupReady := backupStatus.Statuses[csiSnapshotID].Status == api.SdkCloudBackupStatusType_SdkCloudBackupStatusTypeDone + + return &csi.CreateSnapshotResponse{ + Snapshot: &csi.Snapshot{ + SnapshotId: csiSnapshotID, + SourceVolumeId: req.GetSourceVolumeId(), + CreationTime: backupStatus.Statuses[csiSnapshotID].StartTime, + ReadyToUse: isBackupReady, + SizeBytes: int64(backupStatus.Statuses[csiSnapshotID].BytesDone), + }, + }, nil + } + // Create snapshot _, err = cloudBackupClient.Create(ctx, &api.SdkCloudBackupCreateRequest{ VolumeId: req.GetSourceVolumeId(), - TaskId: backupID, + TaskId: csiSnapshotID, CredentialId: credentialID, Labels: locator.GetVolumeLabels(), }) @@ -1057,31 +1086,24 @@ func (s *OsdCsiServer) createCloudBackup( } var isBackupReady bool - var backupStatus *api.SdkCloudBackupStatusResponse // Check if snapshot has been created but is in error state - backupStatus, errFindFailed := cloudBackupClient.Status(ctx, &api.SdkCloudBackupStatusRequest{ + backupStatus, err = cloudBackupClient.Status(ctx, &api.SdkCloudBackupStatusRequest{ VolumeId: req.GetSourceVolumeId(), - TaskId: backupID, + TaskId: csiSnapshotID, }) - if errFindFailed != nil { - return nil, status.Errorf(codes.Aborted, "Failed to create cloud snapshot: %v", err) + if err != nil { + return nil, status.Errorf(codes.Aborted, "Failed to get cloud snapshot status: %v", err) } - isBackupReady = backupStatus.Statuses[backupID].Status == api.SdkCloudBackupStatusType_SdkCloudBackupStatusTypeDone - snapSize, errSizeFailed := cloudBackupClient.Size(ctx, &api.SdkCloudBackupSizeRequest{ - BackupId: backupID, - }) - if errSizeFailed != nil { - return nil, status.Errorf(codes.Aborted, "Failed to get cloud snapshot size: %v", err) - } + isBackupReady = backupStatus.Statuses[csiSnapshotID].Status == api.SdkCloudBackupStatusType_SdkCloudBackupStatusTypeDone return &csi.CreateSnapshotResponse{ Snapshot: &csi.Snapshot{ - SizeBytes: int64(snapSize.GetTotalDownloadBytes()), - SnapshotId: backupID, + SnapshotId: csiSnapshotID, + SizeBytes: int64(backupStatus.Statuses[csiSnapshotID].BytesDone), SourceVolumeId: req.GetSourceVolumeId(), - CreationTime: backupStatus.Statuses[backupID].StartTime, + CreationTime: backupStatus.Statuses[csiSnapshotID].StartTime, ReadyToUse: isBackupReady, }, }, nil @@ -1091,12 +1113,48 @@ func (s *OsdCsiServer) createCloudBackup( func (s *OsdCsiServer) DeleteSnapshot( ctx context.Context, req *csi.DeleteSnapshotRequest, -) (*csi.DeleteSnapshotResponse, error) { +) (resp *csi.DeleteSnapshotResponse, err error) { + cloudBackupClient, err := s.getCloudBackupClient(ctx) + cloudBackupDriverUnavailable := sdk.IsErrorUnavailable(err) + if err != nil && !cloudBackupDriverUnavailable { + return nil, err + } - if len(req.GetSnapshotId()) == 0 { + csiSnapshotID := req.GetSnapshotId() + if len(csiSnapshotID) == 0 { return nil, status.Error(codes.InvalidArgument, "Snapshot id must be provided") } + // Check if snapshot has been created but is in error state + backupStatus, err := cloudBackupClient.Status(ctx, &api.SdkCloudBackupStatusRequest{ + TaskId: csiSnapshotID, + }) + if sdk.IsErrorNotFound(err) || cloudBackupDriverUnavailable { + resp, err = s.deleteLocalSnapshot(ctx, req) + return + } + if err != nil { + return nil, status.Errorf(codes.Aborted, "Failed to get cloud snapshot status: %v", err) + } + + clogger.WithContext(ctx).Errorf("DeleteSnapshots backupStatus: %+v, csiSnapshotID: %s", backupStatus, csiSnapshotID) + + req.Secrets = map[string]string{ + osdSnapshotCredentialIDKey: backupStatus.Statuses[csiSnapshotID].CredentialId, + } + + req.SnapshotId = backupStatus.Statuses[csiSnapshotID].BackupId + + resp, err = s.deleteCloudBackup(ctx, req) + return + +} + +func (s *OsdCsiServer) deleteLocalSnapshot( + ctx context.Context, + req *csi.DeleteSnapshotRequest, +) (*csi.DeleteSnapshotResponse, error) { + // Get grpc connection conn, err := s.getConn() if err != nil { @@ -1125,6 +1183,32 @@ func (s *OsdCsiServer) DeleteSnapshot( return &csi.DeleteSnapshotResponse{}, nil } +func (s *OsdCsiServer) deleteCloudBackup( + ctx context.Context, + req *csi.DeleteSnapshotRequest, +) (*csi.DeleteSnapshotResponse, error) { + cloudBackupClient, err := s.getCloudBackupClient(ctx) + if err != nil { + return nil, err + } + + credentialID := req.GetSecrets()[osdSnapshotCredentialIDKey] + + backupID := req.GetSnapshotId() + + // Delete snapshot + _, err = cloudBackupClient.Delete(ctx, &api.SdkCloudBackupDeleteRequest{ + BackupId: backupID, + CredentialId: credentialID, + }) + // NOTE: Currently, the Delete API call has no implementation that returns + // a not found gRPC error with the status code. + if err != nil && !sdk.IsErrorNotFound(err) { + return nil, status.Errorf(codes.Aborted, "failed to delete cloud snapshot: %v", err) + } + return &csi.DeleteSnapshotResponse{}, nil +} + // ListSnapshots lists all snapshots in a cluster. // This is mainly implemented for Nomad, as Kubernetes will not call // list snapshots for drivers which have synchronous snapshot creation. diff --git a/csi/controller_test.go b/csi/controller_test.go index cac51e16b..7c3549868 100644 --- a/csi/controller_test.go +++ b/csi/controller_test.go @@ -22,10 +22,12 @@ import ( "fmt" "math" "reflect" + "strings" "sync" "testing" csi "github.com/container-storage-interface/spec/lib/go/csi" + "github.com/gogo/protobuf/proto" "github.com/golang/mock/gomock" "github.com/golang/protobuf/ptypes" "github.com/golang/protobuf/ptypes/timestamp" @@ -3412,7 +3414,7 @@ type fakeOsdCsiServer struct { func (f *fakeOsdCsiServer) getCloudBackupClient(ctx context.Context) (api.OpenStorageCloudBackupClient, error) { return f.mockCloudBackupClient, nil } -func TestOsdCsiServer_CreateSnapshot(t *testing.T) { +func TestOsdCsiServer_CreateCloudSnapshot(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() @@ -3423,6 +3425,8 @@ func TestOsdCsiServer_CreateSnapshot(t *testing.T) { mockErr := errors.New("MOCK ERROR") creationTime := timestamppb.Now() + mockSourceVolumeID := "mock-volume-id" + mockCloudBackupClient.EXPECT().Create(gomock.Any(), gomock.Any(), gomock.Any()). DoAndReturn(func(ctx context.Context, req *api.SdkCloudBackupCreateRequest, opts ...grpc.CallOption) (*api.SdkCloudBackupCreateResponse, error) { if req.TaskId == "create-error" { @@ -3433,65 +3437,58 @@ func TestOsdCsiServer_CreateSnapshot(t *testing.T) { return nil, status.Errorf(codes.NotFound, "Volume id not found") } - return &api.SdkCloudBackupCreateResponse{ - TaskId: req.TaskId, - }, nil + if req.TaskId == "new-ok" { + return &api.SdkCloudBackupCreateResponse{ + TaskId: req.TaskId, + }, nil + } + + return nil, mockErr }).AnyTimes() + newFirstPass := true mockCloudBackupClient.EXPECT().Status(gomock.Any(), gomock.Any(), gomock.Any()). DoAndReturn(func(ctx context.Context, req *api.SdkCloudBackupStatusRequest, opts ...grpc.CallOption) (*api.SdkCloudBackupStatusResponse, error) { if req.TaskId == "status-error" { return nil, mockErr } - // if req.TaskId == "status-failed" || - if req.TaskId == "delete-error" { + if req.TaskId == "already-ok" { return &api.SdkCloudBackupStatusResponse{ Statuses: map[string]*api.SdkCloudBackupStatus{ req.TaskId: { - Status: api.SdkCloudBackupStatusType_SdkCloudBackupStatusTypeFailed, - StartTime: creationTime, + BackupId: req.TaskId, + Status: api.SdkCloudBackupStatusType_SdkCloudBackupStatusTypeDone, + StartTime: creationTime, + SrcVolumeId: mockSourceVolumeID, + BytesDone: defaultCSIVolumeSize, }, }, }, nil } - return &api.SdkCloudBackupStatusResponse{ - Statuses: map[string]*api.SdkCloudBackupStatus{ - req.TaskId: { - Status: api.SdkCloudBackupStatusType_SdkCloudBackupStatusTypeDone, - StartTime: creationTime, + if req.TaskId == "new-ok" && !newFirstPass { + return &api.SdkCloudBackupStatusResponse{ + Statuses: map[string]*api.SdkCloudBackupStatus{ + req.TaskId: { + BackupId: req.TaskId, + Status: api.SdkCloudBackupStatusType_SdkCloudBackupStatusTypeQueued, + StartTime: creationTime, + SrcVolumeId: mockSourceVolumeID, + BytesDone: 0, + }, }, - }, - }, nil - - }).AnyTimes() - - mockCloudBackupClient.EXPECT().Delete(gomock.Any(), gomock.Any(), gomock.Any()). - DoAndReturn(func(ctx context.Context, req *api.SdkCloudBackupDeleteRequest, opts ...grpc.CallOption) (*api.SdkCloudBackupDeleteResponse, error) { - if req.BackupId == "delete-error" { - return nil, mockErr + }, nil } - - return &api.SdkCloudBackupDeleteResponse{}, nil - - }).AnyTimes() - - mockCloudBackupClient.EXPECT().Size(gomock.Any(), gomock.Any(), gomock.Any()). - DoAndReturn(func(ctx context.Context, req *api.SdkCloudBackupSizeRequest, opts ...grpc.CallOption) (*api.SdkCloudBackupSizeResponse, error) { - if req.BackupId == "size-error" { - return nil, mockErr + if req.TaskId == "new-ok" && newFirstPass { + newFirstPass = false } - return &api.SdkCloudBackupSizeResponse{ - TotalDownloadBytes: defaultCSIVolumeSize, - }, nil + return nil, mockErr }).AnyTimes() - mockSourceVolumeID := "mock-volume-id" - tests := []struct { name string SnapshotName string @@ -3504,6 +3501,12 @@ func TestOsdCsiServer_CreateSnapshot(t *testing.T) { nil, true, }, + { + "failed to get parameters", + "param-error", + nil, + true, + }, { "fail snapshot create", "create-error", @@ -3523,18 +3526,26 @@ func TestOsdCsiServer_CreateSnapshot(t *testing.T) { true, }, { - "fail to get snapshot size", - "size-error", - nil, - true, + "creation scheduled", + "new-ok", + &csi.CreateSnapshotResponse{ + Snapshot: &csi.Snapshot{ + SizeBytes: 0, + SnapshotId: "new-ok", + SourceVolumeId: mockSourceVolumeID, + CreationTime: creationTime, + ReadyToUse: false, + }, + }, + false, }, { "creation completes without any error", - "ok", + "already-ok", &csi.CreateSnapshotResponse{ Snapshot: &csi.Snapshot{ SizeBytes: int64(defaultCSIVolumeSize), - SnapshotId: "ok", + SnapshotId: "already-ok", SourceVolumeId: mockSourceVolumeID, CreationTime: creationTime, ReadyToUse: true, @@ -3556,11 +3567,19 @@ func TestOsdCsiServer_CreateSnapshot(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + + specLabels := []string{ + osdSnapshotLabelsTypeKey + "=cloud", + } + if tt.SnapshotName != "param-error" { + specLabels = append(specLabels, osdSnapshotCredentialIDKey+"=mockcredid") + } + req := &csi.CreateSnapshotRequest{ Name: tt.SnapshotName, SourceVolumeId: mockSourceVolumeID, Parameters: map[string]string{ - api.SpecLabels: osdSnapshotLabelsTypeKey + "=cloud", + api.SpecLabels: strings.Join(specLabels, ","), }, } @@ -3588,3 +3607,127 @@ func TestOsdCsiServer_CreateSnapshot(t *testing.T) { }) } } + +func TestOsdCsiServer_DeleteCloudSnapshot(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + mockCloudBackupClient := mock.NewMockOpenStorageCloudBackupClient(ctrl) + + ctx := context.Background() + + mockErr := errors.New("MOCK ERROR") + creationTime := timestamppb.Now() + + mockCloudBackupClient.EXPECT().Delete(gomock.Any(), gomock.Any(), gomock.Any()). + DoAndReturn(func(ctx context.Context, req *api.SdkCloudBackupDeleteRequest, opts ...grpc.CallOption) (*api.SdkCloudBackupDeleteResponse, error) { + if req.BackupId == "delete-error" { + return nil, mockErr + } + + if req.BackupId == "delete-notfound" { + return nil, status.Errorf(codes.NotFound, "Volume id not found") + } + + return &api.SdkCloudBackupDeleteResponse{}, nil + + }).AnyTimes() + + mockCloudBackupClient.EXPECT().Status(gomock.Any(), gomock.Any(), gomock.Any()). + DoAndReturn(func(ctx context.Context, req *api.SdkCloudBackupStatusRequest, opts ...grpc.CallOption) (*api.SdkCloudBackupStatusResponse, error) { + if req.TaskId == "status-error" { + return nil, mockErr + } + + return &api.SdkCloudBackupStatusResponse{ + Statuses: map[string]*api.SdkCloudBackupStatus{ + req.TaskId: { + BackupId: req.TaskId, + Status: api.SdkCloudBackupStatusType_SdkCloudBackupStatusTypeDone, + StartTime: creationTime, + }, + }, + }, nil + + }).AnyTimes() + + tests := []struct { + name string + SnapshotName string + Cred string + want *csi.DeleteSnapshotRequest + wantErr bool + }{ + { + "remote client connection failed", + "remote-client-error", + "", + nil, + true, + }, + { + "fail to get cloud snap status", + "status-error", + "", + nil, + true, + }, + { + "fail snapshot delete", + "delete-error", + "", + nil, + true, + }, + { + "deletion completes without any error", + "ok", + "", + &csi.DeleteSnapshotRequest{}, + false, + }, + } + mockRoundRobinBalancer := mockLoadBalancer.NewMockBalancer(ctrl) + // nil, false, nil + mockRoundRobinBalancer.EXPECT().GetRemoteNodeConnection(gomock.Any()).DoAndReturn( + func(ctx context.Context) (*grpc.ClientConn, bool, error) { + var err error + if ctx.Value("remote-client-error").(bool) { + err = mockErr + } + return nil, false, err + }).AnyTimes() + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + req := &csi.DeleteSnapshotRequest{ + SnapshotId: tt.SnapshotName, + Secrets: map[string]string{ + api.SpecLabels: osdSnapshotCredentialIDKey + "=" + tt.Cred, + }, + } + + s := &OsdCsiServer{ + specHandler: spec.NewSpecHandler(), + mu: sync.Mutex{}, + cloudBackupClient: func(cc grpc.ClientConnInterface) api.OpenStorageCloudBackupClient { + return mockCloudBackupClient + }, + roundRobinBalancer: mockRoundRobinBalancer, + } + + doClientErr := tt.SnapshotName == "remote-client-error" + + ctx = context.WithValue(ctx, "remote-client-error", doClientErr) + + got, err := s.DeleteSnapshot(ctx, req) + if (err != nil) != tt.wantErr { + t.Errorf("OsdCsiServer.DeleteSnapshot() error = %v, wantErr %v", err, tt.wantErr) + return + } + if proto.Equal(got, tt.want) { + t.Errorf("OsdCsiServer.DeleteSnapshot() = %v, want %v", got, tt.want) + } + }) + } +}