Skip to content

Commit

Permalink
feat: improve error transparency for volume attachment failure
Browse files Browse the repository at this point in the history
Longhorn 9968

Signed-off-by: Derek Su <[email protected]>
  • Loading branch information
derekbit committed Jan 20, 2025
1 parent 82ada80 commit 6e451d2
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 21 deletions.
45 changes: 31 additions & 14 deletions api/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ type Volume struct {
Conditions map[string]longhorn.Condition `json:"conditions"`
KubernetesStatus longhorn.KubernetesStatus `json:"kubernetesStatus"`
CloneStatus longhorn.VolumeCloneStatus `json:"cloneStatus"`
Ready bool `json:"ready"`
Ready longhorn.Condition `json:"ready"`

AccessMode longhorn.AccessMode `json:"accessMode"`
ShareEndpoint string `json:"shareEndpoint"`
Expand Down Expand Up @@ -1373,6 +1373,35 @@ func toEmptyResource() *Empty {
}
}

func toVolumeReadyCondition(v *longhorn.Volume) longhorn.Condition {
scheduledCondition := types.GetCondition(v.Status.Conditions, longhorn.VolumeConditionTypeScheduled)
isCloningDesired := types.IsDataFromVolume(v.Spec.DataSource)
isCloningCompleted := v.Status.CloneStatus.State == longhorn.VolumeCloneStateCompleted

status := longhorn.ConditionStatusFalse
message := ""
if v.Spec.NodeID == "" && v.Status.State != longhorn.VolumeStateDetached {
message = "volume is not attached to any node"
} else if v.Status.State == longhorn.VolumeStateDetached && scheduledCondition.Status != longhorn.ConditionStatusTrue {
message = fmt.Sprintf("volume is detached but not scheduled with reason %v", scheduledCondition.Reason)
} else if v.Status.Robustness == longhorn.VolumeRobustnessFaulted {
message = "volume is in faulted state"
} else if v.Status.RestoreRequired {
message = "volume requires restore"
} else if isCloningDesired && !isCloningCompleted {
message = "volume is cloning"
} else {
status = longhorn.ConditionStatusTrue
}

return longhorn.Condition{
Type: longhorn.VolumeConditionTypeReady,
Status: status,
Reason: "",
Message: message,
}
}

func toSettingResource(setting *longhorn.Setting) *Setting {
definition, _ := types.GetSettingDefinition(types.SettingName(setting.Name))

Expand Down Expand Up @@ -1563,18 +1592,6 @@ func toVolumeResource(v *longhorn.Volume, ves []*longhorn.Engine, vrs []*longhor
// 3. It's faulted.
// 4. It's restore pending.
// 5. It's failed to clone
ready := true
scheduledCondition := types.GetCondition(v.Status.Conditions, longhorn.VolumeConditionTypeScheduled)
isCloningDesired := types.IsDataFromVolume(v.Spec.DataSource)
isCloningCompleted := v.Status.CloneStatus.State == longhorn.VolumeCloneStateCompleted
if (v.Spec.NodeID == "" && v.Status.State != longhorn.VolumeStateDetached) ||
(v.Status.State == longhorn.VolumeStateDetached && scheduledCondition.Status != longhorn.ConditionStatusTrue) ||
v.Status.Robustness == longhorn.VolumeRobustnessFaulted ||
v.Status.RestoreRequired ||
(isCloningDesired && !isCloningCompleted) {
ready = false
}

r := &Volume{
Resource: client.Resource{
Id: v.Name,
Expand Down Expand Up @@ -1620,7 +1637,7 @@ func toVolumeResource(v *longhorn.Volume, ves []*longhorn.Engine, vrs []*longhor
ReplicaZoneSoftAntiAffinity: v.Spec.ReplicaZoneSoftAntiAffinity,
ReplicaDiskSoftAntiAffinity: v.Spec.ReplicaDiskSoftAntiAffinity,
DataEngine: v.Spec.DataEngine,
Ready: ready,
Ready: toVolumeReadyCondition(v),

AccessMode: v.Spec.AccessMode,
ShareEndpoint: v.Status.ShareEndpoint,
Expand Down
2 changes: 1 addition & 1 deletion client/generated_volume.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ type Volume struct {

PurgeStatus []PurgeStatus `json:"purgeStatus,omitempty" yaml:"purge_status,omitempty"`

Ready bool `json:"ready,omitempty" yaml:"ready,omitempty"`
Ready interface{} `json:"ready,omitempty" yaml:"ready,omitempty"`

RebuildStatus []RebuildStatus `json:"rebuildStatus,omitempty" yaml:"rebuild_status,omitempty"`

Expand Down
5 changes: 3 additions & 2 deletions csi/controller_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -487,8 +487,9 @@ func (cs *ControllerServer) ControllerPublishVolume(ctx context.Context, req *cs
// TODO: JM should readiness be handled by the caller?
// Most of the readiness conditions are covered by the attach, except auto attachment which requires changes to the design
// should be handled by the processing of the api return codes
if !volume.Ready {
return nil, status.Errorf(codes.Aborted, "volume %s is not ready for workloads", volumeID)
ready := volume.Ready.(longhorn.Condition)
if ready.Status != longhorn.ConditionStatusTrue {
return nil, status.Errorf(codes.Aborted, "volume %s is not ready for workloads since %s", volumeID, ready.Message)
}

attachmentID := generateAttachmentID(volumeID, nodeID)
Expand Down
10 changes: 6 additions & 4 deletions csi/node_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,9 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
}
}

if !volume.Ready {
return nil, status.Errorf(codes.Aborted, "volume %s is not ready for workloads", volumeID)
ready := volume.Ready.(longhorn.Condition)
if ready.Status != longhorn.ConditionStatusTrue {
return nil, status.Errorf(codes.Aborted, "volume %s is not ready for workloads since %s", volumeID, ready.Message)
}

podsStatus := ns.collectWorkloadPodsStatus(volume, log)
Expand Down Expand Up @@ -454,8 +455,9 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
return nil, status.Errorf(codes.InvalidArgument, "volume %s hasn't been attached yet", volumeID)
}

if !volume.Ready {
return nil, status.Errorf(codes.Aborted, "volume %s is not ready for workloads", volumeID)
ready := volume.Ready.(longhorn.Condition)
if ready.Status != longhorn.ConditionStatusTrue {
return nil, status.Errorf(codes.Aborted, "volume %s is not ready for workloads since %s", volumeID, ready.Message)
}

if requiresSharedAccess(volume, volumeCapability) && !volume.Migratable {
Expand Down
1 change: 1 addition & 0 deletions k8s/pkg/apis/longhorn/v1beta2/volume.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ const (
VolumeConditionTypeRestore = "Restore"
VolumeConditionTypeTooManySnapshots = "TooManySnapshots"
VolumeConditionTypeWaitForBackingImage = "WaitForBackingImage"
VolumeConditionTypeReady = "Ready"
)

const (
Expand Down

0 comments on commit 6e451d2

Please sign in to comment.