Skip to content

Commit

Permalink
fix: bug cannot use the disk which has failed replica
Browse files Browse the repository at this point in the history
We should not exclude the disk which has failed replica in 2 cases:
1. The caller is trying to reuse the failed replica, this disk is a
   valid candidate
2. The failed replica is no longer reusable, this disk is a valid candidate

longhorn-10210

Signed-off-by: Phan Le <[email protected]>
  • Loading branch information
PhanLe1010 authored and derekbit committed Jan 19, 2025
1 parent 5fef491 commit 82ada80
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 3 deletions.
12 changes: 10 additions & 2 deletions scheduler/replica_scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod
}
multiError.Append(errors)
}
diskCandidates = filterDisksWithMatchingReplicas(diskCandidates, replicas, diskSoftAntiAffinity)
diskCandidates = filterDisksWithMatchingReplicas(diskCandidates, replicas, diskSoftAntiAffinity, ignoreFailedReplicas)
return diskCandidates, multiError
}

Expand Down Expand Up @@ -467,9 +467,17 @@ func (rcs *ReplicaScheduler) filterNodeDisksForReplica(node *longhorn.Node, disk
// filterDiskWithMatchingReplicas returns disk that have no matching replicas when diskSoftAntiAffinity is false.
// Otherwise, it returns the input disks map.
func filterDisksWithMatchingReplicas(disks map[string]*Disk, replicas map[string]*longhorn.Replica,
diskSoftAntiAffinity bool) map[string]*Disk {
diskSoftAntiAffinity, ignoreFailedReplicas bool) map[string]*Disk {
replicasCountPerDisk := map[string]int{}
for _, r := range replicas {
if r.Spec.FailedAt != "" {
if ignoreFailedReplicas {
continue
}
if !IsPotentiallyReusableReplica(r) {
continue // This replica can never be used again, so it does not count in scheduling decisions.
}
}
replicasCountPerDisk[r.Spec.DiskID]++
}

Expand Down
4 changes: 3 additions & 1 deletion scheduler/replica_scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1392,6 +1392,7 @@ func (s *TestSuite) TestFilterDisksWithMatchingReplicas(c *C) {
inputDiskUUIDs []string
inputReplicas map[string]*longhorn.Replica
diskSoftAntiAffinity bool
ignoreFailedReplicas bool

expectDiskUUIDs []string
}
Expand Down Expand Up @@ -1454,6 +1455,7 @@ func (s *TestSuite) TestFilterDisksWithMatchingReplicas(c *C) {
replica4.Name: replica4,
}
tc.diskSoftAntiAffinity = true
tc.ignoreFailedReplicas = false
tc.expectDiskUUIDs = []string{diskUUID5} // Only disk5 has no matching replica.
tests["only schedule to disk without matching replica"] = tc

Expand All @@ -1463,7 +1465,7 @@ func (s *TestSuite) TestFilterDisksWithMatchingReplicas(c *C) {
for _, UUID := range tc.inputDiskUUIDs {
inputDisks[UUID] = &Disk{}
}
outputDiskUUIDs := filterDisksWithMatchingReplicas(inputDisks, tc.inputReplicas, tc.diskSoftAntiAffinity)
outputDiskUUIDs := filterDisksWithMatchingReplicas(inputDisks, tc.inputReplicas, tc.diskSoftAntiAffinity, tc.ignoreFailedReplicas)
c.Assert(len(outputDiskUUIDs), Equals, len(tc.expectDiskUUIDs))
for _, UUID := range tc.expectDiskUUIDs {
_, ok := outputDiskUUIDs[UUID]
Expand Down

0 comments on commit 82ada80

Please sign in to comment.