Skip to content

Commit

Permalink
dynamic host volumes: account for other claims in capability check
Browse files Browse the repository at this point in the history
When we feasibility check a dynamic host volume against a volume request, we
check the attachment mode and access mode. This only ensures that the
capabilities match, but doesn't enforce the semantics of the capabilities
against other claims that may be made on the allocation.

Add support for checking the requested capability against other allocations that
the volume claimed.

Ref: #24479
  • Loading branch information
tgross committed Dec 16, 2024
1 parent 97d9269 commit 0491707
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 13 deletions.
5 changes: 5 additions & 0 deletions nomad/state/state_store_host_volumes.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ func (s *StateStore) HostVolumeByID(ws memdb.WatchSet, ns, id string, withAllocs

vol = vol.Copy()
vol.Allocations = []*structs.AllocListStub{}
vol.Writers = 0

// we can't use AllocsByNodeTerminal because we only want to filter out
// allocs that are client-terminal, not server-terminal
Expand All @@ -43,6 +44,9 @@ func (s *StateStore) HostVolumeByID(ws memdb.WatchSet, ns, id string, withAllocs
}
for _, volClaim := range alloc.Job.LookupTaskGroup(alloc.TaskGroup).Volumes {
if volClaim.Type == structs.VolumeTypeHost && volClaim.Source == vol.Name {
if !volClaim.ReadOnly {
vol.Writers++
}
vol.Allocations = append(vol.Allocations, alloc.Stub(nil))
}
}
Expand Down Expand Up @@ -101,6 +105,7 @@ func (s *StateStore) UpsertHostVolume(index uint64, vol *structs.HostVolume) err
// Allocations are denormalized on read, so we don't want these to be
// written to the state store.
vol.Allocations = nil
vol.Writers = 0
vol.ModifyIndex = index

err = txn.Insert(TableHostVolumes, vol)
Expand Down
49 changes: 49 additions & 0 deletions nomad/structs/host_volumes.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ type HostVolume struct {
// this host volume. They are denormalized on read and this field will be
// never written to Raft
Allocations []*AllocListStub `json:",omitempty"`

// Writers is a count of the number of allocs from the Allocations field
// that can write to this volume. This count is denormalized on read, never
// written to Raft, and never returned by the API.
Writers int `json:"-"`
}

type HostVolumeState string
Expand Down Expand Up @@ -245,6 +250,50 @@ func (hv *HostVolume) GetID() string {
return hv.ID
}

// IsAvailable determines if the volume is available for a request
func (hv *HostVolume) IsAvailable(reqAccess HostVolumeAccessMode, reqAttach HostVolumeAttachmentMode, readOnly bool) bool {

if hv.State != HostVolumeStateReady {
return false
}

// check that the volume has the requested capability at all
var capOk bool
for _, cap := range hv.RequestedCapabilities {
if reqAccess == cap.AccessMode &&
reqAttach == cap.AttachmentMode {
capOk = true
break
}
}
if !capOk {
return false
}

// if no other allocations claim the volume, it's first-come-first-serve to
// determine how subsequent allocs can claim it
if len(hv.Allocations) == 0 {
return true
}

switch reqAccess {
case HostVolumeAccessModeUnknown:
return false // invalid volume state
case HostVolumeAccessModeSingleNodeReader:
return readOnly
case HostVolumeAccessModeSingleNodeWriter:
return len(hv.Allocations) == 0
case HostVolumeAccessModeMultiNodeReader:
return readOnly
case HostVolumeAccessModeMultiNodeSingleWriter:
return hv.Writers == 0
case HostVolumeAccessModeMultiNodeMultiWriter:
return true
}

return true
}

// HostVolumeCapability is the requested attachment and access mode for a volume
type HostVolumeCapability struct {
AttachmentMode HostVolumeAttachmentMode
Expand Down
17 changes: 4 additions & 13 deletions scheduler/feasible.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,26 +194,17 @@ func (h *HostVolumeChecker) hasVolumes(n *structs.Node) bool {
}

if volCfg.ID != "" { // dynamic host volume
vol, err := h.ctx.State().HostVolumeByID(nil, h.namespace, volCfg.ID, false)
vol, err := h.ctx.State().HostVolumeByID(nil, h.namespace, volCfg.ID, true)
if err != nil || vol == nil {
// node fingerprint has a dynamic volume that's no longer in the
// state store; this is only possible if the batched fingerprint
// update from a delete RPC is written before the delete RPC's
// raft entry completes
return false
}
if vol.State != structs.HostVolumeStateReady {
return false
}
var capOk bool
for _, cap := range vol.RequestedCapabilities {
if req.AccessMode == structs.CSIVolumeAccessMode(cap.AccessMode) &&
req.AttachmentMode == structs.CSIVolumeAttachmentMode(cap.AttachmentMode) {
capOk = true
break
}
}
if !capOk {
if !vol.IsAvailable(
structs.HostVolumeAccessMode(req.AccessMode),
structs.HostVolumeAttachmentMode(req.AttachmentMode), req.ReadOnly) {
return false
}
} else if !req.ReadOnly {
Expand Down

0 comments on commit 0491707

Please sign in to comment.