dynamic host volumes: account for other claims in capability check
When we feasibility-check a dynamic host volume against a volume request, we
check the attachment mode and access mode. This only ensures that the
capabilities match; it doesn't enforce the semantics of those capabilities
against other claims that may be made on the volume.

Add support for checking the requested capability against the other
allocations that claim the volume.

Requires additional updates:
* Job validation should allow attachment/access mode for host volumes
* Added new single-node access modes and removed multi-node access modes

Ref: #24479
tgross committed Dec 17, 2024
1 parent 97d9269 commit 44e88d5
Showing 7 changed files with 211 additions and 41 deletions.
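
For orientation, here is the shape of request the new check applies to: a task group host volume claim carrying CSI-style access and attachment modes, which job validation previously rejected outright. A hedged sketch in structs form (the volume name is hypothetical; assumes the github.com/hashicorp/nomad/nomad/structs import; the CSIVolume-to-HostVolume string conversions mirror the casts in scheduler/feasible.go below):

req := &structs.VolumeRequest{
    Type:           structs.VolumeTypeHost,
    Source:         "shared-data", // hypothetical volume name
    ReadOnly:       false,
    AccessMode:     structs.CSIVolumeAccessMode(structs.HostVolumeAccessModeSingleNodeSingleWriter),
    AttachmentMode: structs.CSIVolumeAttachmentMode(structs.HostVolumeAttachmentModeFilesystem),
}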
5 changes: 5 additions & 0 deletions nomad/state/state_store_host_volumes.go
@@ -30,6 +30,7 @@ func (s *StateStore) HostVolumeByID(ws memdb.WatchSet, ns, id string, withAllocs

vol = vol.Copy()
vol.Allocations = []*structs.AllocListStub{}
vol.Writers = []string{}

// we can't use AllocsByNodeTerminal because we only want to filter out
// allocs that are client-terminal, not server-terminal
@@ -43,6 +44,9 @@
}
for _, volClaim := range alloc.Job.LookupTaskGroup(alloc.TaskGroup).Volumes {
if volClaim.Type == structs.VolumeTypeHost && volClaim.Source == vol.Name {
if !volClaim.ReadOnly {
vol.Writers = append(vol.Writers, alloc.ID)
}
vol.Allocations = append(vol.Allocations, alloc.Stub(nil))
}
}
@@ -101,6 +105,7 @@ func (s *StateStore) UpsertHostVolume(index uint64, vol *structs.HostVolume) err
// Allocations are denormalized on read, so we don't want these to be
// written to the state store.
vol.Allocations = nil
vol.Writers = nil
vol.ModifyIndex = index

err = txn.Insert(TableHostVolumes, vol)
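
The read-side effect of the change above: fetching a volume with its allocations denormalized now also fills in the writer subset. A minimal sketch, assuming a *StateStore in hand (the namespace and volume ID are hypothetical):

vol, err := store.HostVolumeByID(nil, "default", volID, true)
if err == nil && vol != nil {
    // vol.Allocations holds a stub for every claim that isn't
    // client-terminal; vol.Writers is the subset of those alloc IDs whose
    // claims are read/write rather than read-only.
    _ = vol.Writers
}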
61 changes: 52 additions & 9 deletions nomad/structs/host_volumes.go
@@ -84,6 +84,11 @@ type HostVolume struct {
// this host volume. They are denormalized on read and this field will
// never be written to Raft
Allocations []*AllocListStub `json:",omitempty"`

// Writers is a list of alloc IDs from the Allocations field that can
// write to this volume. This list is denormalized on read, never written
// to Raft, and never returned by the API.
Writers []string `json:"-"`
}

type HostVolumeState string
@@ -245,6 +250,47 @@ func (hv *HostVolume) GetID() string {
return hv.ID
}

// IsAvailable determines if the volume is available for a request
func (hv *HostVolume) IsAvailable(reqAccess HostVolumeAccessMode, reqAttach HostVolumeAttachmentMode, readOnly bool, previousAllocID string) bool {

if hv.State != HostVolumeStateReady {
return false
}

// check that the volume has the requested capability at all
var capOk bool
for _, cap := range hv.RequestedCapabilities {
if reqAccess == cap.AccessMode &&
reqAttach == cap.AttachmentMode {
capOk = true
break
}
}
if !capOk {
return false
}

// if no other allocations claim the volume, it's first-come, first-served
// to determine how subsequent allocs can claim it
if len(hv.Allocations) == 0 {
return true
}

switch reqAccess {
case HostVolumeAccessModeUnknown:
return false // invalid mode
case HostVolumeAccessModeSingleNodeReader:
return readOnly
case HostVolumeAccessModeSingleNodeWriter, HostVolumeAccessModeSingleNodeSingleWriter:
return len(hv.Writers) == 0 ||
(len(hv.Writers) == 1 && hv.Writers[0] == previousAllocID)
case HostVolumeAccessModeSingleNodeMultiWriter:
// no constraint
}

return true
}

// HostVolumeCapability is the requested attachment and access mode for a volume
type HostVolumeCapability struct {
AttachmentMode HostVolumeAttachmentMode
@@ -275,9 +321,8 @@ func (hvc *HostVolumeCapability) Validate() error {
switch hvc.AccessMode {
case HostVolumeAccessModeSingleNodeReader,
HostVolumeAccessModeSingleNodeWriter,
-HostVolumeAccessModeMultiNodeReader,
-HostVolumeAccessModeMultiNodeSingleWriter,
-HostVolumeAccessModeMultiNodeMultiWriter:
+HostVolumeAccessModeSingleNodeSingleWriter,
+HostVolumeAccessModeSingleNodeMultiWriter:
default:
return fmt.Errorf("invalid access mode: %q", hvc.AccessMode)
}
@@ -302,12 +347,10 @@ type HostVolumeAccessMode string
const (
HostVolumeAccessModeUnknown HostVolumeAccessMode = ""

-HostVolumeAccessModeSingleNodeReader HostVolumeAccessMode = "single-node-reader-only"
-HostVolumeAccessModeSingleNodeWriter HostVolumeAccessMode = "single-node-writer"
-
-HostVolumeAccessModeMultiNodeReader HostVolumeAccessMode = "multi-node-reader-only"
-HostVolumeAccessModeMultiNodeSingleWriter HostVolumeAccessMode = "multi-node-single-writer"
-HostVolumeAccessModeMultiNodeMultiWriter HostVolumeAccessMode = "multi-node-multi-writer"
+HostVolumeAccessModeSingleNodeReader HostVolumeAccessMode = "single-node-reader-only"
+HostVolumeAccessModeSingleNodeWriter HostVolumeAccessMode = "single-node-writer"
+HostVolumeAccessModeSingleNodeSingleWriter HostVolumeAccessMode = "single-node-single-writer"
+HostVolumeAccessModeSingleNodeMultiWriter HostVolumeAccessMode = "single-node-multi-writer"
)

// HostVolumeStub is used for responses for the list volumes endpoint
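
To make the new single-writer semantics concrete, here is a hedged sketch of IsAvailable against a volume that already has one writer (package structs scope; the alloc IDs are hypothetical):

vol := &HostVolume{
    State: HostVolumeStateReady,
    RequestedCapabilities: []*HostVolumeCapability{{
        AttachmentMode: HostVolumeAttachmentModeFilesystem,
        AccessMode:     HostVolumeAccessModeSingleNodeSingleWriter,
    }},
    Allocations: []*AllocListStub{{ID: "old-alloc"}},
    Writers:     []string{"old-alloc"},
}

// The requested capability matches, but a second concurrent writer is
// rejected:
vol.IsAvailable(HostVolumeAccessModeSingleNodeSingleWriter,
    HostVolumeAttachmentModeFilesystem, false, "") // => false

// A replacement for the existing writer may still claim the volume, so a
// deployment can place the successor alloc (the "ok across deployments"
// case in the tests below):
vol.IsAvailable(HostVolumeAccessModeSingleNodeSingleWriter,
    HostVolumeAttachmentModeFilesystem, false, "old-alloc") // => true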
124 changes: 121 additions & 3 deletions nomad/structs/host_volumes_test.go
@@ -8,6 +8,7 @@ import (
"time"

"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/shoenig/test/must"
)
@@ -45,7 +46,7 @@ func TestHostVolume_Copy(t *testing.T) {
out.Constraints[0].LTarget = "${meta.node_class}"
out.RequestedCapabilities = append(out.RequestedCapabilities, &HostVolumeCapability{
AttachmentMode: HostVolumeAttachmentModeBlockDevice,
-AccessMode: HostVolumeAccessModeMultiNodeReader,
+AccessMode: HostVolumeAccessModeSingleNodeMultiWriter,
})
out.Parameters["foo"] = "baz"

@@ -195,7 +196,7 @@ func TestHostVolume_CanonicalizeForUpdate(t *testing.T) {
RequestedCapacityMaxBytes: 500000,
RequestedCapabilities: []*HostVolumeCapability{{
AttachmentMode: HostVolumeAttachmentModeFilesystem,
-AccessMode: HostVolumeAccessModeMultiNodeMultiWriter,
+AccessMode: HostVolumeAccessModeSingleNodeMultiWriter,
}},
}
existing := &HostVolume{
@@ -240,7 +241,7 @@

must.Eq(t, []*HostVolumeCapability{{
AttachmentMode: HostVolumeAttachmentModeFilesystem,
-AccessMode: HostVolumeAccessModeMultiNodeMultiWriter,
+AccessMode: HostVolumeAccessModeSingleNodeMultiWriter,
}}, vol.RequestedCapabilities)

must.Eq(t, "/var/nomad/alloc_mounts/82f357d6.ext4", vol.HostPath)
@@ -251,3 +252,120 @@ func TestHostVolume_CanonicalizeForUpdate(t *testing.T) {
must.Nil(t, vol.Allocations)

}

func TestHostVolume_IsAvailable(t *testing.T) {

allCaps := []*HostVolumeCapability{}

for _, accessMode := range []HostVolumeAccessMode{
HostVolumeAccessModeSingleNodeReader,
HostVolumeAccessModeSingleNodeWriter,
HostVolumeAccessModeSingleNodeSingleWriter,
HostVolumeAccessModeSingleNodeMultiWriter,
} {
for _, attachMode := range []HostVolumeAttachmentMode{
HostVolumeAttachmentModeFilesystem, HostVolumeAttachmentModeBlockDevice,
} {
allCaps = append(allCaps, &HostVolumeCapability{
AttachmentMode: attachMode,
AccessMode: accessMode,
})
}
}

testCases := []struct {
name string
hasAllocs []string
hasWriters []string
hasCaps []*HostVolumeCapability
req *HostVolumeCapability
readOnly bool
prevAllocID string
expect bool
}{
{
name: "enforce attachment mode",
hasCaps: []*HostVolumeCapability{{
AttachmentMode: HostVolumeAttachmentModeBlockDevice,
AccessMode: HostVolumeAccessModeSingleNodeSingleWriter,
}},
req: &HostVolumeCapability{
AttachmentMode: HostVolumeAttachmentModeFilesystem,
AccessMode: HostVolumeAccessModeSingleNodeSingleWriter,
},
expect: false,
},
{
name: "enforce read only",
hasAllocs: []string{"a", "b", "c"},
req: &HostVolumeCapability{
AttachmentMode: HostVolumeAttachmentModeFilesystem,
AccessMode: HostVolumeAccessModeSingleNodeReader,
},
expect: false,
},
{
name: "enforce read only ok",
hasAllocs: []string{"a", "b", "c"},
req: &HostVolumeCapability{
AttachmentMode: HostVolumeAttachmentModeFilesystem,
AccessMode: HostVolumeAccessModeSingleNodeReader,
},
readOnly: true,
expect: true,
},
{
name: "enforce single writer",
hasAllocs: []string{"a", "b", "c"},
hasWriters: []string{"b"},
req: &HostVolumeCapability{
AttachmentMode: HostVolumeAttachmentModeFilesystem,
AccessMode: HostVolumeAccessModeSingleNodeSingleWriter,
},
expect: false,
},
{
name: "enforce single writer ok across deployments",
hasAllocs: []string{"a", "b", "c"},
hasWriters: []string{"b"},
req: &HostVolumeCapability{
AttachmentMode: HostVolumeAttachmentModeFilesystem,
AccessMode: HostVolumeAccessModeSingleNodeSingleWriter,
},
prevAllocID: "b",
expect: true,
},
{
name: "multi writer is always ok",
hasAllocs: []string{"a", "b", "c"},
hasWriters: []string{"b", "c"},
req: &HostVolumeCapability{
AttachmentMode: HostVolumeAttachmentModeFilesystem,
AccessMode: HostVolumeAccessModeSingleNodeMultiWriter,
},
expect: true,
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
allocs := helper.ConvertSlice(tc.hasAllocs, func(id string) *AllocListStub {
return &AllocListStub{ID: id}
})
vol := &HostVolume{
Allocations: allocs,
Writers: tc.hasWriters,
State: HostVolumeStateReady,
}
if len(tc.hasCaps) > 0 {
vol.RequestedCapabilities = tc.hasCaps
} else {
vol.RequestedCapabilities = allCaps
}

must.Eq(t, tc.expect, vol.IsAvailable(
tc.req.AccessMode, tc.req.AttachmentMode, tc.readOnly, tc.prevAllocID))
})
}

}
18 changes: 12 additions & 6 deletions nomad/structs/volumes.go
@@ -170,16 +170,22 @@ func (v *VolumeRequest) Validate(jobType string, taskGroupCount, canaries int) e
switch v.Type {

case VolumeTypeHost:
-if v.AttachmentMode != CSIVolumeAttachmentModeUnknown {
-addErr("host volumes cannot have an attachment mode")
-}
-if v.AccessMode != CSIVolumeAccessModeUnknown {
-addErr("host volumes cannot have an access mode")
-}
if v.MountOptions != nil {
+// TODO(1.10.0): support mount options for dynamic host volumes
addErr("host volumes cannot have mount options")
}
+
+switch v.AccessMode {
+case CSIVolumeAccessModeSingleNodeReader, CSIVolumeAccessModeMultiNodeReader:
+if !v.ReadOnly {
+addErr("%s volumes must be read-only", v.AccessMode)
+}
+default:
+// dynamic host volumes are all "per node" so there's no way to
+// validate that other access modes work for a given volume until we
+// have access to other allocations (in the scheduler)
+}

case VolumeTypeCSI:

switch v.AttachmentMode {
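
Per the validation change above, a reader-mode host volume request must also be marked read-only. A hedged sketch (other required fields elided; Validate's remaining arguments shown with assumed values):

req := &structs.VolumeRequest{
    Type:       structs.VolumeTypeHost,
    Source:     "shared-data", // hypothetical
    AccessMode: structs.CSIVolumeAccessModeSingleNodeReader,
    ReadOnly:   false,
}
// fails validation: reader-mode volumes must be read-only
err := req.Validate(structs.JobTypeService, 1, 0)

req.ReadOnly = true
err = req.Validate(structs.JobTypeService, 1, 0) // now passes this check
_ = err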
30 changes: 13 additions & 17 deletions scheduler/feasible.go
@@ -137,9 +137,10 @@ func NewRandomIterator(ctx Context, nodes []*structs.Node) *StaticIterator {
// HostVolumeChecker is a FeasibilityChecker which returns whether a node has
// the host volumes necessary to schedule a task group.
type HostVolumeChecker struct {
ctx Context
volumeReqs []*structs.VolumeRequest
namespace string
+previousAllocationID string
}

// NewHostVolumeChecker creates a HostVolumeChecker from a set of volumes
@@ -151,9 +152,10 @@ func NewHostVolumeChecker(ctx Context) *HostVolumeChecker {
}

// SetVolumes takes the volumes required by a task group and updates the checker.
-func (h *HostVolumeChecker) SetVolumes(allocName string, ns string, volumes map[string]*structs.VolumeRequest) {
+func (h *HostVolumeChecker) SetVolumes(allocName string, ns string, volumes map[string]*structs.VolumeRequest, prevAllocID string) {
h.namespace = ns
h.volumeReqs = []*structs.VolumeRequest{}
h.previousAllocationID = prevAllocID
for _, req := range volumes {
if req.Type != structs.VolumeTypeHost {
continue // filter CSI volumes
@@ -194,26 +196,20 @@ func (h *HostVolumeChecker) hasVolumes(n *structs.Node) bool {
}

if volCfg.ID != "" { // dynamic host volume
-vol, err := h.ctx.State().HostVolumeByID(nil, h.namespace, volCfg.ID, false)
+vol, err := h.ctx.State().HostVolumeByID(nil, h.namespace, volCfg.ID, true)
if err != nil || vol == nil {
// node fingerprint has a dynamic volume that's no longer in the
// state store; this is only possible if the batched fingerprint
// update from a delete RPC is written before the delete RPC's
// raft entry completes
return false
}
-if vol.State != structs.HostVolumeStateReady {
-return false
-}
-var capOk bool
-for _, cap := range vol.RequestedCapabilities {
-if req.AccessMode == structs.CSIVolumeAccessMode(cap.AccessMode) &&
-req.AttachmentMode == structs.CSIVolumeAttachmentMode(cap.AttachmentMode) {
-capOk = true
-break
-}
-}
-if !capOk {
+if !vol.IsAvailable(
+structs.HostVolumeAccessMode(req.AccessMode),
+structs.HostVolumeAttachmentMode(req.AttachmentMode),
+req.ReadOnly,
+h.previousAllocationID,
+) {
return false
}
} else if !req.ReadOnly {
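
A hedged sketch of how a caller now exercises the checker (variable names are assumed; the real wiring lives in the scheduler's feasibility stack):

checker := NewHostVolumeChecker(ctx)
// prevAllocID is empty for fresh placements; for a replacement it lets the
// incoming alloc take over a single-writer claim held by its predecessor.
checker.SetVolumes(allocName, job.Namespace, taskGroup.Volumes, prevAllocID)
feasible := checker.Feasible(node)
_ = feasible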
1 change: 1 addition & 0 deletions scheduler/generic_sched.go
@@ -838,6 +838,7 @@ func getSelectOptions(prevAllocation *structs.Allocation, preferredNode *structs
}
}
selectOptions.PenaltyNodeIDs = penaltyNodes
selectOptions.PreviousAllocID = prevAllocation.ID
}
if preferredNode != nil {
selectOptions.PreferredNodes = []*structs.Node{preferredNode}