Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dynamic host volumes: fingerprint client plugins #24589

Merged
merged 5 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import (
"github.com/hashicorp/nomad/client/dynamicplugins"
"github.com/hashicorp/nomad/client/fingerprint"
"github.com/hashicorp/nomad/client/hoststats"
"github.com/hashicorp/nomad/client/hostvolumemanager"
hvm "github.com/hashicorp/nomad/client/hostvolumemanager"
cinterfaces "github.com/hashicorp/nomad/client/interfaces"
"github.com/hashicorp/nomad/client/lib/cgroupslib"
"github.com/hashicorp/nomad/client/lib/numalib"
Expand Down Expand Up @@ -290,7 +290,7 @@ type Client struct {
// drivermanager is responsible for managing driver plugins
drivermanager drivermanager.Manager

hostVolumeManager *hostvolumemanager.HostVolumeManager
hostVolumeManager *hvm.HostVolumeManager

// baseLabels are used when emitting tagged metrics. All client metrics will
// have these tags, and optionally more.
Expand Down Expand Up @@ -535,7 +535,9 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie
c.devicemanager = devManager
c.pluginManagers.RegisterAndRun(devManager)

c.hostVolumeManager = hostvolumemanager.NewHostVolumeManager(cfg.AllocMountsDir, logger)
c.hostVolumeManager = hvm.NewHostVolumeManager(logger,
cfg.DynamicHostVolumePluginPath,
cfg.AllocMountsDir)

// Set up the service registration wrapper using the Consul and Nomad
// implementations. The Nomad implementation is only ever used on the
Expand Down
3 changes: 3 additions & 0 deletions client/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,9 @@ type Config struct {
// HostVolumes is a map of the configured host volumes by name.
HostVolumes map[string]*structs.ClientHostVolumeConfig

// DynamicHostVolumePluginPath is the directory containing DHV plugins.
DynamicHostVolumePluginPath string

// HostNetworks is a map of the conigured host networks by name.
HostNetworks map[string]*structs.ClientHostNetworkConfig

Expand Down
119 changes: 119 additions & 0 deletions client/fingerprint/dynamic_host_volumes.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package fingerprint

import (
"context"
"os"
"strings"
"sync"
"time"

"github.com/hashicorp/go-hclog"
hvm "github.com/hashicorp/nomad/client/hostvolumemanager"
"github.com/hashicorp/nomad/helper"
)

func NewPluginsHostVolumeFingerprint(logger hclog.Logger) Fingerprint {
return &DynamicHostVolumePluginFingerprint{
logger: logger.Named("host_volume_plugins"),
}
}

var _ ReloadableFingerprint = &DynamicHostVolumePluginFingerprint{}

type DynamicHostVolumePluginFingerprint struct {
logger hclog.Logger
}

func (h *DynamicHostVolumePluginFingerprint) Reload() {
// host volume plugins are re-detected on agent reload
}

func (h *DynamicHostVolumePluginFingerprint) Fingerprint(request *FingerprintRequest, response *FingerprintResponse) error {
// always add "mkdir" plugin
h.logger.Debug("detected plugin built-in", "plugin_id", "mkdir", "version", hvm.HostVolumePluginMkdirVersion)
defer response.AddAttribute("plugins.dhv.version.mkdir", hvm.HostVolumePluginMkdirVersion)
response.Detected = true

// this config value will be empty in -dev mode
pluginDir := request.Config.DynamicHostVolumePluginPath
if pluginDir == "" {
return nil
}

plugins, err := GetHostVolumePluginVersions(h.logger, pluginDir)
if err != nil {
if os.IsNotExist(err) {
h.logger.Debug("plugin dir does not exist", "dir", pluginDir)
} else {
h.logger.Warn("error finding plugins", "dir", pluginDir, "error", err)
}
return nil // don't halt agent start
}

// if this was a reload, wipe what was there before
for k := range request.Node.Attributes {
if strings.HasPrefix(k, "plugins.dhv.") {
response.RemoveAttribute(k)
}
}

// set the attribute(s)
for plugin, version := range plugins {
h.logger.Debug("detected plugin", "plugin_id", plugin, "version", version)
response.AddAttribute("plugins.dhv.version."+plugin, version)
}

return nil
}

func (h *DynamicHostVolumePluginFingerprint) Periodic() (bool, time.Duration) {
return false, 0 // not periodic; db TODO(1.10.0): could be?
}

// GetHostVolumePluginVersions finds all the executable files on disk
// that respond to a Version call (arg $1 = 'version' / env $OPERATION = 'version')
// The return map's keys are plugin IDs, and the values are version strings.
func GetHostVolumePluginVersions(log hclog.Logger, pluginDir string) (map[string]string, error) {
files, err := helper.FindExecutableFiles(pluginDir)
if err != nil {
return nil, err
}

plugins := make(map[string]string)
mut := sync.Mutex{}
var wg sync.WaitGroup

for file, fullPath := range files {
wg.Add(1)
go func(file, fullPath string) {
defer wg.Done()
// really should take way less than a second
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Someone is totally going to write a plugin on the JVM that takes 10s to assemble a DI framework on startup, and we'll 🤦 about it. But this seems like a reasonable assumption. 😁

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yaknow, I wonder if plugin authors might even use the "responds to version" aspect as a way to auto-disable plugins that won't be able to fulfill requests, like if they can't reach an NFS server, or who knows. they might want to try until some timeout, and that might reasonably be longer than 1s...

ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()

log := log.With("plugin_id", file) // db TODO: "name" like CNI, instead of "ID" ?

p, err := hvm.NewHostVolumePluginExternal(log, file, fullPath, "")
if err != nil {
log.Debug("error getting plugin", "error", err)
return
}

version, err := p.Version(ctx)
if err != nil {
log.Debug("failed to get version from plugin", "error", err)
return
}

mut.Lock()
plugins[file] = version.String()
mut.Unlock()
}(file, fullPath)
}

wg.Wait()
return plugins, nil
}
89 changes: 89 additions & 0 deletions client/fingerprint/dynamic_host_volumes_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package fingerprint

import (
"os"
"path/filepath"
"runtime"
"testing"

"github.com/hashicorp/nomad/client/config"
hvm "github.com/hashicorp/nomad/client/hostvolumemanager"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/shoenig/test/must"
)

// this is more of a full integration test of:
// fingerprint <- find plugins <- find executables
func TestPluginsHostVolumeFingerprint(t *testing.T) {
cfg := &config.Config{DynamicHostVolumePluginPath: ""}
node := &structs.Node{Attributes: map[string]string{}}
req := &FingerprintRequest{Config: cfg, Node: node}
fp := NewPluginsHostVolumeFingerprint(testlog.HCLogger(t))

// this fingerprint is not mandatory, so no error should be returned
for name, path := range map[string]string{
"empty": "",
"non-existent": "/nowhere",
"impossible": "dynamic_host_volumes_test.go",
} {
t.Run(name, func(t *testing.T) {
resp := FingerprintResponse{}
cfg.DynamicHostVolumePluginPath = path
err := fp.Fingerprint(req, &resp)
must.NoError(t, err)
must.True(t, resp.Detected) // always true due to "mkdir" built-in
})
}

if runtime.GOOS == "windows" {
t.Skip("test scripts not built for windows") // db TODO(1.10.0)
}

// happy path: dir exists. this one will contain a single valid plugin.
tmp := t.TempDir()
cfg.DynamicHostVolumePluginPath = tmp

files := []struct {
name string
contents string
perm os.FileMode
}{
// only this first one should be detected as a valid plugin
{"happy-plugin", "#!/usr/bin/env sh\necho '0.0.1'", 0700},
{"not-a-plugin", "#!/usr/bin/env sh\necho 'not-a-version'", 0700},
{"unhappy-plugin", "#!/usr/bin/env sh\necho '0.0.2'; exit 1", 0700},
{"not-executable", "hello", 0400},
}
for _, f := range files {
must.NoError(t, os.WriteFile(filepath.Join(tmp, f.name), []byte(f.contents), f.perm))
}
// directories should be ignored
must.NoError(t, os.Mkdir(filepath.Join(tmp, "a-directory"), 0700))

// do the fingerprint
resp := FingerprintResponse{}
err := fp.Fingerprint(req, &resp)
must.NoError(t, err)
must.Eq(t, map[string]string{
"plugins.dhv.version.happy-plugin": "0.0.1",
"plugins.dhv.version.mkdir": hvm.HostVolumePluginMkdirVersion, // built-in
}, resp.Attributes)

// do it again after deleting our one good plugin.
// repeat runs should wipe attributes, so nothing should remain.
node.Attributes = resp.Attributes
must.NoError(t, os.Remove(filepath.Join(tmp, "happy-plugin")))

resp = FingerprintResponse{}
err = fp.Fingerprint(req, &resp)
must.NoError(t, err)
must.Eq(t, map[string]string{
"plugins.dhv.version.happy-plugin": "", // will get cleaned up later

"plugins.dhv.version.mkdir": hvm.HostVolumePluginMkdirVersion, // built-in
}, resp.Attributes)
}
1 change: 1 addition & 0 deletions client/fingerprint/fingerprint.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ var (
"network": NewNetworkFingerprint,
"nomad": NewNomadFingerprint,
"plugins_cni": NewPluginsCNIFingerprint,
"plugins_dhv": NewPluginsHostVolumeFingerprint,
"signal": NewSignalFingerprint,
"storage": NewStorageFingerprint,
"vault": NewVaultFingerprint,
Expand Down
13 changes: 7 additions & 6 deletions client/host_volume_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
"testing"

"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client/hostvolumemanager"
hvm "github.com/hashicorp/nomad/client/hostvolumemanager"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/shoenig/test/must"
Expand All @@ -22,8 +22,8 @@ func TestHostVolume(t *testing.T) {

tmp := t.TempDir()
expectDir := filepath.Join(tmp, "test-vol-id")
hvm := hostvolumemanager.NewHostVolumeManager(tmp, testlog.HCLogger(t))
client.hostVolumeManager = hvm
client.hostVolumeManager = hvm.NewHostVolumeManager(testlog.HCLogger(t),
"/no/ext/plugins", tmp)

t.Run("happy", func(t *testing.T) {
req := &cstructs.ClientHostVolumeCreateRequest{
Expand Down Expand Up @@ -60,19 +60,20 @@ func TestHostVolume(t *testing.T) {
}
var resp cstructs.ClientHostVolumeCreateResponse
err := client.ClientRPC("HostVolume.Create", req, &resp)
must.EqError(t, err, `no such plugin "non-existent"`)
must.EqError(t, err, `no such plugin: "non-existent"`)

delReq := &cstructs.ClientHostVolumeDeleteRequest{
PluginID: "non-existent",
}
var delResp cstructs.ClientHostVolumeDeleteResponse
err = client.ClientRPC("HostVolume.Delete", delReq, &delResp)
must.EqError(t, err, `no such plugin "non-existent"`)
must.EqError(t, err, `no such plugin: "non-existent"`)
})

t.Run("error from plugin", func(t *testing.T) {
// "mkdir" plugin can't create a directory within a file
client.hostVolumeManager = hostvolumemanager.NewHostVolumeManager("host_volume_endpoint_test.go", testlog.HCLogger(t))
client.hostVolumeManager = hvm.NewHostVolumeManager(testlog.HCLogger(t),
"/no/ext/plugins", "host_volume_endpoint_test.go")

req := &cstructs.ClientHostVolumeCreateRequest{
ID: "test-vol-id",
Expand Down
27 changes: 26 additions & 1 deletion client/hostvolumemanager/host_volume_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ type HostVolumePluginCreateResponse struct {
Context map[string]string `json:"context"` // metadata
}

const HostVolumePluginMkdirVersion = "0.0.1"

var _ HostVolumePlugin = &HostVolumePluginMkdir{}

type HostVolumePluginMkdir struct {
Expand All @@ -44,7 +46,7 @@ type HostVolumePluginMkdir struct {
}

func (p *HostVolumePluginMkdir) Version(_ context.Context) (*version.Version, error) {
return version.NewVersion("0.0.1")
return version.NewVersion(HostVolumePluginMkdirVersion)
}

func (p *HostVolumePluginMkdir) Create(_ context.Context,
Expand Down Expand Up @@ -91,6 +93,29 @@ func (p *HostVolumePluginMkdir) Delete(_ context.Context, req *cstructs.ClientHo

var _ HostVolumePlugin = &HostVolumePluginExternal{}

func NewHostVolumePluginExternal(log hclog.Logger,
id, executable, targetPath string) (*HostVolumePluginExternal, error) {
// this should only be called with already-detected executables,
// but we'll double-check it anyway, so we can provide a tidy error message
// if it has changed between fingerprinting and execution.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice catch on this.

f, err := os.Stat(executable)
if err != nil {
if os.IsNotExist(err) {
return nil, fmt.Errorf("%w: %q", ErrPluginNotExists, id)
}
return nil, err
}
if !helper.IsExecutable(f) {
return nil, fmt.Errorf("%w: %q", ErrPluginNotExecutable, id)
}
return &HostVolumePluginExternal{
ID: id,
Executable: executable,
TargetPath: targetPath,
log: log,
}, nil
}

type HostVolumePluginExternal struct {
ID string
Executable string
Expand Down
Loading
Loading