diff --git a/lxd/storage/connectors/connector.go b/lxd/storage/connectors/connector.go new file mode 100644 index 000000000000..d64a5c57cd5b --- /dev/null +++ b/lxd/storage/connectors/connector.go @@ -0,0 +1,76 @@ +package connectors + +import ( + "context" +) + +const ( + // TypeUnknown represents an unknown storage connector. + TypeUnknown string = "unknown" + + // TypeNVME represents an NVMe/TCP storage connector. + TypeNVME string = "nvme" + + // TypeSDC represents Dell SDC storage connector. + TypeSDC string = "sdc" +) + +// Connector represents a storage connector that handles connections through +// appropriate storage subsystem. +type Connector interface { + Type() string + Version() (string, error) + QualifiedName() (string, error) + LoadModules() bool + SessionID(targetQN string) (string, error) + Connect(ctx context.Context, targetAddr string, targetQN string) error + ConnectAll(ctx context.Context, targetAddr string) error + Disconnect(targetQN string) error + DisconnectAll() error +} + +// NewConnector instantiates a new connector of the given type. +// The caller needs to ensure connector type is validated before calling this +// function, as common (empty) connector is returned for unknown type. +func NewConnector(connectorType string, serverUUID string) Connector { + common := common{ + serverUUID: serverUUID, + } + + switch connectorType { + case TypeNVME: + return &connectorNVMe{ + common: common, + } + + case TypeSDC: + return &connectorNVMe{ + common: common, + } + + default: + // Return common connector if the type is unknown. This removes + // the need to check for nil or handle the error in the caller. + return &common + } +} + +// GetSupportedVersions returns the versions for the given connector types +// ignoring those that produce an error when version is being retrieved +// (e.g. due to a missing required tools). +func GetSupportedVersions(connectorTypes []string) []string { + versions := make([]string, 0, len(connectorTypes)) + + // Iterate over the supported connectors, extracting version and loading + // kernel module for each of them. + for _, connectorType := range connectorTypes { + version, err := NewConnector(connectorType, "").Version() + if err != nil { + continue + } + + versions = append(versions, version) + } + + return versions +} diff --git a/lxd/storage/connectors/connector_common.go b/lxd/storage/connectors/connector_common.go new file mode 100644 index 000000000000..a875b6725c5d --- /dev/null +++ b/lxd/storage/connectors/connector_common.go @@ -0,0 +1,58 @@ +package connectors + +import ( + "context" + "fmt" +) + +var _ Connector = &common{} + +type common struct { + serverUUID string +} + +// Type returns the name of the connector. +func (c *common) Type() string { + return TypeUnknown +} + +// Version returns the version of the connector. +func (c *common) Version() (string, error) { + return "", fmt.Errorf("Version not implemented") +} + +// QualifiedName returns the qualified name of the connector. +func (c *common) QualifiedName() (string, error) { + return "", fmt.Errorf("QualifiedName not implemented") +} + +// LoadModules loads the necessary kernel modules. +func (c *common) LoadModules() bool { + return true +} + +// SessionID returns the identifier of a session that matches the connector's qualified name. +// If there is no such session, an empty string is returned. +func (c *common) SessionID(targetQN string) (string, error) { + return "", fmt.Errorf("ExistingSession not implemented") +} + +// Connect establishes a connection with the target on the given address. +func (c common) Connect(ctx context.Context, targetAddr string, targetQN string) error { + return fmt.Errorf("Connect not implemented") +} + +// ConnectAll establishes a connection with all targets available on the given address. +func (c common) ConnectAll(ctx context.Context, targetAddr string) error { + return fmt.Errorf("ConnectAll not implemented") +} + +// Disconnect terminates a connection with the target. +func (c common) Disconnect(targetQN string) error { + return fmt.Errorf("Disconnect not implemented") +} + +// DisconnectAll terminates all connections with all targets. +func (c common) DisconnectAll() error { + return fmt.Errorf("DisconnectAll not implemented") +} diff --git a/lxd/storage/connectors/connector_nvme.go b/lxd/storage/connectors/connector_nvme.go new file mode 100644 index 000000000000..cde08ddd1c03 --- /dev/null +++ b/lxd/storage/connectors/connector_nvme.go @@ -0,0 +1,171 @@ +package connectors + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/canonical/lxd/lxd/util" + "github.com/canonical/lxd/shared" +) + +var _ Connector = &connectorNVMe{} + +type connectorNVMe struct { + common +} + +// Type returns the type of the connector. +func (c *connectorNVMe) Type() string { + return TypeNVME +} + +// Version returns the version of the NVMe CLI. +func (c *connectorNVMe) Version() (string, error) { + // Detect and record the version of the NVMe CLI. + out, err := shared.RunCommand("nvme", "version") + if err != nil { + return "", fmt.Errorf("Failed to get nvme-cli version: %w", err) + } + + fields := strings.Split(strings.TrimSpace(out), " ") + if strings.HasPrefix(out, "nvme version ") && len(fields) > 2 { + return fmt.Sprintf("%s (nvme-cli)", fields[2]), nil + } + + return "", fmt.Errorf("Failed to get nvme-cli version: Unexpected output %q", out) +} + +// LoadModules loads the NVMe/TCP kernel modules. +// Returns true if the modules can be loaded. +func (c *connectorNVMe) LoadModules() bool { + err := util.LoadModule("nvme_fabrics") + if err != nil { + return false + } + + err = util.LoadModule("nvme_tcp") + return err == nil +} + +// QualifiedName returns a custom NQN generated from the server UUID. +// Getting the NQN from /etc/nvme/hostnqn would require the nvme-cli +// package to be installed on the host. +func (c *connectorNVMe) QualifiedName() (string, error) { + return fmt.Sprintf("nqn.2014-08.org.nvmexpress:uuid:%s", c.serverUUID), nil +} + +// SessionID returns the target's qualified name (NQN) if a corresponding +// session is found. Otherwise, an empty string is returned. +func (c *connectorNVMe) SessionID(targetQN string) (string, error) { + // Base path for NVMe sessions/subsystems. + basePath := "/sys/devices/virtual/nvme-subsystem" + + // Retrieve list of existing NVMe sessions on this host. + directories, err := os.ReadDir(basePath) + if err != nil { + if os.IsNotExist(err) { + // No active sessions because NVMe subsystems directory does not exist. + return "", nil + } + + return "", fmt.Errorf("Failed getting a list of existing NVMe subsystems: %w", err) + } + + for _, directory := range directories { + subsystemName := directory.Name() + + // Get the target NQN. + nqnBytes, err := os.ReadFile(filepath.Join(basePath, subsystemName, "subsysnqn")) + if err != nil { + return "", fmt.Errorf("Failed getting the target NQN for subystem %q: %w", subsystemName, err) + } + + if strings.Contains(string(nqnBytes), targetQN) { + // Already connected. + return targetQN, nil + } + } + + return "", nil +} + +// Connect establishes a connection with the target on the given address. +func (c *connectorNVMe) Connect(ctx context.Context, targetAddr string, targetQN string) error { + hostNQN, err := c.QualifiedName() + if err != nil { + return err + } + + // Try to find an existing NVMe session. + targetNQN, err := c.SessionID(targetQN) + if err != nil { + return err + } + + if targetNQN != "" { + // Already connected. + return nil + } + + _, stderr, err := shared.RunCommandSplit(ctx, nil, nil, "nvme", "connect", "--transport", "tcp", "--traddr", targetAddr, "--nqn", targetQN, "--hostnqn", hostNQN, "--hostid", c.serverUUID) + if err != nil { + return fmt.Errorf("Failed to connect to target %q on %q via NVMe: %w", targetQN, targetAddr, err) + } + + if stderr != "" { + return fmt.Errorf("Failed to connect to target %q on %q via NVMe: %s", targetQN, targetAddr, stderr) + } + + return nil +} + +// ConnectAll establishes a connection with all targets available on the given address. +func (c *connectorNVMe) ConnectAll(ctx context.Context, targetAddr string) error { + hostNQN, err := c.QualifiedName() + if err != nil { + return err + } + + _, stderr, err := shared.RunCommandSplit(ctx, nil, nil, "nvme", "connect-all", "--transport", "tcp", "--traddr", targetAddr, "--hostnqn", hostNQN, "--hostid", c.serverUUID) + if err != nil { + return fmt.Errorf("Failed to connect to any target on %q via NVMe: %w", targetAddr, err) + } + + if stderr != "" { + return fmt.Errorf("Failed to connect to any target on %q via NVMe: %s", targetAddr, stderr) + } + + return nil +} + +// Disconnect terminates a connection with the target. +func (c *connectorNVMe) Disconnect(targetQN string) error { + // Find an existing NVMe session. + targetNQN, err := c.SessionID(targetQN) + if err != nil { + return err + } + + // Disconnect from the NVMe target if there is an existing session. + if targetNQN != "" { + _, err := shared.RunCommand("nvme", "disconnect", "--nqn", targetNQN) + if err != nil { + return fmt.Errorf("Failed disconnecting from NVMe target %q: %w", targetNQN, err) + } + } + + return nil +} + +// DisconnectAll terminates all connections with all targets. +func (c *connectorNVMe) DisconnectAll() error { + _, err := shared.RunCommand("nvme", "disconnect-all") + if err != nil { + return fmt.Errorf("Failed disconnecting from NVMe targets: %w", err) + } + + return nil +} diff --git a/lxd/storage/connectors/connector_sdc.go b/lxd/storage/connectors/connector_sdc.go new file mode 100644 index 000000000000..3bb074676b28 --- /dev/null +++ b/lxd/storage/connectors/connector_sdc.go @@ -0,0 +1,53 @@ +package connectors + +import ( + "context" +) + +var _ Connector = &connectorSDC{} + +type connectorSDC struct { + common +} + +// Type returns the type of the connector. +func (c *connectorSDC) Type() string { + return TypeSDC +} + +// LoadModules returns true. SDC does not require any kernel modules to be loaded. +func (c *connectorSDC) LoadModules() bool { + return true +} + +// QualifiedName returns an empty string and no error. SDC has no qualified name. +func (c *connectorSDC) QualifiedName() (string, error) { + return "", nil +} + +// SessionID returns an empty string and no error, as connections are handled by SDC. +func (c *connectorSDC) SessionID(targetQN string) (string, error) { + return "", nil +} + +// Connect does nothing. Connections are fully handled by SDC. +func (c *connectorSDC) Connect(ctx context.Context, targetAddr string, targetQN string) error { + // Nothing to do. Connection is handled by Dell SDC. + return nil +} + +// ConnectAll does nothing. Connections are fully handled by SDC. +func (c *connectorSDC) ConnectAll(ctx context.Context, targetAddr string) error { + // Nothing to do. Connection is handled by Dell SDC. + return nil +} + +// Disconnect does nothing. Connections are fully handled by SDC. +func (c *connectorSDC) Disconnect(targetQN string) error { + return nil +} + +// DisconnectAll does nothing. Connections are fully handled by SDC. +func (c *connectorSDC) DisconnectAll() error { + return nil +} diff --git a/lxd/storage/connectors/utils.go b/lxd/storage/connectors/utils.go new file mode 100644 index 000000000000..0fa7b9d7b77d --- /dev/null +++ b/lxd/storage/connectors/utils.go @@ -0,0 +1,126 @@ +package connectors + +import ( + "context" + "errors" + "fmt" + "path/filepath" + "strings" + "time" + + "golang.org/x/sys/unix" + + "github.com/canonical/lxd/lxd/resources" + "github.com/canonical/lxd/shared" +) + +// devicePathFilterFunc is a function that accepts device path and returns true +// if the path matches the required criteria. +type devicePathFilterFunc func(devPath string) bool + +// GetDiskDevicePath checks whether the disk device with a given prefix and suffix +// exists in /dev/disk/by-id directory. A device path is returned if the device is +// found, otherwise an error is returned. +func GetDiskDevicePath(diskNamePrefix string, diskPathFilter devicePathFilterFunc) (string, error) { + devPath, err := findDiskDevicePath(diskNamePrefix, diskPathFilter) + if err != nil { + return "", err + } + + if devPath == "" { + return "", fmt.Errorf("Device not found") + } + + return devPath, nil +} + +// WaitDiskDevicePath waits for the disk device to appear in /dev/disk/by-id. +// It periodically checks for the device to appear and returns the device path +// once it is found. If the device does not appear within the timeout, an error +// is returned. +func WaitDiskDevicePath(ctx context.Context, diskNamePrefix string, diskPathFilter devicePathFilterFunc) (string, error) { + var err error + var diskPath string + + ctx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + for { + // Check if the device is already present. + diskPath, err = findDiskDevicePath(diskNamePrefix, diskPathFilter) + if err != nil && !errors.Is(err, unix.ENOENT) { + return "", err + } + + // If the device is found, return the device path. + if diskPath != "" { + break + } + + // Check if context is cancelled. + if ctx.Err() != nil { + return "", ctx.Err() + } + + time.Sleep(500 * time.Millisecond) + } + + return diskPath, nil +} + +// findDiskDevivePath iterates over device names in /dev/disk/by-id directory and +// returns the path to the disk device that matches the given prefix and suffix. +// Disk partitions are skipped, and an error is returned if the device is not found. +func findDiskDevicePath(diskNamePrefix string, diskPathFilter devicePathFilterFunc) (string, error) { + var diskPaths []string + + // If there are no other disks on the system by id, the directory might not + // even be there. Returns ENOENT in case the by-id/ directory does not exist. + diskPaths, err := resources.GetDisksByID(diskNamePrefix) + if err != nil { + return "", err + } + + for _, diskPath := range diskPaths { + // Skip the disk if it is only a partition of the actual volume. + if strings.Contains(diskPath, "-part") { + continue + } + + // Use custom disk path filter, if one is provided. + if diskPathFilter != nil && !diskPathFilter(diskPath) { + continue + } + + // The actual device might not already be created. + // Returns ENOENT in case the device does not exist. + devPath, err := filepath.EvalSymlinks(diskPath) + if err != nil { + return "", err + } + + return devPath, nil + } + + return "", nil +} + +// WaitDiskDeviceGone waits for the disk device to disappear from /dev/disk/by-id. +// It periodically checks for the device to disappear and returns once the device +// is gone. If the device does not disappear within the timeout, an error is returned. +func WaitDiskDeviceGone(ctx context.Context, diskPath string, timeoutSeconds int) bool { + ctx, cancel := context.WithTimeout(ctx, time.Duration(timeoutSeconds)*time.Second) + defer cancel() + + for { + if !shared.PathExists(diskPath) { + return true + } + + if ctx.Err() != nil { + return false + } + + time.Sleep(500 * time.Millisecond) + } +} diff --git a/lxd/storage/drivers/driver_powerflex.go b/lxd/storage/drivers/driver_powerflex.go index b523302e41ad..d7bb43bbf7d3 100644 --- a/lxd/storage/drivers/driver_powerflex.go +++ b/lxd/storage/drivers/driver_powerflex.go @@ -8,6 +8,7 @@ import ( "github.com/canonical/lxd/lxd/migration" "github.com/canonical/lxd/lxd/operations" + "github.com/canonical/lxd/lxd/storage/connectors" "github.com/canonical/lxd/shared" "github.com/canonical/lxd/shared/api" "github.com/canonical/lxd/shared/validate" @@ -19,10 +20,10 @@ const powerFlexDefaultUser = "admin" // powerFlexDefaultSize represents the default PowerFlex volume size. const powerFlexDefaultSize = "8GiB" -const ( - powerFlexModeNVMe = "nvme" - powerFlexModeSDC = "sdc" -) +var powerflexSupportedConnectors = []string{ + connectors.TypeNVME, + connectors.TypeSDC, +} var powerFlexLoaded bool var powerFlexVersion string @@ -30,6 +31,10 @@ var powerFlexVersion string type powerflex struct { common + // Holds the low level connector for the PowerFlex driver. + // Use powerflex.connector() to retrieve the initialized connector. + storageConnector connectors.Connector + // Holds the low level HTTP client for the PowerFlex API. // Use powerflex.client() to retrieve the client struct. httpClient *powerFlexClient @@ -46,28 +51,29 @@ func (d *powerflex) load() error { return nil } - // Detect and record the version. - // The NVMe CLI is shipped with the snap. - out, err := shared.RunCommand("nvme", "version") - if err != nil { - return fmt.Errorf("Failed to get nvme-cli version: %w", err) - } - - fields := strings.Split(strings.TrimSpace(out), " ") - if strings.HasPrefix(out, "nvme version ") && len(fields) > 2 { - powerFlexVersion = fmt.Sprintf("%s (nvme-cli)", fields[2]) - } + versions := connectors.GetSupportedVersions(powerflexSupportedConnectors) + powerFlexVersion = strings.Join(versions, " / ") + powerFlexLoaded = true - // Load the NVMe/TCP kernel modules. + // Load the kernel modules of the respective connector. // Ignore if the modules cannot be loaded. - // Support for the NVMe/TCP mode is checked during pool creation. + // Support for a specific connector is checked during pool creation. // When a LXD host gets rebooted this ensures that the kernel modules are still loaded. - _ = d.loadNVMeModules() + _ = d.connector().LoadModules() - powerFlexLoaded = true return nil } +// connector retrieves an initialized storage connector based on the configured +// PowerFlex mode. The connector is cached in the driver struct. +func (d *powerflex) connector() connectors.Connector { + if d.storageConnector == nil { + d.storageConnector = connectors.NewConnector(d.config["powerflex.mode"], d.state.ServerUUID) + } + + return d.storageConnector +} + // isRemote returns true indicating this driver uses remote storage. func (d *powerflex) isRemote() bool { return true @@ -102,10 +108,13 @@ func (d *powerflex) FillConfig() error { // First try if the NVMe/TCP kernel modules can be loaed. // Second try if the SDC kernel module is setup. if d.config["powerflex.mode"] == "" { - if d.loadNVMeModules() { - d.config["powerflex.mode"] = powerFlexModeNVMe + // Create temporary connector to check if NVMe/TCP kernel modules can be loaded. + nvmeConnector := connectors.NewConnector(connectors.TypeNVME, "") + + if nvmeConnector.LoadModules() { + d.config["powerflex.mode"] = connectors.TypeNVME } else if goscaleio.DrvCfgIsSDCInstalled() { - d.config["powerflex.mode"] = powerFlexModeSDC + d.config["powerflex.mode"] = connectors.TypeSDC } } @@ -139,7 +148,7 @@ func (d *powerflex) Create() error { client := d.client() switch d.config["powerflex.mode"] { - case powerFlexModeNVMe: + case connectors.TypeNVME: // Discover one of the storage pools SDT services. if d.config["powerflex.sdt"] == "" { pool, err := d.resolvePool() @@ -163,7 +172,7 @@ func (d *powerflex) Create() error { d.config["powerflex.sdt"] = relations[0].IPList[0].IP } - case powerFlexModeSDC: + case connectors.TypeSDC: if d.config["powerflex.sdt"] != "" { return fmt.Errorf("The powerflex.sdt config key is specific to the NVMe/TCP mode") } @@ -280,14 +289,23 @@ func (d *powerflex) Validate(config map[string]string) error { return err } + newMode := config["powerflex.mode"] + oldMode := d.config["powerflex.mode"] + + // Ensure powerflex.mode cannot be changed to avoid leaving volume mappings + // and to prevent disturbing running instances. + if oldMode != "" && oldMode != newMode { + return fmt.Errorf("PowerFlex mode cannot be changed") + } + // Check if the selected PowerFlex mode is supported on this node. // Also when forming the storage pool on a LXD cluster, the mode // that got discovered on the creating machine needs to be validated // on the other cluster members too. This can be done here since Validate // gets executed on every cluster member when receiving the cluster // notification to finally create the pool. - if d.config["powerflex.mode"] == powerFlexModeNVMe && !d.loadNVMeModules() { - return fmt.Errorf("NVMe/TCP is not supported") + if newMode != "" && !connectors.NewConnector(newMode, "").LoadModules() { + return fmt.Errorf("PowerFlex mode %q is not supported due to missing kernel modules", newMode) } return nil diff --git a/lxd/storage/drivers/driver_powerflex_utils.go b/lxd/storage/drivers/driver_powerflex_utils.go index 6c8433f01da9..827bcc75cb48 100644 --- a/lxd/storage/drivers/driver_powerflex_utils.go +++ b/lxd/storage/drivers/driver_powerflex_utils.go @@ -2,27 +2,19 @@ package drivers import ( "bytes" - "context" "crypto/tls" "encoding/base64" "encoding/json" - "errors" "fmt" "io" "net/http" - "os" - "path/filepath" "strconv" "strings" - "time" "github.com/dell/goscaleio" "github.com/google/uuid" - "golang.org/x/sys/unix" - "github.com/canonical/lxd/lxd/locking" - "github.com/canonical/lxd/lxd/resources" - "github.com/canonical/lxd/lxd/util" + "github.com/canonical/lxd/lxd/storage/connectors" "github.com/canonical/lxd/shared" "github.com/canonical/lxd/shared/api" "github.com/canonical/lxd/shared/revert" @@ -65,7 +57,7 @@ type powerFlexError map[string]any // Error tries to return all kinds of errors from the PowerFlex API in a nicely formatted way. func (p *powerFlexError) Error() string { - var errorStrings []string + errorStrings := make([]string, 0, len(*p)) for k, v := range *p { errorStrings = append(errorStrings, fmt.Sprintf("%s: %v", k, v)) } @@ -747,18 +739,6 @@ func (p *powerFlexClient) getHostVolumeMappings(hostID string) ([]powerFlexVolum return actualResponse, nil } -// loadNVMeModules loads the NVMe/TCP kernel modules. -// Returns true if the modules can be loaded. -func (d *powerflex) loadNVMeModules() bool { - err := util.LoadModule("nvme_fabrics") - if err != nil { - return false - } - - err = util.LoadModule("nvme_tcp") - return err == nil -} - // client returns the drivers PowerFlex client. // A new client gets created if it not yet exists. func (d *powerflex) client() *powerFlexClient { @@ -769,13 +749,6 @@ func (d *powerflex) client() *powerFlexClient { return d.httpClient } -// getHostNQN returns the unique NVMe nqn for the current host. -// A custom one is generated from the servers UUID since getting the nqn from /etc/nvme/hostnqn -// requires the nvme-cli package to be installed on the host. -func (d *powerflex) getHostNQN() string { - return fmt.Sprintf("nqn.2014-08.org.nvmexpress:uuid:%s", d.state.ServerUUID) -} - // getHostGUID returns the SDC GUID. // The GUID is unique for a single host. // Cache the GUID as it never changes for a single host. @@ -792,21 +765,6 @@ func (d *powerflex) getHostGUID() (string, error) { return d.sdcGUID, nil } -// getServerName returns the hostname of this host. -// It prefers the value from the daemons state in case LXD is clustered. -func (d *powerflex) getServerName() (string, error) { - if d.state.ServerName != "none" { - return d.state.ServerName, nil - } - - hostname, err := os.Hostname() - if err != nil { - return "", fmt.Errorf("Failed to get hostname: %w", err) - } - - return hostname, nil -} - // getVolumeType returns the selected provisioning type of the volume. // As a default it returns type thin. func (d *powerflex) getVolumeType(vol Volume) powerFlexVolumeType { @@ -825,24 +783,28 @@ func (d *powerflex) getVolumeType(vol Volume) powerFlexVolumeType { // createNVMeHost creates this NVMe host in PowerFlex. func (d *powerflex) createNVMeHost() (string, revert.Hook, error) { var hostID string - nqn := d.getHostNQN() + + hostNQN, err := d.connector().QualifiedName() + if err != nil { + return "", nil, err + } revert := revert.New() defer revert.Fail() client := d.client() - host, err := client.getNVMeHostByNQN(nqn) + host, err := client.getNVMeHostByNQN(hostNQN) if err != nil { if !api.StatusErrorCheck(err, http.StatusNotFound) { return "", nil, err } - hostname, err := d.getServerName() + hostname, err := ResolveServerName(d.state.ServerName) if err != nil { return "", nil, err } - hostID, err = client.createHost(hostname, nqn) + hostID, err = client.createHost(hostname, hostNQN) if err != nil { return "", nil, err } @@ -862,8 +824,13 @@ func (d *powerflex) createNVMeHost() (string, revert.Hook, error) { // deleteNVMeHost deletes this NVMe host in PowerFlex. func (d *powerflex) deleteNVMeHost() error { client := d.client() - nqn := d.getHostNQN() - host, err := client.getNVMeHostByNQN(nqn) + + hostNQN, err := d.connector().QualifiedName() + if err != nil { + return err + } + + host, err := client.getNVMeHostByNQN(hostNQN) if err != nil { // Skip the deletion if the host doesn't exist anymore. if api.StatusErrorCheck(err, http.StatusNotFound) { @@ -884,8 +851,8 @@ func (d *powerflex) mapVolume(vol Volume) (revert.Hook, error) { var hostID string switch d.config["powerflex.mode"] { - case powerFlexModeNVMe: - unlock, err := locking.Lock(d.state.ShutdownCtx, "nvme") + case connectors.TypeNVME: + unlock, err := storageConnectorLock(d.connector().Type(), "powerflex") if err != nil { return nil, err } @@ -899,7 +866,7 @@ func (d *powerflex) mapVolume(vol Volume) (revert.Hook, error) { } reverter.Add(cleanup) - case powerFlexModeSDC: + case connectors.TypeSDC: hostGUID, err := d.getHostGUID() if err != nil { return nil, err @@ -946,19 +913,19 @@ func (d *powerflex) mapVolume(vol Volume) (revert.Hook, error) { reverter.Add(func() { _ = client.deleteHostVolumeMapping(hostID, volumeID) }) } - if d.config["powerflex.mode"] == powerFlexModeNVMe { - // Connect to the NVMe/TCP subsystem. - // We have to connect after the first mapping was established. - // PowerFlex does not offer any discovery log entries until a volume gets mapped to the host. - // This action is idempotent. - cleanup, err := d.connectNVMeSubsys() - if err != nil { - return nil, err - } + targetAddr := d.config["powerflex.sdt"] - reverter.Add(cleanup) + // Connect to the storage subsystem. + // In case of NVMe/TCP, we have to connect after the first mapping was established, + // as PowerFlex does not offer any discovery log entries until a volume gets mapped + // to the host. + err = d.connector().ConnectAll(d.state.ShutdownCtx, targetAddr) + if err != nil { + return nil, err } + reverter.Add(func() { _ = d.connector().DisconnectAll() }) + cleanup := reverter.Clone().Fail reverter.Success() return cleanup, nil @@ -979,44 +946,6 @@ func (d *powerflex) getMappedDevPath(vol Volume, mapVolume bool) (string, revert revert.Add(cleanup) } - powerFlexVolumes := make(map[string]string) - - // discoverFunc has to be called in a loop with a set timeout to ensure - // all the necessary directories and devices can be discovered. - discoverFunc := func(volumeID string, diskPrefix string) error { - var diskPaths []string - - // If there are no other disks on the system by id, the directory might not even be there. - // Returns ENOENT in case the by-id/ directory does not exist. - diskPaths, err := resources.GetDisksByID(diskPrefix) - if err != nil { - return err - } - - for _, diskPath := range diskPaths { - // Skip the disk if it is only a partition of the actual PowerFlex volume. - if strings.Contains(diskPath, "-part") { - continue - } - - // Skip other volume's that don't match the PowerFlex volume's ID. - if !strings.Contains(diskPath, volumeID) { - continue - } - - // The actual device might not already be created. - // Returns ENOENT in case the device does not exist. - devPath, err := filepath.EvalSymlinks(diskPath) - if err != nil { - return err - } - - powerFlexVolumes[volumeID] = devPath - } - - return nil - } - volumeName, err := d.getVolumeName(vol) if err != nil { return "", nil, err @@ -1027,59 +956,34 @@ func (d *powerflex) getMappedDevPath(vol Volume, mapVolume bool) (string, revert return "", nil, err } - timeout := time.Now().Add(5 * time.Second) - // It might take a while to create the local disk. - // Retry until it can be found. - for { - if time.Now().After(timeout) { - return "", nil, fmt.Errorf("Timeout exceeded for PowerFlex volume discovery: %q", volumeName) - } - - var prefix string - switch d.config["powerflex.mode"] { - case powerFlexModeNVMe: - prefix = "nvme-eui." - case powerFlexModeSDC: - prefix = "emc-vol-" - } - - err := discoverFunc(powerFlexVolumeID, prefix) - if err != nil { - // Try again if on of the directories cannot be found. - if errors.Is(err, unix.ENOENT) { - continue - } - - return "", nil, err - } - - // Exit if the volume got discovered. - _, ok := powerFlexVolumes[powerFlexVolumeID] - if ok { - break - } - - // Exit if the volume wasn't explicitly mapped. - // Doing a retry would run into the timeout when the device isn't mapped. - if !mapVolume { - break - } + var prefix string + switch d.config["powerflex.mode"] { + case connectors.TypeNVME: + prefix = "nvme-eui." + case connectors.TypeSDC: + prefix = "emc-vol-" + } - time.Sleep(10 * time.Millisecond) + devicePathFilter := func(path string) bool { + return strings.Contains(path, powerFlexVolumeID) } - if len(powerFlexVolumes) == 0 { - return "", nil, fmt.Errorf("Failed to discover any PowerFlex volume") + var devicePath string + if mapVolume { + // Wait for the device path to appear as the volume has been just mapped to the host. + devicePath, err = connectors.WaitDiskDevicePath(d.state.ShutdownCtx, prefix, devicePathFilter) + } else { + // Get the the device path without waiting. + devicePath, err = connectors.GetDiskDevicePath(prefix, devicePathFilter) } - powerFlexVolumePath, ok := powerFlexVolumes[powerFlexVolumeID] - if !ok { - return "", nil, fmt.Errorf("PowerFlex volume not found: %q", volumeName) + if err != nil { + return "", nil, fmt.Errorf("Failed to locate device for volume %q: %w", vol.name, err) } cleanup := revert.Clone().Fail revert.Success() - return powerFlexVolumePath, cleanup, nil + return devicePath, cleanup, nil } // unmapVolume unmaps the given volume from this host. @@ -1097,20 +1001,24 @@ func (d *powerflex) unmapVolume(vol Volume) error { var host *powerFlexSDC switch d.config["powerflex.mode"] { - case powerFlexModeNVMe: - nqn := d.getHostNQN() - host, err = client.getNVMeHostByNQN(nqn) + case connectors.TypeNVME: + hostNQN, err := d.connector().QualifiedName() + if err != nil { + return err + } + + host, err = client.getNVMeHostByNQN(hostNQN) if err != nil { return err } - unlock, err := locking.Lock(d.state.ShutdownCtx, "nvme") + unlock, err := storageConnectorLock(d.connector().Type(), "powerflex") if err != nil { return err } defer unlock() - case powerFlexModeSDC: + case connectors.TypeSDC: hostGUID, err := d.getHostGUID() if err != nil { return err @@ -1129,19 +1037,14 @@ func (d *powerflex) unmapVolume(vol Volume) error { // Wait until the volume has disappeared. volumePath, _, _ := d.getMappedDevPath(vol, false) - if volumePath != "" { - ctx, cancel := context.WithTimeout(d.state.ShutdownCtx, 10*time.Second) - defer cancel() - - if !waitGone(ctx, volumePath) { - return fmt.Errorf("Timeout whilst waiting for PowerFlex volume to disappear: %q", vol.name) - } + if volumePath != "" && !connectors.WaitDiskDeviceGone(d.state.ShutdownCtx, volumePath, 10) { + return fmt.Errorf("Timeout whilst waiting for PowerFlex volume to disappear: %q", vol.name) } // In case of SDC the driver doesn't manage the underlying connection to PowerFlex. // Therefore if this was the last volume being unmapped from this system // LXD will not try to cleanup the connection. - if d.config["powerflex.mode"] == powerFlexModeNVMe { + if d.config["powerflex.mode"] == connectors.TypeNVME { mappings, err := client.getHostVolumeMappings(host.ID) if err != nil { return err @@ -1150,7 +1053,7 @@ func (d *powerflex) unmapVolume(vol Volume) error { if len(mappings) == 0 { // Disconnect from the NVMe subsystem. // Do this first before removing the host from PowerFlex. - err := d.disconnectNVMeSubsys() + err = d.connector().DisconnectAll() if err != nil { return err } @@ -1167,77 +1070,6 @@ func (d *powerflex) unmapVolume(vol Volume) error { return nil } -// connectNVMeSubsys connects this host to the NVMe subsystem configured in the storage pool. -// The connection can only be established after the first volume is mapped to this host. -// The operation is idempotent and returns nil if already connected to the subsystem. -func (d *powerflex) connectNVMeSubsys() (revert.Hook, error) { - basePath := "/sys/devices/virtual/nvme-subsystem" - - // Retrieve list of existing NVMe subsystems on this host. - directories, err := os.ReadDir(basePath) - if err != nil { - return nil, fmt.Errorf("Failed getting a list of NVMe subsystems: %w", err) - } - - revert := revert.New() - defer revert.Fail() - - pool, err := d.resolvePool() - if err != nil { - return nil, err - } - - domain, err := d.client().getProtectionDomain(pool.ProtectionDomainID) - if err != nil { - return nil, err - } - - for _, directory := range directories { - subsystemName := directory.Name() - - // Get the subsystem's NQN. - nqnBytes, err := os.ReadFile(filepath.Join(basePath, subsystemName, "subsysnqn")) - if err != nil { - return nil, fmt.Errorf("Failed getting the NQN of subystem %q: %w", subsystemName, err) - } - - if strings.Contains(string(nqnBytes), domain.SystemID) { - cleanup := revert.Clone().Fail - revert.Success() - - // Already connected to the NVMe subsystem for the respective PowerFlex system. - return cleanup, nil - } - } - - nqn := d.getHostNQN() - serverUUID := d.state.ServerUUID - _, stderr, err := shared.RunCommandSplit(d.state.ShutdownCtx, nil, nil, "nvme", "connect-all", "-t", "tcp", "-a", d.config["powerflex.sdt"], "-q", nqn, "-I", serverUUID) - if err != nil { - return nil, fmt.Errorf("Failed nvme connect-all: %w", err) - } - - if stderr != "" { - return nil, fmt.Errorf("Failed connecting to PowerFlex NVMe/TCP subsystem: %s", stderr) - } - - revert.Add(func() { _ = d.disconnectNVMeSubsys() }) - - cleanup := revert.Clone().Fail - revert.Success() - return cleanup, nil -} - -// disconnectNVMeSubsys disconnects this host from the NVMe subsystem. -func (d *powerflex) disconnectNVMeSubsys() error { - _, err := shared.RunCommand("nvme", "disconnect-all") - if err != nil { - return fmt.Errorf("Failed disconnecting from PowerFlex NVMe/TCP subsystem: %w", err) - } - - return nil -} - // resolvePool looks up the selected storage pool. // If only the pool is provided, it's expected to be the ID of the pool. // In case both pool and domain are set, the pool will get looked up diff --git a/lxd/storage/drivers/utils.go b/lxd/storage/drivers/utils.go index 954ace3074ab..7e9b4936eaf2 100644 --- a/lxd/storage/drivers/utils.go +++ b/lxd/storage/drivers/utils.go @@ -15,6 +15,7 @@ import ( "golang.org/x/sys/unix" "github.com/canonical/lxd/lxd/idmap" + "github.com/canonical/lxd/lxd/locking" "github.com/canonical/lxd/lxd/operations" "github.com/canonical/lxd/lxd/storage/filesystem" "github.com/canonical/lxd/shared" @@ -218,23 +219,6 @@ func tryExists(ctx context.Context, path string) bool { } } -// waitGone waits for a file to not exist anymore or the context being cancelled. -// The probe happens at intervals of 500 milliseconds. -func waitGone(ctx context.Context, path string) bool { - for { - select { - case <-ctx.Done(): - return false - default: - if !shared.PathExists(path) { - return true - } - } - - time.Sleep(500 * time.Millisecond) - } -} - // fsUUID returns the filesystem UUID for the given block path. // error is returned if the given block device exists but has no UUID. func fsUUID(path string) (string, error) { @@ -894,3 +878,27 @@ func roundAbove(above, val int64) int64 { return rounded } + +// ResolveServerName returns the given server name if it is not "none". +// If the server name is "none", it retrieves and returns the server's hostname. +func ResolveServerName(serverName string) (string, error) { + if serverName != "none" { + return serverName, nil + } + + hostname, err := os.Hostname() + if err != nil { + return "", fmt.Errorf("Failed to get hostname: %w", err) + } + + return hostname, nil +} + +// storageConnectorLock acquires a lock for storage connector and returns the unlock function. +func storageConnectorLock(connectorName string, driverName string) (locking.UnlockFunc, error) { + l := logger.AddContext(logger.Ctx{"connector": connectorName, "driver": driverName}) + l.Debug("Acquiring lock for storage connector") + defer l.Debug("Lock acquired for storage connector") + + return locking.Lock(context.TODO(), fmt.Sprintf("StorageConnector_%s_%s", connectorName, driverName)) +}