From 2dd5ddc80a2b4a337cd70a974c4c8de28e148e78 Mon Sep 17 00:00:00 2001 From: Blake Devcich Date: Wed, 15 Nov 2023 14:28:47 -0600 Subject: [PATCH] Added ability to use non-default profiles * Added no-xattr profile to nnf-dm-config config map to run `dcp --xattrs none` * Having another profile in there makes it easier for int-test * DM controller now looks retrieves the profile specified by dm.spec.profile Copy Offload API does not yet support this Signed-off-by: Blake Devcich --- config/dm_config/nnf-dm-config.yaml | 56 +++++++++-------- go.mod | 2 +- go.sum | 4 +- .../controller/datamovement_controller.go | 15 ++--- .../datamovement_controller_test.go | 61 ++++++++++++++++++- .../api/v1alpha1/nnf_datamovement_types.go | 17 ++++-- .../v1alpha1/nnf_datamovementmanager_types.go | 15 ++++- ....cray.hpe.com_nnfdatamovementmanagers.yaml | 3 + .../nnf.cray.hpe.com_nnfdatamovements.yaml | 6 ++ vendor/modules.txt | 2 +- 10 files changed, 133 insertions(+), 48 deletions(-) diff --git a/config/dm_config/nnf-dm-config.yaml b/config/dm_config/nnf-dm-config.yaml index 50debc43..ab84e2b0 100644 --- a/config/dm_config/nnf-dm-config.yaml +++ b/config/dm_config/nnf-dm-config.yaml @@ -1,38 +1,44 @@ # Each profile is capable of providing different configurations for data movement. -# Note: For now, only the default profile is supported. profiles: - # Default profile that is used for all data movement activity. default: - # The number of slots specified in the MPI hostfile. A value less than 1 disables the use - # of slots in the hostfile. - slots: 8 + # The number of slots specified in the MPI hostfile. A value less than 1 disables the use + # of slots in the hostfile. + slots: 8 + + # The number of max_slots specified in the MPI hostfile. A value less than 1 disables the use + # of max_slots in the hostfile. + maxSlots: 0 - # The number of max_slots specified in the MPI hostfile. A value less than 1 disables the use - # of max_slots in the hostfile. - maxSlots: 0 + # The full command to execute data movement. $VARS are replaced by the nnf software. Available + # $VARS: + # HOSTFILE: hostfile that is created and used for mpirun. Contains a list of hosts and the + # slots/max_slots for each host. This hostfile is created at `/tmp//hostfile` + # UID: User ID that is inherited from the Workflow + # GID: Group ID that is inherited from the Workflow + # SRC: source for the data movement + # DEST destination for the data movement + # default: command: ulimit -n 2048 && mpirun --allow-run-as-root --hostfile $HOSTFILE dcp --progress 1 --uid $UID --gid $GID $SRC $DEST + command: ulimit -n 2048 && mpirun --allow-run-as-root --hostfile $HOSTFILE dcp --progress 1 --uid $UID --gid $GID $SRC $DEST - # The full command to execute data movement. $VARS are replaced by the nnf software. Available - # $VARS: - # HOSTFILE: hostfile that is created and used for mpirun. Contains a list of hosts and the - # slots/max_slots for each host. This hostfile is created at `/tmp//hostfile` - # UID: User ID that is inherited from the Workflow - # GID: Group ID that is inherited from the Workflow - # SRC: source for the data movement - # DEST destination for the data movement - # default: command: ulimit -n 2048 && mpirun --allow-run-as-root --hostfile $HOSTFILE dcp --progress 1 --uid $UID --gid $GID $SRC $DEST - command: ulimit -n 2048 && mpirun --allow-run-as-root --hostfile $HOSTFILE dcp --progress 1 --uid $UID --gid $GID $SRC $DEST + # If true, enable the command's stdout to be saved in the log when the command completes + # successfully. On failure, the output is always logged. + logStdout: false - # If true, enable the command's stdout to be saved in the log when the command completes - # successfully. On failure, the output is always logged. - logStdout: false + # Similar to logStdout, store the command's stdout in Status.Message when the command + # completes successfully. On failure, the output is always stored. + storeStdout: false - # Similar to logStdout, store the command's stdout in Status.Message when the command - # completes successfully. On failure, the output is always stored. - storeStdout: false + # Same as default profile but tell dcp not to copy xattrs + no-xattr: + slots: 8 + maxSlots: 0 + command: ulimit -n 2048 && mpirun --allow-run-as-root --hostfile $HOSTFILE dcp --progress 1 --xattrs none --uid $UID --gid $GID $SRC $DEST + logStdout: false + storeStdout: false # NnfDataMovement resources have the ability to collect and store the progress percentage and the # last few lines of output in the CommandStatus field. This number is used for the interval to collect # the progress data. `dcp --progress N` must be included in the data movement command in order for # progress to be collected. A value less than 1 disables this functionality. -progressIntervalSeconds: 5 \ No newline at end of file +progressIntervalSeconds: 5 diff --git a/go.mod b/go.mod index f2af705d..fa01fcbc 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.19 require ( github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20231031201943-531116c1194e - github.com/NearNodeFlash/nnf-sos v0.0.1-0.20231108192651-ab8d87963df0 + github.com/NearNodeFlash/nnf-sos v0.0.1-0.20231115183043-7345e20cf440 github.com/onsi/ginkgo/v2 v2.11.0 github.com/onsi/gomega v1.27.10 github.com/prometheus/client_golang v1.16.0 diff --git a/go.sum b/go.sum index 76e5f008..4f33d356 100644 --- a/go.sum +++ b/go.sum @@ -4,8 +4,8 @@ github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20231031201943-531116c1194e github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20231031201943-531116c1194e/go.mod h1:qBcz9p8sXm1qhDf8WUmhxTlD1NCMEjoAD7NoHbQvMiI= github.com/NearNodeFlash/nnf-ec v0.0.0-20231010162453-a8168bb6a52f h1:aWtSSQLLk9mUZj94mowirQeVw9saf80gVe10X0rZe8o= github.com/NearNodeFlash/nnf-ec v0.0.0-20231010162453-a8168bb6a52f/go.mod h1:oxdwMqfttOF9dabJhqrWlirCnMk8/8eyLMwl+hducjk= -github.com/NearNodeFlash/nnf-sos v0.0.1-0.20231108192651-ab8d87963df0 h1:p6AuBbayRXU8WeBLBXXIihmJaB8IDJe9GjcEMFzJn6o= -github.com/NearNodeFlash/nnf-sos v0.0.1-0.20231108192651-ab8d87963df0/go.mod h1:YX9Q91wqtUmfZjU4KxSwZMDJGBzppiGEW4BpAVTIMAs= +github.com/NearNodeFlash/nnf-sos v0.0.1-0.20231115183043-7345e20cf440 h1:H1PJnKfnvWdaHYrT9QAF2FoFjTjDFBYIMacA4pLBL1I= +github.com/NearNodeFlash/nnf-sos v0.0.1-0.20231115183043-7345e20cf440/go.mod h1:t0KypbCmssZzL9vhQFHLdauxHKgptJK1SbPJHjm+Baw= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/benbjohnson/clock v1.3.0 h1:ip6w0uFQkncKQ979AypyG0ER7mqUSBdKLOgAle/AT8A= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= diff --git a/internal/controller/datamovement_controller.go b/internal/controller/datamovement_controller.go index 2d631532..91308617 100644 --- a/internal/controller/datamovement_controller.go +++ b/internal/controller/datamovement_controller.go @@ -66,8 +66,7 @@ const ( configMapNamespace = nnfv1alpha1.DataMovementNamespace // DM ConfigMap Data Keys - configMapKeyData = "nnf-dm-config.yaml" - configMapKeyProfileDefault = "default" + configMapKeyData = "nnf-dm-config.yaml" ) // Regex to scrape the progress output of the `dcp` command. Example output: @@ -256,16 +255,14 @@ func (r *DataMovementReconciler) Reconcile(ctx context.Context, req ctrl.Request } log.Info("Using config map", "config", cfg) - // TODO: Allow use of non-default dm config profiles - for now only use the default. For copy - // offload API, we could create "fake" profiles and store those in the DM object based on the - // parameters supplied to the CreateRequest(). - // Ensure profile exists - profile, found := cfg.Profiles[configMapKeyProfileDefault] + // Ensure requested DM profile exists + profile, found := cfg.Profiles[dm.Spec.Profile] if !found { - return ctrl.Result{}, dwsv1alpha2.NewResourceError("").WithUserMessage("'%s' profile not found in config map: %v", configMapKeyProfileDefault, client.ObjectKeyFromObject(configMap)).WithUser().WithFatal() + return ctrl.Result{}, dwsv1alpha2.NewResourceError("").WithUserMessage("'%s' profile not found in config map: %v", dm.Spec.Profile, client.ObjectKeyFromObject(configMap)).WithUser().WithFatal() } - log.Info("Using profile", "name", configMapKeyProfileDefault, "profile", profile) + log.Info("Using profile", "profile name", dm.Spec.Profile, "profile", profile) + // Built command + hostfile cmdArgs, mpiHostfile, err := buildDMCommand(ctx, profile, hosts, dm) if err != nil { return ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create data movement command").WithError(err).WithMajor() diff --git a/internal/controller/datamovement_controller_test.go b/internal/controller/datamovement_controller_test.go index e7c68f78..592ebae3 100644 --- a/internal/controller/datamovement_controller_test.go +++ b/internal/controller/datamovement_controller_test.go @@ -106,13 +106,13 @@ var _ = Describe("Data Movement Test", func() { // Default config map data dmCfg = &dmConfig{ Profiles: map[string]dmConfigProfile{ - "default": { + nnfv1alpha1.DataMovementProfileDefault: { Command: defaultCommand, }, }, ProgressIntervalSeconds: 1, } - dmCfgProfile = dmCfg.Profiles[configMapKeyProfileDefault] + dmCfgProfile = dmCfg.Profiles[nnfv1alpha1.DataMovementProfileDefault] dm = &nnfv1alpha1.NnfDataMovement{ ObjectMeta: metav1.ObjectMeta{ @@ -140,7 +140,7 @@ var _ = Describe("Data Movement Test", func() { // Create CM and verify label if createCm { // allow test to override the values in the default cfg profile - dmCfg.Profiles[configMapKeyProfileDefault] = dmCfgProfile + dmCfg.Profiles[nnfv1alpha1.DataMovementProfileDefault] = dmCfgProfile // Convert the config to raw b, err := yaml.Marshal(dmCfg) @@ -393,6 +393,61 @@ var _ = Describe("Data Movement Test", func() { }) }) + Context("when a non-default profile is supplied (and present)", func() { + p := "test-profile" + cmd := "sleep .1" + + BeforeEach(func() { + dmCfgProfile = dmConfigProfile{ + Command: cmd, + } + dmCfg.Profiles[p] = dmCfgProfile + dm.Spec.Profile = p + }) + It("should use that profile to perform data movement", func() { + + By("completing the data movement successfully") + Eventually(func(g Gomega) nnfv1alpha1.NnfDataMovementStatus { + g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(dm), dm)).To(Succeed()) + return dm.Status + }, "3s").Should(MatchFields(IgnoreExtras, Fields{ + "State": Equal(nnfv1alpha1.DataMovementConditionTypeFinished), + "Status": Equal(nnfv1alpha1.DataMovementConditionReasonSuccess), + })) + + By("verify that profile is used") + Expect(dm.Spec.Profile).To(Equal(p)) + Expect(dm.Status.CommandStatus.Command).To(Equal(cmdBashPrefix + cmd)) + }) + }) + + Context("when a non-default profile is supplied (and NOT present)", func() { + m := "missing-test-profile" + cmd := "sleep .1" + + BeforeEach(func() { + dmCfgProfile = dmConfigProfile{ + Command: cmd, + } + dmCfg.Profiles["test-profile"] = dmCfgProfile + dm.Spec.Profile = m + }) + It("should use that profile to perform data movement and fail", func() { + + By("having a State/Status of 'Finished'/'Invalid'") + Eventually(func(g Gomega) nnfv1alpha1.NnfDataMovementStatus { + g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(dm), dm)).To(Succeed()) + return dm.Status + }).Should(MatchFields(IgnoreExtras, Fields{ + "State": Equal(nnfv1alpha1.DataMovementConditionTypeFinished), + "Status": Equal(nnfv1alpha1.DataMovementConditionReasonInvalid), + })) + + By("verify that profile is used") + Expect(dm.Spec.Profile).To(Equal(m)) + }) + }) + Context("when a data movement command fails", func() { BeforeEach(func() { dmCfgProfile.Command = "false" diff --git a/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_datamovement_types.go b/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_datamovement_types.go index 6c39f10c..bb6dcd11 100644 --- a/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_datamovement_types.go +++ b/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_datamovement_types.go @@ -26,13 +26,15 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! -// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. - const ( - // The required namespace for an NNF Data Movement operation. This is for system wide (lustre) data movement. - // Individual nodes may also perform data movement in which case they use the NNF Node Name as the namespace. + // The required namespace for an NNF Data Movement operation. This is for system wide (lustre) + // data movement. Individual nodes may also perform data movement in which case they use the + // NNF Node Name as the namespace. DataMovementNamespace = "nnf-dm-system" + + // The name of the default profile stored in the nnf-dm-config ConfigMap that is used to + // configure Data Movement. + DataMovementProfileDefault = "default" ) // NnfDataMovementSpec defines the desired state of NnfDataMovement @@ -60,6 +62,11 @@ type NnfDataMovementSpec struct { // +kubebuilder:default:=false Cancel bool `json:"cancel,omitempty"` + // Profile specifies the name of profile in the nnf-dm-config ConfigMap to be used for + // configuring data movement. Defaults to the default profile. + // +kubebuilder:default:=default + Profile string `json:"profile,omitempty"` + // User defined configuration on how data movement should be performed. This overrides the // configuration defined in the nnf-dm-config ConfigMap. These values are typically set by the // Copy Offload API. diff --git a/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_datamovementmanager_types.go b/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_datamovementmanager_types.go index c9b0941d..091a033f 100644 --- a/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_datamovementmanager_types.go +++ b/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_datamovementmanager_types.go @@ -1,5 +1,5 @@ /* - * Copyright 2022 Hewlett Packard Enterprise Development LP + * Copyright 2022-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -22,10 +22,16 @@ package v1alpha1 import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/DataWorkflowServices/dws/utils/updater" ) const ( DataMovementWorkerLabel = "dm.cray.hpe.com/worker" + + // The name of the expected Data Movement manager. This is to ensure Data Movement is ready in + // the DataIn/DataOut stages before attempting data movement operations. + DataMovementManagerName = "nnf-dm-manager-controller-manager" ) // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! @@ -58,7 +64,8 @@ type NnfDataMovementManagerStatus struct { // Ready indicates that the Data Movement Manager has achieved the desired readiness state // and all managed resources are initialized. - Ready bool `json:"ready,omitempty"` + // +kubebuilder:default:=false + Ready bool `json:"ready"` } //+kubebuilder:object:root=true @@ -75,6 +82,10 @@ type NnfDataMovementManager struct { Status NnfDataMovementManagerStatus `json:"status,omitempty"` } +func (m *NnfDataMovementManager) GetStatus() updater.Status[*NnfDataMovementManagerStatus] { + return &m.Status +} + //+kubebuilder:object:root=true // NnfDataMovementManagerList contains a list of NnfDataMovementManager diff --git a/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovementmanagers.yaml b/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovementmanagers.yaml index 6c7dab88..ce62f193 100644 --- a/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovementmanagers.yaml +++ b/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovementmanagers.yaml @@ -7637,9 +7637,12 @@ spec: NnfDataMovementManager properties: ready: + default: false description: Ready indicates that the Data Movement Manager has achieved the desired readiness state and all managed resources are initialized. type: boolean + required: + - ready type: object type: object served: true diff --git a/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovements.yaml b/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovements.yaml index 5c7e5128..c008b052 100644 --- a/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovements.yaml +++ b/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovements.yaml @@ -109,6 +109,12 @@ spec: operation. format: int32 type: integer + profile: + default: default + description: Profile specifies the name of profile in the nnf-dm-config + ConfigMap to be used for configuring data movement. Defaults to + the default profile. + type: string source: description: Source describes the source of the data movement operation properties: diff --git a/vendor/modules.txt b/vendor/modules.txt index f29b69d4..75d9a78d 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -10,7 +10,7 @@ github.com/NearNodeFlash/lustre-fs-operator/config/crd/bases # github.com/NearNodeFlash/nnf-ec v0.0.0-20231010162453-a8168bb6a52f ## explicit; go 1.19 github.com/NearNodeFlash/nnf-ec/pkg/rfsf/pkg/models -# github.com/NearNodeFlash/nnf-sos v0.0.1-0.20231108192651-ab8d87963df0 +# github.com/NearNodeFlash/nnf-sos v0.0.1-0.20231115183043-7345e20cf440 ## explicit; go 1.19 github.com/NearNodeFlash/nnf-sos/api/v1alpha1 github.com/NearNodeFlash/nnf-sos/config/crd/bases