Skip to content

Commit

Permalink
Added ability to use profiles from directives
Browse files Browse the repository at this point in the history
Signed-off-by: Blake Devcich <[email protected]>
  • Loading branch information
bdevcich committed Nov 15, 2023
1 parent 9b94462 commit 9852dfd
Show file tree
Hide file tree
Showing 10 changed files with 133 additions and 48 deletions.
56 changes: 31 additions & 25 deletions config/dm_config/nnf-dm-config.yaml
Original file line number Diff line number Diff line change
@@ -1,38 +1,44 @@
# Each profile is capable of providing different configurations for data movement.
# Note: For now, only the default profile is supported.
profiles:

# Default profile that is used for all data movement activity.
default:
# The number of slots specified in the MPI hostfile. A value less than 1 disables the use
# of slots in the hostfile.
slots: 8
# The number of slots specified in the MPI hostfile. A value less than 1 disables the use
# of slots in the hostfile.
slots: 8

# The number of max_slots specified in the MPI hostfile. A value less than 1 disables the use
# of max_slots in the hostfile.
maxSlots: 0

# The number of max_slots specified in the MPI hostfile. A value less than 1 disables the use
# of max_slots in the hostfile.
maxSlots: 0
# The full command to execute data movement. $VARS are replaced by the nnf software. Available
# $VARS:
# HOSTFILE: hostfile that is created and used for mpirun. Contains a list of hosts and the
# slots/max_slots for each host. This hostfile is created at `/tmp/<dm-name>/hostfile`
# UID: User ID that is inherited from the Workflow
# GID: Group ID that is inherited from the Workflow
# SRC: source for the data movement
# DEST: destination for the data movement
# default: command: ulimit -n 2048 && mpirun --allow-run-as-root --hostfile $HOSTFILE dcp --progress 1 --uid $UID --gid $GID $SRC $DEST
command: ulimit -n 2048 && mpirun --allow-run-as-root --hostfile $HOSTFILE dcp --progress 1 --uid $UID --gid $GID $SRC $DEST

# The full command to execute data movement. $VARS are replaced by the nnf software. Available
# $VARS:
# HOSTFILE: hostfile that is created and used for mpirun. Contains a list of hosts and the
# slots/max_slots for each host. This hostfile is created at `/tmp/<dm-name>/hostfile`
# UID: User ID that is inherited from the Workflow
# GID: Group ID that is inherited from the Workflow
# SRC: source for the data movement
# DEST: destination for the data movement
# default: command: ulimit -n 2048 && mpirun --allow-run-as-root --hostfile $HOSTFILE dcp --progress 1 --uid $UID --gid $GID $SRC $DEST
command: ulimit -n 2048 && mpirun --allow-run-as-root --hostfile $HOSTFILE dcp --progress 1 --uid $UID --gid $GID $SRC $DEST
# If true, enable the command's stdout to be saved in the log when the command completes
# successfully. On failure, the output is always logged.
logStdout: false

# If true, enable the command's stdout to be saved in the log when the command completes
# successfully. On failure, the output is always logged.
logStdout: false
# Similar to logStdout, store the command's stdout in Status.Message when the command
# completes successfully. On failure, the output is always stored.
storeStdout: false

# Similar to logStdout, store the command's stdout in Status.Message when the command
# completes successfully. On failure, the output is always stored.
storeStdout: false
# Same as default profile but tell dcp not to copy xattrs
no-xattr:
slots: 8
maxSlots: 0
command: ulimit -n 2048 && mpirun --allow-run-as-root --hostfile $HOSTFILE dcp --progress 1 --xattrs none --uid $UID --gid $GID $SRC $DEST
logStdout: false
storeStdout: false

# NnfDataMovement resources have the ability to collect and store the progress percentage and the
# last few lines of output in the CommandStatus field. This number is used for the interval to collect
# the progress data. `dcp --progress N` must be included in the data movement command in order for
# progress to be collected. A value less than 1 disables this functionality.
progressIntervalSeconds: 5
progressIntervalSeconds: 5
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.19

require (
github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20231031201943-531116c1194e
github.com/NearNodeFlash/nnf-sos v0.0.1-0.20231108192651-ab8d87963df0
github.com/NearNodeFlash/nnf-sos v0.0.1-0.20231115183043-7345e20cf440
github.com/onsi/ginkgo/v2 v2.11.0
github.com/onsi/gomega v1.27.10
github.com/prometheus/client_golang v1.16.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20231031201943-531116c1194e
github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20231031201943-531116c1194e/go.mod h1:qBcz9p8sXm1qhDf8WUmhxTlD1NCMEjoAD7NoHbQvMiI=
github.com/NearNodeFlash/nnf-ec v0.0.0-20231010162453-a8168bb6a52f h1:aWtSSQLLk9mUZj94mowirQeVw9saf80gVe10X0rZe8o=
github.com/NearNodeFlash/nnf-ec v0.0.0-20231010162453-a8168bb6a52f/go.mod h1:oxdwMqfttOF9dabJhqrWlirCnMk8/8eyLMwl+hducjk=
github.com/NearNodeFlash/nnf-sos v0.0.1-0.20231108192651-ab8d87963df0 h1:p6AuBbayRXU8WeBLBXXIihmJaB8IDJe9GjcEMFzJn6o=
github.com/NearNodeFlash/nnf-sos v0.0.1-0.20231108192651-ab8d87963df0/go.mod h1:YX9Q91wqtUmfZjU4KxSwZMDJGBzppiGEW4BpAVTIMAs=
github.com/NearNodeFlash/nnf-sos v0.0.1-0.20231115183043-7345e20cf440 h1:H1PJnKfnvWdaHYrT9QAF2FoFjTjDFBYIMacA4pLBL1I=
github.com/NearNodeFlash/nnf-sos v0.0.1-0.20231115183043-7345e20cf440/go.mod h1:t0KypbCmssZzL9vhQFHLdauxHKgptJK1SbPJHjm+Baw=
github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
github.com/benbjohnson/clock v1.3.0 h1:ip6w0uFQkncKQ979AypyG0ER7mqUSBdKLOgAle/AT8A=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
Expand Down
15 changes: 6 additions & 9 deletions internal/controller/datamovement_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,7 @@ const (
configMapNamespace = nnfv1alpha1.DataMovementNamespace

// DM ConfigMap Data Keys
configMapKeyData = "nnf-dm-config.yaml"
configMapKeyProfileDefault = "default"
configMapKeyData = "nnf-dm-config.yaml"
)

// Regex to scrape the progress output of the `dcp` command. Example output:
Expand Down Expand Up @@ -256,16 +255,14 @@ func (r *DataMovementReconciler) Reconcile(ctx context.Context, req ctrl.Request
}
log.Info("Using config map", "config", cfg)

// TODO: Allow use of non-default dm config profiles - for now only use the default. For copy
// offload API, we could create "fake" profiles and store those in the DM object based on the
// parameters supplied to the CreateRequest().
// Ensure profile exists
profile, found := cfg.Profiles[configMapKeyProfileDefault]
// Ensure requested DM profile exists
profile, found := cfg.Profiles[dm.Spec.Profile]
if !found {
return ctrl.Result{}, dwsv1alpha2.NewResourceError("").WithUserMessage("'%s' profile not found in config map: %v", configMapKeyProfileDefault, client.ObjectKeyFromObject(configMap)).WithUser().WithFatal()
return ctrl.Result{}, dwsv1alpha2.NewResourceError("").WithUserMessage("'%s' profile not found in config map: %v", dm.Spec.Profile, client.ObjectKeyFromObject(configMap)).WithUser().WithFatal()
}
log.Info("Using profile", "name", configMapKeyProfileDefault, "profile", profile)
log.Info("Using profile", "profile name", dm.Spec.Profile, "profile", profile)

// Build command + hostfile
cmdArgs, mpiHostfile, err := buildDMCommand(ctx, profile, hosts, dm)
if err != nil {
return ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create data movement command").WithError(err).WithMajor()
Expand Down
61 changes: 58 additions & 3 deletions internal/controller/datamovement_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,13 @@ var _ = Describe("Data Movement Test", func() {
// Default config map data
dmCfg = &dmConfig{
Profiles: map[string]dmConfigProfile{
"default": {
nnfv1alpha1.DataMovementProfileDefault: {
Command: defaultCommand,
},
},
ProgressIntervalSeconds: 1,
}
dmCfgProfile = dmCfg.Profiles[configMapKeyProfileDefault]
dmCfgProfile = dmCfg.Profiles[nnfv1alpha1.DataMovementProfileDefault]

dm = &nnfv1alpha1.NnfDataMovement{
ObjectMeta: metav1.ObjectMeta{
Expand Down Expand Up @@ -140,7 +140,7 @@ var _ = Describe("Data Movement Test", func() {
// Create CM and verify label
if createCm {
// allow test to override the values in the default cfg profile
dmCfg.Profiles[configMapKeyProfileDefault] = dmCfgProfile
dmCfg.Profiles[nnfv1alpha1.DataMovementProfileDefault] = dmCfgProfile

// Convert the config to raw
b, err := yaml.Marshal(dmCfg)
Expand Down Expand Up @@ -393,6 +393,61 @@ var _ = Describe("Data Movement Test", func() {
})
})

Context("when a non-default profile is supplied (and present)", func() {
p := "test-profile"
cmd := "sleep .1"

BeforeEach(func() {
dmCfgProfile = dmConfigProfile{
Command: cmd,
}
dmCfg.Profiles[p] = dmCfgProfile
dm.Spec.Profile = p
})
It("should use that profile to perform data movement", func() {

By("completing the data movement successfully")
Eventually(func(g Gomega) nnfv1alpha1.NnfDataMovementStatus {
g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(dm), dm)).To(Succeed())
return dm.Status
}, "3s").Should(MatchFields(IgnoreExtras, Fields{
"State": Equal(nnfv1alpha1.DataMovementConditionTypeFinished),
"Status": Equal(nnfv1alpha1.DataMovementConditionReasonSuccess),
}))

By("verify that profile is used")
Expect(dm.Spec.Profile).To(Equal(p))
Expect(dm.Status.CommandStatus.Command).To(Equal(cmdBashPrefix + cmd))
})
})

Context("when a non-default profile is supplied (and NOT present)", func() {
m := "missing-test-profile"
cmd := "sleep .1"

BeforeEach(func() {
dmCfgProfile = dmConfigProfile{
Command: cmd,
}
dmCfg.Profiles["test-profile"] = dmCfgProfile
dm.Spec.Profile = m
})
It("should use that profile to perform data movement and fail", func() {

By("having a State/Status of 'Finished'/'Invalid'")
Eventually(func(g Gomega) nnfv1alpha1.NnfDataMovementStatus {
g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(dm), dm)).To(Succeed())
return dm.Status
}).Should(MatchFields(IgnoreExtras, Fields{
"State": Equal(nnfv1alpha1.DataMovementConditionTypeFinished),
"Status": Equal(nnfv1alpha1.DataMovementConditionReasonInvalid),
}))

By("verify that profile is used")
Expect(dm.Spec.Profile).To(Equal(m))
})
})

Context("when a data movement command fails", func() {
BeforeEach(func() {
dmCfgProfile.Command = "false"
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion vendor/modules.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ github.com/NearNodeFlash/lustre-fs-operator/config/crd/bases
# github.com/NearNodeFlash/nnf-ec v0.0.0-20231010162453-a8168bb6a52f
## explicit; go 1.19
github.com/NearNodeFlash/nnf-ec/pkg/rfsf/pkg/models
# github.com/NearNodeFlash/nnf-sos v0.0.1-0.20231108192651-ab8d87963df0
# github.com/NearNodeFlash/nnf-sos v0.0.1-0.20231115183043-7345e20cf440
## explicit; go 1.19
github.com/NearNodeFlash/nnf-sos/api/v1alpha1
github.com/NearNodeFlash/nnf-sos/config/crd/bases
Expand Down

0 comments on commit 9852dfd

Please sign in to comment.