Skip to content

Commit

Permalink
Merge branch 'master' into release-v0.0.6
Browse files Browse the repository at this point in the history
  • Loading branch information
roehrich-hpe committed Sep 14, 2023
2 parents 6e79eba + 3965777 commit 11103ba
Show file tree
Hide file tree
Showing 260 changed files with 19,501 additions and 4,954 deletions.
6 changes: 3 additions & 3 deletions config/rbac/daemon_role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,21 @@ rules:
- list
- watch
- apiGroups:
- dws.cray.hpe.com
- dataworkflowservices.github.io
resources:
- clientmounts
verbs:
- get
- list
- apiGroups:
- dws.cray.hpe.com
- dataworkflowservices.github.io
resources:
- clientmounts/status
verbs:
- get
- list
- apiGroups:
- dws.cray.hpe.com
- dataworkflowservices.github.io
resources:
- systemconfigurations
verbs:
Expand Down
44 changes: 22 additions & 22 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,28 @@ rules:
- patch
- update
- watch
- apiGroups:
- dataworkflowservices.github.io
resources:
- clientmounts
verbs:
- get
- list
- apiGroups:
- dataworkflowservices.github.io
resources:
- clientmounts/status
verbs:
- get
- list
- apiGroups:
- dataworkflowservices.github.io
resources:
- systemconfigurations
verbs:
- get
- list
- watch
- apiGroups:
- dm.cray.hpe.com
resources:
Expand All @@ -104,28 +126,6 @@ rules:
- get
- patch
- update
- apiGroups:
- dws.cray.hpe.com
resources:
- clientmounts
verbs:
- get
- list
- apiGroups:
- dws.cray.hpe.com
resources:
- clientmounts/status
verbs:
- get
- list
- apiGroups:
- dws.cray.hpe.com
resources:
- systemconfigurations
verbs:
- get
- list
- watch
- apiGroups:
- lus.cray.hpe.com
resources:
Expand Down
18 changes: 14 additions & 4 deletions controllers/datamovement_controller.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2021, 2022 Hewlett Packard Enterprise Development LP
* Copyright 2021-2023 Hewlett Packard Enterprise Development LP
* Other additional copyright holders may be indicated within.
*
* The entirety of this work is licensed under the Apache License,
Expand Down Expand Up @@ -51,7 +51,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/yaml"

dwsv1alpha2 "github.com/HewlettPackard/dws/api/v1alpha2"
dwsv1alpha2 "github.com/DataWorkflowServices/dws/api/v1alpha2"
dmv1alpha1 "github.com/NearNodeFlash/nnf-dm/api/v1alpha1"
"github.com/NearNodeFlash/nnf-dm/controllers/metrics"
nnfv1alpha1 "github.com/NearNodeFlash/nnf-sos/api/v1alpha1"
Expand Down Expand Up @@ -127,8 +127,8 @@ func (i *invalidError) Unwrap() error { return i.err }
//+kubebuilder:rbac:groups=nnf.cray.hpe.com,resources=nnfdatamovements/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=nnf.cray.hpe.com,resources=nnfdatamovements/finalizers,verbs=update
//+kubebuilder:rbac:groups=nnf.cray.hpe.com,resources=nnfstorages,verbs=get;list;watch
//+kubebuilder:rbac:groups=dws.cray.hpe.com,resources=clientmounts,verbs=get;list
//+kubebuilder:rbac:groups=dws.cray.hpe.com,resources=clientmounts/status,verbs=get;list
//+kubebuilder:rbac:groups=dataworkflowservices.github.io,resources=clientmounts,verbs=get;list
//+kubebuilder:rbac:groups=dataworkflowservices.github.io,resources=clientmounts/status,verbs=get;list
//+kubebuilder:rbac:groups=lus.cray.hpe.com,resources=lustrefilesystems,verbs=get;list;watch
//+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;update
//+kubebuilder:rbac:groups=core,resources=pods/status,verbs=get;list;watch;update
Expand Down Expand Up @@ -405,6 +405,7 @@ func (r *DataMovementReconciler) Reconcile(ctx context.Context, req ctrl.Request
log.Error(err, "Data movement operation cancelled", "output", combinedOutBuf.String())
dm.Status.Status = nnfv1alpha1.DataMovementConditionReasonCancelled
} else if err != nil {
log.Error(err, "Data movement operation failed", "output", combinedOutBuf.String())
dm.Status.Status = nnfv1alpha1.DataMovementConditionReasonFailed
dm.Status.Message = fmt.Sprintf("%s: %s", err.Error(), combinedOutBuf.String())
resourceErr := dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("data movement operation failed: %s", combinedOutBuf.String()).WithFatal()
Expand Down Expand Up @@ -471,6 +472,14 @@ func buildDMCommand(ctx context.Context, profile dmConfigProfile, hosts []string
slots := profile.Slots
maxSlots := profile.MaxSlots

// Allow the user to override the slots and max_slots in the hostfile.
if userConfig && dm.Spec.UserConfig.Slots != nil && *dm.Spec.UserConfig.Slots >= 0 {
slots = *dm.Spec.UserConfig.Slots
}
if userConfig && dm.Spec.UserConfig.MaxSlots != nil && *dm.Spec.UserConfig.MaxSlots >= 0 {
maxSlots = *dm.Spec.UserConfig.MaxSlots
}

hostfile, err = createMpiHostfile(dm.Name, hosts, slots, maxSlots)
if err != nil {
return nil, "", fmt.Errorf("error creating MPI hostfile: %v", err)
Expand Down Expand Up @@ -512,6 +521,7 @@ func buildDMCommand(ctx context.Context, profile dmConfigProfile, hosts []string

// Create an MPI Hostfile given a list of hosts, slots, and maxSlots. A temporary directory is
// created based on the DM Name. The hostfile is created inside of this directory.
// A value of 0 for slots or maxSlots omits that setting from the hostfile.
func createMpiHostfile(dmName string, hosts []string, slots, maxSlots int) (string, error) {

tmpdir := filepath.Join("/tmp", dmName)
Expand Down
45 changes: 45 additions & 0 deletions controllers/datamovement_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
. "github.com/onsi/gomega/gstruct"
"go.openly.dev/pointy"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -748,6 +749,50 @@ var _ = Describe("Data Movement Test", func() {
Expect(strings.Join(cmd, " ")).Should(MatchRegexp(expectedCmdRegex))
})
})

When("slots/maxSlots are specified in the request", func() {
DescribeTable("it should use the user slots vs the profile",
func(numSlots *int) {
profileSlots, profileMaxSlots := 3, 8

profile := dmConfigProfile{
Command: defaultCommand,
Slots: profileSlots,
MaxSlots: profileMaxSlots,
}
dm.Spec.UserConfig = &nnfv1alpha1.NnfDataMovementConfig{
Slots: numSlots,
MaxSlots: numSlots,
}
_, hostfilePath, err := buildDMCommand(context.TODO(), profile, hosts, &dm)
Expect(err).ToNot(HaveOccurred())
Expect(hostfilePath).ToNot(BeEmpty())
DeferCleanup(func() {
Expect(os.Remove(hostfilePath)).ToNot(HaveOccurred())
})

content, err := os.ReadFile(hostfilePath)
Expect(err).ToNot(HaveOccurred())
Expect(string(content)).ToNot(BeEmpty())

if numSlots == nil {
// if nil, use the profile's slots
Expect(string(content)).Should(MatchRegexp(fmt.Sprintf(" slots=%d", profileSlots)))
Expect(string(content)).Should(MatchRegexp(fmt.Sprintf(" max_slots=%d", profileMaxSlots)))
} else if *numSlots == 0 {
// if 0, then don't use slots at all
Expect(string(content)).ShouldNot(MatchRegexp(" slots"))
Expect(string(content)).ShouldNot(MatchRegexp(" max_slots"))
} else {
Expect(string(content)).Should(MatchRegexp(fmt.Sprintf(" slots=%d", *numSlots)))
Expect(string(content)).Should(MatchRegexp(fmt.Sprintf(" max_slots=%d", *numSlots)))
}
},
Entry("when non-zero", pointy.Int(17)),
Entry("when zero it should omit", pointy.Int(0)),
Entry("when nil it should use the profile", nil),
)
})
})
})
})
13 changes: 13 additions & 0 deletions daemons/compute/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,19 @@ To build the generated code used for the Go and Python example clients, run `./p

`client-go` and `client-py` will have updates to the generated files when the API changes.

To run this script, you will need the following installed on your system:

- [protoc](https://grpc.io/docs/protoc-installation/)
- [protoc-gen-doc](https://github.com/pseudomuto/protoc-gen-doc#installation)
- [grpc and grpc tools python modules](https://grpc.io/docs/languages/python/quickstart/)

On OSX, this can be done easily via:

```shell
brew install protobuf protoc-gen-go protoc-gen-go-grpc
pip3 install grpcio grpcio_tools
```

#### C and C++

For the C and C++ clients, the clients must be built to generate the source code to support the API. Run the `Makefiles` in the `_client-c`, `client-cpp`, and `lib-cpp` directories to update the generated API files.
Expand Down
8 changes: 8 additions & 0 deletions daemons/compute/api/datamovement.proto
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,14 @@ message DataMovementCreateRequest {

// If true, store stdout in DataMovementStatusResponse.Message when the command is successful. Failure output is always contained in the message.
bool storeStdout = 7;

// The number of slots specified in the MPI hostfile. A value of 0 disables the use of slots in
// the hostfile. A value of -1 defers to the server-side configuration.
int32 slots = 8;

// The number of max_slots specified in the MPI hostfile. A value of 0 disables the use of
// max_slots in the hostfile. A value of -1 defers to the server-side configuration.
int32 maxSlots = 9;
}

// The Data Movement Create Response to indicate the status of the Data Movement Request.
Expand Down
Loading

0 comments on commit 11103ba

Please sign in to comment.