Skip to content

Commit

Permalink
feat: shrink support for fluxion
Browse files Browse the repository at this point in the history
This changeset exposes the remove_subgraph function,
which we can call a shrink. I do not think it accounts
for handling jobs properly, but it should be a
reasonable start for testing or debugging.

Signed-off-by: vsoch <[email protected]>
  • Loading branch information
vsoch committed Nov 1, 2024
1 parent 9d3e31a commit b910bd4
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ RUN wget https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz && tar -xvf go${G
ENV PATH=$PATH:/usr/local/go/bin:/home/vscode/go/bin

# Testing grow/shrink from custom branch
RUN git clone -b debug-resource-error-messages https://github.com/researchapps/flux-sched /opt/flux-sched
RUN git clone -b add-shrink https://github.com/researchapps/flux-sched /opt/flux-sched
# RUN git clone https://github.com/flux-framework/flux-sched /opt/flux-sched

# We also need to rebuild into the system install
Expand Down
9 changes: 8 additions & 1 deletion .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,14 @@ jobs:

# TODO: we should consider distributing the header files with the release builds
- name: flux-sched build
run: git clone https://github.com/flux-framework/flux-sched /opt/flux-sched
run: git clone -b add-shrink https://github.com/researchapps/flux-sched /opt/flux-sched
- name: flux-sched compile
run: |
cd /opt/flux-sched
cmake -B build
make -C build
make -C build install
cd -
- name: Build
run: LIB_PREFIX=${{ matrix.test[1] }} make build
- name: Test Binary
Expand Down
19 changes: 19 additions & 0 deletions cmd/test/test.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,25 @@ func main() {
log.Fatalf("Error in ReapiClient MatchSatisfy - asking for 4 nodes should now succeed: %v\n", err)
}

// Shrink (remove subgraph) for node2
fmt.Println("🥕 Asking to Shrink from 4 to 3 Nodes")
err = cli.Shrink("/tiny0/rack0/node2")
if err != nil {
log.Fatalf("Error in ReapiClient Shrink: %s %s\n", err, cli.GetErrMsg())
}
fmt.Printf("Shrink request return value: %v\n", err)

fmt.Println("Asking to MatchSatisfy 4 nodes (again, not possible)")
sat, overhead, err = cli.MatchSatisfy(growJobspec)
checkErrors(cli)
if err != nil {
log.Fatalf("Error in ReapiClient MatchSatisfy: %v\n", err)
}
printSatOutput(sat, err)
if sat {
log.Fatalf("Error in ReapiClient MatchSatisfy - asking for 4 nodes with only 3 should fail: %v\n", err)
}

}

func printOutput(reserved bool, allocated string, at int64, jobid uint64, err error) {
Expand Down
25 changes: 21 additions & 4 deletions pkg/fluxcli/reapi_cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,14 +220,14 @@ func (cli *ReapiClient) UpdateAllocate(jobid int, r string) (at int64, overhead
return at, overhead, r_out, err
}

// Update the resource state with R.
// Update the resource state with R (grow).
//
// \param h Opaque handle. How it is used is an implementation
// detail. However, when it is used within a Flux's
// detail. However, when it is used within a Flux's
// service module, it is expected to be a pointer
// to a flux_t object.
// \param R_subgraph R String
// \return 0 on success; -1 on error.
// \param R_subgraph R String
// \return 0 on success; -1 on error.
// int reapi_cli_grow (reapi_cli_ctx_t *ctx, const char *R_subgraph);
func (cli *ReapiClient) Grow(rSubgraph string) (err error) {
var resources = C.CString(rSubgraph)
Expand All @@ -237,6 +237,23 @@ func (cli *ReapiClient) Grow(rSubgraph string) (err error) {
return retvalToError(fluxerr, "issue resource api client grow")
}

// Shrink updates the resource state by removing the subgraph at R_node_path.
//
// \param h Opaque handle. How it is used is an implementation
// detail. However, when it is used within a Flux's
// service module, it is expected to be a pointer
// to a flux_t object.
// \param R_node_path Path of the graph node to remove, e.g. /tiny0/rack0/node2
// \return 0 on success; -1 on error.
// int reapi_cli_shrink (reapi_cli_ctx_t *ctx, const char *R_node_path);
func (cli *ReapiClient) Shrink(rNodePath string) (err error) {
var nodePath = C.CString(rNodePath)
defer C.free(unsafe.Pointer(nodePath))

fluxerr := (int)(C.reapi_cli_shrink((*C.struct_reapi_cli_ctx)(cli.ctx), nodePath))

Check failure on line 253 in pkg/fluxcli/reapi_cli.go

View workflow job for this annotation

GitHub Actions / Test fluxion-go (fluxrm/flux-sched:jammy, /usr/lib)

could not determine kind of name for C.reapi_cli_shrink

Check failure on line 253 in pkg/fluxcli/reapi_cli.go

View workflow job for this annotation

GitHub Actions / Test fluxion-go (fluxrm/flux-sched:fedora38, /usr/lib64)

could not determine kind of name for C.reapi_cli_shrink

Check failure on line 253 in pkg/fluxcli/reapi_cli.go

View workflow job for this annotation

GitHub Actions / Test fluxion-go (fluxrm/flux-sched:bookworm-amd64, /usr/lib)

could not determine kind of name for C.reapi_cli_shrink

Check failure on line 253 in pkg/fluxcli/reapi_cli.go

View workflow job for this annotation

GitHub Actions / Test fluxion-go (fluxrm/flux-sched:el8, /usr/lib64)

could not determine kind of name for C.reapi_cli_shrink
return retvalToError(fluxerr, "issue resource api client shrink")
}

// Cancel cancels the allocation or reservation corresponding to jobid.
//
// \param jobid jobid of the uint64_t type.
Expand Down

0 comments on commit b910bd4

Please sign in to comment.