Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: shrink support for fluxion #13

Open
wants to merge 1 commit into
base: add-grow-support
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ RUN wget https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz && tar -xvf go${G
ENV PATH=$PATH:/usr/local/go/bin:/home/vscode/go/bin

# Testing grow/shrink from custom branch
RUN git clone -b debug-resource-error-messages https://github.com/researchapps/flux-sched /opt/flux-sched
RUN git clone -b add-shrink https://github.com/researchapps/flux-sched /opt/flux-sched
# RUN git clone https://github.com/flux-framework/flux-sched /opt/flux-sched

# We also need to rebuild into the system install
Expand Down
15 changes: 11 additions & 4 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,9 @@ jobs:
fail-fast: false
matrix:
# container base and lib prefix
test: [["fluxrm/flux-sched:jammy", "/usr/lib"],
test: [["fluxrm/flux-sched:noble", "/usr/lib"],
["fluxrm/flux-sched:fedora38", "/usr/lib64"],
["fluxrm/flux-sched:bookworm-amd64", "/usr/lib"],
["fluxrm/flux-sched:el8", "/usr/lib64"]]
["fluxrm/flux-sched:bookworm-amd64", "/usr/lib"]]

container:
image: ${{ matrix.test[0] }}
Expand All @@ -38,7 +37,15 @@ jobs:

# TODO: we should consider distributing the header files with the release builds
- name: flux-sched build
run: git clone https://github.com/flux-framework/flux-sched /opt/flux-sched
run: git clone -b add-shrink https://github.com/researchapps/flux-sched /opt/flux-sched
- name: flux-sched compile
run: |
export FLUX_SCHED_VERSION=0.39.0
cd /opt/flux-sched
cmake -B build
make -C build
make -C build install
cd -
- name: Build
run: LIB_PREFIX=${{ matrix.test[1] }} make build
- name: Test Binary
Expand Down
16 changes: 12 additions & 4 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@ jobs:
strategy:
fail-fast: false
matrix:
test: [["fluxrm/flux-sched:jammy", "/usr/lib"],
test: [["fluxrm/flux-sched:noble", "/usr/lib"],
["fluxrm/flux-sched:fedora38", "/usr/lib64"],
["fluxrm/flux-sched:bookworm-amd64", "/usr/lib"],
["fluxrm/flux-sched:el8", "/usr/lib64"]]
["fluxrm/flux-sched:bookworm-amd64", "/usr/lib"]]

container:
image: ${{ matrix.test[0] }}
Expand All @@ -33,7 +32,16 @@ jobs:
go-version: ^1.21

- name: flux-sched build
run: git clone https://github.com/flux-framework/flux-sched /opt/flux-sched
run: git clone -b add-shrink https://github.com/researchapps/flux-sched /opt/flux-sched
- name: flux-sched compile
run: |
export FLUX_SCHED_VERSION=0.39.0
cd /opt/flux-sched
cmake -B build
make -C build
make -C build install
cd -

- name: Build
run: LIB_PREFIX=${{ matrix.test[1] }} make build
- name: Test
Expand Down
19 changes: 19 additions & 0 deletions cmd/test/test.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,25 @@ func main() {
log.Fatalf("Error in ReapiClient MatchSatisfy - asking for 4 nodes should now succeed: %v\n", err)
}

// Shrink (remove subgraph) for node2
fmt.Println("🥕 Asking to Shrink from 4 to 3 Nodes")
err = cli.Shrink("/tiny0/rack0/node2")
if err != nil {
log.Fatalf("Error in ReapiClient Shrink: %s %s\n", err, cli.GetErrMsg())
}
fmt.Printf("Shrink request return value: %v\n", err)

fmt.Println("Asking to MatchSatisfy 4 nodes (again, not possible)")
sat, overhead, err = cli.MatchSatisfy(growJobspec)
checkErrors(cli)
if err != nil {
log.Fatalf("Error in ReapiClient MatchSatisfy: %v\n", err)
}
printSatOutput(sat, err)
if sat {
log.Fatalf("Error in ReapiClient MatchSatisfy - asking for 4 nodes with only 3 should fail: %v\n", err)
}

}

func printOutput(reserved bool, allocated string, at int64, jobid uint64, err error) {
Expand Down
39 changes: 30 additions & 9 deletions pkg/fluxcli/reapi_cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,19 @@

package fluxcli

/*
#include "resource/reapi/bindings/c/reapi_cli.h"
*/
import "C"
import (
"fmt"
"unsafe"

"github.com/flux-framework/fluxion-go/pkg/types"
)

/*
#include <stdlib.h>
#include "resource/reapi/bindings/c/reapi_cli.h"
*/
import "C"

type (
ReapiCtx C.struct_reapi_cli_ctx_t

Expand Down Expand Up @@ -220,23 +222,42 @@ func (cli *ReapiClient) UpdateAllocate(jobid int, r string) (at int64, overhead
return at, overhead, r_out, err
}

// Update the resource state with R.
// Update the resource state with R (grow).
//
// \param h Opaque handle. How it is used is an implementation
// detail. However, when it is used within a Flux's
// detail. However, when it is used within a Flux's
// service module, it is expected to be a pointer
// to a flux_t object.
// \param R_subgraph R String
// \return 0 on success; -1 on error.
// \param R_subgraph R String
// \return 0 on success; -1 on error.
// int reapi_cli_grow (reapi_cli_ctx_t *ctx, const char *R_subgraph);
func (cli *ReapiClient) Grow(rSubgraph string) (err error) {
	// C.CString copies the Go string into C-allocated memory that the Go
	// garbage collector does not manage; release it exactly once, on return.
	// Registering the free right after the allocation keeps the pairing
	// obvious and avoids freeing the same pointer twice.
	var resources = C.CString(rSubgraph)
	defer C.free(unsafe.Pointer(resources))

	// Hand the C return code to retvalToError for conversion into a Go error.
	fluxerr := (int)(C.reapi_cli_grow((*C.struct_reapi_cli_ctx)(cli.ctx), resources))
	return retvalToError(fluxerr, "issue resource api client grow")
}

// Update the resource state (shrink) with R_node_path.
//
// \param h Opaque handle. How it is used is an implementation
// detail. However, when it is used within a Flux's
// service module, it is expected to be a pointer
// to a flux_t object.
// \param R_node_path Path of the resource graph node to prune (e.g. "/tiny0/rack0/node2")
// \return 0 on success; -1 on error.
// int reapi_cli_shrink (reapi_cli_ctx_t *ctx, const char *R_node_path);
func (cli *ReapiClient) Shrink(rNodePath string) (err error) {
	// The C copy of the path is owned by this call and released on return.
	cPath := C.CString(rNodePath)
	defer C.free(unsafe.Pointer(cPath))

	// Cast the stored context to the C struct pointer the binding expects.
	ctx := (*C.struct_reapi_cli_ctx)(cli.ctx)
	rc := int(C.reapi_cli_shrink(ctx, cPath))

	return retvalToError(rc, "issue resource api client shrink")
}

// Cancel cancels the allocation or reservation corresponding to jobid.
//
// \param jobid jobid of the uint64_t type.
Expand Down
Loading