Skip to content

Commit

Permalink
wip: partial cancel (#16)
Browse files Browse the repository at this point in the history
* wip: partial cancel

The partial cancel request seems to work, but when I later
call Info or Cancel (the regular cancel) for the same job,
I get an error. I assume the job does not exist at that point,
but because of the bug where error messages are not passed
forward, I cannot see any further output for it.

* test: improve testing setup for match and cancel

Problem: the current testing setup is not standard for Go,
which makes the tests hard to understand and hard to run
as individual units.
Solution: move the testing into proper tests alongside the
package, and break apart the testing for cancel and match.
Additionally, we are using the same graphs / jobspecs
as flux-sched.

* extend tests to cancel job
* docker: update image bases

Problem: the older base images do not have a new enough gcc to build flux-sched.

Signed-off-by: vsoch <[email protected]>
  • Loading branch information
vsoch authored Feb 14, 2025
1 parent bbe5b38 commit a77ba46
Show file tree
Hide file tree
Showing 17 changed files with 5,011 additions and 196 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
uses: actions/checkout@v4
- uses: actions/setup-go@v4
with:
go-version: ^1.21
go-version: ^1.22
- name: GHCR Login
if: (github.event_name != 'pull_request')
uses: docker/login-action@v2
Expand Down
25 changes: 13 additions & 12 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ jobs:
fail-fast: false
matrix:
# container base and lib prefix
test: [["fluxrm/flux-sched:jammy", "/usr/lib"],
["fluxrm/flux-sched:fedora38", "/usr/lib64"],
test: [["fluxrm/flux-sched:noble", "/usr/lib"],
["fluxrm/flux-sched:fedora40", "/usr/lib64"],
["fluxrm/flux-sched:bookworm-amd64", "/usr/lib"],
["fluxrm/flux-sched:el8", "/usr/lib64"]]
["fluxrm/flux-sched:el9", "/usr/lib64"]]

container:
image: ${{ matrix.test[0] }}
Expand All @@ -34,14 +34,15 @@ jobs:
- name: Setup Go
uses: actions/setup-go@v4
with:
go-version: ^1.21
go-version: ^1.22

# TODO: we should consider distributing the header files with the release builds
- name: flux-sched build
run: git clone https://github.com/flux-framework/flux-sched /opt/flux-sched
- name: Build
run: LIB_PREFIX=${{ matrix.test[1] }} make build
- name: Test Binary
run: LIB_PREFIX=${{ matrix.test[1] }} make test-binary
- name: Test Modules
run: make test-modules
run: git clone https://github.com/flux-framework/flux-sched /opt/flux-sched
# - name: build flux-sched
# run: |
# here=$(pwd)
# cd /opt/flux-sched
# mkdir build && cd build && cmake ../ && make -j && sudo make install
# cd $here
- name: Test
run: LIB_PREFIX=${{ matrix.test[1] }} make test-v
3 changes: 3 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ RUN wget https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz && tar -xvf go${G
ENV PATH=$PATH:/usr/local/go/bin:/home/vscode/go/bin

RUN git clone https://github.com/flux-framework/flux-sched /opt/flux-sched
RUN cd /opt/flux-sched && \
export FLUX_SCHED_VERSION=0.40.0 && \
mkdir build && cd build && cmake ../ && make -j && sudo make install

# Assuming installing to /usr/local
ENV LD_LIBRARY_PATH=/usr/lib:/usr/lib/flux:/usr/local/lib
Expand Down
40 changes: 10 additions & 30 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ HERE ?= $(shell pwd)
LOCALBIN ?= $(shell pwd)/bin
JGF ?= $(HERE)/cmd/test/data/tiny.json
JOBSPECS ?= $(HERE)/cmd/test/data/jobspecs
CANCELDATA ?= $(HERE)/cmd/test/data/cancel

# This assumes a build in the .devcontainer Dockerfile environment
FLUX_SCHED_ROOT ?= /opt/flux-sched
Expand All @@ -17,43 +18,22 @@ LD_LIBRARY_PATH=$(LIB_PREFIX):$(LIB_PREFIX)/flux
BUILDENVVAR=CGO_CFLAGS="-I${FLUX_SCHED_ROOT} -I${FLUX_SCHED_ROOT}/resource/reapi/bindings/c" CGO_LDFLAGS="-L${LIB_PREFIX} -L${LIB_PREFIX}/flux -L${FLUX_SCHED_ROOT}/resource/reapi/bindings -lreapi_cli -lflux-idset -lstdc++ -ljansson -lhwloc -lflux-hostlist -lboost_graph -lyaml-cpp"

.PHONY: all
all: build
all: test

.PHONY: test
test: test-binary test-modules

.PHONY: test-modules
test-modules:
go test -v ./pkg/types

.PHONY: test-binary
test-binary:
LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) $(LOCALBIN)/test --jgf=$(JGF) --jobspec=$(JOBSPECS)/test001.yaml

# test001_desc="match allocate 1 slot: 1 socket: 1 core (pol=default)"
# test_expect_success "${test001_desc}" '
# ${main} --jgf=${jgf} --jobspec=${jobspec1} > 001.R.out &&
# sed -i -E "s/, 0\.[0-9]+//g" 001.R.out &&
# test_cmp 001.R.out ${exp_dir}/001.R.out
#'

#test002_desc="match allocate 2 slots: 2 sockets: 5 cores 1 gpu 6 memory"
#test_expect_success "${test002_desc}" '
# ${main} --jgf=${jgf} --jobspec=${jobspec2} > 002.R.out &&
# sed -i -E "s/, 0\.[0-9]+//g" 002.R.out &&
# test_cmp 002.R.out ${exp_dir}/002.R.out
#'
test:
# $(COMMONENVVAR) $(BUILDENVVAR) LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) go test -count 1 -run TestCancel -ldflags '-w' ./pkg/fluxcli ./pkg/types
$(COMMONENVVAR) $(BUILDENVVAR) LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) go test -ldflags '-w' ./pkg/fluxcli ./pkg/types

.PHONY: test-v
test-v:
# $(COMMONENVVAR) $(BUILDENVVAR) LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) go test -count 1 -run TestCancel -v -ldflags '-w' ./pkg/fluxcli ./pkg/types
$(COMMONENVVAR) $(BUILDENVVAR) LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) go test -v -ldflags '-w' ./pkg/fluxcli ./pkg/types

.PHONY: $(LOCALBIN)
$(LOCALBIN):
mkdir -p $(LOCALBIN)

# This serves as a single test file to build a dummy main to test
.PHONY: build $(LOCALBIN)
build:
mkdir -p $(LOCALBIN)
$(COMMONENVVAR) $(BUILDENVVAR) go build -ldflags '-w' -o $(LOCALBIN)/test cmd/test/test.go

.PHONY: clean
clean:
rm -rf $(LOCALBIN)/test
132 changes: 0 additions & 132 deletions cmd/test/test.go

This file was deleted.

Loading

0 comments on commit a77ba46

Please sign in to comment.