From da40907d69630a28e5e02ceb0170cda61cbeeab8 Mon Sep 17 00:00:00 2001 From: Jacob Tomlinson Date: Fri, 2 Aug 2024 15:27:06 +0100 Subject: [PATCH 1/2] Replace docker-compose with docker compose --- .github/workflows/build-docker-images.yaml | 4 ++-- ci/htcondor.sh | 24 +++++++++++----------- ci/htcondor/start-htcondor.sh | 4 ++-- ci/pbs.sh | 4 ++-- ci/pbs/start-pbs.sh | 2 +- ci/sge.sh | 4 ++-- ci/sge/start-sge.sh | 2 +- ci/slurm.sh | 4 ++-- ci/slurm/register_cluster.sh | 2 +- ci/slurm/start-slurm.sh | 2 +- docs/source/develop.rst | 12 +++++------ 11 files changed, 32 insertions(+), 32 deletions(-) diff --git a/.github/workflows/build-docker-images.yaml b/.github/workflows/build-docker-images.yaml index 035e822c..f1c9895c 100644 --- a/.github/workflows/build-docker-images.yaml +++ b/.github/workflows/build-docker-images.yaml @@ -22,13 +22,13 @@ jobs: - name: Check versions run: | docker version - docker-compose version + docker compose version - name: Building Image shell: bash -l {0} run: | cd ./ci/${{ matrix.jobqueue }} cp ../environment.yml environment.yml - docker-compose build + docker compose build - name: List images run: | docker ps -a diff --git a/ci/htcondor.sh b/ci/htcondor.sh index f5dcb8a8..c4c55382 100755 --- a/ci/htcondor.sh +++ b/ci/htcondor.sh @@ -2,18 +2,18 @@ function jobqueue_before_install { docker version - docker-compose version + docker compose version # start htcondor cluster cd ./ci/htcondor - docker-compose pull + docker compose pull ./start-htcondor.sh - docker-compose exec -T submit /bin/bash -c "condor_status" - docker-compose exec -T submit /bin/bash -c "condor_q" + docker compose exec -T submit /bin/bash -c "condor_status" + docker compose exec -T submit /bin/bash -c "condor_q" cd - #Set shared space permissions - docker-compose exec -T submit /bin/bash -c "chmod -R 777 /shared_space" + docker compose exec -T submit /bin/bash -c "chmod -R 777 /shared_space" docker ps -a docker images @@ -21,22 +21,22 @@ function jobqueue_before_install { function jobqueue_install { cd ./ci/htcondor - docker-compose exec -T submit conda run -n dask-jobqueue /bin/bash -c "cd /dask-jobqueue; pip3 install -e .;chown -R submituser ." + docker compose exec -T submit conda run -n dask-jobqueue /bin/bash -c "cd /dask-jobqueue; pip3 install -e .;chown -R submituser ." cd - } function jobqueue_script { cd ./ci/htcondor - docker-compose exec -T --user submituser submit conda run -n dask-jobqueue /bin/bash -c "cd; pytest /dask-jobqueue/dask_jobqueue --log-cli-level DEBUG --capture=tee-sys --verbose -E htcondor " + docker compose exec -T --user submituser submit conda run -n dask-jobqueue /bin/bash -c "cd; pytest /dask-jobqueue/dask_jobqueue --log-cli-level DEBUG --capture=tee-sys --verbose -E htcondor " cd - } function jobqueue_after_script { cd ./ci/htcondor - docker-compose exec -T --user submituser submit /bin/bash -c "condor_q" - docker-compose exec -T submit /bin/bash -c "condor_status" - docker-compose exec -T --user submituser submit /bin/bash -c "condor_history" - docker-compose exec -T --user submituser submit /bin/bash -c "cd; cat logs/*" - docker-compose exec -T cm /bin/bash -c " grep -R \"\" /var/log/condor/ " + docker compose exec -T --user submituser submit /bin/bash -c "condor_q" + docker compose exec -T submit /bin/bash -c "condor_status" + docker compose exec -T --user submituser submit /bin/bash -c "condor_history" + docker compose exec -T --user submituser submit /bin/bash -c "cd; cat logs/*" + docker compose exec -T cm /bin/bash -c " grep -R \"\" /var/log/condor/ " cd - } diff --git a/ci/htcondor/start-htcondor.sh b/ci/htcondor/start-htcondor.sh index 1afe4546..fe1557d3 100755 --- a/ci/htcondor/start-htcondor.sh +++ b/ci/htcondor/start-htcondor.sh @@ -1,8 +1,8 @@ #!/bin/bash -docker-compose up -d --no-build +docker compose up -d --no-build -while [ `docker-compose exec -T submit condor_status -af activity|grep Idle|wc -l` -ne 2 ] +while [ `docker compose exec -T submit condor_status -af activity|grep Idle|wc -l` -ne 2 ] do echo "Waiting for cluster to become ready"; sleep 2 diff --git a/ci/pbs.sh b/ci/pbs.sh index daa9a712..b5c8af44 100644 --- a/ci/pbs.sh +++ b/ci/pbs.sh @@ -2,11 +2,11 @@ function jobqueue_before_install { docker version - docker-compose version + docker compose version # start pbs cluster cd ./ci/pbs - docker-compose pull + docker compose pull ./start-pbs.sh cd - diff --git a/ci/pbs/start-pbs.sh b/ci/pbs/start-pbs.sh index 138d5f6e..c684ea6e 100755 --- a/ci/pbs/start-pbs.sh +++ b/ci/pbs/start-pbs.sh @@ -1,6 +1,6 @@ #!/bin/bash -docker-compose up -d --no-build +docker compose up -d --no-build while [ `docker exec -u pbsuser pbs_master pbsnodes -a | grep "Mom = pbs_slave" | wc -l` -ne 2 ] do echo "Waiting for PBS slave nodes to become available"; diff --git a/ci/sge.sh b/ci/sge.sh index fd08dca3..c258b263 100644 --- a/ci/sge.sh +++ b/ci/sge.sh @@ -2,11 +2,11 @@ function jobqueue_before_install { docker version - docker-compose version + docker compose version # start sge cluster cd ./ci/sge - docker-compose pull + docker compose pull ./start-sge.sh cd - diff --git a/ci/sge/start-sge.sh b/ci/sge/start-sge.sh index 95632536..1040f3c6 100755 --- a/ci/sge/start-sge.sh +++ b/ci/sge/start-sge.sh @@ -1,6 +1,6 @@ #!/bin/bash -docker-compose up -d --no-build +docker compose up -d --no-build START=$(date +%s) MAX_WAIT_SECONDS=300 diff --git a/ci/slurm.sh b/ci/slurm.sh index 10acc847..2ad9709d 100644 --- a/ci/slurm.sh +++ b/ci/slurm.sh @@ -2,11 +2,11 @@ function jobqueue_before_install { docker version - docker-compose version + docker compose version # start slurm cluster cd ./ci/slurm - docker-compose pull + docker compose pull ./start-slurm.sh cd - diff --git a/ci/slurm/register_cluster.sh b/ci/slurm/register_cluster.sh index ef3d4d0f..e497eaa5 100755 --- a/ci/slurm/register_cluster.sh +++ b/ci/slurm/register_cluster.sh @@ -2,4 +2,4 @@ set -e docker exec slurmctld bash -c "/usr/bin/sacctmgr --immediate add cluster name=linux" && \ -docker-compose restart slurmdbd slurmctld +docker compose restart slurmdbd slurmctld diff --git a/ci/slurm/start-slurm.sh b/ci/slurm/start-slurm.sh index 6cdce2db..6b74c2d8 100755 --- a/ci/slurm/start-slurm.sh +++ b/ci/slurm/start-slurm.sh @@ -1,6 +1,6 @@ #!/bin/bash -docker-compose up -d --no-build +docker compose up -d --no-build while [ `./register_cluster.sh 2>&1 | grep "sacctmgr: error" | wc -l` -ne 0 ] do diff --git a/docs/source/develop.rst b/docs/source/develop.rst index d399d087..c3dd5c78 100644 --- a/docs/source/develop.rst +++ b/docs/source/develop.rst @@ -67,9 +67,9 @@ This is basically a simplified version of what is in the ci/*.sh files. For example with Slurm:: cd ci/slurm - docker-compose pull + docker compose pull # Start a Slurm dockerized cluster - ./start-slurm.sh #which is doing docker-compose up -d --no-build + ./start-slurm.sh #which is doing docker compose up -d --no-build # Install dask-jobqueue in Docker container docker exec slurmctld /bin/bash -c "cd /dask-jobqueue; pip install -e ." # Run the tests for slurm @@ -77,7 +77,7 @@ For example with Slurm:: You can then shutdown the dockerized cluster and remove all the containers from your computer:: - docker-compose down + docker compose down Test on a real Job queuing system --------------------------------- @@ -107,12 +107,12 @@ For Slurm for example:: cd ci/slurm cp ../environment.yml environment.yml #The Dockerfile needs the reference Conda environment file in its context to build - docker-compose build + docker compose build You might want to stop your dockerized cluster and refresh the build if you have done this previously:: - docker-compose down - docker-compose build --no-cache + docker compose down + docker compose build --no-cache Update Docker images for CI tests --------------------------------- From f9becf2adca0d47ec40358051273c8cc32c0fe6e Mon Sep 17 00:00:00 2001 From: Jacob Tomlinson Date: Fri, 2 Aug 2024 16:45:43 +0100 Subject: [PATCH 2/2] Migrate PBS container to rockylinux 8 and openpbs 23.06 --- ci/pbs/Dockerfile | 31 +++++++++++-------------------- ci/pbs/build.sh | 11 ----------- ci/pbs/master-entrypoint.sh | 2 +- ci/pbs/slave-entrypoint.sh | 2 +- 4 files changed, 13 insertions(+), 33 deletions(-) delete mode 100644 ci/pbs/build.sh diff --git a/ci/pbs/Dockerfile b/ci/pbs/Dockerfile index a13d9002..b42556f0 100644 --- a/ci/pbs/Dockerfile +++ b/ci/pbs/Dockerfile @@ -1,20 +1,5 @@ -# inspired from https://github.com/PBSPro/pbspro/blob/v18.1.beta/docker/centos7/ -# multi-stage build -# build script will be triggered -FROM centos:7.5.1804 AS builder -# install dependencies for building -RUN yum install -y gcc make rpm-build libtool hwloc-devel libX11-devel \ - libXt-devel libedit-devel libical-devel ncurses-devel perl \ - postgresql-devel python-devel tcl-devel tk-devel swig expat-devel \ - openssl-devel libXext libXft git postgresql-contrib -# get known PBS Pro source code -RUN git clone --branch release_18_1_branch https://github.com/pbspro/pbspro.git /src/pbspro -COPY build.sh / -RUN bash /build.sh - -# base image -FROM centos:7.5.1804 -LABEL description="PBS Professional Open Source and conda" +FROM rockylinux:8 +LABEL description="openpbs and conda" #The pbs master node name, can be overridden if needed ENV PBS_MASTER pbs_master @@ -22,14 +7,20 @@ ENV PATH /opt/pbs/bin:/opt/anaconda/bin:$PATH ENV LANG en_US.UTF-8 ENV LC_ALL en_US.UTF-8 -COPY --from=builder /root/rpmbuild/RPMS/x86_64/pbspro-server-*.rpm . -# install pbspro and useful packages -RUN yum install -y pbspro-server-*.rpm curl bzip2 git gcc sudo openssh-server && yum clean all +# install openpbs and useful packages +RUN yum install -y unzip bzip2 git gcc sudo openssh-server && yum clean all +RUN curl -o openpbs.zip https://vcdn.altair.com/rl/OpenPBS/openpbs_23.06.06.rockylinux_8.8.zip && \ + unzip openpbs.zip && \ + mv openpbs_23.06.06.rockylinux_8.8/openpbs-*.rpm . && \ + rm -rf openpbs.zip openpbs_23.06.06.rockylinux_8.8 +RUN yum install -y openpbs-debuginfo-*.rpm openpbs-server-*.rpm && yum clean all + # install python RUN curl -o miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ bash miniconda.sh -f -b -p /opt/anaconda && \ /opt/anaconda/bin/conda clean -tipy && \ rm -f miniconda.sh + # environment.yml file is copied by CI script. If manually building, you should copy it too from parent directory COPY environment.yml . RUN conda env create --file environment.yml diff --git a/ci/pbs/build.sh b/ci/pbs/build.sh deleted file mode 100644 index 11b55713..00000000 --- a/ci/pbs/build.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -cd /src/pbspro -./autogen.sh -./configure -prefix=/opt/pbs -make dist -mkdir /root/rpmbuild /root/rpmbuild/SOURCES /root/rpmbuild/SPECS -cp pbspro-*.tar.gz /root/rpmbuild/SOURCES -cp pbspro.spec /root/rpmbuild/SPECS -cp pbspro-rpmlintrc /root/rpmbuild/SOURCES -cd /root/rpmbuild/SPECS -rpmbuild -ba pbspro.spec diff --git a/ci/pbs/master-entrypoint.sh b/ci/pbs/master-entrypoint.sh index 7a2669cb..be710878 100644 --- a/ci/pbs/master-entrypoint.sh +++ b/ci/pbs/master-entrypoint.sh @@ -7,7 +7,7 @@ hostname=$(hostname) sed -i "s/PBS_SERVER=.*/PBS_SERVER=$hostname/" $pbs_conf_file sed -i "s/\$clienthost .*/\$clienthost $hostname/" $mom_conf_file -# start PBS Pro +# start openpbs /etc/init.d/pbs start # create default non-root user diff --git a/ci/pbs/slave-entrypoint.sh b/ci/pbs/slave-entrypoint.sh index b7239046..70d6ee22 100644 --- a/ci/pbs/slave-entrypoint.sh +++ b/ci/pbs/slave-entrypoint.sh @@ -16,7 +16,7 @@ sed -i "s/PBS_START_MOM=.*/PBS_START_MOM=1/" $pbs_conf_file echo "\$usecp *:/home/ /home/" >> $mom_conf_file echo "\$usecp *:/dask-jobqueue/ /tmp/" >> $mom_conf_file -# start PBS Pro +# start openpbs /etc/init.d/pbs start # create default non-root user