From db1ff46e2d9a5106eefb55b3682aa066b9db078d Mon Sep 17 00:00:00 2001
From: Liam Pattinson
Date: Mon, 18 Dec 2023 11:04:39 +0000
Subject: [PATCH 1/9] Upgrades to run_epoch script

---
 run_epoch.py | 233 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 167 insertions(+), 66 deletions(-)

diff --git a/run_epoch.py b/run_epoch.py
index 54972b1..1964aa4 100755
--- a/run_epoch.py
+++ b/run_epoch.py
@@ -5,89 +5,148 @@
 This script attempts to mimic the behaviour of Epoch by prompting the user to
 input their output directory after the program is running. This behaviour can be
 overridden by supplying the '-o' flag.
+
+This script can also be used to launch a shell in a Singularity image with sdf_helper
+pre-installed.
 """
 
-import argparse
+from argparse import ArgumentParser, Namespace
 import subprocess
 from pathlib import Path
 from textwrap import dedent
+from typing import Optional
 
-_DEFAULT = "_DEFAULT"
-_DEFAULTS = dict(
+_CONTAINERS = dict(
     docker="ghcr.io/plasmafair/epoch:latest",
     singularity="oras://ghcr.io/plasmafair/epoch.sif:latest",
 )
 
 
-def parse_args() -> argparse.Namespace:
+def parse_args() -> Namespace:
     """Reads arguments from the command line."""
-    parser = argparse.ArgumentParser(prog="run_epoch", description=__doc__)
-    parser.add_argument(
-        "platform",
-        choices=("docker", "singularity"),
-        type=str,
-        help="The container platform service to use: either 'docker' or 'singularity'",
-    )
+    parser = ArgumentParser(prog="run_epoch", description=__doc__)
 
-    parser.add_argument(
-        "-c",
-        "--container",
-        default=_DEFAULT,
-        type=str,
-        help=dedent(
-            f"""The container to run. The default for Docker is {_DEFAULTS["docker"]},
-            while the default for Singularity is {_DEFAULTS["singularity"]}.
-            """
-        ).replace("\n", " "),
+    subparsers = parser.add_subparsers(
+        required=True,
+        dest="mode",
+        help=(
+            "Please select one of the subcommands provided. You can supply --help "
+            "after each subcommand to view further options."
+        ),
     )
 
-    parser.add_argument(
-        "-d",
-        "--dims",
-        default=1,
-        type=int,
-        choices=range(1, 4),
-        help="The number of dimensions in your Epoch run. The default is 1.",
+    docker_parser = subparsers.add_parser(
+        "docker",
+        help="Run Epoch via a Docker container.",
     )
 
-    parser.add_argument(
-        "-o",
-        "--output",
-        default=None,
-        type=Path,
-        help=dedent(
-            """The path of the output directory. If not supplied, the user will
-            be prompted for this information after the program starts.
-            """
-        ).replace("\n", " "),
+    singularity_parser = subparsers.add_parser(
+        "singularity",
+        help="Run Epoch via a Singularity container.",
     )
 
-    parser.add_argument(
+    # Define function for container selection arg; will be needed in multiple places
+    def container_arg(parser: ArgumentParser, default: str, cmd: str = "run") -> None:
+        parser.add_argument(
+            "-c",
+            "--container",
+            default=default,
+            type=str,
+            help=f"The container to {cmd}. The default is {default}.",
+        )
+
+    subparser_tuple = (docker_parser, singularity_parser)
+    container_tuple = (_CONTAINERS["docker"], _CONTAINERS["singularity"])
+    for subparser, container in zip(subparser_tuple, container_tuple):
+        container_arg(subparser, container)
+
+        subparser.add_argument(
+            "-d",
+            "--dims",
+            default=1,
+            type=int,
+            choices=range(1, 4),
+            help="The number of dimensions in your Epoch run. The default is 1.",
+        )
+
+        subparser.add_argument(
+            "-o",
+            "--output",
+            default=None,
+            type=Path,
+            help=(
+                "The path of the output directory. If not supplied, the user will "
+                "be prompted for this information after the program starts."
+            ),
+        )
+
+        subparser.add_argument(
+            "--photons", action="store_true", help="Run with QED features enabled."
+        )
+
+        subparser.add_argument(
+            "--no-run", action="store_true", help="Print the command but don't run it."
+        )
+
+    # Singularity multiprocess utilities
+    singularity_parser.add_argument(
         "-n",
         "--nprocs",
         default=1,
         type=int,
-        help="The number of processes to run on. Uses mpirun.",
+        help=("The number of processes to run on. Uses mpirun unless --srun is set."),
+    )
+
+    singularity_parser.add_argument(
+        "--srun",
+        action="store_true",
+        help=(
+            "Run using srun instead of mpirun. "
+            "Recommended for HPC machines with Slurm controllers."
+        ),
+    )
+
+    # Extra singularity utilities
+    subsubparsers = singularity_parser.add_subparsers(
+        required=False,
+        dest="singularity_mode",
+        help="Additional Singularity utilities.",
     )
 
-    parser.add_argument(
-        "--photons", action="store_true", help="Run with QED features enabled."
+    # Pull: Download a singularity image to a local file
+    pull_parser = subsubparsers.add_parser(
+        "pull",
+        help=(
+            "Pull a Singularity container to a local image file. "
+            "This may be used as an argument to -c/--container."
+        ),
+    )
+    pull_parser.add_argument(
+        "-o",
+        "--output",
+        type=Path,
+        default=Path("epoch.sif"),
+        help="Filename of local image file",
     )
+    container_arg(pull_parser, _CONTAINERS["singularity"], cmd="pull")
 
-    parser.add_argument(
-        "--no-run", action="store_true", help="Print the command but don't run it."
+    # Shell: Open a shell inside the container
+    shell_parser = subsubparsers.add_parser(
+        "shell", help="Open a shell in the Singularity image."
+    )
+    shell_parser.add_argument(
+        "--python",
+        action="store_true",
+        help="Open directly into a Python shell",
     )
+    container_arg(shell_parser, _CONTAINERS["singularity"])
 
     return parser.parse_args()
 
 
-def docker_cmd(
-    container: str, output: Path, dims: int, photons: bool, nprocs: int
-) -> str:
+def docker_cmd(container: str, output: Path, dims: int, photons: bool) -> str:
     """Constructs the command to run Epoch via a Docker container."""
-    if nprocs != 1:
-        raise ValueError("When running with Docker, nprocs must equal 1")
     return dedent(
         f"""\
         docker run --rm
@@ -101,7 +160,7 @@ def docker_cmd(
 
 
 def singularity_cmd(
-    container: str, output: Path, dims: int, photons: bool, nprocs: int
+    container: str, output: Path, dims: int, photons: bool, nprocs: int, srun: bool
 ) -> str:
     """Constructs the command to run Epoch via a Singularity container."""
     cmd = dedent(
@@ -115,29 +174,71 @@ def singularity_cmd(
         {'--photons' if photons else ''}
         """
     ).replace("\n", " ")
-    return f"mpirun -n {nprocs} " + cmd if nprocs != 1 else cmd
 
+    run_mode: str
+    if srun:
+        run_mode = "srun"
+    elif nprocs != 1:
+        run_mode = f"mpirun -n {nprocs}"
+    else:
+        run_mode = ""
 
-def main() -> None:
-    args = parse_args()
+    return f"{run_mode} {cmd}".strip()
 
-    # If output not supplied, prompt user
-    # This allows the user to run the code using 'echo output_dir | epoch_docker.py'
-    output = args.output
-    if output is None:
-        output = Path(input("Please enter output directory:\n"))
 
-    if args.container == _DEFAULT:
-        container = _DEFAULTS[args.platform]
-    else:
-        container = args.container
+def pull_cmd(container: str, output: Path) -> str:
+    """Constructs the command to pull to a local Singularity image."""
+    return f"singularity pull {output} {container}"
+
+
+def shell_cmd(container: str, python: bool) -> str:
+    """Construct the command to open a shell in a Singularity container."""
+    if python:
+        return f"singularity exec {container} python"
+    return f"singularity shell {container}"
+
 
-    cmd_func = docker_cmd if args.platform == "docker" else singularity_cmd
-    cmd = cmd_func(container, output, args.dims, args.photons, args.nprocs)
-    print("Running Epoch with the command:", "\n", cmd)
-    if not args.no_run:
+def prompt_output(output: Optional[Path]) -> Path:
+    """
+    If output is None, prompt the user to supply it. Otherwise return unchanged.
+    Allows the user to run the code using ``echo output_dir | run_epoch.py``.
+    """
+    return Path(input("Please enter output directory:\n")) if output is None else output
+
+
+def run_cmd(cmd: str, no_run: bool = False) -> None:
+    """Execute ``cmd`` in a subprocess, or just print it if ``no_run`` is ``True``."""
+    if no_run:
+        print(f"Generated the command:\n{cmd}")
+    else:
+        print(f"Running with the command:\n{cmd}")
         subprocess.run(cmd.split())
 
 
+def main() -> None:
+    args = parse_args()
+
+    if args.mode == "docker":
+        cmd = docker_cmd(
+            args.container, prompt_output(args.output), args.dims, args.photons
+        )
+        run_cmd(cmd, no_run=args.no_run)
+    elif args.mode == "singularity":
+        if args.singularity_mode == "pull":
+            run_cmd(pull_cmd(args.container, args.output))
+        elif args.singularity_mode == "shell":
+            run_cmd(shell_cmd(args.container, args.python))
+        else:
+            cmd = singularity_cmd(
+                args.container,
+                prompt_output(args.output),
+                args.dims,
+                args.photons,
+                args.nprocs,
+                args.srun,
+            )
+            run_cmd(cmd, no_run=args.no_run)
+
+
 if __name__ == "__main__":
     main()

From 2bd7fe2513ccc93d92785744a519b917eaf2d55b Mon Sep 17 00:00:00 2001
From: Liam Pattinson
Date: Mon, 18 Dec 2023 16:22:23 +0000
Subject: [PATCH 2/9] Add explanation documents for running on Viking

---
 run_epoch.py           |  19 ++--
 viking/README.md       | 200 +++++++++++++++++++++++++++++++++++++++++
 viking/epoch_viking.sh |  93 +++++++++++++++++++
 3 files changed, 307 insertions(+), 5 deletions(-)
 create mode 100644 viking/README.md
 create mode 100644 viking/epoch_viking.sh

diff --git a/run_epoch.py b/run_epoch.py
index 1964aa4..df39eb0 100755
--- a/run_epoch.py
+++ b/run_epoch.py
@@ -10,8 +10,8 @@ pre-installed.
 """
 
-from argparse import ArgumentParser, Namespace
 import subprocess
+from argparse import ArgumentParser, Namespace
 from pathlib import Path
 from textwrap import dedent
 from typing import Optional
@@ -140,6 +140,12 @@ def container_arg(parser: ArgumentParser, default: str, cmd: str = "run") -> Non
         action="store_true",
         help="Open directly into a Python shell",
     )
+    shell_parser.add_argument(
+        "--cmd",
+        type=str,
+        default=None,
+        help="Run a specific command on entering the shell.",
+    )
     container_arg(shell_parser, _CONTAINERS["singularity"])
 
     return parser.parse_args()
@@ -191,16 +197,19 @@ def pull_cmd(container: str, output: Path) -> str:
     return f"singularity pull {output} {container}"
 
 
-def shell_cmd(container: str, python: bool) -> str:
+def shell_cmd(container: str, python: bool, cmd: Optional[str] = None) -> str:
     """Construct the command to open a shell in a Singularity container."""
     if python:
         return f"singularity exec {container} python"
+    if cmd is not None:
+        return f"singularity exec {container} {cmd}"
     return f"singularity shell {container}"
 
 
 def prompt_output(output: Optional[Path]) -> Path:
-    """
-    If output is None, prompt the user to supply it. Otherwise return unchanged.
+    """If output is None, prompt the user to supply it.
+
+    Otherwise return unchanged.
     Allows the user to run the code using ``echo output_dir | run_epoch.py``.
     """
     return Path(input("Please enter output directory:\n")) if output is None else output
@@ -227,7 +236,7 @@ def main() -> None:
         if args.singularity_mode == "pull":
             run_cmd(pull_cmd(args.container, args.output))
         elif args.singularity_mode == "shell":
-            run_cmd(shell_cmd(args.container, args.python))
+            run_cmd(shell_cmd(args.container, args.python, args.cmd))
         else:
             cmd = singularity_cmd(
                 args.container,
diff --git a/viking/README.md b/viking/README.md
new file mode 100644
index 0000000..0e39e14
--- /dev/null
+++ b/viking/README.md
@@ -0,0 +1,200 @@
+# Epoch on Viking
+
+This directory provides utilities for running Epoch on Viking2, a High-Performance
+Computing (HPC) service accessible to researchers at the University of York. The
+contents may be adapted to suit other HPC machines.
+
+## Getting Started
+
+To access Viking, please consult the documents on [creating an account][account] and
+[connecting to Viking][connect]. Once you have an account, the following should work on
+computers connected to the University of York network:
+
+```bash
+$ ssh viking
+```
+
+On logging in, you should find yourself in a directory containing `./scratch`: this is
+where you should run all of your jobs on Viking. Clone this repository into here:
+
+```bash
+$ cd scratch
+$ git clone https://github.com/PlasmaFAIR/epoch_containers
+```
+
+If you want to try running from source, you should also clone Epoch here:
+
+```bash
+$ git clone https://github.com/Warwick-Plasma/epoch
+```
+
+You can copy files between your own machine and Viking using `scp`:
+
+```bash
+$ scp local_file viking:scratch/my_file  # send to viking
+$ scp viking:scratch/my_file local_file  # download from viking
+```
+
+The following sections will cover how to run Epoch either using the containers provided
+in this repository, or by compiling it from source. In either case, you will need to
+submit Slurm scripts using `sbatch` to do so -- more details on this later.
+
+> [!CAUTION]
+> Do not run Epoch directly on the login nodes. Always use `sbatch`.
+
+## Using Singularity
+
+Viking uses a 'module' system to set up your environment, and it has a lot of different
+scientific software packages pre-installed. To use Singularity containers, we'll need to
+use the module `Apptainer`:
+
+```bash
+$ module load Apptainer
+```
+
+> [!NOTE]
+> Apptainer is a flavour of Singularity supported by the Linux Foundation, with the
+  other major flavour being SingularityCE. Containers built using one tool are usually
+  runnable using the other.
+
+The first time we try to run a Singularity container, it will take a long time to be
+downloaded. To speed things up, we can pre-download it using the following command from
+the top level of the `epoch_containers` repository:
+
+```bash
+$ ./run_epoch.py singularity shell
+```
+
+This will download the container, cache it locally, and drop you into a command prompt.
+You can escape it by pressing `Ctrl + d`.
+
+To run Epoch using containers, please skip forward to the section
+[Running Epoch][running_epoch].
+
+## Compiling from Source
+
+If you run into difficulties with the Singularity containers, another option would be to
+compile and run Epoch from source. To do so, clone the main Epoch repo as described
+above, and enter the correct version of the code:
+
+```bash
+$ git clone https://github.com/Warwick-Plasma/epoch
+$ cd epoch/epoch2d
+```
+
+Here, you will find a `Makefile` containing a lot of commented-out settings flags on
+lines beginning with `DEFINES` -- uncomment any you need.
+
+To install, you must first load a version of GCC that is compatible with Epoch, and the
+matching version of OpenMPI:
+
+```bash
+$ module load GCC/11.3.0 OpenMPI/4.1.4-GCC-11.3.0
+```
+
+You can then build Epoch using:
+
+```bash
+$ make COMPILER=gfortran
+```
+
+This will create an executable in `./bin`, which can be moved somewhere more convenient
+if you wish.
+
+## Running Epoch
+
+To run Epoch, you will need to use a modified version of the script `epoch_viking.sh`
+provided in this directory. Do _not_ run this directly.
+
+The lines marked `#SBATCH` control how the job is submitted to the Slurm controller,
+which schedules jobs on Viking. Note that the fewer resources you request, the faster
+your job will be run, but make sure you request enough time, memory, nodes, and tasks
+for your job to succeed!
+
+> [!IMPORTANT]
+> You'll need to set the project code to that provided to you when you signed up to
+  Viking.
+
+The variables set under `User settings` control the Epoch run. Be sure to set the
+variables `output_dir` to match your directory layout. If you're running using
+containers, you should set `run_epoch` to the location of your `run_epoch.py` script,
+and set the variables `dims` and `photons` to set up your run properly. If you're
+running from source, you'll need to set `epoch_exe` to your compiled executable, and
+`mpi_module` to the MPI module you had loaded when you compiled Epoch.
+
+To submit a job, use `sbatch`:
+
+```bash
+$ sbatch my_job.sh
+```
+
+To see where your job is in the queue, we can use `squeue`:
+
+```bash
+$ squeue --me
+```
+
+This will also show you the job ID. Jobs can be cancelled with `scancel`:
+
+```bash
+$ scancel JOBID
+```
+
+## Managing Output Files
+
+Epoch outputs data in its own SDF format, which isn't particularly user friendly, and
+the tools for reading it can even be difficult to install. The containers provided by
+this repository come with `sdf_helper` pre-installed, and you can open a shell using:
+
+```bash
+$ ./run_epoch.py singularity shell           # Opens bash shell at current directory.
+$ ./run_epoch.py singularity shell --python  # Opens straight into Python interpreter.
+```
+
+From here, you can open and explore SDF files using Python:
+
+```python
+>>> import sdf_helper as sdf
+>>> data = sdf.getdata("0005.sdf")
+>>> sdf.list_variables(data)
+```
+
+Once you have the names of your grids, you can access them as NumPy arrays using:
+
+```python
+>>> data.My_Variable_Name.data
+```
+
+For more information on `sdf_helper`, please see the [official docs][sdf].
+
+Note that you should not perform any computationally intensive work directly on the
+login nodes. If you wish to do anything more than convert a few small arrays from SDF to
+some other format, consider wrapping up your data processing in a Python script, and
+submitting it in a Slurm job script. To run your own Python script from the container,
+use:
+
+```bash
+module load Apptainer
+./run_epoch.py singularity shell --cmd "python my_script.py"
+```
+
+If you need to install any dependencies, you can do this using a short auxiliary bash
+script:
+
+```bash
+# In bash file my_script.sh:
+pip install [dependency list]
+python my_script.py
+
+# In job script:
+./run_epoch.py singularity shell --cmd "/bin/bash my_script.sh"
+```
+
+Data will not be stored indefinitely on Viking's `./scratch` drives, so you should `scp`
+output data to your own machine for longer term storage. It may also be easier to
+perform post-processing and generate plots on your own machine than to manage these
+tools via Slurm jobs.
+
+[account]: https://vikingdocs.york.ac.uk/getting_started/creating_accounts.html
+[connect]: https://vikingdocs.york.ac.uk/getting_started/connecting_to_viking.html
+[sdf]: https://epochpic.github.io/documentation/visualising_output/python.html
diff --git a/viking/epoch_viking.sh b/viking/epoch_viking.sh
new file mode 100644
index 0000000..f6cca10
--- /dev/null
+++ b/viking/epoch_viking.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+
+# Slurm settings
+# --------------
+
+# Set the following to configure your job. You must set 'mail-user' to your Uni of York
+# email, and 'account' to the project code you were given when you signed up to Viking.
+
+#SBATCH --job-name=epoch                 # Job name
+#SBATCH --mail-user=abc123@york.ac.uk    # Where to send mail
+#SBATCH --account=ACCOUNT_CODE           # Project account
+#SBATCH --mail-type=END,FAIL             # Mail events (NONE, BEGIN, END, FAIL, ALL)
+#SBATCH --nodes=1                        # Number of compute nodes to run on
+#SBATCH --ntasks-per-node=2              # Number of MPI processes to spawn per node (max 96)
+#SBATCH --cpus-per-task=1                # Number of CPUS per process (leave this as 1!)
+#SBATCH --mem-per-cpu=1gb                # Memory per task
+#SBATCH --time=00:01:00                  # Total time limit hrs:min:sec
+#SBATCH --output=%x_%j.log               # Log file for stdout outputs
+#SBATCH --output=%x_%j.err               # Log file for stderr outputs
+#SBATCH --partition=nodes                # 'test' for small test jobs (<1m), 'nodes' otherwise
+
+# User settings
+# -------------
+
+# Choose method of running Epoch.
+# 'Singularity' if using containers, 'Source' if compiled from source.
+method="Singularity"
+
+# Name of directory containing your 'input.deck' file.
+# Can be a relative path.
+output_dir="."
+
+# Number of dimensions in your run.
+# Ignored if running from source.
+dims="2"
+
+# Use QED methods
+# Set to '--photons' to activate, or just leave as an empty string
+# Ignored if running from source.
+photons=""
+
+# If running Epoch from containers, set this to the 'run_epoch.py' script
+# Ignored if running from source.
+run_epoch="~/scratch/epoch_containers/run_epoch.py"
+
+# If running Epoch from source, set this to the compiled executable
+# Ignored if running from containers.
+epoch_exe="~/scratch/epoch/epoch2d/bin/epoch2d"
+
+# OpenMPI module used to compile Epoch
+# Ignored if running from containers.
+mpi_module="OpenMPI" + +# ------------- + +module purge +module load ${mpi_module} + +if [[ ${method} -eq "Singularity" ]]; then + + module load Apptainer + + # Suppress warnings + export PMIX_MCA_gds=^ds12 + export PMIX_MCA_psec=^munge + + # Fix intra-node communication issue + # https://ciq.com/blog/workaround-for-communication-issue-with-mpi-apps-apptainer-without-setuid/ + export OMPI_MCA_pml=ucx + export OMPI_MCA_btl='^vader,tcp,openib,uct' + export UCX_TLS=^'posix,cma' + + echo "Running Epoch with via Apptainer using ${SLURM_NTASKS} processes" + + ${run_epoch} singularity -d ${dims} -o ${output_dir} ${photons} --srun + + # Alternative in case the above isn't working: + # srun singularity exec --bind ${output_dir}:/output oras://ghcr.io/plasmafair/epoch.sif:latest run_epoch -d ${dims} -o /output --srun ${photons} + +elif [[ ${method} -eq "Source" ]]; then + + echo "Running Epoch from source using ${SLURM_NTASKS} processes" + + echo ${output_dir} | srun ${epoch_exe} + +else + + echo "Set method to one of 'Singularity' or 'Source'" 1>&2 + exit 1 + +fi + + From d394873d5022c1982c9f256994a4a28821a0a350 Mon Sep 17 00:00:00 2001 From: Liam Pattinson Date: Mon, 18 Dec 2023 16:40:20 +0000 Subject: [PATCH 3/9] Update README.md --- README.md | 127 ++++++++++++++---------------------------------------- 1 file changed, 32 insertions(+), 95 deletions(-) diff --git a/README.md b/README.md index d12220a..e280aaa 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Epoch Containers -Tools and information for building/running [Epoch][epoch] using Docker/Singularity -containers. +This repository contains tools and information for building/running [Epoch][epoch] using +Docker/Singularity containers. ## Introduction @@ -11,127 +11,64 @@ allow researchers to run code without needing to build it themselves, and they m it much easier to share reproducible workflows. We provide support for two container platforms: [Docker][docker] and -[Singularity][singularity]. Docker is the most widely used platform, and has been used -here to build a 'base image' of Epoch on which other tools may be built. Singularity is -an alternative container platform that was designed from the ground up to be useable on -HPC systems, so unlike Docker it can be run on multi-node architectures using MPI -without issue. +[Singularity][singularity] (or Apptainer). Docker is the most widely used platform, and +has been used here to build a 'base image' of Epoch on which other tools may be created. +Singularity is an alternative container platform that was designed to be useable on HPC +systems, so unlike Docker it can be run on multi-node architectures using MPI without +issue. ## Usage Users of this software do not need to build containers themselves. Instead, they will only need to copy the Python module `run_epoch.py` in the root directory of this -repository. To run the Docker container, try: - -```python -$ python3 run_epoch.py docker -d 2 -o ./my_epoch_run -``` - -Here, `-d` specifies the number of dimensions to run (e.g. here we are performing a 2D -simulation), `-o` specifies the output directory (which should contain `input.deck`). -Users can switch on QED effects by also providing the `--photons` argument. The output -directory should not be the current working directory. - -Similarly, to run the Singularity container, try: - -```python -$ python3 run_epoch.py singularity -d 2 -o ./my_epoch_run -n 4 -``` - -The extra argument `-n` specifies the number of processes to run, which uses OpenMPI. 
-On HPC systems, you will need to load Singularity and OpenMPI first. For example, on
-Viking at the University of York, this requires:
-
-```bash
-$ module load tools/Singularity mpi/OpenMPI
-```
-
-## Running on Viking (University of York)
-
-To run Epoch on Viking, first create a directory within `~/scratch` in which you
-want to run your code:
-
-```
-$ ssh <userid>@viking.york.ac.uk
-$ mkdir -p ~/scratch/epoch/output
-$ cd ~/scratch/epoch
-```
-
-You'll need to ensure your `input.deck` file is within this directory:
-
-```bash
-$ # From your own machine
-$ scp input.deck <userid>@viking.york.ac.uk:/users/<userid>/scratch/epoch/output
-```
-
-To run the Singularity container, you'll need to load the following modules:
-
-```bash
-$ module load tools/Singularity mpi/OpenMPI
-```
-
-You may then run the helper script as described above.
-
-Note that you should only run short tests on the login nodes. To run longer jobs, you'll
-want to create a Slurm job file. See the `./examples` folder for an example job script
-`run_sbatch.sh` and an example `input.deck`. Once we have a job script, we can submit a
-job using:
-
-```bash
-$ sbatch run_sbatch.sh
-```
-
-We can check the progress of our job using:
-
-```bash
-$ squeue -u <userid>
-```
-
-## Inspecting the Container
-
-It is also possible to pull the container from the remote repo:
-
-```bash
-$ singularity pull epoch.sif oras://ghcr.io/plasmafair/epoch.sif:latest
-```
-
-This will download the container image to the file `epoch.sif` (`.sif` denoting a
-'Singularity Image Format' file). You can then use `epoch.sif` in place of
-`library://account/repo/container` in any of the commands above.
-
-If you want to inspect the container, we can use:
-
-```bash
-$ singularity shell epoch.sif
-```
-
-## Analysing code output
-
-It is recommended to analyse Epoch output data on your own machine rather than on an HPC
-machine:
-
-```bash
-$ scp <userid>@viking.york.ac.uk:/users/<userid>/scratch/epoch/*.sdf .
-```
-
-You'll need a particular Python library to read `.sdf` files, and this is packaged with
-Epoch itself. To install this library, try:
-
-```bash
-$ git clone https://github.com/Warwick-Plasma/epoch
-$ cd epoch/epoch1d
-$ make sdfutils
-```
-
-Note that the SDF Python library is not packaged with modern best-practices in mind
-(i.e. using virtual environments, uploading packages to PyPI/conda-forge). It will
-install to `~/.local/lib/python3.x/site-packages` regardless of whether you're in a
-`venv` or `conda` environment. If you feel you know what you're doing, you can manually
-copy/move the installed files to the environment of your choice after installing, but
-it's recommended to just use the base user environment.
-
-Please see the [Epoch docs][epoch] for info on using SDF analysis tools.
+repository. If we run this with the argument `--help`, we can see a list of possible
+commands:
+
+```bash
+$ python3 run_epoch.py --help
+```
+
+### Docker
+
+We can run the Docker container using:
+
+```bash
+$ python3 run_epoch.py docker -d 2 -o ./my_epoch_run
+```
+
+Here, `-d` specifies the number of dimensions in the simulation, and `-o` specifies the
+output directory (which should contain `input.deck`). Users can switch on QED effects by
+also providing the `--photons` argument. The output directory should not be the current
+working directory. To see a full list of possible options, we can also supply the
+`--help` option:
+
+```bash
+$ python3 run_epoch.py docker --help
+```
+
+### Singularity/Apptainer
+
+Singularity containers can be run similarly to Docker containers, but with extra options
+for specifying the number of MPI processes to run:
+
+```bash
+$ python3 run_epoch.py singularity -d 2 -o ./my_epoch_run -n 4
+```
+
+The extra argument `-n` specifies the number of processes to run.
+
+On HPC systems, you will need to load Singularity/Apptainer and OpenMPI first. For
+example, on Viking at the University of York, this requires:
+
+```bash
+$ module load OpenMPI Apptainer
+```
+
+Some machines may need to load a specific version of OpenMPI -- the version in the
+container is 4.1.2.
+
+Please see the `./viking` directory for help with running on Viking. This also contains
+advice for processing the SDF files produced by Epoch.
 
 ## Licensing

From 05ba67e48e1dc43a69e89919af00068501f3e288 Mon Sep 17 00:00:00 2001
From: Liam Pattinson
Date: Mon, 18 Dec 2023 16:58:57 +0000
Subject: [PATCH 4/9] Fixes to README files

---
 README.md        | 16 ++++++++--------
 viking/README.md | 36 ++++++++++++++++++++----------------
 2 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md
index e280aaa..7383d5f 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # Epoch Containers
 
-This repository contains tools and information for building/running [Epoch][epoch] using
-Docker/Singularity containers.
+This repository contains tools and information for building and running [Epoch][epoch]
+using containers.
 
 ## Introduction
 
@@ -13,9 +13,8 @@ it much easier to share reproducible workflows.
 We provide support for two container platforms: [Docker][docker] and
 [Singularity][singularity] (or Apptainer). Docker is the most widely used platform, and
 has been used here to build a 'base image' of Epoch on which other tools may be created.
-Singularity is an alternative container platform that was designed to be useable on HPC
-systems, so unlike Docker it can be run on multi-node architectures using MPI without
-issue.
+Singularity is an alternative container platform that is better suited for use on HPC
+systems, as unlike Docker it can be run using MPI.
 
 ## Usage
 
@@ -36,7 +35,7 @@ We can run the Docker container using:
 $ python3 run_epoch.py docker -d 2 -o ./my_epoch_run
 ```
 
-Here, `-d` specifies the number of dimensions in the simulation, and `-o` specifies the
+Here, `-d` specifies the number of dimensions in the simulation and `-o` specifies the
 output directory (which should contain `input.deck`). Users can switch on QED effects by
 also providing the `--photons` argument. The output directory should not be the current
 working directory. To see a full list of possible options, we can also supply the
@@ -55,9 +54,10 @@ for specifying the number of MPI processes to run:
 $ python3 run_epoch.py singularity -d 2 -o ./my_epoch_run -n 4
 ```
 
-The extra argument `-n` specifies the number of processes to run.
+The extra argument `-n` specifies the number of processes to run. With Singularity, it
+is permitted to run in the current working directory.
 
-On HPC systems, you will need to load Singularity/Apptainer and OpenMPI first. For
+On HPC systems, you will need to load OpenMPI and Singularity/Apptainer first. For
 example, on Viking at the University of York, this requires:
 
 ```bash
diff --git a/viking/README.md b/viking/README.md
index 0e39e14..3d4fd56 100644
--- a/viking/README.md
+++ b/viking/README.md
@@ -15,7 +15,7 @@ $ ssh viking
 ```
 
 On logging in, you should find yourself in a directory containing `./scratch`: this is
Clone this repository here: ```bash $ cd scratch @@ -53,9 +53,8 @@ $ module load Apptainer ``` > [!NOTE] -> Apptainer is a flavour of Singularity supported by the Linux Foundation, with the - other major flavour being SingularityCE. Containers built using one tool are usually - runnable using the other. +> Apptainer is a flavour of Singularity supported by the Linux Foundation, and it is + largely compatible with other flavours. The first time we try to run a Singularity container, it will take a long time to be downloaded. To speed things up, we can pre-download it using the following command from @@ -69,11 +68,11 @@ This will download the container, cache it locally, and drop you into a command You can escape it be pressing `Ctrl + d`. To run Epoch using containers, please skip forward to the section -[Running Epoch][running_epoch]. +[Running Epoch][#running-epoch]. ## Compiling from Source -If you run into difficulties with the Singularity containers, another option would be to +If you run into difficulties with the Singularity containers, another option is to compile and run Epoch from source. To do so, clone the main Epoch repo as described above, and enter the correct version of the code: @@ -104,23 +103,23 @@ if you wish. ## Running Epoch To run Epoch, you will need to use a modified version of the script `epoch_viking.sh` -provided in this directory. Do _not_ run this directly. +provided in this directory. **Do not run this directly.** The lines marked `#SBATCH` control how the job is submitted to the Slurm controller, -which schedules jobs on Viking. Note that the fewer resources you request, the faster -your job will be run, but make sure you request enough time, memory, nodes, and tasks -for your job to succeed! +which schedules jobs on Viking. Note that the fewer resources you request, the less time +your job will sit in the queue, but make sure you request enough time, memory, nodes, +and tasks for your job to succeed! > [!IMPORTANT] -> You'll need to set the project code to that provided to you when you signed up to +> You'll need to set the project code to that was provided to you when you signed up to Viking. The variables set under `User settings` control the Epoch run. Be sure to set the -variables `output_dir` to match your directory layout. If you're running using -containers, you should set `run_epoch` to the location of your `run_epoch.py` script, -and set the variables `dims` and `photons` to set up your run properly. If you're -running from source, you'll need to set `epoch_exe` to your compiled executable, and -`mpi_module` to the MPI module you had loaded when you compiled Epoch. +variable `output_dir` to point to the directory containing your input deck. If you're +running using containers, you should set `run_epoch` to the location of your +`run_epoch.py` script, and set the variables `dims` and `photons` to set up your run +properly. If you're running from source, you'll need to set `epoch_exe` to your compiled +executable, and `mpi_module` to the MPI module that was loaded when you compiled Epoch. To submit a job, use `sbatch`: @@ -165,6 +164,10 @@ Once you have the names of your grids, you can access them as NumPy arrays using >>> data.My_Variable_Name.data ``` +From here, you may wish to repackage the data into some other format that is easier to +work with. For a very straightforward approach, the NumPy function `tofile` may be of +use, or the library [`xarray`][xarray] might be useful if you want to convert to NetCDF. 
 
 For more information on `sdf_helper`, please see the [official docs][sdf].
 
 Note that you should not perform any computationally intensive work directly on the
@@ -198,3 +201,4 @@ tools via Slurm jobs.
 [account]: https://vikingdocs.york.ac.uk/getting_started/creating_accounts.html
 [connect]: https://vikingdocs.york.ac.uk/getting_started/connecting_to_viking.html
 [sdf]: https://epochpic.github.io/documentation/visualising_output/python.html
+[xarray]: https://docs.xarray.dev/en/stable/

From 9d824de95c6a5a129da07b02ee891aa6c039d1af Mon Sep 17 00:00:00 2001
From: Liam Pattinson
Date: Mon, 18 Dec 2023 16:59:12 +0000
Subject: [PATCH 5/9] Remove old sbatch example

---
 examples/laser_test_2d/run_sbatch.sh | 24 ------------------------
 1 file changed, 24 deletions(-)
 delete mode 100644 examples/laser_test_2d/run_sbatch.sh

diff --git a/examples/laser_test_2d/run_sbatch.sh b/examples/laser_test_2d/run_sbatch.sh
deleted file mode 100644
index a3a9e08..0000000
--- a/examples/laser_test_2d/run_sbatch.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=singularity_epoch2d_test       # Job name
-#SBATCH --mail-type=END,FAIL                      # Mail events (NONE, BEGIN, END, FAIL, ALL)
-#SBATCH --mail-user=myemail@york.ac.uk            # Where to send mail
-#SBATCH --ntasks=2                                # Run n tasks...
-#SBATCH --cpus-per-task=1                         # ...with one core each
-#SBATCH --mem-per-cpu=600mb                       # Memory per processor
-#SBATCH --time=00:01:00                           # Time limit hrs:min:sec
-#SBATCH --output=singularity_epoch2d_test_%j.log  # Standard output and error log
-#SBATCH --account=my-proj-account                 # Project account
-
-module purge
-module load mpi/OpenMPI tools/Singularity
-export PMIX_MCA_psec=^munge  # Not sure what this does exactly, but it fixes some warnings!
-
-# TODO Figure out proper linkage with MPI libraries
-
-echo "Running singularity epoch test on ${SLURM_NTASKS} CPU cores"
-
-./run_epoch.py singularity -d 2 -o ./output
-# or...
-mpirun -n ${SLURM_NTASKS} singularity exec --bind ./output:/output oras://ghcr.io/plasmafair/epoch.sif:latest run_epoch -d 2 -o /output
-
-# TODO srun not working on Viking, not sure if this is a machine-specific problem

From 87ad2961c687512fb4b2d31d5397f65b35b87e1b Mon Sep 17 00:00:00 2001
From: Liam Pattinson
Date: Mon, 18 Dec 2023 17:00:31 +0000
Subject: [PATCH 6/9] Add more Python tools to Dockerfile

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index ae28a15..02b8434 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -29,7 +29,7 @@ RUN git clone --recursive https://github.com/Warwick-Plasma/epoch /app/epoch
 
 # Set up Python, install utility library
 RUN python3 -m pip install --upgrade pip
-RUN python3 -m pip install numpy matplotlib
+RUN python3 -m pip install numpy scipy matplotlib pandas xarray
 RUN python3 -m pip install .
 
 # Build Epoch variants

From 66ffb1fc4ad6d3292a00735793816b73da6fe58f Mon Sep 17 00:00:00 2001
From: Liam Pattinson
Date: Mon, 18 Dec 2023 17:16:04 +0000
Subject: [PATCH 7/9] Fixes to viking job script

---
 viking/epoch_viking.sh | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/viking/epoch_viking.sh b/viking/epoch_viking.sh
index f6cca10..fac18bc 100644
--- a/viking/epoch_viking.sh
+++ b/viking/epoch_viking.sh
@@ -27,7 +27,7 @@ method="Singularity"
 
 # Name of directory containing your 'input.deck' file.
-# Can be a relative path.
+# Recommended to use a relative path.
 output_dir="."
 
 # Number of dimensions in your run.
@@ -41,14 +41,15 @@ photons="" # If running Epoch from containers, set this to the 'run_epoch.py' script # Ignored if running from source. -run_epoch="~/scratch/epoch_containers/run_epoch.py" +# Recommended to use a relative path. +run_epoch="./run_epoch.py" # If running Epoch from source, set this to the compiled executable # Ignored if running from containers. -epoch_exe="~/scratch/epoch/epoch2d/bin/epoch2d" +# Recommended to use a relative path, and include './' in front. +epoch_exe="./bin/epoch2d" # OpenMPI module used to compile Epoch -# Ignored if running from containers. mpi_module="OpenMPI" # ------------- @@ -58,7 +59,7 @@ module load ${mpi_module} if [[ ${method} -eq "Singularity" ]]; then - module load Apptainer + module load Python Apptainer # Suppress warnings export PMIX_MCA_gds=^ds12 @@ -72,7 +73,7 @@ if [[ ${method} -eq "Singularity" ]]; then echo "Running Epoch with via Apptainer using ${SLURM_NTASKS} processes" - ${run_epoch} singularity -d ${dims} -o ${output_dir} ${photons} --srun + python ${run_epoch} singularity -d ${dims} -o ${output_dir} ${photons} --srun # Alternative in case the above isn't working: # srun singularity exec --bind ${output_dir}:/output oras://ghcr.io/plasmafair/epoch.sif:latest run_epoch -d ${dims} -o /output --srun ${photons} From 13cba5163a8e9b2018cc850e652595085bf84526 Mon Sep 17 00:00:00 2001 From: Liam Pattinson Date: Mon, 18 Dec 2023 17:16:30 +0000 Subject: [PATCH 8/9] Renamed examples folder --- {examples => test_decks}/laser_test_2d/output/input.deck | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {examples => test_decks}/laser_test_2d/output/input.deck (100%) diff --git a/examples/laser_test_2d/output/input.deck b/test_decks/laser_test_2d/output/input.deck similarity index 100% rename from examples/laser_test_2d/output/input.deck rename to test_decks/laser_test_2d/output/input.deck From ed12c863ae69011eff5648e4e2ba4a458a635cd5 Mon Sep 17 00:00:00 2001 From: Liam Pattinson Date: Mon, 18 Dec 2023 17:20:05 +0000 Subject: [PATCH 9/9] Minor fixes to Viking job script --- viking/epoch_viking.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/viking/epoch_viking.sh b/viking/epoch_viking.sh index fac18bc..81589ca 100644 --- a/viking/epoch_viking.sh +++ b/viking/epoch_viking.sh @@ -15,8 +15,7 @@ #SBATCH --cpus-per-task=1 # Number of CPUS per process (leave this as 1!) #SBATCH --mem-per-cpu=1gb # Memory per task #SBATCH --time=00:01:00 # Total time limit hrs:min:sec -#SBATCH --output=%x_%j.log # Log file for stdout outputs -#SBATCH --output=%x_%j.err # Log file for stderr outputs +#SBATCH --output=%x_%j.log # Log file for stdout/stderr outputs #SBATCH --partition=nodes # 'test' for small test jobs (<1m), 'nodes' otherwise # User settings @@ -71,7 +70,7 @@ if [[ ${method} -eq "Singularity" ]]; then export OMPI_MCA_btl='^vader,tcp,openib,uct' export UCX_TLS=^'posix,cma' - echo "Running Epoch with via Apptainer using ${SLURM_NTASKS} processes" + echo "Running Epoch with Apptainer using ${SLURM_NTASKS} processes" python ${run_epoch} singularity -d ${dims} -o ${output_dir} ${photons} --srun