From a5321daa86b01d96c69c34bd452d02061350843a Mon Sep 17 00:00:00 2001 From: Andrey Abramov Date: Wed, 1 Nov 2023 16:50:20 +0100 Subject: [PATCH 01/30] Add an option to set a destination directory for the simulation output --- pylhc_submitter/htc/utils.py | 32 ++++++++++++++ pylhc_submitter/job_submitter.py | 76 +++++++++++++++++++++++++++++--- 2 files changed, 102 insertions(+), 6 deletions(-) diff --git a/pylhc_submitter/htc/utils.py b/pylhc_submitter/htc/utils.py index 80f2bc2..b9905a2 100644 --- a/pylhc_submitter/htc/utils.py +++ b/pylhc_submitter/htc/utils.py @@ -60,6 +60,7 @@ COLUMN_SHELL_SCRIPT = "ShellScript" COLUMN_JOB_DIRECTORY = "JobDirectory" +COLUMN_DEST_DIRECTORY = "DestDirectory" COLUMN_JOB_FILE = "JobFile" @@ -159,6 +160,7 @@ def make_subfile(cwd: Path, job_df: DataFrame, **kwargs): def write_bash( job_df: DataFrame, output_dir: Path = None, + destination_dir: Path = None, executable: str = "madx", cmdline_arguments: dict = None, mask: Union[str, Path] = None, @@ -196,7 +198,17 @@ def write_bash( f.write(mask % dict(zip(replace_columns, job[replace_columns]))) f.write(cmds) f.write("\n") + + if destination_dir is not None: + if output_dir is not None: + cp_command = f'cp -r {output_dir} {job[COLUMN_DEST_DIRECTORY]}' + if is_eos_path(destination_dir): + cp_command = f'eos {cp_command}' + + f.write(f'{cp_command}\n') + shell_scripts[idx] = bash_file_name + job_df[COLUMN_SHELL_SCRIPT] = shell_scripts return job_df @@ -244,6 +256,26 @@ def _maybe_put_in_quotes(key, value): return f'"{value}"' return value +def is_eos_path(path): + is_eos = False + path = Path(path) + strip_path_parts = _strip_eos_uri(path).parts + if len(strip_path_parts) > 1 and strip_path_parts[1] == 'eos': + is_eos = True + return is_eos + + +def _strip_eos_uri(path): + # EOS paths for HTCondor are given with URI, strip for direct writing + # root://eosuser.cern.ch//eos/user/a/anabramo/desktop_sync/banana.txt + path = Path(path) + parts = path.parts + outpath = path + if parts[0].endswith(':'): + # the first two parts are host info, e.g `file: //host/path` + outpath = Path('/', *parts[2:]) + return outpath + def is_mask_file(mask): try: diff --git a/pylhc_submitter/job_submitter.py b/pylhc_submitter/job_submitter.py index dda9aa3..745e66b 100644 --- a/pylhc_submitter/job_submitter.py +++ b/pylhc_submitter/job_submitter.py @@ -99,10 +99,12 @@ ) from pylhc_submitter.htc.utils import ( COLUMN_JOB_DIRECTORY, + COLUMN_DEST_DIRECTORY, COLUMN_SHELL_SCRIPT, EXECUTEABLEPATH, HTCONDOR_JOBLIMIT, JOBFLAVOURS, + _strip_eos_uri, ) from pylhc_submitter.utils.environment_tools import on_windows from pylhc_submitter.utils.iotools import PathOrStr, save_config, make_replace_entries_iterable, keys_to_path @@ -245,6 +247,11 @@ def get_params(): type=str, default="Outputdata", ) + params.add_parameter( + name="output_destination", + help="Directory where to store the output of the jobs . 
(Can be on EOS)", + type=PathOrStr, + ) params.add_parameter( name="htc_arguments", help=( @@ -282,6 +289,7 @@ def main(opt): opt.jobid_mask, opt.replace_dict, opt.job_output_dir, + opt.output_destination, opt.append_jobs, opt.executable, opt.script_arguments, @@ -298,6 +306,7 @@ def main(opt): job_df, opt.working_directory, opt.job_output_dir, + opt.output_destination, opt.jobflavour, opt.ssh, opt.dryrun, @@ -316,6 +325,7 @@ def _create_jobs( jobid_mask, replace_dict, output_dir, + output_dest, append_jobs, executable, script_args, @@ -354,7 +364,7 @@ def _create_jobs( data=values_grid, ) job_df = tfs.concat([job_df, data_df], sort=False, how_headers='left') - job_df = _setup_folders(job_df, cwd) + job_df = _setup_folders(job_df, cwd, output_dest) if htcutils.is_mask_file(mask_path_or_string): LOG.debug("Creating all jobs from mask.") @@ -367,12 +377,14 @@ def _create_jobs( job_df = htcutils.write_bash( job_df, output_dir, + destination_dir=output_dest, executable=executable, cmdline_arguments=script_args, mask=mask_path_or_string, ) job_df[COLUMN_JOB_DIRECTORY] = job_df[COLUMN_JOB_DIRECTORY].apply(str) + job_df[COLUMN_DEST_DIRECTORY] = job_df[COLUMN_DEST_DIRECTORY].apply(str) tfs.write(str(cwd / JOBSUMMARY_FILE), job_df, save_index=COLUMN_JOBID) return job_df @@ -398,6 +410,13 @@ def _drop_already_ran_jobs( def _run_local(job_df: tfs.TfsDataFrame, num_processes: int) -> None: LOG.info(f"Running {len(job_df.index)} jobs locally in {num_processes:d} processes.") + + # URI type EOS addresses won't work for copying files from local jobs + check_dest = job_df.iloc[0][COLUMN_DEST_DIRECTORY] + if not _strip_eos_uri(check_dest) == Path(check_dest): + LOG.warning("The output desitnation is likely specified as EOS URI," + "which will not work during a local run") + pool = multiprocessing.Pool(processes=num_processes) res = pool.map(_execute_shell, job_df.iterrows()) if any(res): @@ -409,6 +428,7 @@ def _run_htc( job_df: tfs.TfsDataFrame, cwd: str, output_dir: str, + dest_dir: str, flavour: str, ssh: str, dryrun: bool, @@ -416,9 +436,19 @@ def _run_htc( ) -> None: LOG.info(f"Submitting {len(job_df.index)} jobs on htcondor, flavour '{flavour}'.") LOG.debug("Creating htcondor subfile.") - subfile = htcutils.make_subfile( - cwd, job_df, output_dir=output_dir, duration=flavour, **additional_htc_arguments - ) + + # If a different destination for the data is required + # is is handled through the job bash files, so remove it from + # HTConodor's file transfer specification + if dest_dir is None: + subfile = htcutils.make_subfile( + cwd, job_df, output_dir=output_dir, duration=flavour, **additional_htc_arguments + ) + else: + subfile = htcutils.make_subfile( + cwd, job_df, duration=flavour, **additional_htc_arguments + ) + if not dryrun: LOG.debug("Submitting jobs to htcondor.") htcutils.submit_jobfile(subfile, ssh) @@ -439,13 +469,25 @@ def _check_htcondor_presence() -> None: raise EnvironmentError("htcondor bindings are necessary to run this module.") -def _setup_folders(job_df: tfs.TfsDataFrame, working_directory: PathOrStr) -> tfs.TfsDataFrame: +def _setup_folders(job_df: tfs.TfsDataFrame, working_directory: PathOrStr, + destination_directory: PathOrStr = None) -> tfs.TfsDataFrame: def _return_job_dir(job_id): return working_directory / f"{JOBDIRECTORY_PREFIX}.{job_id}" + + def _return_dest_dir(job_id): + return destination_directory / f"{JOBDIRECTORY_PREFIX}.{job_id}" LOG.debug("Setting up folders: ") job_df[COLUMN_JOB_DIRECTORY] = [_return_job_dir(id_) for id_ in job_df.index] + if 
destination_directory is not None: + _custom_output_dest = True + job_df[COLUMN_DEST_DIRECTORY] = [_return_dest_dir(id_) for id_ in job_df.index] + else: + _custom_output_dest = False + job_df[COLUMN_DEST_DIRECTORY] = job_df[COLUMN_JOB_DIRECTORY] + + for job_dir in job_df[COLUMN_JOB_DIRECTORY]: try: job_dir.mkdir() @@ -453,11 +495,30 @@ def _return_job_dir(job_id): LOG.debug(f" failed '{job_dir}' (might already exist).") else: LOG.debug(f" created '{job_dir}'.") + + if _custom_output_dest: + strip_dest_dir = _strip_eos_uri(destination_directory) + strip_dest_dir.mkdir(parents=True, exist_ok=True) + + # Make some symlinks for easy navigation + sym_submission = destination_directory / Path('SUBMISSION_DIR') + sym_submission.symlink_to(working_directory.resolve()) + sym_destination = working_directory / Path('OUTPUT_DIR') + sym_destination.symlink_to(destination_directory.resolve()) + + for job_dest_dir in job_df[COLUMN_DEST_DIRECTORY]: + try: + _strip_eos_uri(job_dest_dir).mkdir() + except IOError: + LOG.debug(f" failed '{job_dest_dir}' (might already exist).") + else: + LOG.debug(f" created '{job_dest_dir}'.") + return job_df def _job_was_successful(job_row, output_dir, files) -> bool: - output_dir = Path(job_row[COLUMN_JOB_DIRECTORY], output_dir) + output_dir = Path(job_row[COLUMN_DEST_DIRECTORY], output_dir) success = output_dir.is_dir() and any(output_dir.iterdir()) if success and files is not None and len(files): for f in files: @@ -497,6 +558,9 @@ def _check_opts(opt): else: mask = opt.mask + if "output_destination" in opt and opt["output_destination"] is not None: + opt["output_destination"] = Path(opt["output_destination"]) + # Replace dict --- dict_keys = set(opt.replace_dict.keys()) mask_keys = find_named_variables_in_mask(mask) From e0a99abf4aa8ef8603ef5690106f2a7572298d3c Mon Sep 17 00:00:00 2001 From: Andrey Abramov Date: Thu, 2 Nov 2023 10:07:48 +0100 Subject: [PATCH 02/30] Tidy up function. 
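
is_eos_path now reduces to a single boolean expression and _strip_eos_uri keeps its
behaviour: it drops the URI scheme and host that HTCondor uses for EOS addresses,
leaving the plain /eos path. A minimal sketch of the expected behaviour (not part of
the patch itself, assuming POSIX-style pathlib parsing):

    from pathlib import Path
    from pylhc_submitter.htc.utils import is_eos_path, _strip_eos_uri

    uri = "root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt"

    # 'root:' and the host part are stripped, only the local /eos path remains.
    assert _strip_eos_uri(Path(uri)) == Path("/eos/user/a/anabramo/banana.txt")

    # is_eos_path checks the second component of the stripped path.
    assert is_eos_path(uri)
    assert not is_eos_path("/afs/cern.ch/user/a/anabramo")
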
--- pylhc_submitter/htc/utils.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pylhc_submitter/htc/utils.py b/pylhc_submitter/htc/utils.py index b9905a2..47f0eb9 100644 --- a/pylhc_submitter/htc/utils.py +++ b/pylhc_submitter/htc/utils.py @@ -256,18 +256,16 @@ def _maybe_put_in_quotes(key, value): return f'"{value}"' return value -def is_eos_path(path): - is_eos = False - path = Path(path) - strip_path_parts = _strip_eos_uri(path).parts - if len(strip_path_parts) > 1 and strip_path_parts[1] == 'eos': - is_eos = True - return is_eos + +def is_eos_path(path): + path = Path(path) + strip_path_parts = _strip_eos_uri(path).parts + return len(strip_path_parts) > 1 and strip_path_parts[1] == 'eos' def _strip_eos_uri(path): # EOS paths for HTCondor are given with URI, strip for direct writing - # root://eosuser.cern.ch//eos/user/a/anabramo/desktop_sync/banana.txt + # root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt path = Path(path) parts = path.parts outpath = path From 129ed15c9390038250aa22064e871e6844c987c4 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Thu, 2 Nov 2023 14:36:10 +0100 Subject: [PATCH 03/30] reversed folder creation logic --- pylhc_submitter/job_submitter.py | 42 +++++++++++++------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/pylhc_submitter/job_submitter.py b/pylhc_submitter/job_submitter.py index 745e66b..5198003 100644 --- a/pylhc_submitter/job_submitter.py +++ b/pylhc_submitter/job_submitter.py @@ -480,39 +480,31 @@ def _return_dest_dir(job_id): LOG.debug("Setting up folders: ") job_df[COLUMN_JOB_DIRECTORY] = [_return_job_dir(id_) for id_ in job_df.index] - if destination_directory is not None: - _custom_output_dest = True - job_df[COLUMN_DEST_DIRECTORY] = [_return_dest_dir(id_) for id_ in job_df.index] - else: - _custom_output_dest = False - job_df[COLUMN_DEST_DIRECTORY] = job_df[COLUMN_JOB_DIRECTORY] + for job_dir in job_df[COLUMN_JOB_DIRECTORY]: + job_dir.mkdir(exist_ok=True) + LOG.debug(f" created '{job_dir}'.") + if destination_directory is None: + job_df[COLUMN_DEST_DIRECTORY] = job_df[COLUMN_JOB_DIRECTORY] + else: + job_df[COLUMN_DEST_DIRECTORY] = [_return_dest_dir(id_) for id_ in job_df.index] - for job_dir in job_df[COLUMN_JOB_DIRECTORY]: - try: - job_dir.mkdir() - except IOError: - LOG.debug(f" failed '{job_dir}' (might already exist).") - else: - LOG.debug(f" created '{job_dir}'.") - - if _custom_output_dest: - strip_dest_dir = _strip_eos_uri(destination_directory) + strip_dest_dir: Path = _strip_eos_uri(destination_directory) strip_dest_dir.mkdir(parents=True, exist_ok=True) - # Make some symlinks for easy navigation + # Make some symlinks for easy navigation--- + # Output directory -> Working Directory sym_submission = destination_directory / Path('SUBMISSION_DIR') - sym_submission.symlink_to(working_directory.resolve()) + sym_submission.symlink_to(working_directory.resolve(), target_is_directory=True) + + # Working Directory -> Output Directory sym_destination = working_directory / Path('OUTPUT_DIR') - sym_destination.symlink_to(destination_directory.resolve()) + sym_destination.symlink_to(destination_directory.resolve(), target_is_directory=True) + # Create output dirs per job --- for job_dest_dir in job_df[COLUMN_DEST_DIRECTORY]: - try: - _strip_eos_uri(job_dest_dir).mkdir() - except IOError: - LOG.debug(f" failed '{job_dest_dir}' (might already exist).") - else: - LOG.debug(f" created '{job_dest_dir}'.") + 
_strip_eos_uri(job_dest_dir).mkdir(exist_ok=True) + LOG.debug(f" created '{job_dest_dir}'.") return job_df From 26cbec210efe531329cb6d240b1b2732d5e3016a Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Thu, 2 Nov 2023 14:46:12 +0100 Subject: [PATCH 04/30] fixing link to homepage --- pylhc_submitter/job_submitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pylhc_submitter/job_submitter.py b/pylhc_submitter/job_submitter.py index 5198003..4f17449 100644 --- a/pylhc_submitter/job_submitter.py +++ b/pylhc_submitter/job_submitter.py @@ -14,7 +14,7 @@ and job directory for further post processing. For additional information and guides, see the `Job Submitter page -`_ in the ``OMC`` documentation site. +`_ in the ``OMC`` documentation site. *--Required--* From 4c12d9a98f32c80eaa37588519ac07a37f30cd9f Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Tue, 7 Nov 2023 16:27:57 +0100 Subject: [PATCH 05/30] modified tests --- tests/unit/test_job_submitter.py | 325 +++++++++++++++++-------------- 1 file changed, 178 insertions(+), 147 deletions(-) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 308202b..709433e 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -1,5 +1,9 @@ +import itertools +from dataclasses import astuple, dataclass, field, fields, asdict from pathlib import Path +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union +import numpy as np import pytest from generic_parser import DotDict @@ -17,164 +21,191 @@ ) -@pytest.mark.parametrize("maskfile", [True, False]) -def test_job_creation_and_localrun(tmp_path, maskfile): - args, setup = _create_setup(tmp_path, mask_file=maskfile) - setup.update(run_local=True) - job_submit(**setup) - _test_output(args) - +@dataclass +class InputParameters: + working_directory: Path + executable: Optional[str] = None if on_windows() else "/bin/bash" + script_extension: Optional[str] =".bat" if on_windows() else ".sh" + job_output_dir: Optional[str] = "Outputdir" + jobid_mask: Optional[str] = "%(PARAM1)s.%(PARAM2)d" + replace_dict: Optional[Dict] = field(default_factory=lambda: dict(PARAM1=["a", "b"], PARAM2=[1, 2, 3])) + jobflavour: Optional[str] = "workday" + resume_jobs: Optional[bool] = True + check_files: Optional[Sequence] = field(default_factory=lambda: ["out.txt",]) + dryrun: Optional[bool] = False + run_local: Optional[bool] = False + htc_arguments: Optional[Dict] = field(default_factory=lambda: {"max_retries": "4", "some_other_argument": "some_other_parameter"}) + output_destination: Optional[Path] = None + mask: Union[Path, str] = None # will be set in create_mask + + def create_mask(self, name: str = "test_script.mask", content: str = None, as_file: bool = False): + output_file = Path(self.job_output_dir, self.check_files[0]) + + if content is None: + content = self.jobid_mask + + if on_windows(): + mask_string = f'echo {content}> "{output_file!s}"' + else: + mask_string = f'echo "{content}" > "{output_file!s}"' + if as_file: + mask_string = " ".join(['-c "', mask_string, '"']) + + + mask_string = f"{mask_string}\n" + + if as_file: + mask_path = self.working_directory / name + with mask_path.open("w") as f: + f.write(mask_string) + self.mask = mask_path + else: + self.mask = mask_string -@run_only_on_linux -def test_job_creation_and_localrun_with_multiline_maskstring(tmp_path): - mask = "123\"\" \nsleep 0.1 \n/bin/bash -c \"echo \"%(PARAM1)s.%(PARAM2)s" - 
args, setup = _create_setup(tmp_path, mask_content=mask, mask_file=False) - setup.update(run_local=True) - job_submit(**setup) - _test_output(args) - - -@run_only_on_linux -@pytest.mark.parametrize("maskfile", [True, False]) -def test_job_creation_and_dryrun(tmp_path, maskfile): - args, setup = _create_setup(tmp_path, mask_file=maskfile) - setup.update(dryrun=True) - job_submit(**setup) - _test_subfile_content(setup) - _test_output(args, post_run=False) -@run_only_on_linux @pytest.mark.parametrize("maskfile", [True, False]) -def test_find_errorneous_percentage_signs(tmp_path, maskfile): - mask = "%(PARAM1)s.%(PARAM2)d\nsome stuff # should be 5%\nsome % more % stuff." - args, setup = _create_setup(tmp_path, mask_content=mask, mask_file=maskfile) - with pytest.raises(KeyError) as e: - job_submit(**setup) - assert "problematic '%'" in e.value.args[0] - - -@run_only_on_linux -@pytest.mark.parametrize("maskfile", [True, False]) -def test_missing_keys(tmp_path, maskfile): - mask = "%(PARAM1)s.%(PARAM2)s.%(PARAM3)s" - args, setup = _create_setup(tmp_path, mask_content=mask, mask_file=maskfile) - with pytest.raises(KeyError) as e: - job_submit(**setup) - assert "PARAM3" in e.value.args[0] - - -@run_if_not_linux -def test_not_on_linux(tmp_path): - args, setup = _create_setup(tmp_path) - with pytest.raises(EnvironmentError) as e: - job_submit(**setup) - assert "htcondor bindings" in e.value.args[0] - - -@run_only_on_linux -@pytest.mark.cern_network -def test_htc_submit(): - """ This test is here for local testing only. You need to adapt the path - and delete the results afterwards manually (so you can check them before.""" - user = "jdilly" - path = Path("/", "afs", "cern.ch", "user", user[0], user, "htc_temp") - path.mkdir(exist_ok=True) - args, setup = _create_setup(path) - - job_submit(**setup) - _test_subfile_content(setup) - _test_output(args, post_run=False) - # _test_output(args, post_run=True) # you can use this if you like after htcondor is done +def test_job_creation_and_localrun(tmp_path, maskfile): + """ Tests that the jobs are created and can be run locally + from mask-string and mask-file. """ + setup = InputParameters(working_directory=tmp_path, run_local=True) + setup.create_mask(as_file=maskfile) + job_submit(**asdict(setup)) + _test_output(setup) + + +# def test_output_directory(tmp_path): +# """ Tests that the output is copied to the output destination. +# As a by product it also tests that the jobs are created and can be run locally. """ +# output_destination = tmp_path / "my_new_output" / "long_path" +# args, setup = _create_setup(tmp_path, mask_file=False, output_destination=output_destination) +# setup.update(run_local=True) +# job_submit(**setup) +# _test_output(args) + + +# @run_only_on_linux +# def test_job_creation_and_localrun_with_multiline_maskstring(tmp_path): +# """ Tests that the jobs are created and can be run locally from a multiline mask-string. """ +# mask = "123\"\" \nsleep 0.1 \n/bin/bash -c \"echo \"%(PARAM1)s.%(PARAM2)s" +# args, setup = _create_setup(tmp_path, mask_content=mask, mask_file=False) +# setup.update(run_local=True) +# job_submit(**setup) +# _test_output(args) + + +# @run_only_on_linux +# @pytest.mark.parametrize("maskfile", [True, False]) +# def test_job_creation_and_dryrun(tmp_path, maskfile): +# """ Tests that the jobs are created as dry-run from mask-file and from mask-string. 
""" +# args, setup = _create_setup(tmp_path, mask_file=maskfile) +# setup.update(dryrun=True) +# job_submit(**setup) +# _test_subfile_content(setup) +# _test_output(args, post_run=False) + + +# @run_only_on_linux +# @pytest.mark.parametrize("maskfile", [True, False]) +# def test_find_errorneous_percentage_signs(tmp_path, maskfile): +# """ Tests that a key-error is raised on a mask-string with percentage signs, +# that are not part of the replacement parameters. """ +# mask = "%(PARAM1)s.%(PARAM2)d\nsome stuff # should be 5%\nsome % more % stuff." +# args, setup = _create_setup(tmp_path, mask_content=mask, mask_file=maskfile) +# with pytest.raises(KeyError) as e: +# job_submit(**setup) +# assert "problematic '%'" in e.value.args[0] + + +# @run_only_on_linux +# @pytest.mark.parametrize("maskfile", [True, False]) +# def test_missing_keys(tmp_path, maskfile): +# """ Tests that a key-error is raised on a mask-string with missing keys in the replacement dict. """ +# mask = "%(PARAM1)s.%(PARAM2)s.%(PARAM3)s" +# args, setup = _create_setup(tmp_path, mask_content=mask, mask_file=maskfile) +# with pytest.raises(KeyError) as e: +# job_submit(**setup) +# assert "PARAM3" in e.value.args[0] + + +# @run_if_not_linux +# def test_not_on_linux(tmp_path): +# """ Test that an error is raised if htcondor bindings are not found. +# If this tests fails, this might mean, that htcondor bindings are finally +# available for the other platforms. """ +# args, setup = _create_setup(tmp_path) +# with pytest.raises(EnvironmentError) as e: +# job_submit(**setup) +# assert "htcondor bindings" in e.value.args[0] + + +# @run_only_on_linux +# @pytest.mark.cern_network +# def test_htc_submit(): +# """ This test is here for local testing only. You need to adapt the path +# and delete the results afterwards manually (so you can check them before.""" +# user = "jdilly" +# path = Path("/", "afs", "cern.ch", "user", user[0], user, "htc_temp") +# path.mkdir(exist_ok=True) +# args, setup = _create_setup(path) + +# job_submit(**setup) +# _test_subfile_content(setup) +# _test_output(args, post_run=False) +# # _test_output(args, post_run=True) # you can use this if you like after htcondor is done # Helper ----------------------------------------------------------------------- -def _create_setup(cwd_path: Path, mask_content: str = None, mask_file: bool = True): - """ Create a quick setup for Parameters PARAM1 and PARAM2. 
""" - out_name = "out.txt" - out_dir = "Outputdir" - - args = DotDict( - cwd=cwd_path, - out_name=out_name, - out_dir=out_dir, - id="%(PARAM1)s.%(PARAM2)d", - mask_name="test_script.mask", - ext=".bat" if on_windows() else ".sh", - out_file=Path(out_dir, out_name), - p1_list=["a", "b"], - p2_list=[1, 2, 3], - mask_file=mask_file - ) - - mask_string = _make_executable_string(args, mask_content) - if args.mask_file: - mask_path = args.cwd / args.mask_name - with mask_path.open("w") as f: - f.write(mask_string) - - setup = dict( - executable=None if on_windows() else "/bin/bash", - script_extension=args.ext, - job_output_dir=out_dir, - mask=str(mask_path) if args.mask_file else mask_string, - replace_dict=dict(PARAM1=args.p1_list, PARAM2=args.p2_list), - jobid_mask=args.id, - jobflavour="workday", - resume_jobs=True, - check_files=[args.out_name], - working_directory=str(args.cwd), - dryrun=False, - run_local=False, - htc_arguments={"max_retries": "4", "some_other_argument": "some_other_parameter"}, - ) - return args, setup - - -def _make_executable_string(args, mask_content): - if mask_content is None: - mask_content = args.id - - if on_windows(): - mask_string = f'echo {mask_content}> "{args.out_file}"' - else: - mask_string = f'echo "{mask_content}" > "{args.out_file}"' - if not args.mask_file: - mask_string = " ".join(['-c "', mask_string, '"']) - return f"{mask_string}\n" - - -def _test_subfile_content(setup): - subfile = Path(setup['working_directory']) / SUBFILE + +def _test_subfile_content(setup: InputParameters): + """ Checks some of the content of the subfile (queuehtc.sub). """ + subfile = setup.working_directory / SUBFILE assert subfile.exists() with subfile.open("r") as sfile: filecontents = dict(line.rstrip().split(" = ") for line in sfile if " = " in line) assert filecontents["MY.JobFlavour"].strip('"') == setup["jobflavour"] # flavour is saved with "" in .sub, and read in with them assert filecontents["transfer_output_files"] == setup["job_output_dir"] - for key in setup["htc_arguments"].keys(): - assert filecontents[key] == setup["htc_arguments"][key] - - -def _test_output(args, post_run=True): - for p1 in args.p1_list: - for p2 in args.p2_list: - current_id = args.id % dict(PARAM1=p1, PARAM2=p2) - job_name = f"Job.{current_id}" - job_dir_path = args.cwd / job_name - out_dir_path = job_dir_path / args.out_dir - out_file_path = out_dir_path / args.out_name - - assert job_dir_path.exists() - assert job_dir_path.is_dir() - if args.mask_file: - assert (job_dir_path / args.mask_name).with_suffix(args.ext).exists() - # assert out_dir_path.exists() # does not seem to be pre-created anymore (jdilly 2021-05-04) - if post_run: - assert out_dir_path.is_dir() - assert out_file_path.exists() - assert out_file_path.is_file() - - with out_file_path.open("r") as f: - assert f.read().strip("\n") == current_id + for key in setup.htc_arguments.keys(): + assert filecontents[key] == setup.htc_arguments[key] + + +def _test_output(setup: InputParameters, post_run: bool = True): + """ Checks the validity of the output. 
""" + + combinations = _generate_combinations(setup.replace_dict) + assert len(combinations) == np.prod([len(v) for v in setup.replace_dict.values()]) + + for combination_instance in combinations: + current_id = setup.jobid_mask % combination_instance + job_name = f"Job.{current_id}" + job_dir_path = setup.working_directory / job_name + out_dir_path = job_dir_path / setup.job_output_dir + out_file_path = out_dir_path / setup.check_files[0] + + assert job_dir_path.exists() + assert job_dir_path.is_dir() + if isinstance(setup.mask, Path): + assert (job_dir_path / setup.mask.name).with_suffix(setup.script_extension).exists() + # assert out_dir_path.exists() # does not seem to be pre-created anymore (jdilly 2021-05-04) + if post_run: + assert out_dir_path.is_dir() + assert out_file_path.exists() + assert out_file_path.is_file() + + with out_file_path.open("r") as f: + assert f.read().strip("\n") == current_id + + +def _generate_combinations(data: Dict[str, Sequence]) -> List[Dict[str, Any]]: + """ Creates all possible combinations of values in data as dictionaries. """ + keys = list(data.keys()) + all_values = [data[key] for key in keys] + + combinations = [ + {keys[i]: values[i] for i in range(len(keys))} + for values in itertools.product(*all_values) + ] + + return combinations \ No newline at end of file From 72bc145a73938c65b8d3368ac4f6dfb813bb6407 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Tue, 7 Nov 2023 17:28:27 +0100 Subject: [PATCH 06/30] making tests run again --- tests/unit/test_job_submitter.py | 209 ++++++++++++++++--------------- 1 file changed, 109 insertions(+), 100 deletions(-) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 709433e..6a6bf92 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -21,8 +21,113 @@ ) +@pytest.mark.parametrize("maskfile", [True, False]) +def test_job_creation_and_localrun(tmp_path, maskfile): + """ Tests that the jobs are created and can be run locally + from mask-string and mask-file. """ + setup = InputParameters(working_directory=tmp_path, run_local=True) + setup.create_mask(as_file=maskfile) + job_submit(**asdict(setup)) + _test_output(setup) + + +# def test_output_directory(tmp_path): +# """ Tests that the output is copied to the output destination. +# As a by product it also tests that the jobs are created and can be run locally. """ +# output_destination = tmp_path / "my_new_output" / "long_path" +# args, setup = _create_setup(tmp_path, mask_file=False, output_destination=output_destination) +# setup.update(run_local=True) +# job_submit(**setup) +# _test_output(args) + + +@run_only_on_linux +def test_job_creation_and_localrun_with_multiline_maskstring(tmp_path): + """ Tests that the jobs are created and can be run locally from a multiline mask-string. """ + mask = "123\"\" \nsleep 0.1 \n/bin/bash -c \"echo \"%(PARAM1)s.%(PARAM2)s" + setup = InputParameters(working_directory=tmp_path, run_local=True) + setup.create_mask(content=mask, as_file=False) + job_submit(**asdict(setup)) + _test_output(setup) + + +@run_only_on_linux +@pytest.mark.parametrize("maskfile", [True, False]) +def test_job_creation_and_dryrun(tmp_path, maskfile): + """ Tests that the jobs are created as dry-run from mask-file and from mask-string. 
""" + setup = InputParameters(working_directory=tmp_path, dryrun=True) + setup.create_mask(as_file=maskfile) + job_submit(**asdict(setup)) + _test_subfile_content(setup) + _test_output(setup, post_run=False) + + +@run_only_on_linux +@pytest.mark.parametrize("maskfile", [True, False]) +def test_find_errorneous_percentage_signs(tmp_path, maskfile): + """ Tests that a key-error is raised on a mask-string with percentage signs, + that are not part of the replacement parameters. """ + mask = "%(PARAM1)s.%(PARAM2)d\nsome stuff # should be 5%\nsome % more % stuff." + setup = InputParameters(working_directory=tmp_path) + setup.create_mask(content=mask, as_file=maskfile) + with pytest.raises(KeyError) as e: + job_submit(**asdict(setup)) + assert "problematic '%'" in e.value.args[0] + + +@run_only_on_linux +@pytest.mark.parametrize("maskfile", [True, False]) +def test_missing_keys(tmp_path, maskfile): + """ Tests that a key-error is raised on a mask-string with missing keys in the replacement dict. """ + mask = "%(PARAM1)s.%(PARAM2)s.%(PARAM3)s" + setup = InputParameters(working_directory=tmp_path) + setup.create_mask(content=mask, as_file=maskfile) + with pytest.raises(KeyError) as e: + job_submit(**asdict(setup)) + assert "PARAM3" in e.value.args[0] + + +@run_if_not_linux +def test_not_on_linux(tmp_path): + """ Test that an error is raised if htcondor bindings are not found. + If this tests fails, this might mean, that htcondor bindings are finally + available for the other platforms. """ + setup = InputParameters(working_directory=tmp_path) + with pytest.raises(EnvironmentError) as e: + job_submit(**asdict(setup)) + assert "htcondor bindings" in e.value.args[0] + + +@run_only_on_linux +@pytest.mark.cern_network +def test_htc_submit(): + """ This test is here for local testing only. You need to adapt the path + and delete the results afterwards manually (so you can check them before.""" + user = "jdilly" + path = Path("/", "afs", "cern.ch", "user", user[0], user, "htc_temp") + path.mkdir(exist_ok=True) + + + # Fix the kerberos ticket path. + # Do klist to find your ticket manually. + import os + os.environ["KRB5CCNAME"] = "/tmp/krb5cc_106029" + + setup = InputParameters(working_directory=path) + setup.create_mask() + # pre-run --- + # job_submit(**asdict(setup)) + # _test_subfile_content(setup) + # _test_output(setup, post_run=False) + # post run --- + _test_output(setup, post_run=True) + + +# Helper ----------------------------------------------------------------------- + @dataclass class InputParameters: + """ job_submitter input parameters. """ working_directory: Path executable: Optional[str] = None if on_windows() else "/bin/bash" script_extension: Optional[str] =".bat" if on_windows() else ".sh" @@ -48,10 +153,9 @@ def create_mask(self, name: str = "test_script.mask", content: str = None, as_fi mask_string = f'echo {content}> "{output_file!s}"' else: mask_string = f'echo "{content}" > "{output_file!s}"' - if as_file: + if not as_file: mask_string = " ".join(['-c "', mask_string, '"']) - mask_string = f"{mask_string}\n" if as_file: @@ -63,110 +167,14 @@ def create_mask(self, name: str = "test_script.mask", content: str = None, as_fi self.mask = mask_string - -@pytest.mark.parametrize("maskfile", [True, False]) -def test_job_creation_and_localrun(tmp_path, maskfile): - """ Tests that the jobs are created and can be run locally - from mask-string and mask-file. 
""" - setup = InputParameters(working_directory=tmp_path, run_local=True) - setup.create_mask(as_file=maskfile) - job_submit(**asdict(setup)) - _test_output(setup) - - -# def test_output_directory(tmp_path): -# """ Tests that the output is copied to the output destination. -# As a by product it also tests that the jobs are created and can be run locally. """ -# output_destination = tmp_path / "my_new_output" / "long_path" -# args, setup = _create_setup(tmp_path, mask_file=False, output_destination=output_destination) -# setup.update(run_local=True) -# job_submit(**setup) -# _test_output(args) - - -# @run_only_on_linux -# def test_job_creation_and_localrun_with_multiline_maskstring(tmp_path): -# """ Tests that the jobs are created and can be run locally from a multiline mask-string. """ -# mask = "123\"\" \nsleep 0.1 \n/bin/bash -c \"echo \"%(PARAM1)s.%(PARAM2)s" -# args, setup = _create_setup(tmp_path, mask_content=mask, mask_file=False) -# setup.update(run_local=True) -# job_submit(**setup) -# _test_output(args) - - -# @run_only_on_linux -# @pytest.mark.parametrize("maskfile", [True, False]) -# def test_job_creation_and_dryrun(tmp_path, maskfile): -# """ Tests that the jobs are created as dry-run from mask-file and from mask-string. """ -# args, setup = _create_setup(tmp_path, mask_file=maskfile) -# setup.update(dryrun=True) -# job_submit(**setup) -# _test_subfile_content(setup) -# _test_output(args, post_run=False) - - -# @run_only_on_linux -# @pytest.mark.parametrize("maskfile", [True, False]) -# def test_find_errorneous_percentage_signs(tmp_path, maskfile): -# """ Tests that a key-error is raised on a mask-string with percentage signs, -# that are not part of the replacement parameters. """ -# mask = "%(PARAM1)s.%(PARAM2)d\nsome stuff # should be 5%\nsome % more % stuff." -# args, setup = _create_setup(tmp_path, mask_content=mask, mask_file=maskfile) -# with pytest.raises(KeyError) as e: -# job_submit(**setup) -# assert "problematic '%'" in e.value.args[0] - - -# @run_only_on_linux -# @pytest.mark.parametrize("maskfile", [True, False]) -# def test_missing_keys(tmp_path, maskfile): -# """ Tests that a key-error is raised on a mask-string with missing keys in the replacement dict. """ -# mask = "%(PARAM1)s.%(PARAM2)s.%(PARAM3)s" -# args, setup = _create_setup(tmp_path, mask_content=mask, mask_file=maskfile) -# with pytest.raises(KeyError) as e: -# job_submit(**setup) -# assert "PARAM3" in e.value.args[0] - - -# @run_if_not_linux -# def test_not_on_linux(tmp_path): -# """ Test that an error is raised if htcondor bindings are not found. -# If this tests fails, this might mean, that htcondor bindings are finally -# available for the other platforms. """ -# args, setup = _create_setup(tmp_path) -# with pytest.raises(EnvironmentError) as e: -# job_submit(**setup) -# assert "htcondor bindings" in e.value.args[0] - - -# @run_only_on_linux -# @pytest.mark.cern_network -# def test_htc_submit(): -# """ This test is here for local testing only. 
You need to adapt the path -# and delete the results afterwards manually (so you can check them before.""" -# user = "jdilly" -# path = Path("/", "afs", "cern.ch", "user", user[0], user, "htc_temp") -# path.mkdir(exist_ok=True) -# args, setup = _create_setup(path) - -# job_submit(**setup) -# _test_subfile_content(setup) -# _test_output(args, post_run=False) -# # _test_output(args, post_run=True) # you can use this if you like after htcondor is done - - -# Helper ----------------------------------------------------------------------- - - - def _test_subfile_content(setup: InputParameters): """ Checks some of the content of the subfile (queuehtc.sub). """ subfile = setup.working_directory / SUBFILE assert subfile.exists() with subfile.open("r") as sfile: filecontents = dict(line.rstrip().split(" = ") for line in sfile if " = " in line) - assert filecontents["MY.JobFlavour"].strip('"') == setup["jobflavour"] # flavour is saved with "" in .sub, and read in with them - assert filecontents["transfer_output_files"] == setup["job_output_dir"] + assert filecontents["MY.JobFlavour"].strip('"') == setup.jobflavour # flavour is saved with "" in .sub, and read in with them + assert filecontents["transfer_output_files"] == setup.job_output_dir for key in setup.htc_arguments.keys(): assert filecontents[key] == setup.htc_arguments[key] @@ -175,6 +183,7 @@ def _test_output(setup: InputParameters, post_run: bool = True): """ Checks the validity of the output. """ combinations = _generate_combinations(setup.replace_dict) + assert len(combinations) assert len(combinations) == np.prod([len(v) for v in setup.replace_dict.values()]) for combination_instance in combinations: From bf813f33fdb520a83afc5a4a7eec922e6e5e7880 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Tue, 7 Nov 2023 17:54:14 +0100 Subject: [PATCH 07/30] fixing mac? --- tests/unit/test_job_submitter.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 6a6bf92..62d2b22 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -93,6 +93,7 @@ def test_not_on_linux(tmp_path): If this tests fails, this might mean, that htcondor bindings are finally available for the other platforms. """ setup = InputParameters(working_directory=tmp_path) + setup.create_mask() with pytest.raises(EnvironmentError) as e: job_submit(**asdict(setup)) assert "htcondor bindings" in e.value.args[0] @@ -110,8 +111,8 @@ def test_htc_submit(): # Fix the kerberos ticket path. # Do klist to find your ticket manually. 
- import os - os.environ["KRB5CCNAME"] = "/tmp/krb5cc_106029" + # import os + # os.environ["KRB5CCNAME"] = "/tmp/krb5cc_####" setup = InputParameters(working_directory=path) setup.create_mask() From 9d7e6fa95b201df0575b75473e60bcfcf9dea99d Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Tue, 7 Nov 2023 19:01:35 +0100 Subject: [PATCH 08/30] added test for output_destination --- tests/unit/test_job_submitter.py | 83 ++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 32 deletions(-) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 62d2b22..59be8de 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -31,14 +31,17 @@ def test_job_creation_and_localrun(tmp_path, maskfile): _test_output(setup) -# def test_output_directory(tmp_path): -# """ Tests that the output is copied to the output destination. -# As a by product it also tests that the jobs are created and can be run locally. """ -# output_destination = tmp_path / "my_new_output" / "long_path" -# args, setup = _create_setup(tmp_path, mask_file=False, output_destination=output_destination) -# setup.update(run_local=True) -# job_submit(**setup) -# _test_output(args) +def test_output_directory(tmp_path): + """ Tests that the output is copied to the output destination. + As a by product it also tests that the jobs are created and can be run locally. """ + setup = InputParameters( + working_directory=tmp_path, + run_local=True, + output_destination=tmp_path / "my_new_output" / "long_path", + ) + setup.create_mask() + job_submit(**asdict(setup)) + _test_output(setup) @run_only_on_linux @@ -104,24 +107,25 @@ def test_not_on_linux(tmp_path): def test_htc_submit(): """ This test is here for local testing only. You need to adapt the path and delete the results afterwards manually (so you can check them before.""" - user = "jdilly" - path = Path("/", "afs", "cern.ch", "user", user[0], user, "htc_temp") - path.mkdir(exist_ok=True) - - # Fix the kerberos ticket path. # Do klist to find your ticket manually. 
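    # (Note: KRB5CCNAME is the standard environment variable naming the Kerberos
    #  credential cache; `klist` shows it, typically as FILE:/tmp/krb5cc_<uid>.
    #  Setting it manually is only needed if the submission environment does not
    #  pick up the ticket on its own.)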
# import os # os.environ["KRB5CCNAME"] = "/tmp/krb5cc_####" + user = "jdilly" + path = Path("/", "afs", "cern.ch", "user", user[0], user, "htc_temp") + path.mkdir(exist_ok=True) + setup = InputParameters(working_directory=path) setup.create_mask() + # pre-run --- - # job_submit(**asdict(setup)) - # _test_subfile_content(setup) - # _test_output(setup, post_run=False) + job_submit(**asdict(setup)) + _test_subfile_content(setup) + _test_output(setup, post_run=False) + # post run --- - _test_output(setup, post_run=True) + # _test_output(setup, post_run=True) # Helper ----------------------------------------------------------------------- @@ -190,22 +194,37 @@ def _test_output(setup: InputParameters, post_run: bool = True): for combination_instance in combinations: current_id = setup.jobid_mask % combination_instance job_name = f"Job.{current_id}" - job_dir_path = setup.working_directory / job_name - out_dir_path = job_dir_path / setup.job_output_dir - out_file_path = out_dir_path / setup.check_files[0] - - assert job_dir_path.exists() - assert job_dir_path.is_dir() + if isinstance(setup.mask, Path): - assert (job_dir_path / setup.mask.name).with_suffix(setup.script_extension).exists() - # assert out_dir_path.exists() # does not seem to be pre-created anymore (jdilly 2021-05-04) - if post_run: - assert out_dir_path.is_dir() - assert out_file_path.exists() - assert out_file_path.is_file() - - with out_file_path.open("r") as f: - assert f.read().strip("\n") == current_id + assert (setup.working_directory / job_name / setup.mask.name).with_suffix(setup.script_extension).exists() + + def _check_output_content(dir_path: Path): + # Check if the code created the folder structure --- + job_path = dir_path / job_name + + assert job_path.exists() + assert job_path.is_dir() + + if post_run: # Check if the jobs created the files --- + out_dir_path = job_path / setup.job_output_dir + out_file_path = out_dir_path / setup.check_files[0] + + assert out_dir_path.is_dir() + assert out_file_path.exists() + assert out_file_path.is_file() + + with out_file_path.open("r") as f: + assert f.read().strip("\n") == current_id + + # Check local working directory --- + _check_output_content(setup.working_directory) + + if setup.output_destination is not None: + # Check copy at output destination --- + _check_output_content(setup.output_destination) + + + def _generate_combinations(data: Dict[str, Sequence]) -> List[Dict[str, Any]]: From df9ea349a0a0513fdc46b5685750016ab6d112fb Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Tue, 7 Nov 2023 19:02:09 +0100 Subject: [PATCH 09/30] cleanup imports --- tests/unit/test_job_submitter.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 59be8de..f1cac12 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -1,11 +1,10 @@ import itertools -from dataclasses import astuple, dataclass, field, fields, asdict +from dataclasses import asdict, dataclass, field from pathlib import Path -from typing import Any, Dict, List, Optional, Sequence, Tuple, Union +from typing import Any, Dict, List, Optional, Sequence, Union import numpy as np import pytest -from generic_parser import DotDict from pylhc_submitter.job_submitter import main as job_submit from pylhc_submitter.utils.environment_tools import on_linux, on_windows From e6ca62549071c7ea9a423f917eba051206388e67 Mon Sep 17 00:00:00 2001 From: JoschD 
<26184899+JoschD@users.noreply.github.com> Date: Tue, 7 Nov 2023 22:45:26 +0100 Subject: [PATCH 10/30] code cleanup --- pylhc_submitter/autosix.py | 2 +- pylhc_submitter/constants/job_submitter.py | 33 ++ pylhc_submitter/job_submitter.py | 496 ++++++------------ .../{htc => job_submitter_tools}/__init__.py | 0 .../job_submitter_tools/constants.py | 0 .../htc_utils.py} | 103 ++-- .../job_submitter_tools/iotools.py | 245 +++++++++ .../{htc => job_submitter_tools}/mask.py | 38 +- .../job_submitter_tools/runners.py | 117 +++++ .../{environment_tools.py => environment.py} | 0 pylhc_submitter/utils/iotools.py | 3 +- tests/unit/test_job_submitter.py | 7 +- 12 files changed, 616 insertions(+), 428 deletions(-) create mode 100644 pylhc_submitter/constants/job_submitter.py rename pylhc_submitter/{htc => job_submitter_tools}/__init__.py (100%) create mode 100644 pylhc_submitter/job_submitter_tools/constants.py rename pylhc_submitter/{htc/utils.py => job_submitter_tools/htc_utils.py} (76%) create mode 100644 pylhc_submitter/job_submitter_tools/iotools.py rename pylhc_submitter/{htc => job_submitter_tools}/mask.py (67%) create mode 100644 pylhc_submitter/job_submitter_tools/runners.py rename pylhc_submitter/utils/{environment_tools.py => environment.py} (100%) diff --git a/pylhc_submitter/autosix.py b/pylhc_submitter/autosix.py index f1f908c..f6b720d 100644 --- a/pylhc_submitter/autosix.py +++ b/pylhc_submitter/autosix.py @@ -202,7 +202,7 @@ SIXENV_OPTIONAL, AutoSixEnvironment, ) -from pylhc_submitter.htc.mask import generate_jobdf_index +from pylhc_submitter.job_submitter_tools.mask import generate_jobdf_index from pylhc_submitter.job_submitter import ( JOBSUMMARY_FILE, COLUMN_JOBID, diff --git a/pylhc_submitter/constants/job_submitter.py b/pylhc_submitter/constants/job_submitter.py new file mode 100644 index 0000000..aca598c --- /dev/null +++ b/pylhc_submitter/constants/job_submitter.py @@ -0,0 +1,33 @@ + +""" +Constants: Job Submitter +---------------------------------- + +Collections of constants and paths used in the job-submitter. +""" +from pylhc_submitter.constants.external_paths import MADX_BIN, PYTHON2_BIN, PYTHON3_BIN + +JOBSUMMARY_FILE = "Jobs.tfs" +JOBDIRECTORY_PREFIX = "Job" +CONFIG_FILE = "config.ini" + +SCRIPT_EXTENSIONS = { + "madx": ".madx", + "python3": ".py", + "python2": ".py", +} + +EXECUTEABLEPATH = { + "madx": MADX_BIN, + "python3": PYTHON3_BIN, + "python2": PYTHON2_BIN, +} + + +COLUMN_JOBID = "JobId" +COLUMN_SHELL_SCRIPT = "ShellScript" +COLUMN_JOB_DIRECTORY = "JobDirectory" +COLUMN_DEST_DIRECTORY = "DestDirectory" +COLUMN_JOB_FILE = "JobFile" + +NON_PARAMETER_COLUMNS = (COLUMN_SHELL_SCRIPT, COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE, COLUMN_DEST_DIRECTORY) \ No newline at end of file diff --git a/pylhc_submitter/job_submitter.py b/pylhc_submitter/job_submitter.py index 4f17449..9748eed 100644 --- a/pylhc_submitter/job_submitter.py +++ b/pylhc_submitter/job_submitter.py @@ -16,124 +16,170 @@ For additional information and guides, see the `Job Submitter page `_ in the ``OMC`` documentation site. 
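
A minimal sketch of calling the submitter directly from Python (the mask file,
working directory and parameter names below are placeholders, not package defaults)::

    from pylhc_submitter.job_submitter import main as job_submit

    job_submit(
        executable="madx",
        mask="path/to/my_madx.mask",        # mask file or multi-line mask string
        working_directory="path/to/workdir",
        jobid_mask="job.%(SEED)d",
        replace_dict=dict(SEED=[1, 2, 3]),  # keys must appear in the mask; one job per combination
        jobflavour="workday",               # rough HTCondor runtime estimate
        run_local=False,
    )
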
+ *--Required--* -- **mask** *(str)*: Program mask to use +- **mask** *(PathOrStr)*: + + Program mask to use + + +- **replace_dict** *(DictAsString)*: -- **replace_dict** *(DictAsString)*: Dict containing the str to replace as - keys and values a list of parameters to replace + Dict containing the str to replace as keys and values a list of + parameters to replace -- **working_directory** *(str)*: Directory where data should be put + +- **working_directory** *(PathOrStr)*: + + Directory where data should be put *--Optional--* -- **append_jobs**: Flag to rerun job with finer/wider grid, - already existing points will not be reexecuted. +- **append_jobs**: + + Flag to rerun job with finer/wider grid, already existing points will + not be reexecuted. + + action: ``store_true`` + + +- **check_files** *(str)*: + + List of files/file-name-masks expected to be in the 'job_output_dir' + after a successful job (for appending/resuming). Uses the 'glob' + function, so unix-wildcards (*) are allowed. If not given, only the + presence of the folder itself is checked. + + +- **dryrun**: + + Flag to only prepare folders and scripts, but does not start/submit + jobs. Together with `resume_jobs` this can be use to check which jobs + succeeded and which failed. + + action: ``store_true`` + + +- **executable** *(PathOrStr)*: + + Path to executable or job-type (of ['madx', 'python3', 'python2']) to + use. + + default: ``madx`` + + +- **htc_arguments** *(DictAsString)*: + + Additional arguments for htcondor, as Dict-String. For AccountingGroup + please use 'accounting_group'. 'max_retries' and 'notification' have + defaults (if not given). Others are just passed on. + + default: ``{}`` + + +- **job_output_dir** *(str)*: + + The name of the output dir of the job. (Make sure your script puts its + data there!) - Action: ``store_true`` -- **check_files** *(str)*: List of files/file-name-masks expected to be in the - 'job_output_dir' after a successful job (for appending/resuming). Uses the 'glob' - function, so unix-wildcards (*) are allowed. If not given, only the presence of the folder itself is checked. -- **dryrun**: Flag to only prepare folders and scripts, - but does not start/submit jobs. - Together with `resume_jobs` this can be use to check which jobs succeeded and which failed. + default: ``Outputdata`` - Action: ``store_true`` -- **executable** *(str)*: Path to executable or job-type (of ['madx', 'python3', 'python2']) to use. -- **htc_arguments** *(DictAsString)*: Additional arguments for htcondor, as Dict-String. - For AccountingGroup please use 'accounting_group'. 'max_retries' and 'notification' have defaults (if not given). - Others are just passed on. +- **jobflavour** *(str)*: - Default: ``{}`` -- **job_output_dir** *(str)*: The name of the output dir of the job. (Make sure your script puts its data there!) + Jobflavour to give rough estimate of runtime of one job - Default: ``Outputdata`` -- **jobflavour** *(str)*: Jobflavour to give rough estimate of runtime of one job + choices: ``('espresso', 'microcentury', 'longlunch', 'workday', 'tomorrow', 'testmatch', 'nextweek')`` - Choices: ``('espresso', 'microcentury', 'longlunch', 'workday', 'tomorrow', 'testmatch', 'nextweek')`` - Default: ``workday`` -- **jobid_mask** *(str)*: Mask to name jobs from replace_dict + default: ``workday`` -- **num_processes** *(int)*: Number of processes to be used if run locally - Default: ``4`` -- **resume_jobs**: Only do jobs that did not work. 
+- **jobid_mask** *(str)*: - Action: ``store_true`` -- **run_local**: Flag to run the jobs on the local machine. Not suggested. + Mask to name jobs from replace_dict - Action: ``store_true`` -- **script_arguments** *(DictAsString)*: Additional arguments to pass to the script, - as dict in key-value pairs ('--' need to be included in the keys). - Default: ``{}`` -- **script_extension** *(str)*: New extension for the scripts created from the masks. - This is inferred automatically for ['madx', 'python3', 'python2']. Otherwise not changed. +- **num_processes** *(int)*: -- **ssh** *(str)*: Run htcondor from this machine via ssh (needs access to the `working_directory`) + Number of processes to be used if run locally + + default: ``4`` + + +- **output_destination** *(PathOrStr)*: + + Directory where to store the output of the jobs . (Can be on EOS) + + +- **resume_jobs**: + + Only do jobs that did not work. + + action: ``store_true`` + + +- **run_local**: + + Flag to run the jobs on the local machine. Not suggested. + + action: ``store_true`` + + +- **script_arguments** *(DictAsString)*: + + Additional arguments to pass to the script, as dict in key-value pairs + ('--' need to be included in the keys). + + default: ``{}`` + + +- **script_extension** *(str)*: + + New extension for the scripts created from the masks. This is inferred + automatically for ['madx', 'python3', 'python2']. Otherwise not + changed. + + +- **ssh** *(str)*: + + Run htcondor from this machine via ssh (needs access to the + `working_directory`) -:author: mihofer, jdilly, fesoubel """ -import itertools import logging -import multiprocessing -import subprocess import sys +from dataclasses import fields from pathlib import Path -import numpy as np -import tfs from generic_parser import EntryPointParameters, entrypoint from generic_parser.entry_datatypes import DictAsString from generic_parser.tools import print_dict_tree -import pylhc_submitter.htc.utils as htcutils -from pylhc_submitter.htc.mask import ( - check_percentage_signs_in_mask, - create_jobs_from_mask, - find_named_variables_in_mask, - generate_jobdf_index, -) -from pylhc_submitter.htc.utils import ( - COLUMN_JOB_DIRECTORY, - COLUMN_DEST_DIRECTORY, - COLUMN_SHELL_SCRIPT, - EXECUTEABLEPATH, - HTCONDOR_JOBLIMIT, - JOBFLAVOURS, - _strip_eos_uri, -) -from pylhc_submitter.utils.environment_tools import on_windows -from pylhc_submitter.utils.iotools import PathOrStr, save_config, make_replace_entries_iterable, keys_to_path +from pylhc_submitter.constants.job_submitter import EXECUTEABLEPATH, SCRIPT_EXTENSIONS +from pylhc_submitter.job_submitter_tools.htc_utils import JOBFLAVOURS +from pylhc_submitter.job_submitter_tools.iotools import CreationOpts, create_jobs, print_stats +from pylhc_submitter.job_submitter_tools.mask import (check_percentage_signs_in_mask, + find_named_variables_in_mask, is_mask_file) +from pylhc_submitter.job_submitter_tools.runners import RunnerOpts, run_jobs +from pylhc_submitter.utils.iotools import (PathOrStr, keys_to_path, make_replace_entries_iterable, + save_config) from pylhc_submitter.utils.logging_tools import log_setup -JOBSUMMARY_FILE = "Jobs.tfs" -JOBDIRECTORY_PREFIX = "Job" -COLUMN_JOBID = "JobId" -CONFIG_FILE = "config.ini" - -SCRIPT_EXTENSIONS = { - "madx": ".madx", - "python3": ".py", - "python2": ".py", -} - LOG = logging.getLogger(__name__) try: import htcondor - HAS_HTCONDOR = True except ImportError: platform = "macOS" if sys.platform == "darwin" else "windows" LOG.warning( f"htcondor python bindings are linux-only. 
You can still use job_submitter on {platform}, " "but only for local runs." ) - HAS_HTCONDOR = False + htcondor = None def get_params(): @@ -280,282 +326,37 @@ def main(opt): else: LOG.info("Starting Job-submitter.") - opt = _check_opts(opt) - save_config(opt.working_directory, opt, "job_submitter") - - job_df = _create_jobs( - opt.working_directory, - opt.mask, - opt.jobid_mask, - opt.replace_dict, - opt.job_output_dir, - opt.output_destination, - opt.append_jobs, - opt.executable, - opt.script_arguments, - opt.script_extension, - ) - job_df, dropped_jobs = _drop_already_ran_jobs( - job_df, opt.resume_jobs or opt.append_jobs, opt.job_output_dir, opt.check_files - ) + save_config(Path(opt.working_directory), opt, "job_submitter") + creation_opt, runner_opt = check_opts(opt) - if opt.run_local and not opt.dryrun: - _run_local(job_df, opt.num_processes) - else: - _run_htc( - job_df, - opt.working_directory, - opt.job_output_dir, - opt.output_destination, - opt.jobflavour, - opt.ssh, - opt.dryrun, - opt.htc_arguments, - ) - if opt.dryrun: - _print_stats(job_df.index, dropped_jobs) - - -# Main Functions --------------------------------------------------------------- - - -def _create_jobs( - cwd, - mask_path_or_string, - jobid_mask, - replace_dict, - output_dir, - output_dest, - append_jobs, - executable, - script_args, - script_extension, -) -> tfs.TfsDataFrame: - LOG.debug("Creating Jobs.") - values_grid = np.array(list(itertools.product(*replace_dict.values())), dtype=object) - - if append_jobs: - jobfile_path = cwd / JOBSUMMARY_FILE - try: - job_df = tfs.read(str(jobfile_path.absolute()), index=COLUMN_JOBID) - except FileNotFoundError as filerror: - raise FileNotFoundError( - "Cannot append jobs, as no previous jobfile was found at " f"'{jobfile_path}'" - ) from filerror - mask = [elem not in job_df[replace_dict.keys()].values for elem in values_grid] - njobs = mask.count(True) - values_grid = values_grid[mask] - else: - njobs = len(values_grid) - job_df = tfs.TfsDataFrame() - - if njobs == 0: - raise ValueError(f"No (new) jobs found!") - if njobs > HTCONDOR_JOBLIMIT: - LOG.warning( - f"You are attempting to submit an important number of jobs ({njobs})." - "This can be a high stress on your system, make sure you know what you are doing." 
- ) - - LOG.debug(f"Initial number of jobs: {njobs:d}") - data_df = tfs.TfsDataFrame( - index=generate_jobdf_index(job_df, jobid_mask, replace_dict.keys(), values_grid), - columns=list(replace_dict.keys()), - data=values_grid, - ) - job_df = tfs.concat([job_df, data_df], sort=False, how_headers='left') - job_df = _setup_folders(job_df, cwd, output_dest) - - if htcutils.is_mask_file(mask_path_or_string): - LOG.debug("Creating all jobs from mask.") - script_extension = _get_script_extension(script_extension, executable, mask_path_or_string) - job_df = create_jobs_from_mask( - job_df, mask_path_or_string, replace_dict.keys(), script_extension - ) - - LOG.debug("Creating shell scripts for submission.") - job_df = htcutils.write_bash( - job_df, - output_dir, - destination_dir=output_dest, - executable=executable, - cmdline_arguments=script_args, - mask=mask_path_or_string, - ) - - job_df[COLUMN_JOB_DIRECTORY] = job_df[COLUMN_JOB_DIRECTORY].apply(str) - job_df[COLUMN_DEST_DIRECTORY] = job_df[COLUMN_DEST_DIRECTORY].apply(str) - tfs.write(str(cwd / JOBSUMMARY_FILE), job_df, save_index=COLUMN_JOBID) - return job_df - - -def _drop_already_ran_jobs( - job_df: tfs.TfsDataFrame, drop_jobs: bool, output_dir: str, check_files: str -): - LOG.debug("Dropping already finished jobs, if necessary.") - finished_jobs = [] - if drop_jobs: - finished_jobs = [ - idx - for idx, row in job_df.iterrows() - if _job_was_successful(row, output_dir, check_files) - ] - LOG.info( - f"{len(finished_jobs):d} of {len(job_df.index):d}" - " Jobs have already finished and will be skipped." - ) - job_df = job_df.drop(index=finished_jobs) - return job_df, finished_jobs + job_df, dropped_jobs = create_jobs(creation_opt) -def _run_local(job_df: tfs.TfsDataFrame, num_processes: int) -> None: - LOG.info(f"Running {len(job_df.index)} jobs locally in {num_processes:d} processes.") - - # URI type EOS addresses won't work for copying files from local jobs - check_dest = job_df.iloc[0][COLUMN_DEST_DIRECTORY] - if not _strip_eos_uri(check_dest) == Path(check_dest): - LOG.warning("The output desitnation is likely specified as EOS URI," - "which will not work during a local run") - - pool = multiprocessing.Pool(processes=num_processes) - res = pool.map(_execute_shell, job_df.iterrows()) - if any(res): - LOG.error("At least one job has failed.") - raise RuntimeError("At least one job has failed. 
Check output logs!") - - -def _run_htc( - job_df: tfs.TfsDataFrame, - cwd: str, - output_dir: str, - dest_dir: str, - flavour: str, - ssh: str, - dryrun: bool, - additional_htc_arguments: DictAsString, -) -> None: - LOG.info(f"Submitting {len(job_df.index)} jobs on htcondor, flavour '{flavour}'.") - LOG.debug("Creating htcondor subfile.") - - # If a different destination for the data is required - # is is handled through the job bash files, so remove it from - # HTConodor's file transfer specification - if dest_dir is None: - subfile = htcutils.make_subfile( - cwd, job_df, output_dir=output_dir, duration=flavour, **additional_htc_arguments - ) - else: - subfile = htcutils.make_subfile( - cwd, job_df, duration=flavour, **additional_htc_arguments - ) - - if not dryrun: - LOG.debug("Submitting jobs to htcondor.") - htcutils.submit_jobfile(subfile, ssh) - - -def _get_script_extension(script_extension: str, executable: PathOrStr, mask: PathOrStr) -> str: - if script_extension is not None: - return script_extension - return SCRIPT_EXTENSIONS.get(executable, mask.suffix) - - -# Sub Functions ---------------------------------------------------------------- - - -def _check_htcondor_presence() -> None: - """Checks the ``HAS_HTCONDOR`` variable and raises EnvironmentError if it is ``False``.""" - if not HAS_HTCONDOR: - raise EnvironmentError("htcondor bindings are necessary to run this module.") + run_jobs(job_df, runner_opt) + print_stats(job_df.index, dropped_jobs) -def _setup_folders(job_df: tfs.TfsDataFrame, working_directory: PathOrStr, - destination_directory: PathOrStr = None) -> tfs.TfsDataFrame: - def _return_job_dir(job_id): - return working_directory / f"{JOBDIRECTORY_PREFIX}.{job_id}" - - def _return_dest_dir(job_id): - return destination_directory / f"{JOBDIRECTORY_PREFIX}.{job_id}" - - LOG.debug("Setting up folders: ") - job_df[COLUMN_JOB_DIRECTORY] = [_return_job_dir(id_) for id_ in job_df.index] - - for job_dir in job_df[COLUMN_JOB_DIRECTORY]: - job_dir.mkdir(exist_ok=True) - LOG.debug(f" created '{job_dir}'.") - - if destination_directory is None: - job_df[COLUMN_DEST_DIRECTORY] = job_df[COLUMN_JOB_DIRECTORY] - else: - job_df[COLUMN_DEST_DIRECTORY] = [_return_dest_dir(id_) for id_ in job_df.index] - - strip_dest_dir: Path = _strip_eos_uri(destination_directory) - strip_dest_dir.mkdir(parents=True, exist_ok=True) - - # Make some symlinks for easy navigation--- - # Output directory -> Working Directory - sym_submission = destination_directory / Path('SUBMISSION_DIR') - sym_submission.symlink_to(working_directory.resolve(), target_is_directory=True) - - # Working Directory -> Output Directory - sym_destination = working_directory / Path('OUTPUT_DIR') - sym_destination.symlink_to(destination_directory.resolve(), target_is_directory=True) - - # Create output dirs per job --- - for job_dest_dir in job_df[COLUMN_DEST_DIRECTORY]: - _strip_eos_uri(job_dest_dir).mkdir(exist_ok=True) - LOG.debug(f" created '{job_dest_dir}'.") - - return job_df - - -def _job_was_successful(job_row, output_dir, files) -> bool: - output_dir = Path(job_row[COLUMN_DEST_DIRECTORY], output_dir) - success = output_dir.is_dir() and any(output_dir.iterdir()) - if success and files is not None and len(files): - for f in files: - success &= len(list(output_dir.glob(f))) > 0 - return success - - -def _execute_shell(df_row) -> int: - idx, column = df_row - cmd = [] if on_windows() else ["sh"] - - with Path(column[COLUMN_JOB_DIRECTORY], "log.tmp").open("w") as logfile: - process = subprocess.Popen( - cmd + 
[column[COLUMN_SHELL_SCRIPT]], - shell=on_windows(), - stdout=logfile, - stderr=subprocess.STDOUT, - cwd=column[COLUMN_JOB_DIRECTORY], - ) - return process.wait() - - -def _check_opts(opt): +def check_opts(opt): + """ Checks options and sorts them into job-creation and running parameters. """ LOG.debug("Checking options.") if opt.resume_jobs and opt.append_jobs: raise ValueError("Select either Resume jobs or Append jobs") # Paths --- - opt = keys_to_path(opt, "working_directory", "executable") + opt = keys_to_path(opt, "working_directory", "executable", "output_destination") if str(opt.executable) in EXECUTEABLEPATH.keys(): opt.executable = str(opt.executable) - if htcutils.is_mask_file(opt.mask): - mask = Path(opt.mask).read_text() # checks that mask and dir are there - opt["mask"] = Path(opt["mask"]) + if is_mask_file(opt.mask): + mask_content = Path(opt.mask).read_text() # checks that mask and dir are there + opt.mask = Path(opt.mask) else: - mask = opt.mask - - if "output_destination" in opt and opt["output_destination"] is not None: - opt["output_destination"] = Path(opt["output_destination"]) + mask_content = opt.mask # Replace dict --- dict_keys = set(opt.replace_dict.keys()) - mask_keys = find_named_variables_in_mask(mask) + mask_keys = find_named_variables_in_mask(mask_content) not_in_mask = dict_keys - mask_keys not_in_dict = mask_keys - dict_keys @@ -575,25 +376,24 @@ def _check_opts(opt): [opt.replace_dict.pop(key) for key in not_in_mask] if len(opt.replace_dict) == 0: raise KeyError("Empty replace-dictionary") - check_percentage_signs_in_mask(mask) + check_percentage_signs_in_mask(mask_content) print_dict_tree(opt, name="Input parameter", print_fun=LOG.debug) opt.replace_dict = make_replace_entries_iterable(opt.replace_dict) - return opt - - -def _print_stats(new_jobs, finished_jobs): - """Print some quick statistics.""" - LOG.info("------------- QUICK STATS ----------------") - LOG.info(f"Jobs total:{len(new_jobs) + len(finished_jobs):d}") - LOG.info(f"Jobs to run: {len(new_jobs):d}") - LOG.info(f"Jobs already finished: {len(finished_jobs):d}") - LOG.info("---------- JOBS TO RUN: NAMES -------------") - for job_name in new_jobs: - LOG.info(job_name) - LOG.info("--------- JOBS FINISHED: NAMES ------------") - for job_name in finished_jobs: - LOG.info(job_name) + + # Create new classes + opt.output_dir = opt.job_output_dir # renaming + + creation = CreationOpts(**{f.name: opt[f.name] for f in fields(CreationOpts)}) + runner = RunnerOpts(**{f.name: opt[f.name] for f in fields(RunnerOpts)}) + runner.output_dir = None if opt.output_destination else opt.output_dir + return creation, runner + + +def _check_htcondor_presence() -> None: + """ Raises an error if htcondor is not installed. 
""" + if htcondor is None: + raise EnvironmentError("htcondor bindings are necessary to run this module.") # Script Mode ------------------------------------------------------------------ diff --git a/pylhc_submitter/htc/__init__.py b/pylhc_submitter/job_submitter_tools/__init__.py similarity index 100% rename from pylhc_submitter/htc/__init__.py rename to pylhc_submitter/job_submitter_tools/__init__.py diff --git a/pylhc_submitter/job_submitter_tools/constants.py b/pylhc_submitter/job_submitter_tools/constants.py new file mode 100644 index 0000000..e69de29 diff --git a/pylhc_submitter/htc/utils.py b/pylhc_submitter/job_submitter_tools/htc_utils.py similarity index 76% rename from pylhc_submitter/htc/utils.py rename to pylhc_submitter/job_submitter_tools/htc_utils.py index 47f0eb9..43112a4 100644 --- a/pylhc_submitter/htc/utils.py +++ b/pylhc_submitter/job_submitter_tools/htc_utils.py @@ -15,21 +15,26 @@ import logging import subprocess from pathlib import Path -from typing import Union +from typing import Any, Dict, List, Union from pandas import DataFrame -from pylhc_submitter.utils.environment_tools import on_windows +from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, + COLUMN_JOB_FILE, COLUMN_SHELL_SCRIPT, + EXECUTEABLEPATH, NON_PARAMETER_COLUMNS) +from pylhc_submitter.job_submitter_tools.iotools import is_eos_path +from pylhc_submitter.job_submitter_tools.mask import is_mask_file +from pylhc_submitter.utils.environment import on_windows try: import htcondor except ImportError: # will be handled by job_submitter pass -from pylhc_submitter.constants.external_paths import MADX_BIN, PYTHON2_BIN, PYTHON3_BIN LOG = logging.getLogger(__name__) +# HTC Constants ################################################################ SHEBANG = "#!/bin/bash" SUBFILE = "queuehtc.sub" @@ -37,13 +42,6 @@ HTCONDOR_JOBLIMIT = 100000 -EXECUTEABLEPATH = { - "madx": MADX_BIN, - "python3": PYTHON3_BIN, - "python2": PYTHON2_BIN, -} - - CMD_SUBMIT = "condor_submit" JOBFLAVOURS = ( "espresso", # 20 min @@ -58,11 +56,6 @@ NOTIFICATIONS = ("always", "complete", "error", "never") -COLUMN_SHELL_SCRIPT = "ShellScript" -COLUMN_JOB_DIRECTORY = "JobDirectory" -COLUMN_DEST_DIRECTORY = "DestDirectory" -COLUMN_JOB_FILE = "JobFile" - # Subprocess Methods ########################################################### @@ -88,7 +81,7 @@ def submit_jobfile(jobfile: Path, ssh: str): LOG.info("Jobs successfully submitted.") -def _start_subprocess(command): +def _start_subprocess(command: List[str]): LOG.debug(f"Executing command '{command}'") process = subprocess.Popen( command, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -116,6 +109,7 @@ def create_multijob_for_bashfiles(job_df: DataFrame, **kwargs): notification (str): Notify under certain conditions. Defaults to ``error``. priority (int): Priority to order your jobs. Defaults to ``None``. 
""" + # Pre-defined HTCondor arguments for our jobs submit_dict = { "MyId": "htcondor", "universe": "vanilla", @@ -126,8 +120,9 @@ def create_multijob_for_bashfiles(job_df: DataFrame, **kwargs): "on_exit_remove": "(ExitBySignal == False) && (ExitCode == 0)", "requirements": "Machine =!= LastRemoteHost", } - submit_dict.update(_map_kwargs(kwargs)) - + submit_dict.update(map_kwargs(kwargs)) + + # Let the htcondor create the submit-file job = htcondor.Submit(submit_dict) # add the multiple bash files @@ -160,49 +155,53 @@ def make_subfile(cwd: Path, job_df: DataFrame, **kwargs): def write_bash( job_df: DataFrame, output_dir: Path = None, - destination_dir: Path = None, executable: str = "madx", cmdline_arguments: dict = None, mask: Union[str, Path] = None, ) -> DataFrame: - """Write the bash-files to be called by ``HTCondor``.""" + """ + Write the bash-files to be called by ``HTCondor``, which in turn call the executable. + """ if len(job_df.index) > HTCONDOR_JOBLIMIT: raise AttributeError("Submitting too many jobs for HTCONDOR") - cmds = "" - if cmdline_arguments is not None: - cmds = f" {' '.join([f'{param} {val}' for param, val in cmdline_arguments.items()])}" - - if executable is None: - exec_path = '' - else: - exec_path = f"{str(EXECUTEABLEPATH.get(executable, executable))} " + exec_path = f"{str(EXECUTEABLEPATH.get(executable, executable))} " if executable else '' + cmds = f" {' '.join([f'{param} {val}' for param, val in cmdline_arguments.items()])}" if cmdline_arguments else '' shell_scripts = [None] * len(job_df.index) for idx, (jobid, job) in enumerate(job_df.iterrows()): job_dir = Path(job[COLUMN_JOB_DIRECTORY]) bash_file_name = f"{BASH_FILENAME}.{jobid}.{'bat' if on_windows() else 'sh'}" jobfile = job_dir / bash_file_name + LOG.debug(f"Writing bash-file {idx:d} '{jobfile}'.") with open(jobfile, "w") as f: + # Preparation --- if not on_windows(): - f.write(f"{SHEBANG}\n") + f.write(f"{SHEBANG}\n") + if output_dir is not None: f.write(f"mkdir {str(output_dir)}\n") + + # The actual job execution --- f.write(exec_path) + # Call the mask-file or the filled-template string if is_mask_file(mask): f.write(str(job_dir / job[COLUMN_JOB_FILE])) else: - replace_columns = [column for column in job.index.tolist() if column not in [COLUMN_SHELL_SCRIPT, COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE]] + replace_columns = [column for column in job.index.tolist() if column not in NON_PARAMETER_COLUMNS] f.write(mask % dict(zip(replace_columns, job[replace_columns]))) + + # Additional commands for the mask/string f.write(cmds) f.write("\n") - if destination_dir is not None: - if output_dir is not None: - cp_command = f'cp -r {output_dir} {job[COLUMN_DEST_DIRECTORY]}' - if is_eos_path(destination_dir): + # Manually copy output (if needed) --- + dest_dir = job.get(COLUMN_DEST_DIRECTORY) + if output_dir and dest_dir and output_dir != dest_dir: + cp_command = f'cp -r {output_dir} {dest_dir}' + if is_eos_path(dest_dir): cp_command = f'eos {cp_command}' f.write(f'{cp_command}\n') @@ -213,17 +212,14 @@ def write_bash( return job_df -# Helper ####################################################################### - - -def _map_kwargs(add_dict): +def map_kwargs(add_dict: Dict[str, Any]) -> Dict[str, Any]: """ Maps the kwargs for the job-file. Some arguments have pre-defined choices and defaults, the remaining ones are just passed on. 
""" new = {} - # Predefined ones + # Predefined mappings htc_map = { "duration": ("+JobFlavour", JOBFLAVOURS, "workday"), "output_dir": ("transfer_output_files", None, None), @@ -251,39 +247,14 @@ def _map_kwargs(add_dict): return new +# Helper ####################################################################### + def _maybe_put_in_quotes(key, value): if key.startswith("+"): return f'"{value}"' return value -def is_eos_path(path): - path = Path(path) - strip_path_parts = _strip_eos_uri(path).parts - return len(strip_path_parts) > 1 and strip_path_parts[1] == 'eos' - - -def _strip_eos_uri(path): - # EOS paths for HTCondor are given with URI, strip for direct writing - # root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt - path = Path(path) - parts = path.parts - outpath = path - if parts[0].endswith(':'): - # the first two parts are host info, e.g `file: //host/path` - outpath = Path('/', *parts[2:]) - return outpath - - -def is_mask_file(mask): - try: - return Path(mask).is_file() - except OSError: - return False - -def is_mask_string(mask): - return not is_mask_file(mask) - # Script Mode ################################################################## diff --git a/pylhc_submitter/job_submitter_tools/iotools.py b/pylhc_submitter/job_submitter_tools/iotools.py new file mode 100644 index 0000000..bc29777 --- /dev/null +++ b/pylhc_submitter/job_submitter_tools/iotools.py @@ -0,0 +1,245 @@ +""" +Job Submitter IO-Tools +---------------------- + +Tools for input and output for the job-submitter. +""" +import itertools +import logging +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, List, Sequence, Tuple, Union + +import numpy as np +import tfs + +from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, + COLUMN_JOBID, JOBDIRECTORY_PREFIX, + JOBSUMMARY_FILE, SCRIPT_EXTENSIONS) +from pylhc_submitter.job_submitter_tools import htc_utils +from pylhc_submitter.job_submitter_tools.mask import (create_job_scripts_from_mask, + generate_jobdf_index, is_mask_file) + +LOG = logging.getLogger(__name__) + + +@dataclass +class CreationOpts: + working_directory: Path + mask: Union[Path, str] + jobid_mask: str + replace_dict: Dict[str, Any] + output_dir: Path + output_destination: Path + append_jobs: bool + resume_jobs: bool + executable: str + check_files: Sequence[str] + script_arguments: Dict[str, Any] + script_extension: str + + def should_drop_jobs(self) -> bool: + return self.append_jobs or self.resume_jobs + + + +def create_jobs(opt: CreationOpts) -> tfs.TfsDataFrame: + LOG.debug("Creating Jobs.") + + # Generate product of replace-dict and compare to existing jobs --- + parameters, values_grid, prev_job_df = _generate_parameter_space( + replace_dict=opt.replace_dict, + append_jobs=opt.append_jobs, + cwd=opt.working_directory, + ) + + # Check new jobs --- + njobs = len(values_grid) + if njobs == 0: + raise ValueError(f"No (new) jobs found!") + + if njobs > htc_utils.HTCONDOR_JOBLIMIT: + LOG.warning( + f"You are attempting to submit an important number of jobs ({njobs})." + "This can be a high stress on your system, make sure you know what you are doing." 
+ ) + + LOG.debug(f"Initial number of jobs: {njobs:d}") + + # Generate new job-dataframe --- + job_df = tfs.TfsDataFrame( + index=generate_jobdf_index(prev_job_df, opt.jobid_mask, parameters, values_grid), + columns=parameters, + data=values_grid, + ) + job_df = tfs.concat([prev_job_df, job_df], sort=False, how_headers='left') + + # Setup folders --- + job_df = create_folders(job_df, opt.working_directory, opt.output_destination) + + # Create scripts --- + if is_mask_file(opt.mask): + LOG.debug("Creating all jobs from mask.") + script_extension = _get_script_extension(opt.script_extension, opt.executable, opt.mask) + job_df = create_job_scripts_from_mask( + job_df, opt.mask, parameters, script_extension + ) + + LOG.debug("Creating shell scripts.") + job_df = htc_utils.write_bash( + job_df, + output_dir=opt.output_dir, + executable=opt.executable, + cmdline_arguments=opt.script_arguments, + mask=opt.mask, + ) + + # Convert paths to strings and write df to file --- + job_df[COLUMN_JOB_DIRECTORY] = job_df[COLUMN_JOB_DIRECTORY].apply(str) + if COLUMN_DEST_DIRECTORY in job_df.columns: + job_df[COLUMN_DEST_DIRECTORY] = job_df[COLUMN_DEST_DIRECTORY].apply(str) + + tfs.write(str(opt.working_directory / JOBSUMMARY_FILE), job_df, save_index=COLUMN_JOBID) + + # Drop already run jobs --- + dropped_jobs = [] + if opt.should_drop_jobs(): + job_df, dropped_jobs = _drop_already_run_jobs( + job_df, opt.output_dir, opt.check_files + ) + return job_df, dropped_jobs + + +def create_folders(job_df: tfs.TfsDataFrame, working_directory: Path, + destination_directory: Path = None) -> tfs.TfsDataFrame: + LOG.debug("Setting up folders: ") + + jobname = f"{JOBDIRECTORY_PREFIX}.{{0}}" + job_df[COLUMN_JOB_DIRECTORY] = [working_directory / jobname.format(id_) for id_ in job_df.index] + + for job_dir in job_df[COLUMN_JOB_DIRECTORY]: + job_dir.mkdir(exist_ok=True) + LOG.debug(f" created '{job_dir}'.") + + if destination_directory: + job_df[COLUMN_DEST_DIRECTORY] = [destination_directory / jobname.format(id_) for id_ in job_df.index] + + strip_dest_dir = strip_eos_uri(destination_directory) + strip_dest_dir.mkdir(parents=True, exist_ok=True) + + # Make some symlinks for easy navigation--- + # Output directory -> Working Directory + sym_submission = destination_directory / Path('SUBMISSION_DIR') + sym_submission.symlink_to(working_directory.resolve(), target_is_directory=True) + + # Working Directory -> Output Directory + sym_destination = working_directory / Path('OUTPUT_DIR') + sym_destination.symlink_to(destination_directory.resolve(), target_is_directory=True) + + # Create output dirs per job --- + for job_dest_dir in job_df[COLUMN_DEST_DIRECTORY]: + strip_eos_uri(job_dest_dir).mkdir(exist_ok=True) + LOG.debug(f" created '{job_dest_dir}'.") + + return job_df + + +def is_eos_path(path: Union[Path, str]) -> bool: + """ Check if the given path leads to EOS.""" + strip_path_parts = strip_eos_uri(path).parts + return len(strip_path_parts) > 1 and strip_path_parts[1] == 'eos' + + +def strip_eos_uri(path: Union[Path, str]) -> Path: + # EOS paths for HTCondor can be given as URI. Strip for direct writing. 
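As a minimal standalone sketch of this URI handling (assuming a POSIX system; the helper below mirrors the function in this patch and is not part of it):

    from pathlib import Path

    def strip_uri(path):
        parts = Path(path).parts
        if parts[0].endswith(':'):
            # the first two parts are protocol and host, e.g. 'root:', 'eosuser.cern.ch'
            return Path('/', *parts[2:])
        return Path(path)

    uri = "root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt"
    print(strip_uri(uri))           # /eos/user/a/anabramo/banana.txt
    print(strip_uri(uri).parts[1])  # 'eos', so the path is recognised as EOS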
+ # E.g.: root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt + path = Path(path) + parts = path.parts + outpath = path + if parts[0].endswith(':'): + # the first two parts are host info, e.g `file: //host/path` + outpath = Path('/', *parts[2:]) + return outpath + + +def print_stats(new_jobs, finished_jobs): + """Print some quick statistics.""" + text = [ + "\n------------- QUICK STATS ----------------" + f"Jobs total:{len(new_jobs) + len(finished_jobs):d}", + f"Jobs to run: {len(new_jobs):d}", + f"Jobs already finished: {len(finished_jobs):d}", + "---------- JOBS TO RUN: NAMES -------------" + ] + for job_name in new_jobs: + text.append(job_name) + text += ["--------- JOBS FINISHED: NAMES ------------"] + for job_name in finished_jobs: + text.append(job_name) + LOG.info("\n".join(text)) + + +def _generate_parameter_space( + replace_dict: Dict[str, Any], append_jobs: bool, cwd: Path + ) -> Tuple[List[str], np.ndarray, tfs.TfsDataFrame]: + """ Generate parameter space from replace-dict, check for existing jobs. """ + LOG.debug("Generating parameter space from replace-dict.") + parameters = list(replace_dict.keys()) + values_grid = _generate_values_grid(replace_dict) + if not append_jobs: + return parameters, values_grid, tfs.TfsDataFrame() + + jobfile_path = cwd / JOBSUMMARY_FILE + try: + prev_job_df = tfs.read(str(jobfile_path.absolute()), index=COLUMN_JOBID) + except FileNotFoundError as filerror: + raise FileNotFoundError( + "Cannot append jobs, as no previous jobfile was found at " f"'{jobfile_path}'" + ) from filerror + new_jobs_mask = [elem not in prev_job_df[parameters].values for elem in values_grid] + values_grid = values_grid[new_jobs_mask] + + return parameters, values_grid, prev_job_df + + +def _generate_values_grid(replace_dict: Dict[str, Any]) -> np.ndarray: + """ Creates an array of the inner-product of the replace-dict. """ + return np.array(list(itertools.product(*replace_dict.values())), dtype=object) + + +def _drop_already_run_jobs( + job_df: tfs.TfsDataFrame, output_dir: str, check_files: str + ) -> Tuple[tfs.TfsDataFrame, List[str]]: + """ Check for jobs that have already been run and drop them from current job_df. """ + LOG.debug("Dropping already finished jobs.") + finished_jobs = [ + idx + for idx, row in job_df.iterrows() + if _job_was_successful(row, output_dir, check_files) + ] + + LOG.info( + f"{len(finished_jobs):d} of {len(job_df.index):d}" + " Jobs have already finished and will be skipped." + ) + + job_df = job_df.drop(index=finished_jobs) + return job_df, finished_jobs + + +def _job_was_successful(job_row, output_dir, files) -> bool: + job_dir = job_row.get(COLUMN_DEST_DIRECTORY) or job_row[COLUMN_JOB_DIRECTORY] + output_dir = Path(job_dir, output_dir) + success = output_dir.is_dir() and any(output_dir.iterdir()) + if success and files is not None and len(files): + for f in files: + success &= len(list(output_dir.glob(f))) > 0 + return success + + +def _get_script_extension(script_extension: str, executable: Path, mask: Path) -> str: + """ Returns the extension of the script to run based on + either the given value, its executable or the mask. 
""" + if script_extension is not None: + return script_extension + return SCRIPT_EXTENSIONS.get(executable, mask.suffix) diff --git a/pylhc_submitter/htc/mask.py b/pylhc_submitter/job_submitter_tools/mask.py similarity index 67% rename from pylhc_submitter/htc/mask.py rename to pylhc_submitter/job_submitter_tools/mask.py index 616e10f..616bb7a 100644 --- a/pylhc_submitter/htc/mask.py +++ b/pylhc_submitter/job_submitter_tools/mask.py @@ -8,15 +8,17 @@ import logging import re from pathlib import Path +from typing import Sequence import pandas as pd +from numpy.typing import ArrayLike -from pylhc_submitter.htc.utils import COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE +from pylhc_submitter.constants.job_submitter import COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE LOG = logging.getLogger(__name__) -def create_jobs_from_mask( +def create_job_scripts_from_mask( job_df: pd.DataFrame, maskfile: Path, replace_keys: dict, file_ext: str ) -> pd.DataFrame: """ @@ -44,8 +46,8 @@ def create_jobs_from_mask( for idx, (jobid, values) in enumerate(job_df.iterrows()): jobfile_fullpath = (Path(values[COLUMN_JOB_DIRECTORY]) / jobname).with_suffix(file_ext) - with jobfile_fullpath.open("w") as madxjob: - madxjob.write(template % dict(zip(replace_keys, values[list(replace_keys)]))) + with jobfile_fullpath.open("w") as job_file: + job_file.write(template % dict(zip(replace_keys, values[list(replace_keys)]))) jobs[idx] = jobfile_fullpath.name job_df[COLUMN_JOB_FILE] = jobs return job_df @@ -70,14 +72,38 @@ def check_percentage_signs_in_mask(mask: str): raise KeyError(f"{n_signs} problematic '%' signs found in template. Please remove.") -def generate_jobdf_index(old_df, jobid_mask, keys, values): - """ Generates index for jobdf from mask for job_id naming. """ +def generate_jobdf_index(old_df: pd.DataFrame, jobid_mask: str, keys: Sequence[str], values: ArrayLike): + """ Generates index for jobdf from mask for job_id naming. + + Args: + old_df (pd.DataFrame): Existing jobdf. + jobid_mask (str): Mask for naming the jobs. + keys (Sequence[str]): Keys to be replaced in the mask. + values (np.array_like): Values-Grid to be replaced in the mask. + """ if not jobid_mask: + # Use integer-range as index, if no mask is given + # Start with last index if old_df is not None. nold = len(old_df.index) if old_df is not None else 0 start = nold-1 if nold > 0 else 0 return range(start, start + values.shape[0]) + + # Fill job-id mask return [jobid_mask % dict(zip(keys, v)) for v in values] +def is_mask_file(mask: str) -> bool: + """ Check if given string points to a file. """ + try: + return Path(mask).is_file() + except OSError: + return False + + +def is_mask_string(mask: str) -> bool: + """ Checks that given string does not point to a file. """ + return not is_mask_file(mask) + + if __name__ == "__main__": raise EnvironmentError(f"{__file__} is not supposed to run as main.") diff --git a/pylhc_submitter/job_submitter_tools/runners.py b/pylhc_submitter/job_submitter_tools/runners.py new file mode 100644 index 0000000..516813a --- /dev/null +++ b/pylhc_submitter/job_submitter_tools/runners.py @@ -0,0 +1,117 @@ +""" +Job Submitter Runners +--------------------- + +Defines the methods to run the job-submitter, locally or on HTC. 
+""" +import logging +import multiprocessing +import subprocess +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, Optional + +import tfs + +from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, + COLUMN_SHELL_SCRIPT) +from pylhc_submitter.job_submitter_tools import htc_utils +from pylhc_submitter.job_submitter_tools.iotools import strip_eos_uri +from pylhc_submitter.utils.environment import on_windows + +LOG = logging.getLogger(__name__) + + +@dataclass +class RunnerOpts: + working_directory: Path # Path to the working directory (e.g. afs) + jobflavour: Optional[str] = None # HTCondor job flavour (lengths of the job) + output_dir: Optional[str] = None # Name of the output directory, where jobs store data + ssh: Optional[str] = None # SSH command + dryrun: Optional[bool] = False # Perform only a dry-run, i.e. do all but submit to HTC + htc_arguments: Optional[Dict[str, Any]] = None # Arguments to pass on to htc as keywords + run_local: Optional[bool] = False # Run jobs locally + num_processes: Optional[int] = 4 # Number of processes to run in parallel (locally) + + +def run_jobs(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: + """Selects how to run the jobs. + + Args: + job_df (tfs.TfsDataFrame): DataFrame containing all the job-information + opt (RunnerOpts): Parameters for the runner + """ + + if opt.run_local: + run_local(job_df, opt) + else: + run_htc(job_df, opt) + + +def run_local(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: + """Run all jobs locally. + + Args: + job_df (tfs.TfsDataFrame): DataFrame containing all the job-information + opt (RunnerOpts): Parameters for the runner + """ + if opt.dryrun: + LOG.info(f"Dry-run: Skipping local run.") + return + + LOG.info(f"Running {len(job_df.index)} jobs locally in {opt.num_processes:d} processes.") + + # URI type EOS addresses won't work for copying files from local jobs + check_dest = job_df.get(COLUMN_DEST_DIRECTORY) + if check_dest is not None and strip_eos_uri(check_dest.iloc[0]) != Path(check_dest.iloc[0]): + LOG.warning("The output destination is likely specified as EOS URI," + "which will not work during a local run") + + pool = multiprocessing.Pool(processes=opt.num_processes) + res = pool.map(_execute_shell, job_df.iterrows()) + if any(res): + jobs_failed = [j for r, j in zip(res, job_df.index) if r] + LOG.error(f"{len(jobs_failed)} of {len(job_df)} jobs have failed:\n {jobs_failed}") + raise RuntimeError("At least one job has failed. Check output logs!") + + +def run_htc(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: + """ Create submission file and submit the jobs to ``HTCondor``. 
+ + Args: + job_df (tfs.TfsDataFrame): DataFrame containing all the job-information + opt (RunnerOpts): Parameters for the runner + """ + LOG.info(f"Submitting {len(job_df.index)} jobs on htcondor, flavour '{opt.jobflavour}'.") + LOG.debug("Creating htcondor subfile.") + + subfile = htc_utils.make_subfile( + opt.working_directory, job_df, + output_dir=opt.output_dir, + duration=opt.jobflavour, + **opt.htc_arguments + ) + + if opt.dryrun: + LOG.info("Dry run: submission file created, but not submitting jobs to htcondor.") + return + + LOG.debug("Submitting jobs to htcondor.") + htc_utils.submit_jobfile(subfile, opt.ssh) + + +# Helper ####################################################################### + +def _execute_shell(df_row) -> int: + _, column = df_row + cmd = [] if on_windows() else ["sh"] + + with Path(column[COLUMN_JOB_DIRECTORY], "log.tmp").open("w") as logfile: + process = subprocess.Popen( + cmd + [column[COLUMN_SHELL_SCRIPT]], + shell=on_windows(), + stdout=logfile, + stderr=subprocess.STDOUT, + cwd=column[COLUMN_JOB_DIRECTORY], + ) + return process.wait() \ No newline at end of file diff --git a/pylhc_submitter/utils/environment_tools.py b/pylhc_submitter/utils/environment.py similarity index 100% rename from pylhc_submitter/utils/environment_tools.py rename to pylhc_submitter/utils/environment.py diff --git a/pylhc_submitter/utils/iotools.py b/pylhc_submitter/utils/iotools.py index 261861e..31a50b5 100644 --- a/pylhc_submitter/utils/iotools.py +++ b/pylhc_submitter/utils/iotools.py @@ -4,8 +4,8 @@ Tools for input and output. """ -from pathlib import Path from datetime import datetime +from pathlib import Path from typing import Iterable from generic_parser.entry_datatypes import get_instance_faker_meta @@ -13,7 +13,6 @@ from pylhc_submitter.constants.general import TIME - # Output ----------------------------------------------------------------------- diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index f1cac12..3d3427d 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -7,7 +7,7 @@ import pytest from pylhc_submitter.job_submitter import main as job_submit -from pylhc_submitter.utils.environment_tools import on_linux, on_windows +from pylhc_submitter.utils.environment import on_linux, on_windows SUBFILE = "queuehtc.sub" @@ -223,11 +223,8 @@ def _check_output_content(dir_path: Path): _check_output_content(setup.output_destination) - - - def _generate_combinations(data: Dict[str, Sequence]) -> List[Dict[str, Any]]: - """ Creates all possible combinations of values in data as dictionaries. """ + """ Creates all possible combinations of values in data as a list of dictionaries. 
""" keys = list(data.keys()) all_values = [data[key] for key in keys] From ac8f2426bc9aa305da98bbe9d9b6347b19046d12 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Tue, 7 Nov 2023 22:47:14 +0100 Subject: [PATCH 11/30] fixing imports --- pylhc_submitter/sixdesk_tools/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pylhc_submitter/sixdesk_tools/utils.py b/pylhc_submitter/sixdesk_tools/utils.py index 2c629fb..135b0c3 100644 --- a/pylhc_submitter/sixdesk_tools/utils.py +++ b/pylhc_submitter/sixdesk_tools/utils.py @@ -10,7 +10,7 @@ from pylhc_submitter.constants.autosix import SIXDESKLOCKFILE, get_workspace_path from pylhc_submitter.constants.external_paths import SIXDESK_UTILS -from pylhc_submitter.htc.mask import find_named_variables_in_mask +from pylhc_submitter.job_submitter_tools.mask import find_named_variables_in_mask LOG = logging.getLogger(__name__) From 2d50155bfdeca9487fbaa2c31dcd9065bd6909ae Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Tue, 7 Nov 2023 22:50:14 +0100 Subject: [PATCH 12/30] more import fixes --- pylhc_submitter/autosix.py | 33 ++++++++------------------------- 1 file changed, 8 insertions(+), 25 deletions(-) diff --git a/pylhc_submitter/autosix.py b/pylhc_submitter/autosix.py index f6b720d..9949adf 100644 --- a/pylhc_submitter/autosix.py +++ b/pylhc_submitter/autosix.py @@ -193,34 +193,17 @@ import numpy as np import tfs -from generic_parser import EntryPointParameters, entrypoint, DotDict +from generic_parser import EntryPointParameters, entrypoint from generic_parser.entry_datatypes import DictAsString -from pylhc_submitter.constants.autosix import ( - HEADER_BASEDIR, - SIXENV_REQUIRED, - SIXENV_OPTIONAL, - AutoSixEnvironment, -) +from pylhc_submitter.constants.autosix import (HEADER_BASEDIR, SIXENV_OPTIONAL, SIXENV_REQUIRED, + AutoSixEnvironment) +from pylhc_submitter.constants.job_submitter import COLUMN_JOBID, JOBSUMMARY_FILE from pylhc_submitter.job_submitter_tools.mask import generate_jobdf_index -from pylhc_submitter.job_submitter import ( - JOBSUMMARY_FILE, - COLUMN_JOBID, -) -from pylhc_submitter.sixdesk_tools.create_workspace import ( - set_max_materialize -) -from pylhc_submitter.sixdesk_tools.stages import Stage, STAGE_ORDER -from pylhc_submitter.sixdesk_tools.utils import ( - is_locked, - check_mask, -) -from pylhc_submitter.utils.iotools import ( - PathOrStr, - save_config, - make_replace_entries_iterable, - keys_to_path -) +from pylhc_submitter.sixdesk_tools.stages import STAGE_ORDER, Stage +from pylhc_submitter.sixdesk_tools.utils import check_mask, is_locked +from pylhc_submitter.utils.iotools import (PathOrStr, keys_to_path, make_replace_entries_iterable, + save_config) from pylhc_submitter.utils.logging_tools import log_setup LOG = logging.getLogger(__name__) From a8db8951d2c8e5249da69f9b518cf67b95fcccf0 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 8 Nov 2023 14:46:43 +0100 Subject: [PATCH 13/30] Lots of doc --- doc/modules/constants.rst | 2 + doc/modules/htc.rst | 9 -- doc/modules/job_submitter_tools.rst | 15 +++ pylhc_submitter/job_submitter.py | 4 +- .../job_submitter_tools/constants.py | 0 .../job_submitter_tools/htc_utils.py | 97 +++++++++++++++---- .../job_submitter_tools/iotools.py | 72 +++++++++++--- pylhc_submitter/job_submitter_tools/mask.py | 13 ++- .../job_submitter_tools/runners.py | 15 ++- 9 files changed, 174 insertions(+), 53 deletions(-) delete mode 100644 doc/modules/htc.rst 
create mode 100644 doc/modules/job_submitter_tools.rst delete mode 100644 pylhc_submitter/job_submitter_tools/constants.py diff --git a/doc/modules/constants.rst b/doc/modules/constants.rst index a0a2e36..4f4f241 100644 --- a/doc/modules/constants.rst +++ b/doc/modules/constants.rst @@ -9,6 +9,8 @@ Constants Definitions .. automodule:: pylhc_submitter.constants.external_paths :members: +.. automodule:: pylhc_submitter.constants.job_submitter + :members: .. automodule:: pylhc_submitter.constants.autosix :members: diff --git a/doc/modules/htc.rst b/doc/modules/htc.rst deleted file mode 100644 index f4965b0..0000000 --- a/doc/modules/htc.rst +++ /dev/null @@ -1,9 +0,0 @@ -HTCondor Tools -************************** - -.. automodule:: pylhc_submitter.htc.utils - :members: - - -.. automodule:: pylhc_submitter.htc.mask - :members: diff --git a/doc/modules/job_submitter_tools.rst b/doc/modules/job_submitter_tools.rst new file mode 100644 index 0000000..4f65893 --- /dev/null +++ b/doc/modules/job_submitter_tools.rst @@ -0,0 +1,15 @@ +HTCondor Tools +************************** + +.. automodule:: pylhc_submitter.job_submitter_tools.htc_utils + :members: + + +.. automodule:: pylhc_submitter.job_submitter_tools.iotools + :members: + +.. automodule:: pylhc_submitter.job_submitter_tools.mask + :members: + +.. automodule:: pylhc_submitter.job_submitter_tools.runners + :members: diff --git a/pylhc_submitter/job_submitter.py b/pylhc_submitter/job_submitter.py index 9748eed..f2ba77e 100644 --- a/pylhc_submitter/job_submitter.py +++ b/pylhc_submitter/job_submitter.py @@ -331,8 +331,8 @@ def main(opt): job_df, dropped_jobs = create_jobs(creation_opt) - run_jobs(job_df, runner_opt) + print_stats(job_df.index, dropped_jobs) @@ -383,7 +383,7 @@ def check_opts(opt): # Create new classes opt.output_dir = opt.job_output_dir # renaming - + creation = CreationOpts(**{f.name: opt[f.name] for f in fields(CreationOpts)}) runner = RunnerOpts(**{f.name: opt[f.name] for f in fields(RunnerOpts)}) runner.output_dir = None if opt.output_destination else opt.output_dir diff --git a/pylhc_submitter/job_submitter_tools/constants.py b/pylhc_submitter/job_submitter_tools/constants.py deleted file mode 100644 index e69de29..0000000 diff --git a/pylhc_submitter/job_submitter_tools/htc_utils.py b/pylhc_submitter/job_submitter_tools/htc_utils.py index 43112a4..9d6f298 100644 --- a/pylhc_submitter/job_submitter_tools/htc_utils.py +++ b/pylhc_submitter/job_submitter_tools/htc_utils.py @@ -29,7 +29,9 @@ try: import htcondor except ImportError: # will be handled by job_submitter - pass + class htcondor: + """Dummy HTCondor module. To satisfy the typing. """ + Submit: Any = None LOG = logging.getLogger(__name__) @@ -60,17 +62,33 @@ # Subprocess Methods ########################################################### -def create_subfile_from_job(cwd: Path, job: str): - """Write file to submit to ``HTCondor``.""" +def create_subfile_from_job(cwd: Path, submission: Union[str, htcondor.Submit]) -> Path: + """ + Write file to submit to ``HTCondor``. + + Args: + cwd (Path): working directory + submission (str, htcondor.Submit): HTCondor submission definition (i.e. 
content of the file) + + Returns: + Path: path to sub-file + + """ subfile = cwd / SUBFILE LOG.debug(f"Writing sub-file '{str(subfile)}'.") with subfile.open("w") as f: - f.write(str(job)) + f.write(str(submission)) return subfile -def submit_jobfile(jobfile: Path, ssh: str): - """Submit subfile to ``HTCondor`` via subprocess.""" +def submit_jobfile(jobfile: Path, ssh: str) -> None: + """Submit subfile to ``HTCondor`` via subprocess. + + Args: + jobfile (Path): path to sub-file + ssh (str): ssh target + + """ proc_args = [CMD_SUBMIT, jobfile] if ssh: proc_args = ["ssh", ssh] + proc_args @@ -81,7 +99,16 @@ def submit_jobfile(jobfile: Path, ssh: str): LOG.info("Jobs successfully submitted.") -def _start_subprocess(command: List[str]): +def _start_subprocess(command: List[str]) -> int: + """ Start subprocess and log output. + + Args: + command (List[str]): command to execute + + Returns: + int: return code of the process + + """ LOG.debug(f"Executing command '{command}'") process = subprocess.Popen( command, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -96,9 +123,10 @@ def _start_subprocess(command: List[str]): # Job Creation ################################################################# -def create_multijob_for_bashfiles(job_df: DataFrame, **kwargs): +def create_multijob_for_bashfiles(job_df: DataFrame, **kwargs) -> str: """ - Function to create an ``HTCondor`` job assuming n_files bash-files. + Function to create an ``HTCondor`` submission content for all job-scripts, + i.e. bash-files, in the job_df. Keyword Args: output_dir (str): output directory that will be transferred. Defaults to ``None``. @@ -108,6 +136,9 @@ def create_multijob_for_bashfiles(job_df: DataFrame, **kwargs): retries (int): maximum amount of retries. Default to ``3``. notification (str): Notify under certain conditions. Defaults to ``error``. priority (int): Priority to order your jobs. Defaults to ``None``. + + Returns: + str: HTCondor submission definition. """ # Pre-defined HTCondor arguments for our jobs submit_dict = { @@ -123,7 +154,7 @@ def create_multijob_for_bashfiles(job_df: DataFrame, **kwargs): submit_dict.update(map_kwargs(kwargs)) # Let the htcondor create the submit-file - job = htcondor.Submit(submit_dict) + submission = htcondor.Submit(submit_dict) # add the multiple bash files scripts = [ @@ -133,20 +164,27 @@ def create_multijob_for_bashfiles(job_df: DataFrame, **kwargs): args = [",".join(parts) for parts in zip(scripts, job_df[COLUMN_JOB_DIRECTORY])] queueArgs = ["queue executable, initialdir from (", *args, ")"] - # ugly but job.setQArgs doesn't take string containing \n - # job.setQArgs("\n".join(queueArgs)) - job = str(job) + "\n".join(queueArgs) - LOG.debug(f"Created HTCondor subfile with content: \n{job}") - return job + # ugly but submission.setQArgs doesn't take string containing '\n': + # submission.setQArgs("\n".join(queueArgs)) # doesn't work + submission = str(submission) + "\n".join(queueArgs) + LOG.debug(f"Created HTCondor subfile with content: \n{submission}") + return submission # Main functions ############################################################### -def make_subfile(cwd: Path, job_df: DataFrame, **kwargs): +def make_subfile(cwd: Path, job_df: DataFrame, **kwargs) -> Path: """ Creates submit-file for ``HTCondor``. For kwargs, see ``create_multijob_for_bashfiles``. 
+ + Args: + cwd (Path): working directory + job_df (DataFrame): DataFrame containing all the job-information + + Returns: + Path: path to the submit-file """ job = create_multijob_for_bashfiles(job_df, **kwargs) return create_subfile_from_job(cwd, job) @@ -161,6 +199,19 @@ def write_bash( ) -> DataFrame: """ Write the bash-files to be called by ``HTCondor``, which in turn call the executable. + Takes as input `Dataframe`, job type, and optional additional commandline arguments for the script. + A shell script is created in each job directory in the dataframe. + + Args: + job_df (DataFrame): DataFrame containing all the job-information + output_dir (str): output directory that will be transferred. Defaults to ``None``. + executable (str): name of the executable. Defaults to ``madx``. + cmdline_arguments (dict): additional commandline arguments for the executable + mask (Union[str, Path]): string or path to the mask-file. Defaults to ``None``. + + Returns: + DataFrame: The provided ``job_df`` but with added path to the scripts. + """ if len(job_df.index) > HTCONDOR_JOBLIMIT: raise AttributeError("Submitting too many jobs for HTCONDOR") @@ -214,8 +265,15 @@ def write_bash( def map_kwargs(add_dict: Dict[str, Any]) -> Dict[str, Any]: """ - Maps the kwargs for the job-file. Some arguments have pre-defined choices and defaults, - the remaining ones are just passed on. + Maps the kwargs for the job-file. + Some arguments have pre-defined choices and defaults, the remaining ones are just passed on. + + Args: + add_dict (Dict[str, Any]): additional kwargs to add to the defaults. + + Returns: + Dict[str, Any]: The mapped kwargs. + """ new = {} @@ -249,7 +307,8 @@ def map_kwargs(add_dict: Dict[str, Any]) -> Dict[str, Any]: # Helper ####################################################################### -def _maybe_put_in_quotes(key, value): +def _maybe_put_in_quotes(key: str, value: Any) -> Any: + """ Put value in quoted strings if key starts with '+' """ if key.startswith("+"): return f'"{value}"' return value diff --git a/pylhc_submitter/job_submitter_tools/iotools.py b/pylhc_submitter/job_submitter_tools/iotools.py index bc29777..85a1dcc 100644 --- a/pylhc_submitter/job_submitter_tools/iotools.py +++ b/pylhc_submitter/job_submitter_tools/iotools.py @@ -11,6 +11,7 @@ from typing import Any, Dict, List, Sequence, Tuple, Union import numpy as np +import pandas as pd import tfs from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, @@ -25,25 +26,41 @@ @dataclass class CreationOpts: - working_directory: Path - mask: Union[Path, str] - jobid_mask: str - replace_dict: Dict[str, Any] - output_dir: Path - output_destination: Path - append_jobs: bool - resume_jobs: bool - executable: str - check_files: Sequence[str] - script_arguments: Dict[str, Any] - script_extension: str + """ Options for creating jobs. """ + working_directory: Path # Path to working directory (afs) + mask: Union[Path, str] # Path to mask file or mask-string + jobid_mask: str # Mask for jobid + replace_dict: Dict[str, Any] # Replace-dict + output_dir: Path # Path to local output directory + output_destination: Path # Path to remote output directory (e.g. 
eos) + append_jobs: bool # Append jobs to existing jobs + resume_jobs: bool # Resume jobs that have already run/failed/got interrupted + executable: str # Name of executable to call the script (from mask) + check_files: Sequence[str] # List of output files to check for success + script_arguments: Dict[str, Any] # Arguments to pass to script + script_extension: str # Extension of the script to run def should_drop_jobs(self) -> bool: + """ Check if jobs should be dropped after creating the whole parameter space, + e.g. because they already exist. """ return self.append_jobs or self.resume_jobs def create_jobs(opt: CreationOpts) -> tfs.TfsDataFrame: + """Main function to prepare all the jobs and folder structure. + This greates the value-grid based on the replace-dict and + checks for existing jobs (if so desired). + A job-dataframe is created - and written out - containing all the information and + its values are used to generate the job-scripts. + It also creates bash-scripts to call the executable for the job-scripts. + + Args: + opt (CreationOpts): Options for creating jobs + + Returns: + tfs.TfsDataFrame: The job-dataframe containing information for all jobs. + """ LOG.debug("Creating Jobs.") # Generate product of replace-dict and compare to existing jobs --- @@ -112,6 +129,20 @@ def create_jobs(opt: CreationOpts) -> tfs.TfsDataFrame: def create_folders(job_df: tfs.TfsDataFrame, working_directory: Path, destination_directory: Path = None) -> tfs.TfsDataFrame: + """Create the folder-structure in the given working directory and the + destination directory if given. + This creates a folder per job in which then the job-scripts and bash-scripts + can be stored later. + + Args: + job_df (tfs.TfsDataFrame): DataFrame containing all the job-information + working_directory (Path): Path to the working directory + destination_directory (Path, optional): Path to the destination directory, + i.e. the directory to copy the outputs to manually. Defaults to None. + + Returns: + tfs.TfsDataFrame: The job-dataframe again, but with the added paths to the job-dirs. + """ LOG.debug("Setting up folders: ") jobname = f"{JOBDIRECTORY_PREFIX}.{{0}}" @@ -151,8 +182,10 @@ def is_eos_path(path: Union[Path, str]) -> bool: def strip_eos_uri(path: Union[Path, str]) -> Path: - # EOS paths for HTCondor can be given as URI. Strip for direct writing. - # E.g.: root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt + """ Strip EOS path information from a path. + EOS paths for HTCondor can be given as URI. Strip for direct writing. + E.g.: root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt + """ path = Path(path) parts = path.parts outpath = path @@ -162,7 +195,7 @@ def strip_eos_uri(path: Union[Path, str]) -> Path: return outpath -def print_stats(new_jobs, finished_jobs): +def print_stats(new_jobs: Sequence[str], finished_jobs: Sequence[str]): """Print some quick statistics.""" text = [ "\n------------- QUICK STATS ----------------" @@ -227,7 +260,14 @@ def _drop_already_run_jobs( return job_df, finished_jobs -def _job_was_successful(job_row, output_dir, files) -> bool: +def _job_was_successful(job_row: pd.Series, output_dir: str, files: Sequence[str]) -> bool: + """ Determines if the job was successful. 
+ + Args: + job_row (pd.Series): row from the job_df + output_dir (str): Name of the (local) output directory + files (List[str]): list of files that should have been generated + """ job_dir = job_row.get(COLUMN_DEST_DIRECTORY) or job_row[COLUMN_JOB_DIRECTORY] output_dir = Path(job_dir, output_dir) success = output_dir.is_dir() and any(output_dir.iterdir()) diff --git a/pylhc_submitter/job_submitter_tools/mask.py b/pylhc_submitter/job_submitter_tools/mask.py index 616bb7a..3a2dcaa 100644 --- a/pylhc_submitter/job_submitter_tools/mask.py +++ b/pylhc_submitter/job_submitter_tools/mask.py @@ -8,7 +8,7 @@ import logging import re from pathlib import Path -from typing import Sequence +from typing import Iterable, List, Sequence, Set, Union import pandas as pd from numpy.typing import ArrayLike @@ -53,11 +53,12 @@ def create_job_scripts_from_mask( return job_df -def find_named_variables_in_mask(mask: str): +def find_named_variables_in_mask(mask: str) -> Set[str]: + """ Find all variable-names in the mask. """ return set(re.findall(r"%\((\w+)\)", mask)) -def check_percentage_signs_in_mask(mask: str): +def check_percentage_signs_in_mask(mask: str) -> None: """ Checks for '%' in the mask, that are not replacement variables. """ cleaned_mask = re.sub(r"%\((\w+)\)", "", mask) n_signs = cleaned_mask.count("%") @@ -72,7 +73,8 @@ def check_percentage_signs_in_mask(mask: str): raise KeyError(f"{n_signs} problematic '%' signs found in template. Please remove.") -def generate_jobdf_index(old_df: pd.DataFrame, jobid_mask: str, keys: Sequence[str], values: ArrayLike): +def generate_jobdf_index(old_df: pd.DataFrame, jobid_mask: str, keys: Sequence[str], values: ArrayLike + ) -> Union[List[str], Iterable[int]]: """ Generates index for jobdf from mask for job_id naming. Args: @@ -80,6 +82,9 @@ def generate_jobdf_index(old_df: pd.DataFrame, jobid_mask: str, keys: Sequence[s jobid_mask (str): Mask for naming the jobs. keys (Sequence[str]): Keys to be replaced in the mask. values (np.array_like): Values-Grid to be replaced in the mask. + + Returns: + List[str]: Index for jobdf, either list of strings (the filled jobid_masks) or integer-range. """ if not jobid_mask: # Use integer-range as index, if no mask is given diff --git a/pylhc_submitter/job_submitter_tools/runners.py b/pylhc_submitter/job_submitter_tools/runners.py index 516813a..d85d7a8 100644 --- a/pylhc_submitter/job_submitter_tools/runners.py +++ b/pylhc_submitter/job_submitter_tools/runners.py @@ -10,6 +10,7 @@ from dataclasses import dataclass from pathlib import Path from typing import Any, Dict, Optional +import pandas as pd import tfs @@ -24,6 +25,7 @@ @dataclass class RunnerOpts: + """ Options for running the submission. """ working_directory: Path # Path to the working directory (e.g. afs) jobflavour: Optional[str] = None # HTCondor job flavour (lengths of the job) output_dir: Optional[str] = None # Name of the output directory, where jobs store data @@ -41,7 +43,6 @@ def run_jobs(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: job_df (tfs.TfsDataFrame): DataFrame containing all the job-information opt (RunnerOpts): Parameters for the runner """ - if opt.run_local: run_local(job_df, opt) else: @@ -102,7 +103,15 @@ def run_htc(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: # Helper ####################################################################### -def _execute_shell(df_row) -> int: +def _execute_shell(df_row: pd.Series) -> int: + """ Execute the shell script. 
+ + Args: + df_row (pd.Series): row in the job-dataframe + + Returns: + int: return code of the process + """ _, column = df_row cmd = [] if on_windows() else ["sh"] @@ -114,4 +123,4 @@ def _execute_shell(df_row) -> int: stderr=subprocess.STDOUT, cwd=column[COLUMN_JOB_DIRECTORY], ) - return process.wait() \ No newline at end of file + return process.wait() From b4c9ae21a1e8033ab647e06d1ab9b11291493bd6 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 8 Nov 2023 16:02:16 +0100 Subject: [PATCH 14/30] bugfixes --- .../job_submitter_tools/htc_utils.py | 10 +++---- .../job_submitter_tools/iotools.py | 30 +++++++++++++------ .../job_submitter_tools/runners.py | 10 ++----- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/pylhc_submitter/job_submitter_tools/htc_utils.py b/pylhc_submitter/job_submitter_tools/htc_utils.py index 9d6f298..49150a5 100644 --- a/pylhc_submitter/job_submitter_tools/htc_utils.py +++ b/pylhc_submitter/job_submitter_tools/htc_utils.py @@ -22,7 +22,7 @@ from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE, COLUMN_SHELL_SCRIPT, EXECUTEABLEPATH, NON_PARAMETER_COLUMNS) -from pylhc_submitter.job_submitter_tools.iotools import is_eos_path +from pylhc_submitter.job_submitter_tools.iotools import is_eos_uri from pylhc_submitter.job_submitter_tools.mask import is_mask_file from pylhc_submitter.utils.environment import on_windows @@ -252,7 +252,7 @@ def write_bash( dest_dir = job.get(COLUMN_DEST_DIRECTORY) if output_dir and dest_dir and output_dir != dest_dir: cp_command = f'cp -r {output_dir} {dest_dir}' - if is_eos_path(dest_dir): + if is_eos_uri(dest_dir): cp_command = f'eos {cp_command}' f.write(f'{cp_command}\n') @@ -278,7 +278,7 @@ def map_kwargs(add_dict: Dict[str, Any]) -> Dict[str, Any]: new = {} # Predefined mappings - htc_map = { + htc_map = { # name: mapped_name, choices, default "duration": ("+JobFlavour", JOBFLAVOURS, "workday"), "output_dir": ("transfer_output_files", None, None), "accounting_group": ("+AccountingGroup", None, None), @@ -289,14 +289,14 @@ def map_kwargs(add_dict: Dict[str, Any]) -> Dict[str, Any]: try: value = add_dict.pop(key) except KeyError: - if default is not None: - new[mapped] = default + value = default # could be `None` else: if choices is not None and value not in choices: raise TypeError( f"{key} needs to be one of '{str(choices).strip('[]')}' but " f"instead was '{value}'" ) + if value is not None: new[mapped] = _maybe_put_in_quotes(mapped, value) # Pass-Through Arguments diff --git a/pylhc_submitter/job_submitter_tools/iotools.py b/pylhc_submitter/job_submitter_tools/iotools.py index 85a1dcc..33631c4 100644 --- a/pylhc_submitter/job_submitter_tools/iotools.py +++ b/pylhc_submitter/job_submitter_tools/iotools.py @@ -160,12 +160,14 @@ def create_folders(job_df: tfs.TfsDataFrame, working_directory: Path, # Make some symlinks for easy navigation--- # Output directory -> Working Directory - sym_submission = destination_directory / Path('SUBMISSION_DIR') + sym_submission = strip_dest_dir / Path('SUBMISSION_DIR') + sym_submission.unlink(missing_ok=True) sym_submission.symlink_to(working_directory.resolve(), target_is_directory=True) # Working Directory -> Output Directory sym_destination = working_directory / Path('OUTPUT_DIR') - sym_destination.symlink_to(destination_directory.resolve(), target_is_directory=True) + sym_destination.unlink(missing_ok=True) + sym_destination.symlink_to(strip_dest_dir.resolve(), 
target_is_directory=True) # Create output dirs per job --- for job_dest_dir in job_df[COLUMN_DEST_DIRECTORY]: @@ -175,10 +177,21 @@ def create_folders(job_df: tfs.TfsDataFrame, working_directory: Path, return job_df -def is_eos_path(path: Union[Path, str]) -> bool: - """ Check if the given path leads to EOS.""" - strip_path_parts = strip_eos_uri(path).parts - return len(strip_path_parts) > 1 and strip_path_parts[1] == 'eos' +def is_eos_uri(path: Union[Path, str, None]) -> bool: + """ Check if the given path is an EOS-URI as `eos cp` only works with those. + E.g.: root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt + """ + if path is None: + return False + + parts = Path(path).parts + return ( + len(parts) >= 3 # at least root:, server, path + and + parts[0].endswith(':') + and + parts[2] == 'eos' + ) def strip_eos_uri(path: Union[Path, str]) -> Path: @@ -188,11 +201,10 @@ def strip_eos_uri(path: Union[Path, str]) -> Path: """ path = Path(path) parts = path.parts - outpath = path if parts[0].endswith(':'): # the first two parts are host info, e.g `file: //host/path` - outpath = Path('/', *parts[2:]) - return outpath + return Path('/', *parts[2:]) + return path def print_stats(new_jobs: Sequence[str], finished_jobs: Sequence[str]): diff --git a/pylhc_submitter/job_submitter_tools/runners.py b/pylhc_submitter/job_submitter_tools/runners.py index d85d7a8..06eeceb 100644 --- a/pylhc_submitter/job_submitter_tools/runners.py +++ b/pylhc_submitter/job_submitter_tools/runners.py @@ -10,14 +10,14 @@ from dataclasses import dataclass from pathlib import Path from typing import Any, Dict, Optional -import pandas as pd +import pandas as pd import tfs from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, COLUMN_SHELL_SCRIPT) from pylhc_submitter.job_submitter_tools import htc_utils -from pylhc_submitter.job_submitter_tools.iotools import strip_eos_uri +from pylhc_submitter.job_submitter_tools.iotools import is_eos_uri from pylhc_submitter.utils.environment import on_windows LOG = logging.getLogger(__name__) @@ -61,12 +61,6 @@ def run_local(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: return LOG.info(f"Running {len(job_df.index)} jobs locally in {opt.num_processes:d} processes.") - - # URI type EOS addresses won't work for copying files from local jobs - check_dest = job_df.get(COLUMN_DEST_DIRECTORY) - if check_dest is not None and strip_eos_uri(check_dest.iloc[0]) != Path(check_dest.iloc[0]): - LOG.warning("The output destination is likely specified as EOS URI," - "which will not work during a local run") pool = multiprocessing.Pool(processes=opt.num_processes) res = pool.map(_execute_shell, job_df.iterrows()) From bc6ff9c4356c3ac432eafc93dbb910ffd8eb2199 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:31:02 +0100 Subject: [PATCH 15/30] uri-bugfixes --- doc/modules/job_submitter_tools.rst | 8 +- pylhc_submitter/autosix.py | 2 +- pylhc_submitter/job_submitter.py | 16 +- .../job_submitter_tools/__init__.py | 0 .../job_submitter_tools/htc_utils.py | 321 ------------------ .../job_submitter_tools/iotools.py | 297 ---------------- pylhc_submitter/job_submitter_tools/mask.py | 114 ------- .../job_submitter_tools/runners.py | 120 ------- pylhc_submitter/sixdesk_tools/utils.py | 2 +- tests/unit/test_job_submitter.py | 82 +++-- 10 files changed, 80 insertions(+), 882 deletions(-) delete mode 100644 pylhc_submitter/job_submitter_tools/__init__.py delete mode 100644 
pylhc_submitter/job_submitter_tools/htc_utils.py delete mode 100644 pylhc_submitter/job_submitter_tools/iotools.py delete mode 100644 pylhc_submitter/job_submitter_tools/mask.py delete mode 100644 pylhc_submitter/job_submitter_tools/runners.py diff --git a/doc/modules/job_submitter_tools.rst b/doc/modules/job_submitter_tools.rst index 4f65893..34be7cc 100644 --- a/doc/modules/job_submitter_tools.rst +++ b/doc/modules/job_submitter_tools.rst @@ -1,15 +1,15 @@ HTCondor Tools ************************** -.. automodule:: pylhc_submitter.job_submitter_tools.htc_utils +.. automodule:: pylhc_submitter.submitter.htc_utils :members: -.. automodule:: pylhc_submitter.job_submitter_tools.iotools +.. automodule:: pylhc_submitter.submitter.iotools :members: -.. automodule:: pylhc_submitter.job_submitter_tools.mask +.. automodule:: pylhc_submitter.submitter.mask :members: -.. automodule:: pylhc_submitter.job_submitter_tools.runners +.. automodule:: pylhc_submitter.submitter.runners :members: diff --git a/pylhc_submitter/autosix.py b/pylhc_submitter/autosix.py index 9949adf..09b2903 100644 --- a/pylhc_submitter/autosix.py +++ b/pylhc_submitter/autosix.py @@ -199,7 +199,7 @@ from pylhc_submitter.constants.autosix import (HEADER_BASEDIR, SIXENV_OPTIONAL, SIXENV_REQUIRED, AutoSixEnvironment) from pylhc_submitter.constants.job_submitter import COLUMN_JOBID, JOBSUMMARY_FILE -from pylhc_submitter.job_submitter_tools.mask import generate_jobdf_index +from pylhc_submitter.submitter.mask import generate_jobdf_index from pylhc_submitter.sixdesk_tools.stages import STAGE_ORDER, Stage from pylhc_submitter.sixdesk_tools.utils import check_mask, is_locked from pylhc_submitter.utils.iotools import (PathOrStr, keys_to_path, make_replace_entries_iterable, diff --git a/pylhc_submitter/job_submitter.py b/pylhc_submitter/job_submitter.py index f2ba77e..30d08a4 100644 --- a/pylhc_submitter/job_submitter.py +++ b/pylhc_submitter/job_submitter.py @@ -159,11 +159,11 @@ from generic_parser.tools import print_dict_tree from pylhc_submitter.constants.job_submitter import EXECUTEABLEPATH, SCRIPT_EXTENSIONS -from pylhc_submitter.job_submitter_tools.htc_utils import JOBFLAVOURS -from pylhc_submitter.job_submitter_tools.iotools import CreationOpts, create_jobs, print_stats -from pylhc_submitter.job_submitter_tools.mask import (check_percentage_signs_in_mask, +from pylhc_submitter.submitter.htc_utils import JOBFLAVOURS +from pylhc_submitter.submitter.iotools import CreationOpts, create_jobs, is_eos_uri, print_stats +from pylhc_submitter.submitter.mask import (check_percentage_signs_in_mask, find_named_variables_in_mask, is_mask_file) -from pylhc_submitter.job_submitter_tools.runners import RunnerOpts, run_jobs +from pylhc_submitter.submitter.runners import RunnerOpts, run_jobs from pylhc_submitter.utils.iotools import (PathOrStr, keys_to_path, make_replace_entries_iterable, save_config) from pylhc_submitter.utils.logging_tools import log_setup @@ -343,7 +343,7 @@ def check_opts(opt): raise ValueError("Select either Resume jobs or Append jobs") # Paths --- - opt = keys_to_path(opt, "working_directory", "executable", "output_destination") + opt = keys_to_path(opt, "working_directory", "executable") if str(opt.executable) in EXECUTEABLEPATH.keys(): opt.executable = str(opt.executable) @@ -353,6 +353,12 @@ def check_opts(opt): opt.mask = Path(opt.mask) else: mask_content = opt.mask + + if is_eos_uri(opt.output_destination) and not ("://" in opt.output_destination and "//eos" in opt.output_destination): + raise ValueError( + "The 
'output_destination' is an EOS-URI but missing '://' or '//eos' (double slashes?). " + ) + # Replace dict --- dict_keys = set(opt.replace_dict.keys()) diff --git a/pylhc_submitter/job_submitter_tools/__init__.py b/pylhc_submitter/job_submitter_tools/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pylhc_submitter/job_submitter_tools/htc_utils.py b/pylhc_submitter/job_submitter_tools/htc_utils.py deleted file mode 100644 index 49150a5..0000000 --- a/pylhc_submitter/job_submitter_tools/htc_utils.py +++ /dev/null @@ -1,321 +0,0 @@ -""" -HTCondor Utilities ------------------- - -This module provides functionality to create HTCondor jobs and submit them to ``HTCondor``. - -``write_bash`` creates bash scripts executing either a python or madx script. -Takes as input `Dataframe`, job type, and optional additional commandline arguments for the script. -A shell script is created in each job directory in the dataframe. - -``make_subfile`` takes the job dataframe and creates the **.sub** files required for submissions to -``HTCondor``. The **.sub** file will be put in the working directory. The maximum runtime of one -job can be specified, standard is 8h. -""" -import logging -import subprocess -from pathlib import Path -from typing import Any, Dict, List, Union - -from pandas import DataFrame - -from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, - COLUMN_JOB_FILE, COLUMN_SHELL_SCRIPT, - EXECUTEABLEPATH, NON_PARAMETER_COLUMNS) -from pylhc_submitter.job_submitter_tools.iotools import is_eos_uri -from pylhc_submitter.job_submitter_tools.mask import is_mask_file -from pylhc_submitter.utils.environment import on_windows - -try: - import htcondor -except ImportError: # will be handled by job_submitter - class htcondor: - """Dummy HTCondor module. To satisfy the typing. """ - Submit: Any = None - - -LOG = logging.getLogger(__name__) - -# HTC Constants ################################################################ - -SHEBANG = "#!/bin/bash" -SUBFILE = "queuehtc.sub" -BASH_FILENAME = "Job" - -HTCONDOR_JOBLIMIT = 100000 - -CMD_SUBMIT = "condor_submit" -JOBFLAVOURS = ( - "espresso", # 20 min - "microcentury", # 1 h - "longlunch", # 2 h - "workday", # 8 h - "tomorrow", # 1 d - "testmatch", # 3 d - "nextweek", # 1 w -) - -NOTIFICATIONS = ("always", "complete", "error", "never") - - - -# Subprocess Methods ########################################################### - - -def create_subfile_from_job(cwd: Path, submission: Union[str, htcondor.Submit]) -> Path: - """ - Write file to submit to ``HTCondor``. - - Args: - cwd (Path): working directory - submission (str, htcondor.Submit): HTCondor submission definition (i.e. content of the file) - - Returns: - Path: path to sub-file - - """ - subfile = cwd / SUBFILE - LOG.debug(f"Writing sub-file '{str(subfile)}'.") - with subfile.open("w") as f: - f.write(str(submission)) - return subfile - - -def submit_jobfile(jobfile: Path, ssh: str) -> None: - """Submit subfile to ``HTCondor`` via subprocess. - - Args: - jobfile (Path): path to sub-file - ssh (str): ssh target - - """ - proc_args = [CMD_SUBMIT, jobfile] - if ssh: - proc_args = ["ssh", ssh] + proc_args - status = _start_subprocess(proc_args) - if status: - raise RuntimeError("Submit to HTCondor was not successful!") - else: - LOG.info("Jobs successfully submitted.") - - -def _start_subprocess(command: List[str]) -> int: - """ Start subprocess and log output. 
- - Args: - command (List[str]): command to execute - - Returns: - int: return code of the process - - """ - LOG.debug(f"Executing command '{command}'") - process = subprocess.Popen( - command, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - ) - for line in process.stdout: - htc_line = line.decode("utf-8").strip() - if htc_line: - LOG.debug(f"{htc_line} (from HTCondor)") - return process.wait() - - -# Job Creation ################################################################# - - -def create_multijob_for_bashfiles(job_df: DataFrame, **kwargs) -> str: - """ - Function to create an ``HTCondor`` submission content for all job-scripts, - i.e. bash-files, in the job_df. - - Keyword Args: - output_dir (str): output directory that will be transferred. Defaults to ``None``. - duration (str): max duration of the job. Needs to be one of the ``HTCondor`` Jobflavours. - Defaults to ``workday``. - group (str): force use of accounting group. Defaults to ``None``. - retries (int): maximum amount of retries. Default to ``3``. - notification (str): Notify under certain conditions. Defaults to ``error``. - priority (int): Priority to order your jobs. Defaults to ``None``. - - Returns: - str: HTCondor submission definition. - """ - # Pre-defined HTCondor arguments for our jobs - submit_dict = { - "MyId": "htcondor", - "universe": "vanilla", - "arguments": "$(ClusterId) $(ProcId)", - "output": Path("$(initialdir)", "$(MyId).$(ClusterId).$(ProcId).out"), - "error": Path("$(initialdir)", "$(MyId).$(ClusterId).$(ProcId).err"), - "log": Path("$(initialdir)", "$(MyId).$(ClusterId).$(ProcId).log"), - "on_exit_remove": "(ExitBySignal == False) && (ExitCode == 0)", - "requirements": "Machine =!= LastRemoteHost", - } - submit_dict.update(map_kwargs(kwargs)) - - # Let the htcondor create the submit-file - submission = htcondor.Submit(submit_dict) - - # add the multiple bash files - scripts = [ - str(Path(*parts)) - for parts in zip(job_df[COLUMN_JOB_DIRECTORY], job_df[COLUMN_SHELL_SCRIPT]) - ] - args = [",".join(parts) for parts in zip(scripts, job_df[COLUMN_JOB_DIRECTORY])] - queueArgs = ["queue executable, initialdir from (", *args, ")"] - - # ugly but submission.setQArgs doesn't take string containing '\n': - # submission.setQArgs("\n".join(queueArgs)) # doesn't work - submission = str(submission) + "\n".join(queueArgs) - LOG.debug(f"Created HTCondor subfile with content: \n{submission}") - return submission - - -# Main functions ############################################################### - - -def make_subfile(cwd: Path, job_df: DataFrame, **kwargs) -> Path: - """ - Creates submit-file for ``HTCondor``. - For kwargs, see ``create_multijob_for_bashfiles``. - - Args: - cwd (Path): working directory - job_df (DataFrame): DataFrame containing all the job-information - - Returns: - Path: path to the submit-file - """ - job = create_multijob_for_bashfiles(job_df, **kwargs) - return create_subfile_from_job(cwd, job) - - -def write_bash( - job_df: DataFrame, - output_dir: Path = None, - executable: str = "madx", - cmdline_arguments: dict = None, - mask: Union[str, Path] = None, -) -> DataFrame: - """ - Write the bash-files to be called by ``HTCondor``, which in turn call the executable. - Takes as input `Dataframe`, job type, and optional additional commandline arguments for the script. - A shell script is created in each job directory in the dataframe. - - Args: - job_df (DataFrame): DataFrame containing all the job-information - output_dir (str): output directory that will be transferred. 
Defaults to ``None``. - executable (str): name of the executable. Defaults to ``madx``. - cmdline_arguments (dict): additional commandline arguments for the executable - mask (Union[str, Path]): string or path to the mask-file. Defaults to ``None``. - - Returns: - DataFrame: The provided ``job_df`` but with added path to the scripts. - - """ - if len(job_df.index) > HTCONDOR_JOBLIMIT: - raise AttributeError("Submitting too many jobs for HTCONDOR") - - exec_path = f"{str(EXECUTEABLEPATH.get(executable, executable))} " if executable else '' - cmds = f" {' '.join([f'{param} {val}' for param, val in cmdline_arguments.items()])}" if cmdline_arguments else '' - - shell_scripts = [None] * len(job_df.index) - for idx, (jobid, job) in enumerate(job_df.iterrows()): - job_dir = Path(job[COLUMN_JOB_DIRECTORY]) - bash_file_name = f"{BASH_FILENAME}.{jobid}.{'bat' if on_windows() else 'sh'}" - jobfile = job_dir / bash_file_name - - LOG.debug(f"Writing bash-file {idx:d} '{jobfile}'.") - with open(jobfile, "w") as f: - # Preparation --- - if not on_windows(): - f.write(f"{SHEBANG}\n") - - if output_dir is not None: - f.write(f"mkdir {str(output_dir)}\n") - - # The actual job execution --- - f.write(exec_path) - - # Call the mask-file or the filled-template string - if is_mask_file(mask): - f.write(str(job_dir / job[COLUMN_JOB_FILE])) - else: - replace_columns = [column for column in job.index.tolist() if column not in NON_PARAMETER_COLUMNS] - f.write(mask % dict(zip(replace_columns, job[replace_columns]))) - - # Additional commands for the mask/string - f.write(cmds) - f.write("\n") - - # Manually copy output (if needed) --- - dest_dir = job.get(COLUMN_DEST_DIRECTORY) - if output_dir and dest_dir and output_dir != dest_dir: - cp_command = f'cp -r {output_dir} {dest_dir}' - if is_eos_uri(dest_dir): - cp_command = f'eos {cp_command}' - - f.write(f'{cp_command}\n') - - shell_scripts[idx] = bash_file_name - - job_df[COLUMN_SHELL_SCRIPT] = shell_scripts - return job_df - - -def map_kwargs(add_dict: Dict[str, Any]) -> Dict[str, Any]: - """ - Maps the kwargs for the job-file. - Some arguments have pre-defined choices and defaults, the remaining ones are just passed on. - - Args: - add_dict (Dict[str, Any]): additional kwargs to add to the defaults. - - Returns: - Dict[str, Any]: The mapped kwargs. 
- - """ - new = {} - - # Predefined mappings - htc_map = { # name: mapped_name, choices, default - "duration": ("+JobFlavour", JOBFLAVOURS, "workday"), - "output_dir": ("transfer_output_files", None, None), - "accounting_group": ("+AccountingGroup", None, None), - "max_retries": ("max_retries", None, 3), - "notification": ("notification", NOTIFICATIONS, "error"), - } - for key, (mapped, choices, default) in htc_map.items(): - try: - value = add_dict.pop(key) - except KeyError: - value = default # could be `None` - else: - if choices is not None and value not in choices: - raise TypeError( - f"{key} needs to be one of '{str(choices).strip('[]')}' but " - f"instead was '{value}'" - ) - if value is not None: - new[mapped] = _maybe_put_in_quotes(mapped, value) - - # Pass-Through Arguments - LOG.debug(f"Remaining arguments to be added: '{str(add_dict).strip('{}'):s}'") - new.update(add_dict) - return new - - -# Helper ####################################################################### - -def _maybe_put_in_quotes(key: str, value: Any) -> Any: - """ Put value in quoted strings if key starts with '+' """ - if key.startswith("+"): - return f'"{value}"' - return value - - -# Script Mode ################################################################## - - -if __name__ == "__main__": - raise EnvironmentError(f"{__file__} is not supposed to run as main.") diff --git a/pylhc_submitter/job_submitter_tools/iotools.py b/pylhc_submitter/job_submitter_tools/iotools.py deleted file mode 100644 index 33631c4..0000000 --- a/pylhc_submitter/job_submitter_tools/iotools.py +++ /dev/null @@ -1,297 +0,0 @@ -""" -Job Submitter IO-Tools ----------------------- - -Tools for input and output for the job-submitter. -""" -import itertools -import logging -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Dict, List, Sequence, Tuple, Union - -import numpy as np -import pandas as pd -import tfs - -from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, - COLUMN_JOBID, JOBDIRECTORY_PREFIX, - JOBSUMMARY_FILE, SCRIPT_EXTENSIONS) -from pylhc_submitter.job_submitter_tools import htc_utils -from pylhc_submitter.job_submitter_tools.mask import (create_job_scripts_from_mask, - generate_jobdf_index, is_mask_file) - -LOG = logging.getLogger(__name__) - - -@dataclass -class CreationOpts: - """ Options for creating jobs. """ - working_directory: Path # Path to working directory (afs) - mask: Union[Path, str] # Path to mask file or mask-string - jobid_mask: str # Mask for jobid - replace_dict: Dict[str, Any] # Replace-dict - output_dir: Path # Path to local output directory - output_destination: Path # Path to remote output directory (e.g. eos) - append_jobs: bool # Append jobs to existing jobs - resume_jobs: bool # Resume jobs that have already run/failed/got interrupted - executable: str # Name of executable to call the script (from mask) - check_files: Sequence[str] # List of output files to check for success - script_arguments: Dict[str, Any] # Arguments to pass to script - script_extension: str # Extension of the script to run - - def should_drop_jobs(self) -> bool: - """ Check if jobs should be dropped after creating the whole parameter space, - e.g. because they already exist. """ - return self.append_jobs or self.resume_jobs - - - -def create_jobs(opt: CreationOpts) -> tfs.TfsDataFrame: - """Main function to prepare all the jobs and folder structure. 
- This greates the value-grid based on the replace-dict and - checks for existing jobs (if so desired). - A job-dataframe is created - and written out - containing all the information and - its values are used to generate the job-scripts. - It also creates bash-scripts to call the executable for the job-scripts. - - Args: - opt (CreationOpts): Options for creating jobs - - Returns: - tfs.TfsDataFrame: The job-dataframe containing information for all jobs. - """ - LOG.debug("Creating Jobs.") - - # Generate product of replace-dict and compare to existing jobs --- - parameters, values_grid, prev_job_df = _generate_parameter_space( - replace_dict=opt.replace_dict, - append_jobs=opt.append_jobs, - cwd=opt.working_directory, - ) - - # Check new jobs --- - njobs = len(values_grid) - if njobs == 0: - raise ValueError(f"No (new) jobs found!") - - if njobs > htc_utils.HTCONDOR_JOBLIMIT: - LOG.warning( - f"You are attempting to submit an important number of jobs ({njobs})." - "This can be a high stress on your system, make sure you know what you are doing." - ) - - LOG.debug(f"Initial number of jobs: {njobs:d}") - - # Generate new job-dataframe --- - job_df = tfs.TfsDataFrame( - index=generate_jobdf_index(prev_job_df, opt.jobid_mask, parameters, values_grid), - columns=parameters, - data=values_grid, - ) - job_df = tfs.concat([prev_job_df, job_df], sort=False, how_headers='left') - - # Setup folders --- - job_df = create_folders(job_df, opt.working_directory, opt.output_destination) - - # Create scripts --- - if is_mask_file(opt.mask): - LOG.debug("Creating all jobs from mask.") - script_extension = _get_script_extension(opt.script_extension, opt.executable, opt.mask) - job_df = create_job_scripts_from_mask( - job_df, opt.mask, parameters, script_extension - ) - - LOG.debug("Creating shell scripts.") - job_df = htc_utils.write_bash( - job_df, - output_dir=opt.output_dir, - executable=opt.executable, - cmdline_arguments=opt.script_arguments, - mask=opt.mask, - ) - - # Convert paths to strings and write df to file --- - job_df[COLUMN_JOB_DIRECTORY] = job_df[COLUMN_JOB_DIRECTORY].apply(str) - if COLUMN_DEST_DIRECTORY in job_df.columns: - job_df[COLUMN_DEST_DIRECTORY] = job_df[COLUMN_DEST_DIRECTORY].apply(str) - - tfs.write(str(opt.working_directory / JOBSUMMARY_FILE), job_df, save_index=COLUMN_JOBID) - - # Drop already run jobs --- - dropped_jobs = [] - if opt.should_drop_jobs(): - job_df, dropped_jobs = _drop_already_run_jobs( - job_df, opt.output_dir, opt.check_files - ) - return job_df, dropped_jobs - - -def create_folders(job_df: tfs.TfsDataFrame, working_directory: Path, - destination_directory: Path = None) -> tfs.TfsDataFrame: - """Create the folder-structure in the given working directory and the - destination directory if given. - This creates a folder per job in which then the job-scripts and bash-scripts - can be stored later. - - Args: - job_df (tfs.TfsDataFrame): DataFrame containing all the job-information - working_directory (Path): Path to the working directory - destination_directory (Path, optional): Path to the destination directory, - i.e. the directory to copy the outputs to manually. Defaults to None. - - Returns: - tfs.TfsDataFrame: The job-dataframe again, but with the added paths to the job-dirs. 
- """ - LOG.debug("Setting up folders: ") - - jobname = f"{JOBDIRECTORY_PREFIX}.{{0}}" - job_df[COLUMN_JOB_DIRECTORY] = [working_directory / jobname.format(id_) for id_ in job_df.index] - - for job_dir in job_df[COLUMN_JOB_DIRECTORY]: - job_dir.mkdir(exist_ok=True) - LOG.debug(f" created '{job_dir}'.") - - if destination_directory: - job_df[COLUMN_DEST_DIRECTORY] = [destination_directory / jobname.format(id_) for id_ in job_df.index] - - strip_dest_dir = strip_eos_uri(destination_directory) - strip_dest_dir.mkdir(parents=True, exist_ok=True) - - # Make some symlinks for easy navigation--- - # Output directory -> Working Directory - sym_submission = strip_dest_dir / Path('SUBMISSION_DIR') - sym_submission.unlink(missing_ok=True) - sym_submission.symlink_to(working_directory.resolve(), target_is_directory=True) - - # Working Directory -> Output Directory - sym_destination = working_directory / Path('OUTPUT_DIR') - sym_destination.unlink(missing_ok=True) - sym_destination.symlink_to(strip_dest_dir.resolve(), target_is_directory=True) - - # Create output dirs per job --- - for job_dest_dir in job_df[COLUMN_DEST_DIRECTORY]: - strip_eos_uri(job_dest_dir).mkdir(exist_ok=True) - LOG.debug(f" created '{job_dest_dir}'.") - - return job_df - - -def is_eos_uri(path: Union[Path, str, None]) -> bool: - """ Check if the given path is an EOS-URI as `eos cp` only works with those. - E.g.: root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt - """ - if path is None: - return False - - parts = Path(path).parts - return ( - len(parts) >= 3 # at least root:, server, path - and - parts[0].endswith(':') - and - parts[2] == 'eos' - ) - - -def strip_eos_uri(path: Union[Path, str]) -> Path: - """ Strip EOS path information from a path. - EOS paths for HTCondor can be given as URI. Strip for direct writing. - E.g.: root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt - """ - path = Path(path) - parts = path.parts - if parts[0].endswith(':'): - # the first two parts are host info, e.g `file: //host/path` - return Path('/', *parts[2:]) - return path - - -def print_stats(new_jobs: Sequence[str], finished_jobs: Sequence[str]): - """Print some quick statistics.""" - text = [ - "\n------------- QUICK STATS ----------------" - f"Jobs total:{len(new_jobs) + len(finished_jobs):d}", - f"Jobs to run: {len(new_jobs):d}", - f"Jobs already finished: {len(finished_jobs):d}", - "---------- JOBS TO RUN: NAMES -------------" - ] - for job_name in new_jobs: - text.append(job_name) - text += ["--------- JOBS FINISHED: NAMES ------------"] - for job_name in finished_jobs: - text.append(job_name) - LOG.info("\n".join(text)) - - -def _generate_parameter_space( - replace_dict: Dict[str, Any], append_jobs: bool, cwd: Path - ) -> Tuple[List[str], np.ndarray, tfs.TfsDataFrame]: - """ Generate parameter space from replace-dict, check for existing jobs. 
""" - LOG.debug("Generating parameter space from replace-dict.") - parameters = list(replace_dict.keys()) - values_grid = _generate_values_grid(replace_dict) - if not append_jobs: - return parameters, values_grid, tfs.TfsDataFrame() - - jobfile_path = cwd / JOBSUMMARY_FILE - try: - prev_job_df = tfs.read(str(jobfile_path.absolute()), index=COLUMN_JOBID) - except FileNotFoundError as filerror: - raise FileNotFoundError( - "Cannot append jobs, as no previous jobfile was found at " f"'{jobfile_path}'" - ) from filerror - new_jobs_mask = [elem not in prev_job_df[parameters].values for elem in values_grid] - values_grid = values_grid[new_jobs_mask] - - return parameters, values_grid, prev_job_df - - -def _generate_values_grid(replace_dict: Dict[str, Any]) -> np.ndarray: - """ Creates an array of the inner-product of the replace-dict. """ - return np.array(list(itertools.product(*replace_dict.values())), dtype=object) - - -def _drop_already_run_jobs( - job_df: tfs.TfsDataFrame, output_dir: str, check_files: str - ) -> Tuple[tfs.TfsDataFrame, List[str]]: - """ Check for jobs that have already been run and drop them from current job_df. """ - LOG.debug("Dropping already finished jobs.") - finished_jobs = [ - idx - for idx, row in job_df.iterrows() - if _job_was_successful(row, output_dir, check_files) - ] - - LOG.info( - f"{len(finished_jobs):d} of {len(job_df.index):d}" - " Jobs have already finished and will be skipped." - ) - - job_df = job_df.drop(index=finished_jobs) - return job_df, finished_jobs - - -def _job_was_successful(job_row: pd.Series, output_dir: str, files: Sequence[str]) -> bool: - """ Determines if the job was successful. - - Args: - job_row (pd.Series): row from the job_df - output_dir (str): Name of the (local) output directory - files (List[str]): list of files that should have been generated - """ - job_dir = job_row.get(COLUMN_DEST_DIRECTORY) or job_row[COLUMN_JOB_DIRECTORY] - output_dir = Path(job_dir, output_dir) - success = output_dir.is_dir() and any(output_dir.iterdir()) - if success and files is not None and len(files): - for f in files: - success &= len(list(output_dir.glob(f))) > 0 - return success - - -def _get_script_extension(script_extension: str, executable: Path, mask: Path) -> str: - """ Returns the extension of the script to run based on - either the given value, its executable or the mask. """ - if script_extension is not None: - return script_extension - return SCRIPT_EXTENSIONS.get(executable, mask.suffix) diff --git a/pylhc_submitter/job_submitter_tools/mask.py b/pylhc_submitter/job_submitter_tools/mask.py deleted file mode 100644 index 3a2dcaa..0000000 --- a/pylhc_submitter/job_submitter_tools/mask.py +++ /dev/null @@ -1,114 +0,0 @@ -""" -Mask Resolver -------------- - -This module provides functionality to resolve and write script masks for ``HTCondor`` jobs -submission. -""" -import logging -import re -from pathlib import Path -from typing import Iterable, List, Sequence, Set, Union - -import pandas as pd -from numpy.typing import ArrayLike - -from pylhc_submitter.constants.job_submitter import COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE - -LOG = logging.getLogger(__name__) - - -def create_job_scripts_from_mask( - job_df: pd.DataFrame, maskfile: Path, replace_keys: dict, file_ext: str -) -> pd.DataFrame: - """ - Takes path to mask file, list of parameter to be replaced and pandas dataframe containg per job - the job directory where processed mask is to be put, and columns containing the parameter values - with column named like replace parameters. 
Job directories have to be created beforehand. - Processed (madx) mask has the same filename as mask but with the given file extension. - Input Dataframe is returned with additional column containing path to the processed script - files. - - Args: - job_df (pd.DataFrame): Job parameters as defined in description. - maskfile: `Path` object to the mask file. - replace_keys: keys to be replaced (must correspond to columns in ``job_df``). - file_ext: file extention to use (defaults to **madx**). - - Returns: - The provided ``job_df`` but with added path to the scripts. - """ - with maskfile.open("r") as mfile: - template = mfile.read() - - jobname = maskfile.with_suffix("").name - jobs = [None] * len(job_df) - for idx, (jobid, values) in enumerate(job_df.iterrows()): - jobfile_fullpath = (Path(values[COLUMN_JOB_DIRECTORY]) / jobname).with_suffix(file_ext) - - with jobfile_fullpath.open("w") as job_file: - job_file.write(template % dict(zip(replace_keys, values[list(replace_keys)]))) - jobs[idx] = jobfile_fullpath.name - job_df[COLUMN_JOB_FILE] = jobs - return job_df - - -def find_named_variables_in_mask(mask: str) -> Set[str]: - """ Find all variable-names in the mask. """ - return set(re.findall(r"%\((\w+)\)", mask)) - - -def check_percentage_signs_in_mask(mask: str) -> None: - """ Checks for '%' in the mask, that are not replacement variables. """ - cleaned_mask = re.sub(r"%\((\w+)\)", "", mask) - n_signs = cleaned_mask.count("%") - if n_signs == 0: - return - - # Help the user find the % - for idx, line in enumerate(cleaned_mask.split("\n")): - if "%" in line: - positions = [str(i) for i, char in enumerate(line) if char == "%"] - LOG.error(f"Problematic '%' sign(s) in line {idx}, pos {' ,'.join(positions)}.") - raise KeyError(f"{n_signs} problematic '%' signs found in template. Please remove.") - - -def generate_jobdf_index(old_df: pd.DataFrame, jobid_mask: str, keys: Sequence[str], values: ArrayLike - ) -> Union[List[str], Iterable[int]]: - """ Generates index for jobdf from mask for job_id naming. - - Args: - old_df (pd.DataFrame): Existing jobdf. - jobid_mask (str): Mask for naming the jobs. - keys (Sequence[str]): Keys to be replaced in the mask. - values (np.array_like): Values-Grid to be replaced in the mask. - - Returns: - List[str]: Index for jobdf, either list of strings (the filled jobid_masks) or integer-range. - """ - if not jobid_mask: - # Use integer-range as index, if no mask is given - # Start with last index if old_df is not None. - nold = len(old_df.index) if old_df is not None else 0 - start = nold-1 if nold > 0 else 0 - return range(start, start + values.shape[0]) - - # Fill job-id mask - return [jobid_mask % dict(zip(keys, v)) for v in values] - - -def is_mask_file(mask: str) -> bool: - """ Check if given string points to a file. """ - try: - return Path(mask).is_file() - except OSError: - return False - - -def is_mask_string(mask: str) -> bool: - """ Checks that given string does not point to a file. """ - return not is_mask_file(mask) - - -if __name__ == "__main__": - raise EnvironmentError(f"{__file__} is not supposed to run as main.") diff --git a/pylhc_submitter/job_submitter_tools/runners.py b/pylhc_submitter/job_submitter_tools/runners.py deleted file mode 100644 index 06eeceb..0000000 --- a/pylhc_submitter/job_submitter_tools/runners.py +++ /dev/null @@ -1,120 +0,0 @@ -""" -Job Submitter Runners ---------------------- - -Defines the methods to run the job-submitter, locally or on HTC. 
-""" -import logging -import multiprocessing -import subprocess -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Dict, Optional - -import pandas as pd -import tfs - -from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, - COLUMN_SHELL_SCRIPT) -from pylhc_submitter.job_submitter_tools import htc_utils -from pylhc_submitter.job_submitter_tools.iotools import is_eos_uri -from pylhc_submitter.utils.environment import on_windows - -LOG = logging.getLogger(__name__) - - -@dataclass -class RunnerOpts: - """ Options for running the submission. """ - working_directory: Path # Path to the working directory (e.g. afs) - jobflavour: Optional[str] = None # HTCondor job flavour (lengths of the job) - output_dir: Optional[str] = None # Name of the output directory, where jobs store data - ssh: Optional[str] = None # SSH command - dryrun: Optional[bool] = False # Perform only a dry-run, i.e. do all but submit to HTC - htc_arguments: Optional[Dict[str, Any]] = None # Arguments to pass on to htc as keywords - run_local: Optional[bool] = False # Run jobs locally - num_processes: Optional[int] = 4 # Number of processes to run in parallel (locally) - - -def run_jobs(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: - """Selects how to run the jobs. - - Args: - job_df (tfs.TfsDataFrame): DataFrame containing all the job-information - opt (RunnerOpts): Parameters for the runner - """ - if opt.run_local: - run_local(job_df, opt) - else: - run_htc(job_df, opt) - - -def run_local(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: - """Run all jobs locally. - - Args: - job_df (tfs.TfsDataFrame): DataFrame containing all the job-information - opt (RunnerOpts): Parameters for the runner - """ - if opt.dryrun: - LOG.info(f"Dry-run: Skipping local run.") - return - - LOG.info(f"Running {len(job_df.index)} jobs locally in {opt.num_processes:d} processes.") - - pool = multiprocessing.Pool(processes=opt.num_processes) - res = pool.map(_execute_shell, job_df.iterrows()) - if any(res): - jobs_failed = [j for r, j in zip(res, job_df.index) if r] - LOG.error(f"{len(jobs_failed)} of {len(job_df)} jobs have failed:\n {jobs_failed}") - raise RuntimeError("At least one job has failed. Check output logs!") - - -def run_htc(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: - """ Create submission file and submit the jobs to ``HTCondor``. - - Args: - job_df (tfs.TfsDataFrame): DataFrame containing all the job-information - opt (RunnerOpts): Parameters for the runner - """ - LOG.info(f"Submitting {len(job_df.index)} jobs on htcondor, flavour '{opt.jobflavour}'.") - LOG.debug("Creating htcondor subfile.") - - subfile = htc_utils.make_subfile( - opt.working_directory, job_df, - output_dir=opt.output_dir, - duration=opt.jobflavour, - **opt.htc_arguments - ) - - if opt.dryrun: - LOG.info("Dry run: submission file created, but not submitting jobs to htcondor.") - return - - LOG.debug("Submitting jobs to htcondor.") - htc_utils.submit_jobfile(subfile, opt.ssh) - - -# Helper ####################################################################### - -def _execute_shell(df_row: pd.Series) -> int: - """ Execute the shell script. 
- - Args: - df_row (pd.Series): row in the job-dataframe - - Returns: - int: return code of the process - """ - _, column = df_row - cmd = [] if on_windows() else ["sh"] - - with Path(column[COLUMN_JOB_DIRECTORY], "log.tmp").open("w") as logfile: - process = subprocess.Popen( - cmd + [column[COLUMN_SHELL_SCRIPT]], - shell=on_windows(), - stdout=logfile, - stderr=subprocess.STDOUT, - cwd=column[COLUMN_JOB_DIRECTORY], - ) - return process.wait() diff --git a/pylhc_submitter/sixdesk_tools/utils.py b/pylhc_submitter/sixdesk_tools/utils.py index 135b0c3..d287eb0 100644 --- a/pylhc_submitter/sixdesk_tools/utils.py +++ b/pylhc_submitter/sixdesk_tools/utils.py @@ -10,7 +10,7 @@ from pylhc_submitter.constants.autosix import SIXDESKLOCKFILE, get_workspace_path from pylhc_submitter.constants.external_paths import SIXDESK_UTILS -from pylhc_submitter.job_submitter_tools.mask import find_named_variables_in_mask +from pylhc_submitter.submitter.mask import find_named_variables_in_mask LOG = logging.getLogger(__name__) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 3d3427d..6f79dcb 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -7,6 +7,7 @@ import pytest from pylhc_submitter.job_submitter import main as job_submit +from pylhc_submitter.submitter.iotools import get_server_from_uri, is_eos_uri, uri_to_path from pylhc_submitter.utils.environment import on_linux, on_windows SUBFILE = "queuehtc.sub" @@ -43,6 +44,19 @@ def test_output_directory(tmp_path): _test_output(setup) +def test_wrong_uri(tmp_path): + """ Tests that wrong URI's are identified. """ + setup = InputParameters( + working_directory=tmp_path, + run_local=True, + output_destination="root:/eosuser.cern.ch/eos/my_new_output", + ) + setup.create_mask() + with pytest.raises(ValueError) as e: + job_submit(**asdict(setup)) + assert "EOS-URI" in str(e) + + @run_only_on_linux def test_job_creation_and_localrun_with_multiline_maskstring(tmp_path): """ Tests that the jobs are created and can be run locally from a multiline mask-string. """ @@ -101,30 +115,56 @@ def test_not_on_linux(tmp_path): assert "htcondor bindings" in e.value.args[0] +def test_eos_uri(): + """ Unit-test for the EOS-URI parsing. (OH LOOK! An actual unit test!)""" + server = "root://eosuser.cern.ch/" + path = "/eos/user/m/mmustermann/" + uri = f"{server}{path}" + assert is_eos_uri(uri) + assert not is_eos_uri(path) + assert uri_to_path(uri) == Path(path) + assert get_server_from_uri(uri) == server + + @run_only_on_linux @pytest.mark.cern_network -def test_htc_submit(): - """ This test is here for local testing only. You need to adapt the path - and delete the results afterwards manually (so you can check them before.""" +@pytest.mark.parametrize("uri", [True, False]) +def test_htc_submit(uri: bool): + """ This test is here for local testing only. + You need to adapt the path and delete the results afterwards manually.""" # Fix the kerberos ticket path. # Do klist to find your ticket manually. 
- # import os + import os # os.environ["KRB5CCNAME"] = "/tmp/krb5cc_####" + os.environ["KRB5CCNAME"] = "/tmp/krb5cc_106029" + + tmp_name = "htc_temp" + if uri: + tmp_name = f"{tmp_name}_uri" user = "jdilly" - path = Path("/", "afs", "cern.ch", "user", user[0], user, "htc_temp") + path = Path("/", "afs", "cern.ch", "user", user[0], user, tmp_name) path.mkdir(exist_ok=True) - setup = InputParameters(working_directory=path) - setup.create_mask() + dest = f"/eos/user/{user[0]}/{user}/{tmp_name}" + if uri: + dest = f"root://eosuser.cern.ch/{dest}" - # pre-run --- - job_submit(**asdict(setup)) - _test_subfile_content(setup) - _test_output(setup, post_run=False) + setup = InputParameters( + working_directory=path, + output_destination=dest, + # dryrun=True + ) + setup.create_mask() - # post run --- - # _test_output(setup, post_run=True) + prerun = True + prerun = False # Manually switch here after running. + if prerun: + job_submit(**asdict(setup)) + _test_subfile_content(setup) + _test_output(setup, post_run=False) + else: + _test_output(setup, post_run=True) # Helper ----------------------------------------------------------------------- @@ -178,7 +218,11 @@ def _test_subfile_content(setup: InputParameters): with subfile.open("r") as sfile: filecontents = dict(line.rstrip().split(" = ") for line in sfile if " = " in line) assert filecontents["MY.JobFlavour"].strip('"') == setup.jobflavour # flavour is saved with "" in .sub, and read in with them - assert filecontents["transfer_output_files"] == setup.job_output_dir + if setup.output_destination is None: + assert filecontents["transfer_output_files"] == setup.job_output_dir + else: + assert "transfer_output_files" not in filecontents + for key in setup.htc_arguments.keys(): assert filecontents[key] == setup.htc_arguments[key] @@ -197,14 +241,14 @@ def _test_output(setup: InputParameters, post_run: bool = True): if isinstance(setup.mask, Path): assert (setup.working_directory / job_name / setup.mask.name).with_suffix(setup.script_extension).exists() - def _check_output_content(dir_path: Path): + def _check_output_content(dir_path: Path, check_output: bool = True): # Check if the code created the folder structure --- - job_path = dir_path / job_name + job_path = uri_to_path(dir_path) / job_name assert job_path.exists() assert job_path.is_dir() - if post_run: # Check if the jobs created the files --- + if check_output: # Check if the jobs created the files --- out_dir_path = job_path / setup.job_output_dir out_file_path = out_dir_path / setup.check_files[0] @@ -216,11 +260,11 @@ def _check_output_content(dir_path: Path): assert f.read().strip("\n") == current_id # Check local working directory --- - _check_output_content(setup.working_directory) + _check_output_content(setup.working_directory, check_output=post_run and setup.output_destination is None) if setup.output_destination is not None: # Check copy at output destination --- - _check_output_content(setup.output_destination) + _check_output_content(setup.output_destination, check_output=post_run) def _generate_combinations(data: Dict[str, Sequence]) -> List[Dict[str, Any]]: From 8b88de4e9ae01d9e897aff0a73e027bb746d20a8 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:55:06 +0100 Subject: [PATCH 16/30] doc and version --- doc/Makefile | 4 +- doc/_static/css/custom.css | 91 +++++++++++++++++++ doc/_templates/layout.html | 12 +++ doc/conf.py | 27 +++--- doc/entrypoints/autosix.rst | 1 + doc/entrypoints/job_submitter.rst | 1 + 
doc/modules/constants.rst | 6 +- doc/modules/sixdesk_tools.rst | 7 ++ ...{job_submitter_tools.rst => submitter.rst} | 8 +- doc/modules/utils.rst | 2 + pylhc_submitter/__init__.py | 2 +- 11 files changed, 140 insertions(+), 21 deletions(-) create mode 100644 doc/_templates/layout.html rename doc/modules/{job_submitter_tools.rst => submitter.rst} (73%) diff --git a/doc/Makefile b/doc/Makefile index 08f8749..ff66ee4 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -48,9 +48,9 @@ html: @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." josch: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) /home/jdilly/Software/Documentation/submitter-doc + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) ../../Documentation/submitter-doc @echo - @echo "Build finished. The HTML pages are in /home/jdilly/Software/Documentation/submitter-doc." + @echo "Build finished. The HTML pages are in ../../Documentation/submitter-doc." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml diff --git a/doc/_static/css/custom.css b/doc/_static/css/custom.css index f201296..eea8824 100644 --- a/doc/_static/css/custom.css +++ b/doc/_static/css/custom.css @@ -1,7 +1,41 @@ +:root { + --nav-side-width: 300px; /* default is 300px */ + /* for 100% width */ + /*--nav-content-width: 100%;*/ + /*--local-toc-width: 300px;*/ + /*--nav-content-width-wide: calc(100% - var(--local-toc-width)); /* 100% here is fullscreen */ + /*--local-toc-left: calc(100% - var(--local-toc-width)); /* 100% here is w/o sidebar */ + + /* for fixed widths */ + --nav-content-width: 800px; /* default is 800px */ + --nav-content-width-wide: var(--nav-content-width); + --local-toc-width: calc(100% - var(--nav-content-width-wide)); + --local-toc-left: calc(var(--nav-content-width-wide) + var(--nav-side-width)); +} + +/* main content width */ +.wy-nav-content { + max-width: var(--nav-content-width); +} + +/* Sidebar width */ +.wy-nav-side { + width: var(--nav-side-width); +} + .wy-side-nav-search { background: rgb(243,244,247); } +.wy-side-nav-search > a { + color: black; +} + +.wy-side-nav-search> a img.logo { + width: 50%; +} + + .wy-side-nav-search > div.version { color: black; } @@ -182,3 +216,60 @@ em.sig-param span.default_value { .rst-content table.field-list th { padding: 16px; } + + +/* Create local table of contents + ------------------------------ + inspired by https://github.com/readthedocs/sphinx_rtd_theme/pull/919 + and https://github.com/readthedocs/sphinx_rtd_theme/issues/764 + see also _templates/layout.html + */ + +#local-table-of-contents { + padding-bottom: 20px; + /* display: none; */ +} + +/* Mask entry of main header (chapter) */ +#local-table-of-contents a[href="#"]{ + /*display: none;*/ +} + +/* indent subsections */ +#local-table-of-contents ul > ul { + padding-left: 0px; + margin-left: 20px; + padding-right: 0; + padding-bottom: 5px; +} + + +#local-table-of-contents-title { + margin-bottom: 10px; +} + +/* Show in Sidebar if window width is larger than nav-side + nav-content + toc-width */ +@media screen and (min-width: 1200px) { + .wy-nav-content { + max-width: var(--nav-content-width-wide); + } + + #local-table-of-contents { + display: block; + position: fixed; + margin-left: 15px; + overflow-y: auto; + height: 95%; + top: 45px; + left: var(--local-toc-left); + width: var(--local-toc-width); + } + + #local-table-of-contents-title { + display: block; + font-size: 16px; + width: 100%; + padding-top: 10px; + padding-bottom: 5px; + } +} \ No newline at end of file diff --git a/doc/_templates/layout.html 
b/doc/_templates/layout.html new file mode 100644 index 0000000..aa67d6d --- /dev/null +++ b/doc/_templates/layout.html @@ -0,0 +1,12 @@ +{% extends "!layout.html" %} +{% block document %} + {%- if toc|length > title|length + 75 %} + + {%- endif %} + + {{ super() }} +{% endblock %} + diff --git a/doc/conf.py b/doc/conf.py index 6fbff20..9da8056 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -31,20 +31,10 @@ sys.path.insert(0, str(TOPLEVEL_DIR)) -def about_package(init_posixpath: pathlib.Path) -> dict: - """ - Return package information defined with dunders in __init__.py as a dictionary, when - provided with a PosixPath to the __init__.py file. - """ - about_text: str = init_posixpath.read_text() - return { - entry.split(" = ")[0]: entry.split(" = ")[1].strip('"') - for entry in about_text.strip().split("\n") - if entry.startswith("__") - } - -ABOUT_PYLHC_SUBMITTER = about_package(ABOUT_FILE) +ABOUT_PYLHC_SUBMITTER: dict = {} +with ABOUT_FILE.open("r") as f: + exec(f.read(), ABOUT_PYLHC_SUBMITTER) # -- General configuration ------------------------------------------------ @@ -66,9 +56,11 @@ def about_package(init_posixpath: pathlib.Path) -> dict: "sphinx.ext.githubpages", "sphinx.ext.napoleon", ] +autosectionlabel_prefix_document = True +autosectionlabel_maxdepth = 2 # Add any paths that contain templates here, relative to this directory. -# templates_path = ['_templates'] +templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: @@ -84,6 +76,11 @@ def about_package(init_posixpath: pathlib.Path) -> dict: copyright_ = "2019, pyLHC/OMC-TEAM" author = ABOUT_PYLHC_SUBMITTER["__author__"] +# Override link in 'Edit on Github' +rst_prolog = f""" +:github_url: {ABOUT_PYLHC_SUBMITTER['__url__']} +""" + # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. @@ -98,7 +95,7 @@ def about_package(init_posixpath: pathlib.Path) -> dict: # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = 'en' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. diff --git a/doc/entrypoints/autosix.rst b/doc/entrypoints/autosix.rst index 470e586..0276dfc 100644 --- a/doc/entrypoints/autosix.rst +++ b/doc/entrypoints/autosix.rst @@ -1,2 +1,3 @@ .. automodule:: pylhc_submitter.autosix :members: + :noindex: diff --git a/doc/entrypoints/job_submitter.rst b/doc/entrypoints/job_submitter.rst index 7854e56..675569c 100644 --- a/doc/entrypoints/job_submitter.rst +++ b/doc/entrypoints/job_submitter.rst @@ -1,2 +1,3 @@ .. automodule:: pylhc_submitter.job_submitter :members: + :noindex: diff --git a/doc/modules/constants.rst b/doc/modules/constants.rst index 4f4f241..c7b3011 100644 --- a/doc/modules/constants.rst +++ b/doc/modules/constants.rst @@ -1,17 +1,21 @@ Constants Definitions -************************** +********************* .. automodule:: pylhc_submitter.constants.general :members: + :noindex: .. automodule:: pylhc_submitter.constants.external_paths :members: + :noindex: .. automodule:: pylhc_submitter.constants.job_submitter :members: + :noindex: .. 
automodule:: pylhc_submitter.constants.autosix :members: + :noindex: diff --git a/doc/modules/sixdesk_tools.rst b/doc/modules/sixdesk_tools.rst index 072ad58..0271a84 100644 --- a/doc/modules/sixdesk_tools.rst +++ b/doc/modules/sixdesk_tools.rst @@ -3,21 +3,28 @@ Sixdesk Tools .. automodule:: pylhc_submitter.sixdesk_tools.stages :members: + :noindex: .. automodule:: pylhc_submitter.sixdesk_tools.create_workspace :members: + :noindex: .. automodule:: pylhc_submitter.sixdesk_tools.submit :members: + :noindex: .. automodule:: pylhc_submitter.sixdesk_tools.post_process_da :members: + :noindex: .. automodule:: pylhc_submitter.sixdesk_tools.extract_data_from_db :members: + :noindex: .. automodule:: pylhc_submitter.sixdesk_tools.utils :members: + :noindex: .. automodule:: pylhc_submitter.sixdesk_tools.troubleshooting :members: + :noindex: diff --git a/doc/modules/job_submitter_tools.rst b/doc/modules/submitter.rst similarity index 73% rename from doc/modules/job_submitter_tools.rst rename to doc/modules/submitter.rst index 34be7cc..ac54d63 100644 --- a/doc/modules/job_submitter_tools.rst +++ b/doc/modules/submitter.rst @@ -1,15 +1,19 @@ -HTCondor Tools -************************** +Submitter +********* .. automodule:: pylhc_submitter.submitter.htc_utils :members: + :noindex: .. automodule:: pylhc_submitter.submitter.iotools :members: + :noindex: .. automodule:: pylhc_submitter.submitter.mask :members: + :noindex: .. automodule:: pylhc_submitter.submitter.runners :members: + :noindex: diff --git a/doc/modules/utils.rst b/doc/modules/utils.rst index aa7ecb0..107fb06 100644 --- a/doc/modules/utils.rst +++ b/doc/modules/utils.rst @@ -3,7 +3,9 @@ Utilities .. automodule:: pylhc_submitter.utils.iotools :members: + :noindex: .. automodule:: pylhc_submitter.utils.logging_tools :members: + :noindex: diff --git a/pylhc_submitter/__init__.py b/pylhc_submitter/__init__.py index 62de331..ed21d9e 100644 --- a/pylhc_submitter/__init__.py +++ b/pylhc_submitter/__init__.py @@ -10,7 +10,7 @@ __title__ = "pylhc_submitter" __description__ = "pylhc-submitter contains scripts to simplify the creation and submission of jobs to HTCondor at CERN" __url__ = "https://github.com/pylhc/submitter" -__version__ = "1.1.1" +__version__ = "2.0.0" __author__ = "pylhc" __author_email__ = "pylhc@github.com" __license__ = "MIT" From 796a47770902f8108cf15fd139f4be6ed7374f30 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:56:26 +0100 Subject: [PATCH 17/30] missing package --- pylhc_submitter/submitter/__init__.py | 0 pylhc_submitter/submitter/htc_utils.py | 330 +++++++++++++++++++++++++ pylhc_submitter/submitter/iotools.py | 317 ++++++++++++++++++++++++ pylhc_submitter/submitter/mask.py | 114 +++++++++ pylhc_submitter/submitter/runners.py | 120 +++++++++ 5 files changed, 881 insertions(+) create mode 100644 pylhc_submitter/submitter/__init__.py create mode 100644 pylhc_submitter/submitter/htc_utils.py create mode 100644 pylhc_submitter/submitter/iotools.py create mode 100644 pylhc_submitter/submitter/mask.py create mode 100644 pylhc_submitter/submitter/runners.py diff --git a/pylhc_submitter/submitter/__init__.py b/pylhc_submitter/submitter/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pylhc_submitter/submitter/htc_utils.py b/pylhc_submitter/submitter/htc_utils.py new file mode 100644 index 0000000..58ffd2a --- /dev/null +++ b/pylhc_submitter/submitter/htc_utils.py @@ -0,0 +1,330 @@ +""" +HTCondor Utilities +------------------ + +This module provides 
functionality to create HTCondor jobs and submit them to ``HTCondor``. + +``write_bash`` creates bash scripts executing either a python or madx script. +Takes as input `Dataframe`, job type, and optional additional commandline arguments for the script. +A shell script is created in each job directory in the dataframe. + +``make_subfile`` takes the job dataframe and creates the **.sub** files required for submissions to +``HTCondor``. The **.sub** file will be put in the working directory. The maximum runtime of one +job can be specified, standard is 8h. +""" +import logging +import subprocess +from pathlib import Path +from typing import Any, Dict, List, Union + +from pandas import DataFrame + +from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, + COLUMN_JOB_FILE, COLUMN_SHELL_SCRIPT, + EXECUTEABLEPATH, NON_PARAMETER_COLUMNS) +from pylhc_submitter.submitter.iotools import is_eos_uri +from pylhc_submitter.submitter.mask import is_mask_file +from pylhc_submitter.utils.environment import on_windows + +try: + import htcondor +except ImportError: # will be handled by job_submitter + class htcondor: + """Dummy HTCondor module. To satisfy the typing. """ + Submit: Any = None + + +LOG = logging.getLogger(__name__) + +# HTC Constants ################################################################ + +SHEBANG = "#!/bin/bash" +SUBFILE = "queuehtc.sub" +BASH_FILENAME = "Job" + +HTCONDOR_JOBLIMIT = 100000 + +CMD_SUBMIT = "condor_submit" +JOBFLAVOURS = ( + "espresso", # 20 min + "microcentury", # 1 h + "longlunch", # 2 h + "workday", # 8 h + "tomorrow", # 1 d + "testmatch", # 3 d + "nextweek", # 1 w +) + +NOTIFICATIONS = ("always", "complete", "error", "never") + + + +# Subprocess Methods ########################################################### + + +def create_subfile_from_job(cwd: Path, submission: Union[str, htcondor.Submit]) -> Path: + """ + Write file to submit to ``HTCondor``. + + Args: + cwd (Path): working directory + submission (str, htcondor.Submit): HTCondor submission definition (i.e. content of the file) + + Returns: + Path: path to sub-file + + """ + subfile = cwd / SUBFILE + LOG.debug(f"Writing sub-file '{str(subfile)}'.") + with subfile.open("w") as f: + f.write(str(submission)) + return subfile + + +def submit_jobfile(jobfile: Path, ssh: str) -> None: + """Submit subfile to ``HTCondor`` via subprocess. + + Args: + jobfile (Path): path to sub-file + ssh (str): ssh target + + """ + proc_args = [CMD_SUBMIT, jobfile] + if ssh: + proc_args = ["ssh", ssh] + proc_args + status = _start_subprocess(proc_args) + if status: + raise RuntimeError("Submit to HTCondor was not successful!") + else: + LOG.info("Jobs successfully submitted.") + + +def _start_subprocess(command: List[str]) -> int: + """ Start subprocess and log output. + + Args: + command (List[str]): command to execute + + Returns: + int: return code of the process + + """ + LOG.debug(f"Executing command '{command}'") + process = subprocess.Popen( + command, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + ) + for line in process.stdout: + htc_line = line.decode("utf-8").strip() + if htc_line: + LOG.debug(f"{htc_line} (from HTCondor)") + return process.wait() + + +# Job Creation ################################################################# + + +def create_multijob_for_bashfiles(job_df: DataFrame, **kwargs) -> str: + """ + Function to create an ``HTCondor`` submission content for all job-scripts, + i.e. bash-files, in the job_df. 
+ + Keyword Args: + output_dir (str): output directory that will be transferred. Defaults to ``None``. + duration (str): max duration of the job. Needs to be one of the ``HTCondor`` Jobflavours. + Defaults to ``workday``. + group (str): force use of accounting group. Defaults to ``None``. + retries (int): maximum amount of retries. Default to ``3``. + notification (str): Notify under certain conditions. Defaults to ``error``. + priority (int): Priority to order your jobs. Defaults to ``None``. + + Returns: + str: HTCondor submission definition. + """ + # Pre-defined HTCondor arguments for our jobs + submit_dict = { + "MyId": "htcondor", + "universe": "vanilla", + "arguments": "$(ClusterId) $(ProcId)", + "output": Path("$(initialdir)", "$(MyId).$(ClusterId).$(ProcId).out"), + "error": Path("$(initialdir)", "$(MyId).$(ClusterId).$(ProcId).err"), + "log": Path("$(initialdir)", "$(MyId).$(ClusterId).$(ProcId).log"), + "on_exit_remove": "(ExitBySignal == False) && (ExitCode == 0)", + "requirements": "Machine =!= LastRemoteHost", + } + submit_dict.update(map_kwargs(kwargs)) + + # Let the htcondor create the submit-file + submission = htcondor.Submit(submit_dict) + + # add the multiple bash files + scripts = [ + str(Path(*parts)) + for parts in zip(job_df[COLUMN_JOB_DIRECTORY], job_df[COLUMN_SHELL_SCRIPT]) + ] + args = [",".join(parts) for parts in zip(scripts, job_df[COLUMN_JOB_DIRECTORY])] + queueArgs = ["queue executable, initialdir from (", *args, ")"] + + # ugly but submission.setQArgs doesn't take string containing '\n': + # submission.setQArgs("\n".join(queueArgs)) # doesn't work + submission = str(submission) + "\n".join(queueArgs) + LOG.debug(f"Created HTCondor subfile with content: \n{submission}") + return submission + + +# Main functions ############################################################### + + +def make_subfile(cwd: Path, job_df: DataFrame, **kwargs) -> Path: + """ + Creates submit-file for ``HTCondor``. + For kwargs, see ``create_multijob_for_bashfiles``. + + Args: + cwd (Path): working directory + job_df (DataFrame): DataFrame containing all the job-information + + Returns: + Path: path to the submit-file + """ + job = create_multijob_for_bashfiles(job_df, **kwargs) + return create_subfile_from_job(cwd, job) + + +def write_bash( + job_df: DataFrame, + output_dir: Path = None, + executable: str = "madx", + cmdline_arguments: dict = None, + mask: Union[str, Path] = None, +) -> DataFrame: + """ + Write the bash-files to be called by ``HTCondor``, which in turn call the executable. + Takes as input `Dataframe`, job type, and optional additional commandline arguments for the script. + A shell script is created in each job directory in the dataframe. + + Args: + job_df (DataFrame): DataFrame containing all the job-information + output_dir (str): output directory that will be transferred. Defaults to ``None``. + executable (str): name of the executable. Defaults to ``madx``. + cmdline_arguments (dict): additional commandline arguments for the executable + mask (Union[str, Path]): string or path to the mask-file. Defaults to ``None``. + + Returns: + DataFrame: The provided ``job_df`` but with added path to the scripts. 
+ + """ + if len(job_df.index) > HTCONDOR_JOBLIMIT: + raise AttributeError("Submitting too many jobs for HTCONDOR") + + exec_path = f"{str(EXECUTEABLEPATH.get(executable, executable))} " if executable else '' + cmds = f" {' '.join([f'{param} {val}' for param, val in cmdline_arguments.items()])}" if cmdline_arguments else '' + + shell_scripts = [None] * len(job_df.index) + for idx, (jobid, job) in enumerate(job_df.iterrows()): + job_dir = Path(job[COLUMN_JOB_DIRECTORY]) + bash_file_name = f"{BASH_FILENAME}.{jobid}.{'bat' if on_windows() else 'sh'}" + jobfile = job_dir / bash_file_name + + LOG.debug(f"Writing bash-file {idx:d} '{jobfile}'.") + with open(jobfile, "w") as f: + # Preparation --- + if not on_windows(): + f.write(f"{SHEBANG}\n") + + if output_dir is not None: + f.write(f"mkdir {str(output_dir)}\n") + + # The actual job execution --- + f.write(exec_path) + + # Call the mask-file or the filled-template string + if is_mask_file(mask): + f.write(str(job_dir / job[COLUMN_JOB_FILE])) + else: + replace_columns = [column for column in job.index.tolist() if column not in NON_PARAMETER_COLUMNS] + f.write(mask % dict(zip(replace_columns, job[replace_columns]))) + + # Additional commands for the mask/string + f.write(cmds) + f.write("\n") + + # Manually copy output (if needed) --- + dest_dir = job.get(COLUMN_DEST_DIRECTORY) + if output_dir and dest_dir and output_dir != dest_dir: + # Note: only eos-cp needs `/` at the end of dirs, but should not hurt in any case + cp_command = f'cp -r {_str_ending_with_slash(output_dir)} {_str_ending_with_slash(dest_dir)}' + if is_eos_uri(dest_dir): + cp_command = f'eos {cp_command}' + + f.write(f'{cp_command}\n') + + shell_scripts[idx] = bash_file_name + + job_df[COLUMN_SHELL_SCRIPT] = shell_scripts + return job_df + + +def map_kwargs(add_dict: Dict[str, Any]) -> Dict[str, Any]: + """ + Maps the kwargs for the job-file. + Some arguments have pre-defined choices and defaults, the remaining ones are just passed on. + + Args: + add_dict (Dict[str, Any]): additional kwargs to add to the defaults. + + Returns: + Dict[str, Any]: The mapped kwargs. + + """ + new = {} + + # Predefined mappings + htc_map = { # name: mapped_name, choices, default + "duration": ("+JobFlavour", JOBFLAVOURS, "workday"), + "output_dir": ("transfer_output_files", None, None), + "accounting_group": ("+AccountingGroup", None, None), + "max_retries": ("max_retries", None, 3), + "notification": ("notification", NOTIFICATIONS, "error"), + } + for key, (mapped, choices, default) in htc_map.items(): + try: + value = add_dict.pop(key) + except KeyError: + value = default # could be `None` + else: + if choices is not None and value not in choices: + raise TypeError( + f"{key} needs to be one of '{str(choices).strip('[]')}' but " + f"instead was '{value}'" + ) + if value is not None: + new[mapped] = _maybe_put_in_quotes(mapped, value) + + # Pass-Through Arguments + LOG.debug(f"Remaining arguments to be added: '{str(add_dict).strip('{}'):s}'") + new.update(add_dict) + return new + + +# Helper ####################################################################### + +def _maybe_put_in_quotes(key: str, value: Any) -> Any: + """ Put value in quoted strings if key starts with '+' """ + if key.startswith("+"): + return f'"{value}"' + return value + + +def _str_ending_with_slash(s: Union[Path, str]) -> str: + """ Add a slash at the end of a path if not present. 
""" + s = str(s) + if s.endswith("/"): + return s + return f"{s}/" + + +# Script Mode ################################################################## + + +if __name__ == "__main__": + raise EnvironmentError(f"{__file__} is not supposed to run as main.") diff --git a/pylhc_submitter/submitter/iotools.py b/pylhc_submitter/submitter/iotools.py new file mode 100644 index 0000000..be5669e --- /dev/null +++ b/pylhc_submitter/submitter/iotools.py @@ -0,0 +1,317 @@ +""" +Job Submitter IO-Tools +---------------------- + +Tools for input and output for the job-submitter. +""" +import itertools +import logging +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, List, Sequence, Tuple, Union + +import numpy as np +import pandas as pd +import tfs + +from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, + COLUMN_JOBID, JOBDIRECTORY_PREFIX, + JOBSUMMARY_FILE, SCRIPT_EXTENSIONS) +from pylhc_submitter.submitter import htc_utils +from pylhc_submitter.submitter.mask import (create_job_scripts_from_mask, + generate_jobdf_index, is_mask_file) + +LOG = logging.getLogger(__name__) + + +@dataclass +class CreationOpts: + """ Options for creating jobs. """ + working_directory: Path # Path to working directory (afs) + mask: Union[Path, str] # Path to mask file or mask-string + jobid_mask: str # Mask for jobid + replace_dict: Dict[str, Any] # Replace-dict + output_dir: Path # Path to local output directory + output_destination: Union[Path, str] # Path or URI to remote output directory (e.g. eos) + append_jobs: bool # Append jobs to existing jobs + resume_jobs: bool # Resume jobs that have already run/failed/got interrupted + executable: str # Name of executable to call the script (from mask) + check_files: Sequence[str] # List of output files to check for success + script_arguments: Dict[str, Any] # Arguments to pass to script + script_extension: str # Extension of the script to run + + def should_drop_jobs(self) -> bool: + """ Check if jobs should be dropped after creating the whole parameter space, + e.g. because they already exist. """ + return self.append_jobs or self.resume_jobs + + + +def create_jobs(opt: CreationOpts) -> tfs.TfsDataFrame: + """Main function to prepare all the jobs and folder structure. + This greates the value-grid based on the replace-dict and + checks for existing jobs (if so desired). + A job-dataframe is created - and written out - containing all the information and + its values are used to generate the job-scripts. + It also creates bash-scripts to call the executable for the job-scripts. + + Args: + opt (CreationOpts): Options for creating jobs + + Returns: + tfs.TfsDataFrame: The job-dataframe containing information for all jobs. + """ + LOG.debug("Creating Jobs.") + + # Generate product of replace-dict and compare to existing jobs --- + parameters, values_grid, prev_job_df = _generate_parameter_space( + replace_dict=opt.replace_dict, + append_jobs=opt.append_jobs, + cwd=opt.working_directory, + ) + + # Check new jobs --- + njobs = len(values_grid) + if njobs == 0: + raise ValueError(f"No (new) jobs found!") + + if njobs > htc_utils.HTCONDOR_JOBLIMIT: + LOG.warning( + f"You are attempting to submit an important number of jobs ({njobs})." + "This can be a high stress on your system, make sure you know what you are doing." 
+ ) + + LOG.debug(f"Initial number of jobs: {njobs:d}") + + # Generate new job-dataframe --- + job_df = tfs.TfsDataFrame( + index=generate_jobdf_index(prev_job_df, opt.jobid_mask, parameters, values_grid), + columns=parameters, + data=values_grid, + ) + job_df = tfs.concat([prev_job_df, job_df], sort=False, how_headers='left') + + # Setup folders --- + job_df = create_folders(job_df, opt.working_directory, opt.output_destination) + + # Create scripts --- + if is_mask_file(opt.mask): + LOG.debug("Creating all jobs from mask.") + script_extension = _get_script_extension(opt.script_extension, opt.executable, opt.mask) + job_df = create_job_scripts_from_mask( + job_df, opt.mask, parameters, script_extension + ) + + LOG.debug("Creating shell scripts.") + job_df = htc_utils.write_bash( + job_df, + output_dir=opt.output_dir, + executable=opt.executable, + cmdline_arguments=opt.script_arguments, + mask=opt.mask, + ) + + # Convert paths to strings and write df to file --- + job_df[COLUMN_JOB_DIRECTORY] = job_df[COLUMN_JOB_DIRECTORY].apply(str) + if COLUMN_DEST_DIRECTORY in job_df.columns: + job_df[COLUMN_DEST_DIRECTORY] = job_df[COLUMN_DEST_DIRECTORY].apply(str) + + tfs.write(str(opt.working_directory / JOBSUMMARY_FILE), job_df, save_index=COLUMN_JOBID) + + # Drop already run jobs --- + dropped_jobs = [] + if opt.should_drop_jobs(): + job_df, dropped_jobs = _drop_already_run_jobs( + job_df, opt.output_dir, opt.check_files + ) + return job_df, dropped_jobs + + +def create_folders(job_df: tfs.TfsDataFrame, working_directory: Path, + destination_directory: Union[Path, str] = None) -> tfs.TfsDataFrame: + """Create the folder-structure in the given working directory and the + destination directory if given. + This creates a folder per job in which then the job-scripts and bash-scripts + can be stored later. + + Args: + job_df (tfs.TfsDataFrame): DataFrame containing all the job-information + working_directory (Path): Path to the working directory + destination_directory (Path, optional): Path to the destination directory, + i.e. the directory to copy the outputs to manually. Defaults to None. + + Returns: + tfs.TfsDataFrame: The job-dataframe again, but with the added paths to the job-dirs. 
+ """ + LOG.debug("Setting up folders: ") + + jobname = f"{JOBDIRECTORY_PREFIX}.{{0}}" + job_df[COLUMN_JOB_DIRECTORY] = [working_directory / jobname.format(id_) for id_ in job_df.index] + + for job_dir in job_df[COLUMN_JOB_DIRECTORY]: + job_dir.mkdir(exist_ok=True) + LOG.debug(f" created '{job_dir}'.") + + if destination_directory: + dest_path = uri_to_path(destination_directory) + dest_path.mkdir(parents=True, exist_ok=True) + + server = get_server_from_uri(destination_directory) + job_df[COLUMN_DEST_DIRECTORY] = [f"{server}{dest_path / jobname.format(id_)}" for id_ in job_df.index] + + # Make some symlinks for easy navigation--- + # Output directory -> Working Directory + sym_submission = dest_path / Path('SUBMISSION_DIR') + sym_submission.unlink(missing_ok=True) + sym_submission.symlink_to(working_directory.resolve(), target_is_directory=True) + + # Working Directory -> Output Directory + sym_destination = working_directory / Path('OUTPUT_DIR') + sym_destination.unlink(missing_ok=True) + sym_destination.symlink_to(dest_path.resolve(), target_is_directory=True) + + # Create output dirs per job --- + for job_dest_dir in job_df[COLUMN_DEST_DIRECTORY]: + uri_to_path(job_dest_dir).mkdir(exist_ok=True) + LOG.debug(f" created '{job_dest_dir}'.") + + return job_df + + +def is_eos_uri(path: Union[Path, str, None]) -> bool: + """ Check if the given path is an EOS-URI as `eos cp` only works with those. + E.g.: root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt + + This function does not check the double slashes, + to avoid having the user pass a malformed path by accident and then + assuming it is just a path. This is tested for in + :func:`pylhc_submitter.job_submitter.check_opts`. + """ + if path is None: + return False + + parts = Path(path).parts + return ( + len(parts) >= 3 # at least root:, server, path + and + parts[0].endswith(':') + and + parts[2] == 'eos' + ) + + +def uri_to_path(path: Union[Path, str]) -> Path: + """ Strip EOS path information from a path. + EOS paths for HTCondor can be given as URI. Strip for direct writing. + E.g.: root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt + """ + path = Path(path) + parts = path.parts + if parts[0].endswith(':'): + # the first two parts are host info, e.g `file: //host/path` + return Path('/', *parts[2:]) + return path + + +def get_server_from_uri(path: Union[Path, str]) -> str: + """ Get server information from a path. + E.g.: root://eosuser.cern.ch//eos/user/a/ -> root://eosuser.cern.ch/ + """ + path_part = uri_to_path(path) + if path_part == Path(path): + return "" + + server_part = str(path).replace(str(path_part), '') + if server_part.endswith("//"): + server_part = server_part[:-1] + return server_part + + +def print_stats(new_jobs: Sequence[str], finished_jobs: Sequence[str]): + """Print some quick statistics.""" + text = [ + "\n------------- QUICK STATS ----------------" + f"Jobs total:{len(new_jobs) + len(finished_jobs):d}", + f"Jobs to run: {len(new_jobs):d}", + f"Jobs already finished: {len(finished_jobs):d}", + "---------- JOBS TO RUN: NAMES -------------" + ] + for job_name in new_jobs: + text.append(job_name) + text += ["--------- JOBS FINISHED: NAMES ------------"] + for job_name in finished_jobs: + text.append(job_name) + LOG.info("\n".join(text)) + + +def _generate_parameter_space( + replace_dict: Dict[str, Any], append_jobs: bool, cwd: Path + ) -> Tuple[List[str], np.ndarray, tfs.TfsDataFrame]: + """ Generate parameter space from replace-dict, check for existing jobs. 
""" + LOG.debug("Generating parameter space from replace-dict.") + parameters = list(replace_dict.keys()) + values_grid = _generate_values_grid(replace_dict) + if not append_jobs: + return parameters, values_grid, tfs.TfsDataFrame() + + jobfile_path = cwd / JOBSUMMARY_FILE + try: + prev_job_df = tfs.read(str(jobfile_path.absolute()), index=COLUMN_JOBID) + except FileNotFoundError as filerror: + raise FileNotFoundError( + "Cannot append jobs, as no previous jobfile was found at " f"'{jobfile_path}'" + ) from filerror + new_jobs_mask = [elem not in prev_job_df[parameters].values for elem in values_grid] + values_grid = values_grid[new_jobs_mask] + + return parameters, values_grid, prev_job_df + + +def _generate_values_grid(replace_dict: Dict[str, Any]) -> np.ndarray: + """ Creates an array of the inner-product of the replace-dict. """ + return np.array(list(itertools.product(*replace_dict.values())), dtype=object) + + +def _drop_already_run_jobs( + job_df: tfs.TfsDataFrame, output_dir: str, check_files: str + ) -> Tuple[tfs.TfsDataFrame, List[str]]: + """ Check for jobs that have already been run and drop them from current job_df. """ + LOG.debug("Dropping already finished jobs.") + finished_jobs = [ + idx + for idx, row in job_df.iterrows() + if _job_was_successful(row, output_dir, check_files) + ] + + LOG.info( + f"{len(finished_jobs):d} of {len(job_df.index):d}" + " Jobs have already finished and will be skipped." + ) + + job_df = job_df.drop(index=finished_jobs) + return job_df, finished_jobs + + +def _job_was_successful(job_row: pd.Series, output_dir: str, files: Sequence[str]) -> bool: + """ Determines if the job was successful. + + Args: + job_row (pd.Series): row from the job_df + output_dir (str): Name of the (local) output directory + files (List[str]): list of files that should have been generated + """ + job_dir = job_row.get(COLUMN_DEST_DIRECTORY) or job_row[COLUMN_JOB_DIRECTORY] + output_dir = Path(job_dir, output_dir) + success = output_dir.is_dir() and any(output_dir.iterdir()) + if success and files is not None and len(files): + for f in files: + success &= len(list(output_dir.glob(f))) > 0 + return success + + +def _get_script_extension(script_extension: str, executable: Path, mask: Path) -> str: + """ Returns the extension of the script to run based on + either the given value, its executable or the mask. """ + if script_extension is not None: + return script_extension + return SCRIPT_EXTENSIONS.get(executable, mask.suffix) diff --git a/pylhc_submitter/submitter/mask.py b/pylhc_submitter/submitter/mask.py new file mode 100644 index 0000000..3a2dcaa --- /dev/null +++ b/pylhc_submitter/submitter/mask.py @@ -0,0 +1,114 @@ +""" +Mask Resolver +------------- + +This module provides functionality to resolve and write script masks for ``HTCondor`` jobs +submission. +""" +import logging +import re +from pathlib import Path +from typing import Iterable, List, Sequence, Set, Union + +import pandas as pd +from numpy.typing import ArrayLike + +from pylhc_submitter.constants.job_submitter import COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE + +LOG = logging.getLogger(__name__) + + +def create_job_scripts_from_mask( + job_df: pd.DataFrame, maskfile: Path, replace_keys: dict, file_ext: str +) -> pd.DataFrame: + """ + Takes path to mask file, list of parameter to be replaced and pandas dataframe containg per job + the job directory where processed mask is to be put, and columns containing the parameter values + with column named like replace parameters. 
Job directories have to be created beforehand. + Processed (madx) mask has the same filename as mask but with the given file extension. + Input Dataframe is returned with additional column containing path to the processed script + files. + + Args: + job_df (pd.DataFrame): Job parameters as defined in description. + maskfile: `Path` object to the mask file. + replace_keys: keys to be replaced (must correspond to columns in ``job_df``). + file_ext: file extention to use (defaults to **madx**). + + Returns: + The provided ``job_df`` but with added path to the scripts. + """ + with maskfile.open("r") as mfile: + template = mfile.read() + + jobname = maskfile.with_suffix("").name + jobs = [None] * len(job_df) + for idx, (jobid, values) in enumerate(job_df.iterrows()): + jobfile_fullpath = (Path(values[COLUMN_JOB_DIRECTORY]) / jobname).with_suffix(file_ext) + + with jobfile_fullpath.open("w") as job_file: + job_file.write(template % dict(zip(replace_keys, values[list(replace_keys)]))) + jobs[idx] = jobfile_fullpath.name + job_df[COLUMN_JOB_FILE] = jobs + return job_df + + +def find_named_variables_in_mask(mask: str) -> Set[str]: + """ Find all variable-names in the mask. """ + return set(re.findall(r"%\((\w+)\)", mask)) + + +def check_percentage_signs_in_mask(mask: str) -> None: + """ Checks for '%' in the mask, that are not replacement variables. """ + cleaned_mask = re.sub(r"%\((\w+)\)", "", mask) + n_signs = cleaned_mask.count("%") + if n_signs == 0: + return + + # Help the user find the % + for idx, line in enumerate(cleaned_mask.split("\n")): + if "%" in line: + positions = [str(i) for i, char in enumerate(line) if char == "%"] + LOG.error(f"Problematic '%' sign(s) in line {idx}, pos {' ,'.join(positions)}.") + raise KeyError(f"{n_signs} problematic '%' signs found in template. Please remove.") + + +def generate_jobdf_index(old_df: pd.DataFrame, jobid_mask: str, keys: Sequence[str], values: ArrayLike + ) -> Union[List[str], Iterable[int]]: + """ Generates index for jobdf from mask for job_id naming. + + Args: + old_df (pd.DataFrame): Existing jobdf. + jobid_mask (str): Mask for naming the jobs. + keys (Sequence[str]): Keys to be replaced in the mask. + values (np.array_like): Values-Grid to be replaced in the mask. + + Returns: + List[str]: Index for jobdf, either list of strings (the filled jobid_masks) or integer-range. + """ + if not jobid_mask: + # Use integer-range as index, if no mask is given + # Start with last index if old_df is not None. + nold = len(old_df.index) if old_df is not None else 0 + start = nold-1 if nold > 0 else 0 + return range(start, start + values.shape[0]) + + # Fill job-id mask + return [jobid_mask % dict(zip(keys, v)) for v in values] + + +def is_mask_file(mask: str) -> bool: + """ Check if given string points to a file. """ + try: + return Path(mask).is_file() + except OSError: + return False + + +def is_mask_string(mask: str) -> bool: + """ Checks that given string does not point to a file. """ + return not is_mask_file(mask) + + +if __name__ == "__main__": + raise EnvironmentError(f"{__file__} is not supposed to run as main.") diff --git a/pylhc_submitter/submitter/runners.py b/pylhc_submitter/submitter/runners.py new file mode 100644 index 0000000..f7ab0d0 --- /dev/null +++ b/pylhc_submitter/submitter/runners.py @@ -0,0 +1,120 @@ +""" +Job Submitter Runners +--------------------- + +Defines the methods to run the job-submitter, locally or on HTC. 
+""" +import logging +import multiprocessing +import subprocess +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, Optional + +import pandas as pd +import tfs + +from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, + COLUMN_SHELL_SCRIPT) +from pylhc_submitter.submitter import htc_utils +from pylhc_submitter.submitter.iotools import is_eos_uri +from pylhc_submitter.utils.environment import on_windows + +LOG = logging.getLogger(__name__) + + +@dataclass +class RunnerOpts: + """ Options for running the submission. """ + working_directory: Path # Path to the working directory (e.g. afs) + jobflavour: Optional[str] = None # HTCondor job flavour (lengths of the job) + output_dir: Optional[str] = None # Name of the output directory, where jobs store data + ssh: Optional[str] = None # SSH command + dryrun: Optional[bool] = False # Perform only a dry-run, i.e. do all but submit to HTC + htc_arguments: Optional[Dict[str, Any]] = None # Arguments to pass on to htc as keywords + run_local: Optional[bool] = False # Run jobs locally + num_processes: Optional[int] = 4 # Number of processes to run in parallel (locally) + + +def run_jobs(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: + """Selects how to run the jobs. + + Args: + job_df (tfs.TfsDataFrame): DataFrame containing all the job-information + opt (RunnerOpts): Parameters for the runner + """ + if opt.run_local: + run_local(job_df, opt) + else: + run_htc(job_df, opt) + + +def run_local(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: + """Run all jobs locally. + + Args: + job_df (tfs.TfsDataFrame): DataFrame containing all the job-information + opt (RunnerOpts): Parameters for the runner + """ + if opt.dryrun: + LOG.info(f"Dry-run: Skipping local run.") + return + + LOG.info(f"Running {len(job_df.index)} jobs locally in {opt.num_processes:d} processes.") + + pool = multiprocessing.Pool(processes=opt.num_processes) + res = pool.map(_execute_shell, job_df.iterrows()) + if any(res): + jobs_failed = [j for r, j in zip(res, job_df.index) if r] + LOG.error(f"{len(jobs_failed)} of {len(job_df)} jobs have failed:\n {jobs_failed}") + raise RuntimeError("At least one job has failed. Check output logs!") + + +def run_htc(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: + """ Create submission file and submit the jobs to ``HTCondor``. + + Args: + job_df (tfs.TfsDataFrame): DataFrame containing all the job-information + opt (RunnerOpts): Parameters for the runner + """ + LOG.info(f"Submitting {len(job_df.index)} jobs on htcondor, flavour '{opt.jobflavour}'.") + LOG.debug("Creating htcondor subfile.") + + subfile = htc_utils.make_subfile( + opt.working_directory, job_df, + output_dir=opt.output_dir, + duration=opt.jobflavour, + **opt.htc_arguments + ) + + if opt.dryrun: + LOG.info("Dry run: submission file created, but not submitting jobs to htcondor.") + return + + LOG.debug("Submitting jobs to htcondor.") + htc_utils.submit_jobfile(subfile, opt.ssh) + + +# Helper ####################################################################### + +def _execute_shell(df_row: pd.Series) -> int: + """ Execute the shell script. 
+ + Args: + df_row (pd.Series): row in the job-dataframe + + Returns: + int: return code of the process + """ + _, column = df_row + cmd = [] if on_windows() else ["sh"] + + with Path(column[COLUMN_JOB_DIRECTORY], "log.tmp").open("w") as logfile: + process = subprocess.Popen( + cmd + [column[COLUMN_SHELL_SCRIPT]], + shell=on_windows(), + stdout=logfile, + stderr=subprocess.STDOUT, + cwd=column[COLUMN_JOB_DIRECTORY], + ) + return process.wait() From c621b5ff878a049ec9163a561629e91ba2f7a9ec Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 8 Nov 2023 19:02:43 +0100 Subject: [PATCH 18/30] gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 713d8a6..5e133cd 100644 --- a/.gitignore +++ b/.gitignore @@ -243,9 +243,14 @@ Temporary Items # Neovim .nvimlog +# Intellij /.idea/codeStyles/codeStyleConfig.xml /.idea/misc.xml /.idea/modules.xml /.idea/inspectionProfiles/profiles_settings.xml /.idea/vcs.xml /.idea/PyhDToolkit.iml + +# Other +tst_* + From 3f395ef387b7678be9426de23850bfab0a6957d3 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 8 Nov 2023 19:47:53 +0100 Subject: [PATCH 19/30] fixing macos --- pylhc_submitter/submitter/htc_utils.py | 8 +++++--- tests/unit/test_job_submitter.py | 13 ++++++------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/pylhc_submitter/submitter/htc_utils.py b/pylhc_submitter/submitter/htc_utils.py index 58ffd2a..4482b17 100644 --- a/pylhc_submitter/submitter/htc_utils.py +++ b/pylhc_submitter/submitter/htc_utils.py @@ -251,10 +251,12 @@ def write_bash( # Manually copy output (if needed) --- dest_dir = job.get(COLUMN_DEST_DIRECTORY) if output_dir and dest_dir and output_dir != dest_dir: - # Note: only eos-cp needs `/` at the end of dirs, but should not hurt in any case - cp_command = f'cp -r {_str_ending_with_slash(output_dir)} {_str_ending_with_slash(dest_dir)}' if is_eos_uri(dest_dir): - cp_command = f'eos {cp_command}' + # Note: eos-cp needs `/` at the end of both, source and target, dirs... + cp_command = f'eos cp -r {_str_ending_with_slash(output_dir)} {_str_ending_with_slash(dest_dir)}' + else: + # ...but '/' at the end of source dir copies only the content on macOS. + cp_command = f'cp -r {output_dir} {_str_ending_with_slash(dest_dir)}' f.write(f'{cp_command}\n') diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 6f79dcb..990d573 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -88,7 +88,7 @@ def test_find_errorneous_percentage_signs(tmp_path, maskfile): setup.create_mask(content=mask, as_file=maskfile) with pytest.raises(KeyError) as e: job_submit(**asdict(setup)) - assert "problematic '%'" in e.value.args[0] + assert "problematic '%'" in str(e) @run_only_on_linux @@ -100,7 +100,7 @@ def test_missing_keys(tmp_path, maskfile): setup.create_mask(content=mask, as_file=maskfile) with pytest.raises(KeyError) as e: job_submit(**asdict(setup)) - assert "PARAM3" in e.value.args[0] + assert "PARAM3" in str(e) @run_if_not_linux @@ -112,7 +112,7 @@ def test_not_on_linux(tmp_path): setup.create_mask() with pytest.raises(EnvironmentError) as e: job_submit(**asdict(setup)) - assert "htcondor bindings" in e.value.args[0] + assert "htcondor bindings" in str(e) def test_eos_uri(): @@ -134,9 +134,8 @@ def test_htc_submit(uri: bool): You need to adapt the path and delete the results afterwards manually.""" # Fix the kerberos ticket path. 
# Do klist to find your ticket manually. - import os + # import os # os.environ["KRB5CCNAME"] = "/tmp/krb5cc_####" - os.environ["KRB5CCNAME"] = "/tmp/krb5cc_106029" tmp_name = "htc_temp" if uri: @@ -158,7 +157,7 @@ def test_htc_submit(uri: bool): setup.create_mask() prerun = True - prerun = False # Manually switch here after running. + # prerun = False # Manually switch here after running. if prerun: job_submit(**asdict(setup)) _test_subfile_content(setup) @@ -277,4 +276,4 @@ def _generate_combinations(data: Dict[str, Sequence]) -> List[Dict[str, Any]]: for values in itertools.product(*all_values) ] - return combinations \ No newline at end of file + return combinations From cc1c7b536d26da7e98dc09bacd5020d87dfb7e14 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 8 Nov 2023 19:54:41 +0100 Subject: [PATCH 20/30] skip windows --- tests/unit/test_job_submitter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 990d573..70c749d 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -115,6 +115,7 @@ def test_not_on_linux(tmp_path): assert "htcondor bindings" in str(e) +pytest.mark.skipif(on_windows(), reason="Paths are not split on '/' on Windows.") def test_eos_uri(): """ Unit-test for the EOS-URI parsing. (OH LOOK! An actual unit test!)""" server = "root://eosuser.cern.ch/" From a9dcc34502d5446dedde74883955ff392c0c1918 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 8 Nov 2023 19:57:51 +0100 Subject: [PATCH 21/30] missing @ --- tests/unit/test_job_submitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 70c749d..8771dc0 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -115,7 +115,7 @@ def test_not_on_linux(tmp_path): assert "htcondor bindings" in str(e) -pytest.mark.skipif(on_windows(), reason="Paths are not split on '/' on Windows.") +@pytest.mark.skipif(on_windows(), reason="Paths are not split on '/' on Windows.") def test_eos_uri(): """ Unit-test for the EOS-URI parsing. (OH LOOK! An actual unit test!)""" server = "root://eosuser.cern.ch/" From 6e69ecaa9576b235d5c87a2a7ba9c7b2cbbe146c Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Fri, 10 Nov 2023 11:42:40 +0100 Subject: [PATCH 22/30] conf, changelog, additional doc for output_destination --- CHANGELOG.md | 13 +++++++++++++ doc/conf.py | 18 ++++++++++++++---- pylhc_submitter/job_submitter.py | 6 ++++-- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c0c717..9963c3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # `pylhc-submitter` Changelog +## Version 2.0.0 + +- General code cleanup/refactoring/documentation. + - partly breaks backward compatibility, if individual methods of the `job_submitter`-functionality have been used. + - does not affect any setups simply calling the `main()` function of `job_submitter.py` or calling the `job_submitter` as a module. + - Apart from some fixed imports, following the new structure, the `autosix` module has been untouched. 
+
+
+- New Feature of `job_submitter`:
+  - `output_destination` input parameter, which sets an output directory in which the folder-structure
+    for the jobs will be replicated and the job's `job_output_dir` will be copied into "manually" at the end of the job,
+    instead of having the directory transferred back to the `working directory` by htcondor.
+
 ## Version 1.1.1
 
 - Uses `concat` instead of `append` to stack the DataFrames.
diff --git a/doc/conf.py b/doc/conf.py
index 9da8056..283ad0a 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -31,10 +31,20 @@
 sys.path.insert(0, str(TOPLEVEL_DIR))
 
-
-ABOUT_PYLHC_SUBMITTER: dict = {}
-with ABOUT_FILE.open("r") as f:
-    exec(f.read(), ABOUT_PYLHC_SUBMITTER)
+def about_package(init_posixpath: pathlib.Path) -> dict:
+    """
+    Return package information defined with dunders in __init__.py as a dictionary, when
+    provided with a PosixPath to the __init__.py file.
+    """
+    about_text: str = init_posixpath.read_text()
+    return {
+        entry.split(" = ")[0]: entry.split(" = ")[1].strip('"')
+        for entry in about_text.strip().split("\n")
+        if entry.startswith("__")
+    }
+
+
+ABOUT_PYLHC_SUBMITTER = about_package(ABOUT_FILE)
 
 # -- General configuration ------------------------------------------------
diff --git a/pylhc_submitter/job_submitter.py b/pylhc_submitter/job_submitter.py
index 30d08a4..9032412 100644
--- a/pylhc_submitter/job_submitter.py
+++ b/pylhc_submitter/job_submitter.py
@@ -110,7 +110,8 @@
 
 - **output_destination** *(PathOrStr)*:
 
-    Directory where to store the output of the jobs . (Can be on EOS)
+    Directory to copy the output of the jobs to, sorted into folders per job.
+    Can be on EOS, preferably via EOS-URI format ('root://eosuser.cern.ch//eos/...').
 
 
 - **resume_jobs**:
@@ -295,7 +296,8 @@ def get_params():
     )
     params.add_parameter(
         name="output_destination",
-        help="Directory where to store the output of the jobs . (Can be on EOS)",
+        help="Directory to copy the output of the jobs to, sorted into folders per job. "
+             "Can be on EOS, preferably via EOS-URI format ('root://eosuser.cern.ch//eos/...').",
         type=PathOrStr,
     )
     params.add_parameter(
From 147b3c621787f2df1d9a47fcd0a8efc8c10fa8d6 Mon Sep 17 00:00:00 2001
From: JoschD <26184899+JoschD@users.noreply.github.com>
Date: Fri, 10 Nov 2023 11:51:58 +0100
Subject: [PATCH 23/30] extracting HTC constants

---
 pylhc_submitter/constants/autosix.py        |  4 ++--
 pylhc_submitter/constants/external_paths.py |  4 ++--
 pylhc_submitter/constants/general.py        |  4 ++--
 pylhc_submitter/constants/job_submitter.py  |  4 ++--
 pylhc_submitter/job_submitter.py            |  2 +-
 pylhc_submitter/submitter/htc_utils.py      | 24 ++------------------
 pylhc_submitter/submitter/iotools.py        |  7 +++---
 7 files changed, 15 insertions(+), 34 deletions(-)

diff --git a/pylhc_submitter/constants/autosix.py b/pylhc_submitter/constants/autosix.py
index 39d0945..b91065d 100644
--- a/pylhc_submitter/constants/autosix.py
+++ b/pylhc_submitter/constants/autosix.py
@@ -1,6 +1,6 @@
 """
-Constants: Autosix
-----------------------------------
+Autosix
+-------
 
 Collections of constants and paths used in autosix.
 
diff --git a/pylhc_submitter/constants/external_paths.py b/pylhc_submitter/constants/external_paths.py
index d470cbf..2e7e9e2 100644
--- a/pylhc_submitter/constants/external_paths.py
+++ b/pylhc_submitter/constants/external_paths.py
@@ -1,6 +1,6 @@
 """
-Constants: External Paths
--------------------------
+External Paths
+--------------
 
 Specific constants relating to external paths to be used,
 to help with consistency.
diff --git a/pylhc_submitter/constants/general.py b/pylhc_submitter/constants/general.py index 59f796d..2c68d93 100644 --- a/pylhc_submitter/constants/general.py +++ b/pylhc_submitter/constants/general.py @@ -1,6 +1,6 @@ """ -Constants: General ------------------- +General +------- General constants to help with consistency. """ diff --git a/pylhc_submitter/constants/job_submitter.py b/pylhc_submitter/constants/job_submitter.py index aca598c..93c1236 100644 --- a/pylhc_submitter/constants/job_submitter.py +++ b/pylhc_submitter/constants/job_submitter.py @@ -1,7 +1,7 @@ """ -Constants: Job Submitter ----------------------------------- +Job Submitter +------------- Collections of constants and paths used in the job-submitter. """ diff --git a/pylhc_submitter/job_submitter.py b/pylhc_submitter/job_submitter.py index 9032412..6a79982 100644 --- a/pylhc_submitter/job_submitter.py +++ b/pylhc_submitter/job_submitter.py @@ -160,7 +160,7 @@ from generic_parser.tools import print_dict_tree from pylhc_submitter.constants.job_submitter import EXECUTEABLEPATH, SCRIPT_EXTENSIONS -from pylhc_submitter.submitter.htc_utils import JOBFLAVOURS +from pylhc_submitter.constants.htcondor import JOBFLAVOURS from pylhc_submitter.submitter.iotools import CreationOpts, create_jobs, is_eos_uri, print_stats from pylhc_submitter.submitter.mask import (check_percentage_signs_in_mask, find_named_variables_in_mask, is_mask_file) diff --git a/pylhc_submitter/submitter/htc_utils.py b/pylhc_submitter/submitter/htc_utils.py index 4482b17..64be126 100644 --- a/pylhc_submitter/submitter/htc_utils.py +++ b/pylhc_submitter/submitter/htc_utils.py @@ -19,6 +19,8 @@ from pandas import DataFrame +from pylhc_submitter.constants.htcondor import (BASH_FILENAME, CMD_SUBMIT, HTCONDOR_JOBLIMIT, + JOBFLAVOURS, NOTIFICATIONS, SHEBANG, SUBFILE) from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE, COLUMN_SHELL_SCRIPT, EXECUTEABLEPATH, NON_PARAMETER_COLUMNS) @@ -36,28 +38,6 @@ class htcondor: LOG = logging.getLogger(__name__) -# HTC Constants ################################################################ - -SHEBANG = "#!/bin/bash" -SUBFILE = "queuehtc.sub" -BASH_FILENAME = "Job" - -HTCONDOR_JOBLIMIT = 100000 - -CMD_SUBMIT = "condor_submit" -JOBFLAVOURS = ( - "espresso", # 20 min - "microcentury", # 1 h - "longlunch", # 2 h - "workday", # 8 h - "tomorrow", # 1 d - "testmatch", # 3 d - "nextweek", # 1 w -) - -NOTIFICATIONS = ("always", "complete", "error", "never") - - # Subprocess Methods ########################################################### diff --git a/pylhc_submitter/submitter/iotools.py b/pylhc_submitter/submitter/iotools.py index be5669e..5c66c90 100644 --- a/pylhc_submitter/submitter/iotools.py +++ b/pylhc_submitter/submitter/iotools.py @@ -14,12 +14,13 @@ import pandas as pd import tfs +from pylhc_submitter.constants.htcondor import HTCONDOR_JOBLIMIT from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, COLUMN_JOBID, JOBDIRECTORY_PREFIX, JOBSUMMARY_FILE, SCRIPT_EXTENSIONS) from pylhc_submitter.submitter import htc_utils -from pylhc_submitter.submitter.mask import (create_job_scripts_from_mask, - generate_jobdf_index, is_mask_file) +from pylhc_submitter.submitter.mask import (create_job_scripts_from_mask, generate_jobdf_index, + is_mask_file) LOG = logging.getLogger(__name__) @@ -75,7 +76,7 @@ def create_jobs(opt: CreationOpts) -> tfs.TfsDataFrame: if njobs == 0: raise ValueError(f"No (new) jobs found!") - if njobs > 
htc_utils.HTCONDOR_JOBLIMIT: + if njobs > HTCONDOR_JOBLIMIT: LOG.warning( f"You are attempting to submit an important number of jobs ({njobs})." "This can be a high stress on your system, make sure you know what you are doing." From a697079a9792ba8d4afda6d6f2fcbbac80eaee6b Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Fri, 10 Nov 2023 11:53:01 +0100 Subject: [PATCH 24/30] actually add constants file --- pylhc_submitter/constants/htcondor.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 pylhc_submitter/constants/htcondor.py diff --git a/pylhc_submitter/constants/htcondor.py b/pylhc_submitter/constants/htcondor.py new file mode 100644 index 0000000..8e9ac24 --- /dev/null +++ b/pylhc_submitter/constants/htcondor.py @@ -0,0 +1,24 @@ +""" +HTCondor +-------- + +Constants for the HTCondor parameters. +""" +SHEBANG = "#!/bin/bash" +SUBFILE = "queuehtc.sub" +BASH_FILENAME = "Job" + +HTCONDOR_JOBLIMIT = 100000 + +CMD_SUBMIT = "condor_submit" +JOBFLAVOURS = ( + "espresso", # 20 min + "microcentury", # 1 h + "longlunch", # 2 h + "workday", # 8 h + "tomorrow", # 1 d + "testmatch", # 3 d + "nextweek", # 1 w +) + +NOTIFICATIONS = ("always", "complete", "error", "never") \ No newline at end of file From ed7abbfdce503531a680216209620ad8811c8748 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Fri, 10 Nov 2023 11:56:36 +0100 Subject: [PATCH 25/30] typehint for df_row --- pylhc_submitter/submitter/htc_utils.py | 5 ----- pylhc_submitter/submitter/runners.py | 7 ++++--- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/pylhc_submitter/submitter/htc_utils.py b/pylhc_submitter/submitter/htc_utils.py index 64be126..f1efd4e 100644 --- a/pylhc_submitter/submitter/htc_utils.py +++ b/pylhc_submitter/submitter/htc_utils.py @@ -52,7 +52,6 @@ def create_subfile_from_job(cwd: Path, submission: Union[str, htcondor.Submit]) Returns: Path: path to sub-file - """ subfile = cwd / SUBFILE LOG.debug(f"Writing sub-file '{str(subfile)}'.") @@ -67,7 +66,6 @@ def submit_jobfile(jobfile: Path, ssh: str) -> None: Args: jobfile (Path): path to sub-file ssh (str): ssh target - """ proc_args = [CMD_SUBMIT, jobfile] if ssh: @@ -87,7 +85,6 @@ def _start_subprocess(command: List[str]) -> int: Returns: int: return code of the process - """ LOG.debug(f"Executing command '{command}'") process = subprocess.Popen( @@ -191,7 +188,6 @@ def write_bash( Returns: DataFrame: The provided ``job_df`` but with added path to the scripts. - """ if len(job_df.index) > HTCONDOR_JOBLIMIT: raise AttributeError("Submitting too many jobs for HTCONDOR") @@ -256,7 +252,6 @@ def map_kwargs(add_dict: Dict[str, Any]) -> Dict[str, Any]: Returns: Dict[str, Any]: The mapped kwargs. - """ new = {} diff --git a/pylhc_submitter/submitter/runners.py b/pylhc_submitter/submitter/runners.py index f7ab0d0..49d9ce8 100644 --- a/pylhc_submitter/submitter/runners.py +++ b/pylhc_submitter/submitter/runners.py @@ -9,7 +9,7 @@ import subprocess from dataclasses import dataclass from pathlib import Path -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Tuple import pandas as pd import tfs @@ -97,11 +97,12 @@ def run_htc(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: # Helper ####################################################################### -def _execute_shell(df_row: pd.Series) -> int: +def _execute_shell(df_row: Tuple(Any, pd.Series)) -> int: """ Execute the shell script. 
Args: - df_row (pd.Series): row in the job-dataframe + df_row (Tuple[Any, pd.Series]): Row in the job-dataframe as coming from `iterrows()`, + i.e. a tuple of (index, series) Returns: int: return code of the process From a46a6cba2fb98e5d27e78f344411d82cd6cbab93 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Fri, 10 Nov 2023 12:11:27 +0100 Subject: [PATCH 26/30] test naming and test doc --- tests/unit/test_job_submitter.py | 48 +++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 8771dc0..567a536 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -44,7 +44,7 @@ def test_output_directory(tmp_path): _test_output(setup) -def test_wrong_uri(tmp_path): +def test_detects_wrong_uri(tmp_path): """ Tests that wrong URI's are identified. """ setup = InputParameters( working_directory=tmp_path, @@ -104,7 +104,7 @@ def test_missing_keys(tmp_path, maskfile): @run_if_not_linux -def test_not_on_linux(tmp_path): +def test_htcondor_bindings_not_found_on_nonlinux_os(tmp_path): """ Test that an error is raised if htcondor bindings are not found. If this tests fails, this might mean, that htcondor bindings are finally available for the other platforms. """ @@ -116,7 +116,7 @@ def test_not_on_linux(tmp_path): @pytest.mark.skipif(on_windows(), reason="Paths are not split on '/' on Windows.") -def test_eos_uri(): +def test_eos_uri_manipulation_functions(): """ Unit-test for the EOS-URI parsing. (OH LOOK! An actual unit test!)""" server = "root://eosuser.cern.ch/" path = "/eos/user/m/mmustermann/" @@ -129,16 +129,36 @@ def test_eos_uri(): @run_only_on_linux @pytest.mark.cern_network -@pytest.mark.parametrize("uri", [True, False]) -def test_htc_submit(uri: bool): - """ This test is here for local testing only. - You need to adapt the path and delete the results afterwards manually.""" - # Fix the kerberos ticket path. - # Do klist to find your ticket manually. +@pytest.mark.parametrize("destination", [True, False]) +@pytest.mark.parametrize("uri", [False, True]) +def test_htc_submit(destination: bool, uri: bool): + """ This test is here for manual testing. + It runs 3 scenarios and each submits 6 jobs to HTCondor. + This means you need to be in the cern-network on a machine with afs and eos access + and htcondor installed. + You need to adapt the path to your user-name and delete the results afterwards manually. + + Scenarios: + a) destination = False: Transfer output data back to afs + b) destination = True, uri = False: Copy output data to EOS (via eos path) + c) destination = True, uri = True: Copy output data to EOS (via eos uri) + + Run this test twice, manually changing `prerun` from "True" to "False" after the jobs are finished. + - `prerun = True`: create the folder structures and submit the jobs. + - `prerun = False`: check that the output data is present. + """ + if uri and not destination: + return # only need to run one of those + + # Fix the kerberos ticket path, in case kerberos doesn't find it. + # Do a `klist` in terminal to find your ticket manually and adapt the path. 
# import os # os.environ["KRB5CCNAME"] = "/tmp/krb5cc_####" tmp_name = "htc_temp" + if destination: + tmp_name = f"{tmp_name}_dest" + if uri: tmp_name = f"{tmp_name}_uri" @@ -146,9 +166,11 @@ def test_htc_submit(uri: bool): path = Path("/", "afs", "cern.ch", "user", user[0], user, tmp_name) path.mkdir(exist_ok=True) - dest = f"/eos/user/{user[0]}/{user}/{tmp_name}" - if uri: - dest = f"root://eosuser.cern.ch/{dest}" + dest = None + if destination: + dest = f"/eos/user/{user[0]}/{user}/{tmp_name}" + if uri: + dest = f"root://eosuser.cern.ch/{dest}" setup = InputParameters( working_directory=path, @@ -158,7 +180,7 @@ def test_htc_submit(uri: bool): setup.create_mask() prerun = True - # prerun = False # Manually switch here after running. + # prerun = False # !! Manually switch here after jobs finished. if prerun: job_submit(**asdict(setup)) _test_subfile_content(setup) From ac4e6e930e85b8fe33f51a7fff41201e04b29bf6 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Fri, 10 Nov 2023 12:20:41 +0100 Subject: [PATCH 27/30] even more doc in test --- tests/unit/test_job_submitter.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 567a536..f395840 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -147,31 +147,37 @@ def test_htc_submit(destination: bool, uri: bool): - `prerun = True`: create the folder structures and submit the jobs. - `prerun = False`: check that the output data is present. """ - if uri and not destination: - return # only need to run one of those - - # Fix the kerberos ticket path, in case kerberos doesn't find it. - # Do a `klist` in terminal to find your ticket manually and adapt the path. + # MANUAL THINGS TO CHANGE ############################################## + user = "mmustermann" # set your username + tmp_name = "htc_temp" # name for the temporary folder (will be created) + prerun = True + # prerun = False # switch here after jobs finished. + + # Uncomment to fix the kerberos ticket, in case htcondor doesn't find it. + # Do a `klist` in terminal and adapt the path. # import os # os.environ["KRB5CCNAME"] = "/tmp/krb5cc_####" + ######################################################################## + if uri and not destination: + return # only need to run one when destination is not set - tmp_name = "htc_temp" + # set working_directory if destination: tmp_name = f"{tmp_name}_dest" + if uri: + tmp_name = f"{tmp_name}_uri" - if uri: - tmp_name = f"{tmp_name}_uri" - - user = "jdilly" path = Path("/", "afs", "cern.ch", "user", user[0], user, tmp_name) path.mkdir(exist_ok=True) + # set output_destination dest = None if destination: dest = f"/eos/user/{user[0]}/{user}/{tmp_name}" if uri: dest = f"root://eosuser.cern.ch/{dest}" + # create setup setup = InputParameters( working_directory=path, output_destination=dest, @@ -179,13 +185,13 @@ def test_htc_submit(destination: bool, uri: bool): ) setup.create_mask() - prerun = True - # prerun = False # !! Manually switch here after jobs finished. 
if prerun: + # submit jobs job_submit(**asdict(setup)) _test_subfile_content(setup) _test_output(setup, post_run=False) else: + # check output _test_output(setup, post_run=True) From 63c7c1f22cd9a3c574db93c7f32c3016f115af89 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Fri, 10 Nov 2023 12:38:08 +0100 Subject: [PATCH 28/30] fixing cyclic import --- pylhc_submitter/job_submitter.py | 4 ++-- pylhc_submitter/submitter/htc_utils.py | 4 ++-- pylhc_submitter/submitter/runners.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pylhc_submitter/job_submitter.py b/pylhc_submitter/job_submitter.py index 6a79982..6efe9b8 100644 --- a/pylhc_submitter/job_submitter.py +++ b/pylhc_submitter/job_submitter.py @@ -159,11 +159,11 @@ from generic_parser.entry_datatypes import DictAsString from generic_parser.tools import print_dict_tree -from pylhc_submitter.constants.job_submitter import EXECUTEABLEPATH, SCRIPT_EXTENSIONS from pylhc_submitter.constants.htcondor import JOBFLAVOURS +from pylhc_submitter.constants.job_submitter import EXECUTEABLEPATH, SCRIPT_EXTENSIONS from pylhc_submitter.submitter.iotools import CreationOpts, create_jobs, is_eos_uri, print_stats from pylhc_submitter.submitter.mask import (check_percentage_signs_in_mask, - find_named_variables_in_mask, is_mask_file) + find_named_variables_in_mask, is_mask_file) from pylhc_submitter.submitter.runners import RunnerOpts, run_jobs from pylhc_submitter.utils.iotools import (PathOrStr, keys_to_path, make_replace_entries_iterable, save_config) diff --git a/pylhc_submitter/submitter/htc_utils.py b/pylhc_submitter/submitter/htc_utils.py index f1efd4e..873fd23 100644 --- a/pylhc_submitter/submitter/htc_utils.py +++ b/pylhc_submitter/submitter/htc_utils.py @@ -24,7 +24,7 @@ from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE, COLUMN_SHELL_SCRIPT, EXECUTEABLEPATH, NON_PARAMETER_COLUMNS) -from pylhc_submitter.submitter.iotools import is_eos_uri +from pylhc_submitter.submitter import iotools from pylhc_submitter.submitter.mask import is_mask_file from pylhc_submitter.utils.environment import on_windows @@ -227,7 +227,7 @@ def write_bash( # Manually copy output (if needed) --- dest_dir = job.get(COLUMN_DEST_DIRECTORY) if output_dir and dest_dir and output_dir != dest_dir: - if is_eos_uri(dest_dir): + if iotools.is_eos_uri(dest_dir): # Note: eos-cp needs `/` at the end of both, source and target, dirs... cp_command = f'eos cp -r {_str_ending_with_slash(output_dir)} {_str_ending_with_slash(dest_dir)}' else: diff --git a/pylhc_submitter/submitter/runners.py b/pylhc_submitter/submitter/runners.py index 49d9ce8..c215e07 100644 --- a/pylhc_submitter/submitter/runners.py +++ b/pylhc_submitter/submitter/runners.py @@ -97,7 +97,7 @@ def run_htc(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: # Helper ####################################################################### -def _execute_shell(df_row: Tuple(Any, pd.Series)) -> int: +def _execute_shell(df_row: Tuple[Any, pd.Series]) -> int: """ Execute the shell script. 
Args: From 55e8205dc98db003e0e0fadce64d233bfb0e5479 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Fri, 10 Nov 2023 12:48:50 +0100 Subject: [PATCH 29/30] fixes to CHANGELOG --- CHANGELOG.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9963c3b..75e8f64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,16 +2,16 @@ ## Version 2.0.0 -- General code cleanup/refactoring/documentation. - - partly breaks backward compatibility, if individual methods of the `job_submitter`-functionality have been used. - - does not affect any setups simply calling the `main()` function of `job_submitter.py` or calling the `job_submitter` as a module. - - Apart from some fixed imports, following the new structure, the `autosix` module has been untouched. +- General code cleanup/refactoring/documentation: + - Partly breaks backward compatibility, if individual methods of the `job_submitter`-functionality have been used. + - Does not affect any setups simply calling the `main()` function of `job_submitter.py` or calling the `job_submitter` as a module. + - Apart from some fixed imports, following the new structure, the `autosix` module has been untouched. - New Feature of `job_submitter`: - `output_destination` input parameter, which sets an output directory in which the folder-stucture - for the jobs will be replicated and the job's `job_output_dir` will be copied into "manually" at the end of the job, - instead of having the directory transferred back to the `working directory` by htcondor. + for the jobs will be replicated and the job's `job_output_dir` will be copied into "manually" at the end of the job, + instead of having the directory transferred back to the `working directory` by htcondor. ## Version 1.1.1 From b26a9db7f756fcf24ef50a86fcb09a284a913f36 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Fri, 10 Nov 2023 12:51:59 +0100 Subject: [PATCH 30/30] added constants module to doc --- doc/modules/constants.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/modules/constants.rst b/doc/modules/constants.rst index c7b3011..8474b25 100644 --- a/doc/modules/constants.rst +++ b/doc/modules/constants.rst @@ -15,6 +15,10 @@ Constants Definitions :members: :noindex: +.. automodule:: pylhc_submitter.constants.htcondor + :members: + :noindex: + .. automodule:: pylhc_submitter.constants.autosix :members: :noindex: