From a5321daa86b01d96c69c34bd452d02061350843a Mon Sep 17 00:00:00 2001 From: Andrey Abramov Date: Wed, 1 Nov 2023 16:50:20 +0100 Subject: [PATCH 01/30] Add an option to set a destination directory for the simulation output --- pylhc_submitter/htc/utils.py | 32 ++++++++++++++ pylhc_submitter/job_submitter.py | 76 +++++++++++++++++++++++++++++--- 2 files changed, 102 insertions(+), 6 deletions(-) diff --git a/pylhc_submitter/htc/utils.py b/pylhc_submitter/htc/utils.py index 80f2bc2..b9905a2 100644 --- a/pylhc_submitter/htc/utils.py +++ b/pylhc_submitter/htc/utils.py @@ -60,6 +60,7 @@ COLUMN_SHELL_SCRIPT = "ShellScript" COLUMN_JOB_DIRECTORY = "JobDirectory" +COLUMN_DEST_DIRECTORY = "DestDirectory" COLUMN_JOB_FILE = "JobFile" @@ -159,6 +160,7 @@ def make_subfile(cwd: Path, job_df: DataFrame, **kwargs): def write_bash( job_df: DataFrame, output_dir: Path = None, + destination_dir: Path = None, executable: str = "madx", cmdline_arguments: dict = None, mask: Union[str, Path] = None, @@ -196,7 +198,17 @@ def write_bash( f.write(mask % dict(zip(replace_columns, job[replace_columns]))) f.write(cmds) f.write("\n") + + if destination_dir is not None: + if output_dir is not None: + cp_command = f'cp -r {output_dir} {job[COLUMN_DEST_DIRECTORY]}' + if is_eos_path(destination_dir): + cp_command = f'eos {cp_command}' + + f.write(f'{cp_command}\n') + shell_scripts[idx] = bash_file_name + job_df[COLUMN_SHELL_SCRIPT] = shell_scripts return job_df @@ -244,6 +256,26 @@ def _maybe_put_in_quotes(key, value): return f'"{value}"' return value +def is_eos_path(path): + is_eos = False + path = Path(path) + strip_path_parts = _strip_eos_uri(path).parts + if len(strip_path_parts) > 1 and strip_path_parts[1] == 'eos': + is_eos = True + return is_eos + + +def _strip_eos_uri(path): + # EOS paths for HTCondor are given with URI, strip for direct writing + # root://eosuser.cern.ch//eos/user/a/anabramo/desktop_sync/banana.txt + path = Path(path) + parts = path.parts + outpath = path + if parts[0].endswith(':'): + # the first two parts are host info, e.g `file: //host/path` + outpath = Path('/', *parts[2:]) + return outpath + def is_mask_file(mask): try: diff --git a/pylhc_submitter/job_submitter.py b/pylhc_submitter/job_submitter.py index dda9aa3..745e66b 100644 --- a/pylhc_submitter/job_submitter.py +++ b/pylhc_submitter/job_submitter.py @@ -99,10 +99,12 @@ ) from pylhc_submitter.htc.utils import ( COLUMN_JOB_DIRECTORY, + COLUMN_DEST_DIRECTORY, COLUMN_SHELL_SCRIPT, EXECUTEABLEPATH, HTCONDOR_JOBLIMIT, JOBFLAVOURS, + _strip_eos_uri, ) from pylhc_submitter.utils.environment_tools import on_windows from pylhc_submitter.utils.iotools import PathOrStr, save_config, make_replace_entries_iterable, keys_to_path @@ -245,6 +247,11 @@ def get_params(): type=str, default="Outputdata", ) + params.add_parameter( + name="output_destination", + help="Directory where to store the output of the jobs . 
(Can be on EOS)", + type=PathOrStr, + ) params.add_parameter( name="htc_arguments", help=( @@ -282,6 +289,7 @@ def main(opt): opt.jobid_mask, opt.replace_dict, opt.job_output_dir, + opt.output_destination, opt.append_jobs, opt.executable, opt.script_arguments, @@ -298,6 +306,7 @@ def main(opt): job_df, opt.working_directory, opt.job_output_dir, + opt.output_destination, opt.jobflavour, opt.ssh, opt.dryrun, @@ -316,6 +325,7 @@ def _create_jobs( jobid_mask, replace_dict, output_dir, + output_dest, append_jobs, executable, script_args, @@ -354,7 +364,7 @@ def _create_jobs( data=values_grid, ) job_df = tfs.concat([job_df, data_df], sort=False, how_headers='left') - job_df = _setup_folders(job_df, cwd) + job_df = _setup_folders(job_df, cwd, output_dest) if htcutils.is_mask_file(mask_path_or_string): LOG.debug("Creating all jobs from mask.") @@ -367,12 +377,14 @@ def _create_jobs( job_df = htcutils.write_bash( job_df, output_dir, + destination_dir=output_dest, executable=executable, cmdline_arguments=script_args, mask=mask_path_or_string, ) job_df[COLUMN_JOB_DIRECTORY] = job_df[COLUMN_JOB_DIRECTORY].apply(str) + job_df[COLUMN_DEST_DIRECTORY] = job_df[COLUMN_DEST_DIRECTORY].apply(str) tfs.write(str(cwd / JOBSUMMARY_FILE), job_df, save_index=COLUMN_JOBID) return job_df @@ -398,6 +410,13 @@ def _drop_already_ran_jobs( def _run_local(job_df: tfs.TfsDataFrame, num_processes: int) -> None: LOG.info(f"Running {len(job_df.index)} jobs locally in {num_processes:d} processes.") + + # URI type EOS addresses won't work for copying files from local jobs + check_dest = job_df.iloc[0][COLUMN_DEST_DIRECTORY] + if not _strip_eos_uri(check_dest) == Path(check_dest): + LOG.warning("The output desitnation is likely specified as EOS URI," + "which will not work during a local run") + pool = multiprocessing.Pool(processes=num_processes) res = pool.map(_execute_shell, job_df.iterrows()) if any(res): @@ -409,6 +428,7 @@ def _run_htc( job_df: tfs.TfsDataFrame, cwd: str, output_dir: str, + dest_dir: str, flavour: str, ssh: str, dryrun: bool, @@ -416,9 +436,19 @@ def _run_htc( ) -> None: LOG.info(f"Submitting {len(job_df.index)} jobs on htcondor, flavour '{flavour}'.") LOG.debug("Creating htcondor subfile.") - subfile = htcutils.make_subfile( - cwd, job_df, output_dir=output_dir, duration=flavour, **additional_htc_arguments - ) + + # If a different destination for the data is required + # is is handled through the job bash files, so remove it from + # HTConodor's file transfer specification + if dest_dir is None: + subfile = htcutils.make_subfile( + cwd, job_df, output_dir=output_dir, duration=flavour, **additional_htc_arguments + ) + else: + subfile = htcutils.make_subfile( + cwd, job_df, duration=flavour, **additional_htc_arguments + ) + if not dryrun: LOG.debug("Submitting jobs to htcondor.") htcutils.submit_jobfile(subfile, ssh) @@ -439,13 +469,25 @@ def _check_htcondor_presence() -> None: raise EnvironmentError("htcondor bindings are necessary to run this module.") -def _setup_folders(job_df: tfs.TfsDataFrame, working_directory: PathOrStr) -> tfs.TfsDataFrame: +def _setup_folders(job_df: tfs.TfsDataFrame, working_directory: PathOrStr, + destination_directory: PathOrStr = None) -> tfs.TfsDataFrame: def _return_job_dir(job_id): return working_directory / f"{JOBDIRECTORY_PREFIX}.{job_id}" + + def _return_dest_dir(job_id): + return destination_directory / f"{JOBDIRECTORY_PREFIX}.{job_id}" LOG.debug("Setting up folders: ") job_df[COLUMN_JOB_DIRECTORY] = [_return_job_dir(id_) for id_ in job_df.index] + if 
destination_directory is not None: + _custom_output_dest = True + job_df[COLUMN_DEST_DIRECTORY] = [_return_dest_dir(id_) for id_ in job_df.index] + else: + _custom_output_dest = False + job_df[COLUMN_DEST_DIRECTORY] = job_df[COLUMN_JOB_DIRECTORY] + + for job_dir in job_df[COLUMN_JOB_DIRECTORY]: try: job_dir.mkdir() @@ -453,11 +495,30 @@ def _return_job_dir(job_id): LOG.debug(f" failed '{job_dir}' (might already exist).") else: LOG.debug(f" created '{job_dir}'.") + + if _custom_output_dest: + strip_dest_dir = _strip_eos_uri(destination_directory) + strip_dest_dir.mkdir(parents=True, exist_ok=True) + + # Make some symlinks for easy navigation + sym_submission = destination_directory / Path('SUBMISSION_DIR') + sym_submission.symlink_to(working_directory.resolve()) + sym_destination = working_directory / Path('OUTPUT_DIR') + sym_destination.symlink_to(destination_directory.resolve()) + + for job_dest_dir in job_df[COLUMN_DEST_DIRECTORY]: + try: + _strip_eos_uri(job_dest_dir).mkdir() + except IOError: + LOG.debug(f" failed '{job_dest_dir}' (might already exist).") + else: + LOG.debug(f" created '{job_dest_dir}'.") + return job_df def _job_was_successful(job_row, output_dir, files) -> bool: - output_dir = Path(job_row[COLUMN_JOB_DIRECTORY], output_dir) + output_dir = Path(job_row[COLUMN_DEST_DIRECTORY], output_dir) success = output_dir.is_dir() and any(output_dir.iterdir()) if success and files is not None and len(files): for f in files: @@ -497,6 +558,9 @@ def _check_opts(opt): else: mask = opt.mask + if "output_destination" in opt and opt["output_destination"] is not None: + opt["output_destination"] = Path(opt["output_destination"]) + # Replace dict --- dict_keys = set(opt.replace_dict.keys()) mask_keys = find_named_variables_in_mask(mask) From e0a99abf4aa8ef8603ef5690106f2a7572298d3c Mon Sep 17 00:00:00 2001 From: Andrey Abramov Date: Thu, 2 Nov 2023 10:07:48 +0100 Subject: [PATCH 02/30] Tidy up function. 
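
is_eos_path now reduces to a single boolean expression and _strip_eos_uri keeps its
behaviour: it drops the URI scheme and host that HTCondor uses for EOS addresses,
leaving the plain /eos path. A minimal sketch of the expected behaviour (not part of
the patch itself, assuming POSIX-style pathlib parsing):

    from pathlib import Path
    from pylhc_submitter.htc.utils import is_eos_path, _strip_eos_uri

    uri = "root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt"

    # 'root:' and the host part are stripped, only the local /eos path remains.
    assert _strip_eos_uri(Path(uri)) == Path("/eos/user/a/anabramo/banana.txt")

    # is_eos_path checks the second component of the stripped path.
    assert is_eos_path(uri)
    assert not is_eos_path("/afs/cern.ch/user/a/anabramo")
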
--- pylhc_submitter/htc/utils.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pylhc_submitter/htc/utils.py b/pylhc_submitter/htc/utils.py index b9905a2..47f0eb9 100644 --- a/pylhc_submitter/htc/utils.py +++ b/pylhc_submitter/htc/utils.py @@ -256,18 +256,16 @@ def _maybe_put_in_quotes(key, value): return f'"{value}"' return value -def is_eos_path(path): - is_eos = False - path = Path(path) - strip_path_parts = _strip_eos_uri(path).parts - if len(strip_path_parts) > 1 and strip_path_parts[1] == 'eos': - is_eos = True - return is_eos + +def is_eos_path(path): + path = Path(path) + strip_path_parts = _strip_eos_uri(path).parts + return len(strip_path_parts) > 1 and strip_path_parts[1] == 'eos' def _strip_eos_uri(path): # EOS paths for HTCondor are given with URI, strip for direct writing - # root://eosuser.cern.ch//eos/user/a/anabramo/desktop_sync/banana.txt + # root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt path = Path(path) parts = path.parts outpath = path From 129ed15c9390038250aa22064e871e6844c987c4 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Thu, 2 Nov 2023 14:36:10 +0100 Subject: [PATCH 03/30] reversed folder creation logic --- pylhc_submitter/job_submitter.py | 42 +++++++++++++------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/pylhc_submitter/job_submitter.py b/pylhc_submitter/job_submitter.py index 745e66b..5198003 100644 --- a/pylhc_submitter/job_submitter.py +++ b/pylhc_submitter/job_submitter.py @@ -480,39 +480,31 @@ def _return_dest_dir(job_id): LOG.debug("Setting up folders: ") job_df[COLUMN_JOB_DIRECTORY] = [_return_job_dir(id_) for id_ in job_df.index] - if destination_directory is not None: - _custom_output_dest = True - job_df[COLUMN_DEST_DIRECTORY] = [_return_dest_dir(id_) for id_ in job_df.index] - else: - _custom_output_dest = False - job_df[COLUMN_DEST_DIRECTORY] = job_df[COLUMN_JOB_DIRECTORY] + for job_dir in job_df[COLUMN_JOB_DIRECTORY]: + job_dir.mkdir(exist_ok=True) + LOG.debug(f" created '{job_dir}'.") + if destination_directory is None: + job_df[COLUMN_DEST_DIRECTORY] = job_df[COLUMN_JOB_DIRECTORY] + else: + job_df[COLUMN_DEST_DIRECTORY] = [_return_dest_dir(id_) for id_ in job_df.index] - for job_dir in job_df[COLUMN_JOB_DIRECTORY]: - try: - job_dir.mkdir() - except IOError: - LOG.debug(f" failed '{job_dir}' (might already exist).") - else: - LOG.debug(f" created '{job_dir}'.") - - if _custom_output_dest: - strip_dest_dir = _strip_eos_uri(destination_directory) + strip_dest_dir: Path = _strip_eos_uri(destination_directory) strip_dest_dir.mkdir(parents=True, exist_ok=True) - # Make some symlinks for easy navigation + # Make some symlinks for easy navigation--- + # Output directory -> Working Directory sym_submission = destination_directory / Path('SUBMISSION_DIR') - sym_submission.symlink_to(working_directory.resolve()) + sym_submission.symlink_to(working_directory.resolve(), target_is_directory=True) + + # Working Directory -> Output Directory sym_destination = working_directory / Path('OUTPUT_DIR') - sym_destination.symlink_to(destination_directory.resolve()) + sym_destination.symlink_to(destination_directory.resolve(), target_is_directory=True) + # Create output dirs per job --- for job_dest_dir in job_df[COLUMN_DEST_DIRECTORY]: - try: - _strip_eos_uri(job_dest_dir).mkdir() - except IOError: - LOG.debug(f" failed '{job_dest_dir}' (might already exist).") - else: - LOG.debug(f" created '{job_dest_dir}'.") + 
_strip_eos_uri(job_dest_dir).mkdir(exist_ok=True) + LOG.debug(f" created '{job_dest_dir}'.") return job_df From 26cbec210efe531329cb6d240b1b2732d5e3016a Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Thu, 2 Nov 2023 14:46:12 +0100 Subject: [PATCH 04/30] fixing link to homepage --- pylhc_submitter/job_submitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pylhc_submitter/job_submitter.py b/pylhc_submitter/job_submitter.py index 5198003..4f17449 100644 --- a/pylhc_submitter/job_submitter.py +++ b/pylhc_submitter/job_submitter.py @@ -14,7 +14,7 @@ and job directory for further post processing. For additional information and guides, see the `Job Submitter page -`_ in the ``OMC`` documentation site. +`_ in the ``OMC`` documentation site. *--Required--* From 4c12d9a98f32c80eaa37588519ac07a37f30cd9f Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Tue, 7 Nov 2023 16:27:57 +0100 Subject: [PATCH 05/30] modified tests --- tests/unit/test_job_submitter.py | 325 +++++++++++++++++-------------- 1 file changed, 178 insertions(+), 147 deletions(-) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 308202b..709433e 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -1,5 +1,9 @@ +import itertools +from dataclasses import astuple, dataclass, field, fields, asdict from pathlib import Path +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union +import numpy as np import pytest from generic_parser import DotDict @@ -17,164 +21,191 @@ ) -@pytest.mark.parametrize("maskfile", [True, False]) -def test_job_creation_and_localrun(tmp_path, maskfile): - args, setup = _create_setup(tmp_path, mask_file=maskfile) - setup.update(run_local=True) - job_submit(**setup) - _test_output(args) - +@dataclass +class InputParameters: + working_directory: Path + executable: Optional[str] = None if on_windows() else "/bin/bash" + script_extension: Optional[str] =".bat" if on_windows() else ".sh" + job_output_dir: Optional[str] = "Outputdir" + jobid_mask: Optional[str] = "%(PARAM1)s.%(PARAM2)d" + replace_dict: Optional[Dict] = field(default_factory=lambda: dict(PARAM1=["a", "b"], PARAM2=[1, 2, 3])) + jobflavour: Optional[str] = "workday" + resume_jobs: Optional[bool] = True + check_files: Optional[Sequence] = field(default_factory=lambda: ["out.txt",]) + dryrun: Optional[bool] = False + run_local: Optional[bool] = False + htc_arguments: Optional[Dict] = field(default_factory=lambda: {"max_retries": "4", "some_other_argument": "some_other_parameter"}) + output_destination: Optional[Path] = None + mask: Union[Path, str] = None # will be set in create_mask + + def create_mask(self, name: str = "test_script.mask", content: str = None, as_file: bool = False): + output_file = Path(self.job_output_dir, self.check_files[0]) + + if content is None: + content = self.jobid_mask + + if on_windows(): + mask_string = f'echo {content}> "{output_file!s}"' + else: + mask_string = f'echo "{content}" > "{output_file!s}"' + if as_file: + mask_string = " ".join(['-c "', mask_string, '"']) + + + mask_string = f"{mask_string}\n" + + if as_file: + mask_path = self.working_directory / name + with mask_path.open("w") as f: + f.write(mask_string) + self.mask = mask_path + else: + self.mask = mask_string -@run_only_on_linux -def test_job_creation_and_localrun_with_multiline_maskstring(tmp_path): - mask = "123\"\" \nsleep 0.1 \n/bin/bash -c \"echo \"%(PARAM1)s.%(PARAM2)s" - 
args, setup = _create_setup(tmp_path, mask_content=mask, mask_file=False) - setup.update(run_local=True) - job_submit(**setup) - _test_output(args) - - -@run_only_on_linux -@pytest.mark.parametrize("maskfile", [True, False]) -def test_job_creation_and_dryrun(tmp_path, maskfile): - args, setup = _create_setup(tmp_path, mask_file=maskfile) - setup.update(dryrun=True) - job_submit(**setup) - _test_subfile_content(setup) - _test_output(args, post_run=False) -@run_only_on_linux @pytest.mark.parametrize("maskfile", [True, False]) -def test_find_errorneous_percentage_signs(tmp_path, maskfile): - mask = "%(PARAM1)s.%(PARAM2)d\nsome stuff # should be 5%\nsome % more % stuff." - args, setup = _create_setup(tmp_path, mask_content=mask, mask_file=maskfile) - with pytest.raises(KeyError) as e: - job_submit(**setup) - assert "problematic '%'" in e.value.args[0] - - -@run_only_on_linux -@pytest.mark.parametrize("maskfile", [True, False]) -def test_missing_keys(tmp_path, maskfile): - mask = "%(PARAM1)s.%(PARAM2)s.%(PARAM3)s" - args, setup = _create_setup(tmp_path, mask_content=mask, mask_file=maskfile) - with pytest.raises(KeyError) as e: - job_submit(**setup) - assert "PARAM3" in e.value.args[0] - - -@run_if_not_linux -def test_not_on_linux(tmp_path): - args, setup = _create_setup(tmp_path) - with pytest.raises(EnvironmentError) as e: - job_submit(**setup) - assert "htcondor bindings" in e.value.args[0] - - -@run_only_on_linux -@pytest.mark.cern_network -def test_htc_submit(): - """ This test is here for local testing only. You need to adapt the path - and delete the results afterwards manually (so you can check them before.""" - user = "jdilly" - path = Path("/", "afs", "cern.ch", "user", user[0], user, "htc_temp") - path.mkdir(exist_ok=True) - args, setup = _create_setup(path) - - job_submit(**setup) - _test_subfile_content(setup) - _test_output(args, post_run=False) - # _test_output(args, post_run=True) # you can use this if you like after htcondor is done +def test_job_creation_and_localrun(tmp_path, maskfile): + """ Tests that the jobs are created and can be run locally + from mask-string and mask-file. """ + setup = InputParameters(working_directory=tmp_path, run_local=True) + setup.create_mask(as_file=maskfile) + job_submit(**asdict(setup)) + _test_output(setup) + + +# def test_output_directory(tmp_path): +# """ Tests that the output is copied to the output destination. +# As a by product it also tests that the jobs are created and can be run locally. """ +# output_destination = tmp_path / "my_new_output" / "long_path" +# args, setup = _create_setup(tmp_path, mask_file=False, output_destination=output_destination) +# setup.update(run_local=True) +# job_submit(**setup) +# _test_output(args) + + +# @run_only_on_linux +# def test_job_creation_and_localrun_with_multiline_maskstring(tmp_path): +# """ Tests that the jobs are created and can be run locally from a multiline mask-string. """ +# mask = "123\"\" \nsleep 0.1 \n/bin/bash -c \"echo \"%(PARAM1)s.%(PARAM2)s" +# args, setup = _create_setup(tmp_path, mask_content=mask, mask_file=False) +# setup.update(run_local=True) +# job_submit(**setup) +# _test_output(args) + + +# @run_only_on_linux +# @pytest.mark.parametrize("maskfile", [True, False]) +# def test_job_creation_and_dryrun(tmp_path, maskfile): +# """ Tests that the jobs are created as dry-run from mask-file and from mask-string. 
""" +# args, setup = _create_setup(tmp_path, mask_file=maskfile) +# setup.update(dryrun=True) +# job_submit(**setup) +# _test_subfile_content(setup) +# _test_output(args, post_run=False) + + +# @run_only_on_linux +# @pytest.mark.parametrize("maskfile", [True, False]) +# def test_find_errorneous_percentage_signs(tmp_path, maskfile): +# """ Tests that a key-error is raised on a mask-string with percentage signs, +# that are not part of the replacement parameters. """ +# mask = "%(PARAM1)s.%(PARAM2)d\nsome stuff # should be 5%\nsome % more % stuff." +# args, setup = _create_setup(tmp_path, mask_content=mask, mask_file=maskfile) +# with pytest.raises(KeyError) as e: +# job_submit(**setup) +# assert "problematic '%'" in e.value.args[0] + + +# @run_only_on_linux +# @pytest.mark.parametrize("maskfile", [True, False]) +# def test_missing_keys(tmp_path, maskfile): +# """ Tests that a key-error is raised on a mask-string with missing keys in the replacement dict. """ +# mask = "%(PARAM1)s.%(PARAM2)s.%(PARAM3)s" +# args, setup = _create_setup(tmp_path, mask_content=mask, mask_file=maskfile) +# with pytest.raises(KeyError) as e: +# job_submit(**setup) +# assert "PARAM3" in e.value.args[0] + + +# @run_if_not_linux +# def test_not_on_linux(tmp_path): +# """ Test that an error is raised if htcondor bindings are not found. +# If this tests fails, this might mean, that htcondor bindings are finally +# available for the other platforms. """ +# args, setup = _create_setup(tmp_path) +# with pytest.raises(EnvironmentError) as e: +# job_submit(**setup) +# assert "htcondor bindings" in e.value.args[0] + + +# @run_only_on_linux +# @pytest.mark.cern_network +# def test_htc_submit(): +# """ This test is here for local testing only. You need to adapt the path +# and delete the results afterwards manually (so you can check them before.""" +# user = "jdilly" +# path = Path("/", "afs", "cern.ch", "user", user[0], user, "htc_temp") +# path.mkdir(exist_ok=True) +# args, setup = _create_setup(path) + +# job_submit(**setup) +# _test_subfile_content(setup) +# _test_output(args, post_run=False) +# # _test_output(args, post_run=True) # you can use this if you like after htcondor is done # Helper ----------------------------------------------------------------------- -def _create_setup(cwd_path: Path, mask_content: str = None, mask_file: bool = True): - """ Create a quick setup for Parameters PARAM1 and PARAM2. 
""" - out_name = "out.txt" - out_dir = "Outputdir" - - args = DotDict( - cwd=cwd_path, - out_name=out_name, - out_dir=out_dir, - id="%(PARAM1)s.%(PARAM2)d", - mask_name="test_script.mask", - ext=".bat" if on_windows() else ".sh", - out_file=Path(out_dir, out_name), - p1_list=["a", "b"], - p2_list=[1, 2, 3], - mask_file=mask_file - ) - - mask_string = _make_executable_string(args, mask_content) - if args.mask_file: - mask_path = args.cwd / args.mask_name - with mask_path.open("w") as f: - f.write(mask_string) - - setup = dict( - executable=None if on_windows() else "/bin/bash", - script_extension=args.ext, - job_output_dir=out_dir, - mask=str(mask_path) if args.mask_file else mask_string, - replace_dict=dict(PARAM1=args.p1_list, PARAM2=args.p2_list), - jobid_mask=args.id, - jobflavour="workday", - resume_jobs=True, - check_files=[args.out_name], - working_directory=str(args.cwd), - dryrun=False, - run_local=False, - htc_arguments={"max_retries": "4", "some_other_argument": "some_other_parameter"}, - ) - return args, setup - - -def _make_executable_string(args, mask_content): - if mask_content is None: - mask_content = args.id - - if on_windows(): - mask_string = f'echo {mask_content}> "{args.out_file}"' - else: - mask_string = f'echo "{mask_content}" > "{args.out_file}"' - if not args.mask_file: - mask_string = " ".join(['-c "', mask_string, '"']) - return f"{mask_string}\n" - - -def _test_subfile_content(setup): - subfile = Path(setup['working_directory']) / SUBFILE + +def _test_subfile_content(setup: InputParameters): + """ Checks some of the content of the subfile (queuehtc.sub). """ + subfile = setup.working_directory / SUBFILE assert subfile.exists() with subfile.open("r") as sfile: filecontents = dict(line.rstrip().split(" = ") for line in sfile if " = " in line) assert filecontents["MY.JobFlavour"].strip('"') == setup["jobflavour"] # flavour is saved with "" in .sub, and read in with them assert filecontents["transfer_output_files"] == setup["job_output_dir"] - for key in setup["htc_arguments"].keys(): - assert filecontents[key] == setup["htc_arguments"][key] - - -def _test_output(args, post_run=True): - for p1 in args.p1_list: - for p2 in args.p2_list: - current_id = args.id % dict(PARAM1=p1, PARAM2=p2) - job_name = f"Job.{current_id}" - job_dir_path = args.cwd / job_name - out_dir_path = job_dir_path / args.out_dir - out_file_path = out_dir_path / args.out_name - - assert job_dir_path.exists() - assert job_dir_path.is_dir() - if args.mask_file: - assert (job_dir_path / args.mask_name).with_suffix(args.ext).exists() - # assert out_dir_path.exists() # does not seem to be pre-created anymore (jdilly 2021-05-04) - if post_run: - assert out_dir_path.is_dir() - assert out_file_path.exists() - assert out_file_path.is_file() - - with out_file_path.open("r") as f: - assert f.read().strip("\n") == current_id + for key in setup.htc_arguments.keys(): + assert filecontents[key] == setup.htc_arguments[key] + + +def _test_output(setup: InputParameters, post_run: bool = True): + """ Checks the validity of the output. 
""" + + combinations = _generate_combinations(setup.replace_dict) + assert len(combinations) == np.prod([len(v) for v in setup.replace_dict.values()]) + + for combination_instance in combinations: + current_id = setup.jobid_mask % combination_instance + job_name = f"Job.{current_id}" + job_dir_path = setup.working_directory / job_name + out_dir_path = job_dir_path / setup.job_output_dir + out_file_path = out_dir_path / setup.check_files[0] + + assert job_dir_path.exists() + assert job_dir_path.is_dir() + if isinstance(setup.mask, Path): + assert (job_dir_path / setup.mask.name).with_suffix(setup.script_extension).exists() + # assert out_dir_path.exists() # does not seem to be pre-created anymore (jdilly 2021-05-04) + if post_run: + assert out_dir_path.is_dir() + assert out_file_path.exists() + assert out_file_path.is_file() + + with out_file_path.open("r") as f: + assert f.read().strip("\n") == current_id + + +def _generate_combinations(data: Dict[str, Sequence]) -> List[Dict[str, Any]]: + """ Creates all possible combinations of values in data as dictionaries. """ + keys = list(data.keys()) + all_values = [data[key] for key in keys] + + combinations = [ + {keys[i]: values[i] for i in range(len(keys))} + for values in itertools.product(*all_values) + ] + + return combinations \ No newline at end of file From 72bc145a73938c65b8d3368ac4f6dfb813bb6407 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Tue, 7 Nov 2023 17:28:27 +0100 Subject: [PATCH 06/30] making tests run again --- tests/unit/test_job_submitter.py | 209 ++++++++++++++++--------------- 1 file changed, 109 insertions(+), 100 deletions(-) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 709433e..6a6bf92 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -21,8 +21,113 @@ ) +@pytest.mark.parametrize("maskfile", [True, False]) +def test_job_creation_and_localrun(tmp_path, maskfile): + """ Tests that the jobs are created and can be run locally + from mask-string and mask-file. """ + setup = InputParameters(working_directory=tmp_path, run_local=True) + setup.create_mask(as_file=maskfile) + job_submit(**asdict(setup)) + _test_output(setup) + + +# def test_output_directory(tmp_path): +# """ Tests that the output is copied to the output destination. +# As a by product it also tests that the jobs are created and can be run locally. """ +# output_destination = tmp_path / "my_new_output" / "long_path" +# args, setup = _create_setup(tmp_path, mask_file=False, output_destination=output_destination) +# setup.update(run_local=True) +# job_submit(**setup) +# _test_output(args) + + +@run_only_on_linux +def test_job_creation_and_localrun_with_multiline_maskstring(tmp_path): + """ Tests that the jobs are created and can be run locally from a multiline mask-string. """ + mask = "123\"\" \nsleep 0.1 \n/bin/bash -c \"echo \"%(PARAM1)s.%(PARAM2)s" + setup = InputParameters(working_directory=tmp_path, run_local=True) + setup.create_mask(content=mask, as_file=False) + job_submit(**asdict(setup)) + _test_output(setup) + + +@run_only_on_linux +@pytest.mark.parametrize("maskfile", [True, False]) +def test_job_creation_and_dryrun(tmp_path, maskfile): + """ Tests that the jobs are created as dry-run from mask-file and from mask-string. 
""" + setup = InputParameters(working_directory=tmp_path, dryrun=True) + setup.create_mask(as_file=maskfile) + job_submit(**asdict(setup)) + _test_subfile_content(setup) + _test_output(setup, post_run=False) + + +@run_only_on_linux +@pytest.mark.parametrize("maskfile", [True, False]) +def test_find_errorneous_percentage_signs(tmp_path, maskfile): + """ Tests that a key-error is raised on a mask-string with percentage signs, + that are not part of the replacement parameters. """ + mask = "%(PARAM1)s.%(PARAM2)d\nsome stuff # should be 5%\nsome % more % stuff." + setup = InputParameters(working_directory=tmp_path) + setup.create_mask(content=mask, as_file=maskfile) + with pytest.raises(KeyError) as e: + job_submit(**asdict(setup)) + assert "problematic '%'" in e.value.args[0] + + +@run_only_on_linux +@pytest.mark.parametrize("maskfile", [True, False]) +def test_missing_keys(tmp_path, maskfile): + """ Tests that a key-error is raised on a mask-string with missing keys in the replacement dict. """ + mask = "%(PARAM1)s.%(PARAM2)s.%(PARAM3)s" + setup = InputParameters(working_directory=tmp_path) + setup.create_mask(content=mask, as_file=maskfile) + with pytest.raises(KeyError) as e: + job_submit(**asdict(setup)) + assert "PARAM3" in e.value.args[0] + + +@run_if_not_linux +def test_not_on_linux(tmp_path): + """ Test that an error is raised if htcondor bindings are not found. + If this tests fails, this might mean, that htcondor bindings are finally + available for the other platforms. """ + setup = InputParameters(working_directory=tmp_path) + with pytest.raises(EnvironmentError) as e: + job_submit(**asdict(setup)) + assert "htcondor bindings" in e.value.args[0] + + +@run_only_on_linux +@pytest.mark.cern_network +def test_htc_submit(): + """ This test is here for local testing only. You need to adapt the path + and delete the results afterwards manually (so you can check them before.""" + user = "jdilly" + path = Path("/", "afs", "cern.ch", "user", user[0], user, "htc_temp") + path.mkdir(exist_ok=True) + + + # Fix the kerberos ticket path. + # Do klist to find your ticket manually. + import os + os.environ["KRB5CCNAME"] = "/tmp/krb5cc_106029" + + setup = InputParameters(working_directory=path) + setup.create_mask() + # pre-run --- + # job_submit(**asdict(setup)) + # _test_subfile_content(setup) + # _test_output(setup, post_run=False) + # post run --- + _test_output(setup, post_run=True) + + +# Helper ----------------------------------------------------------------------- + @dataclass class InputParameters: + """ job_submitter input parameters. """ working_directory: Path executable: Optional[str] = None if on_windows() else "/bin/bash" script_extension: Optional[str] =".bat" if on_windows() else ".sh" @@ -48,10 +153,9 @@ def create_mask(self, name: str = "test_script.mask", content: str = None, as_fi mask_string = f'echo {content}> "{output_file!s}"' else: mask_string = f'echo "{content}" > "{output_file!s}"' - if as_file: + if not as_file: mask_string = " ".join(['-c "', mask_string, '"']) - mask_string = f"{mask_string}\n" if as_file: @@ -63,110 +167,14 @@ def create_mask(self, name: str = "test_script.mask", content: str = None, as_fi self.mask = mask_string - -@pytest.mark.parametrize("maskfile", [True, False]) -def test_job_creation_and_localrun(tmp_path, maskfile): - """ Tests that the jobs are created and can be run locally - from mask-string and mask-file. 
""" - setup = InputParameters(working_directory=tmp_path, run_local=True) - setup.create_mask(as_file=maskfile) - job_submit(**asdict(setup)) - _test_output(setup) - - -# def test_output_directory(tmp_path): -# """ Tests that the output is copied to the output destination. -# As a by product it also tests that the jobs are created and can be run locally. """ -# output_destination = tmp_path / "my_new_output" / "long_path" -# args, setup = _create_setup(tmp_path, mask_file=False, output_destination=output_destination) -# setup.update(run_local=True) -# job_submit(**setup) -# _test_output(args) - - -# @run_only_on_linux -# def test_job_creation_and_localrun_with_multiline_maskstring(tmp_path): -# """ Tests that the jobs are created and can be run locally from a multiline mask-string. """ -# mask = "123\"\" \nsleep 0.1 \n/bin/bash -c \"echo \"%(PARAM1)s.%(PARAM2)s" -# args, setup = _create_setup(tmp_path, mask_content=mask, mask_file=False) -# setup.update(run_local=True) -# job_submit(**setup) -# _test_output(args) - - -# @run_only_on_linux -# @pytest.mark.parametrize("maskfile", [True, False]) -# def test_job_creation_and_dryrun(tmp_path, maskfile): -# """ Tests that the jobs are created as dry-run from mask-file and from mask-string. """ -# args, setup = _create_setup(tmp_path, mask_file=maskfile) -# setup.update(dryrun=True) -# job_submit(**setup) -# _test_subfile_content(setup) -# _test_output(args, post_run=False) - - -# @run_only_on_linux -# @pytest.mark.parametrize("maskfile", [True, False]) -# def test_find_errorneous_percentage_signs(tmp_path, maskfile): -# """ Tests that a key-error is raised on a mask-string with percentage signs, -# that are not part of the replacement parameters. """ -# mask = "%(PARAM1)s.%(PARAM2)d\nsome stuff # should be 5%\nsome % more % stuff." -# args, setup = _create_setup(tmp_path, mask_content=mask, mask_file=maskfile) -# with pytest.raises(KeyError) as e: -# job_submit(**setup) -# assert "problematic '%'" in e.value.args[0] - - -# @run_only_on_linux -# @pytest.mark.parametrize("maskfile", [True, False]) -# def test_missing_keys(tmp_path, maskfile): -# """ Tests that a key-error is raised on a mask-string with missing keys in the replacement dict. """ -# mask = "%(PARAM1)s.%(PARAM2)s.%(PARAM3)s" -# args, setup = _create_setup(tmp_path, mask_content=mask, mask_file=maskfile) -# with pytest.raises(KeyError) as e: -# job_submit(**setup) -# assert "PARAM3" in e.value.args[0] - - -# @run_if_not_linux -# def test_not_on_linux(tmp_path): -# """ Test that an error is raised if htcondor bindings are not found. -# If this tests fails, this might mean, that htcondor bindings are finally -# available for the other platforms. """ -# args, setup = _create_setup(tmp_path) -# with pytest.raises(EnvironmentError) as e: -# job_submit(**setup) -# assert "htcondor bindings" in e.value.args[0] - - -# @run_only_on_linux -# @pytest.mark.cern_network -# def test_htc_submit(): -# """ This test is here for local testing only. 
You need to adapt the path -# and delete the results afterwards manually (so you can check them before.""" -# user = "jdilly" -# path = Path("/", "afs", "cern.ch", "user", user[0], user, "htc_temp") -# path.mkdir(exist_ok=True) -# args, setup = _create_setup(path) - -# job_submit(**setup) -# _test_subfile_content(setup) -# _test_output(args, post_run=False) -# # _test_output(args, post_run=True) # you can use this if you like after htcondor is done - - -# Helper ----------------------------------------------------------------------- - - - def _test_subfile_content(setup: InputParameters): """ Checks some of the content of the subfile (queuehtc.sub). """ subfile = setup.working_directory / SUBFILE assert subfile.exists() with subfile.open("r") as sfile: filecontents = dict(line.rstrip().split(" = ") for line in sfile if " = " in line) - assert filecontents["MY.JobFlavour"].strip('"') == setup["jobflavour"] # flavour is saved with "" in .sub, and read in with them - assert filecontents["transfer_output_files"] == setup["job_output_dir"] + assert filecontents["MY.JobFlavour"].strip('"') == setup.jobflavour # flavour is saved with "" in .sub, and read in with them + assert filecontents["transfer_output_files"] == setup.job_output_dir for key in setup.htc_arguments.keys(): assert filecontents[key] == setup.htc_arguments[key] @@ -175,6 +183,7 @@ def _test_output(setup: InputParameters, post_run: bool = True): """ Checks the validity of the output. """ combinations = _generate_combinations(setup.replace_dict) + assert len(combinations) assert len(combinations) == np.prod([len(v) for v in setup.replace_dict.values()]) for combination_instance in combinations: From bf813f33fdb520a83afc5a4a7eec922e6e5e7880 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Tue, 7 Nov 2023 17:54:14 +0100 Subject: [PATCH 07/30] fixing mac? --- tests/unit/test_job_submitter.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 6a6bf92..62d2b22 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -93,6 +93,7 @@ def test_not_on_linux(tmp_path): If this tests fails, this might mean, that htcondor bindings are finally available for the other platforms. """ setup = InputParameters(working_directory=tmp_path) + setup.create_mask() with pytest.raises(EnvironmentError) as e: job_submit(**asdict(setup)) assert "htcondor bindings" in e.value.args[0] @@ -110,8 +111,8 @@ def test_htc_submit(): # Fix the kerberos ticket path. # Do klist to find your ticket manually. 
- import os - os.environ["KRB5CCNAME"] = "/tmp/krb5cc_106029" + # import os + # os.environ["KRB5CCNAME"] = "/tmp/krb5cc_####" setup = InputParameters(working_directory=path) setup.create_mask() From 9d7e6fa95b201df0575b75473e60bcfcf9dea99d Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Tue, 7 Nov 2023 19:01:35 +0100 Subject: [PATCH 08/30] added test for output_destination --- tests/unit/test_job_submitter.py | 83 ++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 32 deletions(-) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 62d2b22..59be8de 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -31,14 +31,17 @@ def test_job_creation_and_localrun(tmp_path, maskfile): _test_output(setup) -# def test_output_directory(tmp_path): -# """ Tests that the output is copied to the output destination. -# As a by product it also tests that the jobs are created and can be run locally. """ -# output_destination = tmp_path / "my_new_output" / "long_path" -# args, setup = _create_setup(tmp_path, mask_file=False, output_destination=output_destination) -# setup.update(run_local=True) -# job_submit(**setup) -# _test_output(args) +def test_output_directory(tmp_path): + """ Tests that the output is copied to the output destination. + As a by product it also tests that the jobs are created and can be run locally. """ + setup = InputParameters( + working_directory=tmp_path, + run_local=True, + output_destination=tmp_path / "my_new_output" / "long_path", + ) + setup.create_mask() + job_submit(**asdict(setup)) + _test_output(setup) @run_only_on_linux @@ -104,24 +107,25 @@ def test_not_on_linux(tmp_path): def test_htc_submit(): """ This test is here for local testing only. You need to adapt the path and delete the results afterwards manually (so you can check them before.""" - user = "jdilly" - path = Path("/", "afs", "cern.ch", "user", user[0], user, "htc_temp") - path.mkdir(exist_ok=True) - - # Fix the kerberos ticket path. # Do klist to find your ticket manually. 
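    # (Note: KRB5CCNAME is the standard environment variable naming the Kerberos
    #  credential cache; `klist` shows it, typically as FILE:/tmp/krb5cc_<uid>.
    #  Setting it manually is only needed if the submission environment does not
    #  pick up the ticket on its own.)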
# import os # os.environ["KRB5CCNAME"] = "/tmp/krb5cc_####" + user = "jdilly" + path = Path("/", "afs", "cern.ch", "user", user[0], user, "htc_temp") + path.mkdir(exist_ok=True) + setup = InputParameters(working_directory=path) setup.create_mask() + # pre-run --- - # job_submit(**asdict(setup)) - # _test_subfile_content(setup) - # _test_output(setup, post_run=False) + job_submit(**asdict(setup)) + _test_subfile_content(setup) + _test_output(setup, post_run=False) + # post run --- - _test_output(setup, post_run=True) + # _test_output(setup, post_run=True) # Helper ----------------------------------------------------------------------- @@ -190,22 +194,37 @@ def _test_output(setup: InputParameters, post_run: bool = True): for combination_instance in combinations: current_id = setup.jobid_mask % combination_instance job_name = f"Job.{current_id}" - job_dir_path = setup.working_directory / job_name - out_dir_path = job_dir_path / setup.job_output_dir - out_file_path = out_dir_path / setup.check_files[0] - - assert job_dir_path.exists() - assert job_dir_path.is_dir() + if isinstance(setup.mask, Path): - assert (job_dir_path / setup.mask.name).with_suffix(setup.script_extension).exists() - # assert out_dir_path.exists() # does not seem to be pre-created anymore (jdilly 2021-05-04) - if post_run: - assert out_dir_path.is_dir() - assert out_file_path.exists() - assert out_file_path.is_file() - - with out_file_path.open("r") as f: - assert f.read().strip("\n") == current_id + assert (setup.working_directory / job_name / setup.mask.name).with_suffix(setup.script_extension).exists() + + def _check_output_content(dir_path: Path): + # Check if the code created the folder structure --- + job_path = dir_path / job_name + + assert job_path.exists() + assert job_path.is_dir() + + if post_run: # Check if the jobs created the files --- + out_dir_path = job_path / setup.job_output_dir + out_file_path = out_dir_path / setup.check_files[0] + + assert out_dir_path.is_dir() + assert out_file_path.exists() + assert out_file_path.is_file() + + with out_file_path.open("r") as f: + assert f.read().strip("\n") == current_id + + # Check local working directory --- + _check_output_content(setup.working_directory) + + if setup.output_destination is not None: + # Check copy at output destination --- + _check_output_content(setup.output_destination) + + + def _generate_combinations(data: Dict[str, Sequence]) -> List[Dict[str, Any]]: From df9ea349a0a0513fdc46b5685750016ab6d112fb Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Tue, 7 Nov 2023 19:02:09 +0100 Subject: [PATCH 09/30] cleanup imports --- tests/unit/test_job_submitter.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 59be8de..f1cac12 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -1,11 +1,10 @@ import itertools -from dataclasses import astuple, dataclass, field, fields, asdict +from dataclasses import asdict, dataclass, field from pathlib import Path -from typing import Any, Dict, List, Optional, Sequence, Tuple, Union +from typing import Any, Dict, List, Optional, Sequence, Union import numpy as np import pytest -from generic_parser import DotDict from pylhc_submitter.job_submitter import main as job_submit from pylhc_submitter.utils.environment_tools import on_linux, on_windows From e6ca62549071c7ea9a423f917eba051206388e67 Mon Sep 17 00:00:00 2001 From: JoschD 
<26184899+JoschD@users.noreply.github.com> Date: Tue, 7 Nov 2023 22:45:26 +0100 Subject: [PATCH 10/30] code cleanup --- pylhc_submitter/autosix.py | 2 +- pylhc_submitter/constants/job_submitter.py | 33 ++ pylhc_submitter/job_submitter.py | 496 ++++++------------ .../{htc => job_submitter_tools}/__init__.py | 0 .../job_submitter_tools/constants.py | 0 .../htc_utils.py} | 103 ++-- .../job_submitter_tools/iotools.py | 245 +++++++++ .../{htc => job_submitter_tools}/mask.py | 38 +- .../job_submitter_tools/runners.py | 117 +++++ .../{environment_tools.py => environment.py} | 0 pylhc_submitter/utils/iotools.py | 3 +- tests/unit/test_job_submitter.py | 7 +- 12 files changed, 616 insertions(+), 428 deletions(-) create mode 100644 pylhc_submitter/constants/job_submitter.py rename pylhc_submitter/{htc => job_submitter_tools}/__init__.py (100%) create mode 100644 pylhc_submitter/job_submitter_tools/constants.py rename pylhc_submitter/{htc/utils.py => job_submitter_tools/htc_utils.py} (76%) create mode 100644 pylhc_submitter/job_submitter_tools/iotools.py rename pylhc_submitter/{htc => job_submitter_tools}/mask.py (67%) create mode 100644 pylhc_submitter/job_submitter_tools/runners.py rename pylhc_submitter/utils/{environment_tools.py => environment.py} (100%) diff --git a/pylhc_submitter/autosix.py b/pylhc_submitter/autosix.py index f1f908c..f6b720d 100644 --- a/pylhc_submitter/autosix.py +++ b/pylhc_submitter/autosix.py @@ -202,7 +202,7 @@ SIXENV_OPTIONAL, AutoSixEnvironment, ) -from pylhc_submitter.htc.mask import generate_jobdf_index +from pylhc_submitter.job_submitter_tools.mask import generate_jobdf_index from pylhc_submitter.job_submitter import ( JOBSUMMARY_FILE, COLUMN_JOBID, diff --git a/pylhc_submitter/constants/job_submitter.py b/pylhc_submitter/constants/job_submitter.py new file mode 100644 index 0000000..aca598c --- /dev/null +++ b/pylhc_submitter/constants/job_submitter.py @@ -0,0 +1,33 @@ + +""" +Constants: Job Submitter +---------------------------------- + +Collections of constants and paths used in the job-submitter. +""" +from pylhc_submitter.constants.external_paths import MADX_BIN, PYTHON2_BIN, PYTHON3_BIN + +JOBSUMMARY_FILE = "Jobs.tfs" +JOBDIRECTORY_PREFIX = "Job" +CONFIG_FILE = "config.ini" + +SCRIPT_EXTENSIONS = { + "madx": ".madx", + "python3": ".py", + "python2": ".py", +} + +EXECUTEABLEPATH = { + "madx": MADX_BIN, + "python3": PYTHON3_BIN, + "python2": PYTHON2_BIN, +} + + +COLUMN_JOBID = "JobId" +COLUMN_SHELL_SCRIPT = "ShellScript" +COLUMN_JOB_DIRECTORY = "JobDirectory" +COLUMN_DEST_DIRECTORY = "DestDirectory" +COLUMN_JOB_FILE = "JobFile" + +NON_PARAMETER_COLUMNS = (COLUMN_SHELL_SCRIPT, COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE, COLUMN_DEST_DIRECTORY) \ No newline at end of file diff --git a/pylhc_submitter/job_submitter.py b/pylhc_submitter/job_submitter.py index 4f17449..9748eed 100644 --- a/pylhc_submitter/job_submitter.py +++ b/pylhc_submitter/job_submitter.py @@ -16,124 +16,170 @@ For additional information and guides, see the `Job Submitter page `_ in the ``OMC`` documentation site. 
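
A minimal sketch of calling the submitter directly from Python (the mask file,
working directory and parameter names below are placeholders, not package defaults)::

    from pylhc_submitter.job_submitter import main as job_submit

    job_submit(
        executable="madx",
        mask="path/to/my_madx.mask",        # mask file or multi-line mask string
        working_directory="path/to/workdir",
        jobid_mask="job.%(SEED)d",
        replace_dict=dict(SEED=[1, 2, 3]),  # keys must appear in the mask; one job per combination
        jobflavour="workday",               # rough HTCondor runtime estimate
        run_local=False,
    )
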
+ *--Required--* -- **mask** *(str)*: Program mask to use +- **mask** *(PathOrStr)*: + + Program mask to use + + +- **replace_dict** *(DictAsString)*: -- **replace_dict** *(DictAsString)*: Dict containing the str to replace as - keys and values a list of parameters to replace + Dict containing the str to replace as keys and values a list of + parameters to replace -- **working_directory** *(str)*: Directory where data should be put + +- **working_directory** *(PathOrStr)*: + + Directory where data should be put *--Optional--* -- **append_jobs**: Flag to rerun job with finer/wider grid, - already existing points will not be reexecuted. +- **append_jobs**: + + Flag to rerun job with finer/wider grid, already existing points will + not be reexecuted. + + action: ``store_true`` + + +- **check_files** *(str)*: + + List of files/file-name-masks expected to be in the 'job_output_dir' + after a successful job (for appending/resuming). Uses the 'glob' + function, so unix-wildcards (*) are allowed. If not given, only the + presence of the folder itself is checked. + + +- **dryrun**: + + Flag to only prepare folders and scripts, but does not start/submit + jobs. Together with `resume_jobs` this can be use to check which jobs + succeeded and which failed. + + action: ``store_true`` + + +- **executable** *(PathOrStr)*: + + Path to executable or job-type (of ['madx', 'python3', 'python2']) to + use. + + default: ``madx`` + + +- **htc_arguments** *(DictAsString)*: + + Additional arguments for htcondor, as Dict-String. For AccountingGroup + please use 'accounting_group'. 'max_retries' and 'notification' have + defaults (if not given). Others are just passed on. + + default: ``{}`` + + +- **job_output_dir** *(str)*: + + The name of the output dir of the job. (Make sure your script puts its + data there!) - Action: ``store_true`` -- **check_files** *(str)*: List of files/file-name-masks expected to be in the - 'job_output_dir' after a successful job (for appending/resuming). Uses the 'glob' - function, so unix-wildcards (*) are allowed. If not given, only the presence of the folder itself is checked. -- **dryrun**: Flag to only prepare folders and scripts, - but does not start/submit jobs. - Together with `resume_jobs` this can be use to check which jobs succeeded and which failed. + default: ``Outputdata`` - Action: ``store_true`` -- **executable** *(str)*: Path to executable or job-type (of ['madx', 'python3', 'python2']) to use. -- **htc_arguments** *(DictAsString)*: Additional arguments for htcondor, as Dict-String. - For AccountingGroup please use 'accounting_group'. 'max_retries' and 'notification' have defaults (if not given). - Others are just passed on. +- **jobflavour** *(str)*: - Default: ``{}`` -- **job_output_dir** *(str)*: The name of the output dir of the job. (Make sure your script puts its data there!) + Jobflavour to give rough estimate of runtime of one job - Default: ``Outputdata`` -- **jobflavour** *(str)*: Jobflavour to give rough estimate of runtime of one job + choices: ``('espresso', 'microcentury', 'longlunch', 'workday', 'tomorrow', 'testmatch', 'nextweek')`` - Choices: ``('espresso', 'microcentury', 'longlunch', 'workday', 'tomorrow', 'testmatch', 'nextweek')`` - Default: ``workday`` -- **jobid_mask** *(str)*: Mask to name jobs from replace_dict + default: ``workday`` -- **num_processes** *(int)*: Number of processes to be used if run locally - Default: ``4`` -- **resume_jobs**: Only do jobs that did not work. 
+- **jobid_mask** *(str)*: - Action: ``store_true`` -- **run_local**: Flag to run the jobs on the local machine. Not suggested. + Mask to name jobs from replace_dict - Action: ``store_true`` -- **script_arguments** *(DictAsString)*: Additional arguments to pass to the script, - as dict in key-value pairs ('--' need to be included in the keys). - Default: ``{}`` -- **script_extension** *(str)*: New extension for the scripts created from the masks. - This is inferred automatically for ['madx', 'python3', 'python2']. Otherwise not changed. +- **num_processes** *(int)*: -- **ssh** *(str)*: Run htcondor from this machine via ssh (needs access to the `working_directory`) + Number of processes to be used if run locally + + default: ``4`` + + +- **output_destination** *(PathOrStr)*: + + Directory where to store the output of the jobs . (Can be on EOS) + + +- **resume_jobs**: + + Only do jobs that did not work. + + action: ``store_true`` + + +- **run_local**: + + Flag to run the jobs on the local machine. Not suggested. + + action: ``store_true`` + + +- **script_arguments** *(DictAsString)*: + + Additional arguments to pass to the script, as dict in key-value pairs + ('--' need to be included in the keys). + + default: ``{}`` + + +- **script_extension** *(str)*: + + New extension for the scripts created from the masks. This is inferred + automatically for ['madx', 'python3', 'python2']. Otherwise not + changed. + + +- **ssh** *(str)*: + + Run htcondor from this machine via ssh (needs access to the + `working_directory`) -:author: mihofer, jdilly, fesoubel """ -import itertools import logging -import multiprocessing -import subprocess import sys +from dataclasses import fields from pathlib import Path -import numpy as np -import tfs from generic_parser import EntryPointParameters, entrypoint from generic_parser.entry_datatypes import DictAsString from generic_parser.tools import print_dict_tree -import pylhc_submitter.htc.utils as htcutils -from pylhc_submitter.htc.mask import ( - check_percentage_signs_in_mask, - create_jobs_from_mask, - find_named_variables_in_mask, - generate_jobdf_index, -) -from pylhc_submitter.htc.utils import ( - COLUMN_JOB_DIRECTORY, - COLUMN_DEST_DIRECTORY, - COLUMN_SHELL_SCRIPT, - EXECUTEABLEPATH, - HTCONDOR_JOBLIMIT, - JOBFLAVOURS, - _strip_eos_uri, -) -from pylhc_submitter.utils.environment_tools import on_windows -from pylhc_submitter.utils.iotools import PathOrStr, save_config, make_replace_entries_iterable, keys_to_path +from pylhc_submitter.constants.job_submitter import EXECUTEABLEPATH, SCRIPT_EXTENSIONS +from pylhc_submitter.job_submitter_tools.htc_utils import JOBFLAVOURS +from pylhc_submitter.job_submitter_tools.iotools import CreationOpts, create_jobs, print_stats +from pylhc_submitter.job_submitter_tools.mask import (check_percentage_signs_in_mask, + find_named_variables_in_mask, is_mask_file) +from pylhc_submitter.job_submitter_tools.runners import RunnerOpts, run_jobs +from pylhc_submitter.utils.iotools import (PathOrStr, keys_to_path, make_replace_entries_iterable, + save_config) from pylhc_submitter.utils.logging_tools import log_setup -JOBSUMMARY_FILE = "Jobs.tfs" -JOBDIRECTORY_PREFIX = "Job" -COLUMN_JOBID = "JobId" -CONFIG_FILE = "config.ini" - -SCRIPT_EXTENSIONS = { - "madx": ".madx", - "python3": ".py", - "python2": ".py", -} - LOG = logging.getLogger(__name__) try: import htcondor - HAS_HTCONDOR = True except ImportError: platform = "macOS" if sys.platform == "darwin" else "windows" LOG.warning( f"htcondor python bindings are linux-only. 
You can still use job_submitter on {platform}, " "but only for local runs." ) - HAS_HTCONDOR = False + htcondor = None def get_params(): @@ -280,282 +326,37 @@ def main(opt): else: LOG.info("Starting Job-submitter.") - opt = _check_opts(opt) - save_config(opt.working_directory, opt, "job_submitter") - - job_df = _create_jobs( - opt.working_directory, - opt.mask, - opt.jobid_mask, - opt.replace_dict, - opt.job_output_dir, - opt.output_destination, - opt.append_jobs, - opt.executable, - opt.script_arguments, - opt.script_extension, - ) - job_df, dropped_jobs = _drop_already_ran_jobs( - job_df, opt.resume_jobs or opt.append_jobs, opt.job_output_dir, opt.check_files - ) + save_config(Path(opt.working_directory), opt, "job_submitter") + creation_opt, runner_opt = check_opts(opt) - if opt.run_local and not opt.dryrun: - _run_local(job_df, opt.num_processes) - else: - _run_htc( - job_df, - opt.working_directory, - opt.job_output_dir, - opt.output_destination, - opt.jobflavour, - opt.ssh, - opt.dryrun, - opt.htc_arguments, - ) - if opt.dryrun: - _print_stats(job_df.index, dropped_jobs) - - -# Main Functions --------------------------------------------------------------- - - -def _create_jobs( - cwd, - mask_path_or_string, - jobid_mask, - replace_dict, - output_dir, - output_dest, - append_jobs, - executable, - script_args, - script_extension, -) -> tfs.TfsDataFrame: - LOG.debug("Creating Jobs.") - values_grid = np.array(list(itertools.product(*replace_dict.values())), dtype=object) - - if append_jobs: - jobfile_path = cwd / JOBSUMMARY_FILE - try: - job_df = tfs.read(str(jobfile_path.absolute()), index=COLUMN_JOBID) - except FileNotFoundError as filerror: - raise FileNotFoundError( - "Cannot append jobs, as no previous jobfile was found at " f"'{jobfile_path}'" - ) from filerror - mask = [elem not in job_df[replace_dict.keys()].values for elem in values_grid] - njobs = mask.count(True) - values_grid = values_grid[mask] - else: - njobs = len(values_grid) - job_df = tfs.TfsDataFrame() - - if njobs == 0: - raise ValueError(f"No (new) jobs found!") - if njobs > HTCONDOR_JOBLIMIT: - LOG.warning( - f"You are attempting to submit an important number of jobs ({njobs})." - "This can be a high stress on your system, make sure you know what you are doing." 
- ) - - LOG.debug(f"Initial number of jobs: {njobs:d}") - data_df = tfs.TfsDataFrame( - index=generate_jobdf_index(job_df, jobid_mask, replace_dict.keys(), values_grid), - columns=list(replace_dict.keys()), - data=values_grid, - ) - job_df = tfs.concat([job_df, data_df], sort=False, how_headers='left') - job_df = _setup_folders(job_df, cwd, output_dest) - - if htcutils.is_mask_file(mask_path_or_string): - LOG.debug("Creating all jobs from mask.") - script_extension = _get_script_extension(script_extension, executable, mask_path_or_string) - job_df = create_jobs_from_mask( - job_df, mask_path_or_string, replace_dict.keys(), script_extension - ) - - LOG.debug("Creating shell scripts for submission.") - job_df = htcutils.write_bash( - job_df, - output_dir, - destination_dir=output_dest, - executable=executable, - cmdline_arguments=script_args, - mask=mask_path_or_string, - ) - - job_df[COLUMN_JOB_DIRECTORY] = job_df[COLUMN_JOB_DIRECTORY].apply(str) - job_df[COLUMN_DEST_DIRECTORY] = job_df[COLUMN_DEST_DIRECTORY].apply(str) - tfs.write(str(cwd / JOBSUMMARY_FILE), job_df, save_index=COLUMN_JOBID) - return job_df - - -def _drop_already_ran_jobs( - job_df: tfs.TfsDataFrame, drop_jobs: bool, output_dir: str, check_files: str -): - LOG.debug("Dropping already finished jobs, if necessary.") - finished_jobs = [] - if drop_jobs: - finished_jobs = [ - idx - for idx, row in job_df.iterrows() - if _job_was_successful(row, output_dir, check_files) - ] - LOG.info( - f"{len(finished_jobs):d} of {len(job_df.index):d}" - " Jobs have already finished and will be skipped." - ) - job_df = job_df.drop(index=finished_jobs) - return job_df, finished_jobs + job_df, dropped_jobs = create_jobs(creation_opt) -def _run_local(job_df: tfs.TfsDataFrame, num_processes: int) -> None: - LOG.info(f"Running {len(job_df.index)} jobs locally in {num_processes:d} processes.") - - # URI type EOS addresses won't work for copying files from local jobs - check_dest = job_df.iloc[0][COLUMN_DEST_DIRECTORY] - if not _strip_eos_uri(check_dest) == Path(check_dest): - LOG.warning("The output desitnation is likely specified as EOS URI," - "which will not work during a local run") - - pool = multiprocessing.Pool(processes=num_processes) - res = pool.map(_execute_shell, job_df.iterrows()) - if any(res): - LOG.error("At least one job has failed.") - raise RuntimeError("At least one job has failed. 
Check output logs!") - - -def _run_htc( - job_df: tfs.TfsDataFrame, - cwd: str, - output_dir: str, - dest_dir: str, - flavour: str, - ssh: str, - dryrun: bool, - additional_htc_arguments: DictAsString, -) -> None: - LOG.info(f"Submitting {len(job_df.index)} jobs on htcondor, flavour '{flavour}'.") - LOG.debug("Creating htcondor subfile.") - - # If a different destination for the data is required - # is is handled through the job bash files, so remove it from - # HTConodor's file transfer specification - if dest_dir is None: - subfile = htcutils.make_subfile( - cwd, job_df, output_dir=output_dir, duration=flavour, **additional_htc_arguments - ) - else: - subfile = htcutils.make_subfile( - cwd, job_df, duration=flavour, **additional_htc_arguments - ) - - if not dryrun: - LOG.debug("Submitting jobs to htcondor.") - htcutils.submit_jobfile(subfile, ssh) - - -def _get_script_extension(script_extension: str, executable: PathOrStr, mask: PathOrStr) -> str: - if script_extension is not None: - return script_extension - return SCRIPT_EXTENSIONS.get(executable, mask.suffix) - - -# Sub Functions ---------------------------------------------------------------- - - -def _check_htcondor_presence() -> None: - """Checks the ``HAS_HTCONDOR`` variable and raises EnvironmentError if it is ``False``.""" - if not HAS_HTCONDOR: - raise EnvironmentError("htcondor bindings are necessary to run this module.") + run_jobs(job_df, runner_opt) + print_stats(job_df.index, dropped_jobs) -def _setup_folders(job_df: tfs.TfsDataFrame, working_directory: PathOrStr, - destination_directory: PathOrStr = None) -> tfs.TfsDataFrame: - def _return_job_dir(job_id): - return working_directory / f"{JOBDIRECTORY_PREFIX}.{job_id}" - - def _return_dest_dir(job_id): - return destination_directory / f"{JOBDIRECTORY_PREFIX}.{job_id}" - - LOG.debug("Setting up folders: ") - job_df[COLUMN_JOB_DIRECTORY] = [_return_job_dir(id_) for id_ in job_df.index] - - for job_dir in job_df[COLUMN_JOB_DIRECTORY]: - job_dir.mkdir(exist_ok=True) - LOG.debug(f" created '{job_dir}'.") - - if destination_directory is None: - job_df[COLUMN_DEST_DIRECTORY] = job_df[COLUMN_JOB_DIRECTORY] - else: - job_df[COLUMN_DEST_DIRECTORY] = [_return_dest_dir(id_) for id_ in job_df.index] - - strip_dest_dir: Path = _strip_eos_uri(destination_directory) - strip_dest_dir.mkdir(parents=True, exist_ok=True) - - # Make some symlinks for easy navigation--- - # Output directory -> Working Directory - sym_submission = destination_directory / Path('SUBMISSION_DIR') - sym_submission.symlink_to(working_directory.resolve(), target_is_directory=True) - - # Working Directory -> Output Directory - sym_destination = working_directory / Path('OUTPUT_DIR') - sym_destination.symlink_to(destination_directory.resolve(), target_is_directory=True) - - # Create output dirs per job --- - for job_dest_dir in job_df[COLUMN_DEST_DIRECTORY]: - _strip_eos_uri(job_dest_dir).mkdir(exist_ok=True) - LOG.debug(f" created '{job_dest_dir}'.") - - return job_df - - -def _job_was_successful(job_row, output_dir, files) -> bool: - output_dir = Path(job_row[COLUMN_DEST_DIRECTORY], output_dir) - success = output_dir.is_dir() and any(output_dir.iterdir()) - if success and files is not None and len(files): - for f in files: - success &= len(list(output_dir.glob(f))) > 0 - return success - - -def _execute_shell(df_row) -> int: - idx, column = df_row - cmd = [] if on_windows() else ["sh"] - - with Path(column[COLUMN_JOB_DIRECTORY], "log.tmp").open("w") as logfile: - process = subprocess.Popen( - cmd + 
[column[COLUMN_SHELL_SCRIPT]], - shell=on_windows(), - stdout=logfile, - stderr=subprocess.STDOUT, - cwd=column[COLUMN_JOB_DIRECTORY], - ) - return process.wait() - - -def _check_opts(opt): +def check_opts(opt): + """ Checks options and sorts them into job-creation and running parameters. """ LOG.debug("Checking options.") if opt.resume_jobs and opt.append_jobs: raise ValueError("Select either Resume jobs or Append jobs") # Paths --- - opt = keys_to_path(opt, "working_directory", "executable") + opt = keys_to_path(opt, "working_directory", "executable", "output_destination") if str(opt.executable) in EXECUTEABLEPATH.keys(): opt.executable = str(opt.executable) - if htcutils.is_mask_file(opt.mask): - mask = Path(opt.mask).read_text() # checks that mask and dir are there - opt["mask"] = Path(opt["mask"]) + if is_mask_file(opt.mask): + mask_content = Path(opt.mask).read_text() # checks that mask and dir are there + opt.mask = Path(opt.mask) else: - mask = opt.mask - - if "output_destination" in opt and opt["output_destination"] is not None: - opt["output_destination"] = Path(opt["output_destination"]) + mask_content = opt.mask # Replace dict --- dict_keys = set(opt.replace_dict.keys()) - mask_keys = find_named_variables_in_mask(mask) + mask_keys = find_named_variables_in_mask(mask_content) not_in_mask = dict_keys - mask_keys not_in_dict = mask_keys - dict_keys @@ -575,25 +376,24 @@ def _check_opts(opt): [opt.replace_dict.pop(key) for key in not_in_mask] if len(opt.replace_dict) == 0: raise KeyError("Empty replace-dictionary") - check_percentage_signs_in_mask(mask) + check_percentage_signs_in_mask(mask_content) print_dict_tree(opt, name="Input parameter", print_fun=LOG.debug) opt.replace_dict = make_replace_entries_iterable(opt.replace_dict) - return opt - - -def _print_stats(new_jobs, finished_jobs): - """Print some quick statistics.""" - LOG.info("------------- QUICK STATS ----------------") - LOG.info(f"Jobs total:{len(new_jobs) + len(finished_jobs):d}") - LOG.info(f"Jobs to run: {len(new_jobs):d}") - LOG.info(f"Jobs already finished: {len(finished_jobs):d}") - LOG.info("---------- JOBS TO RUN: NAMES -------------") - for job_name in new_jobs: - LOG.info(job_name) - LOG.info("--------- JOBS FINISHED: NAMES ------------") - for job_name in finished_jobs: - LOG.info(job_name) + + # Create new classes + opt.output_dir = opt.job_output_dir # renaming + + creation = CreationOpts(**{f.name: opt[f.name] for f in fields(CreationOpts)}) + runner = RunnerOpts(**{f.name: opt[f.name] for f in fields(RunnerOpts)}) + runner.output_dir = None if opt.output_destination else opt.output_dir + return creation, runner + + +def _check_htcondor_presence() -> None: + """ Raises an error if htcondor is not installed. 
""" + if htcondor is None: + raise EnvironmentError("htcondor bindings are necessary to run this module.") # Script Mode ------------------------------------------------------------------ diff --git a/pylhc_submitter/htc/__init__.py b/pylhc_submitter/job_submitter_tools/__init__.py similarity index 100% rename from pylhc_submitter/htc/__init__.py rename to pylhc_submitter/job_submitter_tools/__init__.py diff --git a/pylhc_submitter/job_submitter_tools/constants.py b/pylhc_submitter/job_submitter_tools/constants.py new file mode 100644 index 0000000..e69de29 diff --git a/pylhc_submitter/htc/utils.py b/pylhc_submitter/job_submitter_tools/htc_utils.py similarity index 76% rename from pylhc_submitter/htc/utils.py rename to pylhc_submitter/job_submitter_tools/htc_utils.py index 47f0eb9..43112a4 100644 --- a/pylhc_submitter/htc/utils.py +++ b/pylhc_submitter/job_submitter_tools/htc_utils.py @@ -15,21 +15,26 @@ import logging import subprocess from pathlib import Path -from typing import Union +from typing import Any, Dict, List, Union from pandas import DataFrame -from pylhc_submitter.utils.environment_tools import on_windows +from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, + COLUMN_JOB_FILE, COLUMN_SHELL_SCRIPT, + EXECUTEABLEPATH, NON_PARAMETER_COLUMNS) +from pylhc_submitter.job_submitter_tools.iotools import is_eos_path +from pylhc_submitter.job_submitter_tools.mask import is_mask_file +from pylhc_submitter.utils.environment import on_windows try: import htcondor except ImportError: # will be handled by job_submitter pass -from pylhc_submitter.constants.external_paths import MADX_BIN, PYTHON2_BIN, PYTHON3_BIN LOG = logging.getLogger(__name__) +# HTC Constants ################################################################ SHEBANG = "#!/bin/bash" SUBFILE = "queuehtc.sub" @@ -37,13 +42,6 @@ HTCONDOR_JOBLIMIT = 100000 -EXECUTEABLEPATH = { - "madx": MADX_BIN, - "python3": PYTHON3_BIN, - "python2": PYTHON2_BIN, -} - - CMD_SUBMIT = "condor_submit" JOBFLAVOURS = ( "espresso", # 20 min @@ -58,11 +56,6 @@ NOTIFICATIONS = ("always", "complete", "error", "never") -COLUMN_SHELL_SCRIPT = "ShellScript" -COLUMN_JOB_DIRECTORY = "JobDirectory" -COLUMN_DEST_DIRECTORY = "DestDirectory" -COLUMN_JOB_FILE = "JobFile" - # Subprocess Methods ########################################################### @@ -88,7 +81,7 @@ def submit_jobfile(jobfile: Path, ssh: str): LOG.info("Jobs successfully submitted.") -def _start_subprocess(command): +def _start_subprocess(command: List[str]): LOG.debug(f"Executing command '{command}'") process = subprocess.Popen( command, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -116,6 +109,7 @@ def create_multijob_for_bashfiles(job_df: DataFrame, **kwargs): notification (str): Notify under certain conditions. Defaults to ``error``. priority (int): Priority to order your jobs. Defaults to ``None``. 
""" + # Pre-defined HTCondor arguments for our jobs submit_dict = { "MyId": "htcondor", "universe": "vanilla", @@ -126,8 +120,9 @@ def create_multijob_for_bashfiles(job_df: DataFrame, **kwargs): "on_exit_remove": "(ExitBySignal == False) && (ExitCode == 0)", "requirements": "Machine =!= LastRemoteHost", } - submit_dict.update(_map_kwargs(kwargs)) - + submit_dict.update(map_kwargs(kwargs)) + + # Let the htcondor create the submit-file job = htcondor.Submit(submit_dict) # add the multiple bash files @@ -160,49 +155,53 @@ def make_subfile(cwd: Path, job_df: DataFrame, **kwargs): def write_bash( job_df: DataFrame, output_dir: Path = None, - destination_dir: Path = None, executable: str = "madx", cmdline_arguments: dict = None, mask: Union[str, Path] = None, ) -> DataFrame: - """Write the bash-files to be called by ``HTCondor``.""" + """ + Write the bash-files to be called by ``HTCondor``, which in turn call the executable. + """ if len(job_df.index) > HTCONDOR_JOBLIMIT: raise AttributeError("Submitting too many jobs for HTCONDOR") - cmds = "" - if cmdline_arguments is not None: - cmds = f" {' '.join([f'{param} {val}' for param, val in cmdline_arguments.items()])}" - - if executable is None: - exec_path = '' - else: - exec_path = f"{str(EXECUTEABLEPATH.get(executable, executable))} " + exec_path = f"{str(EXECUTEABLEPATH.get(executable, executable))} " if executable else '' + cmds = f" {' '.join([f'{param} {val}' for param, val in cmdline_arguments.items()])}" if cmdline_arguments else '' shell_scripts = [None] * len(job_df.index) for idx, (jobid, job) in enumerate(job_df.iterrows()): job_dir = Path(job[COLUMN_JOB_DIRECTORY]) bash_file_name = f"{BASH_FILENAME}.{jobid}.{'bat' if on_windows() else 'sh'}" jobfile = job_dir / bash_file_name + LOG.debug(f"Writing bash-file {idx:d} '{jobfile}'.") with open(jobfile, "w") as f: + # Preparation --- if not on_windows(): - f.write(f"{SHEBANG}\n") + f.write(f"{SHEBANG}\n") + if output_dir is not None: f.write(f"mkdir {str(output_dir)}\n") + + # The actual job execution --- f.write(exec_path) + # Call the mask-file or the filled-template string if is_mask_file(mask): f.write(str(job_dir / job[COLUMN_JOB_FILE])) else: - replace_columns = [column for column in job.index.tolist() if column not in [COLUMN_SHELL_SCRIPT, COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE]] + replace_columns = [column for column in job.index.tolist() if column not in NON_PARAMETER_COLUMNS] f.write(mask % dict(zip(replace_columns, job[replace_columns]))) + + # Additional commands for the mask/string f.write(cmds) f.write("\n") - if destination_dir is not None: - if output_dir is not None: - cp_command = f'cp -r {output_dir} {job[COLUMN_DEST_DIRECTORY]}' - if is_eos_path(destination_dir): + # Manually copy output (if needed) --- + dest_dir = job.get(COLUMN_DEST_DIRECTORY) + if output_dir and dest_dir and output_dir != dest_dir: + cp_command = f'cp -r {output_dir} {dest_dir}' + if is_eos_path(dest_dir): cp_command = f'eos {cp_command}' f.write(f'{cp_command}\n') @@ -213,17 +212,14 @@ def write_bash( return job_df -# Helper ####################################################################### - - -def _map_kwargs(add_dict): +def map_kwargs(add_dict: Dict[str, Any]) -> Dict[str, Any]: """ Maps the kwargs for the job-file. Some arguments have pre-defined choices and defaults, the remaining ones are just passed on. 
""" new = {} - # Predefined ones + # Predefined mappings htc_map = { "duration": ("+JobFlavour", JOBFLAVOURS, "workday"), "output_dir": ("transfer_output_files", None, None), @@ -251,39 +247,14 @@ def _map_kwargs(add_dict): return new +# Helper ####################################################################### + def _maybe_put_in_quotes(key, value): if key.startswith("+"): return f'"{value}"' return value -def is_eos_path(path): - path = Path(path) - strip_path_parts = _strip_eos_uri(path).parts - return len(strip_path_parts) > 1 and strip_path_parts[1] == 'eos' - - -def _strip_eos_uri(path): - # EOS paths for HTCondor are given with URI, strip for direct writing - # root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt - path = Path(path) - parts = path.parts - outpath = path - if parts[0].endswith(':'): - # the first two parts are host info, e.g `file: //host/path` - outpath = Path('/', *parts[2:]) - return outpath - - -def is_mask_file(mask): - try: - return Path(mask).is_file() - except OSError: - return False - -def is_mask_string(mask): - return not is_mask_file(mask) - # Script Mode ################################################################## diff --git a/pylhc_submitter/job_submitter_tools/iotools.py b/pylhc_submitter/job_submitter_tools/iotools.py new file mode 100644 index 0000000..bc29777 --- /dev/null +++ b/pylhc_submitter/job_submitter_tools/iotools.py @@ -0,0 +1,245 @@ +""" +Job Submitter IO-Tools +---------------------- + +Tools for input and output for the job-submitter. +""" +import itertools +import logging +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, List, Sequence, Tuple, Union + +import numpy as np +import tfs + +from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, + COLUMN_JOBID, JOBDIRECTORY_PREFIX, + JOBSUMMARY_FILE, SCRIPT_EXTENSIONS) +from pylhc_submitter.job_submitter_tools import htc_utils +from pylhc_submitter.job_submitter_tools.mask import (create_job_scripts_from_mask, + generate_jobdf_index, is_mask_file) + +LOG = logging.getLogger(__name__) + + +@dataclass +class CreationOpts: + working_directory: Path + mask: Union[Path, str] + jobid_mask: str + replace_dict: Dict[str, Any] + output_dir: Path + output_destination: Path + append_jobs: bool + resume_jobs: bool + executable: str + check_files: Sequence[str] + script_arguments: Dict[str, Any] + script_extension: str + + def should_drop_jobs(self) -> bool: + return self.append_jobs or self.resume_jobs + + + +def create_jobs(opt: CreationOpts) -> tfs.TfsDataFrame: + LOG.debug("Creating Jobs.") + + # Generate product of replace-dict and compare to existing jobs --- + parameters, values_grid, prev_job_df = _generate_parameter_space( + replace_dict=opt.replace_dict, + append_jobs=opt.append_jobs, + cwd=opt.working_directory, + ) + + # Check new jobs --- + njobs = len(values_grid) + if njobs == 0: + raise ValueError(f"No (new) jobs found!") + + if njobs > htc_utils.HTCONDOR_JOBLIMIT: + LOG.warning( + f"You are attempting to submit an important number of jobs ({njobs})." + "This can be a high stress on your system, make sure you know what you are doing." 
+ ) + + LOG.debug(f"Initial number of jobs: {njobs:d}") + + # Generate new job-dataframe --- + job_df = tfs.TfsDataFrame( + index=generate_jobdf_index(prev_job_df, opt.jobid_mask, parameters, values_grid), + columns=parameters, + data=values_grid, + ) + job_df = tfs.concat([prev_job_df, job_df], sort=False, how_headers='left') + + # Setup folders --- + job_df = create_folders(job_df, opt.working_directory, opt.output_destination) + + # Create scripts --- + if is_mask_file(opt.mask): + LOG.debug("Creating all jobs from mask.") + script_extension = _get_script_extension(opt.script_extension, opt.executable, opt.mask) + job_df = create_job_scripts_from_mask( + job_df, opt.mask, parameters, script_extension + ) + + LOG.debug("Creating shell scripts.") + job_df = htc_utils.write_bash( + job_df, + output_dir=opt.output_dir, + executable=opt.executable, + cmdline_arguments=opt.script_arguments, + mask=opt.mask, + ) + + # Convert paths to strings and write df to file --- + job_df[COLUMN_JOB_DIRECTORY] = job_df[COLUMN_JOB_DIRECTORY].apply(str) + if COLUMN_DEST_DIRECTORY in job_df.columns: + job_df[COLUMN_DEST_DIRECTORY] = job_df[COLUMN_DEST_DIRECTORY].apply(str) + + tfs.write(str(opt.working_directory / JOBSUMMARY_FILE), job_df, save_index=COLUMN_JOBID) + + # Drop already run jobs --- + dropped_jobs = [] + if opt.should_drop_jobs(): + job_df, dropped_jobs = _drop_already_run_jobs( + job_df, opt.output_dir, opt.check_files + ) + return job_df, dropped_jobs + + +def create_folders(job_df: tfs.TfsDataFrame, working_directory: Path, + destination_directory: Path = None) -> tfs.TfsDataFrame: + LOG.debug("Setting up folders: ") + + jobname = f"{JOBDIRECTORY_PREFIX}.{{0}}" + job_df[COLUMN_JOB_DIRECTORY] = [working_directory / jobname.format(id_) for id_ in job_df.index] + + for job_dir in job_df[COLUMN_JOB_DIRECTORY]: + job_dir.mkdir(exist_ok=True) + LOG.debug(f" created '{job_dir}'.") + + if destination_directory: + job_df[COLUMN_DEST_DIRECTORY] = [destination_directory / jobname.format(id_) for id_ in job_df.index] + + strip_dest_dir = strip_eos_uri(destination_directory) + strip_dest_dir.mkdir(parents=True, exist_ok=True) + + # Make some symlinks for easy navigation--- + # Output directory -> Working Directory + sym_submission = destination_directory / Path('SUBMISSION_DIR') + sym_submission.symlink_to(working_directory.resolve(), target_is_directory=True) + + # Working Directory -> Output Directory + sym_destination = working_directory / Path('OUTPUT_DIR') + sym_destination.symlink_to(destination_directory.resolve(), target_is_directory=True) + + # Create output dirs per job --- + for job_dest_dir in job_df[COLUMN_DEST_DIRECTORY]: + strip_eos_uri(job_dest_dir).mkdir(exist_ok=True) + LOG.debug(f" created '{job_dest_dir}'.") + + return job_df + + +def is_eos_path(path: Union[Path, str]) -> bool: + """ Check if the given path leads to EOS.""" + strip_path_parts = strip_eos_uri(path).parts + return len(strip_path_parts) > 1 and strip_path_parts[1] == 'eos' + + +def strip_eos_uri(path: Union[Path, str]) -> Path: + # EOS paths for HTCondor can be given as URI. Strip for direct writing. 
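As a minimal standalone sketch of this URI handling (assuming a POSIX system; the helper below mirrors the function in this patch and is not part of it):

    from pathlib import Path

    def strip_uri(path):
        parts = Path(path).parts
        if parts[0].endswith(':'):
            # the first two parts are protocol and host, e.g. 'root:', 'eosuser.cern.ch'
            return Path('/', *parts[2:])
        return Path(path)

    uri = "root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt"
    print(strip_uri(uri))           # /eos/user/a/anabramo/banana.txt
    print(strip_uri(uri).parts[1])  # 'eos', so the path is recognised as EOS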
+ # E.g.: root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt + path = Path(path) + parts = path.parts + outpath = path + if parts[0].endswith(':'): + # the first two parts are host info, e.g `file: //host/path` + outpath = Path('/', *parts[2:]) + return outpath + + +def print_stats(new_jobs, finished_jobs): + """Print some quick statistics.""" + text = [ + "\n------------- QUICK STATS ----------------" + f"Jobs total:{len(new_jobs) + len(finished_jobs):d}", + f"Jobs to run: {len(new_jobs):d}", + f"Jobs already finished: {len(finished_jobs):d}", + "---------- JOBS TO RUN: NAMES -------------" + ] + for job_name in new_jobs: + text.append(job_name) + text += ["--------- JOBS FINISHED: NAMES ------------"] + for job_name in finished_jobs: + text.append(job_name) + LOG.info("\n".join(text)) + + +def _generate_parameter_space( + replace_dict: Dict[str, Any], append_jobs: bool, cwd: Path + ) -> Tuple[List[str], np.ndarray, tfs.TfsDataFrame]: + """ Generate parameter space from replace-dict, check for existing jobs. """ + LOG.debug("Generating parameter space from replace-dict.") + parameters = list(replace_dict.keys()) + values_grid = _generate_values_grid(replace_dict) + if not append_jobs: + return parameters, values_grid, tfs.TfsDataFrame() + + jobfile_path = cwd / JOBSUMMARY_FILE + try: + prev_job_df = tfs.read(str(jobfile_path.absolute()), index=COLUMN_JOBID) + except FileNotFoundError as filerror: + raise FileNotFoundError( + "Cannot append jobs, as no previous jobfile was found at " f"'{jobfile_path}'" + ) from filerror + new_jobs_mask = [elem not in prev_job_df[parameters].values for elem in values_grid] + values_grid = values_grid[new_jobs_mask] + + return parameters, values_grid, prev_job_df + + +def _generate_values_grid(replace_dict: Dict[str, Any]) -> np.ndarray: + """ Creates an array of the inner-product of the replace-dict. """ + return np.array(list(itertools.product(*replace_dict.values())), dtype=object) + + +def _drop_already_run_jobs( + job_df: tfs.TfsDataFrame, output_dir: str, check_files: str + ) -> Tuple[tfs.TfsDataFrame, List[str]]: + """ Check for jobs that have already been run and drop them from current job_df. """ + LOG.debug("Dropping already finished jobs.") + finished_jobs = [ + idx + for idx, row in job_df.iterrows() + if _job_was_successful(row, output_dir, check_files) + ] + + LOG.info( + f"{len(finished_jobs):d} of {len(job_df.index):d}" + " Jobs have already finished and will be skipped." + ) + + job_df = job_df.drop(index=finished_jobs) + return job_df, finished_jobs + + +def _job_was_successful(job_row, output_dir, files) -> bool: + job_dir = job_row.get(COLUMN_DEST_DIRECTORY) or job_row[COLUMN_JOB_DIRECTORY] + output_dir = Path(job_dir, output_dir) + success = output_dir.is_dir() and any(output_dir.iterdir()) + if success and files is not None and len(files): + for f in files: + success &= len(list(output_dir.glob(f))) > 0 + return success + + +def _get_script_extension(script_extension: str, executable: Path, mask: Path) -> str: + """ Returns the extension of the script to run based on + either the given value, its executable or the mask. 
""" + if script_extension is not None: + return script_extension + return SCRIPT_EXTENSIONS.get(executable, mask.suffix) diff --git a/pylhc_submitter/htc/mask.py b/pylhc_submitter/job_submitter_tools/mask.py similarity index 67% rename from pylhc_submitter/htc/mask.py rename to pylhc_submitter/job_submitter_tools/mask.py index 616e10f..616bb7a 100644 --- a/pylhc_submitter/htc/mask.py +++ b/pylhc_submitter/job_submitter_tools/mask.py @@ -8,15 +8,17 @@ import logging import re from pathlib import Path +from typing import Sequence import pandas as pd +from numpy.typing import ArrayLike -from pylhc_submitter.htc.utils import COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE +from pylhc_submitter.constants.job_submitter import COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE LOG = logging.getLogger(__name__) -def create_jobs_from_mask( +def create_job_scripts_from_mask( job_df: pd.DataFrame, maskfile: Path, replace_keys: dict, file_ext: str ) -> pd.DataFrame: """ @@ -44,8 +46,8 @@ def create_jobs_from_mask( for idx, (jobid, values) in enumerate(job_df.iterrows()): jobfile_fullpath = (Path(values[COLUMN_JOB_DIRECTORY]) / jobname).with_suffix(file_ext) - with jobfile_fullpath.open("w") as madxjob: - madxjob.write(template % dict(zip(replace_keys, values[list(replace_keys)]))) + with jobfile_fullpath.open("w") as job_file: + job_file.write(template % dict(zip(replace_keys, values[list(replace_keys)]))) jobs[idx] = jobfile_fullpath.name job_df[COLUMN_JOB_FILE] = jobs return job_df @@ -70,14 +72,38 @@ def check_percentage_signs_in_mask(mask: str): raise KeyError(f"{n_signs} problematic '%' signs found in template. Please remove.") -def generate_jobdf_index(old_df, jobid_mask, keys, values): - """ Generates index for jobdf from mask for job_id naming. """ +def generate_jobdf_index(old_df: pd.DataFrame, jobid_mask: str, keys: Sequence[str], values: ArrayLike): + """ Generates index for jobdf from mask for job_id naming. + + Args: + old_df (pd.DataFrame): Existing jobdf. + jobid_mask (str): Mask for naming the jobs. + keys (Sequence[str]): Keys to be replaced in the mask. + values (np.array_like): Values-Grid to be replaced in the mask. + """ if not jobid_mask: + # Use integer-range as index, if no mask is given + # Start with last index if old_df is not None. nold = len(old_df.index) if old_df is not None else 0 start = nold-1 if nold > 0 else 0 return range(start, start + values.shape[0]) + + # Fill job-id mask return [jobid_mask % dict(zip(keys, v)) for v in values] +def is_mask_file(mask: str) -> bool: + """ Check if given string points to a file. """ + try: + return Path(mask).is_file() + except OSError: + return False + + +def is_mask_string(mask: str) -> bool: + """ Checks that given string does not point to a file. """ + return not is_mask_file(mask) + + if __name__ == "__main__": raise EnvironmentError(f"{__file__} is not supposed to run as main.") diff --git a/pylhc_submitter/job_submitter_tools/runners.py b/pylhc_submitter/job_submitter_tools/runners.py new file mode 100644 index 0000000..516813a --- /dev/null +++ b/pylhc_submitter/job_submitter_tools/runners.py @@ -0,0 +1,117 @@ +""" +Job Submitter Runners +--------------------- + +Defines the methods to run the job-submitter, locally or on HTC. 
+""" +import logging +import multiprocessing +import subprocess +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, Optional + +import tfs + +from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, + COLUMN_SHELL_SCRIPT) +from pylhc_submitter.job_submitter_tools import htc_utils +from pylhc_submitter.job_submitter_tools.iotools import strip_eos_uri +from pylhc_submitter.utils.environment import on_windows + +LOG = logging.getLogger(__name__) + + +@dataclass +class RunnerOpts: + working_directory: Path # Path to the working directory (e.g. afs) + jobflavour: Optional[str] = None # HTCondor job flavour (lengths of the job) + output_dir: Optional[str] = None # Name of the output directory, where jobs store data + ssh: Optional[str] = None # SSH command + dryrun: Optional[bool] = False # Perform only a dry-run, i.e. do all but submit to HTC + htc_arguments: Optional[Dict[str, Any]] = None # Arguments to pass on to htc as keywords + run_local: Optional[bool] = False # Run jobs locally + num_processes: Optional[int] = 4 # Number of processes to run in parallel (locally) + + +def run_jobs(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: + """Selects how to run the jobs. + + Args: + job_df (tfs.TfsDataFrame): DataFrame containing all the job-information + opt (RunnerOpts): Parameters for the runner + """ + + if opt.run_local: + run_local(job_df, opt) + else: + run_htc(job_df, opt) + + +def run_local(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: + """Run all jobs locally. + + Args: + job_df (tfs.TfsDataFrame): DataFrame containing all the job-information + opt (RunnerOpts): Parameters for the runner + """ + if opt.dryrun: + LOG.info(f"Dry-run: Skipping local run.") + return + + LOG.info(f"Running {len(job_df.index)} jobs locally in {opt.num_processes:d} processes.") + + # URI type EOS addresses won't work for copying files from local jobs + check_dest = job_df.get(COLUMN_DEST_DIRECTORY) + if check_dest is not None and strip_eos_uri(check_dest.iloc[0]) != Path(check_dest.iloc[0]): + LOG.warning("The output destination is likely specified as EOS URI," + "which will not work during a local run") + + pool = multiprocessing.Pool(processes=opt.num_processes) + res = pool.map(_execute_shell, job_df.iterrows()) + if any(res): + jobs_failed = [j for r, j in zip(res, job_df.index) if r] + LOG.error(f"{len(jobs_failed)} of {len(job_df)} jobs have failed:\n {jobs_failed}") + raise RuntimeError("At least one job has failed. Check output logs!") + + +def run_htc(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: + """ Create submission file and submit the jobs to ``HTCondor``. 
+ + Args: + job_df (tfs.TfsDataFrame): DataFrame containing all the job-information + opt (RunnerOpts): Parameters for the runner + """ + LOG.info(f"Submitting {len(job_df.index)} jobs on htcondor, flavour '{opt.jobflavour}'.") + LOG.debug("Creating htcondor subfile.") + + subfile = htc_utils.make_subfile( + opt.working_directory, job_df, + output_dir=opt.output_dir, + duration=opt.jobflavour, + **opt.htc_arguments + ) + + if opt.dryrun: + LOG.info("Dry run: submission file created, but not submitting jobs to htcondor.") + return + + LOG.debug("Submitting jobs to htcondor.") + htc_utils.submit_jobfile(subfile, opt.ssh) + + +# Helper ####################################################################### + +def _execute_shell(df_row) -> int: + _, column = df_row + cmd = [] if on_windows() else ["sh"] + + with Path(column[COLUMN_JOB_DIRECTORY], "log.tmp").open("w") as logfile: + process = subprocess.Popen( + cmd + [column[COLUMN_SHELL_SCRIPT]], + shell=on_windows(), + stdout=logfile, + stderr=subprocess.STDOUT, + cwd=column[COLUMN_JOB_DIRECTORY], + ) + return process.wait() \ No newline at end of file diff --git a/pylhc_submitter/utils/environment_tools.py b/pylhc_submitter/utils/environment.py similarity index 100% rename from pylhc_submitter/utils/environment_tools.py rename to pylhc_submitter/utils/environment.py diff --git a/pylhc_submitter/utils/iotools.py b/pylhc_submitter/utils/iotools.py index 261861e..31a50b5 100644 --- a/pylhc_submitter/utils/iotools.py +++ b/pylhc_submitter/utils/iotools.py @@ -4,8 +4,8 @@ Tools for input and output. """ -from pathlib import Path from datetime import datetime +from pathlib import Path from typing import Iterable from generic_parser.entry_datatypes import get_instance_faker_meta @@ -13,7 +13,6 @@ from pylhc_submitter.constants.general import TIME - # Output ----------------------------------------------------------------------- diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index f1cac12..3d3427d 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -7,7 +7,7 @@ import pytest from pylhc_submitter.job_submitter import main as job_submit -from pylhc_submitter.utils.environment_tools import on_linux, on_windows +from pylhc_submitter.utils.environment import on_linux, on_windows SUBFILE = "queuehtc.sub" @@ -223,11 +223,8 @@ def _check_output_content(dir_path: Path): _check_output_content(setup.output_destination) - - - def _generate_combinations(data: Dict[str, Sequence]) -> List[Dict[str, Any]]: - """ Creates all possible combinations of values in data as dictionaries. """ + """ Creates all possible combinations of values in data as a list of dictionaries. 
""" keys = list(data.keys()) all_values = [data[key] for key in keys] From ac8f2426bc9aa305da98bbe9d9b6347b19046d12 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Tue, 7 Nov 2023 22:47:14 +0100 Subject: [PATCH 11/30] fixing imports --- pylhc_submitter/sixdesk_tools/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pylhc_submitter/sixdesk_tools/utils.py b/pylhc_submitter/sixdesk_tools/utils.py index 2c629fb..135b0c3 100644 --- a/pylhc_submitter/sixdesk_tools/utils.py +++ b/pylhc_submitter/sixdesk_tools/utils.py @@ -10,7 +10,7 @@ from pylhc_submitter.constants.autosix import SIXDESKLOCKFILE, get_workspace_path from pylhc_submitter.constants.external_paths import SIXDESK_UTILS -from pylhc_submitter.htc.mask import find_named_variables_in_mask +from pylhc_submitter.job_submitter_tools.mask import find_named_variables_in_mask LOG = logging.getLogger(__name__) From 2d50155bfdeca9487fbaa2c31dcd9065bd6909ae Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Tue, 7 Nov 2023 22:50:14 +0100 Subject: [PATCH 12/30] more import fixes --- pylhc_submitter/autosix.py | 33 ++++++++------------------------- 1 file changed, 8 insertions(+), 25 deletions(-) diff --git a/pylhc_submitter/autosix.py b/pylhc_submitter/autosix.py index f6b720d..9949adf 100644 --- a/pylhc_submitter/autosix.py +++ b/pylhc_submitter/autosix.py @@ -193,34 +193,17 @@ import numpy as np import tfs -from generic_parser import EntryPointParameters, entrypoint, DotDict +from generic_parser import EntryPointParameters, entrypoint from generic_parser.entry_datatypes import DictAsString -from pylhc_submitter.constants.autosix import ( - HEADER_BASEDIR, - SIXENV_REQUIRED, - SIXENV_OPTIONAL, - AutoSixEnvironment, -) +from pylhc_submitter.constants.autosix import (HEADER_BASEDIR, SIXENV_OPTIONAL, SIXENV_REQUIRED, + AutoSixEnvironment) +from pylhc_submitter.constants.job_submitter import COLUMN_JOBID, JOBSUMMARY_FILE from pylhc_submitter.job_submitter_tools.mask import generate_jobdf_index -from pylhc_submitter.job_submitter import ( - JOBSUMMARY_FILE, - COLUMN_JOBID, -) -from pylhc_submitter.sixdesk_tools.create_workspace import ( - set_max_materialize -) -from pylhc_submitter.sixdesk_tools.stages import Stage, STAGE_ORDER -from pylhc_submitter.sixdesk_tools.utils import ( - is_locked, - check_mask, -) -from pylhc_submitter.utils.iotools import ( - PathOrStr, - save_config, - make_replace_entries_iterable, - keys_to_path -) +from pylhc_submitter.sixdesk_tools.stages import STAGE_ORDER, Stage +from pylhc_submitter.sixdesk_tools.utils import check_mask, is_locked +from pylhc_submitter.utils.iotools import (PathOrStr, keys_to_path, make_replace_entries_iterable, + save_config) from pylhc_submitter.utils.logging_tools import log_setup LOG = logging.getLogger(__name__) From a8db8951d2c8e5249da69f9b518cf67b95fcccf0 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 8 Nov 2023 14:46:43 +0100 Subject: [PATCH 13/30] Lots of doc --- doc/modules/constants.rst | 2 + doc/modules/htc.rst | 9 -- doc/modules/job_submitter_tools.rst | 15 +++ pylhc_submitter/job_submitter.py | 4 +- .../job_submitter_tools/constants.py | 0 .../job_submitter_tools/htc_utils.py | 97 +++++++++++++++---- .../job_submitter_tools/iotools.py | 72 +++++++++++--- pylhc_submitter/job_submitter_tools/mask.py | 13 ++- .../job_submitter_tools/runners.py | 15 ++- 9 files changed, 174 insertions(+), 53 deletions(-) delete mode 100644 doc/modules/htc.rst 
create mode 100644 doc/modules/job_submitter_tools.rst delete mode 100644 pylhc_submitter/job_submitter_tools/constants.py diff --git a/doc/modules/constants.rst b/doc/modules/constants.rst index a0a2e36..4f4f241 100644 --- a/doc/modules/constants.rst +++ b/doc/modules/constants.rst @@ -9,6 +9,8 @@ Constants Definitions .. automodule:: pylhc_submitter.constants.external_paths :members: +.. automodule:: pylhc_submitter.constants.job_submitter + :members: .. automodule:: pylhc_submitter.constants.autosix :members: diff --git a/doc/modules/htc.rst b/doc/modules/htc.rst deleted file mode 100644 index f4965b0..0000000 --- a/doc/modules/htc.rst +++ /dev/null @@ -1,9 +0,0 @@ -HTCondor Tools -************************** - -.. automodule:: pylhc_submitter.htc.utils - :members: - - -.. automodule:: pylhc_submitter.htc.mask - :members: diff --git a/doc/modules/job_submitter_tools.rst b/doc/modules/job_submitter_tools.rst new file mode 100644 index 0000000..4f65893 --- /dev/null +++ b/doc/modules/job_submitter_tools.rst @@ -0,0 +1,15 @@ +HTCondor Tools +************************** + +.. automodule:: pylhc_submitter.job_submitter_tools.htc_utils + :members: + + +.. automodule:: pylhc_submitter.job_submitter_tools.iotools + :members: + +.. automodule:: pylhc_submitter.job_submitter_tools.mask + :members: + +.. automodule:: pylhc_submitter.job_submitter_tools.runners + :members: diff --git a/pylhc_submitter/job_submitter.py b/pylhc_submitter/job_submitter.py index 9748eed..f2ba77e 100644 --- a/pylhc_submitter/job_submitter.py +++ b/pylhc_submitter/job_submitter.py @@ -331,8 +331,8 @@ def main(opt): job_df, dropped_jobs = create_jobs(creation_opt) - run_jobs(job_df, runner_opt) + print_stats(job_df.index, dropped_jobs) @@ -383,7 +383,7 @@ def check_opts(opt): # Create new classes opt.output_dir = opt.job_output_dir # renaming - + creation = CreationOpts(**{f.name: opt[f.name] for f in fields(CreationOpts)}) runner = RunnerOpts(**{f.name: opt[f.name] for f in fields(RunnerOpts)}) runner.output_dir = None if opt.output_destination else opt.output_dir diff --git a/pylhc_submitter/job_submitter_tools/constants.py b/pylhc_submitter/job_submitter_tools/constants.py deleted file mode 100644 index e69de29..0000000 diff --git a/pylhc_submitter/job_submitter_tools/htc_utils.py b/pylhc_submitter/job_submitter_tools/htc_utils.py index 43112a4..9d6f298 100644 --- a/pylhc_submitter/job_submitter_tools/htc_utils.py +++ b/pylhc_submitter/job_submitter_tools/htc_utils.py @@ -29,7 +29,9 @@ try: import htcondor except ImportError: # will be handled by job_submitter - pass + class htcondor: + """Dummy HTCondor module. To satisfy the typing. """ + Submit: Any = None LOG = logging.getLogger(__name__) @@ -60,17 +62,33 @@ # Subprocess Methods ########################################################### -def create_subfile_from_job(cwd: Path, job: str): - """Write file to submit to ``HTCondor``.""" +def create_subfile_from_job(cwd: Path, submission: Union[str, htcondor.Submit]) -> Path: + """ + Write file to submit to ``HTCondor``. + + Args: + cwd (Path): working directory + submission (str, htcondor.Submit): HTCondor submission definition (i.e. 
content of the file) + + Returns: + Path: path to sub-file + + """ subfile = cwd / SUBFILE LOG.debug(f"Writing sub-file '{str(subfile)}'.") with subfile.open("w") as f: - f.write(str(job)) + f.write(str(submission)) return subfile -def submit_jobfile(jobfile: Path, ssh: str): - """Submit subfile to ``HTCondor`` via subprocess.""" +def submit_jobfile(jobfile: Path, ssh: str) -> None: + """Submit subfile to ``HTCondor`` via subprocess. + + Args: + jobfile (Path): path to sub-file + ssh (str): ssh target + + """ proc_args = [CMD_SUBMIT, jobfile] if ssh: proc_args = ["ssh", ssh] + proc_args @@ -81,7 +99,16 @@ def submit_jobfile(jobfile: Path, ssh: str): LOG.info("Jobs successfully submitted.") -def _start_subprocess(command: List[str]): +def _start_subprocess(command: List[str]) -> int: + """ Start subprocess and log output. + + Args: + command (List[str]): command to execute + + Returns: + int: return code of the process + + """ LOG.debug(f"Executing command '{command}'") process = subprocess.Popen( command, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -96,9 +123,10 @@ def _start_subprocess(command: List[str]): # Job Creation ################################################################# -def create_multijob_for_bashfiles(job_df: DataFrame, **kwargs): +def create_multijob_for_bashfiles(job_df: DataFrame, **kwargs) -> str: """ - Function to create an ``HTCondor`` job assuming n_files bash-files. + Function to create an ``HTCondor`` submission content for all job-scripts, + i.e. bash-files, in the job_df. Keyword Args: output_dir (str): output directory that will be transferred. Defaults to ``None``. @@ -108,6 +136,9 @@ def create_multijob_for_bashfiles(job_df: DataFrame, **kwargs): retries (int): maximum amount of retries. Default to ``3``. notification (str): Notify under certain conditions. Defaults to ``error``. priority (int): Priority to order your jobs. Defaults to ``None``. + + Returns: + str: HTCondor submission definition. """ # Pre-defined HTCondor arguments for our jobs submit_dict = { @@ -123,7 +154,7 @@ def create_multijob_for_bashfiles(job_df: DataFrame, **kwargs): submit_dict.update(map_kwargs(kwargs)) # Let the htcondor create the submit-file - job = htcondor.Submit(submit_dict) + submission = htcondor.Submit(submit_dict) # add the multiple bash files scripts = [ @@ -133,20 +164,27 @@ def create_multijob_for_bashfiles(job_df: DataFrame, **kwargs): args = [",".join(parts) for parts in zip(scripts, job_df[COLUMN_JOB_DIRECTORY])] queueArgs = ["queue executable, initialdir from (", *args, ")"] - # ugly but job.setQArgs doesn't take string containing \n - # job.setQArgs("\n".join(queueArgs)) - job = str(job) + "\n".join(queueArgs) - LOG.debug(f"Created HTCondor subfile with content: \n{job}") - return job + # ugly but submission.setQArgs doesn't take string containing '\n': + # submission.setQArgs("\n".join(queueArgs)) # doesn't work + submission = str(submission) + "\n".join(queueArgs) + LOG.debug(f"Created HTCondor subfile with content: \n{submission}") + return submission # Main functions ############################################################### -def make_subfile(cwd: Path, job_df: DataFrame, **kwargs): +def make_subfile(cwd: Path, job_df: DataFrame, **kwargs) -> Path: """ Creates submit-file for ``HTCondor``. For kwargs, see ``create_multijob_for_bashfiles``. 
+ + Args: + cwd (Path): working directory + job_df (DataFrame): DataFrame containing all the job-information + + Returns: + Path: path to the submit-file """ job = create_multijob_for_bashfiles(job_df, **kwargs) return create_subfile_from_job(cwd, job) @@ -161,6 +199,19 @@ def write_bash( ) -> DataFrame: """ Write the bash-files to be called by ``HTCondor``, which in turn call the executable. + Takes as input `Dataframe`, job type, and optional additional commandline arguments for the script. + A shell script is created in each job directory in the dataframe. + + Args: + job_df (DataFrame): DataFrame containing all the job-information + output_dir (str): output directory that will be transferred. Defaults to ``None``. + executable (str): name of the executable. Defaults to ``madx``. + cmdline_arguments (dict): additional commandline arguments for the executable + mask (Union[str, Path]): string or path to the mask-file. Defaults to ``None``. + + Returns: + DataFrame: The provided ``job_df`` but with added path to the scripts. + """ if len(job_df.index) > HTCONDOR_JOBLIMIT: raise AttributeError("Submitting too many jobs for HTCONDOR") @@ -214,8 +265,15 @@ def write_bash( def map_kwargs(add_dict: Dict[str, Any]) -> Dict[str, Any]: """ - Maps the kwargs for the job-file. Some arguments have pre-defined choices and defaults, - the remaining ones are just passed on. + Maps the kwargs for the job-file. + Some arguments have pre-defined choices and defaults, the remaining ones are just passed on. + + Args: + add_dict (Dict[str, Any]): additional kwargs to add to the defaults. + + Returns: + Dict[str, Any]: The mapped kwargs. + """ new = {} @@ -249,7 +307,8 @@ def map_kwargs(add_dict: Dict[str, Any]) -> Dict[str, Any]: # Helper ####################################################################### -def _maybe_put_in_quotes(key, value): +def _maybe_put_in_quotes(key: str, value: Any) -> Any: + """ Put value in quoted strings if key starts with '+' """ if key.startswith("+"): return f'"{value}"' return value diff --git a/pylhc_submitter/job_submitter_tools/iotools.py b/pylhc_submitter/job_submitter_tools/iotools.py index bc29777..85a1dcc 100644 --- a/pylhc_submitter/job_submitter_tools/iotools.py +++ b/pylhc_submitter/job_submitter_tools/iotools.py @@ -11,6 +11,7 @@ from typing import Any, Dict, List, Sequence, Tuple, Union import numpy as np +import pandas as pd import tfs from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, @@ -25,25 +26,41 @@ @dataclass class CreationOpts: - working_directory: Path - mask: Union[Path, str] - jobid_mask: str - replace_dict: Dict[str, Any] - output_dir: Path - output_destination: Path - append_jobs: bool - resume_jobs: bool - executable: str - check_files: Sequence[str] - script_arguments: Dict[str, Any] - script_extension: str + """ Options for creating jobs. """ + working_directory: Path # Path to working directory (afs) + mask: Union[Path, str] # Path to mask file or mask-string + jobid_mask: str # Mask for jobid + replace_dict: Dict[str, Any] # Replace-dict + output_dir: Path # Path to local output directory + output_destination: Path # Path to remote output directory (e.g. 
eos) + append_jobs: bool # Append jobs to existing jobs + resume_jobs: bool # Resume jobs that have already run/failed/got interrupted + executable: str # Name of executable to call the script (from mask) + check_files: Sequence[str] # List of output files to check for success + script_arguments: Dict[str, Any] # Arguments to pass to script + script_extension: str # Extension of the script to run def should_drop_jobs(self) -> bool: + """ Check if jobs should be dropped after creating the whole parameter space, + e.g. because they already exist. """ return self.append_jobs or self.resume_jobs def create_jobs(opt: CreationOpts) -> tfs.TfsDataFrame: + """Main function to prepare all the jobs and folder structure. + This greates the value-grid based on the replace-dict and + checks for existing jobs (if so desired). + A job-dataframe is created - and written out - containing all the information and + its values are used to generate the job-scripts. + It also creates bash-scripts to call the executable for the job-scripts. + + Args: + opt (CreationOpts): Options for creating jobs + + Returns: + tfs.TfsDataFrame: The job-dataframe containing information for all jobs. + """ LOG.debug("Creating Jobs.") # Generate product of replace-dict and compare to existing jobs --- @@ -112,6 +129,20 @@ def create_jobs(opt: CreationOpts) -> tfs.TfsDataFrame: def create_folders(job_df: tfs.TfsDataFrame, working_directory: Path, destination_directory: Path = None) -> tfs.TfsDataFrame: + """Create the folder-structure in the given working directory and the + destination directory if given. + This creates a folder per job in which then the job-scripts and bash-scripts + can be stored later. + + Args: + job_df (tfs.TfsDataFrame): DataFrame containing all the job-information + working_directory (Path): Path to the working directory + destination_directory (Path, optional): Path to the destination directory, + i.e. the directory to copy the outputs to manually. Defaults to None. + + Returns: + tfs.TfsDataFrame: The job-dataframe again, but with the added paths to the job-dirs. + """ LOG.debug("Setting up folders: ") jobname = f"{JOBDIRECTORY_PREFIX}.{{0}}" @@ -151,8 +182,10 @@ def is_eos_path(path: Union[Path, str]) -> bool: def strip_eos_uri(path: Union[Path, str]) -> Path: - # EOS paths for HTCondor can be given as URI. Strip for direct writing. - # E.g.: root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt + """ Strip EOS path information from a path. + EOS paths for HTCondor can be given as URI. Strip for direct writing. + E.g.: root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt + """ path = Path(path) parts = path.parts outpath = path @@ -162,7 +195,7 @@ def strip_eos_uri(path: Union[Path, str]) -> Path: return outpath -def print_stats(new_jobs, finished_jobs): +def print_stats(new_jobs: Sequence[str], finished_jobs: Sequence[str]): """Print some quick statistics.""" text = [ "\n------------- QUICK STATS ----------------" @@ -227,7 +260,14 @@ def _drop_already_run_jobs( return job_df, finished_jobs -def _job_was_successful(job_row, output_dir, files) -> bool: +def _job_was_successful(job_row: pd.Series, output_dir: str, files: Sequence[str]) -> bool: + """ Determines if the job was successful. 
+ + Args: + job_row (pd.Series): row from the job_df + output_dir (str): Name of the (local) output directory + files (List[str]): list of files that should have been generated + """ job_dir = job_row.get(COLUMN_DEST_DIRECTORY) or job_row[COLUMN_JOB_DIRECTORY] output_dir = Path(job_dir, output_dir) success = output_dir.is_dir() and any(output_dir.iterdir()) diff --git a/pylhc_submitter/job_submitter_tools/mask.py b/pylhc_submitter/job_submitter_tools/mask.py index 616bb7a..3a2dcaa 100644 --- a/pylhc_submitter/job_submitter_tools/mask.py +++ b/pylhc_submitter/job_submitter_tools/mask.py @@ -8,7 +8,7 @@ import logging import re from pathlib import Path -from typing import Sequence +from typing import Iterable, List, Sequence, Set, Union import pandas as pd from numpy.typing import ArrayLike @@ -53,11 +53,12 @@ def create_job_scripts_from_mask( return job_df -def find_named_variables_in_mask(mask: str): +def find_named_variables_in_mask(mask: str) -> Set[str]: + """ Find all variable-names in the mask. """ return set(re.findall(r"%\((\w+)\)", mask)) -def check_percentage_signs_in_mask(mask: str): +def check_percentage_signs_in_mask(mask: str) -> None: """ Checks for '%' in the mask, that are not replacement variables. """ cleaned_mask = re.sub(r"%\((\w+)\)", "", mask) n_signs = cleaned_mask.count("%") @@ -72,7 +73,8 @@ def check_percentage_signs_in_mask(mask: str): raise KeyError(f"{n_signs} problematic '%' signs found in template. Please remove.") -def generate_jobdf_index(old_df: pd.DataFrame, jobid_mask: str, keys: Sequence[str], values: ArrayLike): +def generate_jobdf_index(old_df: pd.DataFrame, jobid_mask: str, keys: Sequence[str], values: ArrayLike + ) -> Union[List[str], Iterable[int]]: """ Generates index for jobdf from mask for job_id naming. Args: @@ -80,6 +82,9 @@ def generate_jobdf_index(old_df: pd.DataFrame, jobid_mask: str, keys: Sequence[s jobid_mask (str): Mask for naming the jobs. keys (Sequence[str]): Keys to be replaced in the mask. values (np.array_like): Values-Grid to be replaced in the mask. + + Returns: + List[str]: Index for jobdf, either list of strings (the filled jobid_masks) or integer-range. """ if not jobid_mask: # Use integer-range as index, if no mask is given diff --git a/pylhc_submitter/job_submitter_tools/runners.py b/pylhc_submitter/job_submitter_tools/runners.py index 516813a..d85d7a8 100644 --- a/pylhc_submitter/job_submitter_tools/runners.py +++ b/pylhc_submitter/job_submitter_tools/runners.py @@ -10,6 +10,7 @@ from dataclasses import dataclass from pathlib import Path from typing import Any, Dict, Optional +import pandas as pd import tfs @@ -24,6 +25,7 @@ @dataclass class RunnerOpts: + """ Options for running the submission. """ working_directory: Path # Path to the working directory (e.g. afs) jobflavour: Optional[str] = None # HTCondor job flavour (lengths of the job) output_dir: Optional[str] = None # Name of the output directory, where jobs store data @@ -41,7 +43,6 @@ def run_jobs(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: job_df (tfs.TfsDataFrame): DataFrame containing all the job-information opt (RunnerOpts): Parameters for the runner """ - if opt.run_local: run_local(job_df, opt) else: @@ -102,7 +103,15 @@ def run_htc(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: # Helper ####################################################################### -def _execute_shell(df_row) -> int: +def _execute_shell(df_row: pd.Series) -> int: + """ Execute the shell script. 
+ + Args: + df_row (pd.Series): row in the job-dataframe + + Returns: + int: return code of the process + """ _, column = df_row cmd = [] if on_windows() else ["sh"] @@ -114,4 +123,4 @@ def _execute_shell(df_row) -> int: stderr=subprocess.STDOUT, cwd=column[COLUMN_JOB_DIRECTORY], ) - return process.wait() \ No newline at end of file + return process.wait() From b4c9ae21a1e8033ab647e06d1ab9b11291493bd6 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 8 Nov 2023 16:02:16 +0100 Subject: [PATCH 14/30] bugfixes --- .../job_submitter_tools/htc_utils.py | 10 +++---- .../job_submitter_tools/iotools.py | 30 +++++++++++++------ .../job_submitter_tools/runners.py | 10 ++----- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/pylhc_submitter/job_submitter_tools/htc_utils.py b/pylhc_submitter/job_submitter_tools/htc_utils.py index 9d6f298..49150a5 100644 --- a/pylhc_submitter/job_submitter_tools/htc_utils.py +++ b/pylhc_submitter/job_submitter_tools/htc_utils.py @@ -22,7 +22,7 @@ from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE, COLUMN_SHELL_SCRIPT, EXECUTEABLEPATH, NON_PARAMETER_COLUMNS) -from pylhc_submitter.job_submitter_tools.iotools import is_eos_path +from pylhc_submitter.job_submitter_tools.iotools import is_eos_uri from pylhc_submitter.job_submitter_tools.mask import is_mask_file from pylhc_submitter.utils.environment import on_windows @@ -252,7 +252,7 @@ def write_bash( dest_dir = job.get(COLUMN_DEST_DIRECTORY) if output_dir and dest_dir and output_dir != dest_dir: cp_command = f'cp -r {output_dir} {dest_dir}' - if is_eos_path(dest_dir): + if is_eos_uri(dest_dir): cp_command = f'eos {cp_command}' f.write(f'{cp_command}\n') @@ -278,7 +278,7 @@ def map_kwargs(add_dict: Dict[str, Any]) -> Dict[str, Any]: new = {} # Predefined mappings - htc_map = { + htc_map = { # name: mapped_name, choices, default "duration": ("+JobFlavour", JOBFLAVOURS, "workday"), "output_dir": ("transfer_output_files", None, None), "accounting_group": ("+AccountingGroup", None, None), @@ -289,14 +289,14 @@ def map_kwargs(add_dict: Dict[str, Any]) -> Dict[str, Any]: try: value = add_dict.pop(key) except KeyError: - if default is not None: - new[mapped] = default + value = default # could be `None` else: if choices is not None and value not in choices: raise TypeError( f"{key} needs to be one of '{str(choices).strip('[]')}' but " f"instead was '{value}'" ) + if value is not None: new[mapped] = _maybe_put_in_quotes(mapped, value) # Pass-Through Arguments diff --git a/pylhc_submitter/job_submitter_tools/iotools.py b/pylhc_submitter/job_submitter_tools/iotools.py index 85a1dcc..33631c4 100644 --- a/pylhc_submitter/job_submitter_tools/iotools.py +++ b/pylhc_submitter/job_submitter_tools/iotools.py @@ -160,12 +160,14 @@ def create_folders(job_df: tfs.TfsDataFrame, working_directory: Path, # Make some symlinks for easy navigation--- # Output directory -> Working Directory - sym_submission = destination_directory / Path('SUBMISSION_DIR') + sym_submission = strip_dest_dir / Path('SUBMISSION_DIR') + sym_submission.unlink(missing_ok=True) sym_submission.symlink_to(working_directory.resolve(), target_is_directory=True) # Working Directory -> Output Directory sym_destination = working_directory / Path('OUTPUT_DIR') - sym_destination.symlink_to(destination_directory.resolve(), target_is_directory=True) + sym_destination.unlink(missing_ok=True) + sym_destination.symlink_to(strip_dest_dir.resolve(), 
target_is_directory=True) # Create output dirs per job --- for job_dest_dir in job_df[COLUMN_DEST_DIRECTORY]: @@ -175,10 +177,21 @@ def create_folders(job_df: tfs.TfsDataFrame, working_directory: Path, return job_df -def is_eos_path(path: Union[Path, str]) -> bool: - """ Check if the given path leads to EOS.""" - strip_path_parts = strip_eos_uri(path).parts - return len(strip_path_parts) > 1 and strip_path_parts[1] == 'eos' +def is_eos_uri(path: Union[Path, str, None]) -> bool: + """ Check if the given path is an EOS-URI as `eos cp` only works with those. + E.g.: root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt + """ + if path is None: + return False + + parts = Path(path).parts + return ( + len(parts) >= 3 # at least root:, server, path + and + parts[0].endswith(':') + and + parts[2] == 'eos' + ) def strip_eos_uri(path: Union[Path, str]) -> Path: @@ -188,11 +201,10 @@ def strip_eos_uri(path: Union[Path, str]) -> Path: """ path = Path(path) parts = path.parts - outpath = path if parts[0].endswith(':'): # the first two parts are host info, e.g `file: //host/path` - outpath = Path('/', *parts[2:]) - return outpath + return Path('/', *parts[2:]) + return path def print_stats(new_jobs: Sequence[str], finished_jobs: Sequence[str]): diff --git a/pylhc_submitter/job_submitter_tools/runners.py b/pylhc_submitter/job_submitter_tools/runners.py index d85d7a8..06eeceb 100644 --- a/pylhc_submitter/job_submitter_tools/runners.py +++ b/pylhc_submitter/job_submitter_tools/runners.py @@ -10,14 +10,14 @@ from dataclasses import dataclass from pathlib import Path from typing import Any, Dict, Optional -import pandas as pd +import pandas as pd import tfs from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, COLUMN_SHELL_SCRIPT) from pylhc_submitter.job_submitter_tools import htc_utils -from pylhc_submitter.job_submitter_tools.iotools import strip_eos_uri +from pylhc_submitter.job_submitter_tools.iotools import is_eos_uri from pylhc_submitter.utils.environment import on_windows LOG = logging.getLogger(__name__) @@ -61,12 +61,6 @@ def run_local(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: return LOG.info(f"Running {len(job_df.index)} jobs locally in {opt.num_processes:d} processes.") - - # URI type EOS addresses won't work for copying files from local jobs - check_dest = job_df.get(COLUMN_DEST_DIRECTORY) - if check_dest is not None and strip_eos_uri(check_dest.iloc[0]) != Path(check_dest.iloc[0]): - LOG.warning("The output destination is likely specified as EOS URI," - "which will not work during a local run") pool = multiprocessing.Pool(processes=opt.num_processes) res = pool.map(_execute_shell, job_df.iterrows()) From bc6ff9c4356c3ac432eafc93dbb910ffd8eb2199 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:31:02 +0100 Subject: [PATCH 15/30] uri-bugfixes --- doc/modules/job_submitter_tools.rst | 8 +- pylhc_submitter/autosix.py | 2 +- pylhc_submitter/job_submitter.py | 16 +- .../job_submitter_tools/__init__.py | 0 .../job_submitter_tools/htc_utils.py | 321 ------------------ .../job_submitter_tools/iotools.py | 297 ---------------- pylhc_submitter/job_submitter_tools/mask.py | 114 ------- .../job_submitter_tools/runners.py | 120 ------- pylhc_submitter/sixdesk_tools/utils.py | 2 +- tests/unit/test_job_submitter.py | 82 +++-- 10 files changed, 80 insertions(+), 882 deletions(-) delete mode 100644 pylhc_submitter/job_submitter_tools/__init__.py delete mode 100644 
pylhc_submitter/job_submitter_tools/htc_utils.py delete mode 100644 pylhc_submitter/job_submitter_tools/iotools.py delete mode 100644 pylhc_submitter/job_submitter_tools/mask.py delete mode 100644 pylhc_submitter/job_submitter_tools/runners.py diff --git a/doc/modules/job_submitter_tools.rst b/doc/modules/job_submitter_tools.rst index 4f65893..34be7cc 100644 --- a/doc/modules/job_submitter_tools.rst +++ b/doc/modules/job_submitter_tools.rst @@ -1,15 +1,15 @@ HTCondor Tools ************************** -.. automodule:: pylhc_submitter.job_submitter_tools.htc_utils +.. automodule:: pylhc_submitter.submitter.htc_utils :members: -.. automodule:: pylhc_submitter.job_submitter_tools.iotools +.. automodule:: pylhc_submitter.submitter.iotools :members: -.. automodule:: pylhc_submitter.job_submitter_tools.mask +.. automodule:: pylhc_submitter.submitter.mask :members: -.. automodule:: pylhc_submitter.job_submitter_tools.runners +.. automodule:: pylhc_submitter.submitter.runners :members: diff --git a/pylhc_submitter/autosix.py b/pylhc_submitter/autosix.py index 9949adf..09b2903 100644 --- a/pylhc_submitter/autosix.py +++ b/pylhc_submitter/autosix.py @@ -199,7 +199,7 @@ from pylhc_submitter.constants.autosix import (HEADER_BASEDIR, SIXENV_OPTIONAL, SIXENV_REQUIRED, AutoSixEnvironment) from pylhc_submitter.constants.job_submitter import COLUMN_JOBID, JOBSUMMARY_FILE -from pylhc_submitter.job_submitter_tools.mask import generate_jobdf_index +from pylhc_submitter.submitter.mask import generate_jobdf_index from pylhc_submitter.sixdesk_tools.stages import STAGE_ORDER, Stage from pylhc_submitter.sixdesk_tools.utils import check_mask, is_locked from pylhc_submitter.utils.iotools import (PathOrStr, keys_to_path, make_replace_entries_iterable, diff --git a/pylhc_submitter/job_submitter.py b/pylhc_submitter/job_submitter.py index f2ba77e..30d08a4 100644 --- a/pylhc_submitter/job_submitter.py +++ b/pylhc_submitter/job_submitter.py @@ -159,11 +159,11 @@ from generic_parser.tools import print_dict_tree from pylhc_submitter.constants.job_submitter import EXECUTEABLEPATH, SCRIPT_EXTENSIONS -from pylhc_submitter.job_submitter_tools.htc_utils import JOBFLAVOURS -from pylhc_submitter.job_submitter_tools.iotools import CreationOpts, create_jobs, print_stats -from pylhc_submitter.job_submitter_tools.mask import (check_percentage_signs_in_mask, +from pylhc_submitter.submitter.htc_utils import JOBFLAVOURS +from pylhc_submitter.submitter.iotools import CreationOpts, create_jobs, is_eos_uri, print_stats +from pylhc_submitter.submitter.mask import (check_percentage_signs_in_mask, find_named_variables_in_mask, is_mask_file) -from pylhc_submitter.job_submitter_tools.runners import RunnerOpts, run_jobs +from pylhc_submitter.submitter.runners import RunnerOpts, run_jobs from pylhc_submitter.utils.iotools import (PathOrStr, keys_to_path, make_replace_entries_iterable, save_config) from pylhc_submitter.utils.logging_tools import log_setup @@ -343,7 +343,7 @@ def check_opts(opt): raise ValueError("Select either Resume jobs or Append jobs") # Paths --- - opt = keys_to_path(opt, "working_directory", "executable", "output_destination") + opt = keys_to_path(opt, "working_directory", "executable") if str(opt.executable) in EXECUTEABLEPATH.keys(): opt.executable = str(opt.executable) @@ -353,6 +353,12 @@ def check_opts(opt): opt.mask = Path(opt.mask) else: mask_content = opt.mask + + if is_eos_uri(opt.output_destination) and not ("://" in opt.output_destination and "//eos" in opt.output_destination): + raise ValueError( + "The 
'output_destination' is an EOS-URI but missing '://' or '//eos' (double slashes?). " + ) + # Replace dict --- dict_keys = set(opt.replace_dict.keys()) diff --git a/pylhc_submitter/job_submitter_tools/__init__.py b/pylhc_submitter/job_submitter_tools/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pylhc_submitter/job_submitter_tools/htc_utils.py b/pylhc_submitter/job_submitter_tools/htc_utils.py deleted file mode 100644 index 49150a5..0000000 --- a/pylhc_submitter/job_submitter_tools/htc_utils.py +++ /dev/null @@ -1,321 +0,0 @@ -""" -HTCondor Utilities ------------------- - -This module provides functionality to create HTCondor jobs and submit them to ``HTCondor``. - -``write_bash`` creates bash scripts executing either a python or madx script. -Takes as input `Dataframe`, job type, and optional additional commandline arguments for the script. -A shell script is created in each job directory in the dataframe. - -``make_subfile`` takes the job dataframe and creates the **.sub** files required for submissions to -``HTCondor``. The **.sub** file will be put in the working directory. The maximum runtime of one -job can be specified, standard is 8h. -""" -import logging -import subprocess -from pathlib import Path -from typing import Any, Dict, List, Union - -from pandas import DataFrame - -from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, - COLUMN_JOB_FILE, COLUMN_SHELL_SCRIPT, - EXECUTEABLEPATH, NON_PARAMETER_COLUMNS) -from pylhc_submitter.job_submitter_tools.iotools import is_eos_uri -from pylhc_submitter.job_submitter_tools.mask import is_mask_file -from pylhc_submitter.utils.environment import on_windows - -try: - import htcondor -except ImportError: # will be handled by job_submitter - class htcondor: - """Dummy HTCondor module. To satisfy the typing. """ - Submit: Any = None - - -LOG = logging.getLogger(__name__) - -# HTC Constants ################################################################ - -SHEBANG = "#!/bin/bash" -SUBFILE = "queuehtc.sub" -BASH_FILENAME = "Job" - -HTCONDOR_JOBLIMIT = 100000 - -CMD_SUBMIT = "condor_submit" -JOBFLAVOURS = ( - "espresso", # 20 min - "microcentury", # 1 h - "longlunch", # 2 h - "workday", # 8 h - "tomorrow", # 1 d - "testmatch", # 3 d - "nextweek", # 1 w -) - -NOTIFICATIONS = ("always", "complete", "error", "never") - - - -# Subprocess Methods ########################################################### - - -def create_subfile_from_job(cwd: Path, submission: Union[str, htcondor.Submit]) -> Path: - """ - Write file to submit to ``HTCondor``. - - Args: - cwd (Path): working directory - submission (str, htcondor.Submit): HTCondor submission definition (i.e. content of the file) - - Returns: - Path: path to sub-file - - """ - subfile = cwd / SUBFILE - LOG.debug(f"Writing sub-file '{str(subfile)}'.") - with subfile.open("w") as f: - f.write(str(submission)) - return subfile - - -def submit_jobfile(jobfile: Path, ssh: str) -> None: - """Submit subfile to ``HTCondor`` via subprocess. - - Args: - jobfile (Path): path to sub-file - ssh (str): ssh target - - """ - proc_args = [CMD_SUBMIT, jobfile] - if ssh: - proc_args = ["ssh", ssh] + proc_args - status = _start_subprocess(proc_args) - if status: - raise RuntimeError("Submit to HTCondor was not successful!") - else: - LOG.info("Jobs successfully submitted.") - - -def _start_subprocess(command: List[str]) -> int: - """ Start subprocess and log output. 
- - Args: - command (List[str]): command to execute - - Returns: - int: return code of the process - - """ - LOG.debug(f"Executing command '{command}'") - process = subprocess.Popen( - command, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - ) - for line in process.stdout: - htc_line = line.decode("utf-8").strip() - if htc_line: - LOG.debug(f"{htc_line} (from HTCondor)") - return process.wait() - - -# Job Creation ################################################################# - - -def create_multijob_for_bashfiles(job_df: DataFrame, **kwargs) -> str: - """ - Function to create an ``HTCondor`` submission content for all job-scripts, - i.e. bash-files, in the job_df. - - Keyword Args: - output_dir (str): output directory that will be transferred. Defaults to ``None``. - duration (str): max duration of the job. Needs to be one of the ``HTCondor`` Jobflavours. - Defaults to ``workday``. - group (str): force use of accounting group. Defaults to ``None``. - retries (int): maximum amount of retries. Default to ``3``. - notification (str): Notify under certain conditions. Defaults to ``error``. - priority (int): Priority to order your jobs. Defaults to ``None``. - - Returns: - str: HTCondor submission definition. - """ - # Pre-defined HTCondor arguments for our jobs - submit_dict = { - "MyId": "htcondor", - "universe": "vanilla", - "arguments": "$(ClusterId) $(ProcId)", - "output": Path("$(initialdir)", "$(MyId).$(ClusterId).$(ProcId).out"), - "error": Path("$(initialdir)", "$(MyId).$(ClusterId).$(ProcId).err"), - "log": Path("$(initialdir)", "$(MyId).$(ClusterId).$(ProcId).log"), - "on_exit_remove": "(ExitBySignal == False) && (ExitCode == 0)", - "requirements": "Machine =!= LastRemoteHost", - } - submit_dict.update(map_kwargs(kwargs)) - - # Let the htcondor create the submit-file - submission = htcondor.Submit(submit_dict) - - # add the multiple bash files - scripts = [ - str(Path(*parts)) - for parts in zip(job_df[COLUMN_JOB_DIRECTORY], job_df[COLUMN_SHELL_SCRIPT]) - ] - args = [",".join(parts) for parts in zip(scripts, job_df[COLUMN_JOB_DIRECTORY])] - queueArgs = ["queue executable, initialdir from (", *args, ")"] - - # ugly but submission.setQArgs doesn't take string containing '\n': - # submission.setQArgs("\n".join(queueArgs)) # doesn't work - submission = str(submission) + "\n".join(queueArgs) - LOG.debug(f"Created HTCondor subfile with content: \n{submission}") - return submission - - -# Main functions ############################################################### - - -def make_subfile(cwd: Path, job_df: DataFrame, **kwargs) -> Path: - """ - Creates submit-file for ``HTCondor``. - For kwargs, see ``create_multijob_for_bashfiles``. - - Args: - cwd (Path): working directory - job_df (DataFrame): DataFrame containing all the job-information - - Returns: - Path: path to the submit-file - """ - job = create_multijob_for_bashfiles(job_df, **kwargs) - return create_subfile_from_job(cwd, job) - - -def write_bash( - job_df: DataFrame, - output_dir: Path = None, - executable: str = "madx", - cmdline_arguments: dict = None, - mask: Union[str, Path] = None, -) -> DataFrame: - """ - Write the bash-files to be called by ``HTCondor``, which in turn call the executable. - Takes as input `Dataframe`, job type, and optional additional commandline arguments for the script. - A shell script is created in each job directory in the dataframe. - - Args: - job_df (DataFrame): DataFrame containing all the job-information - output_dir (str): output directory that will be transferred. 
Defaults to ``None``. - executable (str): name of the executable. Defaults to ``madx``. - cmdline_arguments (dict): additional commandline arguments for the executable - mask (Union[str, Path]): string or path to the mask-file. Defaults to ``None``. - - Returns: - DataFrame: The provided ``job_df`` but with added path to the scripts. - - """ - if len(job_df.index) > HTCONDOR_JOBLIMIT: - raise AttributeError("Submitting too many jobs for HTCONDOR") - - exec_path = f"{str(EXECUTEABLEPATH.get(executable, executable))} " if executable else '' - cmds = f" {' '.join([f'{param} {val}' for param, val in cmdline_arguments.items()])}" if cmdline_arguments else '' - - shell_scripts = [None] * len(job_df.index) - for idx, (jobid, job) in enumerate(job_df.iterrows()): - job_dir = Path(job[COLUMN_JOB_DIRECTORY]) - bash_file_name = f"{BASH_FILENAME}.{jobid}.{'bat' if on_windows() else 'sh'}" - jobfile = job_dir / bash_file_name - - LOG.debug(f"Writing bash-file {idx:d} '{jobfile}'.") - with open(jobfile, "w") as f: - # Preparation --- - if not on_windows(): - f.write(f"{SHEBANG}\n") - - if output_dir is not None: - f.write(f"mkdir {str(output_dir)}\n") - - # The actual job execution --- - f.write(exec_path) - - # Call the mask-file or the filled-template string - if is_mask_file(mask): - f.write(str(job_dir / job[COLUMN_JOB_FILE])) - else: - replace_columns = [column for column in job.index.tolist() if column not in NON_PARAMETER_COLUMNS] - f.write(mask % dict(zip(replace_columns, job[replace_columns]))) - - # Additional commands for the mask/string - f.write(cmds) - f.write("\n") - - # Manually copy output (if needed) --- - dest_dir = job.get(COLUMN_DEST_DIRECTORY) - if output_dir and dest_dir and output_dir != dest_dir: - cp_command = f'cp -r {output_dir} {dest_dir}' - if is_eos_uri(dest_dir): - cp_command = f'eos {cp_command}' - - f.write(f'{cp_command}\n') - - shell_scripts[idx] = bash_file_name - - job_df[COLUMN_SHELL_SCRIPT] = shell_scripts - return job_df - - -def map_kwargs(add_dict: Dict[str, Any]) -> Dict[str, Any]: - """ - Maps the kwargs for the job-file. - Some arguments have pre-defined choices and defaults, the remaining ones are just passed on. - - Args: - add_dict (Dict[str, Any]): additional kwargs to add to the defaults. - - Returns: - Dict[str, Any]: The mapped kwargs. 
- - """ - new = {} - - # Predefined mappings - htc_map = { # name: mapped_name, choices, default - "duration": ("+JobFlavour", JOBFLAVOURS, "workday"), - "output_dir": ("transfer_output_files", None, None), - "accounting_group": ("+AccountingGroup", None, None), - "max_retries": ("max_retries", None, 3), - "notification": ("notification", NOTIFICATIONS, "error"), - } - for key, (mapped, choices, default) in htc_map.items(): - try: - value = add_dict.pop(key) - except KeyError: - value = default # could be `None` - else: - if choices is not None and value not in choices: - raise TypeError( - f"{key} needs to be one of '{str(choices).strip('[]')}' but " - f"instead was '{value}'" - ) - if value is not None: - new[mapped] = _maybe_put_in_quotes(mapped, value) - - # Pass-Through Arguments - LOG.debug(f"Remaining arguments to be added: '{str(add_dict).strip('{}'):s}'") - new.update(add_dict) - return new - - -# Helper ####################################################################### - -def _maybe_put_in_quotes(key: str, value: Any) -> Any: - """ Put value in quoted strings if key starts with '+' """ - if key.startswith("+"): - return f'"{value}"' - return value - - -# Script Mode ################################################################## - - -if __name__ == "__main__": - raise EnvironmentError(f"{__file__} is not supposed to run as main.") diff --git a/pylhc_submitter/job_submitter_tools/iotools.py b/pylhc_submitter/job_submitter_tools/iotools.py deleted file mode 100644 index 33631c4..0000000 --- a/pylhc_submitter/job_submitter_tools/iotools.py +++ /dev/null @@ -1,297 +0,0 @@ -""" -Job Submitter IO-Tools ----------------------- - -Tools for input and output for the job-submitter. -""" -import itertools -import logging -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Dict, List, Sequence, Tuple, Union - -import numpy as np -import pandas as pd -import tfs - -from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, - COLUMN_JOBID, JOBDIRECTORY_PREFIX, - JOBSUMMARY_FILE, SCRIPT_EXTENSIONS) -from pylhc_submitter.job_submitter_tools import htc_utils -from pylhc_submitter.job_submitter_tools.mask import (create_job_scripts_from_mask, - generate_jobdf_index, is_mask_file) - -LOG = logging.getLogger(__name__) - - -@dataclass -class CreationOpts: - """ Options for creating jobs. """ - working_directory: Path # Path to working directory (afs) - mask: Union[Path, str] # Path to mask file or mask-string - jobid_mask: str # Mask for jobid - replace_dict: Dict[str, Any] # Replace-dict - output_dir: Path # Path to local output directory - output_destination: Path # Path to remote output directory (e.g. eos) - append_jobs: bool # Append jobs to existing jobs - resume_jobs: bool # Resume jobs that have already run/failed/got interrupted - executable: str # Name of executable to call the script (from mask) - check_files: Sequence[str] # List of output files to check for success - script_arguments: Dict[str, Any] # Arguments to pass to script - script_extension: str # Extension of the script to run - - def should_drop_jobs(self) -> bool: - """ Check if jobs should be dropped after creating the whole parameter space, - e.g. because they already exist. """ - return self.append_jobs or self.resume_jobs - - - -def create_jobs(opt: CreationOpts) -> tfs.TfsDataFrame: - """Main function to prepare all the jobs and folder structure. 
- This greates the value-grid based on the replace-dict and - checks for existing jobs (if so desired). - A job-dataframe is created - and written out - containing all the information and - its values are used to generate the job-scripts. - It also creates bash-scripts to call the executable for the job-scripts. - - Args: - opt (CreationOpts): Options for creating jobs - - Returns: - tfs.TfsDataFrame: The job-dataframe containing information for all jobs. - """ - LOG.debug("Creating Jobs.") - - # Generate product of replace-dict and compare to existing jobs --- - parameters, values_grid, prev_job_df = _generate_parameter_space( - replace_dict=opt.replace_dict, - append_jobs=opt.append_jobs, - cwd=opt.working_directory, - ) - - # Check new jobs --- - njobs = len(values_grid) - if njobs == 0: - raise ValueError(f"No (new) jobs found!") - - if njobs > htc_utils.HTCONDOR_JOBLIMIT: - LOG.warning( - f"You are attempting to submit an important number of jobs ({njobs})." - "This can be a high stress on your system, make sure you know what you are doing." - ) - - LOG.debug(f"Initial number of jobs: {njobs:d}") - - # Generate new job-dataframe --- - job_df = tfs.TfsDataFrame( - index=generate_jobdf_index(prev_job_df, opt.jobid_mask, parameters, values_grid), - columns=parameters, - data=values_grid, - ) - job_df = tfs.concat([prev_job_df, job_df], sort=False, how_headers='left') - - # Setup folders --- - job_df = create_folders(job_df, opt.working_directory, opt.output_destination) - - # Create scripts --- - if is_mask_file(opt.mask): - LOG.debug("Creating all jobs from mask.") - script_extension = _get_script_extension(opt.script_extension, opt.executable, opt.mask) - job_df = create_job_scripts_from_mask( - job_df, opt.mask, parameters, script_extension - ) - - LOG.debug("Creating shell scripts.") - job_df = htc_utils.write_bash( - job_df, - output_dir=opt.output_dir, - executable=opt.executable, - cmdline_arguments=opt.script_arguments, - mask=opt.mask, - ) - - # Convert paths to strings and write df to file --- - job_df[COLUMN_JOB_DIRECTORY] = job_df[COLUMN_JOB_DIRECTORY].apply(str) - if COLUMN_DEST_DIRECTORY in job_df.columns: - job_df[COLUMN_DEST_DIRECTORY] = job_df[COLUMN_DEST_DIRECTORY].apply(str) - - tfs.write(str(opt.working_directory / JOBSUMMARY_FILE), job_df, save_index=COLUMN_JOBID) - - # Drop already run jobs --- - dropped_jobs = [] - if opt.should_drop_jobs(): - job_df, dropped_jobs = _drop_already_run_jobs( - job_df, opt.output_dir, opt.check_files - ) - return job_df, dropped_jobs - - -def create_folders(job_df: tfs.TfsDataFrame, working_directory: Path, - destination_directory: Path = None) -> tfs.TfsDataFrame: - """Create the folder-structure in the given working directory and the - destination directory if given. - This creates a folder per job in which then the job-scripts and bash-scripts - can be stored later. - - Args: - job_df (tfs.TfsDataFrame): DataFrame containing all the job-information - working_directory (Path): Path to the working directory - destination_directory (Path, optional): Path to the destination directory, - i.e. the directory to copy the outputs to manually. Defaults to None. - - Returns: - tfs.TfsDataFrame: The job-dataframe again, but with the added paths to the job-dirs. 
- """ - LOG.debug("Setting up folders: ") - - jobname = f"{JOBDIRECTORY_PREFIX}.{{0}}" - job_df[COLUMN_JOB_DIRECTORY] = [working_directory / jobname.format(id_) for id_ in job_df.index] - - for job_dir in job_df[COLUMN_JOB_DIRECTORY]: - job_dir.mkdir(exist_ok=True) - LOG.debug(f" created '{job_dir}'.") - - if destination_directory: - job_df[COLUMN_DEST_DIRECTORY] = [destination_directory / jobname.format(id_) for id_ in job_df.index] - - strip_dest_dir = strip_eos_uri(destination_directory) - strip_dest_dir.mkdir(parents=True, exist_ok=True) - - # Make some symlinks for easy navigation--- - # Output directory -> Working Directory - sym_submission = strip_dest_dir / Path('SUBMISSION_DIR') - sym_submission.unlink(missing_ok=True) - sym_submission.symlink_to(working_directory.resolve(), target_is_directory=True) - - # Working Directory -> Output Directory - sym_destination = working_directory / Path('OUTPUT_DIR') - sym_destination.unlink(missing_ok=True) - sym_destination.symlink_to(strip_dest_dir.resolve(), target_is_directory=True) - - # Create output dirs per job --- - for job_dest_dir in job_df[COLUMN_DEST_DIRECTORY]: - strip_eos_uri(job_dest_dir).mkdir(exist_ok=True) - LOG.debug(f" created '{job_dest_dir}'.") - - return job_df - - -def is_eos_uri(path: Union[Path, str, None]) -> bool: - """ Check if the given path is an EOS-URI as `eos cp` only works with those. - E.g.: root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt - """ - if path is None: - return False - - parts = Path(path).parts - return ( - len(parts) >= 3 # at least root:, server, path - and - parts[0].endswith(':') - and - parts[2] == 'eos' - ) - - -def strip_eos_uri(path: Union[Path, str]) -> Path: - """ Strip EOS path information from a path. - EOS paths for HTCondor can be given as URI. Strip for direct writing. - E.g.: root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt - """ - path = Path(path) - parts = path.parts - if parts[0].endswith(':'): - # the first two parts are host info, e.g `file: //host/path` - return Path('/', *parts[2:]) - return path - - -def print_stats(new_jobs: Sequence[str], finished_jobs: Sequence[str]): - """Print some quick statistics.""" - text = [ - "\n------------- QUICK STATS ----------------" - f"Jobs total:{len(new_jobs) + len(finished_jobs):d}", - f"Jobs to run: {len(new_jobs):d}", - f"Jobs already finished: {len(finished_jobs):d}", - "---------- JOBS TO RUN: NAMES -------------" - ] - for job_name in new_jobs: - text.append(job_name) - text += ["--------- JOBS FINISHED: NAMES ------------"] - for job_name in finished_jobs: - text.append(job_name) - LOG.info("\n".join(text)) - - -def _generate_parameter_space( - replace_dict: Dict[str, Any], append_jobs: bool, cwd: Path - ) -> Tuple[List[str], np.ndarray, tfs.TfsDataFrame]: - """ Generate parameter space from replace-dict, check for existing jobs. 
""" - LOG.debug("Generating parameter space from replace-dict.") - parameters = list(replace_dict.keys()) - values_grid = _generate_values_grid(replace_dict) - if not append_jobs: - return parameters, values_grid, tfs.TfsDataFrame() - - jobfile_path = cwd / JOBSUMMARY_FILE - try: - prev_job_df = tfs.read(str(jobfile_path.absolute()), index=COLUMN_JOBID) - except FileNotFoundError as filerror: - raise FileNotFoundError( - "Cannot append jobs, as no previous jobfile was found at " f"'{jobfile_path}'" - ) from filerror - new_jobs_mask = [elem not in prev_job_df[parameters].values for elem in values_grid] - values_grid = values_grid[new_jobs_mask] - - return parameters, values_grid, prev_job_df - - -def _generate_values_grid(replace_dict: Dict[str, Any]) -> np.ndarray: - """ Creates an array of the inner-product of the replace-dict. """ - return np.array(list(itertools.product(*replace_dict.values())), dtype=object) - - -def _drop_already_run_jobs( - job_df: tfs.TfsDataFrame, output_dir: str, check_files: str - ) -> Tuple[tfs.TfsDataFrame, List[str]]: - """ Check for jobs that have already been run and drop them from current job_df. """ - LOG.debug("Dropping already finished jobs.") - finished_jobs = [ - idx - for idx, row in job_df.iterrows() - if _job_was_successful(row, output_dir, check_files) - ] - - LOG.info( - f"{len(finished_jobs):d} of {len(job_df.index):d}" - " Jobs have already finished and will be skipped." - ) - - job_df = job_df.drop(index=finished_jobs) - return job_df, finished_jobs - - -def _job_was_successful(job_row: pd.Series, output_dir: str, files: Sequence[str]) -> bool: - """ Determines if the job was successful. - - Args: - job_row (pd.Series): row from the job_df - output_dir (str): Name of the (local) output directory - files (List[str]): list of files that should have been generated - """ - job_dir = job_row.get(COLUMN_DEST_DIRECTORY) or job_row[COLUMN_JOB_DIRECTORY] - output_dir = Path(job_dir, output_dir) - success = output_dir.is_dir() and any(output_dir.iterdir()) - if success and files is not None and len(files): - for f in files: - success &= len(list(output_dir.glob(f))) > 0 - return success - - -def _get_script_extension(script_extension: str, executable: Path, mask: Path) -> str: - """ Returns the extension of the script to run based on - either the given value, its executable or the mask. """ - if script_extension is not None: - return script_extension - return SCRIPT_EXTENSIONS.get(executable, mask.suffix) diff --git a/pylhc_submitter/job_submitter_tools/mask.py b/pylhc_submitter/job_submitter_tools/mask.py deleted file mode 100644 index 3a2dcaa..0000000 --- a/pylhc_submitter/job_submitter_tools/mask.py +++ /dev/null @@ -1,114 +0,0 @@ -""" -Mask Resolver -------------- - -This module provides functionality to resolve and write script masks for ``HTCondor`` jobs -submission. -""" -import logging -import re -from pathlib import Path -from typing import Iterable, List, Sequence, Set, Union - -import pandas as pd -from numpy.typing import ArrayLike - -from pylhc_submitter.constants.job_submitter import COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE - -LOG = logging.getLogger(__name__) - - -def create_job_scripts_from_mask( - job_df: pd.DataFrame, maskfile: Path, replace_keys: dict, file_ext: str -) -> pd.DataFrame: - """ - Takes path to mask file, list of parameter to be replaced and pandas dataframe containg per job - the job directory where processed mask is to be put, and columns containing the parameter values - with column named like replace parameters. 
Job directories have to be created beforehand. - Processed (madx) mask has the same filename as mask but with the given file extension. - Input Dataframe is returned with additional column containing path to the processed script - files. - - Args: - job_df (pd.DataFrame): Job parameters as defined in description. - maskfile: `Path` object to the mask file. - replace_keys: keys to be replaced (must correspond to columns in ``job_df``). - file_ext: file extention to use (defaults to **madx**). - - Returns: - The provided ``job_df`` but with added path to the scripts. - """ - with maskfile.open("r") as mfile: - template = mfile.read() - - jobname = maskfile.with_suffix("").name - jobs = [None] * len(job_df) - for idx, (jobid, values) in enumerate(job_df.iterrows()): - jobfile_fullpath = (Path(values[COLUMN_JOB_DIRECTORY]) / jobname).with_suffix(file_ext) - - with jobfile_fullpath.open("w") as job_file: - job_file.write(template % dict(zip(replace_keys, values[list(replace_keys)]))) - jobs[idx] = jobfile_fullpath.name - job_df[COLUMN_JOB_FILE] = jobs - return job_df - - -def find_named_variables_in_mask(mask: str) -> Set[str]: - """ Find all variable-names in the mask. """ - return set(re.findall(r"%\((\w+)\)", mask)) - - -def check_percentage_signs_in_mask(mask: str) -> None: - """ Checks for '%' in the mask, that are not replacement variables. """ - cleaned_mask = re.sub(r"%\((\w+)\)", "", mask) - n_signs = cleaned_mask.count("%") - if n_signs == 0: - return - - # Help the user find the % - for idx, line in enumerate(cleaned_mask.split("\n")): - if "%" in line: - positions = [str(i) for i, char in enumerate(line) if char == "%"] - LOG.error(f"Problematic '%' sign(s) in line {idx}, pos {' ,'.join(positions)}.") - raise KeyError(f"{n_signs} problematic '%' signs found in template. Please remove.") - - -def generate_jobdf_index(old_df: pd.DataFrame, jobid_mask: str, keys: Sequence[str], values: ArrayLike - ) -> Union[List[str], Iterable[int]]: - """ Generates index for jobdf from mask for job_id naming. - - Args: - old_df (pd.DataFrame): Existing jobdf. - jobid_mask (str): Mask for naming the jobs. - keys (Sequence[str]): Keys to be replaced in the mask. - values (np.array_like): Values-Grid to be replaced in the mask. - - Returns: - List[str]: Index for jobdf, either list of strings (the filled jobid_masks) or integer-range. - """ - if not jobid_mask: - # Use integer-range as index, if no mask is given - # Start with last index if old_df is not None. - nold = len(old_df.index) if old_df is not None else 0 - start = nold-1 if nold > 0 else 0 - return range(start, start + values.shape[0]) - - # Fill job-id mask - return [jobid_mask % dict(zip(keys, v)) for v in values] - - -def is_mask_file(mask: str) -> bool: - """ Check if given string points to a file. """ - try: - return Path(mask).is_file() - except OSError: - return False - - -def is_mask_string(mask: str) -> bool: - """ Checks that given string does not point to a file. """ - return not is_mask_file(mask) - - -if __name__ == "__main__": - raise EnvironmentError(f"{__file__} is not supposed to run as main.") diff --git a/pylhc_submitter/job_submitter_tools/runners.py b/pylhc_submitter/job_submitter_tools/runners.py deleted file mode 100644 index 06eeceb..0000000 --- a/pylhc_submitter/job_submitter_tools/runners.py +++ /dev/null @@ -1,120 +0,0 @@ -""" -Job Submitter Runners ---------------------- - -Defines the methods to run the job-submitter, locally or on HTC. 
-""" -import logging -import multiprocessing -import subprocess -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Dict, Optional - -import pandas as pd -import tfs - -from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, - COLUMN_SHELL_SCRIPT) -from pylhc_submitter.job_submitter_tools import htc_utils -from pylhc_submitter.job_submitter_tools.iotools import is_eos_uri -from pylhc_submitter.utils.environment import on_windows - -LOG = logging.getLogger(__name__) - - -@dataclass -class RunnerOpts: - """ Options for running the submission. """ - working_directory: Path # Path to the working directory (e.g. afs) - jobflavour: Optional[str] = None # HTCondor job flavour (lengths of the job) - output_dir: Optional[str] = None # Name of the output directory, where jobs store data - ssh: Optional[str] = None # SSH command - dryrun: Optional[bool] = False # Perform only a dry-run, i.e. do all but submit to HTC - htc_arguments: Optional[Dict[str, Any]] = None # Arguments to pass on to htc as keywords - run_local: Optional[bool] = False # Run jobs locally - num_processes: Optional[int] = 4 # Number of processes to run in parallel (locally) - - -def run_jobs(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: - """Selects how to run the jobs. - - Args: - job_df (tfs.TfsDataFrame): DataFrame containing all the job-information - opt (RunnerOpts): Parameters for the runner - """ - if opt.run_local: - run_local(job_df, opt) - else: - run_htc(job_df, opt) - - -def run_local(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: - """Run all jobs locally. - - Args: - job_df (tfs.TfsDataFrame): DataFrame containing all the job-information - opt (RunnerOpts): Parameters for the runner - """ - if opt.dryrun: - LOG.info(f"Dry-run: Skipping local run.") - return - - LOG.info(f"Running {len(job_df.index)} jobs locally in {opt.num_processes:d} processes.") - - pool = multiprocessing.Pool(processes=opt.num_processes) - res = pool.map(_execute_shell, job_df.iterrows()) - if any(res): - jobs_failed = [j for r, j in zip(res, job_df.index) if r] - LOG.error(f"{len(jobs_failed)} of {len(job_df)} jobs have failed:\n {jobs_failed}") - raise RuntimeError("At least one job has failed. Check output logs!") - - -def run_htc(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: - """ Create submission file and submit the jobs to ``HTCondor``. - - Args: - job_df (tfs.TfsDataFrame): DataFrame containing all the job-information - opt (RunnerOpts): Parameters for the runner - """ - LOG.info(f"Submitting {len(job_df.index)} jobs on htcondor, flavour '{opt.jobflavour}'.") - LOG.debug("Creating htcondor subfile.") - - subfile = htc_utils.make_subfile( - opt.working_directory, job_df, - output_dir=opt.output_dir, - duration=opt.jobflavour, - **opt.htc_arguments - ) - - if opt.dryrun: - LOG.info("Dry run: submission file created, but not submitting jobs to htcondor.") - return - - LOG.debug("Submitting jobs to htcondor.") - htc_utils.submit_jobfile(subfile, opt.ssh) - - -# Helper ####################################################################### - -def _execute_shell(df_row: pd.Series) -> int: - """ Execute the shell script. 
- - Args: - df_row (pd.Series): row in the job-dataframe - - Returns: - int: return code of the process - """ - _, column = df_row - cmd = [] if on_windows() else ["sh"] - - with Path(column[COLUMN_JOB_DIRECTORY], "log.tmp").open("w") as logfile: - process = subprocess.Popen( - cmd + [column[COLUMN_SHELL_SCRIPT]], - shell=on_windows(), - stdout=logfile, - stderr=subprocess.STDOUT, - cwd=column[COLUMN_JOB_DIRECTORY], - ) - return process.wait() diff --git a/pylhc_submitter/sixdesk_tools/utils.py b/pylhc_submitter/sixdesk_tools/utils.py index 135b0c3..d287eb0 100644 --- a/pylhc_submitter/sixdesk_tools/utils.py +++ b/pylhc_submitter/sixdesk_tools/utils.py @@ -10,7 +10,7 @@ from pylhc_submitter.constants.autosix import SIXDESKLOCKFILE, get_workspace_path from pylhc_submitter.constants.external_paths import SIXDESK_UTILS -from pylhc_submitter.job_submitter_tools.mask import find_named_variables_in_mask +from pylhc_submitter.submitter.mask import find_named_variables_in_mask LOG = logging.getLogger(__name__) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 3d3427d..6f79dcb 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -7,6 +7,7 @@ import pytest from pylhc_submitter.job_submitter import main as job_submit +from pylhc_submitter.submitter.iotools import get_server_from_uri, is_eos_uri, uri_to_path from pylhc_submitter.utils.environment import on_linux, on_windows SUBFILE = "queuehtc.sub" @@ -43,6 +44,19 @@ def test_output_directory(tmp_path): _test_output(setup) +def test_wrong_uri(tmp_path): + """ Tests that wrong URI's are identified. """ + setup = InputParameters( + working_directory=tmp_path, + run_local=True, + output_destination="root:/eosuser.cern.ch/eos/my_new_output", + ) + setup.create_mask() + with pytest.raises(ValueError) as e: + job_submit(**asdict(setup)) + assert "EOS-URI" in str(e) + + @run_only_on_linux def test_job_creation_and_localrun_with_multiline_maskstring(tmp_path): """ Tests that the jobs are created and can be run locally from a multiline mask-string. """ @@ -101,30 +115,56 @@ def test_not_on_linux(tmp_path): assert "htcondor bindings" in e.value.args[0] +def test_eos_uri(): + """ Unit-test for the EOS-URI parsing. (OH LOOK! An actual unit test!)""" + server = "root://eosuser.cern.ch/" + path = "/eos/user/m/mmustermann/" + uri = f"{server}{path}" + assert is_eos_uri(uri) + assert not is_eos_uri(path) + assert uri_to_path(uri) == Path(path) + assert get_server_from_uri(uri) == server + + @run_only_on_linux @pytest.mark.cern_network -def test_htc_submit(): - """ This test is here for local testing only. You need to adapt the path - and delete the results afterwards manually (so you can check them before.""" +@pytest.mark.parametrize("uri", [True, False]) +def test_htc_submit(uri: bool): + """ This test is here for local testing only. + You need to adapt the path and delete the results afterwards manually.""" # Fix the kerberos ticket path. # Do klist to find your ticket manually. 
- # import os + import os # os.environ["KRB5CCNAME"] = "/tmp/krb5cc_####" + os.environ["KRB5CCNAME"] = "/tmp/krb5cc_106029" + + tmp_name = "htc_temp" + if uri: + tmp_name = f"{tmp_name}_uri" user = "jdilly" - path = Path("/", "afs", "cern.ch", "user", user[0], user, "htc_temp") + path = Path("/", "afs", "cern.ch", "user", user[0], user, tmp_name) path.mkdir(exist_ok=True) - setup = InputParameters(working_directory=path) - setup.create_mask() + dest = f"/eos/user/{user[0]}/{user}/{tmp_name}" + if uri: + dest = f"root://eosuser.cern.ch/{dest}" - # pre-run --- - job_submit(**asdict(setup)) - _test_subfile_content(setup) - _test_output(setup, post_run=False) + setup = InputParameters( + working_directory=path, + output_destination=dest, + # dryrun=True + ) + setup.create_mask() - # post run --- - # _test_output(setup, post_run=True) + prerun = True + prerun = False # Manually switch here after running. + if prerun: + job_submit(**asdict(setup)) + _test_subfile_content(setup) + _test_output(setup, post_run=False) + else: + _test_output(setup, post_run=True) # Helper ----------------------------------------------------------------------- @@ -178,7 +218,11 @@ def _test_subfile_content(setup: InputParameters): with subfile.open("r") as sfile: filecontents = dict(line.rstrip().split(" = ") for line in sfile if " = " in line) assert filecontents["MY.JobFlavour"].strip('"') == setup.jobflavour # flavour is saved with "" in .sub, and read in with them - assert filecontents["transfer_output_files"] == setup.job_output_dir + if setup.output_destination is None: + assert filecontents["transfer_output_files"] == setup.job_output_dir + else: + assert "transfer_output_files" not in filecontents + for key in setup.htc_arguments.keys(): assert filecontents[key] == setup.htc_arguments[key] @@ -197,14 +241,14 @@ def _test_output(setup: InputParameters, post_run: bool = True): if isinstance(setup.mask, Path): assert (setup.working_directory / job_name / setup.mask.name).with_suffix(setup.script_extension).exists() - def _check_output_content(dir_path: Path): + def _check_output_content(dir_path: Path, check_output: bool = True): # Check if the code created the folder structure --- - job_path = dir_path / job_name + job_path = uri_to_path(dir_path) / job_name assert job_path.exists() assert job_path.is_dir() - if post_run: # Check if the jobs created the files --- + if check_output: # Check if the jobs created the files --- out_dir_path = job_path / setup.job_output_dir out_file_path = out_dir_path / setup.check_files[0] @@ -216,11 +260,11 @@ def _check_output_content(dir_path: Path): assert f.read().strip("\n") == current_id # Check local working directory --- - _check_output_content(setup.working_directory) + _check_output_content(setup.working_directory, check_output=post_run and setup.output_destination is None) if setup.output_destination is not None: # Check copy at output destination --- - _check_output_content(setup.output_destination) + _check_output_content(setup.output_destination, check_output=post_run) def _generate_combinations(data: Dict[str, Sequence]) -> List[Dict[str, Any]]: From 8b88de4e9ae01d9e897aff0a73e027bb746d20a8 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:55:06 +0100 Subject: [PATCH 16/30] doc and version --- doc/Makefile | 4 +- doc/_static/css/custom.css | 91 +++++++++++++++++++ doc/_templates/layout.html | 12 +++ doc/conf.py | 27 +++--- doc/entrypoints/autosix.rst | 1 + doc/entrypoints/job_submitter.rst | 1 + 
doc/modules/constants.rst | 6 +- doc/modules/sixdesk_tools.rst | 7 ++ ...{job_submitter_tools.rst => submitter.rst} | 8 +- doc/modules/utils.rst | 2 + pylhc_submitter/__init__.py | 2 +- 11 files changed, 140 insertions(+), 21 deletions(-) create mode 100644 doc/_templates/layout.html rename doc/modules/{job_submitter_tools.rst => submitter.rst} (73%) diff --git a/doc/Makefile b/doc/Makefile index 08f8749..ff66ee4 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -48,9 +48,9 @@ html: @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." josch: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) /home/jdilly/Software/Documentation/submitter-doc + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) ../../Documentation/submitter-doc @echo - @echo "Build finished. The HTML pages are in /home/jdilly/Software/Documentation/submitter-doc." + @echo "Build finished. The HTML pages are in ../../Documentation/submitter-doc." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml diff --git a/doc/_static/css/custom.css b/doc/_static/css/custom.css index f201296..eea8824 100644 --- a/doc/_static/css/custom.css +++ b/doc/_static/css/custom.css @@ -1,7 +1,41 @@ +:root { + --nav-side-width: 300px; /* default is 300px */ + /* for 100% width */ + /*--nav-content-width: 100%;*/ + /*--local-toc-width: 300px;*/ + /*--nav-content-width-wide: calc(100% - var(--local-toc-width)); /* 100% here is fullscreen */ + /*--local-toc-left: calc(100% - var(--local-toc-width)); /* 100% here is w/o sidebar */ + + /* for fixed widths */ + --nav-content-width: 800px; /* default is 800px */ + --nav-content-width-wide: var(--nav-content-width); + --local-toc-width: calc(100% - var(--nav-content-width-wide)); + --local-toc-left: calc(var(--nav-content-width-wide) + var(--nav-side-width)); +} + +/* main content width */ +.wy-nav-content { + max-width: var(--nav-content-width); +} + +/* Sidebar width */ +.wy-nav-side { + width: var(--nav-side-width); +} + .wy-side-nav-search { background: rgb(243,244,247); } +.wy-side-nav-search > a { + color: black; +} + +.wy-side-nav-search> a img.logo { + width: 50%; +} + + .wy-side-nav-search > div.version { color: black; } @@ -182,3 +216,60 @@ em.sig-param span.default_value { .rst-content table.field-list th { padding: 16px; } + + +/* Create local table of contents + ------------------------------ + inspired by https://github.com/readthedocs/sphinx_rtd_theme/pull/919 + and https://github.com/readthedocs/sphinx_rtd_theme/issues/764 + see also _templates/layout.html + */ + +#local-table-of-contents { + padding-bottom: 20px; + /* display: none; */ +} + +/* Mask entry of main header (chapter) */ +#local-table-of-contents a[href="#"]{ + /*display: none;*/ +} + +/* indent subsections */ +#local-table-of-contents ul > ul { + padding-left: 0px; + margin-left: 20px; + padding-right: 0; + padding-bottom: 5px; +} + + +#local-table-of-contents-title { + margin-bottom: 10px; +} + +/* Show in Sidebar if window width is larger than nav-side + nav-content + toc-width */ +@media screen and (min-width: 1200px) { + .wy-nav-content { + max-width: var(--nav-content-width-wide); + } + + #local-table-of-contents { + display: block; + position: fixed; + margin-left: 15px; + overflow-y: auto; + height: 95%; + top: 45px; + left: var(--local-toc-left); + width: var(--local-toc-width); + } + + #local-table-of-contents-title { + display: block; + font-size: 16px; + width: 100%; + padding-top: 10px; + padding-bottom: 5px; + } +} \ No newline at end of file diff --git a/doc/_templates/layout.html 
b/doc/_templates/layout.html new file mode 100644 index 0000000..aa67d6d --- /dev/null +++ b/doc/_templates/layout.html @@ -0,0 +1,12 @@ +{% extends "!layout.html" %} +{% block document %} + {%- if toc|length > title|length + 75 %} + + {%- endif %} + + {{ super() }} +{% endblock %} + diff --git a/doc/conf.py b/doc/conf.py index 6fbff20..9da8056 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -31,20 +31,10 @@ sys.path.insert(0, str(TOPLEVEL_DIR)) -def about_package(init_posixpath: pathlib.Path) -> dict: - """ - Return package information defined with dunders in __init__.py as a dictionary, when - provided with a PosixPath to the __init__.py file. - """ - about_text: str = init_posixpath.read_text() - return { - entry.split(" = ")[0]: entry.split(" = ")[1].strip('"') - for entry in about_text.strip().split("\n") - if entry.startswith("__") - } - -ABOUT_PYLHC_SUBMITTER = about_package(ABOUT_FILE) +ABOUT_PYLHC_SUBMITTER: dict = {} +with ABOUT_FILE.open("r") as f: + exec(f.read(), ABOUT_PYLHC_SUBMITTER) # -- General configuration ------------------------------------------------ @@ -66,9 +56,11 @@ def about_package(init_posixpath: pathlib.Path) -> dict: "sphinx.ext.githubpages", "sphinx.ext.napoleon", ] +autosectionlabel_prefix_document = True +autosectionlabel_maxdepth = 2 # Add any paths that contain templates here, relative to this directory. -# templates_path = ['_templates'] +templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: @@ -84,6 +76,11 @@ def about_package(init_posixpath: pathlib.Path) -> dict: copyright_ = "2019, pyLHC/OMC-TEAM" author = ABOUT_PYLHC_SUBMITTER["__author__"] +# Override link in 'Edit on Github' +rst_prolog = f""" +:github_url: {ABOUT_PYLHC_SUBMITTER['__url__']} +""" + # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. @@ -98,7 +95,7 @@ def about_package(init_posixpath: pathlib.Path) -> dict: # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = 'en' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. diff --git a/doc/entrypoints/autosix.rst b/doc/entrypoints/autosix.rst index 470e586..0276dfc 100644 --- a/doc/entrypoints/autosix.rst +++ b/doc/entrypoints/autosix.rst @@ -1,2 +1,3 @@ .. automodule:: pylhc_submitter.autosix :members: + :noindex: diff --git a/doc/entrypoints/job_submitter.rst b/doc/entrypoints/job_submitter.rst index 7854e56..675569c 100644 --- a/doc/entrypoints/job_submitter.rst +++ b/doc/entrypoints/job_submitter.rst @@ -1,2 +1,3 @@ .. automodule:: pylhc_submitter.job_submitter :members: + :noindex: diff --git a/doc/modules/constants.rst b/doc/modules/constants.rst index 4f4f241..c7b3011 100644 --- a/doc/modules/constants.rst +++ b/doc/modules/constants.rst @@ -1,17 +1,21 @@ Constants Definitions -************************** +********************* .. automodule:: pylhc_submitter.constants.general :members: + :noindex: .. automodule:: pylhc_submitter.constants.external_paths :members: + :noindex: .. automodule:: pylhc_submitter.constants.job_submitter :members: + :noindex: .. 
automodule:: pylhc_submitter.constants.autosix :members: + :noindex: diff --git a/doc/modules/sixdesk_tools.rst b/doc/modules/sixdesk_tools.rst index 072ad58..0271a84 100644 --- a/doc/modules/sixdesk_tools.rst +++ b/doc/modules/sixdesk_tools.rst @@ -3,21 +3,28 @@ Sixdesk Tools .. automodule:: pylhc_submitter.sixdesk_tools.stages :members: + :noindex: .. automodule:: pylhc_submitter.sixdesk_tools.create_workspace :members: + :noindex: .. automodule:: pylhc_submitter.sixdesk_tools.submit :members: + :noindex: .. automodule:: pylhc_submitter.sixdesk_tools.post_process_da :members: + :noindex: .. automodule:: pylhc_submitter.sixdesk_tools.extract_data_from_db :members: + :noindex: .. automodule:: pylhc_submitter.sixdesk_tools.utils :members: + :noindex: .. automodule:: pylhc_submitter.sixdesk_tools.troubleshooting :members: + :noindex: diff --git a/doc/modules/job_submitter_tools.rst b/doc/modules/submitter.rst similarity index 73% rename from doc/modules/job_submitter_tools.rst rename to doc/modules/submitter.rst index 34be7cc..ac54d63 100644 --- a/doc/modules/job_submitter_tools.rst +++ b/doc/modules/submitter.rst @@ -1,15 +1,19 @@ -HTCondor Tools -************************** +Submitter +********* .. automodule:: pylhc_submitter.submitter.htc_utils :members: + :noindex: .. automodule:: pylhc_submitter.submitter.iotools :members: + :noindex: .. automodule:: pylhc_submitter.submitter.mask :members: + :noindex: .. automodule:: pylhc_submitter.submitter.runners :members: + :noindex: diff --git a/doc/modules/utils.rst b/doc/modules/utils.rst index aa7ecb0..107fb06 100644 --- a/doc/modules/utils.rst +++ b/doc/modules/utils.rst @@ -3,7 +3,9 @@ Utilities .. automodule:: pylhc_submitter.utils.iotools :members: + :noindex: .. automodule:: pylhc_submitter.utils.logging_tools :members: + :noindex: diff --git a/pylhc_submitter/__init__.py b/pylhc_submitter/__init__.py index 62de331..ed21d9e 100644 --- a/pylhc_submitter/__init__.py +++ b/pylhc_submitter/__init__.py @@ -10,7 +10,7 @@ __title__ = "pylhc_submitter" __description__ = "pylhc-submitter contains scripts to simplify the creation and submission of jobs to HTCondor at CERN" __url__ = "https://github.com/pylhc/submitter" -__version__ = "1.1.1" +__version__ = "2.0.0" __author__ = "pylhc" __author_email__ = "pylhc@github.com" __license__ = "MIT" From 796a47770902f8108cf15fd139f4be6ed7374f30 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:56:26 +0100 Subject: [PATCH 17/30] missing package --- pylhc_submitter/submitter/__init__.py | 0 pylhc_submitter/submitter/htc_utils.py | 330 +++++++++++++++++++++++++ pylhc_submitter/submitter/iotools.py | 317 ++++++++++++++++++++++++ pylhc_submitter/submitter/mask.py | 114 +++++++++ pylhc_submitter/submitter/runners.py | 120 +++++++++ 5 files changed, 881 insertions(+) create mode 100644 pylhc_submitter/submitter/__init__.py create mode 100644 pylhc_submitter/submitter/htc_utils.py create mode 100644 pylhc_submitter/submitter/iotools.py create mode 100644 pylhc_submitter/submitter/mask.py create mode 100644 pylhc_submitter/submitter/runners.py diff --git a/pylhc_submitter/submitter/__init__.py b/pylhc_submitter/submitter/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pylhc_submitter/submitter/htc_utils.py b/pylhc_submitter/submitter/htc_utils.py new file mode 100644 index 0000000..58ffd2a --- /dev/null +++ b/pylhc_submitter/submitter/htc_utils.py @@ -0,0 +1,330 @@ +""" +HTCondor Utilities +------------------ + +This module provides 
functionality to create HTCondor jobs and submit them to ``HTCondor``. + +``write_bash`` creates bash scripts executing either a python or madx script. +Takes as input `Dataframe`, job type, and optional additional commandline arguments for the script. +A shell script is created in each job directory in the dataframe. + +``make_subfile`` takes the job dataframe and creates the **.sub** files required for submissions to +``HTCondor``. The **.sub** file will be put in the working directory. The maximum runtime of one +job can be specified, standard is 8h. +""" +import logging +import subprocess +from pathlib import Path +from typing import Any, Dict, List, Union + +from pandas import DataFrame + +from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, + COLUMN_JOB_FILE, COLUMN_SHELL_SCRIPT, + EXECUTEABLEPATH, NON_PARAMETER_COLUMNS) +from pylhc_submitter.submitter.iotools import is_eos_uri +from pylhc_submitter.submitter.mask import is_mask_file +from pylhc_submitter.utils.environment import on_windows + +try: + import htcondor +except ImportError: # will be handled by job_submitter + class htcondor: + """Dummy HTCondor module. To satisfy the typing. """ + Submit: Any = None + + +LOG = logging.getLogger(__name__) + +# HTC Constants ################################################################ + +SHEBANG = "#!/bin/bash" +SUBFILE = "queuehtc.sub" +BASH_FILENAME = "Job" + +HTCONDOR_JOBLIMIT = 100000 + +CMD_SUBMIT = "condor_submit" +JOBFLAVOURS = ( + "espresso", # 20 min + "microcentury", # 1 h + "longlunch", # 2 h + "workday", # 8 h + "tomorrow", # 1 d + "testmatch", # 3 d + "nextweek", # 1 w +) + +NOTIFICATIONS = ("always", "complete", "error", "never") + + + +# Subprocess Methods ########################################################### + + +def create_subfile_from_job(cwd: Path, submission: Union[str, htcondor.Submit]) -> Path: + """ + Write file to submit to ``HTCondor``. + + Args: + cwd (Path): working directory + submission (str, htcondor.Submit): HTCondor submission definition (i.e. content of the file) + + Returns: + Path: path to sub-file + + """ + subfile = cwd / SUBFILE + LOG.debug(f"Writing sub-file '{str(subfile)}'.") + with subfile.open("w") as f: + f.write(str(submission)) + return subfile + + +def submit_jobfile(jobfile: Path, ssh: str) -> None: + """Submit subfile to ``HTCondor`` via subprocess. + + Args: + jobfile (Path): path to sub-file + ssh (str): ssh target + + """ + proc_args = [CMD_SUBMIT, jobfile] + if ssh: + proc_args = ["ssh", ssh] + proc_args + status = _start_subprocess(proc_args) + if status: + raise RuntimeError("Submit to HTCondor was not successful!") + else: + LOG.info("Jobs successfully submitted.") + + +def _start_subprocess(command: List[str]) -> int: + """ Start subprocess and log output. + + Args: + command (List[str]): command to execute + + Returns: + int: return code of the process + + """ + LOG.debug(f"Executing command '{command}'") + process = subprocess.Popen( + command, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + ) + for line in process.stdout: + htc_line = line.decode("utf-8").strip() + if htc_line: + LOG.debug(f"{htc_line} (from HTCondor)") + return process.wait() + + +# Job Creation ################################################################# + + +def create_multijob_for_bashfiles(job_df: DataFrame, **kwargs) -> str: + """ + Function to create an ``HTCondor`` submission content for all job-scripts, + i.e. bash-files, in the job_df. 
+ + Keyword Args: + output_dir (str): output directory that will be transferred. Defaults to ``None``. + duration (str): max duration of the job. Needs to be one of the ``HTCondor`` Jobflavours. + Defaults to ``workday``. + group (str): force use of accounting group. Defaults to ``None``. + retries (int): maximum amount of retries. Default to ``3``. + notification (str): Notify under certain conditions. Defaults to ``error``. + priority (int): Priority to order your jobs. Defaults to ``None``. + + Returns: + str: HTCondor submission definition. + """ + # Pre-defined HTCondor arguments for our jobs + submit_dict = { + "MyId": "htcondor", + "universe": "vanilla", + "arguments": "$(ClusterId) $(ProcId)", + "output": Path("$(initialdir)", "$(MyId).$(ClusterId).$(ProcId).out"), + "error": Path("$(initialdir)", "$(MyId).$(ClusterId).$(ProcId).err"), + "log": Path("$(initialdir)", "$(MyId).$(ClusterId).$(ProcId).log"), + "on_exit_remove": "(ExitBySignal == False) && (ExitCode == 0)", + "requirements": "Machine =!= LastRemoteHost", + } + submit_dict.update(map_kwargs(kwargs)) + + # Let the htcondor create the submit-file + submission = htcondor.Submit(submit_dict) + + # add the multiple bash files + scripts = [ + str(Path(*parts)) + for parts in zip(job_df[COLUMN_JOB_DIRECTORY], job_df[COLUMN_SHELL_SCRIPT]) + ] + args = [",".join(parts) for parts in zip(scripts, job_df[COLUMN_JOB_DIRECTORY])] + queueArgs = ["queue executable, initialdir from (", *args, ")"] + + # ugly but submission.setQArgs doesn't take string containing '\n': + # submission.setQArgs("\n".join(queueArgs)) # doesn't work + submission = str(submission) + "\n".join(queueArgs) + LOG.debug(f"Created HTCondor subfile with content: \n{submission}") + return submission + + +# Main functions ############################################################### + + +def make_subfile(cwd: Path, job_df: DataFrame, **kwargs) -> Path: + """ + Creates submit-file for ``HTCondor``. + For kwargs, see ``create_multijob_for_bashfiles``. + + Args: + cwd (Path): working directory + job_df (DataFrame): DataFrame containing all the job-information + + Returns: + Path: path to the submit-file + """ + job = create_multijob_for_bashfiles(job_df, **kwargs) + return create_subfile_from_job(cwd, job) + + +def write_bash( + job_df: DataFrame, + output_dir: Path = None, + executable: str = "madx", + cmdline_arguments: dict = None, + mask: Union[str, Path] = None, +) -> DataFrame: + """ + Write the bash-files to be called by ``HTCondor``, which in turn call the executable. + Takes as input `Dataframe`, job type, and optional additional commandline arguments for the script. + A shell script is created in each job directory in the dataframe. + + Args: + job_df (DataFrame): DataFrame containing all the job-information + output_dir (str): output directory that will be transferred. Defaults to ``None``. + executable (str): name of the executable. Defaults to ``madx``. + cmdline_arguments (dict): additional commandline arguments for the executable + mask (Union[str, Path]): string or path to the mask-file. Defaults to ``None``. + + Returns: + DataFrame: The provided ``job_df`` but with added path to the scripts. 
+ + """ + if len(job_df.index) > HTCONDOR_JOBLIMIT: + raise AttributeError("Submitting too many jobs for HTCONDOR") + + exec_path = f"{str(EXECUTEABLEPATH.get(executable, executable))} " if executable else '' + cmds = f" {' '.join([f'{param} {val}' for param, val in cmdline_arguments.items()])}" if cmdline_arguments else '' + + shell_scripts = [None] * len(job_df.index) + for idx, (jobid, job) in enumerate(job_df.iterrows()): + job_dir = Path(job[COLUMN_JOB_DIRECTORY]) + bash_file_name = f"{BASH_FILENAME}.{jobid}.{'bat' if on_windows() else 'sh'}" + jobfile = job_dir / bash_file_name + + LOG.debug(f"Writing bash-file {idx:d} '{jobfile}'.") + with open(jobfile, "w") as f: + # Preparation --- + if not on_windows(): + f.write(f"{SHEBANG}\n") + + if output_dir is not None: + f.write(f"mkdir {str(output_dir)}\n") + + # The actual job execution --- + f.write(exec_path) + + # Call the mask-file or the filled-template string + if is_mask_file(mask): + f.write(str(job_dir / job[COLUMN_JOB_FILE])) + else: + replace_columns = [column for column in job.index.tolist() if column not in NON_PARAMETER_COLUMNS] + f.write(mask % dict(zip(replace_columns, job[replace_columns]))) + + # Additional commands for the mask/string + f.write(cmds) + f.write("\n") + + # Manually copy output (if needed) --- + dest_dir = job.get(COLUMN_DEST_DIRECTORY) + if output_dir and dest_dir and output_dir != dest_dir: + # Note: only eos-cp needs `/` at the end of dirs, but should not hurt in any case + cp_command = f'cp -r {_str_ending_with_slash(output_dir)} {_str_ending_with_slash(dest_dir)}' + if is_eos_uri(dest_dir): + cp_command = f'eos {cp_command}' + + f.write(f'{cp_command}\n') + + shell_scripts[idx] = bash_file_name + + job_df[COLUMN_SHELL_SCRIPT] = shell_scripts + return job_df + + +def map_kwargs(add_dict: Dict[str, Any]) -> Dict[str, Any]: + """ + Maps the kwargs for the job-file. + Some arguments have pre-defined choices and defaults, the remaining ones are just passed on. + + Args: + add_dict (Dict[str, Any]): additional kwargs to add to the defaults. + + Returns: + Dict[str, Any]: The mapped kwargs. + + """ + new = {} + + # Predefined mappings + htc_map = { # name: mapped_name, choices, default + "duration": ("+JobFlavour", JOBFLAVOURS, "workday"), + "output_dir": ("transfer_output_files", None, None), + "accounting_group": ("+AccountingGroup", None, None), + "max_retries": ("max_retries", None, 3), + "notification": ("notification", NOTIFICATIONS, "error"), + } + for key, (mapped, choices, default) in htc_map.items(): + try: + value = add_dict.pop(key) + except KeyError: + value = default # could be `None` + else: + if choices is not None and value not in choices: + raise TypeError( + f"{key} needs to be one of '{str(choices).strip('[]')}' but " + f"instead was '{value}'" + ) + if value is not None: + new[mapped] = _maybe_put_in_quotes(mapped, value) + + # Pass-Through Arguments + LOG.debug(f"Remaining arguments to be added: '{str(add_dict).strip('{}'):s}'") + new.update(add_dict) + return new + + +# Helper ####################################################################### + +def _maybe_put_in_quotes(key: str, value: Any) -> Any: + """ Put value in quoted strings if key starts with '+' """ + if key.startswith("+"): + return f'"{value}"' + return value + + +def _str_ending_with_slash(s: Union[Path, str]) -> str: + """ Add a slash at the end of a path if not present. 
""" + s = str(s) + if s.endswith("/"): + return s + return f"{s}/" + + +# Script Mode ################################################################## + + +if __name__ == "__main__": + raise EnvironmentError(f"{__file__} is not supposed to run as main.") diff --git a/pylhc_submitter/submitter/iotools.py b/pylhc_submitter/submitter/iotools.py new file mode 100644 index 0000000..be5669e --- /dev/null +++ b/pylhc_submitter/submitter/iotools.py @@ -0,0 +1,317 @@ +""" +Job Submitter IO-Tools +---------------------- + +Tools for input and output for the job-submitter. +""" +import itertools +import logging +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, List, Sequence, Tuple, Union + +import numpy as np +import pandas as pd +import tfs + +from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, + COLUMN_JOBID, JOBDIRECTORY_PREFIX, + JOBSUMMARY_FILE, SCRIPT_EXTENSIONS) +from pylhc_submitter.submitter import htc_utils +from pylhc_submitter.submitter.mask import (create_job_scripts_from_mask, + generate_jobdf_index, is_mask_file) + +LOG = logging.getLogger(__name__) + + +@dataclass +class CreationOpts: + """ Options for creating jobs. """ + working_directory: Path # Path to working directory (afs) + mask: Union[Path, str] # Path to mask file or mask-string + jobid_mask: str # Mask for jobid + replace_dict: Dict[str, Any] # Replace-dict + output_dir: Path # Path to local output directory + output_destination: Union[Path, str] # Path or URI to remote output directory (e.g. eos) + append_jobs: bool # Append jobs to existing jobs + resume_jobs: bool # Resume jobs that have already run/failed/got interrupted + executable: str # Name of executable to call the script (from mask) + check_files: Sequence[str] # List of output files to check for success + script_arguments: Dict[str, Any] # Arguments to pass to script + script_extension: str # Extension of the script to run + + def should_drop_jobs(self) -> bool: + """ Check if jobs should be dropped after creating the whole parameter space, + e.g. because they already exist. """ + return self.append_jobs or self.resume_jobs + + + +def create_jobs(opt: CreationOpts) -> tfs.TfsDataFrame: + """Main function to prepare all the jobs and folder structure. + This greates the value-grid based on the replace-dict and + checks for existing jobs (if so desired). + A job-dataframe is created - and written out - containing all the information and + its values are used to generate the job-scripts. + It also creates bash-scripts to call the executable for the job-scripts. + + Args: + opt (CreationOpts): Options for creating jobs + + Returns: + tfs.TfsDataFrame: The job-dataframe containing information for all jobs. + """ + LOG.debug("Creating Jobs.") + + # Generate product of replace-dict and compare to existing jobs --- + parameters, values_grid, prev_job_df = _generate_parameter_space( + replace_dict=opt.replace_dict, + append_jobs=opt.append_jobs, + cwd=opt.working_directory, + ) + + # Check new jobs --- + njobs = len(values_grid) + if njobs == 0: + raise ValueError(f"No (new) jobs found!") + + if njobs > htc_utils.HTCONDOR_JOBLIMIT: + LOG.warning( + f"You are attempting to submit an important number of jobs ({njobs})." + "This can be a high stress on your system, make sure you know what you are doing." 
+ ) + + LOG.debug(f"Initial number of jobs: {njobs:d}") + + # Generate new job-dataframe --- + job_df = tfs.TfsDataFrame( + index=generate_jobdf_index(prev_job_df, opt.jobid_mask, parameters, values_grid), + columns=parameters, + data=values_grid, + ) + job_df = tfs.concat([prev_job_df, job_df], sort=False, how_headers='left') + + # Setup folders --- + job_df = create_folders(job_df, opt.working_directory, opt.output_destination) + + # Create scripts --- + if is_mask_file(opt.mask): + LOG.debug("Creating all jobs from mask.") + script_extension = _get_script_extension(opt.script_extension, opt.executable, opt.mask) + job_df = create_job_scripts_from_mask( + job_df, opt.mask, parameters, script_extension + ) + + LOG.debug("Creating shell scripts.") + job_df = htc_utils.write_bash( + job_df, + output_dir=opt.output_dir, + executable=opt.executable, + cmdline_arguments=opt.script_arguments, + mask=opt.mask, + ) + + # Convert paths to strings and write df to file --- + job_df[COLUMN_JOB_DIRECTORY] = job_df[COLUMN_JOB_DIRECTORY].apply(str) + if COLUMN_DEST_DIRECTORY in job_df.columns: + job_df[COLUMN_DEST_DIRECTORY] = job_df[COLUMN_DEST_DIRECTORY].apply(str) + + tfs.write(str(opt.working_directory / JOBSUMMARY_FILE), job_df, save_index=COLUMN_JOBID) + + # Drop already run jobs --- + dropped_jobs = [] + if opt.should_drop_jobs(): + job_df, dropped_jobs = _drop_already_run_jobs( + job_df, opt.output_dir, opt.check_files + ) + return job_df, dropped_jobs + + +def create_folders(job_df: tfs.TfsDataFrame, working_directory: Path, + destination_directory: Union[Path, str] = None) -> tfs.TfsDataFrame: + """Create the folder-structure in the given working directory and the + destination directory if given. + This creates a folder per job in which then the job-scripts and bash-scripts + can be stored later. + + Args: + job_df (tfs.TfsDataFrame): DataFrame containing all the job-information + working_directory (Path): Path to the working directory + destination_directory (Path, optional): Path to the destination directory, + i.e. the directory to copy the outputs to manually. Defaults to None. + + Returns: + tfs.TfsDataFrame: The job-dataframe again, but with the added paths to the job-dirs. 
+ """ + LOG.debug("Setting up folders: ") + + jobname = f"{JOBDIRECTORY_PREFIX}.{{0}}" + job_df[COLUMN_JOB_DIRECTORY] = [working_directory / jobname.format(id_) for id_ in job_df.index] + + for job_dir in job_df[COLUMN_JOB_DIRECTORY]: + job_dir.mkdir(exist_ok=True) + LOG.debug(f" created '{job_dir}'.") + + if destination_directory: + dest_path = uri_to_path(destination_directory) + dest_path.mkdir(parents=True, exist_ok=True) + + server = get_server_from_uri(destination_directory) + job_df[COLUMN_DEST_DIRECTORY] = [f"{server}{dest_path / jobname.format(id_)}" for id_ in job_df.index] + + # Make some symlinks for easy navigation--- + # Output directory -> Working Directory + sym_submission = dest_path / Path('SUBMISSION_DIR') + sym_submission.unlink(missing_ok=True) + sym_submission.symlink_to(working_directory.resolve(), target_is_directory=True) + + # Working Directory -> Output Directory + sym_destination = working_directory / Path('OUTPUT_DIR') + sym_destination.unlink(missing_ok=True) + sym_destination.symlink_to(dest_path.resolve(), target_is_directory=True) + + # Create output dirs per job --- + for job_dest_dir in job_df[COLUMN_DEST_DIRECTORY]: + uri_to_path(job_dest_dir).mkdir(exist_ok=True) + LOG.debug(f" created '{job_dest_dir}'.") + + return job_df + + +def is_eos_uri(path: Union[Path, str, None]) -> bool: + """ Check if the given path is an EOS-URI as `eos cp` only works with those. + E.g.: root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt + + This function does not check the double slashes, + to avoid having the user pass a malformed path by accident and then + assuming it is just a path. This is tested for in + :func:`pylhc_submitter.job_submitter.check_opts`. + """ + if path is None: + return False + + parts = Path(path).parts + return ( + len(parts) >= 3 # at least root:, server, path + and + parts[0].endswith(':') + and + parts[2] == 'eos' + ) + + +def uri_to_path(path: Union[Path, str]) -> Path: + """ Strip EOS path information from a path. + EOS paths for HTCondor can be given as URI. Strip for direct writing. + E.g.: root://eosuser.cern.ch//eos/user/a/anabramo/banana.txt + """ + path = Path(path) + parts = path.parts + if parts[0].endswith(':'): + # the first two parts are host info, e.g `file: //host/path` + return Path('/', *parts[2:]) + return path + + +def get_server_from_uri(path: Union[Path, str]) -> str: + """ Get server information from a path. + E.g.: root://eosuser.cern.ch//eos/user/a/ -> root://eosuser.cern.ch/ + """ + path_part = uri_to_path(path) + if path_part == Path(path): + return "" + + server_part = str(path).replace(str(path_part), '') + if server_part.endswith("//"): + server_part = server_part[:-1] + return server_part + + +def print_stats(new_jobs: Sequence[str], finished_jobs: Sequence[str]): + """Print some quick statistics.""" + text = [ + "\n------------- QUICK STATS ----------------" + f"Jobs total:{len(new_jobs) + len(finished_jobs):d}", + f"Jobs to run: {len(new_jobs):d}", + f"Jobs already finished: {len(finished_jobs):d}", + "---------- JOBS TO RUN: NAMES -------------" + ] + for job_name in new_jobs: + text.append(job_name) + text += ["--------- JOBS FINISHED: NAMES ------------"] + for job_name in finished_jobs: + text.append(job_name) + LOG.info("\n".join(text)) + + +def _generate_parameter_space( + replace_dict: Dict[str, Any], append_jobs: bool, cwd: Path + ) -> Tuple[List[str], np.ndarray, tfs.TfsDataFrame]: + """ Generate parameter space from replace-dict, check for existing jobs. 
""" + LOG.debug("Generating parameter space from replace-dict.") + parameters = list(replace_dict.keys()) + values_grid = _generate_values_grid(replace_dict) + if not append_jobs: + return parameters, values_grid, tfs.TfsDataFrame() + + jobfile_path = cwd / JOBSUMMARY_FILE + try: + prev_job_df = tfs.read(str(jobfile_path.absolute()), index=COLUMN_JOBID) + except FileNotFoundError as filerror: + raise FileNotFoundError( + "Cannot append jobs, as no previous jobfile was found at " f"'{jobfile_path}'" + ) from filerror + new_jobs_mask = [elem not in prev_job_df[parameters].values for elem in values_grid] + values_grid = values_grid[new_jobs_mask] + + return parameters, values_grid, prev_job_df + + +def _generate_values_grid(replace_dict: Dict[str, Any]) -> np.ndarray: + """ Creates an array of the inner-product of the replace-dict. """ + return np.array(list(itertools.product(*replace_dict.values())), dtype=object) + + +def _drop_already_run_jobs( + job_df: tfs.TfsDataFrame, output_dir: str, check_files: str + ) -> Tuple[tfs.TfsDataFrame, List[str]]: + """ Check for jobs that have already been run and drop them from current job_df. """ + LOG.debug("Dropping already finished jobs.") + finished_jobs = [ + idx + for idx, row in job_df.iterrows() + if _job_was_successful(row, output_dir, check_files) + ] + + LOG.info( + f"{len(finished_jobs):d} of {len(job_df.index):d}" + " Jobs have already finished and will be skipped." + ) + + job_df = job_df.drop(index=finished_jobs) + return job_df, finished_jobs + + +def _job_was_successful(job_row: pd.Series, output_dir: str, files: Sequence[str]) -> bool: + """ Determines if the job was successful. + + Args: + job_row (pd.Series): row from the job_df + output_dir (str): Name of the (local) output directory + files (List[str]): list of files that should have been generated + """ + job_dir = job_row.get(COLUMN_DEST_DIRECTORY) or job_row[COLUMN_JOB_DIRECTORY] + output_dir = Path(job_dir, output_dir) + success = output_dir.is_dir() and any(output_dir.iterdir()) + if success and files is not None and len(files): + for f in files: + success &= len(list(output_dir.glob(f))) > 0 + return success + + +def _get_script_extension(script_extension: str, executable: Path, mask: Path) -> str: + """ Returns the extension of the script to run based on + either the given value, its executable or the mask. """ + if script_extension is not None: + return script_extension + return SCRIPT_EXTENSIONS.get(executable, mask.suffix) diff --git a/pylhc_submitter/submitter/mask.py b/pylhc_submitter/submitter/mask.py new file mode 100644 index 0000000..3a2dcaa --- /dev/null +++ b/pylhc_submitter/submitter/mask.py @@ -0,0 +1,114 @@ +""" +Mask Resolver +------------- + +This module provides functionality to resolve and write script masks for ``HTCondor`` jobs +submission. +""" +import logging +import re +from pathlib import Path +from typing import Iterable, List, Sequence, Set, Union + +import pandas as pd +from numpy.typing import ArrayLike + +from pylhc_submitter.constants.job_submitter import COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE + +LOG = logging.getLogger(__name__) + + +def create_job_scripts_from_mask( + job_df: pd.DataFrame, maskfile: Path, replace_keys: dict, file_ext: str +) -> pd.DataFrame: + """ + Takes path to mask file, list of parameter to be replaced and pandas dataframe containg per job + the job directory where processed mask is to be put, and columns containing the parameter values + with column named like replace parameters. 
Job directories have to be created beforehand. + Processed (madx) mask has the same filename as mask but with the given file extension. + Input Dataframe is returned with additional column containing path to the processed script + files. + + Args: + job_df (pd.DataFrame): Job parameters as defined in description. + maskfile: `Path` object to the mask file. + replace_keys: keys to be replaced (must correspond to columns in ``job_df``). + file_ext: file extention to use (defaults to **madx**). + + Returns: + The provided ``job_df`` but with added path to the scripts. + """ + with maskfile.open("r") as mfile: + template = mfile.read() + + jobname = maskfile.with_suffix("").name + jobs = [None] * len(job_df) + for idx, (jobid, values) in enumerate(job_df.iterrows()): + jobfile_fullpath = (Path(values[COLUMN_JOB_DIRECTORY]) / jobname).with_suffix(file_ext) + + with jobfile_fullpath.open("w") as job_file: + job_file.write(template % dict(zip(replace_keys, values[list(replace_keys)]))) + jobs[idx] = jobfile_fullpath.name + job_df[COLUMN_JOB_FILE] = jobs + return job_df + + +def find_named_variables_in_mask(mask: str) -> Set[str]: + """ Find all variable-names in the mask. """ + return set(re.findall(r"%\((\w+)\)", mask)) + + +def check_percentage_signs_in_mask(mask: str) -> None: + """ Checks for '%' in the mask, that are not replacement variables. """ + cleaned_mask = re.sub(r"%\((\w+)\)", "", mask) + n_signs = cleaned_mask.count("%") + if n_signs == 0: + return + + # Help the user find the % + for idx, line in enumerate(cleaned_mask.split("\n")): + if "%" in line: + positions = [str(i) for i, char in enumerate(line) if char == "%"] + LOG.error(f"Problematic '%' sign(s) in line {idx}, pos {' ,'.join(positions)}.") + raise KeyError(f"{n_signs} problematic '%' signs found in template. Please remove.") + + +def generate_jobdf_index(old_df: pd.DataFrame, jobid_mask: str, keys: Sequence[str], values: ArrayLike + ) -> Union[List[str], Iterable[int]]: + """ Generates index for jobdf from mask for job_id naming. + + Args: + old_df (pd.DataFrame): Existing jobdf. + jobid_mask (str): Mask for naming the jobs. + keys (Sequence[str]): Keys to be replaced in the mask. + values (np.array_like): Values-Grid to be replaced in the mask. + + Returns: + List[str]: Index for jobdf, either list of strings (the filled jobid_masks) or integer-range. + """ + if not jobid_mask: + # Use integer-range as index, if no mask is given + # Start with last index if old_df is not None. + nold = len(old_df.index) if old_df is not None else 0 + start = nold-1 if nold > 0 else 0 + return range(start, start + values.shape[0]) + + # Fill job-id mask + return [jobid_mask % dict(zip(keys, v)) for v in values] + + +def is_mask_file(mask: str) -> bool: + """ Check if given string points to a file. """ + try: + return Path(mask).is_file() + except OSError: + return False + + +def is_mask_string(mask: str) -> bool: + """ Checks that given string does not point to a file. """ + return not is_mask_file(mask) + + +if __name__ == "__main__": + raise EnvironmentError(f"{__file__} is not supposed to run as main.") diff --git a/pylhc_submitter/submitter/runners.py b/pylhc_submitter/submitter/runners.py new file mode 100644 index 0000000..f7ab0d0 --- /dev/null +++ b/pylhc_submitter/submitter/runners.py @@ -0,0 +1,120 @@ +""" +Job Submitter Runners +--------------------- + +Defines the methods to run the job-submitter, locally or on HTC. 
+""" +import logging +import multiprocessing +import subprocess +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, Optional + +import pandas as pd +import tfs + +from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, + COLUMN_SHELL_SCRIPT) +from pylhc_submitter.submitter import htc_utils +from pylhc_submitter.submitter.iotools import is_eos_uri +from pylhc_submitter.utils.environment import on_windows + +LOG = logging.getLogger(__name__) + + +@dataclass +class RunnerOpts: + """ Options for running the submission. """ + working_directory: Path # Path to the working directory (e.g. afs) + jobflavour: Optional[str] = None # HTCondor job flavour (lengths of the job) + output_dir: Optional[str] = None # Name of the output directory, where jobs store data + ssh: Optional[str] = None # SSH command + dryrun: Optional[bool] = False # Perform only a dry-run, i.e. do all but submit to HTC + htc_arguments: Optional[Dict[str, Any]] = None # Arguments to pass on to htc as keywords + run_local: Optional[bool] = False # Run jobs locally + num_processes: Optional[int] = 4 # Number of processes to run in parallel (locally) + + +def run_jobs(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: + """Selects how to run the jobs. + + Args: + job_df (tfs.TfsDataFrame): DataFrame containing all the job-information + opt (RunnerOpts): Parameters for the runner + """ + if opt.run_local: + run_local(job_df, opt) + else: + run_htc(job_df, opt) + + +def run_local(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: + """Run all jobs locally. + + Args: + job_df (tfs.TfsDataFrame): DataFrame containing all the job-information + opt (RunnerOpts): Parameters for the runner + """ + if opt.dryrun: + LOG.info(f"Dry-run: Skipping local run.") + return + + LOG.info(f"Running {len(job_df.index)} jobs locally in {opt.num_processes:d} processes.") + + pool = multiprocessing.Pool(processes=opt.num_processes) + res = pool.map(_execute_shell, job_df.iterrows()) + if any(res): + jobs_failed = [j for r, j in zip(res, job_df.index) if r] + LOG.error(f"{len(jobs_failed)} of {len(job_df)} jobs have failed:\n {jobs_failed}") + raise RuntimeError("At least one job has failed. Check output logs!") + + +def run_htc(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: + """ Create submission file and submit the jobs to ``HTCondor``. + + Args: + job_df (tfs.TfsDataFrame): DataFrame containing all the job-information + opt (RunnerOpts): Parameters for the runner + """ + LOG.info(f"Submitting {len(job_df.index)} jobs on htcondor, flavour '{opt.jobflavour}'.") + LOG.debug("Creating htcondor subfile.") + + subfile = htc_utils.make_subfile( + opt.working_directory, job_df, + output_dir=opt.output_dir, + duration=opt.jobflavour, + **opt.htc_arguments + ) + + if opt.dryrun: + LOG.info("Dry run: submission file created, but not submitting jobs to htcondor.") + return + + LOG.debug("Submitting jobs to htcondor.") + htc_utils.submit_jobfile(subfile, opt.ssh) + + +# Helper ####################################################################### + +def _execute_shell(df_row: pd.Series) -> int: + """ Execute the shell script. 
+ + Args: + df_row (pd.Series): row in the job-dataframe + + Returns: + int: return code of the process + """ + _, column = df_row + cmd = [] if on_windows() else ["sh"] + + with Path(column[COLUMN_JOB_DIRECTORY], "log.tmp").open("w") as logfile: + process = subprocess.Popen( + cmd + [column[COLUMN_SHELL_SCRIPT]], + shell=on_windows(), + stdout=logfile, + stderr=subprocess.STDOUT, + cwd=column[COLUMN_JOB_DIRECTORY], + ) + return process.wait() From c621b5ff878a049ec9163a561629e91ba2f7a9ec Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 8 Nov 2023 19:02:43 +0100 Subject: [PATCH 18/30] gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 713d8a6..5e133cd 100644 --- a/.gitignore +++ b/.gitignore @@ -243,9 +243,14 @@ Temporary Items # Neovim .nvimlog +# Intellij /.idea/codeStyles/codeStyleConfig.xml /.idea/misc.xml /.idea/modules.xml /.idea/inspectionProfiles/profiles_settings.xml /.idea/vcs.xml /.idea/PyhDToolkit.iml + +# Other +tst_* + From 3f395ef387b7678be9426de23850bfab0a6957d3 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 8 Nov 2023 19:47:53 +0100 Subject: [PATCH 19/30] fixing macos --- pylhc_submitter/submitter/htc_utils.py | 8 +++++--- tests/unit/test_job_submitter.py | 13 ++++++------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/pylhc_submitter/submitter/htc_utils.py b/pylhc_submitter/submitter/htc_utils.py index 58ffd2a..4482b17 100644 --- a/pylhc_submitter/submitter/htc_utils.py +++ b/pylhc_submitter/submitter/htc_utils.py @@ -251,10 +251,12 @@ def write_bash( # Manually copy output (if needed) --- dest_dir = job.get(COLUMN_DEST_DIRECTORY) if output_dir and dest_dir and output_dir != dest_dir: - # Note: only eos-cp needs `/` at the end of dirs, but should not hurt in any case - cp_command = f'cp -r {_str_ending_with_slash(output_dir)} {_str_ending_with_slash(dest_dir)}' if is_eos_uri(dest_dir): - cp_command = f'eos {cp_command}' + # Note: eos-cp needs `/` at the end of both, source and target, dirs... + cp_command = f'eos cp -r {_str_ending_with_slash(output_dir)} {_str_ending_with_slash(dest_dir)}' + else: + # ...but '/' at the end of source dir copies only the content on macOS. + cp_command = f'cp -r {output_dir} {_str_ending_with_slash(dest_dir)}' f.write(f'{cp_command}\n') diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 6f79dcb..990d573 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -88,7 +88,7 @@ def test_find_errorneous_percentage_signs(tmp_path, maskfile): setup.create_mask(content=mask, as_file=maskfile) with pytest.raises(KeyError) as e: job_submit(**asdict(setup)) - assert "problematic '%'" in e.value.args[0] + assert "problematic '%'" in str(e) @run_only_on_linux @@ -100,7 +100,7 @@ def test_missing_keys(tmp_path, maskfile): setup.create_mask(content=mask, as_file=maskfile) with pytest.raises(KeyError) as e: job_submit(**asdict(setup)) - assert "PARAM3" in e.value.args[0] + assert "PARAM3" in str(e) @run_if_not_linux @@ -112,7 +112,7 @@ def test_not_on_linux(tmp_path): setup.create_mask() with pytest.raises(EnvironmentError) as e: job_submit(**asdict(setup)) - assert "htcondor bindings" in e.value.args[0] + assert "htcondor bindings" in str(e) def test_eos_uri(): @@ -134,9 +134,8 @@ def test_htc_submit(uri: bool): You need to adapt the path and delete the results afterwards manually.""" # Fix the kerberos ticket path. 
# Do klist to find your ticket manually. - import os + # import os # os.environ["KRB5CCNAME"] = "/tmp/krb5cc_####" - os.environ["KRB5CCNAME"] = "/tmp/krb5cc_106029" tmp_name = "htc_temp" if uri: @@ -158,7 +157,7 @@ def test_htc_submit(uri: bool): setup.create_mask() prerun = True - prerun = False # Manually switch here after running. + # prerun = False # Manually switch here after running. if prerun: job_submit(**asdict(setup)) _test_subfile_content(setup) @@ -277,4 +276,4 @@ def _generate_combinations(data: Dict[str, Sequence]) -> List[Dict[str, Any]]: for values in itertools.product(*all_values) ] - return combinations \ No newline at end of file + return combinations From cc1c7b536d26da7e98dc09bacd5020d87dfb7e14 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 8 Nov 2023 19:54:41 +0100 Subject: [PATCH 20/30] skip windows --- tests/unit/test_job_submitter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 990d573..70c749d 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -115,6 +115,7 @@ def test_not_on_linux(tmp_path): assert "htcondor bindings" in str(e) +pytest.mark.skipif(on_windows(), reason="Paths are not split on '/' on Windows.") def test_eos_uri(): """ Unit-test for the EOS-URI parsing. (OH LOOK! An actual unit test!)""" server = "root://eosuser.cern.ch/" From a9dcc34502d5446dedde74883955ff392c0c1918 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 8 Nov 2023 19:57:51 +0100 Subject: [PATCH 21/30] missing @ --- tests/unit/test_job_submitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 70c749d..8771dc0 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -115,7 +115,7 @@ def test_not_on_linux(tmp_path): assert "htcondor bindings" in str(e) -pytest.mark.skipif(on_windows(), reason="Paths are not split on '/' on Windows.") +@pytest.mark.skipif(on_windows(), reason="Paths are not split on '/' on Windows.") def test_eos_uri(): """ Unit-test for the EOS-URI parsing. (OH LOOK! An actual unit test!)""" server = "root://eosuser.cern.ch/" From 6e69ecaa9576b235d5c87a2a7ba9c7b2cbbe146c Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Fri, 10 Nov 2023 11:42:40 +0100 Subject: [PATCH 22/30] conf, changelog, additional doc for output_destination --- CHANGELOG.md | 13 +++++++++++++ doc/conf.py | 18 ++++++++++++++---- pylhc_submitter/job_submitter.py | 6 ++++-- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c0c717..9963c3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # `pylhc-submitter` Changelog +## Version 2.0.0 + +- General code cleanup/refactoring/documentation. + - partly breaks backward compatibility, if individual methods of the `job_submitter`-functionality have been used. + - does not affect any setups simply calling the `main()` function of `job_submitter.py` or calling the `job_submitter` as a module. + - Apart from some fixed imports, following the new structure, the `autosix` module has been untouched. 
+
+
+- New Feature of `job_submitter`:
+  - `output_destination` input parameter, which sets an output directory in which the folder-structure
+    for the jobs will be replicated and the job's `job_output_dir` will be copied into "manually" at the end of the job,
+    instead of having the directory transferred back to the `working directory` by htcondor.
+
 ## Version 1.1.1
 
 - Uses `concat` instead of `append` to stack the DataFrames.
diff --git a/doc/conf.py b/doc/conf.py
index 9da8056..283ad0a 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -31,10 +31,20 @@
 sys.path.insert(0, str(TOPLEVEL_DIR))
 
-
-ABOUT_PYLHC_SUBMITTER: dict = {}
-with ABOUT_FILE.open("r") as f:
-    exec(f.read(), ABOUT_PYLHC_SUBMITTER)
+def about_package(init_posixpath: pathlib.Path) -> dict:
+    """
+    Return package information defined with dunders in __init__.py as a dictionary, when
+    provided with a PosixPath to the __init__.py file.
+    """
+    about_text: str = init_posixpath.read_text()
+    return {
+        entry.split(" = ")[0]: entry.split(" = ")[1].strip('"')
+        for entry in about_text.strip().split("\n")
+        if entry.startswith("__")
+    }
+
+
+ABOUT_PYLHC_SUBMITTER = about_package(ABOUT_FILE)
 
 # -- General configuration ------------------------------------------------
diff --git a/pylhc_submitter/job_submitter.py b/pylhc_submitter/job_submitter.py
index 30d08a4..9032412 100644
--- a/pylhc_submitter/job_submitter.py
+++ b/pylhc_submitter/job_submitter.py
@@ -110,7 +110,8 @@
 
 - **output_destination** *(PathOrStr)*:
 
-    Directory where to store the output of the jobs . (Can be on EOS)
+    Directory to copy the output of the jobs to, sorted into folders per job.
+    Can be on EOS, preferably via EOS-URI format ('root://eosuser.cern.ch//eos/...').
 
 
 - **resume_jobs**:
@@ -295,7 +296,8 @@ def get_params():
     )
     params.add_parameter(
         name="output_destination",
-        help="Directory where to store the output of the jobs . (Can be on EOS)",
+        help="Directory to copy the output of the jobs to, sorted into folders per job. "
+             "Can be on EOS, preferably via EOS-URI format ('root://eosuser.cern.ch//eos/...').",
         type=PathOrStr,
     )
     params.add_parameter(
From 147b3c621787f2df1d9a47fcd0a8efc8c10fa8d6 Mon Sep 17 00:00:00 2001
From: JoschD <26184899+JoschD@users.noreply.github.com>
Date: Fri, 10 Nov 2023 11:51:58 +0100
Subject: [PATCH 23/30] extracting HTC constants

---
 pylhc_submitter/constants/autosix.py        |  4 ++--
 pylhc_submitter/constants/external_paths.py |  4 ++--
 pylhc_submitter/constants/general.py        |  4 ++--
 pylhc_submitter/constants/job_submitter.py  |  4 ++--
 pylhc_submitter/job_submitter.py            |  2 +-
 pylhc_submitter/submitter/htc_utils.py      | 24 ++------------------
 pylhc_submitter/submitter/iotools.py        |  7 +++---
 7 files changed, 15 insertions(+), 34 deletions(-)

diff --git a/pylhc_submitter/constants/autosix.py b/pylhc_submitter/constants/autosix.py
index 39d0945..b91065d 100644
--- a/pylhc_submitter/constants/autosix.py
+++ b/pylhc_submitter/constants/autosix.py
@@ -1,6 +1,6 @@
 """
-Constants: Autosix
-----------------------------------
+Autosix
+-------
 
 Collections of constants and paths used in autosix.
 
diff --git a/pylhc_submitter/constants/external_paths.py b/pylhc_submitter/constants/external_paths.py
index d470cbf..2e7e9e2 100644
--- a/pylhc_submitter/constants/external_paths.py
+++ b/pylhc_submitter/constants/external_paths.py
@@ -1,6 +1,6 @@
 """
-Constants: External Paths
--------------------------
+External Paths
+--------------
 
 Specific constants relating to external paths to be used,
 to help with consistency.
diff --git a/pylhc_submitter/constants/general.py b/pylhc_submitter/constants/general.py index 59f796d..2c68d93 100644 --- a/pylhc_submitter/constants/general.py +++ b/pylhc_submitter/constants/general.py @@ -1,6 +1,6 @@ """ -Constants: General ------------------- +General +------- General constants to help with consistency. """ diff --git a/pylhc_submitter/constants/job_submitter.py b/pylhc_submitter/constants/job_submitter.py index aca598c..93c1236 100644 --- a/pylhc_submitter/constants/job_submitter.py +++ b/pylhc_submitter/constants/job_submitter.py @@ -1,7 +1,7 @@ """ -Constants: Job Submitter ----------------------------------- +Job Submitter +------------- Collections of constants and paths used in the job-submitter. """ diff --git a/pylhc_submitter/job_submitter.py b/pylhc_submitter/job_submitter.py index 9032412..6a79982 100644 --- a/pylhc_submitter/job_submitter.py +++ b/pylhc_submitter/job_submitter.py @@ -160,7 +160,7 @@ from generic_parser.tools import print_dict_tree from pylhc_submitter.constants.job_submitter import EXECUTEABLEPATH, SCRIPT_EXTENSIONS -from pylhc_submitter.submitter.htc_utils import JOBFLAVOURS +from pylhc_submitter.constants.htcondor import JOBFLAVOURS from pylhc_submitter.submitter.iotools import CreationOpts, create_jobs, is_eos_uri, print_stats from pylhc_submitter.submitter.mask import (check_percentage_signs_in_mask, find_named_variables_in_mask, is_mask_file) diff --git a/pylhc_submitter/submitter/htc_utils.py b/pylhc_submitter/submitter/htc_utils.py index 4482b17..64be126 100644 --- a/pylhc_submitter/submitter/htc_utils.py +++ b/pylhc_submitter/submitter/htc_utils.py @@ -19,6 +19,8 @@ from pandas import DataFrame +from pylhc_submitter.constants.htcondor import (BASH_FILENAME, CMD_SUBMIT, HTCONDOR_JOBLIMIT, + JOBFLAVOURS, NOTIFICATIONS, SHEBANG, SUBFILE) from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE, COLUMN_SHELL_SCRIPT, EXECUTEABLEPATH, NON_PARAMETER_COLUMNS) @@ -36,28 +38,6 @@ class htcondor: LOG = logging.getLogger(__name__) -# HTC Constants ################################################################ - -SHEBANG = "#!/bin/bash" -SUBFILE = "queuehtc.sub" -BASH_FILENAME = "Job" - -HTCONDOR_JOBLIMIT = 100000 - -CMD_SUBMIT = "condor_submit" -JOBFLAVOURS = ( - "espresso", # 20 min - "microcentury", # 1 h - "longlunch", # 2 h - "workday", # 8 h - "tomorrow", # 1 d - "testmatch", # 3 d - "nextweek", # 1 w -) - -NOTIFICATIONS = ("always", "complete", "error", "never") - - # Subprocess Methods ########################################################### diff --git a/pylhc_submitter/submitter/iotools.py b/pylhc_submitter/submitter/iotools.py index be5669e..5c66c90 100644 --- a/pylhc_submitter/submitter/iotools.py +++ b/pylhc_submitter/submitter/iotools.py @@ -14,12 +14,13 @@ import pandas as pd import tfs +from pylhc_submitter.constants.htcondor import HTCONDOR_JOBLIMIT from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, COLUMN_JOBID, JOBDIRECTORY_PREFIX, JOBSUMMARY_FILE, SCRIPT_EXTENSIONS) from pylhc_submitter.submitter import htc_utils -from pylhc_submitter.submitter.mask import (create_job_scripts_from_mask, - generate_jobdf_index, is_mask_file) +from pylhc_submitter.submitter.mask import (create_job_scripts_from_mask, generate_jobdf_index, + is_mask_file) LOG = logging.getLogger(__name__) @@ -75,7 +76,7 @@ def create_jobs(opt: CreationOpts) -> tfs.TfsDataFrame: if njobs == 0: raise ValueError(f"No (new) jobs found!") - if njobs > 
htc_utils.HTCONDOR_JOBLIMIT: + if njobs > HTCONDOR_JOBLIMIT: LOG.warning( f"You are attempting to submit an important number of jobs ({njobs})." "This can be a high stress on your system, make sure you know what you are doing." From a697079a9792ba8d4afda6d6f2fcbbac80eaee6b Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Fri, 10 Nov 2023 11:53:01 +0100 Subject: [PATCH 24/30] actually add constants file --- pylhc_submitter/constants/htcondor.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 pylhc_submitter/constants/htcondor.py diff --git a/pylhc_submitter/constants/htcondor.py b/pylhc_submitter/constants/htcondor.py new file mode 100644 index 0000000..8e9ac24 --- /dev/null +++ b/pylhc_submitter/constants/htcondor.py @@ -0,0 +1,24 @@ +""" +HTCondor +-------- + +Constants for the HTCondor parameters. +""" +SHEBANG = "#!/bin/bash" +SUBFILE = "queuehtc.sub" +BASH_FILENAME = "Job" + +HTCONDOR_JOBLIMIT = 100000 + +CMD_SUBMIT = "condor_submit" +JOBFLAVOURS = ( + "espresso", # 20 min + "microcentury", # 1 h + "longlunch", # 2 h + "workday", # 8 h + "tomorrow", # 1 d + "testmatch", # 3 d + "nextweek", # 1 w +) + +NOTIFICATIONS = ("always", "complete", "error", "never") \ No newline at end of file From ed7abbfdce503531a680216209620ad8811c8748 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Fri, 10 Nov 2023 11:56:36 +0100 Subject: [PATCH 25/30] typehint for df_row --- pylhc_submitter/submitter/htc_utils.py | 5 ----- pylhc_submitter/submitter/runners.py | 7 ++++--- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/pylhc_submitter/submitter/htc_utils.py b/pylhc_submitter/submitter/htc_utils.py index 64be126..f1efd4e 100644 --- a/pylhc_submitter/submitter/htc_utils.py +++ b/pylhc_submitter/submitter/htc_utils.py @@ -52,7 +52,6 @@ def create_subfile_from_job(cwd: Path, submission: Union[str, htcondor.Submit]) Returns: Path: path to sub-file - """ subfile = cwd / SUBFILE LOG.debug(f"Writing sub-file '{str(subfile)}'.") @@ -67,7 +66,6 @@ def submit_jobfile(jobfile: Path, ssh: str) -> None: Args: jobfile (Path): path to sub-file ssh (str): ssh target - """ proc_args = [CMD_SUBMIT, jobfile] if ssh: @@ -87,7 +85,6 @@ def _start_subprocess(command: List[str]) -> int: Returns: int: return code of the process - """ LOG.debug(f"Executing command '{command}'") process = subprocess.Popen( @@ -191,7 +188,6 @@ def write_bash( Returns: DataFrame: The provided ``job_df`` but with added path to the scripts. - """ if len(job_df.index) > HTCONDOR_JOBLIMIT: raise AttributeError("Submitting too many jobs for HTCONDOR") @@ -256,7 +252,6 @@ def map_kwargs(add_dict: Dict[str, Any]) -> Dict[str, Any]: Returns: Dict[str, Any]: The mapped kwargs. - """ new = {} diff --git a/pylhc_submitter/submitter/runners.py b/pylhc_submitter/submitter/runners.py index f7ab0d0..49d9ce8 100644 --- a/pylhc_submitter/submitter/runners.py +++ b/pylhc_submitter/submitter/runners.py @@ -9,7 +9,7 @@ import subprocess from dataclasses import dataclass from pathlib import Path -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Tuple import pandas as pd import tfs @@ -97,11 +97,12 @@ def run_htc(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: # Helper ####################################################################### -def _execute_shell(df_row: pd.Series) -> int: +def _execute_shell(df_row: Tuple(Any, pd.Series)) -> int: """ Execute the shell script. 
Args: - df_row (pd.Series): row in the job-dataframe + df_row (Tuple[Any, pd.Series]): Row in the job-dataframe as coming from `iterrows()`, + i.e. a tuple of (index, series) Returns: int: return code of the process From a46a6cba2fb98e5d27e78f344411d82cd6cbab93 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Fri, 10 Nov 2023 12:11:27 +0100 Subject: [PATCH 26/30] test naming and test doc --- tests/unit/test_job_submitter.py | 48 +++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 8771dc0..567a536 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -44,7 +44,7 @@ def test_output_directory(tmp_path): _test_output(setup) -def test_wrong_uri(tmp_path): +def test_detects_wrong_uri(tmp_path): """ Tests that wrong URI's are identified. """ setup = InputParameters( working_directory=tmp_path, @@ -104,7 +104,7 @@ def test_missing_keys(tmp_path, maskfile): @run_if_not_linux -def test_not_on_linux(tmp_path): +def test_htcondor_bindings_not_found_on_nonlinux_os(tmp_path): """ Test that an error is raised if htcondor bindings are not found. If this tests fails, this might mean, that htcondor bindings are finally available for the other platforms. """ @@ -116,7 +116,7 @@ def test_not_on_linux(tmp_path): @pytest.mark.skipif(on_windows(), reason="Paths are not split on '/' on Windows.") -def test_eos_uri(): +def test_eos_uri_manipulation_functions(): """ Unit-test for the EOS-URI parsing. (OH LOOK! An actual unit test!)""" server = "root://eosuser.cern.ch/" path = "/eos/user/m/mmustermann/" @@ -129,16 +129,36 @@ def test_eos_uri(): @run_only_on_linux @pytest.mark.cern_network -@pytest.mark.parametrize("uri", [True, False]) -def test_htc_submit(uri: bool): - """ This test is here for local testing only. - You need to adapt the path and delete the results afterwards manually.""" - # Fix the kerberos ticket path. - # Do klist to find your ticket manually. +@pytest.mark.parametrize("destination", [True, False]) +@pytest.mark.parametrize("uri", [False, True]) +def test_htc_submit(destination: bool, uri: bool): + """ This test is here for manual testing. + It runs 3 scenarios and each submits 6 jobs to HTCondor. + This means you need to be in the cern-network on a machine with afs and eos access + and htcondor installed. + You need to adapt the path to your user-name and delete the results afterwards manually. + + Scenarios: + a) destination = False: Transfer output data back to afs + b) destination = True, uri = False: Copy output data to EOS (via eos path) + c) destination = True, uri = True: Copy output data to EOS (via eos uri) + + Run this test twice, manually changing `prerun` from "True" to "False" after the jobs are finished. + - `prerun = True`: create the folder structures and submit the jobs. + - `prerun = False`: check that the output data is present. + """ + if uri and not destination: + return # only need to run one of those + + # Fix the kerberos ticket path, in case kerberos doesn't find it. + # Do a `klist` in terminal to find your ticket manually and adapt the path. 
# import os # os.environ["KRB5CCNAME"] = "/tmp/krb5cc_####" tmp_name = "htc_temp" + if destination: + tmp_name = f"{tmp_name}_dest" + if uri: tmp_name = f"{tmp_name}_uri" @@ -146,9 +166,11 @@ def test_htc_submit(uri: bool): path = Path("/", "afs", "cern.ch", "user", user[0], user, tmp_name) path.mkdir(exist_ok=True) - dest = f"/eos/user/{user[0]}/{user}/{tmp_name}" - if uri: - dest = f"root://eosuser.cern.ch/{dest}" + dest = None + if destination: + dest = f"/eos/user/{user[0]}/{user}/{tmp_name}" + if uri: + dest = f"root://eosuser.cern.ch/{dest}" setup = InputParameters( working_directory=path, @@ -158,7 +180,7 @@ def test_htc_submit(uri: bool): setup.create_mask() prerun = True - # prerun = False # Manually switch here after running. + # prerun = False # !! Manually switch here after jobs finished. if prerun: job_submit(**asdict(setup)) _test_subfile_content(setup) From ac4e6e930e85b8fe33f51a7fff41201e04b29bf6 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Fri, 10 Nov 2023 12:20:41 +0100 Subject: [PATCH 27/30] even more doc in test --- tests/unit/test_job_submitter.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/tests/unit/test_job_submitter.py b/tests/unit/test_job_submitter.py index 567a536..f395840 100644 --- a/tests/unit/test_job_submitter.py +++ b/tests/unit/test_job_submitter.py @@ -147,31 +147,37 @@ def test_htc_submit(destination: bool, uri: bool): - `prerun = True`: create the folder structures and submit the jobs. - `prerun = False`: check that the output data is present. """ - if uri and not destination: - return # only need to run one of those - - # Fix the kerberos ticket path, in case kerberos doesn't find it. - # Do a `klist` in terminal to find your ticket manually and adapt the path. + # MANUAL THINGS TO CHANGE ############################################## + user = "mmustermann" # set your username + tmp_name = "htc_temp" # name for the temporary folder (will be created) + prerun = True + # prerun = False # switch here after jobs finished. + + # Uncomment to fix the kerberos ticket, in case htcondor doesn't find it. + # Do a `klist` in terminal and adapt the path. # import os # os.environ["KRB5CCNAME"] = "/tmp/krb5cc_####" + ######################################################################## + if uri and not destination: + return # only need to run one when destination is not set - tmp_name = "htc_temp" + # set working_directory if destination: tmp_name = f"{tmp_name}_dest" + if uri: + tmp_name = f"{tmp_name}_uri" - if uri: - tmp_name = f"{tmp_name}_uri" - - user = "jdilly" path = Path("/", "afs", "cern.ch", "user", user[0], user, tmp_name) path.mkdir(exist_ok=True) + # set output_destination dest = None if destination: dest = f"/eos/user/{user[0]}/{user}/{tmp_name}" if uri: dest = f"root://eosuser.cern.ch/{dest}" + # create setup setup = InputParameters( working_directory=path, output_destination=dest, @@ -179,13 +185,13 @@ def test_htc_submit(destination: bool, uri: bool): ) setup.create_mask() - prerun = True - # prerun = False # !! Manually switch here after jobs finished. 
if prerun: + # submit jobs job_submit(**asdict(setup)) _test_subfile_content(setup) _test_output(setup, post_run=False) else: + # check output _test_output(setup, post_run=True) From 63c7c1f22cd9a3c574db93c7f32c3016f115af89 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Fri, 10 Nov 2023 12:38:08 +0100 Subject: [PATCH 28/30] fixing cyclic import --- pylhc_submitter/job_submitter.py | 4 ++-- pylhc_submitter/submitter/htc_utils.py | 4 ++-- pylhc_submitter/submitter/runners.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pylhc_submitter/job_submitter.py b/pylhc_submitter/job_submitter.py index 6a79982..6efe9b8 100644 --- a/pylhc_submitter/job_submitter.py +++ b/pylhc_submitter/job_submitter.py @@ -159,11 +159,11 @@ from generic_parser.entry_datatypes import DictAsString from generic_parser.tools import print_dict_tree -from pylhc_submitter.constants.job_submitter import EXECUTEABLEPATH, SCRIPT_EXTENSIONS from pylhc_submitter.constants.htcondor import JOBFLAVOURS +from pylhc_submitter.constants.job_submitter import EXECUTEABLEPATH, SCRIPT_EXTENSIONS from pylhc_submitter.submitter.iotools import CreationOpts, create_jobs, is_eos_uri, print_stats from pylhc_submitter.submitter.mask import (check_percentage_signs_in_mask, - find_named_variables_in_mask, is_mask_file) + find_named_variables_in_mask, is_mask_file) from pylhc_submitter.submitter.runners import RunnerOpts, run_jobs from pylhc_submitter.utils.iotools import (PathOrStr, keys_to_path, make_replace_entries_iterable, save_config) diff --git a/pylhc_submitter/submitter/htc_utils.py b/pylhc_submitter/submitter/htc_utils.py index f1efd4e..873fd23 100644 --- a/pylhc_submitter/submitter/htc_utils.py +++ b/pylhc_submitter/submitter/htc_utils.py @@ -24,7 +24,7 @@ from pylhc_submitter.constants.job_submitter import (COLUMN_DEST_DIRECTORY, COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE, COLUMN_SHELL_SCRIPT, EXECUTEABLEPATH, NON_PARAMETER_COLUMNS) -from pylhc_submitter.submitter.iotools import is_eos_uri +from pylhc_submitter.submitter import iotools from pylhc_submitter.submitter.mask import is_mask_file from pylhc_submitter.utils.environment import on_windows @@ -227,7 +227,7 @@ def write_bash( # Manually copy output (if needed) --- dest_dir = job.get(COLUMN_DEST_DIRECTORY) if output_dir and dest_dir and output_dir != dest_dir: - if is_eos_uri(dest_dir): + if iotools.is_eos_uri(dest_dir): # Note: eos-cp needs `/` at the end of both, source and target, dirs... cp_command = f'eos cp -r {_str_ending_with_slash(output_dir)} {_str_ending_with_slash(dest_dir)}' else: diff --git a/pylhc_submitter/submitter/runners.py b/pylhc_submitter/submitter/runners.py index 49d9ce8..c215e07 100644 --- a/pylhc_submitter/submitter/runners.py +++ b/pylhc_submitter/submitter/runners.py @@ -97,7 +97,7 @@ def run_htc(job_df: tfs.TfsDataFrame, opt: RunnerOpts) -> None: # Helper ####################################################################### -def _execute_shell(df_row: Tuple(Any, pd.Series)) -> int: +def _execute_shell(df_row: Tuple[Any, pd.Series]) -> int: """ Execute the shell script. 
Args: From 55e8205dc98db003e0e0fadce64d233bfb0e5479 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Fri, 10 Nov 2023 12:48:50 +0100 Subject: [PATCH 29/30] fixes to CHANGELOG --- CHANGELOG.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9963c3b..75e8f64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,16 +2,16 @@ ## Version 2.0.0 -- General code cleanup/refactoring/documentation. - - partly breaks backward compatibility, if individual methods of the `job_submitter`-functionality have been used. - - does not affect any setups simply calling the `main()` function of `job_submitter.py` or calling the `job_submitter` as a module. - - Apart from some fixed imports, following the new structure, the `autosix` module has been untouched. +- General code cleanup/refactoring/documentation: + - Partly breaks backward compatibility, if individual methods of the `job_submitter`-functionality have been used. + - Does not affect any setups simply calling the `main()` function of `job_submitter.py` or calling the `job_submitter` as a module. + - Apart from some fixed imports, following the new structure, the `autosix` module has been untouched. - New Feature of `job_submitter`: - `output_destination` input parameter, which sets an output directory in which the folder-stucture - for the jobs will be replicated and the job's `job_output_dir` will be copied into "manually" at the end of the job, - instead of having the directory transferred back to the `working directory` by htcondor. + for the jobs will be replicated and the job's `job_output_dir` will be copied into "manually" at the end of the job, + instead of having the directory transferred back to the `working directory` by htcondor. ## Version 1.1.1 From b26a9db7f756fcf24ef50a86fcb09a284a913f36 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Fri, 10 Nov 2023 12:51:59 +0100 Subject: [PATCH 30/30] added constants module to doc --- doc/modules/constants.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/modules/constants.rst b/doc/modules/constants.rst index c7b3011..8474b25 100644 --- a/doc/modules/constants.rst +++ b/doc/modules/constants.rst @@ -15,6 +15,10 @@ Constants Definitions :members: :noindex: +.. automodule:: pylhc_submitter.constants.htcondor + :members: + :noindex: + .. automodule:: pylhc_submitter.constants.autosix :members: :noindex: