diff --git a/snappy_pipeline/workflows/homologous_recombination_deficiency/Snakefile b/snappy_pipeline/workflows/homologous_recombination_deficiency/Snakefile index 92f92482d..08cb2c752 100644 --- a/snappy_pipeline/workflows/homologous_recombination_deficiency/Snakefile +++ b/snappy_pipeline/workflows/homologous_recombination_deficiency/Snakefile @@ -61,7 +61,11 @@ rule homologous_recombination_deficiency_link_out_run: rule homologous_recombination_deficiency_scarHRD_install: output: **wf.get_output_files("scarHRD", "install"), - threads: wf.get_resource("scarHRD", "install", "threads") + params: + packages=[ + {"name": "sztup/scarHRD", "repo": "github"}, + {"name": "aroneklund/copynumber", "repo": "github"}, + ], resources: time=wf.get_resource("scarHRD", "install", "time"), memory=wf.get_resource("scarHRD", "install", "memory"), @@ -70,22 +74,7 @@ rule homologous_recombination_deficiency_scarHRD_install: log: **wf.get_log_file("scarHRD", "install"), wrapper: - wf.wrapper_path("scarHRD/install") - - -rule homologous_recombination_deficiency_scarHRD_gcreference: - output: - **wf.get_output_files("scarHRD", "gcreference"), - threads: wf.get_resource("scarHRD", "gcreference", "threads") - resources: - time=wf.get_resource("scarHRD", "gcreference", "time"), - memory=wf.get_resource("scarHRD", "gcreference", "memory"), - partition=wf.get_resource("scarHRD", "gcreference", "partition"), - tmpdir=wf.get_resource("scarHRD", "gcreference", "tmpdir"), - log: - **wf.get_log_file("scarHRD", "gcreference"), - wrapper: - wf.wrapper_path("scarHRD/gcreference") + wf.wrapper_path("r") rule homologous_recombination_deficiency_scarHRD_run: diff --git a/snappy_pipeline/workflows/homologous_recombination_deficiency/__init__.py b/snappy_pipeline/workflows/homologous_recombination_deficiency/__init__.py index bae0d34c6..4ed311632 100644 --- a/snappy_pipeline/workflows/homologous_recombination_deficiency/__init__.py +++ 
b/snappy_pipeline/workflows/homologous_recombination_deficiency/__init__.py @@ -52,7 +52,6 @@ """ -from collections import OrderedDict import sys from biomedsheets.shortcuts import CancerCaseSheet, is_not_background @@ -66,7 +65,9 @@ LinkOutStepPart, ResourceUsage, ) -from snappy_pipeline.workflows.ngs_mapping import NgsMappingWorkflow +from snappy_pipeline.workflows.somatic_targeted_seq_cnv_calling import ( + SomaticTargetedSeqCnvCallingWorkflow, +) __author__ = "Eric Blanc " @@ -75,8 +76,8 @@ # Default configuration homologous_recombination_deficiency step_config: homologous_recombination_deficiency: - tools: ['scarHRD'] # REQUIRED - available: 'mantis' - path_ngs_mapping: ../ngs_mapping # REQUIRED + tools: ['scarHRD'] # REQUIRED - available: 'scarHRD' + path_cnv_calling: ../somatic_targeted_seq_cnv_calling # REQUIRED scarHRD: genome_name: "grch37" # Must be either "grch37", "grch38" or "mouse" chr_prefix: False @@ -93,81 +94,31 @@ class ScarHRDStepPart(BaseStepPart): #: Class available actions actions = ( "install", - "gcreference", "run", ) def __init__(self, parent): super().__init__(parent) - self.base_path_out = ( - "work/{{mapper}}.scarHRD.{{library_name}}/out/{{mapper}}.scarHRD.{{library_name}}{ext}" - ) - # Build shortcut from cancer bio sample name to matched cancer sample - self.tumor_ngs_library_to_sample_pair = OrderedDict() - for sheet in self.parent.shortcut_sheets: - self.tumor_ngs_library_to_sample_pair.update( - sheet.all_sample_pairs_by_tumor_dna_ngs_library - ) - - def get_normal_lib_name(self, wildcards): - """Return name of normal (non-cancer) library""" - pair = self.tumor_ngs_library_to_sample_pair[wildcards.library_name] - return pair.normal_sample.dna_ngs_library.name def get_input_files(self, action): - def input_function_run(wildcards): - """Helper wrapper function""" - # Get shorcut to Snakemake sub workflow - ngs_mapping = self.parent.sub_workflows["ngs_mapping"] - # Get names of primary libraries of the selected cancer bio sample and 
the - # corresponding primary normal sample - normal_base_path = ( - "output/{mapper}.{normal_library}/out/{mapper}.{normal_library}".format( - normal_library=self.get_normal_lib_name(wildcards), **wildcards - ) - ) - tumor_base_path = ( - "output/{mapper}.{library_name}/out/" "{mapper}.{library_name}" - ).format(**wildcards) - return { - "lib_path": "work/R_packages/out/.done", - "gc": "work/static_data/out/{genome_name}_{length}.wig.gz".format( - genome_name=self.config["scarHRD"]["genome_name"], - length=self.config["scarHRD"]["length"], - ), - "normal_bam": ngs_mapping(normal_base_path + ".bam"), - "normal_bai": ngs_mapping(normal_base_path + ".bam.bai"), - "tumor_bam": ngs_mapping(tumor_base_path + ".bam"), - "tumor_bai": ngs_mapping(tumor_base_path + ".bam.bai"), - } + self._validate_action(action) - if action == "install": - return None - elif action == "gcreference": - return None - elif action == "run": - return input_function_run - else: - raise UnsupportedActionException( - "Action '{action}' is not supported. 
Valid options: {valid}".format( - action=action, valid=", ".join(self.actions) - ) - ) + return self._get_input_files_run + + @dictify + def _get_input_files_run(self, wildcards): + self.cnv_calling = self.parent.sub_workflows["cnv_calling"] + base_name = f"{wildcards.mapper}.{wildcards.caller}.{wildcards.library_name}" + yield "done", "work/R_packages/out/scarHRD.done" + yield "seqz", self.cnv_calling(f"output/{base_name}/out/{base_name}.seqz.gz") def get_output_files(self, action): if action == "install": - return {"lib_path": "work/R_packages/out/.done"} - elif action == "gcreference": - return { - "gc": "work/static_data/out/{genome_name}_{length}.wig.gz".format( - genome_name=self.config["scarHRD"]["genome_name"], - length=self.config["scarHRD"]["length"], - ) - } + return {"done": "work/R_packages/out/scarHRD.done"} elif action == "run": return { - "sequenza": "work/{mapper}.scarHRD.{library_name}/out/{mapper}.scarHRD.{library_name}.seqz.gz", - "scarHRD": "work/{mapper}.scarHRD.{library_name}/out/{mapper}.scarHRD.{library_name}.json", + "scarHRD": "work/{mapper}.{caller}.scarHRD.{library_name}/out/{mapper}.{caller}.scarHRD.{library_name}.json", + "scarHRD_md5": "work/{mapper}.{caller}.scarHRD.{library_name}/out/{mapper}.{caller}.scarHRD.{library_name}.json.md5", } else: raise UnsupportedActionException( @@ -180,14 +131,9 @@ def get_output_files(self, action): def _get_log_file(self, action): """Return dict of log files.""" if action == "install": - prefix = "work/R_packages/log/R_packages" - elif action == "gcreference": - prefix = "work/static_data/log/{genome_name}_{length}".format( - genome_name=self.config["scarHRD"]["genome_name"], - length=self.config["scarHRD"]["length"], - ) + prefix = "work/R_packages/log/scarHRD" elif action == "run": - prefix = "work/{mapper}.scarHRD.{library_name}/log/{mapper}.scarHRD.{library_name}" + prefix = "work/{mapper}.{caller}.scarHRD.{library_name}/log/{mapper}.{caller}.scarHRD.{library_name}" else: raise 
UnsupportedActionException( "Action '{action}' is not supported. Valid options: {valid}".format( @@ -204,32 +150,15 @@ def _get_log_file(self, action): yield key + "_md5", prefix + ext + ".md5" def get_resource_usage(self, action): - """Get Resource Usage - - :param action: Action (i.e., step) in the workflow, example: 'run'. - :type action: str - - :return: Returns ResourceUsage for step. - """ - if action == "install" or action == "gcreference": + self._validate_action(action) + if action == "run": return ResourceUsage( threads=1, - time="02:00:00", # 2 hours - memory="4096M", - partition="short", - ) - elif action == "run": - return ResourceUsage( - threads=2, - time="48:00:00", # 2 hours memory="32G", + time="24:00:00", ) else: - raise UnsupportedActionException( - "Action '{action}' is not supported. Valid options: {valid}".format( - action=action, valid=", ".join(self.actions) - ) - ) + return super().get_resource_usage(action) class HomologousRecombinationDeficiencyWorkflow(BaseStep): @@ -253,16 +182,18 @@ def __init__(self, workflow, config, config_lookup_paths, config_paths, workdir) config_lookup_paths, config_paths, workdir, - (NgsMappingWorkflow,), + (SomaticTargetedSeqCnvCallingWorkflow,), ) # Register sub step classes so the sub steps are available self.register_sub_step_classes((ScarHRDStepPart, LinkOutStepPart)) # Initialize sub-workflows - self.register_sub_workflow("ngs_mapping", self.config["path_ngs_mapping"]) + self.register_sub_workflow( + "somatic_targeted_seq_cnv_calling", self.config["path_cnv_calling"], "cnv_calling" + ) @listify def get_result_files(self): - """Return list of result files for the somatic targeted sequencing CNV calling step""" + """Return list of result files for the homologous recombination deficiency step""" tool_actions = {"scarHRD": ("run",)} for sheet in filter(is_not_background, self.shortcut_sheets): for sample_pair in sheet.all_sample_pairs: @@ -282,14 +213,17 @@ def get_result_files(self): tpls = 
self.sub_steps[tool].get_output_files(action).values() except AttributeError: tpls = self.sub_steps[tool].get_output_files(action) + tpls = list(tpls) + tpls += list(self.sub_steps[tool].get_log_file(action).values()) for tpl in tpls: filenames = expand( tpl, mapper=self.w_config["step_config"]["ngs_mapping"]["tools"]["dna"], + caller=["sequenza"], library_name=[sample_pair.tumor_sample.dna_ngs_library.name], ) for f in filenames: - if ".tmp." not in f: + if ".tmp." not in f and not f.endswith("/.done"): yield f.replace("work/", "output/") def check_config(self): @@ -298,3 +232,6 @@ def check_config(self): ("static_data_config", "reference", "path"), "Path to reference FASTA file not configured but required", ) + assert ( + "sequenza" in self.w_config["step_config"]["somatic_targeted_seq_cnv_calling"]["tools"] + ) diff --git a/snappy_wrappers/wrappers/scarHRD/environment.yaml b/snappy_wrappers/wrappers/scarHRD/environment.yaml index b6a8c8b49..9b3866aee 100644 --- a/snappy_wrappers/wrappers/scarHRD/environment.yaml +++ b/snappy_wrappers/wrappers/scarHRD/environment.yaml @@ -3,8 +3,6 @@ channels: - bioconda dependencies: - python =3.9 - - sequenza-utils - r-sequenza - r-devtools - r-data.table - - samtools diff --git a/snappy_wrappers/wrappers/scarHRD/gcreference/environment.yaml b/snappy_wrappers/wrappers/scarHRD/gcreference/environment.yaml deleted file mode 120000 index 2e107ac86..000000000 --- a/snappy_wrappers/wrappers/scarHRD/gcreference/environment.yaml +++ /dev/null @@ -1 +0,0 @@ -../environment.yaml \ No newline at end of file diff --git a/snappy_wrappers/wrappers/scarHRD/gcreference/wrapper.py b/snappy_wrappers/wrappers/scarHRD/gcreference/wrapper.py deleted file mode 100644 index 1b9a2c4b0..000000000 --- a/snappy_wrappers/wrappers/scarHRD/gcreference/wrapper.py +++ /dev/null @@ -1,49 +0,0 @@ -"""CUBI+Snakemake wrapper code for scarHRD (sequenza GC reference file) -""" - -import os - -from snakemake import shell - -__author__ = "Eric Blanc " - -step = 
snakemake.config["pipeline_step"]["name"] -genome = snakemake.config["static_data_config"]["reference"]["path"] -length = snakemake.config["step_config"][step]["scarHRD"]["length"] - -shell.executable("/bin/bash") - -shell( - r""" -set -x - -# Write out information about conda installation. -conda list >{snakemake.log.conda_list} -conda info >{snakemake.log.conda_info} -md5sum {snakemake.log.conda_list} >{snakemake.log.conda_list_md5} -md5sum {snakemake.log.conda_info} >{snakemake.log.conda_info_md5} - -# Also pipe stderr to log file -if [[ -n "{snakemake.log.log}" ]]; then - if [[ "$(set +e; tty; set -e)" != "" ]]; then - rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log}) - exec 2> >(tee -a "{snakemake.log.log}" >&2) - else - rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log}) - echo "No tty, logging disabled" >"{snakemake.log.log}" - fi -fi - -sequenza-utils gc_wiggle --fasta {genome} -w {length} -o {snakemake.output} - -pushd $(dirname {snakemake.output}) -md5sum $(basename {snakemake.output}) > $(basename {snakemake.output}).md5 -""" -) - -# Compute MD5 sums of logs. 
-shell( - r""" -md5sum {snakemake.log.log} >{snakemake.log.log_md5} -""" -) diff --git a/snappy_wrappers/wrappers/scarHRD/install/environment.yaml b/snappy_wrappers/wrappers/scarHRD/install/environment.yaml deleted file mode 120000 index 2e107ac86..000000000 --- a/snappy_wrappers/wrappers/scarHRD/install/environment.yaml +++ /dev/null @@ -1 +0,0 @@ -../environment.yaml \ No newline at end of file diff --git a/snappy_wrappers/wrappers/scarHRD/install/wrapper.py b/snappy_wrappers/wrappers/scarHRD/install/wrapper.py deleted file mode 100644 index a9a9c15e0..000000000 --- a/snappy_wrappers/wrappers/scarHRD/install/wrapper.py +++ /dev/null @@ -1,48 +0,0 @@ -"""CUBI+Snakemake wrapper code for scarHRD (non-conda package installation) -""" - -import os - -from snakemake import shell - -__author__ = "Eric Blanc " - -lib_path = os.path.dirname(snakemake.output.lib_path) - -shell.executable("/bin/bash") - -shell( - r""" -set -x - -# Write out information about conda installation. -conda list >{snakemake.log.conda_list} -conda info >{snakemake.log.conda_info} -md5sum {snakemake.log.conda_list} >{snakemake.log.conda_list_md5} -md5sum {snakemake.log.conda_info} >{snakemake.log.conda_info_md5} - -# Also pipe stderr to log file -if [[ -n "{snakemake.log.log}" ]]; then - if [[ "$(set +e; tty; set -e)" != "" ]]; then - rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log}) - exec 2> >(tee -a "{snakemake.log.log}" >&2) - else - rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log}) - echo "No tty, logging disabled" >"{snakemake.log.log}" - fi -fi - -R --vanilla --slave << __EOF -devtools::install_github("aroneklund/copynumber", lib="{lib_path}", upgrade="never") -devtools::install_github("sztup/scarHRD", lib="{lib_path}", upgrade="never") -__EOF -touch {snakemake.output} -""" -) - -# Compute MD5 sums of logs. 
-shell( - r""" -md5sum {snakemake.log.log} >{snakemake.log.log_md5} -""" -) diff --git a/snappy_wrappers/wrappers/scarHRD/run/wrapper.py b/snappy_wrappers/wrappers/scarHRD/run/wrapper.py index 6e414eade..04659e66a 100644 --- a/snappy_wrappers/wrappers/scarHRD/run/wrapper.py +++ b/snappy_wrappers/wrappers/scarHRD/run/wrapper.py @@ -53,9 +53,10 @@ | sequenza-utils seqz_binning -w {length} -s - \ | gzip > {snakemake.output.sequenza} +export R_LIBS="{lib_path}" +export VROOM_CONNECTION_SIZE=2000000000 + cat << __EOF | R --vanilla --slave -.libPaths(c("{lib_path}", .libPaths())) -Sys.setenv(VROOM_CONNECTION_SIZE=2000000000) library("scarHRD") tbl <- scar_score("{snakemake.output.sequenza}", reference="{genome_name}", seqz=TRUE, chr.in.name={chr_in_name}) diff --git a/tests/snappy_pipeline/workflows/test_workflows_homologous_recombination_deficiency.py b/tests/snappy_pipeline/workflows/test_workflows_homologous_recombination_deficiency.py index 3183a63e7..4ec289fd5 100644 --- a/tests/snappy_pipeline/workflows/test_workflows_homologous_recombination_deficiency.py +++ b/tests/snappy_pipeline/workflows/test_workflows_homologous_recombination_deficiency.py @@ -31,12 +31,12 @@ def minimal_config(): step_config: ngs_mapping: tools: - dna: ['bwa'] - bwa: - path_index: /path/to/bwa/index.fasta + dna: [bwa] + somatic_targeted_seq_cnv_calling: + tools: ['sequenza'] homologous_recombination_deficiency: tools: ['scarHRD'] - path_ngs_mapping: ../ngs_mapping # REQUIRED + path_cnv_calling: ../somatic_targeted_seq_cnv_calling # REQUIRED data_sets: first_batch: @@ -64,7 +64,7 @@ def homologous_recombination_deficiency_workflow( """Return HomologousRecombinationDeficiencyWorkflow object pre-configured with cancer sheet""" # Patch out file-system related things in abstract (the crawling link in step is defined there) patch_module_fs("snappy_pipeline.workflows.abstract", cancer_sheet_fake_fs, mocker) - dummy_workflow.globals = {"ngs_mapping": lambda x: "NGS_MAPPING/" + x} + 
dummy_workflow.globals = {"cnv_calling": lambda x: "SOMATIC_CNV_CALLING/" + x} # Construct the workflow object return HomologousRecombinationDeficiencyWorkflow( dummy_workflow, @@ -80,14 +80,12 @@ def homologous_recombination_deficiency_workflow( def test_scarHRD_step_part_get_input_files_run(homologous_recombination_deficiency_workflow): """Tests ScarHRDStepPart.get_input_files() - run""" - wildcards = Wildcards(fromdict={"library_name": "P001-T1-DNA1-WGS1", "mapper": "bwa"}) + wildcards = Wildcards( + fromdict={"mapper": "bwa", "caller": "sequenza", "library_name": "P001-T1-DNA1-WGS1"} + ) expected = { - "lib_path": "work/R_packages/out/.done", - "gc": "work/static_data/out/grch37_50.wig.gz", - "normal_bai": "NGS_MAPPING/output/bwa.P001-N1-DNA1-WGS1/out/bwa.P001-N1-DNA1-WGS1.bam.bai", - "normal_bam": "NGS_MAPPING/output/bwa.P001-N1-DNA1-WGS1/out/bwa.P001-N1-DNA1-WGS1.bam", - "tumor_bai": "NGS_MAPPING/output/bwa.P001-T1-DNA1-WGS1/out/bwa.P001-T1-DNA1-WGS1.bam.bai", - "tumor_bam": "NGS_MAPPING/output/bwa.P001-T1-DNA1-WGS1/out/bwa.P001-T1-DNA1-WGS1.bam", + "done": "work/R_packages/out/scarHRD.done", + "seqz": "SOMATIC_CNV_CALLING/output/bwa.sequenza.P001-T1-DNA1-WGS1/out/bwa.sequenza.P001-T1-DNA1-WGS1.seqz.gz", } actual = homologous_recombination_deficiency_workflow.get_input_files("scarHRD", "run")( wildcards @@ -98,10 +96,12 @@ def test_scarHRD_step_part_get_input_files_run(homologous_recombination_deficien def test_scarHRD_step_part_get_output_files_run(homologous_recombination_deficiency_workflow): """Tests ScarHRDStepPart.get_output_files() - run""" # Define expected - base_name_out = "work/{mapper}.scarHRD.{library_name}/out/{mapper}.scarHRD.{library_name}" + base_name_out = ( + "work/{mapper}.{caller}.scarHRD.{library_name}/out/{mapper}.{caller}.scarHRD.{library_name}" + ) expected = { - "sequenza": base_name_out + ".seqz.gz", "scarHRD": base_name_out + ".json", + "scarHRD_md5": base_name_out + ".json.md5", } # Get actual actual = 
homologous_recombination_deficiency_workflow.get_output_files("scarHRD", "run") @@ -110,7 +110,9 @@ def test_scarHRD_step_part_get_output_files_run(homologous_recombination_deficie def test_scarHRD_step_part_get_log_file_run(homologous_recombination_deficiency_workflow): """Tests ScarHRDStepPart.get_log_file() - run""" - base_name = "work/{mapper}.scarHRD.{library_name}/log/{mapper}.scarHRD.{library_name}" + base_name = ( + "work/{mapper}.{caller}.scarHRD.{library_name}/log/{mapper}.{caller}.scarHRD.{library_name}" + ) expected = get_expected_log_files_dict(base_out=base_name) actual = homologous_recombination_deficiency_workflow.get_log_file("scarHRD", "run") assert actual == expected @@ -119,7 +121,7 @@ def test_scarHRD_step_part_get_log_file_run(homologous_recombination_deficiency_ def test_scarHRD_step_part_get_resource_usage_run(homologous_recombination_deficiency_workflow): """Tests ScarHRDStepPart.get_resource() - run""" # Define expected - expected_dict = {"threads": 2, "time": "48:00:00", "memory": "32G", "partition": "medium"} + expected_dict = {"threads": 1, "time": "24:00:00", "memory": "32G", "partition": "medium"} # Evaluate for resource, expected in expected_dict.items(): msg_error = f"Assertion error for resource '{resource}'." 
@@ -132,7 +134,7 @@ def test_scarHRD_step_part_get_resource_usage_run(homologous_recombination_defic def test_scarHRD_step_part_get_output_files_install(homologous_recombination_deficiency_workflow): """Tests ScarHRDStepPart.get_output_files() - install""" # Define expected - expected = {"lib_path": "work/R_packages/out/.done"} + expected = {"done": "work/R_packages/out/scarHRD.done"} # Get actual actual = homologous_recombination_deficiency_workflow.get_output_files("scarHRD", "install") assert actual == expected @@ -140,7 +142,7 @@ def test_scarHRD_step_part_get_output_files_install(homologous_recombination_def def test_scarHRD_step_part_get_log_file_install(homologous_recombination_deficiency_workflow): """Tests ScarHRDStepPart.get_log_file() - install""" - base_name = "work/R_packages/log/R_packages" + base_name = "work/R_packages/log/scarHRD" expected = get_expected_log_files_dict(base_out=base_name) actual = homologous_recombination_deficiency_workflow.get_log_file("scarHRD", "install") assert actual == expected @@ -149,7 +151,7 @@ def test_scarHRD_step_part_get_log_file_install(homologous_recombination_deficie def test_scarHRD_step_part_get_resource_usage_install(homologous_recombination_deficiency_workflow): """Tests ScarHRDStepPart.get_resource() - install""" # Define expected - expected_dict = {"threads": 1, "time": "02:00:00", "memory": "4096M", "partition": "short"} + expected_dict = {"threads": 1, "time": "01:00:00", "memory": "2G", "partition": "medium"} # Evaluate for resource, expected in expected_dict.items(): msg_error = f"Assertion error for resource '{resource}'." 
@@ -159,25 +161,6 @@ def test_scarHRD_step_part_get_resource_usage_install(homologous_recombination_d assert actual == expected, msg_error -def test_scarHRD_step_part_get_output_files_gcreference( - homologous_recombination_deficiency_workflow, -): - """Tests ScarHRDStepPart.get_output_files() - gcreference""" - # Define expected - expected = {"gc": "work/static_data/out/grch37_50.wig.gz"} - # Get actual - actual = homologous_recombination_deficiency_workflow.get_output_files("scarHRD", "gcreference") - assert actual == expected - - -def test_scarHRD_step_part_get_log_file_gcreference(homologous_recombination_deficiency_workflow): - """Tests ScarHRDStepPart.get_log_file() - gcreference""" - base_name = "work/static_data/log/grch37_50" - expected = get_expected_log_files_dict(base_out=base_name) - actual = homologous_recombination_deficiency_workflow.get_log_file("scarHRD", "gcreference") - assert actual == expected - - # Tests for SomaticMsiCallingWorkflow -------------------------------------------------------------- @@ -188,12 +171,18 @@ def test_homologous_recombination_deficiency_workflow(homologous_recombination_d assert list(sorted(homologous_recombination_deficiency_workflow.sub_steps.keys())) == expected # Check result file construction expected = [ - "output/bwa.scarHRD.P001-T1-DNA1-WGS1/out/bwa.scarHRD.P001-T1-DNA1-WGS1.json", - "output/bwa.scarHRD.P001-T1-DNA1-WGS1/out/bwa.scarHRD.P001-T1-DNA1-WGS1.seqz.gz", - "output/bwa.scarHRD.P002-T1-DNA1-WGS1/out/bwa.scarHRD.P002-T1-DNA1-WGS1.json", - "output/bwa.scarHRD.P002-T1-DNA1-WGS1/out/bwa.scarHRD.P002-T1-DNA1-WGS1.seqz.gz", - "output/bwa.scarHRD.P002-T2-DNA1-WGS1/out/bwa.scarHRD.P002-T2-DNA1-WGS1.json", - "output/bwa.scarHRD.P002-T2-DNA1-WGS1/out/bwa.scarHRD.P002-T2-DNA1-WGS1.seqz.gz", + "output/bwa.sequenza.scarHRD.P001-T1-DNA1-WGS1/out/bwa.sequenza.scarHRD.P001-T1-DNA1-WGS1.json", + "output/bwa.sequenza.scarHRD.P002-T1-DNA1-WGS1/out/bwa.sequenza.scarHRD.P002-T1-DNA1-WGS1.json", + 
"output/bwa.sequenza.scarHRD.P002-T2-DNA1-WGS1/out/bwa.sequenza.scarHRD.P002-T2-DNA1-WGS1.json", + "output/bwa.sequenza.scarHRD.P001-T1-DNA1-WGS1/out/bwa.sequenza.scarHRD.P001-T1-DNA1-WGS1.json.md5", + "output/bwa.sequenza.scarHRD.P002-T1-DNA1-WGS1/out/bwa.sequenza.scarHRD.P002-T1-DNA1-WGS1.json.md5", + "output/bwa.sequenza.scarHRD.P002-T2-DNA1-WGS1/out/bwa.sequenza.scarHRD.P002-T2-DNA1-WGS1.json.md5", + ] + expected += [ + f"output/bwa.sequenza.scarHRD.P00{i[0]}-T{i[1]}-DNA1-WGS1/log/bwa.sequenza.scarHRD.P00{i[0]}-T{i[1]}-DNA1-WGS1.{ext}{chksum}" + for i in ((1, 1), (2, 1), (2, 2)) + for ext in ("log", "conda_list.txt", "conda_info.txt") + for chksum in ("", ".md5") ] actual = set(homologous_recombination_deficiency_workflow.get_result_files()) expected = set(expected)