diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index e4614386..c811d442 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -55,4 +55,4 @@ jobs: with: directory: .test snakefile: workflow/Snakefile - args: "--configfile .test/config/config.yaml --report report.zip" + args: "--configfile .test/config/config.yaml --cores 2 --report report.zip" diff --git a/workflow/rules/annotate.smk b/workflow/rules/annotate.smk index 65b56a54..e84922cd 100644 --- a/workflow/rules/annotate.smk +++ b/workflow/rules/annotate.smk @@ -26,9 +26,11 @@ rule cyrcular_annotate_graph: graph="results/calling/graphs/{group}.graph", gene_annotation="resources/gene_annotation.gff3.gz", regulatory_annotation="resources/regulatory_annotation.gff3.gz", - repeat_annotation=lambda wc: "resources/repeat_masker.fa.out.gz" - if config["reference"].get("repeat_masker_download_link", "") - else "", + repeat_annotation=lambda wc: ( + "resources/repeat_masker.fa.out.gz" + if config["reference"].get("repeat_masker_download_link", "") + else "" + ), output: annotated="results/calling/graphs/{group}.annotated.graph", threads: 1 @@ -39,9 +41,11 @@ rule cyrcular_annotate_graph: conda: "../envs/cyrcular.yaml" params: - repeat_annotation=lambda wc, input: f" --repeat-annotation {input.repeat_annotation} " - if config["reference"].get("repeat_masker_download_link", "") - else "", + repeat_annotation=lambda wc, input: ( + f" --repeat-annotation {input.repeat_annotation} " + if config["reference"].get("repeat_masker_download_link", "") + else "" + ), shell: "cyrcular graph annotate " " --reference {input.reference} " diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 5ba61120..12192c60 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -74,7 +74,7 @@ def get_all_input(wildcards): def pairhmm_mode(wildcards): - if samples.loc[wildcards.sample]["platform"] == "nanopore": + if samples.loc[wildcards.sample]["platform"].lower() == "nanopore": mode = "homopolymer" else: mode = "exact" @@ -88,17 +88,23 @@ def get_group_candidates(wildcards): scenario = scenario_name(wildcards) if scenario == "nanopore_only": sample = list( - samples.query(f"group == '{group}' & platform == 'nanopore'")["sample_name"] + samples.query(f"group == '{group}' & platform.str.lower() == 'nanopore'")[ + "sample_name" + ] )[0] return f"results/calling/candidate-calls/{sample}.{{scatteritem}}.bcf" elif scenario == "illumina_only": sample = list( - samples.query(f"group == '{group}' & platform == 'illumina'")["sample_name"] + samples.query(f"group == '{group}' & platform.str.lower() == 'illumina'")[ + "sample_name" + ] )[0] return f"results/calling/candidate-calls/{sample}.{{scatteritem}}.bcf" elif scenario == "nanopore_with_illumina_support": sample = list( - samples.query(f"group == '{group}' & platform == 'nanopore'")["sample_name"] + samples.query(f"group == '{group}' & platform.str.lower() == 'nanopore'")[ + "sample_name" + ] )[0] return f"results/calling/candidate-calls/{sample}.{{scatteritem}}.bcf" else: @@ -128,17 +134,21 @@ def get_observations(wildcards): observations = [] - has_nanopore = len(s.query("platform == 'nanopore'")["sample_name"]) > 0 - has_illumina = len(s.query("platform == 'illumina'")["sample_name"]) > 0 + has_nanopore = len(s.query("platform.str.lower() == 'nanopore'")["sample_name"]) > 0 + has_illumina = len(s.query("platform.str.lower() == 'illumina'")["sample_name"]) > 0 if has_nanopore: - for sample_nanopore in list(s.query("platform == 'nanopore'")["sample_name"]): + for sample_nanopore in list( + s.query("platform.str.lower() == 'nanopore'")["sample_name"] + ): observations.append( f"results/calling/calls/observations/{sample_nanopore}.{{scatteritem}}.bcf" ) if has_illumina: - for sample_illumina in list(s.query("platform == 'illumina'")["sample_name"]): + for sample_illumina in list( + s.query("platform.str.lower() == 'illumina'")["sample_name"] + ): observations.append( f"results/calling/calls/observations/{sample_illumina}.{{scatteritem}}.bcf" ) @@ -150,17 +160,17 @@ def scenario_name(wildcards): s = samples.query(f"group == '{wildcards.group}'") num_samples_in_group = len(s) if num_samples_in_group == 1: - if "illumina" in set(s["platform"]): + if "illumina" in set(s["platform"].str.lower()): return "illumina_only" - elif "nanopore" in set(s["platform"]): + elif "nanopore" in set(s["platform"].str.lower()): return "nanopore_only" else: - platforms = ", ".join(set(s["platform"])) + platforms = ", ".join(set(s["platform"].str.lower())) raise ValueError( f"Single sample scenario not defined for platforms {platforms}" ) elif num_samples_in_group == 2: - if len(set(s["platform"]) - {"illumina", "nanopore"}) == 0: + if len(set(s["platform"].str.lower()) - {"illumina", "nanopore"}) == 0: return "nanopore_with_illumina_support" else: raise ValueError( @@ -191,9 +201,9 @@ def get_scenario(wildcards): def get_minimap2_mapping_params(wildcards): - if samples.loc[wildcards.sample]["platform"] == "nanopore": + if samples.loc[wildcards.sample]["platform"].lower() == "nanopore": return "-x map-ont" - elif samples.loc[wildcards.sample]["platform"] == "illumina": + elif samples.loc[wildcards.sample]["platform"].lower() == "illumina": return "-x sr" else: return "" diff --git a/workflow/rules/map.smk b/workflow/rules/map.smk index 8fce0193..baea8795 100644 --- a/workflow/rules/map.smk +++ b/workflow/rules/map.smk @@ -11,7 +11,7 @@ rule minimap2_bam: params: extra=get_minimap2_mapping_params, # optional sorting="coordinate", # optional: Enable sorting. Possible values: 'none', 'queryname' or 'coordinate' - sort_extra=lambda wc, threads: f"-@ {min(threads, 4)}", # optional: extra arguments for samtools/picard + sort_extra=lambda wc, threads: f"-@ {min(threads , 4)}", # optional: extra arguments for samtools/picard threads: workflow.cores // 2 wrapper: "v1.25.0/bio/minimap2/aligner" @@ -27,9 +27,11 @@ rule merge_fastqs: wildcard_constraints: read="single|R1|R2", params: - cmd=lambda wc: "pigz -dc" - if (any(map(lambda f: f.endswith(".gz"), get_fastqs(wc)))) - else "cat", + cmd=lambda wc: ( + "pigz -dc" + if (any(map(lambda f: f.endswith(".gz"), get_fastqs(wc)))) + else "cat" + ), conda: "../envs/pigz.yaml" shell: diff --git a/workflow/rules/ref.smk b/workflow/rules/ref.smk index 752802db..cfe7c744 100644 --- a/workflow/rules/ref.smk +++ b/workflow/rules/ref.smk @@ -45,8 +45,6 @@ rule minimap2_index: ), log: "logs/minimap2_index/genome.log", - benchmark: - "benchmarks/minimap2_index/genome.txt" params: extra="", # optional additional args cache: True @@ -65,8 +63,6 @@ rule download_regulatory_annotation: "logs/download_regulatory_annotation.log", params: release=config["reference"].get("release", "107"), - benchmark: - "benchmarks/download_regulatory_annotation.txt" cache: "omit-software" # save space and time with between workflow caching (see docs) conda: "../envs/wget.yaml" @@ -81,8 +77,6 @@ rule download_repeatmasker_annotation: "logs/download_repeatmasker_annotation.log", params: download_link=config["reference"].get("repeat_masker_download_link", ""), - benchmark: - "benchmarks/download_repeatmasker_annotation.txt" cache: "omit-software" # save space and time with between workflow caching (see docs) conda: "../envs/wget.yaml" diff --git a/workflow/schemas/samples.schema.yaml b/workflow/schemas/samples.schema.yaml index e3a15f87..84591511 100644 --- a/workflow/schemas/samples.schema.yaml +++ b/workflow/schemas/samples.schema.yaml @@ -14,7 +14,9 @@ properties: type: string enum: - "illumina" + - "ILLUMINA" - "nanopore" + - "NANOPORE" description: sequencing platform