Skip to content

Commit

Permalink
Merge pull request #17 from snakemake-workflows/feat/allow-for-all-caps-platform-spec
Browse files Browse the repository at this point in the history

feat: allow for all caps platform spec
  • Loading branch information
dlaehnemann authored Apr 8, 2024
2 parents ccd556a + d6b7dcf commit caec65f
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 31 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,4 @@ jobs:
with:
directory: .test
snakefile: workflow/Snakefile
args: "--configfile .test/config/config.yaml --report report.zip"
args: "--configfile .test/config/config.yaml --cores 2 --report report.zip"
16 changes: 10 additions & 6 deletions workflow/rules/annotate.smk
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,11 @@ rule cyrcular_annotate_graph:
graph="results/calling/graphs/{group}.graph",
gene_annotation="resources/gene_annotation.gff3.gz",
regulatory_annotation="resources/regulatory_annotation.gff3.gz",
repeat_annotation=lambda wc: "resources/repeat_masker.fa.out.gz"
if config["reference"].get("repeat_masker_download_link", "")
else "",
repeat_annotation=lambda wc: (
"resources/repeat_masker.fa.out.gz"
if config["reference"].get("repeat_masker_download_link", "")
else ""
),
output:
annotated="results/calling/graphs/{group}.annotated.graph",
threads: 1
Expand All @@ -39,9 +41,11 @@ rule cyrcular_annotate_graph:
conda:
"../envs/cyrcular.yaml"
params:
repeat_annotation=lambda wc, input: f" --repeat-annotation {input.repeat_annotation} "
if config["reference"].get("repeat_masker_download_link", "")
else "",
repeat_annotation=lambda wc, input: (
f" --repeat-annotation {input.repeat_annotation} "
if config["reference"].get("repeat_masker_download_link", "")
else ""
),
shell:
"cyrcular graph annotate "
" --reference {input.reference} "
Expand Down
38 changes: 24 additions & 14 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def get_all_input(wildcards):


def pairhmm_mode(wildcards):
if samples.loc[wildcards.sample]["platform"] == "nanopore":
if samples.loc[wildcards.sample]["platform"].lower() == "nanopore":
mode = "homopolymer"
else:
mode = "exact"
Expand All @@ -88,17 +88,23 @@ def get_group_candidates(wildcards):
scenario = scenario_name(wildcards)
if scenario == "nanopore_only":
sample = list(
samples.query(f"group == '{group}' & platform == 'nanopore'")["sample_name"]
samples.query(f"group == '{group}' & platform.str.lower() == 'nanopore'")[
"sample_name"
]
)[0]
return f"results/calling/candidate-calls/{sample}.{{scatteritem}}.bcf"
elif scenario == "illumina_only":
sample = list(
samples.query(f"group == '{group}' & platform == 'illumina'")["sample_name"]
samples.query(f"group == '{group}' & platform.str.lower() == 'illumina'")[
"sample_name"
]
)[0]
return f"results/calling/candidate-calls/{sample}.{{scatteritem}}.bcf"
elif scenario == "nanopore_with_illumina_support":
sample = list(
samples.query(f"group == '{group}' & platform == 'nanopore'")["sample_name"]
samples.query(f"group == '{group}' & platform.str.lower() == 'nanopore'")[
"sample_name"
]
)[0]
return f"results/calling/candidate-calls/{sample}.{{scatteritem}}.bcf"
else:
Expand Down Expand Up @@ -128,17 +134,21 @@ def get_observations(wildcards):

observations = []

has_nanopore = len(s.query("platform == 'nanopore'")["sample_name"]) > 0
has_illumina = len(s.query("platform == 'illumina'")["sample_name"]) > 0
has_nanopore = len(s.query("platform.str.lower() == 'nanopore'")["sample_name"]) > 0
has_illumina = len(s.query("platform.str.lower() == 'illumina'")["sample_name"]) > 0

if has_nanopore:
for sample_nanopore in list(s.query("platform == 'nanopore'")["sample_name"]):
for sample_nanopore in list(
s.query("platform.str.lower() == 'nanopore'")["sample_name"]
):
observations.append(
f"results/calling/calls/observations/{sample_nanopore}.{{scatteritem}}.bcf"
)

if has_illumina:
for sample_illumina in list(s.query("platform == 'illumina'")["sample_name"]):
for sample_illumina in list(
s.query("platform.str.lower() == 'illumina'")["sample_name"]
):
observations.append(
f"results/calling/calls/observations/{sample_illumina}.{{scatteritem}}.bcf"
)
Expand All @@ -150,17 +160,17 @@ def scenario_name(wildcards):
s = samples.query(f"group == '{wildcards.group}'")
num_samples_in_group = len(s)
if num_samples_in_group == 1:
if "illumina" in set(s["platform"]):
if "illumina" in set(s["platform"].str.lower()):
return "illumina_only"
elif "nanopore" in set(s["platform"]):
elif "nanopore" in set(s["platform"].str.lower()):
return "nanopore_only"
else:
platforms = ", ".join(set(s["platform"]))
platforms = ", ".join(set(s["platform"].str.lower()))
raise ValueError(
f"Single sample scenario not defined for platforms {platforms}"
)
elif num_samples_in_group == 2:
if len(set(s["platform"]) - {"illumina", "nanopore"}) == 0:
if len(set(s["platform"].str.lower()) - {"illumina", "nanopore"}) == 0:
return "nanopore_with_illumina_support"
else:
raise ValueError(
Expand Down Expand Up @@ -191,9 +201,9 @@ def get_scenario(wildcards):


def get_minimap2_mapping_params(wildcards):
if samples.loc[wildcards.sample]["platform"] == "nanopore":
if samples.loc[wildcards.sample]["platform"].lower() == "nanopore":
return "-x map-ont"
elif samples.loc[wildcards.sample]["platform"] == "illumina":
elif samples.loc[wildcards.sample]["platform"].lower() == "illumina":
return "-x sr"
else:
return ""
Expand Down
10 changes: 6 additions & 4 deletions workflow/rules/map.smk
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ rule minimap2_bam:
params:
extra=get_minimap2_mapping_params, # optional
sorting="coordinate", # optional: Enable sorting. Possible values: 'none', 'queryname' or 'coordinate'
sort_extra=lambda wc, threads: f"-@ {min(threads, 4)}", # optional: extra arguments for samtools/picard
sort_extra=lambda wc, threads: f"-@ {min(threads , 4)}", # optional: extra arguments for samtools/picard
threads: workflow.cores // 2
wrapper:
"v1.25.0/bio/minimap2/aligner"
Expand All @@ -27,9 +27,11 @@ rule merge_fastqs:
wildcard_constraints:
read="single|R1|R2",
params:
cmd=lambda wc: "pigz -dc"
if (any(map(lambda f: f.endswith(".gz"), get_fastqs(wc))))
else "cat",
cmd=lambda wc: (
"pigz -dc"
if (any(map(lambda f: f.endswith(".gz"), get_fastqs(wc))))
else "cat"
),
conda:
"../envs/pigz.yaml"
shell:
Expand Down
6 changes: 0 additions & 6 deletions workflow/rules/ref.smk
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ rule minimap2_index:
),
log:
"logs/minimap2_index/genome.log",
benchmark:
"benchmarks/minimap2_index/genome.txt"
params:
extra="", # optional additional args
cache: True
Expand All @@ -65,8 +63,6 @@ rule download_regulatory_annotation:
"logs/download_regulatory_annotation.log",
params:
release=config["reference"].get("release", "107"),
benchmark:
"benchmarks/download_regulatory_annotation.txt"
cache: "omit-software" # save space and time with between workflow caching (see docs)
conda:
"../envs/wget.yaml"
Expand All @@ -81,8 +77,6 @@ rule download_repeatmasker_annotation:
"logs/download_repeatmasker_annotation.log",
params:
download_link=config["reference"].get("repeat_masker_download_link", ""),
benchmark:
"benchmarks/download_repeatmasker_annotation.txt"
cache: "omit-software" # save space and time with between workflow caching (see docs)
conda:
"../envs/wget.yaml"
Expand Down
2 changes: 2 additions & 0 deletions workflow/schemas/samples.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ properties:
type: string
enum:
- "illumina"
- "ILLUMINA"
- "nanopore"
- "NANOPORE"
description: sequencing platform


Expand Down

0 comments on commit caec65f

Please sign in to comment.