From 3b27809d235fe99490ce4fa135e5a8bfb4469671 Mon Sep 17 00:00:00 2001 From: Alexander Blume Date: Wed, 3 Apr 2024 18:05:31 +0200 Subject: [PATCH 1/5] change trim_qc_reads -> trim_qc_reads_pe/se --- etc/settings.yaml.in | 5 ++++- snakefile.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/etc/settings.yaml.in b/etc/settings.yaml.in index 82816ff..2f23875 100755 --- a/etc/settings.yaml.in +++ b/etc/settings.yaml.in @@ -85,7 +85,10 @@ execution: translate_sample_sheet_for_report: threads: 1 memory: 500 - trim_qc_reads: + trim_qc_reads_pe: + threads: 1 + memory: 4000 + trim_qc_reads_se: threads: 1 memory: 4000 star_index: diff --git a/snakefile.py b/snakefile.py index 43e6b5d..024b63a 100644 --- a/snakefile.py +++ b/snakefile.py @@ -324,7 +324,7 @@ def trim_reads_input(args): html=os.path.join(QC_DIR, "{sample}.pe.fastp.html"), json=os.path.join(QC_DIR, "{sample}.pe.fastp.json") #notice that multiqc recognizes files ending with fast.json resources: - mem_mb = config['execution']['rules']['trim_qc_reads']['memory'] + mem_mb = config['execution']['rules']['trim_qc_reads_pe']['memory'] log: os.path.join(LOG_DIR, 'trim_reads.{sample}.log') shell: "{FASTP_EXEC} --in1 {input[0]} --in2 {input[1]} --out1 {output.r1} --out2 {output.r2} -h {output.html} -j {output.json} >> {log} 2>&1" @@ -336,7 +336,7 @@ def trim_reads_input(args): html=os.path.join(QC_DIR, "{sample}.se.fastp.html"), json=os.path.join(QC_DIR, "{sample}.se.fastp.json") #notice that multiqc recognizes files ending with fast.json resources: - mem_mb = config['execution']['rules']['trim_qc_reads']['memory'] + mem_mb = config['execution']['rules']['trim_qc_reads_se']['memory'] log: os.path.join(LOG_DIR, 'trim_reads.{sample}.log') shell: "{FASTP_EXEC} --in1 {input[0]} --out1 {output.r} -h {output.html} -j {output.json} >> {log} 2>&1 " From ca16a542d3d0a741839c4fe6232b1805ce976cd9 Mon Sep 17 00:00:00 2001 From: Alexander Blume Date: Wed, 3 Apr 2024 18:07:15 +0200 Subject: [PATCH 2/5] change hisat2-build -> hisat2_index and hisat2 -> hisat2_map --- etc/settings.yaml.in | 4 ++-- snakefile.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/etc/settings.yaml.in b/etc/settings.yaml.in index 2f23875..a68a8cc 100755 --- a/etc/settings.yaml.in +++ b/etc/settings.yaml.in @@ -94,7 +94,7 @@ execution: star_index: threads: 2 memory: 32000 - hisat2-build: + hisat2_index: threads: 2 memory: 32000 salmon_index: @@ -115,7 +115,7 @@ execution: star_map: threads: 2 memory: 16000 - hisat2: + hisat2_map: threads: 2 memory: 8000 index_bam: diff --git a/snakefile.py b/snakefile.py index 024b63a..63b3123 100644 --- a/snakefile.py +++ b/snakefile.py @@ -81,8 +81,8 @@ def tool(name): MEGADEPTH_EXEC = tool('megadepth') STAR_INDEX_THREADS = config['execution']['rules']['star_index']['threads'] -HISAT2_BUILD_THREADS = config['execution']['rules']['hisat2-build']['threads'] -HISAT2_THREADS = config['execution']['rules']['hisat2']['threads'] +HISAT2_BUILD_THREADS = config['execution']['rules']['hisat2_index']['threads'] +HISAT2_THREADS = config['execution']['rules']['hisat2_map']['threads'] STAR_MAP_THREADS = config['execution']['rules']['star_map']['threads'] SALMON_INDEX_THREADS = config['execution']['rules']['salmon_index']['threads'] SALMON_QUANT_THREADS = config['execution']['rules']['salmon_quant']['threads'] @@ -360,7 +360,7 @@ def trim_reads_input(args): output: [os.path.join(OUTPUT_DIR, "hisat2_index", f"{GENOME_BUILD}_index.{n}.ht2l") for n in [1, 2, 3, 4, 5, 6, 7, 8]] resources: - mem_mb = config['execution']['rules']['hisat2-build']['memory'] + mem_mb = config['execution']['rules']['hisat2_index']['memory'] params: index_directory = os.path.join(OUTPUT_DIR, "hisat2_index"), log: os.path.join(LOG_DIR, 'hisat2_index.log') @@ -406,7 +406,7 @@ def hisat2_file_arguments(args): output: os.path.join(MAPPED_READS_DIR, 'hisat2', '{sample}_Aligned.sortedByCoord.out.bam') resources: - mem_mb = config['execution']['rules']['hisat2']['memory'] + mem_mb = config['execution']['rules']['hisat2_map']['memory'] params: samfile = lambda wildcards: os.path.join(MAPPED_READS_DIR, 'hisat2', "_".join([wildcards.sample, 'Aligned.out.sam'])), index_dir = rules.hisat2_index.params.index_directory, From 490d821705205cefd5f631fcab92dfb1555ab827 Mon Sep 17 00:00:00 2001 From: Alexander Blume Date: Wed, 3 Apr 2024 18:08:02 +0200 Subject: [PATCH 3/5] change counts_from_salmon -> counts_from_SALMON --- etc/settings.yaml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/settings.yaml.in b/etc/settings.yaml.in index a68a8cc..3349c3c 100755 --- a/etc/settings.yaml.in +++ b/etc/settings.yaml.in @@ -103,7 +103,7 @@ execution: salmon_quant: threads: 8 memory: 6000 - counts_from_salmon: + counts_from_SALMON: threads: 1 memory: 200 collate_read_counts: From 798d5ffe6ea44c2ac3d5b9e1440e7db0a7e6f6ac Mon Sep 17 00:00:00 2001 From: Alexander Blume Date: Wed, 3 Apr 2024 18:08:35 +0200 Subject: [PATCH 4/5] change report --> report1/2/3, deseq_collate_report1/2/3 --- etc/settings.yaml.in | 17 ++++++++++++++++- snakefile.py | 12 ++++++------ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/etc/settings.yaml.in b/etc/settings.yaml.in index 3349c3c..5e7de0d 100755 --- a/etc/settings.yaml.in +++ b/etc/settings.yaml.in @@ -136,7 +136,22 @@ execution: check_annotation_files: threads: 1 memory: 16000 - reports: + report1: + threads: 1 + memory: 4000 + deseq_collate_report1: + threads: 1 + memory: 4000 + report2: + threads: 1 + memory: 4000 + deseq_collate_report2: + threads: 1 + memory: 4000 + report3: + threads: 1 + memory: 4000 + deseq_collate_report3: threads: 1 memory: 4000 diff --git a/snakefile.py b/snakefile.py index 63b3123..4eb69b4 100644 --- a/snakefile.py +++ b/snakefile.py @@ -607,7 +607,7 @@ def hisat2_file_arguments(args): os.path.join(OUTPUT_DIR, "report", MAPPER, '{analysis}.deseq.report.html'), os.path.join(OUTPUT_DIR, "report", MAPPER, '{analysis}.deseq_results.tsv') resources: - mem_mb = config['execution']['rules']['reports']['memory'] + mem_mb = config['execution']['rules']['report1']['memory'] shell: "{RSCRIPT_EXEC} {params.reportR} --logo={params.logo} --prefix='{wildcards.analysis}' --reportFile={params.reportRmd} --countDataFile={input.counts} --colDataFile={input.coldata} --gtfFile={GTF_FILE} --caseSampleGroups='{params.case}' --controlSampleGroups='{params.control}' --covariates='{params.covariates}' --workdir={params.outdir} --organism='{ORGANISM}' --description='{params.description}' --selfContained='{params.selfContained}' >> {log} 2>&1" @@ -624,7 +624,7 @@ def hisat2_file_arguments(args): output: os.path.join(OUTPUT_DIR, "report", MAPPER, 'collated.deseq_results.tsv') resources: - mem_mb = config['execution']['rules']['reports']['memory'] + mem_mb = config['execution']['rules']['deseq_collate_report1']['memory'] shell: "{RSCRIPT_EXEC} {params.script} {params.mapper} {params.inpdir} {params.outdir} >> {log} 2>&1" @@ -647,7 +647,7 @@ def hisat2_file_arguments(args): os.path.join(OUTPUT_DIR, "report", 'salmon', '{analysis}.salmon.transcripts.deseq.report.html'), os.path.join(OUTPUT_DIR, "report", "salmon", '{analysis}.salmon.transcripts.deseq_results.tsv') resources: - mem_mb = config['execution']['rules']['reports']['memory'] + mem_mb = config['execution']['rules']['report2']['memory'] shell: "{RSCRIPT_EXEC} {params.reportR} --logo={params.logo} --prefix='{wildcards.analysis}.salmon.transcripts' --reportFile={params.reportRmd} --countDataFile={input.counts} --colDataFile={input.coldata} --gtfFile={GTF_FILE} --caseSampleGroups='{params.case}' --controlSampleGroups='{params.control}' --covariates='{params.covariates}' --workdir={params.outdir} --organism='{ORGANISM}' --description='{params.description}' --selfContained='{params.selfContained}' >> {log} 2>&1" rule deseq_collate_report2: @@ -663,7 +663,7 @@ def hisat2_file_arguments(args): output: os.path.join(OUTPUT_DIR, "report", 'salmon', 'collated.transcripts.deseq_results.tsv') resources: - mem_mb = config['execution']['rules']['reports']['memory'] + mem_mb = config['execution']['rules']['deseq_collate_report2']['memory'] shell: "{RSCRIPT_EXEC} {params.script} {params.mapper} {params.inpdir} {params.outdir} >> {log} 2>&1" @@ -686,7 +686,7 @@ def hisat2_file_arguments(args): os.path.join(OUTPUT_DIR, "report", "salmon", '{analysis}.salmon.genes.deseq.report.html'), os.path.join(OUTPUT_DIR, "report", "salmon", '{analysis}.salmon.genes.deseq_results.tsv') resources: - mem_mb = config['execution']['rules']['reports']['memory'] + mem_mb = config['execution']['rules']['report3']['memory'] shell: "{RSCRIPT_EXEC} {params.reportR} --logo={params.logo} --prefix='{wildcards.analysis}.salmon.genes' --reportFile={params.reportRmd} --countDataFile={input.counts} --colDataFile={input.coldata} --gtfFile={GTF_FILE} --caseSampleGroups='{params.case}' --controlSampleGroups='{params.control}' --covariates='{params.covariates}' --workdir={params.outdir} --organism='{ORGANISM}' --description='{params.description}' --selfContained='{params.selfContained}' >> {log} 2>&1" rule deseq_collate_report3: @@ -702,6 +702,6 @@ def hisat2_file_arguments(args): output: os.path.join(OUTPUT_DIR, "report", 'salmon', 'collated.genes.deseq_results.tsv') resources: - mem_mb = config['execution']['rules']['reports']['memory'] + mem_mb = config['execution']['rules']['deseq_collate_report3']['memory'] shell: "{RSCRIPT_EXEC} {params.script} {params.mapper} {params.inpdir} {params.outdir} >> {log} 2>&1" From be44e322f6fb8f6c82e7e78148dbe964ea5383a3 Mon Sep 17 00:00:00 2001 From: Alexander Blume Date: Wed, 3 Apr 2024 18:48:21 +0200 Subject: [PATCH 5/5] fix keyError for counts_from_SALMON --- snakefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snakefile.py b/snakefile.py index 4eb69b4..73f3bc0 100644 --- a/snakefile.py +++ b/snakefile.py @@ -476,7 +476,7 @@ def hisat2_file_arguments(args): os.path.join(COUNTS_DIR, "normalized", "salmon", "TPM_counts_from_SALMON.transcripts.tsv"), os.path.join(COUNTS_DIR, "normalized", "salmon", "TPM_counts_from_SALMON.genes.tsv") resources: - mem_mb = config['execution']['rules']['counts_from_salmon']['memory'] + mem_mb = config['execution']['rules']['counts_from_SALMON']['memory'] log: os.path.join(LOG_DIR, "salmon", 'salmon_import_counts.log') shell: "{RSCRIPT_EXEC} {SCRIPTS_DIR}/counts_matrix_from_SALMON.R {SALMON_DIR} {COUNTS_DIR} {input.colDataFile} >> {log} 2>&1"