From 3b27809d235fe99490ce4fa135e5a8bfb4469671 Mon Sep 17 00:00:00 2001
From: Alexander Blume <alex.gos90@gmail.com>
Date: Wed, 3 Apr 2024 18:05:31 +0200
Subject: [PATCH 1/5] change trim_qc_reads -> trim_qc_reads_pe/se

---
 etc/settings.yaml.in | 5 ++++-
 snakefile.py         | 4 ++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/etc/settings.yaml.in b/etc/settings.yaml.in
index 82816ff..2f23875 100755
--- a/etc/settings.yaml.in
+++ b/etc/settings.yaml.in
@@ -85,7 +85,10 @@ execution:
     translate_sample_sheet_for_report:
       threads: 1
       memory: 500
-    trim_qc_reads:
+    trim_qc_reads_pe:
+      threads: 1
+      memory: 4000
+    trim_qc_reads_se:
       threads: 1
       memory: 4000
     star_index:
diff --git a/snakefile.py b/snakefile.py
index 43e6b5d..024b63a 100644
--- a/snakefile.py
+++ b/snakefile.py
@@ -324,7 +324,7 @@ def trim_reads_input(args):
     html=os.path.join(QC_DIR, "{sample}.pe.fastp.html"),
     json=os.path.join(QC_DIR, "{sample}.pe.fastp.json") #notice that multiqc recognizes files ending with fast.json
   resources:
-    mem_mb = config['execution']['rules']['trim_qc_reads']['memory']
+    mem_mb = config['execution']['rules']['trim_qc_reads_pe']['memory']
   log: os.path.join(LOG_DIR, 'trim_reads.{sample}.log')
   shell: "{FASTP_EXEC} --in1 {input[0]} --in2 {input[1]} --out1 {output.r1} --out2 {output.r2} -h {output.html} -j {output.json} >> {log} 2>&1"
 
@@ -336,7 +336,7 @@ def trim_reads_input(args):
     html=os.path.join(QC_DIR, "{sample}.se.fastp.html"),
     json=os.path.join(QC_DIR, "{sample}.se.fastp.json") #notice that multiqc recognizes files ending with fast.json
   resources:
-    mem_mb = config['execution']['rules']['trim_qc_reads']['memory']
+    mem_mb = config['execution']['rules']['trim_qc_reads_se']['memory']
   log: os.path.join(LOG_DIR, 'trim_reads.{sample}.log')
   shell: "{FASTP_EXEC} --in1 {input[0]} --out1 {output.r} -h {output.html} -j {output.json} >> {log} 2>&1 "
 

From ca16a542d3d0a741839c4fe6232b1805ce976cd9 Mon Sep 17 00:00:00 2001
From: Alexander Blume <alex.gos90@gmail.com>
Date: Wed, 3 Apr 2024 18:07:15 +0200
Subject: [PATCH 2/5] change hisat2-build -> hisat2_index and hisat2 ->
 hisat2_map

---
 etc/settings.yaml.in | 4 ++--
 snakefile.py         | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/etc/settings.yaml.in b/etc/settings.yaml.in
index 2f23875..a68a8cc 100755
--- a/etc/settings.yaml.in
+++ b/etc/settings.yaml.in
@@ -94,7 +94,7 @@ execution:
     star_index:
       threads: 2
       memory: 32000
-    hisat2-build:
+    hisat2_index:
       threads: 2 
       memory: 32000
     salmon_index:
@@ -115,7 +115,7 @@ execution:
     star_map:
       threads: 2
       memory: 16000
-    hisat2:
+    hisat2_map:
       threads: 2
       memory: 8000
     index_bam:
diff --git a/snakefile.py b/snakefile.py
index 024b63a..63b3123 100644
--- a/snakefile.py
+++ b/snakefile.py
@@ -81,8 +81,8 @@ def tool(name):
 MEGADEPTH_EXEC = tool('megadepth')
 
 STAR_INDEX_THREADS   = config['execution']['rules']['star_index']['threads']
-HISAT2_BUILD_THREADS = config['execution']['rules']['hisat2-build']['threads']
-HISAT2_THREADS       = config['execution']['rules']['hisat2']['threads']
+HISAT2_BUILD_THREADS = config['execution']['rules']['hisat2_index']['threads']
+HISAT2_THREADS       = config['execution']['rules']['hisat2_map']['threads']
 STAR_MAP_THREADS     = config['execution']['rules']['star_map']['threads']
 SALMON_INDEX_THREADS = config['execution']['rules']['salmon_index']['threads']
 SALMON_QUANT_THREADS = config['execution']['rules']['salmon_quant']['threads']
@@ -360,7 +360,7 @@ def trim_reads_input(args):
     output:
         [os.path.join(OUTPUT_DIR, "hisat2_index", f"{GENOME_BUILD}_index.{n}.ht2l") for n in [1, 2, 3, 4, 5, 6, 7, 8]]
     resources:
-        mem_mb = config['execution']['rules']['hisat2-build']['memory']
+        mem_mb = config['execution']['rules']['hisat2_index']['memory']
     params:
         index_directory = os.path.join(OUTPUT_DIR, "hisat2_index"),
     log: os.path.join(LOG_DIR, 'hisat2_index.log')
@@ -406,7 +406,7 @@ def hisat2_file_arguments(args):
   output:
     os.path.join(MAPPED_READS_DIR, 'hisat2', '{sample}_Aligned.sortedByCoord.out.bam')
   resources:
-    mem_mb = config['execution']['rules']['hisat2']['memory']
+    mem_mb = config['execution']['rules']['hisat2_map']['memory']
   params:
     samfile = lambda wildcards: os.path.join(MAPPED_READS_DIR, 'hisat2', "_".join([wildcards.sample, 'Aligned.out.sam'])),
     index_dir = rules.hisat2_index.params.index_directory,

From 490d821705205cefd5f631fcab92dfb1555ab827 Mon Sep 17 00:00:00 2001
From: Alexander Blume <alex.gos90@gmail.com>
Date: Wed, 3 Apr 2024 18:08:02 +0200
Subject: [PATCH 3/5] change counts_from_salmon -> counts_from_SALMON

---
 etc/settings.yaml.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/etc/settings.yaml.in b/etc/settings.yaml.in
index a68a8cc..3349c3c 100755
--- a/etc/settings.yaml.in
+++ b/etc/settings.yaml.in
@@ -103,7 +103,7 @@ execution:
     salmon_quant:
       threads: 8
       memory: 6000
-    counts_from_salmon: 
+    counts_from_SALMON: 
       threads: 1
       memory: 200
     collate_read_counts:

From 798d5ffe6ea44c2ac3d5b9e1440e7db0a7e6f6ac Mon Sep 17 00:00:00 2001
From: Alexander Blume <alex.gos90@gmail.com>
Date: Wed, 3 Apr 2024 18:08:35 +0200
Subject: [PATCH 4/5] change reports -> report1/2/3, deseq_collate_report1/2/3

---
 etc/settings.yaml.in | 17 ++++++++++++++++-
 snakefile.py         | 12 ++++++------
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/etc/settings.yaml.in b/etc/settings.yaml.in
index 3349c3c..5e7de0d 100755
--- a/etc/settings.yaml.in
+++ b/etc/settings.yaml.in
@@ -136,7 +136,22 @@ execution:
     check_annotation_files: 
       threads: 1
       memory: 16000
-    reports:
+    report1:
+      threads: 1
+      memory: 4000  
+    deseq_collate_report1:
+      threads: 1
+      memory: 4000  
+    report2:
+      threads: 1
+      memory: 4000  
+    deseq_collate_report2:
+      threads: 1
+      memory: 4000  
+    report3:
+      threads: 1
+      memory: 4000  
+    deseq_collate_report3:
       threads: 1
       memory: 4000  
 
diff --git a/snakefile.py b/snakefile.py
index 63b3123..4eb69b4 100644
--- a/snakefile.py
+++ b/snakefile.py
@@ -607,7 +607,7 @@ def hisat2_file_arguments(args):
     os.path.join(OUTPUT_DIR, "report", MAPPER, '{analysis}.deseq.report.html'),
     os.path.join(OUTPUT_DIR, "report", MAPPER, '{analysis}.deseq_results.tsv')
   resources:
-    mem_mb = config['execution']['rules']['reports']['memory']
+    mem_mb = config['execution']['rules']['report1']['memory']
   shell:
     "{RSCRIPT_EXEC} {params.reportR} --logo={params.logo} --prefix='{wildcards.analysis}' --reportFile={params.reportRmd} --countDataFile={input.counts} --colDataFile={input.coldata} --gtfFile={GTF_FILE} --caseSampleGroups='{params.case}' --controlSampleGroups='{params.control}' --covariates='{params.covariates}'  --workdir={params.outdir} --organism='{ORGANISM}' --description='{params.description}' --selfContained='{params.selfContained}' >> {log} 2>&1"
 
@@ -624,7 +624,7 @@ def hisat2_file_arguments(args):
   output:
     os.path.join(OUTPUT_DIR, "report", MAPPER, 'collated.deseq_results.tsv')
   resources:
-    mem_mb = config['execution']['rules']['reports']['memory']
+    mem_mb = config['execution']['rules']['deseq_collate_report1']['memory']
   shell:
     "{RSCRIPT_EXEC} {params.script} {params.mapper} {params.inpdir} {params.outdir} >> {log} 2>&1"
 
@@ -647,7 +647,7 @@ def hisat2_file_arguments(args):
     os.path.join(OUTPUT_DIR, "report", 'salmon', '{analysis}.salmon.transcripts.deseq.report.html'),
     os.path.join(OUTPUT_DIR, "report", "salmon", '{analysis}.salmon.transcripts.deseq_results.tsv')
   resources:
-    mem_mb = config['execution']['rules']['reports']['memory']
+    mem_mb = config['execution']['rules']['report2']['memory']
   shell: "{RSCRIPT_EXEC} {params.reportR} --logo={params.logo} --prefix='{wildcards.analysis}.salmon.transcripts' --reportFile={params.reportRmd} --countDataFile={input.counts} --colDataFile={input.coldata} --gtfFile={GTF_FILE} --caseSampleGroups='{params.case}' --controlSampleGroups='{params.control}' --covariates='{params.covariates}' --workdir={params.outdir} --organism='{ORGANISM}' --description='{params.description}' --selfContained='{params.selfContained}' >> {log} 2>&1"
 
 rule deseq_collate_report2:
@@ -663,7 +663,7 @@ def hisat2_file_arguments(args):
   output:
     os.path.join(OUTPUT_DIR, "report", 'salmon', 'collated.transcripts.deseq_results.tsv')
   resources:
-    mem_mb = config['execution']['rules']['reports']['memory']
+    mem_mb = config['execution']['rules']['deseq_collate_report2']['memory']
   shell:
     "{RSCRIPT_EXEC} {params.script} {params.mapper} {params.inpdir} {params.outdir} >> {log} 2>&1"
 
@@ -686,7 +686,7 @@ def hisat2_file_arguments(args):
     os.path.join(OUTPUT_DIR, "report", "salmon", '{analysis}.salmon.genes.deseq.report.html'),
     os.path.join(OUTPUT_DIR, "report", "salmon", '{analysis}.salmon.genes.deseq_results.tsv')
   resources:
-    mem_mb = config['execution']['rules']['reports']['memory']
+    mem_mb = config['execution']['rules']['report3']['memory']
   shell: "{RSCRIPT_EXEC} {params.reportR} --logo={params.logo} --prefix='{wildcards.analysis}.salmon.genes' --reportFile={params.reportRmd} --countDataFile={input.counts} --colDataFile={input.coldata} --gtfFile={GTF_FILE} --caseSampleGroups='{params.case}' --controlSampleGroups='{params.control}' --covariates='{params.covariates}' --workdir={params.outdir} --organism='{ORGANISM}' --description='{params.description}' --selfContained='{params.selfContained}' >> {log} 2>&1"
 
 rule deseq_collate_report3:
@@ -702,6 +702,6 @@ def hisat2_file_arguments(args):
   output:
     os.path.join(OUTPUT_DIR, "report", 'salmon', 'collated.genes.deseq_results.tsv')
   resources:
-    mem_mb = config['execution']['rules']['reports']['memory']
+    mem_mb = config['execution']['rules']['deseq_collate_report3']['memory']
   shell:
     "{RSCRIPT_EXEC} {params.script} {params.mapper} {params.inpdir} {params.outdir} >> {log} 2>&1"

From be44e322f6fb8f6c82e7e78148dbe964ea5383a3 Mon Sep 17 00:00:00 2001
From: Alexander Blume <alex.gos90@gmail.com>
Date: Wed, 3 Apr 2024 18:48:21 +0200
Subject: [PATCH 5/5] fix KeyError for counts_from_SALMON

---
 snakefile.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/snakefile.py b/snakefile.py
index 4eb69b4..73f3bc0 100644
--- a/snakefile.py
+++ b/snakefile.py
@@ -476,7 +476,7 @@ def hisat2_file_arguments(args):
       os.path.join(COUNTS_DIR, "normalized", "salmon", "TPM_counts_from_SALMON.transcripts.tsv"),
       os.path.join(COUNTS_DIR, "normalized", "salmon", "TPM_counts_from_SALMON.genes.tsv")
   resources:
-      mem_mb = config['execution']['rules']['counts_from_salmon']['memory']
+      mem_mb = config['execution']['rules']['counts_from_SALMON']['memory']
   log: os.path.join(LOG_DIR, "salmon", 'salmon_import_counts.log')
   shell: "{RSCRIPT_EXEC} {SCRIPTS_DIR}/counts_matrix_from_SALMON.R {SALMON_DIR} {COUNTS_DIR} {input.colDataFile} >> {log} 2>&1"