diff --git a/modules/gatk_rnaseq/1.0/config/default.yaml b/modules/gatk_rnaseq/1.0/config/default.yaml index 2cf50aee..72b4d793 100644 --- a/modules/gatk_rnaseq/1.0/config/default.yaml +++ b/modules/gatk_rnaseq/1.0/config/default.yaml @@ -13,6 +13,10 @@ lcr-modules: options: java_opts: "-XX:ConcGCThreads=1" gatk_splitntrim: " -fixNDN TRUE -RF GoodCigarReadFilter " + gatk_addRG: + platform: "illumina" + unit: "unit1" + stringency: "LENIENT" gatk_baserecalibrator: "" gatk_applybqsr: "" gatk_variant_calling: @@ -34,11 +38,13 @@ lcr-modules: conda_envs: + picard: "{MODSDIR}/envs/picard-2.22.3.yaml" gatk_rnaseq: "{MODSDIR}/envs/gatk-4.1.8.1.yaml" bcftools: "{MODSDIR}/envs/bcftools-1.10.2.yaml" threads: gatk_splitntrim: 12 + gatk_addRG: 12 gatk_base_recalibration: 12 gatk_applybqsr: 12 gatk_variant_calling: 24 @@ -50,15 +56,22 @@ lcr-modules: resources: gatk_splitntrim: mem_mb: 48000 + bam: 1 + gatk_addRG: + mem_mb: 12000 + bam: 1 gatk_base_recalibration: mem_mb: 12000 + bam: 1 gatk_applybqsr: mem_mb: 12000 + bam: 1 gatk_variant_calling: mem_mb: 48000 bam: 1 gatk_variant_filtration: mem_mb: 12000 + bam: 1 merge_vcfs: mem_mb: 10000 gatk_rnaseq_passed: diff --git a/modules/gatk_rnaseq/1.0/envs/picard-2.22.3.yaml b/modules/gatk_rnaseq/1.0/envs/picard-2.22.3.yaml new file mode 120000 index 00000000..8bcf2e66 --- /dev/null +++ b/modules/gatk_rnaseq/1.0/envs/picard-2.22.3.yaml @@ -0,0 +1 @@ +../../../../envs/picard/picard-2.22.3.yaml \ No newline at end of file diff --git a/modules/gatk_rnaseq/1.0/gatk_rnaseq.smk b/modules/gatk_rnaseq/1.0/gatk_rnaseq.smk index 85653697..493a5504 100644 --- a/modules/gatk_rnaseq/1.0/gatk_rnaseq.smk +++ b/modules/gatk_rnaseq/1.0/gatk_rnaseq.smk @@ -92,9 +92,33 @@ rule _gatk_splitntrim: > {log.stdout} 2> {log.stderr} """) +rule _gatk_addRG: + input: + bam = str(rules._gatk_splitntrim.output) + output: + bam = temp(CFG["dirs"]["gatk_splitntrim"] + "bam_withRG/{seq_type}--{genome_build}/{sample_id}.withRG.bam") + params: + sampleName = "{sample_id}", + platform = CFG["options"]["gatk_addRG"]["platform"], + unit = CFG["options"]["gatk_addRG"]["unit"], + stringency = CFG["options"]["gatk_addRG"]["stringency"] + conda: + CFG["conda_envs"]["picard"] + log: + stdout = CFG["logs"]["gatk_splitntrim"] + "bam_withRG/{seq_type}--{genome_build}/{sample_id}.addRG.stdout.log" + threads: + CFG["threads"]["gatk_addRG"] + resources: + **CFG["resources"]["gatk_addRG"] + shell: + op.as_one_line(""" + picard AddOrReplaceReadGroups VALIDATION_STRINGENCY={params.stringency} I={input.bam} O={output.bam} RGLB={params.sampleName} RGPL={params.platform} RGPU={params.unit} RGSM={params.sampleName} > {log.stdout} 2>&1 && + samtools index {output.bam} + """) + rule _gatk_base_recalibration: input: - bam = str(rules._gatk_splitntrim.output), + bam = str(rules._gatk_addRG.output), fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa") output: table = CFG["dirs"]["base_recal_report"] + "{seq_type}--{genome_build}/{sample_id}.recalibration_report.grp" @@ -120,7 +144,7 @@ rule _gatk_base_recalibration: rule _gatk_applybqsr: input: - bam = str(rules._gatk_splitntrim.output.bam), + bam = str(rules._gatk_addRG.output.bam), table = str(rules._gatk_base_recalibration.output.table), fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa") output: @@ -144,15 +168,6 @@ rule _gatk_applybqsr: -R {input.fasta} -I {input.bam} -bqsr-recal-file {input.table} -O {output.bam} > {log.stdout} 2> {log.stderr} """) -# Symlink chromosomes used for parallelization -checkpoint _gatk_rnaseq_input_chrs: - input: - chrs = reference_files("genomes/{genome_build}/genome_fasta/main_chromosomes.txt") - output: - chrs = CFG["dirs"]["inputs"] + "chroms/{genome_build}/main_chromosomes.txt" - run: - op.relative_symlink(input.chrs, output.chrs) - rule _gatk_variant_calling: input: @@ -186,7 +201,7 @@ rule _gatk_variant_calling: def _gatk_rnaseq_get_chr_vcfs(wildcards): CFG = config["lcr-modules"]["gatk_rnaseq"] - chrs = checkpoints._gatk_rnaseq_input_chrs.get(**wildcards).output.chrs + chrs = reference_files("genomes/" + wildcards.genome_build + "/genome_fasta/main_chromosomes.txt") with open(chrs) as file: chrs = file.read().rstrip("\n").split("\n") vcfs = expand( @@ -198,7 +213,7 @@ def _gatk_rnaseq_get_chr_vcfs(wildcards): def _gatk_rnaseq_get_chr_tbis(wildcards): CFG = config["lcr-modules"]["gatk_rnaseq"] - chrs = checkpoints._gatk_rnaseq_input_chrs.get(**wildcards).output.chrs + chrs = reference_files("genomes/" + wildcards.genome_build + "/genome_fasta/main_chromosomes.txt") with open(chrs) as file: chrs = file.read().rstrip("\n").split("\n") tbis = expand(