Skip to content

Commit

Permalink
Added an RG rule before trimming
Browse files Browse the repository at this point in the history
  • Loading branch information
Jwong684 committed Jul 22, 2021
1 parent dc4620f commit 4abb545
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 13 deletions.
13 changes: 13 additions & 0 deletions modules/gatk_rnaseq/1.0/config/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ lcr-modules:
options:
java_opts: "-XX:ConcGCThreads=1"
gatk_splitntrim: " -fixNDN TRUE -RF GoodCigarReadFilter "
gatk_addRG:
platform: "illumina"
unit: "unit1"
stringency: "LENIENT"
gatk_baserecalibrator: ""
gatk_applybqsr: ""
gatk_variant_calling:
Expand All @@ -34,11 +38,13 @@ lcr-modules:


conda_envs:
picard: "{MODSDIR}/envs/picard-2.22.3.yaml"
gatk_rnaseq: "{MODSDIR}/envs/gatk-4.1.8.1.yaml"
bcftools: "{MODSDIR}/envs/bcftools-1.10.2.yaml"

threads:
gatk_splitntrim: 12
gatk_addRG: 12
gatk_base_recalibration: 12
gatk_applybqsr: 12
gatk_variant_calling: 24
Expand All @@ -50,15 +56,22 @@ lcr-modules:
resources:
gatk_splitntrim:
mem_mb: 48000
bam: 1
gatk_addRG:
mem_mb: 12000
bam: 1
gatk_base_recalibration:
mem_mb: 12000
bam: 1
gatk_applybqsr:
mem_mb: 12000
bam: 1
gatk_variant_calling:
mem_mb: 48000
bam: 1
gatk_variant_filtration:
mem_mb: 12000
bam: 1
merge_vcfs:
mem_mb: 10000
gatk_rnaseq_passed:
Expand Down
1 change: 1 addition & 0 deletions modules/gatk_rnaseq/1.0/envs/picard-2.22.3.yaml
41 changes: 28 additions & 13 deletions modules/gatk_rnaseq/1.0/gatk_rnaseq.smk
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,33 @@ rule _gatk_splitntrim:
> {log.stdout} 2> {log.stderr}
""")

rule _gatk_addRG:
input:
bam = str(rules._gatk_splitntrim.output)
output:
bam = temp(CFG["dirs"]["gatk_splitntrim"] + "bam_withRG/{seq_type}--{genome_build}/{sample_id}.withRG.bam")
params:
sampleName = "{sample_id}",
platform = CFG["options"]["gatk_addRG"]["platform"],
unit = CFG["options"]["gatk_addRG"]["unit"],
stringency = CFG["options"]["gatk_addRG"]["stringency"]
conda:
CFG["conda_envs"]["picard"]
log:
stdout = CFG["logs"]["gatk_splitntrim"] + "bam_withRG/{seq_type}--{genome_build}/{sample_id}.addRG.stdout.log"
threads:
CFG["threads"]["gatk_addRG"]
resources:
**CFG["resources"]["gatk_addRG"]
shell:
op.as_one_line("""
picard AddOrReplaceReadGroups VALIDATION_STRINGENCY={params.stringency} I={input.bam} O={output.bam} RGLB={params.sampleName} RGPL={params.platform} RGPU={params.unit} RGSM={params.sampleName} > {log.stdout} 2>&1 &&
samtools index {output.bam}
""")

rule _gatk_base_recalibration:
input:
bam = str(rules._gatk_splitntrim.output),
bam = str(rules._gatk_addRG.output),
fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa")
output:
table = CFG["dirs"]["base_recal_report"] + "{seq_type}--{genome_build}/{sample_id}.recalibration_report.grp"
Expand All @@ -120,7 +144,7 @@ rule _gatk_base_recalibration:

rule _gatk_applybqsr:
input:
bam = str(rules._gatk_splitntrim.output.bam),
bam = str(rules._gatk_addRG.output.bam),
table = str(rules._gatk_base_recalibration.output.table),
fasta = reference_files("genomes/{genome_build}/genome_fasta/genome.fa")
output:
Expand All @@ -144,15 +168,6 @@ rule _gatk_applybqsr:
-R {input.fasta} -I {input.bam} -bqsr-recal-file {input.table} -O {output.bam} > {log.stdout} 2> {log.stderr}
""")

# Symlink chromosomes used for parallelization
checkpoint _gatk_rnaseq_input_chrs:
input:
chrs = reference_files("genomes/{genome_build}/genome_fasta/main_chromosomes.txt")
output:
chrs = CFG["dirs"]["inputs"] + "chroms/{genome_build}/main_chromosomes.txt"
run:
op.relative_symlink(input.chrs, output.chrs)


rule _gatk_variant_calling:
input:
Expand Down Expand Up @@ -186,7 +201,7 @@ rule _gatk_variant_calling:

def _gatk_rnaseq_get_chr_vcfs(wildcards):
CFG = config["lcr-modules"]["gatk_rnaseq"]
chrs = checkpoints._gatk_rnaseq_input_chrs.get(**wildcards).output.chrs
chrs = reference_files("genomes/" + wildcards.genome_build + "/genome_fasta/main_chromosomes.txt")
with open(chrs) as file:
chrs = file.read().rstrip("\n").split("\n")
vcfs = expand(
Expand All @@ -198,7 +213,7 @@ def _gatk_rnaseq_get_chr_vcfs(wildcards):

def _gatk_rnaseq_get_chr_tbis(wildcards):
CFG = config["lcr-modules"]["gatk_rnaseq"]
chrs = checkpoints._gatk_rnaseq_input_chrs.get(**wildcards).output.chrs
chrs = reference_files("genomes/" + wildcards.genome_build + "/genome_fasta/main_chromosomes.txt")
with open(chrs) as file:
chrs = file.read().rstrip("\n").split("\n")
tbis = expand(
Expand Down

0 comments on commit 4abb545

Please sign in to comment.