Skip to content

Commit

Permalink
update mutect2 script to include scatter option
Browse files Browse the repository at this point in the history
  • Loading branch information
berntpopp committed Sep 9, 2023
1 parent f2c59dd commit 3104321
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 25 deletions.
4 changes: 3 additions & 1 deletion analyses/calling/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ reference: 'analysis/ref/GRCh38/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.
reference_unpacked: 'analysis/ref/GRCh38/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna'
panel_of_normals: 'analysis/GATK_resource_bundle/1000g_pon.hg38.vcf.gz'
af_only_gnomad: 'analysis/GATK_resource_bundle/af-only-gnomad.hg38.vcf.gz'
reference_version: 'GRCh38'
reference_version: 'GRCh38'
mutect_scatter_by_chromosome: True
final_bam_file_extension: ".merged.dedup.bqsr.bam"
62 changes: 38 additions & 24 deletions analyses/calling/mutect2_calling.smk
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def get_mem_from_threads(wildcards, threads):
# ----------------------------------------------------------------------------------- #
# Extract user-defined input and output directories and reference (unpacked) file from the configuration file
INPUT_DIR = config["final_bam_folder"]
FINAL_BAM_FILE_EXTENSION = config.get("final_bam_file_extension", ".bam")
OUTPUT_DIR = config["output_folder"]
REFERENCE_FILE = config["reference_unpacked"]
PANEL_OF_NORMALS = config["panel_of_normals"]
Expand All @@ -36,57 +37,70 @@ with open('calling_metadata.tsv', 'r') as f:
prefix_results = functools.partial(os.path.join, config['output_folder'])
VARIANT_DIR = prefix_results('variant_calls')
LOG_DIR = prefix_results('logs')
# ----------------------------------------------------------------------------------- #

# List of chromosomes to loop through
chromosomes = [f"chr{i}" for i in range(1, 22)] + ["chrX", "chrY"]
# ----------------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------------- #
# Rule "all": Defines the final output of the pipeline
# Define the rules
rule all:
input:
expand(
f"{VARIANT_DIR}/{{individual1}}_{{analysis}}_{{chromosome}}.vcf.gz",
individual1={row['individual1'] for row in metadata_dict.values()},
analysis={row['analysis'] for row in metadata_dict.values()},
chromosome=chromosomes
individual1=[row['individual1'] for row in metadata_dict.values()],
analysis=[row['analysis'] for row in metadata_dict.values()],
chromosome=chromosomes if config.get('mutect_scatter_by_chromosome', False) else ['all']
)

# ----------------------------------------------------------------------------------- #
# Rule "mutect2_call": Performs variant calling using GATK's Mutect2
rule mutect2_call:

rule call_variants:
input:
bam1 = lambda wildcards: "{}/{}.merged.dedup.bqsr.bam".format(INPUT_DIR, metadata_dict["{}_{}".format(wildcards.individual1, wildcards.analysis)]['bam1_file_basename']),
bam2 = lambda wildcards: "{}/{}.merged.dedup.bqsr.bam".format(INPUT_DIR, metadata_dict["{}_{}".format(wildcards.individual1, wildcards.analysis)]['bam2_file_basename'])
bam1 = lambda wildcards: os.path.join(INPUT_DIR, metadata_dict[f"{wildcards.individual1}_{wildcards.analysis}"]['bam1_file_basename'] + FINAL_BAM_FILE_EXTENSION),
bam2 = lambda wildcards: os.path.join(INPUT_DIR, metadata_dict[f"{wildcards.individual1}_{wildcards.analysis}"].get('bam2_file_basename', '') + (FINAL_BAM_FILE_EXTENSION if metadata_dict[f"{wildcards.individual1}_{wildcards.analysis}"].get('bam2_file_basename') else '')),
output:
variant_file = f"{VARIANT_DIR}/{{individual1}}_{{analysis}}_{{chromosome}}.vcf.gz"
threads: 4
variant_file = f"{VARIANT_DIR}/{{individual1}}_{{analysis}}_{{chromosome}}.vcf.gz",
threads: 4 # Adjust as necessary
resources:
mem_mb = get_mem_from_threads, # Memory in MB based on the number of threads
time = '72:00:00', # Time limit for the job
tmpdir = SCRATCH_DIR # Temporary directory
mem_mb = get_mem_from_threads,
time = '72:00:00',
tmpdir = SCRATCH_DIR
conda:
"gatk" # This sets the Conda environment for this rule
"gatk"
params:
reference = REFERENCE_FILE,
normal_sample = lambda wildcards: metadata_dict["{}_{}".format(wildcards.individual1, wildcards.analysis)]['sample2'],
individual = lambda wildcards: metadata_dict["{}_{}".format(wildcards.individual1, wildcards.analysis)]['individual1'],
analysis = lambda wildcards: metadata_dict["{}_{}".format(wildcards.individual1, wildcards.analysis)]['analysis'],
normal_sample = lambda wildcards: metadata_dict[f"{wildcards.individual1}_{wildcards.analysis}"].get('sample2', ''),
individual = lambda wildcards: metadata_dict[f"{wildcards.individual1}_{wildcards.analysis}"]['individual1'],
analysis = lambda wildcards: metadata_dict[f"{wildcards.individual1}_{wildcards.analysis}"]['analysis'],
panel_of_normals = PANEL_OF_NORMALS,
af_only_gnomad = AF_ONLY_GNOMAD
log:
mutect2 = f"{LOG_DIR}/{{individual1}}_{{analysis}}_{{chromosome}}.mutect2.log"
shell:
"""
# Determine if bam2 file is provided and set the respective options
bam2_option=""
normal_sample_option=""
if [ ! -z "{input.bam2}" ] && [ "{input.bam2}" != "results/dedup" ]; then
bam2_option="-I {input.bam2}"
normal_sample_option="-normal {params.normal_sample}"
fi
# Determine if scattering by chromosome is enabled and set the respective option
scatter_option=""
if [ "{config[mutect_scatter_by_chromosome]}" = "True" ] && [ "{wildcards.chromosome}" != "all" ]; then
scatter_option="-L {wildcards.chromosome}"
fi
gatk --java-options '-Xms4000m -Xmx10g -Djava.io.tmpdir={resources.tmpdir}' Mutect2 \
-R {params.reference} \
-I {input.bam1} \
-I {input.bam2} \
-normal {params.normal_sample} \
$bam2_option \
$normal_sample_option \
--germline-resource {params.af_only_gnomad} \
--panel-of-normals {params.panel_of_normals} \
--f1r2-tar-gz {params.individual}_{params.analysis}_{wildcards.chromosome}.f1r2.tar.gz \
-L {wildcards.chromosome} \
--f1r2-tar-gz {OUTPUT_DIR}/{params.individual}_{params.analysis}_{wildcards.chromosome}.f1r2.tar.gz \
$scatter_option \
-O {output.variant_file} 2> {log.mutect2}
"""

# ----------------------------------------------------------------------------------- #

0 comments on commit 3104321

Please sign in to comment.