# preprocessing_hg38.yaml
---
# Workflow-level metadata.
# NOTE(review): this file's indentation was lost, so the nesting here is
# reconstructed. Confirm against the workflow loader that log_dir and
# steps_status_dir belong under `info` and not at the top level.
info:
  name: "Preprocessing_FastQ_to_BAM"
  # NOTE(review): this holds a person's name, not a description - confirm.
  desc: "Ghislain Fournous"
  # Presumably where step logs are written - verify against the loader.
  log_dir: "output/Logs"
  # Presumably where per-step completion markers are kept - verify.
  steps_status_dir: "output/FinishedSteps"
workflow_steps:
#
# Mapping
#
# Step "bwa_mem_loop": runs the BWA_MEM_CSV tool. It is the first step and
# declares no upstream dependency.
# NOTE(review): nesting reconstructed after the indentation was lost; confirm
# that command_line_options is a sibling of step_options in the loader schema.
- tool: "BWA_MEM_CSV"
  id: "bwa_mem_loop"
  desc: "BWA-mem_csv"
  # NOTE(review): empty string here, whereas every later step uses a list
  # (e.g. ["bwa_mem_loop"]) - confirm the loader accepts both forms.
  depend_from: ""
  step_options:
    bwa_bin: "bwa"
    core: 16
    # Absolute path, unlike the relative output/ and references/ paths.
    input_csv: "/Workflow/input.csv"
    output_dir: "output/01_mapping_bwa"
    output_suffix: "_map"
    bwa_index: "references/Homo_sapiens_assembly38.fasta"
  command_line_options:
    bwa:
    # bwa mem -M: mark shorter split hits as secondary (see the bwa manual).
    - "-M"
#
# PreProcessing
#
# Step "Picard_SortMergeSam": runs Picard MergeSamFiles on the *.sam files in
# the bwa_mem_loop output directory, writing coordinate-sorted output.
# NOTE(review): nesting reconstructed after the indentation was lost; confirm
# that command_line_options is a sibling of step_options in the loader schema.
- tool: "Picard2MultiInput"
  id: "Picard_SortMergeSam"
  desc: "picard sorting"
  depend_from: ["bwa_mem_loop"]
  step_options:
    # Same directory as the output_dir of the bwa_mem_loop step.
    input_dir: "output/01_mapping_bwa"
    input_files: "*.sam"
    # Quoted, so this is the string "true", not a YAML boolean.
    group_by_samples: "true"
    # Presumably file names are split on "_" and field 0 is the grouping
    # key - verify against the tool. The key spelling ("spliter") is what
    # the consumer reads, so it is kept unchanged.
    group_spliter: "_"
    group_on_index: 0
    output_dir: "output/02_bamsorted"
    output_suffix: "_sorted"
    java_bin: "java"
    picard2_jar: "/Jar/picard.jar"
    picard2_command: "MergeSamFiles"
  command_line_options:
    java:
    # JVM maximum heap size and temporary directory.
    - "-Xmx15G"
    - "-Djava.io.tmpdir=/tmp/output/tmp"
    picard2:
    - "CREATE_INDEX=true"
    - "USE_THREADING=true"
    - "MAX_RECORDS_IN_RAM=1000000"
    - "SORT_ORDER=coordinate"
    - "VALIDATION_STRINGENCY=LENIENT"
# Step "picard_mark_duplicate": runs Picard MarkDuplicates on the *.bam files
# produced by Picard_SortMergeSam.
# NOTE(review): nesting reconstructed after the indentation was lost; confirm
# that command_line_options is a sibling of step_options in the loader schema.
- tool: "Picard2"
  id: "picard_mark_duplicate"
  # NOTE(review): mentions merging lanes, but picard2_command below is
  # MarkDuplicates; MergeSamFiles is the command of the previous step.
  desc: "picard mark duplicate and merge lanes"
  depend_from: ["Picard_SortMergeSam"]
  step_options:
    # Same directory as the output_dir of the Picard_SortMergeSam step.
    input_dir: "output/02_bamsorted"
    input_files: "*.bam"
    output_dir: "output/03_markdup"
    output_suffix: "_markdup"
    java_bin: "java"
    picard2_jar: "/Jar/picard.jar"
    picard2_command: "MarkDuplicates"
  command_line_options:
    java:
    # JVM maximum heap size and temporary directory.
    - "-Xmx15G"
    - "-Djava.io.tmpdir=/tmp/output/tmp"
    picard2:
    # NOTE(review): fixed relative file name - if the tool runs once per
    # input BAM, each run may overwrite the same metrics file. Confirm.
    - "METRICS_FILE=samples.metrics"
    - "VALIDATION_STRINGENCY=LENIENT"
    - "CREATE_INDEX=true"
    - "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000"
# Step "GATK_BaseRecalibrator": runs the GATK_BaseRecalibrator_PrintReads tool
# on the *.bam files produced by picard_mark_duplicate.
# NOTE(review): nesting reconstructed after the indentation was lost; confirm
# that command_line_options is a sibling of step_options in the loader schema.
- tool: "GATK_BaseRecalibrator_PrintReads"
  id: "GATK_BaseRecalibrator"
  desc: "GATK Fix Mate Base Recalibration"
  depend_from: ["picard_mark_duplicate"]
  # Quoted, so this is the string "true", not a YAML boolean. Only this step
  # sets `result`; presumably it marks the final output - verify.
  result: "true"
  step_options:
    core: 10
    # Same directory as the output_dir of the picard_mark_duplicate step.
    input_dir: "output/03_markdup"
    input_files: "*.bam"
    output_dir: "output/04_gatk_recal"
    output_suffix: "_recalib"
    java_bin: "java"
    gatk_jar: "/Jar/GenomeAnalysisTK.jar"
    # Same FASTA as bwa_index in the bwa_mem_loop step.
    ref_path: "references/Homo_sapiens_assembly38.fasta"
  command_line_options:
    java:
    # JVM maximum heap size and temporary directory.
    - "-Xmx15G"
    - "-Djava.io.tmpdir=/tmp/output/tmp"
    gatk:
    # Single-space placeholder: no extra options are set for this group.
    - " "
    gatk_recal:
    # Known-variant resources and covariates for BaseRecalibrator.
    - " -knownSites references/1000G_phase1.snps.high_confidence.hg38.vcf.gz "
    - " -knownSites references/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz "
    - " -knownSites references/dbsnp_146.hg38.vcf.gz "
    - " -cov ContextCovariate "
    - " -cov CycleCovariate "
    gatk_PrintReads:
    # NOTE(review): GATK's -nct expects an integer thread count and none is
    # given here - presumably the tool appends `core`; confirm.
    - " -nct "