-
Notifications
You must be signed in to change notification settings - Fork 0
/
Illumina_QC.sh
60 lines (51 loc) · 1.81 KB
/
Illumina_QC.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/bin/bash
# Quality control for one Illumina paired-end FASTQ set: checksum the raw
# reads, trim/filter them, merge overlapping pairs, and run FastQC.
#
# Originally written by Chris Berthiaume
# Modified by Ryan Groussman
#
# Arguments: read1.fastq read2.fastq output_prefix
#
# Requirements (all must be in PATH):
# - openssl
# - trimmomatic
# - flash 1.2.11
# - pigz (handed to flash as its compression program)
# - fastqc
# NOTE(review): trimfastq v0.4 was listed here previously, but nothing in this
# script invokes it.

# Abort on the first failing command. This is set here rather than on the
# shebang line so it also applies when the script is started as
# `bash <script>`, where shebang options are ignored.
set -e

# Adapter file path (FASTA given to trimmomatic's ILLUMINACLIP step).
# Set ADAPTER_FILE in the environment to override the default location.
ADAPTER_FILE="${ADAPTER_FILE:-/home/ubuntu/scripts/TruSeq2-PE.fa}"
# Argument parsing
# Expects two existing FASTQ files followed by an output prefix. Every
# problem is reported on stderr together with the usage line, and the script
# exits with status 1. Arguments beyond the third are ignored.
usage="usage: ${0##*/} read1.fastq read2.fastq output_prefix"

# Print an optional message plus the usage line to stderr, then exit 1.
usage_error() {
  if [[ -n "${1:-}" ]]; then
    echo "$1" >&2
  fi
  echo "$usage" >&2
  exit 1
}

# No arguments at all: the caller just wants to know how to run the script.
if [[ $# -eq 0 ]]; then
  usage_error
fi
# Catch a short argument list up front so the user is not told that a file
# with an empty name is missing.
if [[ $# -lt 3 ]]; then
  usage_error "Expected 3 arguments, got $#"
fi
if [[ ! -f "$1" ]]; then
  usage_error "File '$1' not found"
fi
if [[ ! -f "$2" ]]; then
  usage_error "File '$2' not found"
fi
# Still reachable when the third argument is passed as an empty string.
if [[ -z "$3" ]]; then
  usage_error "Missing output prefix"
fi
# MD5 calculation on raw files
# One digest line per input, read 1 first, collected in
# "<prefix>.raw_md5sums.txt". The file is rewritten from scratch on each run.
for raw_reads in "$1" "$2"; do
  openssl md5 "$raw_reads"
done > "$3.raw_md5sums.txt"
# Trim and filter
# Check the adapter FASTA before starting: everything trimmomatic prints is
# redirected to the log file below, so a bad adapter path would otherwise
# give no feedback on the terminal.
if [[ ! -r "$ADAPTER_FILE" ]]; then
  echo "Adapter file '$ADAPTER_FILE' not found or not readable" >&2
  exit 1
fi

# Paired-end mode takes the two input files and writes four outputs: a
# "paired" and an "unpaired" file for each mate.
# Per the Trimmomatic manual, steps run in the order they are listed:
#   ILLUMINACLIP:<fasta>:2:30:10:1:true
#       adapter clipping; the fields after the FASTA are seed mismatches,
#       palindrome clip threshold, simple clip threshold, minimum adapter
#       length, and keepBothReads
#   MAXINFO:135:0.5   adaptive trim (target length 135, strictness 0.5)
#   LEADING:3         cut leading bases with quality below 3
#   TRAILING:3        cut trailing bases with quality below 3
#   MINLEN:60         drop reads shorter than 60 bases
#   AVGQUAL:20        drop reads whose average quality is below 20
# stdout and stderr are appended to "<prefix>.trimmomatic.log".
trimmomatic PE "$1" "$2" \
  "$3.1.paired.trim.fastq.gz" "$3.1.unpaired.trim.fastq.gz" \
  "$3.2.paired.trim.fastq.gz" "$3.2.unpaired.trim.fastq.gz" \
  ILLUMINACLIP:"$ADAPTER_FILE":2:30:10:1:true \
  MAXINFO:135:0.5 LEADING:3 TRAILING:3 MINLEN:60 AVGQUAL:20 >>"$3.trimmomatic.log" 2>&1
# Merge pairs
# flash options used below:
#   -r 150 : read length 150
#   -f 250 : fragment length 250
#   -s 25  : fragment length stdev (~10% of fragment length)
# The argument list is built once and used both for the log entry and for the
# real invocation, so the two always match.
flash_log="$3.flash.log"
flash_cmd=(
  flash --compress-prog=pigz --suffix=gz -o "$3.flash"
  -r 150 -f 250 -s 25 --interleaved-output
  "$3.1.paired.trim.fastq.gz" "$3.2.paired.trim.fastq.gz"
)
# The log is started fresh with the flash version, ...
flash --version >"$flash_log" 2>&1
# ... followed by the command line (array elements joined by single spaces), ...
echo "${flash_cmd[*]}" >>"$flash_log" 2>&1
# ... followed by whatever flash itself prints while merging.
"${flash_cmd[@]}" >>"$flash_log" 2>&1
# FASTQC reports
# Files handed to fastqc: both raw inputs, both trimmed "paired" outputs, and
# the extendedFrags file (presumably written by the flash step under its
# "<prefix>.flash" output prefix).
qc_inputs=(
  "$1" "$2"
  "$3.1.paired.trim.fastq.gz" "$3.2.paired.trim.fastq.gz"
  "$3.flash.extendedFrags.fastq.gz"
)
fastqc "${qc_inputs[@]}"
# NOTE(review): disabled leftover; nothing in this script writes *trimAT.log files.
# pigz "$3".*trimAT.log