-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathwgbs_cfDNA_alignment_calling.sub
81 lines (66 loc) · 2.74 KB
/
wgbs_cfDNA_alignment_calling.sub
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/bin/bash
#PBS -P FFbigdata
#PBS -N wgbs_align
#PBS -l select=1:ncpus=12:mem=72GB
#PBS -l walltime=64:00:00
#PBS -M [email protected]
#PBS -q alloc-op
module load fastqc
module load cutadapt/1.8.3
module load python/3.5.1
pip install multiqc
module load bismark
module load bowtie2
module load samtools
module load trimgalore
# Specify project directory where fastq files are contained in subdirectories
DIR=/project/RDS-SMS-FFbigdata-RW/Epigenetics/wgbs/CHA4401_20171205_wgbscfdna-55759704/fastq
# Specify directory of bisulfite converted reference genome
BISGENOME=/project/RDS-SMS-FFbigdata-RW/local_lib/genomes/hg19/
#############################
### Fastqc & and trimming ###
#############################
#fastqc; pre_cut
cd $DIR
mkdir precut
fastqc *fastq.gz -o precut
# summarize with multiqc
multiqc ./precut/. -o precut
# trim galore; trim poor quality < Q20
cd $DIR/
for i in $(ls *.fastq.gz | rev | cut -c 17- | rev | uniq)
do
trim_galore --paired --fastqc ${i}_R1_001.fastq.gz ${i}_R2_001.fastq.gz
done
# multiqc; post_cut
mkdir postcut
mv *fastqc* postcut
multiqc ./postcut/. -o postcut
########################
### bismark alignment ###
#########################
cd $DIR/
for i in $(ls *val_1.fq.gz | rev | cut -c 20- | rev | uniq)
do
bismark --bowtie2 --bam $BISGENOME -1 ${i}_R1_001_val_1.fq.gz -2 ${i}_R2_001_val_2.fq.gz
done
###########################
### bismark deduplicate ###
###########################
FILES=$DIR/*
for f in $FILES
do
if [[ "$f" == *.bam ]]
then
deduplicate_bismark --bam $f >> $DIR/deduplicate_bismark_output.txt
fi
done
######################################
### bismark methylation exctractor ###
######################################
bismark_methylation_extractor -p --comprehensive --include_overlap --ignore 5 --ignore_r2 5 --ignore_3prime 2 --ignore_3prime_r2 2 --cytosine_report --CX --genome_folder --multicore 4 --gzip $BISGENOME *.sam*
######################
### bismark report ###
######################
cd $DIR/
bismark2report