forked from ospupegam/ngs2-assignment
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfinal code.sh(2-5)
75 lines (52 loc) · 4.36 KB
/
final code.sh(2-5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
mkdir /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/chr22
wget http://genomedata.org/rnaseq-tutorial/fasta/GRCh38/chr22_with_ERCC92.fa
gunzip chr22_with_ERCC92.fa.gz
#Mapping to the reference
conda install star
genomeDir=/media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/chr22
mkdir $genomeDir
STAR --runMode genomeGenerate --genomeDir $genomeDir --genomeFastaFiles /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/chr22/chr22_with_ERCC92.fa --runThreadN 1
#Alignment jobs
cd /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass
runDir=/media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/1pass
mkdir $runDir
cd $runDir
cp /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/SRR8797509_1.part_001.fastq.gz /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/1pass/SRR8797509_1.part_001.fastq.gz
cp /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/SRR8797509_2.part_001.fastq.gz /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/1pass/SRR8797509_2.part_001.fastq.gz
gunzip SRR8797509_1.part_001.fastq.gz
gunzip SRR8797509_2.part_001.fastq.gz
STAR --genomeDir $genomeDir --readFilesIn /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/1pass/SRR8797509_1.part_001.fastq /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/1pass/SRR8797509_2.part_001.fastq --runThreadN 1
# 2pass
genomeDir=/media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/chr22_2pass
mkdir $genomeDir
STAR --runMode genomeGenerate --genomeDir $genomeDir --genomeFastaFiles /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/chr22/chr22_with_ERCC92.fa --sjdbFileChrStartEnd /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/1pass/SJ.out.tab --sjdbOverhang 150 --runThreadN 1
runDir=/media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/2pass
mkdir $runDir
cd $runDir
STAR --genomeDir $genomeDir --readFilesIn /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/1pass/SRR8797509_1.part_001.fastq /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/1pass/SRR8797509_2.part_001.fastq --runThreadN 1
#Add read groups, sort, mark duplicates, and create index
picard_path=$CONDA_PREFIX/share/picard-2.19.2-0
java -jar $picard_path/picard.jar AddOrReplaceReadGroups I=Aligned.out.sam O=rg_added_sorted.bam SO=coordinate RGID=id RGLB=library RGPL=platform RGPU=machine RGSM=sample
java -jar $picard_path/picard.jar MarkDuplicates I=rg_added_sorted.bam O=dedupped.bam CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT M=output.metrics
samtools view -H rg_added_sorted.bam
samtools view -H dedupped.bam
# Split'N'Trim and reassign mapping qualities
samtools faidx /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/chr22/chr22_with_ERCC92.fa
java -Xmx2g -jar $picard_path/picard.jar CreateSequenceDictionary R=/media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/chr22/chr22_with_ERCC92.fa O=./chr22_with_ERCC92.dict
java -jar gatk SplitNCigarReads -R /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/chr22/chr22_with_ERCC92.fa -I dedupped.bam -O split.bam
#indexing
picard_path=$CONDA_PREFIX/share/picard-2.19.2-0
java -Xmx2g -jar $picard_path/picard.jar BuildBamIndex VALIDATION_STRINGENCY=LENIENT INPUT=split.bam
java -Xmx2g -jar $picard_path/picard.jar CreateSequenceDictionary R=/media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/chr22/chr22_with_ERCC92.fa O=./chr22_with_ERCC92.dict
samtools faidx /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/chr22/chr22_with_ERCC92.fa
# Download known varinats
cd ~/media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/1pass
wget ftp://ftp.ensembl.org/pub/grch37/current/variation/vcf/homo_sapiens/homo_sapiens-chr22.vcf.gz -O chr22.vcf.gz
gunzip chr22.vcf.gz
# base recalibration:
gatk --java-options "-Xmx2G" BaseRecalibrator -R /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/chr22/chr22_with_ERCC92.fa -I split.bam --known-sites fam_chr22.vcf -O split.report
#BQSR
gatk --java-options "-Xmx2G" ApplyBQSR -R /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/chr22/chr22_with_ERCC92.fa -I split.bam -bqsr split.report -O split.bqsr.bam --add-output-sam-program-record --emit-original-quals
# Variant calling
gatk --java-options "-Xmx4g" HaplotypeCaller -R /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/chr22/chr22_with_ERCC92.fa -I /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/1pass/split.bam -O chr22.g.vcf.gz -ERC GVCF
gatk VariantFiltration -R /media/ngs-01/My_Passport/ngs2workspace/ngs2_ass/chr22/chr22_with_ERCC92.fa -V chr22.g.vcf.gz -O outputchr22.g.vcf.gz --filter-name "my_filter1" --filter-expression "AB < 0.2" --filter-name "my_filter2" --filter-expression "MQ0 > 50"