-
Notifications
You must be signed in to change notification settings - Fork 7
/
Detonate.sh
executable file
·193 lines (146 loc) · 4.53 KB
/
Detonate.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#!/bin/bash
# input - diretório contendo os arquivos de entrada no formato .fastq
input=$1
#Diretorio que contem os arquivos processados que servirão para alinhamento com o bowtie2
refassembly=$2
# Diretorio que contem a montagem de novo Trinity.fa
reftrans=$3
# Arquivo que contem o transcriptoma de referencia mais proximo para ser utilizado como base, para se fazer a distribuição dos transcritos
if [ ! ${input} ]
then
echo "Missing input directory"
exit
else
if [ ! -d ${input} ]
then
echo "Wrong input directory ${input}"
exit
fi
fi
if [ ! ${refassembly} ]
then
echo "Missing Trinity / PASA output directory"
exit
else
if [ ! -d ${refassembly} ]
then
echo "Wrong Trinity / PASA output directory ${refassembly}"
exit
fi
fi
if [ ! ${reftrans} ]
then
echo "Missing reference transcriptome fasta file"
exit
else
if [ ! -d ${refs} ]
then
echo "Wrong reference transcriptome fasta file ${reftrans}"
exit
fi
fi
# output - diretório para armazenar o resultado do processo de montagem
output=$4
if [ ! ${output} ]
then
echo "Missing output directory"
exit
else
if [ ! -d ${output} ]
then
echo "Wrong output directory ${output}"
exit
fi
fi
readlen=$5
if [ ! ${readlen} ]
then
readlen=150
echo "Warning: Read length not defined. Using default (${readlen})"
fi
num_threads=4
prefix=$6
if [ ! ${prefix} ]
then
prefix='Trinity'
fi
reffasta=`find ${refassembly} -type f -name "Trinity.fasta"`
refgene_trans_map=`find ${refassembly} -type f -name "Trinity.fasta.gene_trans_map"`
if [ ! ${reffasta} ]; then
echo "Error: Could'nt find reference assembly fasta file using prefix (${prefix})"
exit
fi
if [ ! ${refgene_trans_map} ]; then
echo "Error: Could'nt find reference assembly gene_trans_map file using prefix (${prefix})"
exit
fi
### Arquivos e diretórios de saída (output)
basedir_out="${output}"
mkdir -p ${basedir_out}
bowtie2_index="${basedir_out}/bowtie2_index"
mkdir -p ${bowtie2_index}
bowtie2_index="${bowtie2_index}/transcriptome"
bowtie2="${basedir_out}/bowtie2"
mkdir -p ${bowtie2}
if [ ! -e "${bowtie2_index}.1.bt2" ]; then
bowtie2-build -f ${reffasta} ${bowtie2_index} \
> ${bowtie2_index}.bowtie2-build.log.out.txt \
2> ${bowtie2_index}.bowtie2-build.log.err.txt
fi
if [ ! -e "${basedir_out}/All.aligned.bam" ]; then
for r1 in `ls ${input}/*_1.fastq`; do
r2=`echo ${r1} | sed 's/_1.fastq/_2.fastq/'`
if [ ! -e "${r2}" ]; then
echo "Read 2 (${r2}) paired with Read 1 ($r1) not found."
exit
fi
name=`basename ${r1} | sed 's/_1.fastq//'`
if [ ! -e "${bowtie2}/${name}.bam" ]; then
echo -e "Alinhamento com bowtie2 ${name}: ${r1} & ${r2} ...\n"
bowtie2 -p ${num_threads} \
--dpad 0 \
--gbar 99999999 \
--mp 1,1 \
--np 1 \
--score-min L,0,-0.1 \
--no-mixed \
--no-discordant \
-t -I 1 -X 1000 \
--end-to-end \
--fr \
--very-sensitive \
--reorder \
-x ${bowtie2_index} \
-1 ${r1} \
-2 ${r2} \
2> ${bowtie2}/${name}.log.txt \
| samtools view -S -b -o ${bowtie2}/${name}.bam -
if [ ! -e "${basedir_out}/Alg_bowtie2_header.sam" ]; then
echo "Samtools View header"
samtools view -H ${bowtie2}/${name}.bam > ${basedir_out}/Alg_bowtie2_header.sam
fi
fi
done
if [ ! -e "${basedir_out}/All.aligned.bam" ]; then
bamfiles=()
bamfiles=( $( find ${bowtie2} -name '*.bam' ) )
echo " Merging bamfiles"
samtools merge -f ${basedir_out}/All.aligned.bam ${bamfiles[*]}
samtools merge --threads ${num_threads} -f -h ${basedir_out}/Alg_bowtie2_header.sam ${basedir_out}/All.aligned.bam ${bamfiles[*]}
fi
fi
if [ ! -e "${basedir_out}/transcript-len-distrib.txt" ]; then
echo "Avaliando a distribuição dos tamanhos dos transcritos"
rsem-eval-estimate-transcript-length-distribution ${reftrans} ${basedir_out}/transcript-len-distrib.txt
fi
if [ ! -e "${basedir_out}/PE-detonate.score" ]; then
echo "Calculando score"
rsem-eval-calculate-score --forward-prob 0.5 \
--transcript-to-gene-map ${refgene_trans_map} \
--tag '' \
--num-threads ${num_threads} \
--transcript-length-parameters ${basedir_out}/transcript-len-distrib.txt \
--paired-end \
--bam ${basedir_out}/All.aligned.bam \
${reffasta} ${basedir_out}/PE-detonate ${readlen}
fi