-
Notifications
You must be signed in to change notification settings - Fork 1
/
mergenodes.sh
executable file
·162 lines (149 loc) · 5.91 KB
/
mergenodes.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#!/bin/sh
###############################
# input files are sorted bam files with readgroup info added to each file
# files were the result of splitting input file prior to running alignment
###############################
if [ $# != 11 ]
then
MSG="parameter mismatch."
echo -e "jobid:${PBS_JOBID}\nprogram=$0 stopped at line=$LINENO.\nReason=$MSG" | ssh iforge "mailx -s '[Support #200] Mayo variant identification pipeline' "$redmine""
exit 1;
else
set -x
echo `date`
outputdir=$1
infiles=$2
outfilewdups=$3
outfilenodups=$4
tmpfilewdups=$5
dupparms=$6
runfile=$7
elog=$8
olog=$9
email=${10}
scriptfile=$0
qsubfile=${11}
LOGS="jobid:${PBS_JOBID}\nqsubfile=$qsubfile\nerrorlog=$elog\noutputlog=$olog"
#sanity check
if [ ! -s $runfile ]
then
MSG="$runfile file not found"
exit 1;
fi
markdup=$( echo $dupparms | tr "_" "\n" | grep -w dup | cut -d '=' -f2 )
deldup=$( echo $dupparms | tr "_" "\n" | grep -w flag | cut -d '=' -f2 )
picardir=$( cat $runfile | grep -w PICARDIR | cut -d "=" -f2 )
samdir=$( cat $runfile | grep -w SAMDIR | cut -d "=" -f2 )
javamodule=$( cat $runfile | grep -w JAVAMODULE | cut -d "=" -f2 )
if [ ! -d $outputdir ]
then
MSG="$outputdir results directory not found"
echo -e "program=$scriptfile stopped at line=$LINENO.\nReason=$MSG\n$LOGS" | ssh iforge "mailx -s '[Support #200] Mayo variant identification pipeline' "$redmine,$email""
exit 1;
fi
if [ ! -d $picardir ]
then
MSG="$picardir picard directory not found"
echo -e "program=$scriptfile stopped at line=$LINENO.\nReason=$MSG\n$LOGS" | ssh iforge "mailx -s '[Support #200] Mayo variant identification pipeline' "$redmine,$email""
exit 1;
fi
if [ ! -d $samdir ]
then
MSG="$samdir samtools directory not found"
echo -e "program=$scriptfile stopped at line=$LINENO.\nReason=$MSG\n$LOGS" | ssh iforge "mailx -s '[Support #200] Mayo variant identification pipeline' "$redmine,$email""
exit 1;
fi
if [ `expr length $infiles` -lt 1 ]
then
MSG="$infiles empty list of files to merge"
echo -e "program=$scriptfile stopped at line=$LINENO.\nReason=$MSG\n$LOGS" | ssh iforge "mailx -s '[Support #200] Mayo variant identification pipeline' "$redmine,$email""
exit 1;
fi
if [ -z $javamodule ]
then
MSG="Value for JAVAMODULE must be specified in configuration file"
echo -e "program=$scriptfile stopped at line=$LINENO.\nReason=$MSG\n$LOGS" | ssh iforge "mailx -s '[Support #200] Mayo variant identification pipeline' "$redmine,$email""
exit 1;
else
`/usr/local/modules-3.2.9.iforge/Modules/bin/modulecmd bash load $javamodule`
fi
listfiles=$( echo $infiles | tr ":" " " )
cd $outputdir
echo "found sorted bam files to be merged. all set to go..."
java -Xmx6g -Xms512m -jar $picardir/MergeSamFiles.jar $listfiles \
MAX_RECORDS_IN_RAM=null \
TMP_DIR=$outputdir \
OUTPUT=$tmpfilewdups \
CREATE_INDEX=true \
USE_THREADING=true \
VALIDATION_STRINGENCY=SILENT
#$samdir/samtools merge $outfilewdups $infiles
echo `date`
if [ ! -s $tmpfilewdups ]
then
MSG="$tmpfilewdups file not created. picard-merge step failed"
echo -e "program=$scriptfile stopped at line=$LINENO.\nReason=$MSG\n$LOGS" | ssh iforge "mailx -s '[Support #200] Mayo variant identification pipeline' "$redmine,$email""
exit 1;
fi
echo "indexing merged bam file"
$samdir/samtools index $tmpfilewdups
$samdir/samtools flagstat $tmpfilewdups > $tmpfilewdups.flagstat
$samdir/samtools view -H $tmpfilewdups > $tmpfilewdups.header
echo `date`
# marking and or removing duplicates
if [ $markdup == "YES" ]
then
echo "marking duplicates in sorted bam file"
java -Xmx6g -Xms512m -jar $picardir/MarkDuplicates.jar \
INPUT=$tmpfilewdups \
OUTPUT=$outfilewdups \
TMP_DIR=$outputdir \
METRICS_FILE=$outfilewdups.dup.metrics \
MAX_RECORDS_IN_RAM=null \
CREATE_INDEX=true \
VALIDATION_STRINGENCY=SILENT
echo `date`
if [ ! -s $outfilewdups ]
then
MSG="$outfilewdups file not created. markDuplicates step failed"
echo -e "program=$scriptfile stopped at line=$LINENO.\nReason=$MSG\n$LOGS" | ssh iforge "mailx -s '[Support #200] Mayo variant identification pipeline' "$redmine,$email""
exit 1;
fi
echo "indexing bam file w marked duplicates"
$samdir/samtools index $outfilewdups
$samdir/samtools flagstat $outfilewdups > $outfilewdups.flagstat
$samdir/samtools view -H $outfilewdups > $outfilewdups.header
else
echo "we need to copy tmpfilewdups to outfilewdups now"
echo "in case realignment follows"
`cp $tmpfilewdups $outfilewdups`
`cp $tmpfilewdups.flagstat $outfilewdups.flagstat`
`cp $tmpfilewdups.header $outfilewdups.header`
fi
if [ $deldup == "TRUE" ]
then
echo "removing marked duplicates in sorted bam file"
java -Xmx6g -Xms512m -jar $picardir/MarkDuplicates.jar \
INPUT=$tmpfilewdups \
OUTPUT=$outfilenodups \
TMP_DIR=$outputdir \
METRICS_FILE=$outfilenodups.dup.metrics \
MAX_RECORDS_IN_RAM=null \
CREATE_INDEX=true \
REMOVE_DUPLICATES=true \
VALIDATION_STRINGENCY=SILENT
echo `date`
if [ ! -s $outfilenodups ]
then
MSG="$outfilenodups file not created. RemoveDuplicates step failed"
echo -e "program=$scriptfile stopped at line=$LINENO.\nReason=$MSG\n$LOGS" | ssh iforge "mailx -s '[Support #200] Mayo variant identification pipeline' "$redmine,$email""
exit 1;
fi
echo "indexing bam file w removed duplicates"
$samdir/samtools index $outfilenodups
$samdir/samtools flagstat $outfilenodups > $outfilenodups.flagstat
$samdir/samtools view -H $outfilenodups > $outfilenodups.header
fi
echo `date`
fi