-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path06_annotate_function.sh
executable file
·66 lines (60 loc) · 1.84 KB
/
06_annotate_function.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/bash -l
#SBATCH --nodes=1
#SBATCH --ntasks=16 --mem 16gb
#SBATCH --output=logs/annotfunc.%a.log
#SBATCH --time=2-0:00:00
#SBATCH -p intel -J annotfunc
#
# Functional annotation step of the pipeline: runs `funannotate annotate`
# for a single row of the sample table. The row is picked by the Slurm array
# task id, or by the first command-line argument when run outside an array.
#
# The #SBATCH lines above are scheduler directives (1 node, 16 tasks, 16 GB,
# 2-day limit, partition "intel", job name "annotfunc"); %a in the log file
# name is the Slurm placeholder for the array task index.
#
# Swap the environment over to the funannotate toolchain. The conda, perl and
# python modules are unloaded first -- presumably so they cannot shadow the
# versions the funannotate module provides (TODO confirm).
module unload miniconda2 miniconda3 perl python
module load funannotate
module load phobius
# Directory funannotate reads its reference databases from.
export FUNANNOTATE_DB=/bigdata/stajichlab/shared/lib/funannotate_db
# ---- Run configuration ------------------------------------------------------
# CPUS     : thread count handed to funannotate; Slurm's per-node CPU count,
#            or 1 when the script is run outside Slurm.
# OUTDIR   : directory holding one funannotate project folder per sample.
# INDIR    : directory holding the <BASE>.masked.fasta genome files.
# SAMPFILE : comma-separated sample table; its first line is a header.
# BUSCO    : lineage name passed to funannotate via --busco_db.
CPUS=${SLURM_CPUS_ON_NODE:-1}
OUTDIR=annotate
INDIR=genomes
SAMPFILE=samples.csv
BUSCO=fungi_odb10

# ---- Which sample row to process --------------------------------------------
# The Slurm array index wins; the first command-line argument is the fallback.
N=${SLURM_ARRAY_TASK_ID:-${1:-}}
if [[ -z "$N" ]]; then
  echo "need to provide a number by --array or cmdline" >&2
  exit 1
fi
# Reject anything that is not a positive integer up front: a non-numeric value
# would otherwise only produce a test error and then be handed to sed as-is.
if [[ ! "$N" =~ ^[1-9][0-9]*$ ]]; then
  echo "sample number must be a positive integer, got '$N'" >&2
  exit 1
fi
if [[ ! -r "$SAMPFILE" ]]; then
  echo "cannot read sample file $SAMPFILE" >&2
  exit 1
fi

# Count data rows only. The header line is skipped when rows are read later,
# so counting it here would let N point one past the last sample. awk's NR
# also counts a final line that lacks a trailing newline, which `wc -l` misses.
MAX=$(tail -n +2 "$SAMPFILE" | awk 'END { print NR }')
if (( N > MAX )); then
  echo "$N is too big, only $MAX samples in $SAMPFILE" >&2
  exit 1
fi
# ---- Annotate the N-th sample ------------------------------------------------
# Reads data row N of $SAMPFILE (header skipped), derives the per-sample names,
# verifies the masked genome and the submission template exist, stages a local
# antiSMASH result into the funannotate project if one is available, and runs
# `funannotate annotate` on the project folder $OUTDIR/<BASE>.
#
# The row is fed through process substitution rather than a pipe so the loop
# body runs in the main shell: `exit` really ends the job with a failing
# status, and $found survives the loop. IFS=, is scoped to `read` only, so it
# cannot change how any other expansion in the script is split.
found=0
# PHYLUM, BIOSAMPLE, BIOPROJECT and LOCUSTAG are not used below; they are read
# so the leading columns land in the right variables.
# The `|| [[ -n ... ]]` arm still processes a last row with no trailing newline.
while IFS=, read -r SPECIES STRAIN PHYLUM BIOSAMPLE BIOPROJECT LOCUSTAG \
  || [[ -n "$SPECIES" ]]; do
  found=1

  # Project/file stem: "<species> <strain>" with whitespace runs turned into _.
  BASE=$(printf '%s' "$SPECIES $STRAIN" | perl -p -e 's/\s+/_/g')
  STRAIN_NOSPACE=$(printf '%s' "$STRAIN" | perl -p -e 's/\s+/_/g')
  echo "$BASE"

  # Quoted test: if realpath fails and prints nothing, this must still fail.
  MASKED=$(realpath "$INDIR/$BASE.masked.fasta")
  if [[ ! -f "$MASKED" ]]; then
    echo "Cannot find $BASE.masked.fasta in $INDIR - may not have been run yet" >&2
    exit 1
  fi

  TEMPLATE=$(realpath "lib/sbt/$STRAIN_NOSPACE.sbt")
  if [[ ! -f "$TEMPLATE" ]]; then
    echo "NO TEMPLATE for $BASE" >&2
    exit 1
  fi

  # Stage a locally computed antiSMASH result where the annotate step can find
  # it, unless one is already in place.
  ANTISMASHRESULT=$OUTDIR/$BASE/annotate_misc/antiSMASH.results.gbk
  echo "$BASE $SPECIES"
  if [[ ! -f "$ANTISMASHRESULT" && -d "$OUTDIR/$BASE/antismash_local" ]]; then
    # NOTE(review): the GenBank file is assumed to be named
    # <SPECIES>_<BASE>.gbk -- confirm against the antiSMASH step's output.
    ANTISMASH=$OUTDIR/$BASE/antismash_local/${SPECIES}_${BASE}.gbk
    if [[ ! -f "$ANTISMASH" ]]; then
      echo "CANNOT FIND $ANTISMASH in $OUTDIR/$BASE/antismash_local"
    else
      # rsync does not create missing parent directories.
      mkdir -p "$OUTDIR/$BASE/annotate_misc"
      rsync -a "$ANTISMASH" "$ANTISMASHRESULT"
    fi
  fi

  # Optional extra flags may be supplied through the environment; they are
  # split on whitespace into separate arguments.
  read -r -a extra_args <<< "${MOREFEATURE:-} ${EXTRAANNOT:-}"

  # Propagate a funannotate failure as the job's exit status.
  funannotate annotate --sbt "$TEMPLATE" --busco_db "$BUSCO" \
    -i "$OUTDIR/$BASE" --species "$SPECIES" --strain "$STRAIN" \
    --cpus "$CPUS" "${extra_args[@]}" || exit $?
done < <(tail -n +2 "$SAMPFILE" | sed -n "${N}p")

# A row number past the end of the table selects nothing; fail loudly instead
# of finishing "successfully" without having annotated anything.
if (( found == 0 )); then
  echo "no sample found at data line $N of $SAMPFILE" >&2
  exit 1
fi