-
Notifications
You must be signed in to change notification settings - Fork 2
/
validate
executable file
·135 lines (109 loc) · 3.34 KB
/
validate
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/bin/bash
# Entrypoint script: validates the biobox.yaml input, derives the assembly and
# read arguments from it, runs the command registered for the requested task
# in /Taskfile and writes a biobox.yaml describing the results.
#
# Usage: validate <task>

# exit script if one command fails
set -o errexit
# exit script if a variable is not set
set -o nounset

# Fixed locations of the input description, the output directory and the
# (optional) metadata directory.
INPUT=/bbx/mnt/input/biobox.yaml
OUTPUT=/bbx/mnt/output
METADATA=/bbx/mnt/metadata

# Since this script is the entrypoint to your container
# you can access the task in `docker run task` as the first argument.
# Fail with a usage hint instead of a bare "unbound variable" error when the
# task is missing.
TASK=${1:?"usage: ${0##*/} <task>"}
# Ensure the biobox.yaml file is valid; under errexit a failing validator
# aborts the script here.
validate-biobox-file \
  --input "${INPUT}" \
  --schema /schema.yaml

# Use grep to get ${TASK} in /Taskfile. Lines have the form "<task>:<command>";
# everything after the first ':' is taken as the command, so commands that
# contain ':' themselves are kept intact.
# grep runs inside a pipeline, so "no match" does not trip errexit and the
# explicit message below is printed instead.
CMD=$(grep -E -- "^${TASK}:" /Taskfile | cut -d ':' -f 2-)
if [[ -z "${CMD}" ]]; then
  echo "Abort, no task found for '${TASK}'." >&2
  exit 1
fi
# When the metadata directory exists, wrap the task command in a subshell and
# send its stdout and stderr to log.txt inside that directory.
# Written as a guard: nothing happens unless the directory is present.
[[ ! -d "${METADATA}" ]] || CMD="(${CMD}) >& ${METADATA}/log.txt"
# Temporary files go to the output directory by default.
TMP_DIR=${OUTPUT}
INPUT_JSON="${TMP_DIR}/biobox.json"
# Convert the YAML input to JSON so it can be queried with jq. The command is
# run directly: wrapping it in $(...) would additionally try to execute its
# (redirected, hence empty) output as a command.
yaml2json < "${INPUT}" > "${INPUT_JSON}"
# Keep only the "arguments" entry of the input document.
ARGUMENTS=$(jq --raw-output '.arguments' "${INPUT_JSON}")
#######################################
# Run a jq filter against the arguments JSON and print the raw result.
# The document is passed quoted via a here-string; an unquoted
# `echo $ARGUMENTS` would let the shell glob-expand and re-space the JSON
# (e.g. a path containing '*') before jq reads it.
# Globals:   ARGUMENTS (read)
# Arguments: $1 - jq filter
# Outputs:   filter result on stdout
#######################################
query_arguments() {
  jq --raw-output "$1" <<<"${ARGUMENTS}"
}

# get assemblies (fasta) as a comma-separated list of paths
ASSEMBLY=$(query_arguments 'select(has("assemblies")) | .assemblies[].path' \
  | tr '\n' ' ' | xargs | tr ' ' ',')
# comma-separated ids of those assemblies that define a non-null id
LABELS=$(query_arguments 'select(has("assemblies")) | .assemblies[].id | select (.!=null)' \
  | tr '\n' ' ' | xargs | tr ' ' ',')
# get gzipped fastq reads as a blank-separated list of paths
READS=$(query_arguments 'select(has("reads")) | .reads[].path' | tr '\n' ' ')
# type of each reads entry (single or paired end), in the same order as READS
END=$(query_arguments 'select(has("reads")) | .reads[].type' | tr '\n' ' ')
#######################################
# Print the cache directory configured in an arguments JSON document.
# Arguments: $1 - JSON document to inspect
# Outputs:   the value of "cache" on stdout; nothing when the key is absent
#            or its value is null
#######################################
cache_dir_from_arguments() {
  jq --raw-output 'select(has("cache")) | .cache // empty' <<<"$1"
}

# get cache
# No newline-to-blank translation here: a trailing blank in the path would end
# up in TMP_DIR and break every path built from it.
CACHE=$(cache_dir_from_arguments "${ARGUMENTS}")
# if a cache is defined, use it for temporary files instead of the default
if [[ -n "${CACHE}" ]]; then
  TMP_DIR=${CACHE}
fi
UNZIPPED_READS=""

#######################################
# Decompress gzipped read files into a directory. Each output file keeps the
# input's file name (extension included) but not its original path.
# Globals:   UNZIPPED_READS (appended: every new path followed by a blank)
# Arguments: $1  - target directory
#            $2… - gzipped read files
#######################################
unzip_reads() {
  local target_dir=$1
  shift
  local archive unzipped
  for archive in "$@"; do
    unzipped="${target_dir}/${archive##*/}"
    gunzip -c -- "${archive}" > "${unzipped}"
    UNZIPPED_READS+="${unzipped} "
  done
}

# we need reads unzipped (keep name, strip path, extract to tmp)
# READS is a blank-separated list, so it is split into words on purpose.
# shellcheck disable=SC2086
unzip_reads "${TMP_DIR}" ${READS}
SINGLE_ENDED="-r "
PAIRED_1="-1 "
PAIRED_2="-2 "

#######################################
# Sort the unzipped read files by single/paired end and collect them as
# comma-terminated lists behind the -r / -1 / -2 option prefixes.
# Globals:   UNZIPPED_READS (read)  - blank-separated unzipped read files
#            END (read)             - blank-separated type per read file
#            READ_ARR (written)     - UNZIPPED_READS as an array
#            SINGLE_ENDED, PAIRED_1, PAIRED_2 (appended)
#######################################
classify_reads() {
  local idx=0 kind path stem
  # Both lists are blank-separated; splitting them into words is intended.
  # shellcheck disable=SC2206
  READ_ARR=( ${UNZIPPED_READS} )
  for kind in ${END}; do
    path=${READ_ARR[${idx}]}
    if [[ "${kind}" == "single" ]]; then
      stem=${path%.gz} # strip .gz
      # Rename only when there is a suffix to strip: `mv x x` fails and would
      # abort the script under errexit.
      if [[ "${path}" != "${stem}" ]]; then
        mv -- "${path}" "${stem}"
      fi
      SINGLE_ENDED+="${stem},"
    else # can only be paired then
      stem=${path%.*} # implicitly strips .gz
      perl unshuffle.pl -f "${path}" -o "$(pwd)" -n "${stem}"
      # rm -f "${path}" # delete temporary
      PAIRED_1+="${stem}.1,"
      PAIRED_2+="${stem}.2,"
    fi
    idx=$((idx + 1))
  done
}

# sort by single/paired end
classify_reads
# remove trailing ,
SINGLE_ENDED=${SINGLE_ENDED%,}
PAIRED_1=${PAIRED_1%,}
PAIRED_2=${PAIRED_2%,}

#######################################
# Combine the read options, leaving out every option that has no files.
# An option counts as empty when nothing follows its last blank ("-r ").
# Arguments: $1 - single-end option  ("-r file,file")
#            $2 - first-mate option  ("-1 file,file")
#            $3 - second-mate option ("-2 file,file")
# Outputs:   the non-empty options joined by single blanks; empty string when
#            there are no reads at all
#######################################
select_read_options() {
  local single=$1 first=$2 second=$3
  local options=""
  if [[ -n "${single##* }" ]]; then
    options=${single}
  fi
  if [[ -n "${first##* }" ]]; then
    # Insert a separating blank only when single-end reads precede.
    options="${options:+${options} }${first} ${second}"
  fi
  printf '%s' "${options}"
}

# Single-end only, paired-end only, or both; bare flags are never emitted.
UNZIPPED_READS=$(select_read_options "${SINGLE_ENDED}" "${PAIRED_1}" "${PAIRED_2}")
#######################################
# Render one result entry per assembly summary table.
# With labels, entries are named after the labels; otherwise assemblies are
# numbered from 0 and their directories are named asm_<number>.
# Arguments: $1 - comma-separated assembly labels (may be empty)
#            $2 - comma-separated assembly paths
# Outputs:   YAML list items, indented to nest under "results:", on stdout
#######################################
build_summary_entries() {
  local labels=$1 assemblies=$2
  local entries="" label assembly index=0
  if [[ -n "${labels}" ]]; then
    for label in ${labels//,/ }; do
      entries+="  - name: summary_${label}
    type: tsv
    inline: false
    value: ${label}/summary.tsv
    description: all detected errors for assembly ${label}
"
    done
  else
    # The list is comma-separated, so turn the commas into blanks before
    # iterating; only the position of each assembly matters here.
    for assembly in ${assemblies//,/ }; do
      entries+="  - name: summary_${index}
    type: tsv
    inline: false
    value: asm_${index}/summary.tsv
    description: all detected errors for assembly ${index}
"
      index=$((index + 1))
    done
  fi
  printf '%s' "${entries}"
}

FNAME=$(build_summary_entries "${LABELS}" "${ASSEMBLY}")
# Turn the label list into the corresponding command line option.
if [[ -n "${LABELS}" ]]; then
  LABELS="--assembly-names ${LABELS}"
fi

# Run the task command taken from /Taskfile. It is quoted so that the shell
# does not split or glob-expand it before eval parses it.
# NOTE(review): eval executes the Taskfile entry verbatim; the Taskfile must be
# trusted. Presumably the command refers to the variables prepared above.
eval "${CMD}"

# Describe the produced files for the caller.
cat << EOF > "${OUTPUT}/biobox.yaml"
version: 0.2.0
results:
  - name: plot
    type: pdf
    inline: false
    value: comparison_plots.pdf
    description: Combined plot of the different errors
${FNAME}
EOF