-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathslice-daily.bash
105 lines (88 loc) · 2.23 KB
/
slice-daily.bash
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/bin/bash
# set -ex
#
# Daily slice job: downloads the latest tarball of a dataset, slices out
# yesterday's data with an external python tool, uploads the resulting
# tarball to S3 and asks the afb host to process it.
#
# Options (each takes one value):
#   --dataset, --slug, --workdir, --slice-tool-path, --afb-hostname
#
# Run banner: two blank lines, then a boxed title carrying the current date.
printf '%s\n' \
  "" \
  "" \
  "################################" \
  "# Daily Slice #" \
  "# $(date) #" \
  "################################"
#######################################
# Parse the command-line options of the daily slice job.
# Globals:
#   dataset, slug, workdir, slice_tool, afb_hostname - each written when its
#     option (--dataset, --slug, --workdir, --slice-tool-path,
#     --afb-hostname) is present.
#   POSITIONAL - reset, then filled with every argument that is not one of
#     the recognised options, in order.
# Arguments:
#   The argument list to parse (normally "$@").
# Outputs:
#   An error message on stderr when an option has no value.
# Returns:
#   0 on success; 2 when a recognised option is the last argument and
#   therefore has no value to consume.
#######################################
parse_args() {
  local opt
  POSITIONAL=()
  while [[ $# -gt 0 ]]; do
    opt=$1
    case "$opt" in
      --dataset | --slug | --workdir | --slice-tool-path | --afb-hostname)
        # Refuse a dangling option instead of silently storing an empty
        # value; the later steps would then run against the wrong paths.
        if [[ $# -lt 2 ]]; then
          printf 'error: option %s requires a value\n' "$opt" >&2
          return 2
        fi
        case "$opt" in
          --dataset) dataset=$2 ;;
          --slug) slug=$2 ;;
          --workdir) workdir=$2 ;;
          --slice-tool-path) slice_tool=$2 ;;
          --afb-hostname) afb_hostname=$2 ;;
        esac
        shift 2
        ;;
      *)
        POSITIONAL+=("$opt")
        shift
        ;;
    esac
  done
  return 0
}

parse_args "$@" || exit
# Must happen at top level: inside the function `set --` would only replace
# the function's own positional parameters.
set -- "${POSITIONAL[@]}"

# Echo the effective settings so they appear in the job output.
echo " dataset=$dataset"
echo " slug=$slug"
echo " workdir=$workdir"
echo " slice_tool=$slice_tool"
echo " afb_hostname=$afb_hostname"
# ---------------------------------------------------------------------------
# Workflow: download the dataset tarball, slice out yesterday's data, upload
# the slice tarball to S3 and notify afb. Reads the settings dataset, slug,
# workdir, slice_tool and afb_hostname assigned during option parsing.
# Every step is checked; the first failure aborts with a message on stderr.
# ---------------------------------------------------------------------------

# Location of the AWS CLI; set AWS_BIN to override the default path.
aws_bin=${AWS_BIN:-/home/vforgione/.local/bin/aws}

# Report a fatal error on stderr and stop with a non-zero status.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Every later step depends on these settings; without them the download and
# the rm calls below would operate on the wrong location.
for setting in dataset slug workdir slice_tool afb_hostname; do
  [[ -n "${!setting:-}" ]] || die "required setting '$setting' is empty"
done

start_dir=$(pwd)
cd -- "$workdir" || die "cannot enter workdir '$workdir'"
# Absolute path of the work directory, so that a relative --workdir still
# produces valid paths now that the current directory has changed.
work_root=$PWD

echo "downloading tarball from mcs"
tarball="$dataset.latest.tar"
# NOTE(review): --no-check-certificate turns off TLS verification for the
# download; kept to preserve behaviour, but it should be dropped once the
# server certificate validates.
# -O pins the output file name: without it wget saves to "<name>.1" when a
# stale copy exists and the stale tarball would be processed instead.
if ! wget --quiet --no-check-certificate -O "$tarball" \
  "https://www.mcs.anl.gov/research/projects/waggle/downloads/datasets/$tarball"; then
  rm -f -- "$tarball"
  die "download of $tarball failed"
fi

echo "getting extraction dirname"
archive_listing=$(tar tf "$tarball") || die "cannot list the contents of $tarball"
# The first listed entry is taken as the directory the tarball extracts to.
first_entry=${archive_listing%%$'\n'*}
# The extraction directory is removed with rm -r further down, so refuse
# entries that are empty or that would resolve outside the work directory.
case "$first_entry" in
  '' | . | ./ | .. | /* | ../* | */../* | */..)
    die "unexpected first tarball entry '$first_entry'"
    ;;
esac
extraction_dirname="$work_root/$first_entry"
echo " extraction_dirname=$extraction_dirname"

echo "decompressing tarball"
tar xf "$tarball" || die "cannot extract $tarball"
rm -- "$tarball"

echo "setting dates to build the slice"
yesterday=$(date --date="1 day ago" +"%Y-%m-%d") || die "cannot compute yesterday's date"
echo " yesterday=$yesterday"

echo "slicing data.csv.gz"
# Remember the tool's status so the extracted data is cleaned up either way.
slice_status=0
python3 "$slice_tool" "$extraction_dirname" "$yesterday" "$yesterday" || slice_status=$?
rm -r -- "$extraction_dirname"
((slice_status == 0)) || die "slice tool exited with status $slice_status"

echo "renaming the slice output directory"
# The slice output is expected next to the extraction directory, named after
# it (without the trailing slash) plus the date range.
sliced_dirname="${extraction_dirname%/}.from-$yesterday-to-$yesterday"
echo " sliced_dirname=$sliced_dirname"
renamed="$slug.daily.$yesterday"
echo " renamed=$renamed"
mv -- "$sliced_dirname" "$renamed" || die "cannot rename '$sliced_dirname' to '$renamed'"

echo "tarring up the sliced directory"
tarball="$renamed.tar"
echo " tarball=$tarball"
tar cf "$tarball" "$renamed" || die "cannot create $tarball"
rm -r -- "$renamed"

echo "uploading the slice tarball to s3"
"$aws_bin" s3 cp "$tarball" s3://aot-tarballs/ --quiet ||
  die "upload of $tarball to s3 failed"
"$aws_bin" s3api put-object-tagging --bucket aot-tarballs --key "$tarball" --tagging "TagSet=[{Key=slice,Value=daily}]" ||
  die "tagging of $tarball in s3 failed"
rm -- "$tarball"

echo "notifying afb to update metadata"
# Quoted so the '?' in the query string is never treated as a glob.
curl --silent "https://$afb_hostname/data-sets/$slug/process?archive=$tarball" ||
  die "could not notify afb at $afb_hostname"

cd -- "$start_dir" || die "cannot return to '$start_dir'"
echo "done!"