-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrun.sh
executable file
·55 lines (45 loc) · 1.33 KB
/
run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# $1 = cluster type  (label only; recorded in the measurement CSVs)
# $2 = cluster_size  (label only; recorded in the measurement CSVs)

# Start from a clean slate; -f keeps rm from failing when no books* dir exists.
rm -rf books*
mkdir books0 # 100MB: base corpus, filled by download.sh
mkdir books1 # 500MB: five copies of books0
mkdir books2 # 1GB:   two copies of books1
chmod +x ./download.sh ./mapper.py ./reducer.py ./sequential.py
# Download inside a subshell so a failed cd cannot execute download.sh in the
# wrong directory, and the script's own cwd is left untouched.
( cd books0 || exit 1; ../download.sh )
# Grow books1 to ~5x books0 (≈500MB) by copying the base corpus in five times.
# --backup=numbered renames each name collision to NAME.~N~ so every copy is kept.
for _ in {1..5}; do
  cp --backup=numbered ./books0/* ./books1
done
# Grow books2 to ~2x books1 (≈1GB) the same way.
for _ in {1..2}; do
  cp --backup=numbered ./books1/* ./books2
done
# One benchmark pass per problem size (books0 .. books2): three timed runs of
# the sequential baseline, then three timed runs of the Hadoop-streaming job.
problem_sizes=("100MB" "500MB" "1GB")
for i in {0..2}; do
  echo "$1,$2,${problem_sizes[$i]}"
  # Abort rather than benchmark the wrong directory if the cd fails.
  cd "books$i" || exit 1
  # Refresh the HDFS input dir; -f silences the error on the very first run,
  # when books-input$i does not exist yet.
  hdfs dfs -rm -r -f "books-input$i"
  hdfs dfs -mkdir "books-input$i"
  # *.txt* deliberately also matches the numbered backups (NAME.txt.~N~)
  # produced while replicating the corpus.
  hdfs dfs -put *.txt* "books-input$i"
  for iteration in {1..3}; do
    echo "seq$iteration"
    echo -n "$1,$2,${problem_sizes[$i]}," >> ../measurements_seq.txt
    # 'time' reports on the subshell's stderr; 2>&1 routes that into the pipe
    # while >/dev/null discards stdout, so only the anchored 'real' figure
    # lands in the CSV. ^real avoids matching stray output containing "real".
    (time python3 ../sequential.py >> "../result$i.txt") 2>&1 >/dev/null | grep '^real' | awk '{ print $2 }' >> ../measurements_seq.txt
    # -f: do not error if the run failed and never created the result file.
    rm -f "../result$i.txt"
  done
  cd ..
  for iteration in {1..3}; do
    echo "par$iteration"
    # -f: the output dir is absent on the first iteration of each size.
    hdfs dfs -rm -r -f "books-output$i"
    echo -n "$1,$2,${problem_sizes[$i]}," >> ./measurements_par.txt
    (time hadoop jar /usr/lib/hadoop/hadoop-streaming.jar -files mapper.py,reducer.py -mapper mapper.py -reducer reducer.py -input "books-input$i" -output "books-output$i") 2>&1 >/dev/null | grep '^real' | awk '{ print $2 }' >> ./measurements_par.txt
  done
done