-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathscript.sh
executable file
·137 lines (109 loc) · 4.47 KB
/
script.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/bin/bash
# set -e
ROOT_DIR=$(pwd)
TOOLS_DIR=$ROOT_DIR/tpcds-kit/tools
DATA_DIR=$ROOT_DIR/data
QUERIES_DIR=$ROOT_DIR/queries
OUTPUT_DIR=$ROOT_DIR/output
# Function to debug and record the timestamp of each step of the script
log() {
echo `date +"%Y-%m-%d %H-%M-%S"` $1
}
usage() {
echo "usage: ./script.sh <SCALE_FACTOR> <DATABASE> <COMMAND>"
echo "COMMANDS: generate_data, create_db, load_db, generate_queries, run_queries, all"
exit 1
}
# it expected 3 arguments
if [ $# -lt 3 ]; then
usage
fi
SCALE_FACTOR=$1
DATABASE=$2
generate_data() {
log "------------ Generating data! -----------------------"
mkdir -p $DATA_DIR && \
cd $TOOLS_DIR && \
./dsdgen -SCALE ${SCALE_FACTOR} -DIR $DATA_DIR -FORCE -VERBOSE
}
create_db() {
log "------------ Setting-up database! -------------------"
sudo mysql -uroot -e "DROP DATABASE IF EXISTS ${DATABASE}" && \
sudo mysql -uroot -e "CREATE DATABASE ${DATABASE}" && \
sudo mysql -uroot $DATABASE < $TOOLS_DIR/tpcds.sql
}
# For every .dat file generated by dsdgen, converts empty values to '\N', save in a .dsv file and
# then upload the latter using mysqlimport. Then runs the alter tables from the file with constraints.
load_db() {
log "------------ Loading data into database! ------------"
# for every generated data file
ls -Sr $DATA_DIR/*.dat | while read dat_file; do
# creates dsv file
table=`basename $dat_file .dat`
dsv_file=$DATA_DIR/$table.dsv
log "----------------- ${table}"
# converts empty values to '\N' (NULL)
sed -e 's#||#|\\N|#g' -e 's#^|#\\N|#g' -e 's#||#|\\N|#g' $dat_file > $dsv_file # converts empty values to '\N' (NULL)
# splits into 10 MB files
split -C 10M $dsv_file $DATA_DIR/$table.split_
# import splitted files using 8 threads
ls $DATA_DIR/$table.split_* | xargs -P8 -I % sh -c 'sudo mysqlimport --local --default-character-set=latin1 --replace --silent --fields-terminated-by='"'"'|'"' $DATABASE "'% && rm -f %;'
rm -f $dsv_file
done
log "------------ Apply constraints to database! ---------"
cat $TOOLS_DIR/tpcds_ri.sql | egrep -v "(^--.*|^$)" | xargs -P8 -I % sudo mysql -uroot $DATABASE -e %
}
# Generate the queries using dsqgen and splits the output file into individual query files for
# every template. It also saves a file with the query order.
generate_queries() {
log "------------ Generating queries! --------------------"
mkdir -p $QUERIES_DIR && \
cd $TOOLS_DIR && \
./dsqgen -DIRECTORY ../query_templates/ -INPUT ../query_templates/mysql_templates.lst -QUIET Y -SCALE ${SCALE_FACTOR} -OUTPUT_DIR $QUERIES_DIR -DIALECT mysql
awk -v dir="$QUERIES_DIR" '/^-- start query/{close (prev); name=$NF; gsub(/tpl/, "sql", name);}{prev=dir"/"name; print > (prev);}' $QUERIES_DIR"/query_0.sql"
awk -v dir="$QUERIES_DIR" '/^-- start query/{name=$NF; gsub(/tpl/, "sql", name); print dir"/"name}' $QUERIES_DIR"/query_0.sql" > $QUERIES_DIR/query_order.txt
log "----------------- "$(grep '' -c $QUERIES_DIR/query_order.txt)" queries generated!"
}
run_queries() {
log "------------ Running queries! -----------------------"
mkdir -p $OUTPUT_DIR && mkdir -p $OUTPUT_DIR/res && mkdir -p $OUTPUT_DIR/err # create the output folders
while read query_file; do # for every query considering the query order
((i++))
log "----------------- ${i} - "`basename $query_file .sql`
RESULT_FILE="$OUTPUT_DIR/res/`basename $query_file .sql`.res"
ERROR_FILE="$OUTPUT_DIR/err/`basename $query_file .sql`.err"
MYSQL_LOG_FILE="/var/log/mysql/`basename $query_file .sql`.log"
# create/truncate and set the variable for log file
rm -f $MYSQL_LOG_FILE
:> $MYSQL_LOG_FILE
sudo chown mysql:mysql $MYSQL_LOG_FILE
sudo mysql -uroot -e "SET GLOBAL slow_query_log_file = '${MYSQL_LOG_FILE}';"
# execute the query
sudo mysql -uroot $DATABASE < $query_file > $RESULT_FILE 2> $ERROR_FILE
# remove output files if they are empty
[ -s "$ERROR_FILE" ] && log "---------------------- ERROR"
[ -s "$ERROR_FILE" ] || rm -f "$ERROR_FILE"
[ -s "$MYSQL_LOG_FILE" ] || rm -f "$MYSQL_LOG_FILE"
[ -s "$RESULT_FILE" ] || rm -f "$RESULT_FILE"
done < $QUERIES_DIR/query_order.txt
}
if [ "$3" == "generate_data" ]; then
generate_data
elif [ "$3" == "create_db" ]; then
create_db
elif [ "$3" == "load_db" ]; then
load_db
elif [ "$3" == "generate_queries" ]; then
generate_queries
elif [ "$3" == "run_queries" ]; then
run_queries
elif [ "$3" == "all" ]; then
generate_data && \
create_db && \
load_db && \
generate_queries && \
run_queries
else
usage
fi
log "------------ DONE! ----------------------------------"