-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathDockerfile
156 lines (124 loc) · 6.04 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# Base image: Ubuntu 16.04 (xenial); the xenial-specific package repositories
# configured further down depend on this release.
FROM ubuntu:16.04
# MAINTAINER is deprecated; the maintainer is recorded as an image label instead.
LABEL maintainer="Mohamed Nadjib Mami <[email protected]>"
# Install necessary utility software.
# Packages are listed once each, one per line, in alphabetical order.
# The apt package lists are removed in the same layer so they do not stay in
# the image; every later apt-get install in this file runs its own
# apt-get update first.
RUN set -x && \
    apt-get update --fix-missing && \
    apt-get install -y --no-install-recommends \
        apt-transport-https \
        curl \
        git \
        maven \
        openjdk-8-jdk-headless \
        openssh-client \
        openssh-server \
        python \
        telnet \
        time \
        unzip \
        vim \
        wget && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Update environment
# JAVA_HOME is the location used by the openjdk-8 packages on amd64.
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
ENV HADOOP_VERSION=2.9.2
# Third-party mirrors drop releases once they are superseded; the Apache
# archive keeps every release, so the download stays reproducible.
ENV HADOOP_URL=https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
# Configure SSH
# COPY ssh_config /root/.ssh/config
# Generate a passphrase-less RSA key pair for the build user and authorize its
# public key for logins to that same account; authorized_keys is then
# restricted to owner read/write.
RUN ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa && \
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys && \
    chmod 0600 ~/.ssh/authorized_keys
# Install Hadoop
ENV HADOOP_HOME=/usr/local/hadoop
ENV HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
ENV HDFS_PORT=9000
# The archive is downloaded to a temporary file instead of being piped into
# tar: with the default /bin/sh (no pipefail) a failed curl inside a pipeline
# would not fail the step by itself. The temporary file is removed in the same
# layer so it does not add to the image size.
RUN set -x && \
    curl -fSL -o /tmp/hadoop.tar.gz "$HADOOP_URL" && \
    tar -xzf /tmp/hadoop.tar.gz -C /usr/local && \
    rm /tmp/hadoop.tar.gz && \
    mv "/usr/local/hadoop-${HADOOP_VERSION}" "$HADOOP_HOME"
# Configure Hadoop
# Replace every literal "${JAVA_HOME}" placeholder in hadoop-env.sh with the
# concrete value of the JAVA_HOME variable ('@' is the sed delimiter because
# the path contains slashes).
RUN sed -i 's@\${JAVA_HOME}@'"$JAVA_HOME"'@g' $HADOOP_CONF_DIR/hadoop-env.sh
# ':a;N;$!ba' loads the whole file into sed's pattern space so the multi-line
# <configuration> element can be matched by one expression. Everything between
# the opening and closing tag is then replaced by a single fs.default.name
# property pointing at hdfs://localhost:$HDFS_PORT.
RUN sed -ri ':a;N;$!ba;s@(<configuration>).*(</configuration>)@\1<property><name>fs.default.name</name><value>hdfs://localhost:'"$HDFS_PORT"'</value></property>\2@g' $HADOOP_CONF_DIR/core-site.xml
# Same technique for hdfs-site.xml: the content of <configuration> becomes a
# single dfs.replication property with value 1.
RUN sed -ri ':a;N;$!ba;s@(<configuration>).*(</configuration>)@\1<property><name>dfs.replication</name><value>1</value></property>\2@g' $HADOOP_CONF_DIR/hdfs-site.xml
# Install Hive
ENV HIVE_VERSION=3.1.1
# Superseded releases are removed from the regular Apache dist area; the
# archive keeps them permanently.
ENV HIVE_URL=https://archive.apache.org/dist/hive/hive-${HIVE_VERSION}/apache-hive-${HIVE_VERSION}-bin.tar.gz
ENV HIVE_HOME=/usr/local/hive
# Download to a temporary file rather than piping into tar, so that a failed
# download fails the step; the file is removed in the same layer.
RUN set -x && \
    curl -fSL -o /tmp/hive.tar.gz "$HIVE_URL" && \
    tar -xzf /tmp/hive.tar.gz -C /usr/local && \
    rm /tmp/hive.tar.gz && \
    mv "/usr/local/apache-hive-${HIVE_VERSION}-bin" "$HIVE_HOME"
# Put the MySQL JDBC driver on Hive's classpath (presumably for a MySQL-backed
# metastore configured in hive-site.xml; confirm against that file).
# central.maven.org has been decommissioned; repo1.maven.org over HTTPS is the
# supported Maven Central endpoint.
RUN wget -P "$HIVE_HOME/lib" \
    https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.13/mysql-connector-java-8.0.13.jar
COPY evaluation/Hive_files/hive-site.xml $HIVE_HOME/conf/
# Install Presto (Server and CLI)
ENV PRESTO_VERSION=304
ENV PRESTO_HOME=/usr/local/presto
# central.maven.org has been decommissioned; repo1.maven.org over HTTPS is the
# supported Maven Central endpoint.
ENV PRESTO_URL=https://repo1.maven.org/maven2/io/prestosql/presto-server/${PRESTO_VERSION}/presto-server-${PRESTO_VERSION}.tar.gz
ENV PRESTO_CLI_URL=https://repo1.maven.org/maven2/io/prestosql/presto-cli/${PRESTO_VERSION}/presto-cli-${PRESTO_VERSION}-executable.jar
# Server: download to a temporary file rather than piping into tar, so that a
# failed download fails the step; the file is removed in the same layer.
RUN set -x && \
    curl -fSL -o /tmp/presto-server.tar.gz "$PRESTO_URL" && \
    tar -xzf /tmp/presto-server.tar.gz -C /usr/local && \
    rm /tmp/presto-server.tar.gz && \
    mv "/usr/local/presto-server-${PRESTO_VERSION}" "$PRESTO_HOME"
# CLI: the self-executing jar is stored as $PRESTO_HOME/presto and made executable.
RUN set -x && \
    wget -O "$PRESTO_HOME/presto" "$PRESTO_CLI_URL" && \
    chmod +x "$PRESTO_HOME/presto"
# Configure Presto
# -p creates missing parents and does not fail when a directory already exists.
RUN set -x && \
    mkdir -p "$PRESTO_HOME/etc/catalog" /var/lib/presto
# If you change the /var/lib/presto directory above, also change it in the
# node.properties config file.
COPY evaluation/Presto_files/config/* /usr/local/presto/etc/
COPY evaluation/Presto_files/catalog/* /usr/local/presto/etc/catalog/
# Install MongoDB
# Steps: import the repository signing key, register the MongoDB 3.6 apt
# repository for xenial, install the mongodb-org package and create /data/db.
# The apt package lists are removed in the same layer to keep the image small.
RUN set -x && \
    apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 2930ADAE8CAF5059EE73BB4B58712A2291FA4AD5 && \
    echo "deb [ arch=amd64,arm64 ] https://repo.mongodb.org/apt/ubuntu xenial/mongodb-org/3.6 multiverse" | tee /etc/apt/sources.list.d/mongodb-org-3.6.list && \
    apt-get update && \
    apt-get install -y mongodb-org && \
    rm -rf /var/lib/apt/lists/* && \
    mkdir -p /data/db
# Install Cassandra
# - The KEYS file is fetched with -f (fail on HTTP errors) and -L (follow
#   redirects) into a temporary file, so a failed download cannot be silently
#   piped into apt-key.
# - The extra signing key is fetched from keyserver.ubuntu.com (the keyserver
#   already used for MongoDB in this file); the pool.sks-keyservers.net pool
#   has been shut down.
# - All keys are imported before apt-get update so the repository metadata can
#   be verified when it is downloaded.
RUN set -x && \
    echo "deb http://www.apache.org/dist/cassandra/debian 311x main" | tee -a /etc/apt/sources.list.d/cassandra.sources.list && \
    curl -fsSL -o /tmp/cassandra-KEYS https://www.apache.org/dist/cassandra/KEYS && \
    apt-key add /tmp/cassandra-KEYS && \
    rm /tmp/cassandra-KEYS && \
    apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-key A278B781FE4B2BDA && \
    apt-get update && \
    apt-get -y install cassandra && \
    rm -rf /var/lib/apt/lists/*
# Install MySQL
# The root password ("root") is preseeded through debconf so the package can
# be configured without prompting; DEBIAN_FRONTEND is set inline (not via ENV)
# so it only affects this install and does not leak into the runtime
# environment. vim is not reinstalled here because the base package step
# already provides it. The apt package lists are removed in the same layer.
RUN set -x && \
    echo 'mysql-server mysql-server/root_password password root' | debconf-set-selections && \
    echo 'mysql-server mysql-server/root_password_again password root' | debconf-set-selections && \
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get -y install mysql-server && \
    rm -rf /var/lib/apt/lists/*
# to solve "Can't open and lock privilege tables: Table storage engine for 'user' doesn't have this option"
# sed -i -e "s/^bind-address\s*=\s*127.0.0.1/bind-address = 0.0.0.0/" /etc/mysql/my.cnf && \
# /etc/init.d/mysql start
# Install Spark
ENV SPARK_VERSION=2.4.0
# Download to a temporary file rather than piping into tar, so that a failed
# download fails the step; the file is removed in the same layer.
RUN set -x && \
    curl -fSL -o /tmp/spark.tgz "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz" && \
    tar -xzf /tmp/spark.tgz -C /usr/local && \
    rm /tmp/spark.tgz && \
    mv "/usr/local/spark-${SPARK_VERSION}-bin-hadoop2.7" /usr/local/spark
# Generate BSBM data
# unzip is already installed by the base package step, so no separate apt
# layer is needed here.
# Steps: download and unpack the BSBM tools, run the generator with SQL output
# written under /root/data/input, list the generated files in the build log,
# and delete the files whose names start with 01, 02, 05, 06 or 07.
# NOTE(review): the download URL is the unpinned "latest" link while the
# directory name bsbmtools-0.2 is hard-coded; a newer upstream release would
# break this step. Confirm whether a versioned URL should be used.
RUN set -x && \
    wget -O bsbm.zip https://sourceforge.net/projects/bsbmtools/files/latest/download && \
    unzip bsbm.zip && \
    rm bsbm.zip && \
    cd bsbmtools-0.2 && \
    ./generate -fc -pc 1000 -s sql -fn /root/data/input && \
    cd /root/data/input && \
    ls && \
    rm 01* 02* 05* 06* 07*
# Due to a not-yet-explained behavior of the assembly plugin, jena-arq and
# presto-jdbc are not picked up when Squerall is assembled, so both jars are
# provided explicitly at spark-submit time for now.
# central.maven.org has been decommissioned; repo1.maven.org over HTTPS is the
# supported Maven Central endpoint. wget -P stores each jar directly in /root.
ENV JENA_VERSION=3.9.0
RUN set -x && \
    wget -P /root "https://repo1.maven.org/maven2/org/apache/jena/jena-arq/${JENA_VERSION}/jena-arq-${JENA_VERSION}.jar"
RUN set -x && \
    wget -P /root "https://repo1.maven.org/maven2/io/prestosql/presto-jdbc/${PRESTO_VERSION}/presto-jdbc-${PRESTO_VERSION}.jar"
# Copy evaluation/SQLtoNOSQL from the build context to /root/SQLtoNOSQL.
COPY evaluation/SQLtoNOSQL /root/SQLtoNOSQL
# Copy the entries matched under evaluation/input_files to /root/input/, then
# the entries under its queries subdirectory to /root/input/queries/.
# NOTE(review): the first wildcard also matches the queries directory itself,
# so its files may additionally land directly in /root/input/ - confirm this
# is intended.
COPY evaluation/input_files/* /root/input/
COPY evaluation/input_files/queries/* /root/input/queries/
# just to force rebuild
# NOTE(review): a RUN with a constant command is normally served from the
# build cache, so this step may not force anything by itself - confirm intent.
RUN ls
# Install Squerall: clone the repository to /usr/local/Squerall and build it
# with Maven from inside the checkout.
RUN set -x && \
    git clone https://github.com/EIS-Bonn/Squerall.git /usr/local/Squerall && \
    cd /usr/local/Squerall && \
    mvn package
# Copy the helper scripts into /root (welcome.sh is presumably among them -
# confirm against evaluation/scripts).
COPY evaluation/scripts/* /root/
# Append a hook to /root/.profile that runs welcome.sh.
# printf is used instead of echo "\n...": whether echo expands backslash
# escapes depends on the shell, while printf always does. The bytes written
# are the same as before: a blank line, the command, and a trailing blank line.
RUN printf '\nbash /root/welcome.sh\n\n' >> /root/.profile
# Start a login shell by default (a bash login shell normally sources
# ~/.profile, which is what triggers the hook above).
CMD ["/bin/bash","--login"]