fix: Updating regular Docker Images for helm chart. (#885)

* Updating regular Docker Images from helm chart.
* Removed image dependency
* Removed FBProphet
* Added maintainer.

Parent: 96f0b77
Commit: b431a61
Showing 26 changed files with 377 additions and 181 deletions.
Dockerfile (Livy server image):

@@ -1,5 +1,55 @@
FROM java:openjdk-8-jdk
MAINTAINER Dalitso Banda <[email protected]>
FROM openjdk:8-jdk-slim-buster
LABEL maintainer="Dalitso Banda [email protected]"

# Get Spark from US Apache mirror.
ENV APACHE_SPARK_VERSION 2.4.5
ENV HADOOP_VERSION 3.2.1

RUN echo "$LOG_TAG Getting SPARK_HOME" && \
apt-get update && \
# build deps and deps for c bindings for cntk
apt-get install -y build-essential && \
apt-get install -y autoconf automake libtool curl make unzip && \
mkdir -p /opt && \
cd /opt && \
curl http://apache.claz.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-without-hadoop.tgz | \
tar -xz && \
ln -s spark-${APACHE_SPARK_VERSION}-bin-without-hadoop spark && \
echo Spark ${APACHE_SPARK_VERSION} installed in /opt/spark && \
export SPARK_HOME=/opt/spark

RUN echo "downloading hadoop" && \
apt-get install -y wget && \
cd /tmp && \
wget http://apache.claz.org/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz -O - | \
tar -xz && \
mv /tmp/hadoop-${HADOOP_VERSION} /opt/hadoop && \
echo "export HADOOP_CLASSPATH=/opt/hadoop/share/hadoop/tools/lib/*" >> /opt/hadoop/etc/hadoop/hadoop-env.sh && \
echo Hadoop ${HADOOP_VERSION} installed in /opt/hadoop && \
rm -rf /opt/hadoop/share/doc

RUN echo "\nSPARK_DIST_CLASSPATH=/jars:/jars/*:$(/opt/hadoop/bin/hadoop classpath)" >> /opt/spark/conf/spark-env.sh
ENV HADOOP_HOME=/opt/hadoop
ADD jars /jars

# if numpy is installed on a driver it needs to be installed on all
# workers, so install it everywhere
RUN apt-get update && \
apt install -y python3-pip && \
pip3 install numpy && \
pip3 install matplotlib && \
pip3 install pandas==0.24.1 && \
pip3 install scikit-learn && \
pip3 install pyarrow==0.11.1 && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Final config
ADD spark-config/log4j.properties /opt/spark/conf/log4j.properties
ADD spark-config/start-common.sh spark-config/start-worker spark-config/start-master /
ADD spark-config/core-site.xml /opt/spark/conf/core-site.xml
ADD spark-config/spark-defaults.conf /opt/spark/conf/spark-defaults.conf
ENV PATH $PATH:/opt/spark/bin

ENV LIVY_VERSION="git_master"
ENV LIVY_COMMIT="02550f7919b7348b6a7270cf806e031670037b2f"

@@ -9,91 +59,43 @@ ENV LOG_TAG="[LIVY_${LIVY_VERSION}]:" \
LC_ALL=en_US.UTF-8

RUN echo "$LOG_TAG Install essentials" && \
apt-get -y update && \
apt-get install -y locales && \
locale-gen $LANG && \
apt-get install -y git wget grep curl sed && \
apt-get install -y python-setuptools && \
apt-get autoclean && apt-get autoremove && \
echo "$LOG_TAG Install python dependencies" && \
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
python get-pip.py && \
rm get-pip.py && \
apt-get install -y python-dev libpython3-dev build-essential pkg-config gfortran && \
pip install -U pip setuptools wheel && \
apt-get update && \
apt-get install -y git wget curl && \
echo "$LOG_TAG setting python dependencies" && \
ln -s /usr/bin/python3 /usr/bin/python && \
echo "$LOG_TAG Getting maven" && \
wget http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz && \
tar -zxf apache-maven-3.3.9-bin.tar.gz -C /usr/local/ && \
rm -rf apache-maven-3.3.9-bin.tar.gz && \
ln -s /usr/local/apache-maven-3.3.9/bin/mvn /usr/local/bin/mvn && \
echo "$LOG_TAG Download and build Livy source" && \
git clone https://github.com/apache/incubator-livy.git ${LIVY_HOME}_src && \
cd ${LIVY_HOME}_src && \
git checkout ${LIVY_COMMIT} && \
mvn package -DskipTests && \
ls ${LIVY_HOME}_src && \
mv ${LIVY_HOME}_src ${LIVY_HOME} && \
echo "$LOG_TAG Cleanup" && \
apt-get purge -y --auto-remove build-essential pkg-config gfortran libpython3-dev && \
apt-get autoremove && \
apt-get autoclean && \
apt-get clean && \
rm -rf /usr/local/apache-maven-3.3.9 && \
rm -rf /root/.ivy2 && \
rm -rf /root/.npm && \
rm -rf /root/.m2 && \
rm -rf /root/.cache && \
rm -rf /tmp/*

# Get Spark from US Apache mirror.
ENV APACHE_SPARK_VERSION 2.4.0
ENV HADOOP_VERSION 3.2.0
ENV HADOOP_GIT_COMMIT="release-3.2.0-RC1"

RUN echo "$LOG_TAG Getting SPARK_HOME" && \
mkdir -p /opt && \
cd /opt && \
curl http://apache.claz.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-without-hadoop.tgz | \
tar -xz && \
ln -s spark-${APACHE_SPARK_VERSION}-bin-without-hadoop spark && \
echo Spark ${APACHE_SPARK_VERSION} installed in /opt/spark && \
export SPARK_HOME=/opt/spark

RUN echo "$LOG_TAG building hadoop" && \
apt-get update && \
apt-get install -y make autoconf automake libtool g++ unzip && \
cd / && \
git clone https://github.com/apache/hadoop.git hadoop_src && \
mkdir /hadoop_deps && cd /hadoop_deps && \
wget https://github.com/protocolbuffers/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.bz2 && \
tar xvf protobuf-2.5.0.tar.bz2 && \
cd protobuf-2.5.0 && \
./configure && make && make install && ldconfig && \
cd /hadoop_src && git checkout ${HADOOP_GIT_COMMIT} && mvn package -Pdist -DskipTests -Dtar && \
mv hadoop-dist/target/hadoop-${HADOOP_VERSION} /opt/hadoop && \
rm -r /hadoop_src && \
rm -rf /root/.ivy2 && \
rm -rf /root/.m2 && \
export HADOOP_HOME=/opt/hadoop && \
echo "\nexport HADOOP_CLASSPATH=/opt/hadoop/share/hadoop/tools/lib/*" >> /opt/hadoop/etc/hadoop/hadoop-env.sh && \
echo Hadoop ${HADOOP_VERSION} installed in /opt/hadoop && \
apt-get purge -y --auto-remove g++ make build-essential autoconf automake && \
cd / && rm -rf /hadoop_deps

RUN echo "\nSPARK_DIST_CLASSPATH=/jars:/jars/*:$(/opt/hadoop/bin/hadoop classpath)" >> /opt/spark/conf/spark-env.sh
ENV HADOOP_HOME=/opt/hadoop
ADD jars /jars

ENV HADOOP_CONF_DIR /opt/hadoop/conf
ENV CONF_DIR /livy/conf
ENV SPARK_CONF_DIR /opt/spark/conf

RUN mv ${LIVY_HOME}_src ${LIVY_HOME}
ADD livy.conf ${LIVY_HOME}/conf
EXPOSE 8998

WORKDIR ${LIVY_HOME}

RUN mkdir logs && export SPARK_HOME=/opt/spark && export HADOOP_HOME=/opt/hadoop && export SPARK_CONF_DIR=/opt/spark/conf
RUN mkdir logs

#hive needed for livy pyspark
RUN wget http://central.maven.org/maven2/org/apache/spark/spark-hive_2.11/2.4.0/spark-hive_2.11-2.4.0.jar -P /opt/spark/jars
RUN wget https://repo1.maven.org/maven2/org/apache/spark/spark-hive_2.11/2.4.5/spark-hive_2.11-2.4.5.jar -P /opt/spark/jars

CMD ["sh", "-c", "echo '\nspark.driver.host' $(hostname -i) >> /opt/spark/conf/spark-defaults.conf && echo '\nlivy.spark.master' $SPARK_MASTER >> /livy/conf/livy.conf && bin/livy-server"]
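For a quick local smoke test of the rebuilt Livy image, a minimal sketch; the livy-server tag and the master URL are illustrative assumptions, not names taken from this chart:

# build from the directory holding this Dockerfile plus its jars/, spark-config/
# and livy.conf context; the tag is an assumption
docker build -t livy-server .
# the CMD above appends livy.spark.master from $SPARK_MASTER, so pass it in
docker run -p 8998:8998 -e SPARK_MASTER=spark://spark-master:7077 livy-server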
core-site.xml (new file):

@@ -0,0 +1,19 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
  <property>
    <name>fs.gs.impl</name>
    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem</value>
    <description>The FileSystem for gs: (GCS) uris.</description>
  </property>
  <property>
    <name>fs.AbstractFileSystem.gs.impl</name>
    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
    <description>The AbstractFileSystem for gs: (GCS) uris. Only necessary for use with Hadoop 2.</description>
  </property>
  <property>
    <name>fs.gs.project.id</name>
    <value>NOT_RUNNING_INSIDE_GCE</value>
  </property>
</configuration>
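The NOT_RUNNING_INSIDE_GCE placeholder is rewritten at container start by start-common.sh (further below) when the GCE metadata server answers. A hedged sketch for confirming what a running container ended up with; the pod name is a hypothetical placeholder:

# <spark-pod> is hypothetical; prints the property as it was left after startup
kubectl exec <spark-pod> -- grep -A 1 'fs.gs.project.id' /opt/spark/conf/core-site.xml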
log4j.properties (new file):

@@ -0,0 +1,12 @@
# Set everything to be logged to the console
log4j.rootCategory=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark-project.jetty=WARN
log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
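The same logger-level pattern extends to other chatty packages. As a sketch only, not part of this commit, one could quiet Hadoop's client logging inside the image the same way:

# hypothetical extra line, appended to the image's log4j.properties
echo 'log4j.logger.org.apache.hadoop=WARN' >> /opt/spark/conf/log4j.properties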
spark-defaults.conf (new file):

@@ -0,0 +1 @@
spark.app.id KubernetesSpark
start-common.sh (new file):

@@ -0,0 +1,30 @@
#!/bin/bash

# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

PROJECT_ID=$(curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/project-id)

if [[ -n "${PROJECT_ID}" ]]; then
  sed -i "s/NOT_RUNNING_INSIDE_GCE/${PROJECT_ID}/" /opt/spark/conf/core-site.xml
fi

# We don't want any of the incoming service variables, we'd rather use
# DNS. But this one interferes directly with Spark.
unset SPARK_MASTER_PORT

# spark.{executor,driver}.extraLibraryPath don't actually seem to
# work, this seems to be the only reliable way to get the native libs
# picked up.
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/hadoop/lib/native
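Off GCE, the metadata lookup fails and PROJECT_ID stays empty, so the sed never runs and the placeholder survives. A sketch of reproducing the probe by hand:

# prints the project id on GCE; fails fast anywhere else
curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/project-id || echo "not on GCE"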
start-master (new file):

@@ -0,0 +1,22 @@
#!/bin/bash

# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

. start-common.sh

echo "$(hostname -i) spark-master" >> /etc/hosts

# Run spark-class directly so that when it exits (or crashes), the pod restarts.
/opt/spark/bin/spark-class org.apache.spark.deploy.master.Master --ip spark-master --port 7077 --webui-port 8080
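Because spark-class stays in the foreground, a master crash ends the container and Kubernetes restarts the pod. One hedged way to reach the web UI, assuming (not verified here) that the chart exposes a spark-master Service:

kubectl port-forward svc/spark-master 8080:8080   # then open http://localhost:8080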
start-worker (new file):

@@ -0,0 +1,28 @@
#!/bin/bash

# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

. start-common.sh

if ! getent hosts spark-master; then
  echo "=== Cannot resolve the DNS entry for spark-master. Has the service been created yet, and is SkyDNS functional?"
  echo "=== See http://kubernetes.io/v1.1/docs/admin/dns.html for more details on DNS integration."
  echo "=== Sleeping 10s before pod exit."
  sleep 10
  exit 0
fi

# Run spark-class directly so that when it exits (or crashes), the pod restarts.
/opt/spark/bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077 --webui-port 8081
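The getent guard avoids a crash loop while DNS converges: exiting 0 after a short sleep lets the pod restart cleanly instead of failing. The same check is handy for debugging from any pod in the namespace, as a sketch:

getent hosts spark-master || echo "spark-master not resolvable yet"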
Dockerfile (Spark base image):

@@ -1,11 +1,15 @@
FROM java:openjdk-8-jdk
FROM openjdk:8-jdk-slim-buster
LABEL maintainer="Dalitso Banda [email protected]"

# Get Spark from US Apache mirror.
ENV APACHE_SPARK_VERSION 2.4.0
ENV HADOOP_VERSION 3.2.0
ENV HADOOP_GIT_COMMIT="release-3.2.0-RC1"
ENV APACHE_SPARK_VERSION 2.4.5
ENV HADOOP_VERSION 3.2.1

RUN echo "$LOG_TAG Getting SPARK_HOME" && \
apt-get update && \
# build deps and deps for c bindings for cntk
apt-get install -y build-essential && \
apt-get install -y autoconf automake libtool curl make unzip && \
mkdir -p /opt && \
cd /opt && \
curl http://apache.claz.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-without-hadoop.tgz | \

@@ -14,55 +18,35 @@ RUN echo "$LOG_TAG Getting SPARK_HOME" && \
echo Spark ${APACHE_SPARK_VERSION} installed in /opt/spark && \
export SPARK_HOME=/opt/spark

RUN echo "$LOG_TAG Getting maven" && \
wget http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz && \
tar -zxf apache-maven-3.3.9-bin.tar.gz -C /usr/local/ && \
ln -s /usr/local/apache-maven-3.3.9/bin/mvn /usr/local/bin/mvn

RUN echo "$LOG_TAG building hadoop" && \
echo "deb http://deb.debian.org/debian stretch main" >> /etc/apt/sources.list && \
apt-get update && \
# build deps and deps for c bindings for cntk
apt-get install -y g++ gcc-6 libstdc++-6-dev make build-essential && \
apt-get install -y autoconf automake libtool curl make unzip && \
cd / && \
git clone https://github.com/apache/hadoop.git hadoop_src && \
mkdir /hadoop_deps && cd /hadoop_deps && \
wget https://github.com/protocolbuffers/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.bz2 && \
tar xvf protobuf-2.5.0.tar.bz2 && \
cd protobuf-2.5.0 && \
./configure && make && make install && ldconfig && \
cd /hadoop_src && git checkout ${HADOOP_GIT_COMMIT} && mvn package -Pdist -DskipTests -Dtar && \
mv hadoop-dist/target/hadoop-${HADOOP_VERSION} /opt/hadoop && \
rm -r /hadoop_src && \
export HADOOP_HOME=/opt/hadoop && \
echo "\nexport HADOOP_CLASSPATH=/opt/hadoop/share/hadoop/tools/lib/*" >> /opt/hadoop/etc/hadoop/hadoop-env.sh && \
RUN echo "downloading hadoop" && \
apt-get install -y wget && \
cd /tmp && \
wget http://apache.claz.org/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz -O - | \
tar -xz && \
mv /tmp/hadoop-${HADOOP_VERSION} /opt/hadoop && \
echo "export HADOOP_CLASSPATH=/opt/hadoop/share/hadoop/tools/lib/*" >> /opt/hadoop/etc/hadoop/hadoop-env.sh && \
echo Hadoop ${HADOOP_VERSION} installed in /opt/hadoop && \
apt-get purge -y --auto-remove g++ make build-essential autoconf automake && \
cd / && rm -rf /hadoop_deps
rm -rf /opt/hadoop/share/doc

RUN echo "\nSPARK_DIST_CLASSPATH=/jars:/jars/*:$(/opt/hadoop/bin/hadoop classpath)" >> /opt/spark/conf/spark-env.sh
ENV HADOOP_HOME=/opt/hadoop
ADD jars /jars

# if numpy is installed on a driver it needs to be installed on all
# workers, so install it everywhere
RUN apt-get update && \
apt-get install -y g++ python-dev build-essential python3-dev && \
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
python get-pip.py && \
rm get-pip.py && \
pip install -U pip setuptools wheel && \
pip install numpy && \
pip install matplotlib && \
pip install pandas && \
apt-get purge -y --auto-remove python-dev build-essential python3-dev && \
apt install -y python3-pip && \
pip3 install numpy && \
pip3 install matplotlib && \
pip3 install pandas==0.24.1 && \
pip3 install scikit-learn && \
pip3 install pyarrow==0.11.1 && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Final config
ADD log4j.properties /opt/spark/conf/log4j.properties
ADD start-common.sh start-worker start-master /
ADD core-site.xml /opt/spark/conf/core-site.xml
ADD spark-defaults.conf /opt/spark/conf/spark-defaults.conf
ENV PATH $PATH:/opt/spark/bin
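As with the Livy image, a minimal build-and-run sketch; the spark-base tag is an assumption, and the ADD lines above imply that log4j.properties, core-site.xml, spark-defaults.conf and the start-* scripts sit next to this Dockerfile in the build context:

docker build -t spark-base .
docker run spark-base /start-master    # or /start-worker once spark-master resolves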