diff --git a/tools/helm/livy/Dockerfile b/tools/helm/livy/Dockerfile
index 7fe535f479..8aa5157fa1 100644
--- a/tools/helm/livy/Dockerfile
+++ b/tools/helm/livy/Dockerfile
@@ -1,5 +1,55 @@
-FROM java:openjdk-8-jdk
-MAINTAINER Dalitso Banda <dalitsohb@gmail.com>
+FROM openjdk:8-jdk-slim-buster
+LABEL maintainer="Dalitso Banda dalitsohb@gmail.com"
+
+# Get Spark from US Apache mirror.
+ENV APACHE_SPARK_VERSION 2.4.5
+ENV HADOOP_VERSION 3.2.1
+
+RUN echo "$LOG_TAG Getting SPARK_HOME" && \
+    apt-get update && \
+    # build deps and deps for c bindings for cntk
+    apt-get install -y build-essential && \
+    apt-get install -y autoconf automake libtool curl make unzip && \
+    mkdir -p /opt && \
+    cd /opt && \
+    curl http://apache.claz.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-without-hadoop.tgz | \
+        tar -xz && \
+    ln -s spark-${APACHE_SPARK_VERSION}-bin-without-hadoop spark && \
+    echo Spark ${APACHE_SPARK_VERSION} installed in /opt/spark && \
+    export SPARK_HOME=/opt/spark
+
+RUN echo "downloading hadoop" && \
+    apt-get install -y wget && \
+    cd /tmp && \
+    wget http://apache.claz.org/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz -O - | \
+        tar -xz && \
+    mv /tmp/hadoop-${HADOOP_VERSION} /opt/hadoop && \
+    echo "export HADOOP_CLASSPATH=/opt/hadoop/share/hadoop/tools/lib/*" >> /opt/hadoop/etc/hadoop/hadoop-env.sh && \
+    echo Hadoop ${HADOOP_VERSION} installed in /opt/hadoop && \
+    rm -rf /opt/hadoop/share/doc
+
+RUN echo "\nSPARK_DIST_CLASSPATH=/jars:/jars/*:$(/opt/hadoop/bin/hadoop classpath)" >> /opt/spark/conf/spark-env.sh
+ENV HADOOP_HOME=/opt/hadoop
+ADD jars /jars
+
+# if numpy is installed on a driver it needs to be installed on all
+# workers, so install it everywhere
+RUN apt-get update && \
+    apt install -y python3-pip && \
+    pip3 install numpy && \
+    pip3 install matplotlib && \
+    pip3 install pandas==0.24.1 && \
+    pip3 install scikit-learn && \
+    pip3 install pyarrow==0.11.1 && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Final config
+ADD spark-config/log4j.properties /opt/spark/conf/log4j.properties
+ADD spark-config/start-common.sh spark-config/start-worker spark-config/start-master /
+ADD spark-config/core-site.xml /opt/spark/conf/core-site.xml
+ADD spark-config/spark-defaults.conf /opt/spark/conf/spark-defaults.conf
+ENV PATH $PATH:/opt/spark/bin
 
 ENV LIVY_VERSION="git_master"
 ENV LIVY_COMMIT="02550f7919b7348b6a7270cf806e031670037b2f"
@@ -9,91 +59,43 @@ ENV LOG_TAG="[LIVY_${LIVY_VERSION}]:" \
     LC_ALL=en_US.UTF-8
 
 RUN echo "$LOG_TAG Install essentials" && \
-    apt-get -y update && \
-    apt-get install -y locales && \
-    locale-gen $LANG && \
-    apt-get install -y git wget grep curl sed && \
-    apt-get install -y python-setuptools && \
-    apt-get autoclean && apt-get autoremove && \
-    echo "$LOG_TAG Install python dependencies" && \
-    curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
-    python get-pip.py && \
-    rm get-pip.py && \
-    apt-get install -y python-dev libpython3-dev build-essential pkg-config gfortran && \
-    pip install -U pip setuptools wheel && \
+    apt-get update && \
+    apt-get install -y git wget curl && \
+    echo "$LOG_TAG setting python dependencies" && \
+    ln -s /usr/bin/python3 /usr/bin/python && \
     echo "$LOG_TAG Getting maven" && \
     wget http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz && \
     tar -zxf apache-maven-3.3.9-bin.tar.gz -C /usr/local/ && \
+    rm -rf apache-maven-3.3.9-bin.tar.gz && \
     ln -s /usr/local/apache-maven-3.3.9/bin/mvn /usr/local/bin/mvn && \
     echo "$LOG_TAG Download and build Livy source" && \
     git clone https://github.com/apache/incubator-livy.git ${LIVY_HOME}_src && \
     cd ${LIVY_HOME}_src && \
     git checkout ${LIVY_COMMIT} && \
     mvn package -DskipTests && \
-    ls ${LIVY_HOME}_src && \
+    mv ${LIVY_HOME}_src ${LIVY_HOME} && \
     echo "$LOG_TAG Cleanup" && \
-    apt-get purge -y --auto-remove build-essential pkg-config gfortran libpython3-dev && \
-    apt-get autoremove && \
-    apt-get autoclean && \
-    apt-get clean && \
+    rm -rf /usr/local/apache-maven-3.3.9 && \
     rm -rf /root/.ivy2 && \
     rm -rf /root/.npm && \
     rm -rf /root/.m2 && \
     rm -rf /root/.cache && \
     rm -rf /tmp/*
 
-# Get Spark from US Apache mirror.
-ENV APACHE_SPARK_VERSION 2.4.0
-ENV HADOOP_VERSION 3.2.0
-ENV HADOOP_GIT_COMMIT="release-3.2.0-RC1"
-
-RUN echo "$LOG_TAG Getting SPARK_HOME" && \
-    mkdir -p /opt && \
-    cd /opt && \
-    curl http://apache.claz.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-without-hadoop.tgz | \
-        tar -xz && \
-    ln -s spark-${APACHE_SPARK_VERSION}-bin-without-hadoop spark && \
-    echo Spark ${APACHE_SPARK_VERSION} installed in /opt/spark && \
-    export SPARK_HOME=/opt/spark
-
-RUN echo "$LOG_TAG building hadoop" && \
-    apt-get update && \
-    apt-get install -y make autoconf automake libtool g++ unzip && \
-    cd / && \
-    git clone https://github.com/apache/hadoop.git hadoop_src&& \
-    mkdir /hadoop_deps && cd /hadoop_deps && \
-    wget https://github.com/protocolbuffers/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.bz2 && \
-    tar xvf protobuf-2.5.0.tar.bz2 && \
-    cd protobuf-2.5.0 && \
-    ./configure && make && make install && ldconfig && \
-    cd /hadoop_src && git checkout ${HADOOP_GIT_COMMIT} && mvn package -Pdist -DskipTests -Dtar && \
-    mv hadoop-dist/target/hadoop-${HADOOP_VERSION} /opt/hadoop && \
-    rm -r /hadoop_src && \
-    rm -rf /root/.ivy2 && \
-    rm -rf /root/.m2 && \
-    export HADOOP_HOME=/opt/hadoop && \
-    echo "\nexport HADOOP_CLASSPATH=/opt/hadoop/share/hadoop/tools/lib/*" >> /opt/hadoop/etc/hadoop/hadoop-env.sh && \
-    echo Hadoop ${HADOOP_VERSION} installed in /opt/hadoop && \
-    apt-get purge -y --auto-remove g++ make build-essential autoconf automake && \
-    cd / && rm -rf /hadoop_deps
-
-RUN echo "\nSPARK_DIST_CLASSPATH=/jars:/jars/*:$(/opt/hadoop/bin/hadoop classpath)" >> /opt/spark/conf/spark-env.sh
-ENV HADOOP_HOME=/opt/hadoop
 ADD jars /jars
 ENV HADOOP_CONF_DIR /opt/hadoop/conf
 ENV CONF_DIR /livy/conf
 ENV SPARK_CONF_DIR /opt/spark/conf
-RUN mv ${LIVY_HOME}_src ${LIVY_HOME}
 ADD livy.conf ${LIVY_HOME}/conf
 
 EXPOSE 8998
 
 WORKDIR ${LIVY_HOME}
-RUN mkdir logs && export SPARK_HOME=/opt/spark && export HADOOP_HOME=/opt/hadoop && export SPARK_CONF_DIR=/opt/spark/conf
+RUN mkdir logs
 
 #hive needed for livy pyspark
-RUN wget http://central.maven.org/maven2/org/apache/spark/spark-hive_2.11/2.4.0/spark-hive_2.11-2.4.0.jar -P /opt/spark/jars
+RUN wget https://repo1.maven.org/maven2/org/apache/spark/spark-hive_2.11/2.4.5/spark-hive_2.11-2.4.5.jar -P /opt/spark/jars
 
-CMD ["sh", "-c", "echo '\nspark.driver.host' $(hostname -i) >> /opt/spark/conf/spark-defaults.conf && echo '\nlivy.spark.master' $SPARK_MASTER >> /livy/conf/livy.conf && bin/livy-server"]
+CMD ["sh", "-c", "echo '\nspark.driver.host' $(hostname -i) >> /opt/spark/conf/spark-defaults.conf && echo '\nlivy.spark.master' $SPARK_MASTER >> /livy/conf/livy.conf && bin/livy-server"]
\ No newline at end of file
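The rewritten Livy image no longer builds Spark and Hadoop from source; it pulls release tarballs and resolves its master at container start, since the CMD appends `livy.spark.master` from `$SPARK_MASTER` into livy.conf. A minimal run sketch — the image tag and master URL below are hypothetical, not part of this diff:

```bash
# Build from the livy context (tag is hypothetical).
docker build -t mmlspark/livy:dev tools/helm/livy

# The CMD writes spark.driver.host and livy.spark.master before starting Livy.
docker run -d -p 8998:8998 \
    -e SPARK_MASTER=spark://spark-master:7077 \
    mmlspark/livy:dev

# Livy's REST endpoint answers on 8998 once the server is up.
curl http://localhost:8998/sessions
```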
diff --git a/tools/helm/livy/spark-config/core-site.xml b/tools/helm/livy/spark-config/core-site.xml
new file mode 100644
index 0000000000..2fecabedc8
--- /dev/null
+++ b/tools/helm/livy/spark-config/core-site.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+  <property>
+    <name>fs.gs.impl</name>
+    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem</value>
+    <description>The FileSystem for gs: (GCS) uris.</description>
+  </property>
+  <property>
+    <name>fs.AbstractFileSystem.gs.impl</name>
+    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
+    <description>The AbstractFileSystem for gs: (GCS) uris. Only necessary for use with Hadoop 2.</description>
+  </property>
+  <property>
+    <name>fs.gs.project.id</name>
+    <value>NOT_RUNNING_INSIDE_GCE</value>
+  </property>
+</configuration>
diff --git a/tools/helm/livy/spark-config/log4j.properties b/tools/helm/livy/spark-config/log4j.properties
new file mode 100644
index 0000000000..3a2a882198
--- /dev/null
+++ b/tools/helm/livy/spark-config/log4j.properties
@@ -0,0 +1,12 @@
+# Set everything to be logged to the console
+log4j.rootCategory=INFO, console
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+
+# Settings to quiet third party logs that are too verbose
+log4j.logger.org.spark-project.jetty=WARN
+log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
+log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
+log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
diff --git a/tools/helm/livy/spark-config/spark-defaults.conf b/tools/helm/livy/spark-config/spark-defaults.conf
new file mode 100644
index 0000000000..5b3e62b9f4
--- /dev/null
+++ b/tools/helm/livy/spark-config/spark-defaults.conf
@@ -0,0 +1 @@
+spark.app.id KubernetesSpark
diff --git a/tools/helm/livy/spark-config/start-common.sh b/tools/helm/livy/spark-config/start-common.sh
new file mode 100644
index 0000000000..ac8d505838
--- /dev/null
+++ b/tools/helm/livy/spark-config/start-common.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+# Copyright 2015 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+PROJECT_ID=$(curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/project-id)
+
+if [[ -n "${PROJECT_ID}" ]]; then
+  sed -i "s/NOT_RUNNING_INSIDE_GCE/${PROJECT_ID}/" /opt/spark/conf/core-site.xml
+fi
+
+# We don't want any of the incoming service variables, we'd rather use
+# DNS. But this one interferes directly with Spark.
+unset SPARK_MASTER_PORT
+
+# spark.{executor,driver}.extraLibraryPath don't actually seem to
+# work, this seems to be the only reliable way to get the native libs
+# picked up.
+export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/hadoop/lib/native
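core-site.xml registers the GCS connector classes for `gs:` URIs and leaves a placeholder project id, which start-common.sh rewrites from the GCE metadata server at pod start. A sketch of checking the wiring inside a running container — the bucket name is a placeholder, and this assumes the GCS connector jar was among those shipped into /jars:

```bash
# Off GCE the metadata probe returns nothing and the placeholder survives;
# on GCE start-common.sh substitutes the real project id.
grep -A1 'fs.gs.project.id' /opt/spark/conf/core-site.xml

# Prove the gs: scheme resolves; requires the GCS connector jar from /jars
# on the Hadoop classpath (bucket name is a placeholder).
export HADOOP_CLASSPATH="/jars/*:${HADOOP_CLASSPATH}"
/opt/hadoop/bin/hadoop fs -ls gs://some-bucket/
```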
diff --git a/tools/helm/livy/spark-config/start-master b/tools/helm/livy/spark-config/start-master
new file mode 100644
index 0000000000..f5e83a3074
--- /dev/null
+++ b/tools/helm/livy/spark-config/start-master
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Copyright 2015 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+. start-common.sh
+
+echo "$(hostname -i) spark-master" >> /etc/hosts
+
+# Run spark-class directly so that when it exits (or crashes), the pod restarts.
+/opt/spark/bin/spark-class org.apache.spark.deploy.master.Master --ip spark-master --port 7077 --webui-port 8080
diff --git a/tools/helm/livy/spark-config/start-worker b/tools/helm/livy/spark-config/start-worker
new file mode 100644
index 0000000000..5b9ccaebce
--- /dev/null
+++ b/tools/helm/livy/spark-config/start-worker
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# Copyright 2015 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+. start-common.sh
+
+if ! getent hosts spark-master; then
+  echo "=== Cannot resolve the DNS entry for spark-master. Has the service been created yet, and is SkyDNS functional?"
+  echo "=== See http://kubernetes.io/v1.1/docs/admin/dns.html for more details on DNS integration."
+  echo "=== Sleeping 10s before pod exit."
+  sleep 10
+  exit 0
+fi
+
+# Run spark-class directly so that when it exits (or crashes), the pod restarts.
+/opt/spark/bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077 --webui-port 8081
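start-master publishes the pod IP under the `spark-master` hostname, and start-worker refuses to start until that name resolves, exiting cleanly so the kubelet restarts the pod. Because the scripts are ADDed to `/` in the image, they can be smoke-tested with plain Docker; the tag and network name below are hypothetical, and this assumes the scripts kept their executable bit:

```bash
# Give the master container the DNS name the scripts expect.
docker network create spark-net
docker run -d --name spark-master --network spark-net --network-alias spark-master \
    -p 8080:8080 mmlspark/livy:dev /start-master
docker run -d --name spark-worker --network spark-net mmlspark/livy:dev /start-worker

# The master web UI should report one ALIVE worker once registration completes.
curl -s http://localhost:8080 | grep -i alive
```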
diff --git a/tools/helm/spark/Dockerfile b/tools/helm/spark/Dockerfile
index a04d5e62e6..7c89e2d448 100644
--- a/tools/helm/spark/Dockerfile
+++ b/tools/helm/spark/Dockerfile
@@ -1,11 +1,15 @@
-FROM java:openjdk-8-jdk
+FROM openjdk:8-jdk-slim-buster
+LABEL maintainer="Dalitso Banda dalitsohb@gmail.com"
 
 # Get Spark from US Apache mirror.
-ENV APACHE_SPARK_VERSION 2.4.0
-ENV HADOOP_VERSION 3.2.0
-ENV HADOOP_GIT_COMMIT="release-3.2.0-RC1"
+ENV APACHE_SPARK_VERSION 2.4.5
+ENV HADOOP_VERSION 3.2.1
 
 RUN echo "$LOG_TAG Getting SPARK_HOME" && \
+    apt-get update && \
+    # build deps and deps for c bindings for cntk
+    apt-get install -y build-essential && \
+    apt-get install -y autoconf automake libtool curl make unzip && \
     mkdir -p /opt && \
     cd /opt && \
     curl http://apache.claz.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-without-hadoop.tgz | \
@@ -14,55 +18,35 @@ RUN echo "$LOG_TAG Getting SPARK_HOME" && \
     echo Spark ${APACHE_SPARK_VERSION} installed in /opt/spark && \
     export SPARK_HOME=/opt/spark
 
-RUN echo "$LOG_TAG Getting maven" && \
-    wget http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz && \
-    tar -zxf apache-maven-3.3.9-bin.tar.gz -C /usr/local/ && \
-    ln -s /usr/local/apache-maven-3.3.9/bin/mvn /usr/local/bin/mvn
-
-RUN echo "$LOG_TAG building hadoop" && \
-    echo "deb http://deb.debian.org/debian stretch main" >> /etc/apt/sources.list && \
-    apt-get update && \
-    # build deps and deps for c bindings for cntk
-    apt-get install -y g++ gcc-6 libstdc++-6-dev make build-essential && \
-    apt-get install -y autoconf automake libtool curl make unzip && \
-    cd / && \
-    git clone https://github.com/apache/hadoop.git hadoop_src&& \
-    mkdir /hadoop_deps && cd /hadoop_deps && \
-    wget https://github.com/protocolbuffers/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.bz2 && \
-    tar xvf protobuf-2.5.0.tar.bz2 && \
-    cd protobuf-2.5.0 && \
-    ./configure && make && make install && ldconfig && \
-    cd /hadoop_src && git checkout ${HADOOP_GIT_COMMIT} && mvn package -Pdist -DskipTests -Dtar && \
-    mv hadoop-dist/target/hadoop-${HADOOP_VERSION} /opt/hadoop && \
-    rm -r /hadoop_src && \
-    export HADOOP_HOME=/opt/hadoop && \
-    echo "\nexport HADOOP_CLASSPATH=/opt/hadoop/share/hadoop/tools/lib/*" >> /opt/hadoop/etc/hadoop/hadoop-env.sh && \
+RUN echo "downloading hadoop" && \
+    apt-get install -y wget && \
+    cd /tmp && \
+    wget http://apache.claz.org/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz -O - | \
+        tar -xz && \
+    mv /tmp/hadoop-${HADOOP_VERSION} /opt/hadoop && \
+    echo "export HADOOP_CLASSPATH=/opt/hadoop/share/hadoop/tools/lib/*" >> /opt/hadoop/etc/hadoop/hadoop-env.sh && \
     echo Hadoop ${HADOOP_VERSION} installed in /opt/hadoop && \
-    apt-get purge -y --auto-remove g++ make build-essential autoconf automake && \
-    cd / && rm -rf /hadoop_deps
+    rm -rf /opt/hadoop/share/doc
 
 RUN echo "\nSPARK_DIST_CLASSPATH=/jars:/jars/*:$(/opt/hadoop/bin/hadoop classpath)" >> /opt/spark/conf/spark-env.sh
 ENV HADOOP_HOME=/opt/hadoop
 ADD jars /jars
-
 # if numpy is installed on a driver it needs to be installed on all
 # workers, so install it everywhere
 RUN apt-get update && \
-    apt-get install -y g++ python-dev build-essential python3-dev && \
-    curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
-    python get-pip.py && \
-    rm get-pip.py && \
-    pip install -U pip setuptools wheel && \
-    pip install numpy && \
-    pip install matplotlib && \
-    pip install pandas && \
-    apt-get purge -y --auto-remove python-dev build-essential python3-dev && \
+    apt install -y python3-pip && \
+    pip3 install numpy && \
+    pip3 install matplotlib && \
+    pip3 install pandas==0.24.1 && \
+    pip3 install scikit-learn && \
+    pip3 install pyarrow==0.11.1 && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
+# Final config
 ADD log4j.properties /opt/spark/conf/log4j.properties
 ADD start-common.sh start-worker start-master /
 ADD core-site.xml /opt/spark/conf/core-site.xml
 ADD spark-defaults.conf /opt/spark/conf/spark-defaults.conf
-ENV PATH $PATH:/opt/spark/bin
+ENV PATH $PATH:/opt/spark/bin
\ No newline at end of file
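The image consumes a "without-hadoop" Spark build and grafts Hadoop 3.2.1 on via SPARK_DIST_CLASSPATH in spark-env.sh, with the Python stack pinned to versions whose Arrow format works with Spark 2.4's pandas UDFs (pyarrow 0.11.x). Two sanity-check sketches, assuming a hypothetical tag mmlspark/spark:dev:

```bash
# spark-env.sh should carry /jars plus the `hadoop classpath` expanded at build time.
docker run --rm mmlspark/spark:dev cat /opt/spark/conf/spark-env.sh

# The pinned Python stack imports cleanly on every node.
docker run --rm mmlspark/spark:dev \
    python3 -c "import numpy, pandas, pyarrow; print(pandas.__version__, pyarrow.__version__)"
```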
 ADD start-common.sh start-worker start-master /
 ADD core-site.xml /opt/spark/conf/core-site.xml
 ADD spark-defaults.conf /opt/spark/conf/spark-defaults.conf
-ENV PATH $PATH:/opt/spark/bin
+ENV PATH $PATH:/opt/spark/bin
\ No newline at end of file
diff --git a/tools/helm/zepplin/Dockerfile b/tools/helm/zeppelin/Dockerfile
similarity index 51%
rename from tools/helm/zepplin/Dockerfile
rename to tools/helm/zeppelin/Dockerfile
index 870765e38a..686101540e 100644
--- a/tools/helm/zepplin/Dockerfile
+++ b/tools/helm/zeppelin/Dockerfile
@@ -1,7 +1,58 @@
-FROM java:openjdk-8-jdk
-MAINTAINER Dalitso Banda <dalitsohb@gmail.com>
+FROM openjdk:8-jdk-slim-buster
+LABEL maintainer="Dalitso Banda dalitsohb@gmail.com"
+
+# Get Spark from US Apache mirror.
+ENV APACHE_SPARK_VERSION 2.4.5
+ENV HADOOP_VERSION 3.2.1
+
+RUN echo "$LOG_TAG Getting SPARK_HOME" && \
+    apt-get update && \
+    # build deps and deps for c bindings for cntk
+    apt-get install -y build-essential && \
+    apt-get install -y autoconf automake libtool curl make unzip && \
+    mkdir -p /opt && \
+    cd /opt && \
+    curl http://apache.claz.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-without-hadoop.tgz | \
+        tar -xz && \
+    ln -s spark-${APACHE_SPARK_VERSION}-bin-without-hadoop spark && \
+    echo Spark ${APACHE_SPARK_VERSION} installed in /opt/spark && \
+    export SPARK_HOME=/opt/spark
+
+RUN echo "downloading hadoop" && \
+    apt-get install -y wget && \
+    cd /tmp && \
+    wget http://apache.claz.org/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz -O - | \
+        tar -xz && \
+    mv /tmp/hadoop-${HADOOP_VERSION} /opt/hadoop && \
+    echo "export HADOOP_CLASSPATH=/opt/hadoop/share/hadoop/tools/lib/*" >> /opt/hadoop/etc/hadoop/hadoop-env.sh && \
+    echo Hadoop ${HADOOP_VERSION} installed in /opt/hadoop && \
+    rm -rf /opt/hadoop/share/doc
+
+RUN echo "\nSPARK_DIST_CLASSPATH=/jars:/jars/*:$(/opt/hadoop/bin/hadoop classpath)" >> /opt/spark/conf/spark-env.sh
+ENV HADOOP_HOME=/opt/hadoop
+ADD jars /jars
+
+# if numpy is installed on a driver it needs to be installed on all
+# workers, so install it everywhere
+RUN apt-get update && \
+    apt install -y python3-pip && \
+    pip3 install numpy && \
+    pip3 install matplotlib && \
+    pip3 install pandas==0.24.1 && \
+    pip3 install scikit-learn && \
+    pip3 install pyarrow==0.11.1 && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Final config
+ADD spark-config/log4j.properties /opt/spark/conf/log4j.properties
+ADD spark-config/start-common.sh spark-config/start-worker spark-config/start-master /
+ADD spark-config/core-site.xml /opt/spark/conf/core-site.xml
+ADD spark-config/spark-defaults.conf /opt/spark/conf/spark-defaults.conf
+ENV PATH $PATH:/opt/spark/bin
+
+ADD patch_beam.patch /tmp/patch_beam.patch
 
-# `Z_VERSION` will be updated by `dev/change_zeppelin_version.sh`
 ENV Z_VERSION="git_master"
 ENV Z_COMMIT="2ea945f548a4e41312026d5ee1070714c155a11e"
 ENV LOG_TAG="[ZEPPELIN_${Z_VERSION}]:" \
@@ -21,10 +72,8 @@ RUN echo "$LOG_TAG Getting maven" && \
     tar -zxf apache-maven-3.3.9-bin.tar.gz -C /usr/local/ && \
     ln -s /usr/local/apache-maven-3.3.9/bin/mvn /usr/local/bin/mvn
 
-ADD patch_beam.patch /tmp/patch_beam.patch
-
 RUN echo "$LOG_TAG install nodejs" && \
-    curl -sL https://deb.nodesource.com/setup_11.x | bash - && apt-get install -y nodejs && \
+    curl -sL https://deb.nodesource.com/setup_12.x | bash - && apt-get install -y nodejs && \
     echo "$LOG_TAG Download Zeppelin source" && \
     git clone https://github.com/apache/zeppelin.git /zeppelin-${Z_VERSION}-bin-all && \
     mv /zeppelin-${Z_VERSION}-bin-all ${Z_HOME}_src && \
@@ -48,18 +97,18 @@ RUN echo "$LOG_TAG install nodejs" && \
     mkdir -p /usr/local/lib/node_modules && \
     npm install -g @angular/cli && \
     npm install -g grunt-cli bower && \
-    bower install && \
-    cd ${Z_HOME}_src && \
-    export MAVEN_OPTS="-Xmx2g -Xss128M -XX:MetaspaceSize=512M -XX:MaxMetaspaceSize=1024M -XX:+CMSClassUnloadingEnabled" && \
+    bower install
+
+RUN cd ${Z_HOME}_src && \
+    export MAVEN_OPTS="-Xmx2048m -XX:MaxPermSize=256m" && \
     mvn -e -B package -DskipTests -Pscala-2.11 -Pbuild-distr && \
     tar xvf ${Z_HOME}_src/zeppelin-distribution/target/zeppelin-0.9.0-SNAPSHOT.tar.gz && \
     rm -rf ${Z_HOME}/* && \
     mv zeppelin-0.9.0-SNAPSHOT ${Z_HOME}_dist && \
     mv ${Z_HOME}_dist/* ${Z_HOME} && \
     echo "$LOG_TAG Cleanup" && \
-    apt-get remove --purge -y r-base-dev r-cran-evaluate libfontconfig && \
+    rm -rf /usr/local/apache-maven-3.3.9 && \
     npm uninstall -g @angular/cli grunt-cli bower && \
-    apt-get autoclean && apt-get autoremove -y && \
     rm -rf ${Z_HOME}_dist && \
     rm -rf ${Z_HOME}_src && \
     rm -rf /root/.ivy2 && \
@@ -68,83 +117,19 @@ RUN echo "$LOG_TAG install nodejs" && \
     rm -rf /root/.cache && \
     rm -rf /tmp/*
 
-RUN echo "$LOG_TAG install tini related packages" && \
-    apt-get install -y wget curl grep sed dpkg && \
-    TINI_VERSION=`curl https://github.com/krallin/tini/releases/latest | grep -o "/v.*\"" | sed 's:^..\(.*\).$:\1:'` && \
-    curl -L "https://github.com/krallin/tini/releases/download/v${TINI_VERSION}/tini_${TINI_VERSION}.deb" > tini.deb && \
-    dpkg -i tini.deb && \
-    rm tini.deb
-
-RUN echo "$LOG_TAG installing python related packages" && \
-    curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
-    python get-pip.py && \
-    rm get-pip.py && \
-    apt-get install -y python-dev libpython3-dev build-essential pkg-config gfortran && \
-    pip install -U pip setuptools wheel && \
-    pip install numpy && \
-    pip install matplotlib && \
-    pip install pandas && \
-    apt-get update && \
-    apt-get upgrade -y && \
-    echo "deb http://deb.debian.org/debian stretch main" >> /etc/apt/sources.list && \
-    apt-get update && \
-    apt-get install -y g++ gcc-6 libstdc++-6-dev && \
-    echo "$LOG_TAG Cleanup" && \
-    apt-get purge -y --auto-remove build-essential pkg-config gfortran libpython3-dev && \
-    apt-get autoremove -y && \
-    apt-get autoclean && \
-    apt-get clean && \
-    rm -rf /root/.npm && \
-    rm -rf /root/.m2 && \
-    rm -rf /root/.cache && \
-    rm -rf /tmp/*
-
-ENV APACHE_SPARK_VERSION 2.4.0
-ENV HADOOP_VERSION 3.2.0
-ENV HADOOP_GIT_COMMIT="release-3.2.0-RC1"
-
-RUN echo "$LOG_TAG Getting SPARK_HOME" && \
-    mkdir -p /opt && \
-    cd /opt && \
-    curl http://apache.claz.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-without-hadoop.tgz | \
-        tar -xz && \
-    ln -s spark-${APACHE_SPARK_VERSION}-bin-without-hadoop spark && \
-    echo Spark ${APACHE_SPARK_VERSION} installed in /opt/spark && \
-    export SPARK_HOME=/opt/spark
-
-RUN echo "$LOG_TAG building hadoop" && \
-    apt-get update && \
-    apt-get install -y make && \
-    cd / && \
-    git clone https://github.com/apache/hadoop.git hadoop_src&& \
-    mkdir /hadoop_deps && cd /hadoop_deps && \
-    wget https://github.com/protocolbuffers/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.bz2 && \
-    tar xvf protobuf-2.5.0.tar.bz2 && \
-    cd protobuf-2.5.0 && \
-    ./configure && make && make install && ldconfig && \
-    cd /hadoop_src && git checkout ${HADOOP_GIT_COMMIT} && mvn package -Pdist -DskipTests -Dtar && \
-    mv hadoop-dist/target/hadoop-${HADOOP_VERSION} /opt/hadoop && \
-    rm -r /hadoop_src && \
-    rm -rf /root/.ivy2 && \
-    rm -rf /root/.m2 && \
-    export HADOOP_HOME=/opt/hadoop && \
-    echo "\nexport HADOOP_CLASSPATH=/opt/hadoop/share/hadoop/tools/lib/*" >> /opt/hadoop/etc/hadoop/hadoop-env.sh && \
-    echo Hadoop ${HADOOP_VERSION} installed in /opt/hadoop && \
-    apt-get purge -y --auto-remove g++ make build-essential autoconf automake && \
-    cd / && rm -rf /hadoop_deps
-
-RUN echo "\nSPARK_DIST_CLASSPATH=/jars:/jars/*:$(/opt/hadoop/bin/hadoop classpath)" >> /opt/spark/conf/spark-env.sh
-ENV HADOOP_HOME=/opt/hadoop
 ADD jars /jars
 
 # add notebooks
-ADD mmlsparkExamples ${Z_HOME}/notebook/mmlspark/
+ADD mmlsparkExamples/ ${Z_HOME}/notebook/mmlspark/
 
 ADD spark-defaults.conf /opt/spark/conf/spark-defaults.conf
 ADD zeppelin-env.sh ${Z_HOME}/conf/
 
+# use python3 as default since that's what's in the base image
+RUN echo "export PYSPARK_DRIVER_PYTHON=python3" >> ${Z_HOME}/conf/zeppelin-env.sh && \
+    echo "export PYSPARK_PYTHON=python3" >> ${Z_HOME}/conf/zeppelin-env.sh
+
 EXPOSE 8080
 
-ENTRYPOINT [ "/usr/bin/tini", "--" ]
 WORKDIR ${Z_HOME}
-CMD ["sh", "-c", "echo '\nspark.driver.host' $(hostname -i) >> /opt/spark/conf/spark-defaults.conf && bin/zeppelin.sh"]
+CMD ["sh", "-c", "echo '\nspark.driver.host' $(hostname -i) >> /opt/spark/conf/spark-defaults.conf && bin/zeppelin.sh"]
\ No newline at end of file
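The two exports appended to zeppelin-env.sh point both the Zeppelin driver and the Spark executors at python3, matching the interpreter actually installed in the slim base image. A quick check, with a hypothetical image tag:

```bash
# The appended exports should be the last two lines of zeppelin-env.sh;
# $Z_HOME is set by the Dockerfile's ENV block, so it resolves in-container.
docker run --rm mmlspark/zeppelin:dev sh -c 'tail -n 2 "$Z_HOME/conf/zeppelin-env.sh"'
```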
diff --git a/tools/helm/zepplin/jars/README.md b/tools/helm/zeppelin/jars/README.md
similarity index 100%
rename from tools/helm/zepplin/jars/README.md
rename to tools/helm/zeppelin/jars/README.md
diff --git a/tools/helm/zepplin/mini.Dockerfile b/tools/helm/zeppelin/mini.Dockerfile
similarity index 100%
rename from tools/helm/zepplin/mini.Dockerfile
rename to tools/helm/zeppelin/mini.Dockerfile
diff --git a/tools/helm/zepplin/mmlsparkExamples/classification_mmlspark_2E3REACQR.zpln b/tools/helm/zeppelin/mmlsparkExamples/classification_mmlspark_2E3REACQR.zpln
similarity index 100%
rename from tools/helm/zepplin/mmlsparkExamples/classification_mmlspark_2E3REACQR.zpln
rename to tools/helm/zeppelin/mmlsparkExamples/classification_mmlspark_2E3REACQR.zpln
diff --git a/tools/helm/zepplin/mmlsparkExamples/serving.py b/tools/helm/zeppelin/mmlsparkExamples/serving.py
similarity index 100%
rename from tools/helm/zepplin/mmlsparkExamples/serving.py
rename to tools/helm/zeppelin/mmlsparkExamples/serving.py
diff --git a/tools/helm/zepplin/mmlsparkExamples/simplification_mmlspark.zpln b/tools/helm/zeppelin/mmlsparkExamples/simplification_mmlspark.zpln
similarity index 100%
rename from tools/helm/zepplin/mmlsparkExamples/simplification_mmlspark.zpln
rename to tools/helm/zeppelin/mmlsparkExamples/simplification_mmlspark.zpln
diff --git a/tools/helm/zepplin/mmlsparkExamples/sparkPi_2E12S8C29.zpln b/tools/helm/zeppelin/mmlsparkExamples/sparkPi_2E12S8C29.zpln
similarity index 100%
rename from tools/helm/zepplin/mmlsparkExamples/sparkPi_2E12S8C29.zpln
rename to tools/helm/zeppelin/mmlsparkExamples/sparkPi_2E12S8C29.zpln
diff --git a/tools/helm/zepplin/mmlsparkExamples/sparkserving_2DZFNGU8A.zpln b/tools/helm/zeppelin/mmlsparkExamples/sparkserving_2DZFNGU8A.zpln
similarity index 100%
rename from tools/helm/zepplin/mmlsparkExamples/sparkserving_2DZFNGU8A.zpln
rename to tools/helm/zeppelin/mmlsparkExamples/sparkserving_2DZFNGU8A.zpln
diff --git a/tools/helm/zepplin/mmlsparkExamples/submitjob_2DZ7DHX6E.zpln b/tools/helm/zeppelin/mmlsparkExamples/submitjob_2DZ7DHX6E.zpln
similarity index 100%
rename from tools/helm/zepplin/mmlsparkExamples/submitjob_2DZ7DHX6E.zpln
rename to tools/helm/zeppelin/mmlsparkExamples/submitjob_2DZ7DHX6E.zpln
diff --git a/tools/helm/zepplin/patch_beam.patch b/tools/helm/zeppelin/patch_beam.patch
similarity index 100%
rename from tools/helm/zepplin/patch_beam.patch
rename to tools/helm/zeppelin/patch_beam.patch
diff --git a/tools/helm/zeppelin/spark-config/core-site.xml b/tools/helm/zeppelin/spark-config/core-site.xml
new file mode 100644
index 0000000000..2fecabedc8
--- /dev/null
+++ b/tools/helm/zeppelin/spark-config/core-site.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+  <property>
+    <name>fs.gs.impl</name>
+    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem</value>
+    <description>The FileSystem for gs: (GCS) uris.</description>
+  </property>
+  <property>
+    <name>fs.AbstractFileSystem.gs.impl</name>
+    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
+    <description>The AbstractFileSystem for gs: (GCS) uris. Only necessary for use with Hadoop 2.</description>
+  </property>
+  <property>
+    <name>fs.gs.project.id</name>
+    <value>NOT_RUNNING_INSIDE_GCE</value>
+  </property>
+</configuration>
diff --git a/tools/helm/zeppelin/spark-config/log4j.properties b/tools/helm/zeppelin/spark-config/log4j.properties
new file mode 100644
index 0000000000..3a2a882198
--- /dev/null
+++ b/tools/helm/zeppelin/spark-config/log4j.properties
@@ -0,0 +1,12 @@
+# Set everything to be logged to the console
+log4j.rootCategory=INFO, console
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+
+# Settings to quiet third party logs that are too verbose
+log4j.logger.org.spark-project.jetty=WARN
+log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
+log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
+log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
diff --git a/tools/helm/zeppelin/spark-config/spark-defaults.conf b/tools/helm/zeppelin/spark-config/spark-defaults.conf
new file mode 100644
index 0000000000..5b3e62b9f4
--- /dev/null
+++ b/tools/helm/zeppelin/spark-config/spark-defaults.conf
@@ -0,0 +1 @@
+spark.app.id KubernetesSpark
diff --git a/tools/helm/zeppelin/spark-config/start-common.sh b/tools/helm/zeppelin/spark-config/start-common.sh
new file mode 100644
index 0000000000..ac8d505838
--- /dev/null
+++ b/tools/helm/zeppelin/spark-config/start-common.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+# Copyright 2015 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+PROJECT_ID=$(curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/project-id)
+
+if [[ -n "${PROJECT_ID}" ]]; then
+  sed -i "s/NOT_RUNNING_INSIDE_GCE/${PROJECT_ID}/" /opt/spark/conf/core-site.xml
+fi
+
+# We don't want any of the incoming service variables, we'd rather use
+# DNS. But this one interferes directly with Spark.
+unset SPARK_MASTER_PORT
+
+# spark.{executor,driver}.extraLibraryPath don't actually seem to
+# work, this seems to be the only reliable way to get the native libs
+# picked up.
+export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/hadoop/lib/native
diff --git a/tools/helm/zeppelin/spark-config/start-master b/tools/helm/zeppelin/spark-config/start-master
new file mode 100644
index 0000000000..f5e83a3074
--- /dev/null
+++ b/tools/helm/zeppelin/spark-config/start-master
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Copyright 2015 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+. start-common.sh
+
+echo "$(hostname -i) spark-master" >> /etc/hosts
+
+# Run spark-class directly so that when it exits (or crashes), the pod restarts.
+/opt/spark/bin/spark-class org.apache.spark.deploy.master.Master --ip spark-master --port 7077 --webui-port 8080
diff --git a/tools/helm/zeppelin/spark-config/start-worker b/tools/helm/zeppelin/spark-config/start-worker
new file mode 100644
index 0000000000..5b9ccaebce
--- /dev/null
+++ b/tools/helm/zeppelin/spark-config/start-worker
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# Copyright 2015 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+. start-common.sh
+
+if ! getent hosts spark-master; then
+  echo "=== Cannot resolve the DNS entry for spark-master. Has the service been created yet, and is SkyDNS functional?"
+  echo "=== See http://kubernetes.io/v1.1/docs/admin/dns.html for more details on DNS integration."
+  echo "=== Sleeping 10s before pod exit."
+  sleep 10
+  exit 0
+fi
+
+# Run spark-class directly so that when it exits (or crashes), the pod restarts.
+/opt/spark/bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077 --webui-port 8081
diff --git a/tools/helm/zepplin/spark-defaults.conf b/tools/helm/zeppelin/spark-defaults.conf
similarity index 100%
rename from tools/helm/zepplin/spark-defaults.conf
rename to tools/helm/zeppelin/spark-defaults.conf
diff --git a/tools/helm/zepplin/zeppelin-env.sh b/tools/helm/zeppelin/zeppelin-env.sh
similarity index 100%
rename from tools/helm/zepplin/zeppelin-env.sh
rename to tools/helm/zeppelin/zeppelin-env.sh
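Taken together, the three images share the same Spark 2.4.5 / Hadoop 3.2.1 layout and are built from their sibling contexts. A build sketch — the tags are hypothetical, and each context needs a populated jars/ directory for its ADD jars /jars step:

```bash
# Hypothetical tags; each build context is a directory touched by this diff.
docker build -t mmlspark/spark:dev    tools/helm/spark
docker build -t mmlspark/livy:dev     tools/helm/livy
docker build -t mmlspark/zeppelin:dev tools/helm/zeppelin
```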