This repository was archived by the owner on Nov 23, 2017. It is now read-only.

Temporary - changes for branch-2.2.0 #116

Open · wants to merge 38 commits into base: branch-1.6

Changes from all commits · 38 commits
0020b7a
Add support for 2.0.0-preview
shivaram Jun 14, 2016
38b0095
Check if hadoop version is YARN for Spark 2.0
shivaram Jun 15, 2016
11a2975
Address code review comments
shivaram Jun 15, 2016
79736f2
Remove debug print statement
shivaram Jun 15, 2016
d89a22e
Merge pull request #35 from shivaram/2.0-preview
shivaram Jun 15, 2016
8aff6d1
Now that it's been released, enable launching with spark 2.0.0
tomerk Aug 29, 2016
59045a1
Updated default spark version and hadoop version to 2.0.0 and yarn
tomerk Aug 29, 2016
472d067
Merge pull request #46 from tomerk/branch-2.0
shivaram Aug 29, 2016
783a075
Apply --additional-tags to EBS volumes
ajohnson-inst Sep 7, 2016
06f5d2b
Merge pull request #48 from aaronj1331/tag-volumes
shivaram Sep 7, 2016
bd25efa
Add Spark 2.0.1 to valid spark versions.
lagerspetz Oct 9, 2016
78280cb
Added also Spark 1.6.2
lagerspetz Oct 10, 2016
81a5aeb
Merge pull request #62 from lagerspetz/branch-2.0
shivaram Oct 10, 2016
bafa07c
Add missing 1.6.1 and new 1.6.3 and 2.0.2
lagerspetz Nov 15, 2016
a3e1d7b
Merge pull request #73 from lagerspetz/202
shivaram Nov 15, 2016
5188c78
Fix missing close quote
shivaram Nov 16, 2016
fcbe85f
Get rid of useless mount flag
Jan 10, 2017
9314296
Merge pull request #79 from dud225/branch-2.0
shivaram Jan 11, 2017
045507a
add missing spark_version 2.1.0
shiyuangu Feb 20, 2017
7af4f6d
Merge pull request #87 from shiyuangu/branch-2.0
shivaram Feb 22, 2017
697e802
Added Spark 2.1.1.
lagerspetz Jun 21, 2017
e6c4e09
Merge pull request #104 from lagerspetz/add-210
shivaram Jun 21, 2017
d9c9326
Updates for spark 2.2.0 - use scala 2.11.x
dazza-codes Aug 16, 2017
f21fb92
Updates for spark 2.2.0 - update versions and use this spark-ec2 repo…
dazza-codes Aug 16, 2017
435d06b
Updates for spark 2.2.0 - update spark-hadhoop download dependency to…
dazza-codes Aug 16, 2017
c84a77b
Update to java-1.8.0 and use {{java_home}} consistently in templates
dazza-codes Aug 16, 2017
535d070
Update hadoop to 2.7.4
dazza-codes Aug 16, 2017
9f613cc
fu Update to java-1.8.0
dazza-codes Aug 16, 2017
0f8a427
HADOOP/MAPREDUCE/TACHYON - removed entirely
dazza-codes Aug 16, 2017
d81a90e
Set the default java to java-1.8.0
dazza-codes Aug 16, 2017
6914816
spark-standalone/setup.sh - remove old spark versions code
dazza-codes Aug 16, 2017
2a26325
Modify setup scripts to ensure the java changes occur etc.
dazza-codes Aug 17, 2017
e5b2c2e
Update scala to 2.11.11, download from lightbend
dazza-codes Aug 17, 2017
eaf78b9
spark_ec2.py - remove tachyon
dazza-codes Aug 17, 2017
df433ad
spark_ec2.py - remove some things not specific to spark
dazza-codes Aug 17, 2017
b401059
deploy_templates.py - remove hadoop, mapreduce, and tachyon
dazza-codes Aug 17, 2017
ea22f15
setup-tools.sh - quietly and efficiently
dazza-codes Aug 17, 2017
25bfa18
setup.sh - use a full path to setup-slave script
dazza-codes Aug 17, 2017
Files changed
33 changes: 13 additions & 20 deletions create_image.sh
@@ -11,12 +11,19 @@
if [ "$(id -u)" != "0" ]; then
fi

# Dev tools
-sudo yum install -y java-1.7.0-openjdk-devel gcc gcc-c++ ant git
+sudo yum install -y gcc gcc-c++ ant git

+# Install java-8 for Spark 2.2.x
+sudo yum install -y java-1.8.0 java-1.8.0-devel
+sudo /usr/sbin/alternatives --set java /usr/lib/jvm/jre-1.8.0-openjdk.x86_64/bin/java
+sudo /usr/sbin/alternatives --set javac /usr/lib/jvm/jre-1.8.0-openjdk.x86_64/bin/javac
+#sudo yum remove java-1.7

# Perf tools
sudo yum install -y dstat iotop strace sysstat htop perf
sudo debuginfo-install -q -y glibc
sudo debuginfo-install -q -y kernel
-sudo yum --enablerepo='*-debug*' install -q -y java-1.7.0-openjdk-debuginfo.x86_64
+sudo yum --enablerepo='*-debug*' install -y java-1.8.0-openjdk-debuginfo

# PySpark and MLlib deps
sudo yum install -y python-matplotlib python-tornado scipy libgfortran
@@ -38,42 +45,28 @@
sudo sed -i 's/.*ephemeral.*//g' /etc/cloud/cloud.cfg
sudo sed -i 's/.*swap.*//g' /etc/cloud/cloud.cfg

echo "mounts:" >> /etc/cloud/cloud.cfg
echo " - [ ephemeral0, /mnt, auto, \"defaults,noatime,nodiratime\", "\
echo " - [ ephemeral0, /mnt, auto, \"defaults,noatime\", "\
"\"0\", \"0\" ]" >> /etc/cloud.cloud.cfg

for x in {1..23}; do
echo " - [ ephemeral$x, /mnt$((x + 1)), auto, "\
"\"defaults,noatime,nodiratime\", \"0\", \"0\" ]" >> /etc/cloud/cloud.cfg
"\"defaults,noatime\", \"0\", \"0\" ]" >> /etc/cloud/cloud.cfg
done

-# Install Maven (for Hadoop)
+# Install Maven
cd /tmp
wget "http://archive.apache.org/dist/maven/maven-3/3.2.3/binaries/apache-maven-3.2.3-bin.tar.gz"
tar xvzf apache-maven-3.2.3-bin.tar.gz
mv apache-maven-3.2.3 /opt/

# Edit bash profile
echo "export PS1=\"\\u@\\h \\W]\\$ \"" >> ~/.bash_profile
echo "export JAVA_HOME=/usr/lib/jvm/java-1.7.0" >> ~/.bash_profile
echo "export JAVA_HOME=/usr/lib/jvm/java-1.8.0" >> ~/.bash_profile
echo "export M2_HOME=/opt/apache-maven-3.2.3" >> ~/.bash_profile
echo "export PATH=\$PATH:\$M2_HOME/bin" >> ~/.bash_profile

source ~/.bash_profile

-# Build Hadoop to install native libs
-sudo mkdir /root/hadoop-native
-cd /tmp
-sudo yum install -y protobuf-compiler cmake openssl-devel
-wget "http://archive.apache.org/dist/hadoop/common/hadoop-2.4.1/hadoop-2.4.1-src.tar.gz"
-tar xvzf hadoop-2.4.1-src.tar.gz
-cd hadoop-2.4.1-src
-mvn package -Pdist,native -DskipTests -Dtar
-sudo mv hadoop-dist/target/hadoop-2.4.1/lib/native/* /root/hadoop-native
-
-# Install Snappy lib (for Hadoop)
-yum install -y snappy
-ln -sf /usr/lib64/libsnappy.so.1 /root/hadoop-native/.

# Create /usr/bin/realpath which is used by R to find Java installations
# NOTE: /usr/bin/realpath is missing in CentOS AMIs. See
# http://superuser.com/questions/771104/usr-bin-realpath-not-found-in-centos-6-5
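The diff above switches the AMI's default JDK via alternatives. A quick post-install check in the same spirit (a minimal sketch, not part of the PR; the version-string greps assume OpenJDK's usual output) could be run before baking the image:

# Sanity check (hypothetical addition): confirm java/javac resolve to 1.8
java -version 2>&1 | grep '1\.8\.0' || echo "WARNING: java is not 1.8"
javac -version 2>&1 | grep '1\.8\.0' || echo "WARNING: javac is not 1.8"
# Show which alternative is currently selected
sudo /usr/sbin/alternatives --display java | head -n 3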
8 changes: 4 additions & 4 deletions deploy.generic/root/spark-ec2/ec2-variables.sh
@@ -20,13 +20,13 @@
# These variables are automatically filled in by the spark-ec2 script.
export MASTERS="{{master_list}}"
export SLAVES="{{slave_list}}"
-export HDFS_DATA_DIRS="{{hdfs_data_dirs}}"
-export MAPRED_LOCAL_DIRS="{{mapred_local_dirs}}"
+#export HDFS_DATA_DIRS="{{hdfs_data_dirs}}"
+#export MAPRED_LOCAL_DIRS="{{mapred_local_dirs}}"
export SPARK_LOCAL_DIRS="{{spark_local_dirs}}"
export MODULES="{{modules}}"
export SPARK_VERSION="{{spark_version}}"
-export TACHYON_VERSION="{{tachyon_version}}"
-export HADOOP_MAJOR_VERSION="{{hadoop_major_version}}"
+#export TACHYON_VERSION="{{tachyon_version}}"
+#export HADOOP_MAJOR_VERSION="{{hadoop_major_version}}"
export SWAP_MB="{{swap}}"
export SPARK_WORKER_INSTANCES="{{spark_worker_instances}}"
export SPARK_MASTER_OPTS="{{spark_master_opts}}"
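For orientation, once the spark-ec2 driver fills the {{...}} placeholders, the surviving exports take a shape like the following (all values below are invented for illustration; the real ones come from the launched cluster):

# Hypothetical post-substitution values, for illustration only
export MASTERS="ec2-198-51-100-10.compute-1.amazonaws.com"
export SLAVES="ec2-198-51-100-11.compute-1.amazonaws.com"
export SPARK_LOCAL_DIRS="/mnt/spark"
export SPARK_VERSION="2.2.0"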
8 changes: 0 additions & 8 deletions deploy_templates.py
@@ -47,9 +47,6 @@
else:
slave_ram_mb = max(512, slave_ram_mb - 1300) # Leave 1.3 GB RAM

-# Make tachyon_mb as slave_ram_mb for now.
-tachyon_mb = slave_ram_mb
-
worker_instances_str = ""
worker_cores = slave_cpus

@@ -63,18 +60,13 @@
"master_list": os.getenv("MASTERS"),
"active_master": os.getenv("MASTERS").split("\n")[0],
"slave_list": os.getenv("SLAVES"),
"hdfs_data_dirs": os.getenv("HDFS_DATA_DIRS"),
"mapred_local_dirs": os.getenv("MAPRED_LOCAL_DIRS"),
"spark_local_dirs": os.getenv("SPARK_LOCAL_DIRS"),
"spark_worker_mem": "%dm" % slave_ram_mb,
"spark_worker_instances": worker_instances_str,
"spark_worker_cores": "%d" % worker_cores,
"spark_master_opts": os.getenv("SPARK_MASTER_OPTS", ""),
"spark_version": os.getenv("SPARK_VERSION"),
"tachyon_version": os.getenv("TACHYON_VERSION"),
"hadoop_major_version": os.getenv("HADOOP_MAJOR_VERSION"),
"java_home": os.getenv("JAVA_HOME"),
"default_tachyon_mem": "%dMB" % tachyon_mb,
"system_ram_mb": "%d" % system_ram_mb,
"aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"),
"aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"),
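deploy_templates.py uses the table above to rewrite every {{name}} placeholder in the template tree, ec2-variables.sh being one such template. A minimal shell sketch of that substitution idea, with hypothetical paths and a shortened variable list (an assumption for illustration, not the script's actual code; multi-line values such as slave_list would need extra escaping):

# Fill {{name}} placeholders in one template from same-named env vars
fill_template() {
  local template="$1" out="$2"
  cp "$template" "$out"
  for name in spark_version spark_local_dirs java_home system_ram_mb; do
    local env_name="${name^^}"          # e.g. spark_version -> SPARK_VERSION
    sed -i "s|{{${name}}}|${!env_name}|g" "$out"   # GNU sed in-place replace
  done
}
fill_template ec2-variables.sh /root/spark-ec2/ec2-variables.sh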
50 changes: 0 additions & 50 deletions ephemeral-hdfs/init.sh

This file was deleted.

26 changes: 0 additions & 26 deletions ephemeral-hdfs/setup-slave.sh

This file was deleted.

49 changes: 0 additions & 49 deletions ephemeral-hdfs/setup.sh

This file was deleted.

23 changes: 0 additions & 23 deletions mapreduce/init.sh

This file was deleted.

11 changes: 0 additions & 11 deletions mapreduce/setup.sh

This file was deleted.

49 changes: 0 additions & 49 deletions persistent-hdfs/init.sh

This file was deleted.

8 changes: 0 additions & 8 deletions persistent-hdfs/setup-slave.sh

This file was deleted.

22 changes: 0 additions & 22 deletions persistent-hdfs/setup.sh

This file was deleted.

29 changes: 0 additions & 29 deletions rstudio/init.sh

This file was deleted.
