Skip to content

Commit

Permalink
Updated Spark to 2.4.5, infra Makefile revamp
Browse files — browse the repository at this point in the history
  • Loading branch information
luisbelloch committed Feb 13, 2020
1 parent abf3527 commit 08e3efa
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 18 deletions.
1 change: 1 addition & 0 deletions .envrc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
layout python3
# use sdk java 8.0.232.hs-adpt
export VBOXMANAGE=/Applications/VirtualBox.app/Contents/MacOS
export SPARK_HOME=$(pwd)/.spark
export PATH=$SPARK_HOME/bin:$VBOXMANAGE:$PATH
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ node_modules
alt/
checkpoint
metastore_db/
.spark/
.spark*/
.vagrant/
out/
__pycache__/
Expand Down
3 changes: 2 additions & 1 deletion infra/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ RUN apt-get update && \
apt-get install -y --no-install-recommends python3-software-properties python3-numpy curl && \
rm -rf /var/lib/apt/lists/*

ARG SPARK_VERSION=2.4.5
ENV SPARK_HOME=/opt/spark
RUN mkdir -p /opt/spark && curl -s http://apache.rediris.es/spark/spark-2.4.4/spark-2.4.4-bin-hadoop2.7.tgz | tar -xz -C "${SPARK_HOME}" --strip-components=1
RUN mkdir -p /opt/spark && curl -s http://apache.rediris.es/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz | tar -xz -C "${SPARK_HOME}" --strip-components=1
ENV PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH

RUN cp "${SPARK_HOME}/conf/log4j.properties.template" "${SPARK_HOME}/conf/log4j.properties" && \
Expand Down
37 changes: 24 additions & 13 deletions infra/docker/Makefile
Original file line number Diff line number Diff line change
@@ -1,19 +1,30 @@
.PHONY: all build tag push list
SPARK_VERSION:=2.4.5
COURSE_VERSION:=2020.1
IMAGE_NAME:=luisbelloch/spark

all: build tag
.PHONY: help
help:
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(lastword $(MAKEFILE_LIST)) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'

build:
docker build -t luisbelloch/spark .
.PHONY: all
all: build tag ## Builds and tags an image

tag:
docker tag luisbelloch/spark luisbelloch/spark:2.4.4
docker tag luisbelloch/spark luisbelloch/spark:2019.2
.PHONY: build
build: ## Assembles image from Spark binaries
docker build --build-arg SPARK_VERSION=${SPARK_VERSION} -t $(IMAGE_NAME) .

push:
docker push luisbelloch/spark:2.4.4
docker push luisbelloch/spark:2019.2
docker push luisbelloch/spark
.PHONY: tag
tag: ## Adds tags to current latest image
docker tag $(IMAGE_NAME) $(IMAGE_NAME):$(SPARK_VERSION)
docker tag $(IMAGE_NAME) $(IMAGE_NAME):$(COURSE_VERSION)

list:
docker images luisbelloch/spark
.PHONY: push
push: ## Uploads images to registry
docker push $(IMAGE_NAME):$(SPARK_VERSION)
docker push $(IMAGE_NAME):$(COURSE_VERSION)
docker push $(IMAGE_NAME)

.PHONY: list
list: ## Lists local generated images
docker images $(IMAGE_NAME)

2 changes: 1 addition & 1 deletion local_setup.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
set -euo pipefail
SPARK_URL=${SPARK_URL:-http://apache.rediris.es/spark/spark-2.4.4/spark-2.4.4-bin-hadoop2.7.tgz}
SPARK_URL=${SPARK_URL:-http://apache.rediris.es/spark/spark-2.4.5/spark-2.4.5-bin-hadoop2.7.tgz}
SPARK_PKG=${SPARK_URL##*/}
SPARK_HOME=${SPARK_HOME:-$(pwd)/.spark}

Expand Down
4 changes: 2 additions & 2 deletions playbook.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
- hosts: all
vars:
spark_home: /opt/spark
spark_pkg_name: spark-2.4.4-bin-hadoop2.7
spark_pkg_url: http://apache.rediris.es/spark/spark-2.4.4/spark-2.4.4-bin-hadoop2.7.tgz
spark_pkg_name: spark-2.4.5-bin-hadoop2.7
spark_pkg_url: http://apache.rediris.es/spark/spark-2.4.5/spark-2.4.5-bin-hadoop2.7.tgz

tasks:
- name: Update all packages to the latest version
Expand Down

0 comments on commit 08e3efa

Please sign in to comment.