# Dockerfile for a Spark / Hadoop / Python image (forked from kotialla-sudo/docker-spark).
# Base image.
# NOTE(review): stretch is an old Debian release; confirm its apt mirrors are
# still reachable before relying on the apt steps in this file.
FROM debian:stretch

# Editors, shells, sudo, wget and the Java 8 JDK, followed by locale generation
# (C.UTF-8 and en_US.UTF-8). The apt caches are removed in the same layer that
# created them.
RUN apt-get update && \
    apt-get install -y \
        wget \
        openjdk-8-jdk \
        sudo \
        locales \
        vim \
        fish \
        man-db \
        nano && \
    dpkg-reconfigure -f noninteractive locales && \
    locale-gen C.UTF-8 && \
    /usr/sbin/update-locale LANG=C.UTF-8 && \
    echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen && \
    locale-gen && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Java home; later PATH entries in this file are derived from it.
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
# Python 3.7.11 build prerequisites.
# Every step is chained with && so that a failing `apt-get update` or
# `apt-get install` aborts the build. With `;` the final `rm -rf` would succeed
# and hide the failure, leaving a layer with missing packages.

# Download and signature-verification tooling.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        gnupg \
        dirmngr \
        apt-transport-https \
        ca-certificates \
        curl \
        netbase \
        wget && \
    rm -rf /var/lib/apt/lists/*

# Install gpg only if the previous step did not already provide it.
RUN if ! command -v gpg > /dev/null; then \
        apt-get update && \
        apt-get install -y --no-install-recommends gnupg dirmngr && \
        rm -rf /var/lib/apt/lists/*; \
    fi

# Version-control clients and procps.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        bzr \
        git \
        mercurial \
        openssh-client \
        subversion \
        procps && \
    rm -rf /var/lib/apt/lists/*

# Compiler toolchain and development headers. The MySQL client headers are
# installed under whichever of the two package names apt knows about.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        autoconf \
        automake \
        bzip2 \
        dpkg-dev \
        file \
        g++ \
        gcc \
        imagemagick \
        libbz2-dev \
        libc6-dev \
        libcurl4-openssl-dev \
        libdb-dev \
        libevent-dev \
        libffi-dev \
        libgdbm-dev \
        libglib2.0-dev \
        libgmp-dev \
        libjpeg-dev \
        libkrb5-dev \
        liblzma-dev \
        libmagickcore-dev \
        libmagickwand-dev \
        libmaxminddb-dev \
        libncurses5-dev \
        libncursesw5-dev \
        libpng-dev \
        libpq-dev \
        libreadline-dev \
        libsqlite3-dev \
        libssl-dev \
        libtool \
        libwebp-dev \
        libxml2-dev \
        libxslt-dev \
        libyaml-dev \
        make \
        patch \
        unzip \
        xz-utils \
        zlib1g-dev \
        $( if apt-cache show 'default-libmysqlclient-dev' 2>/dev/null | grep -q '^Version:'; then \
               echo 'default-libmysqlclient-dev'; \
           else \
               echo 'libmysqlclient-dev'; \
           fi ) && \
    rm -rf /var/lib/apt/lists/*

# /usr/local/bin comes first so tools installed there take precedence.
ENV PATH=/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV LANG=C.UTF-8

# Extra development headers (bluetooth, tk, uuid).
RUN apt-get update && \
    apt-get install -y --fix-missing \
        libbluetooth-dev \
        tk-dev \
        uuid-dev && \
    rm -rf /var/lib/apt/lists/*
# Fingerprint of the key used to verify the tarball signature, and the CPython
# version to build.
ENV GPG_KEY=0D96DF4D4110E5C43FBFB17F2D347EA6AA65421D
ENV PYTHON_VERSION=3.7.11

# Download the CPython sources and their detached signature, verify the
# signature with GPG_KEY, build with optimizations into /usr/local, then delete
# the source tree plus test directories, bytecode, static libraries and Windows
# installer stubs. PROFILE_TASK is kept on a single line so the string handed
# to make stays exactly as written.
RUN wget -O python.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" && \
    wget -O python.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" && \
    export GNUPGHOME="$(mktemp -d)" && \
    gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys "$GPG_KEY" && \
    gpg --batch --verify python.tar.xz.asc python.tar.xz && \
    { command -v gpgconf > /dev/null && gpgconf --kill all || :; } && \
    rm -rf "$GNUPGHOME" python.tar.xz.asc && \
    mkdir -p /usr/src/python && \
    tar -xJC /usr/src/python --strip-components=1 -f python.tar.xz && \
    rm python.tar.xz && \
    cd /usr/src/python && \
    gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)" && \
    ./configure \
        --build="$gnuArch" \
        --enable-loadable-sqlite-extensions \
        --enable-optimizations \
        --enable-option-checking=fatal \
        --enable-shared \
        --with-system-expat \
        --with-system-ffi \
        --without-ensurepip && \
    make -j "$(nproc)" PROFILE_TASK='-m test.regrtest --pgo test_array test_base64 test_binascii test_binhex test_binop test_bytes test_c_locale_coercion test_class test_cmath test_codecs test_compile test_complex test_csv test_decimal test_dict test_float test_fstring test_hashlib test_io test_iter test_json test_long test_math test_memoryview test_pickle test_re test_set test_slice test_struct test_threading test_time test_traceback test_unicode ' && \
    make install && \
    rm -rf /usr/src/python && \
    find /usr/local -depth \
        \( \
            \( -type d -a \( -name test -o -name tests -o -name idle_test \) \) \
            -o \( -type f -a \( -name '*.pyc' -o -name '*.pyo' -o -name '*.a' \) \) \
            -o \( -type f -a -name 'wininst-*.exe' \) \
        \) -exec rm -rf '{}' + && \
    ldconfig && \
    python3 --version

# Unversioned command names (idle, pydoc, python, python-config) as relative
# symlinks next to their versioned counterparts in /usr/local/bin.
RUN ln -s idle3 /usr/local/bin/idle && \
    ln -s pydoc3 /usr/local/bin/pydoc && \
    ln -s python3 /usr/local/bin/python && \
    ln -s python3-config /usr/local/bin/python-config
# pip version to install, plus the pinned get-pip.py bootstrap script and its
# expected SHA-256.
ENV PYTHON_PIP_VERSION=21.2.3
ENV PYTHON_GET_PIP_URL=https://github.com/pypa/get-pip/raw/c20b0cfd643cd4a19246ccf204e2997af70f6b21/public/get-pip.py
ENV PYTHON_GET_PIP_SHA256=fa6f3fb93cce234cd4e8dd2beb54a51ab9c247653b52855a48dd44e6b21ff28b

# Download get-pip.py, verify its checksum, install the pinned pip, then prune
# test directories and bytecode under /usr/local.
# The steps are joined with && so that a checksum mismatch (or a failed
# download) stops the build. With `;` the script would still be executed even
# when `sha256sum --check` reported a mismatch.
RUN wget -O get-pip.py "$PYTHON_GET_PIP_URL" && \
    echo "$PYTHON_GET_PIP_SHA256 *get-pip.py" | sha256sum --check --strict - && \
    python get-pip.py --disable-pip-version-check --no-cache-dir "pip==$PYTHON_PIP_VERSION" && \
    pip --version && \
    find /usr/local -depth \
        \( \
            \( -type d -a \( -name test -o -name tests -o -name idle_test \) \) \
            -o \( -type f -a \( -name '*.pyc' -o -name '*.pyo' \) \) \
        \) -exec rm -rf '{}' + && \
    rm -f get-pip.py
# Upgrade pip and wheel, then install the Python tooling packages.
# NOTE: upgrading pip here moves it past the version installed by get-pip.py.
# --no-cache-dir keeps pip's download cache out of the image layer; the two
# separate pip-upgrade steps are folded into one because they did the same job.
# NOTE(review): both requirements files are fetched from a moving branch, so
# the resulting package set is not reproducible; consider pinning a commit.
RUN pip3 install --no-cache-dir --upgrade pip wheel && \
    pip3 install --no-cache-dir -r https://raw.githubusercontent.com/zephyrproject-rtos/zephyr/master/scripts/requirements.txt && \
    pip3 install --no-cache-dir -r https://raw.githubusercontent.com/zephyrproject-rtos/mcuboot/master/scripts/requirements.txt && \
    pip3 install --no-cache-dir west && \
    pip3 install --no-cache-dir sh && \
    pip3 install --no-cache-dir \
        awscli \
        PyGithub \
        junitparser \
        pylint \
        statistics \
        numpy \
        imgtool \
        protobuf
# HADOOP
# Version, install prefix and configuration directory. HADOOP_HOME is derived
# from HADOOP_VERSION, so the two must stay in separate ENV instructions.
ENV HADOOP_VERSION=2.7.0
ENV HADOOP_HOME=/usr/hadoop-$HADOOP_VERSION
ENV HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
ENV PATH=$PATH:$HADOOP_HOME/bin:$JAVA_HOME/bin

# Download and unpack the Hadoop distribution, then drop the bundled docs.
# The tarball is deleted in the same layer that downloaded it; otherwise it
# stays in the image permanently. The download uses https, matching the Spark
# download from the same host.
# TODO(review): no checksum is verified for this archive.
RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz && \
    tar -xzf hadoop-$HADOOP_VERSION.tar.gz && \
    rm hadoop-$HADOOP_VERSION.tar.gz && \
    mv hadoop-$HADOOP_VERSION $HADOOP_HOME && \
    rm -rf $HADOOP_HOME/share/doc
# SPARK
# Version, archive name and install prefix. Each value is derived from the
# previous one, so they stay in separate ENV instructions.
ENV SPARK_VERSION=2.4.0
ENV SPARK_PACKAGE=spark-$SPARK_VERSION-bin-without-hadoop
ENV SPARK_HOME=/usr/spark-$SPARK_VERSION

# Python interpreter for the Spark driver and workers. Each variable must name
# exactly one executable; a colon-separated list is not a valid value. Both
# point at the Anaconda interpreter that this file installs under
# /home/user/anaconda3.
ENV PYSPARK_PYTHON=/home/user/anaconda3/bin/python3 \
    PYSPARK_DRIVER_PYTHON=/home/user/anaconda3/bin/python3

# The "without-hadoop" Spark build needs the Hadoop jars on its classpath.
ENV SPARK_DIST_CLASSPATH="$HADOOP_HOME/etc/hadoop/*:$HADOOP_HOME/share/hadoop/common/lib/*:$HADOOP_HOME/share/hadoop/common/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/hdfs/lib/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/yarn/lib/*:$HADOOP_HOME/share/hadoop/yarn/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*:$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/tools/lib/*"

# Download and unpack Spark, then drop the examples. The URL is built from
# SPARK_VERSION / SPARK_PACKAGE so a version bump only touches the ENV lines,
# and the tarball is removed in the layer that fetched it.
# TODO(review): no checksum is verified for this archive.
RUN wget https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/$SPARK_PACKAGE.tgz && \
    tar -xzf $SPARK_PACKAGE.tgz && \
    rm $SPARK_PACKAGE.tgz && \
    mv $SPARK_PACKAGE $SPARK_HOME && \
    rm -rf $SPARK_HOME/examples $SPARK_HOME/ec2

RUN pip install --no-cache-dir ipython

# Put the Spark launchers first on PATH. The inherited PATH already carries the
# system directories plus the Hadoop and Java bin directories, so they are not
# repeated, and no relative entry is added.
ENV PATH=$SPARK_HOME/bin:$SPARK_HOME/python:$PATH

# Make the PySpark sources bundled with Spark, and the py4j archive they
# depend on, importable.
ENV PYSPARKPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/pyspark.zip
ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip
# Root-level housekeeping before switching to the unprivileged user:
#   - clear the apt package cache,
#   - make /bin/sh point at bash and expose it as /usr/local/bin/sh too,
#   - create a world-writable /DATA directory.
# Disabled alternative for /DATA (group 0 ownership, group perms mirror user):
# RUN chgrp -R 0 /DATA && \
# chmod -R g=u /DATA
RUN apt-get clean && \
    ln -sf /bin/bash /bin/sh && \
    ln -s /bin/sh /usr/local/bin/sh && \
    mkdir -m 777 /DATA
# Name and numeric ids of the unprivileged account used from here on.
ENV USER=user \
    UID=1000 \
    GID=1000

# Create the group and the account (with a home directory and /bin/sh as login
# shell), and append a passwordless-sudo rule for "user" to /etc/sudoers.
RUN groupadd --gid $GID $USER && \
    useradd --create-home --shell /bin/sh --uid $UID --gid $GID $USER && \
    echo 'user ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers

# Everything below runs as that account, from its home directory.
USER $USER
WORKDIR /home/$USER

# NOTE(review): when the image also defines an exec-form ENTRYPOINT, this CMD
# is passed to it as arguments rather than started as a shell; confirm that
# is intended.
CMD ["bash"]

ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
# Install Anaconda3 2020.02 for the current user (-b is the installer's
# non-interactive batch mode), then list the install directory as a sanity
# check that /home/$USER/anaconda3 exists.
# Download, install and cleanup share one RUN so the installer is not kept in
# an earlier image layer; deleting it in a later layer does not shrink the image.
# TODO(review): no checksum is verified for the installer.
RUN wget https://repo.anaconda.com/archive/Anaconda3-2020.02-Linux-x86_64.sh && \
    bash Anaconda3-2020.02-Linux-x86_64.sh -b && \
    rm Anaconda3-2020.02-Linux-x86_64.sh && \
    ls /home/$USER/anaconda3
# PySpark helper packages.
# NOTE(review): `pip` / `pip3` are resolved through PATH, which at this point
# does not yet contain the Anaconda directories (they are added further down),
# so confirm these land in the interpreter that actually runs the application.
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip3 install --no-cache-dir pyspark && \
    pip install --no-cache-dir py4j && \
    pip install --no-cache-dir findspark

# Upgrade the pip that belongs to the Anaconda interpreter.
RUN /home/$USER/anaconda3/bin/python -m pip install --no-cache-dir --upgrade pip

# Application requirements. The manifest is copied on its own so this layer is
# only rebuilt when requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Variables named after the two packages, each prefixed with the Anaconda bin
# directory.
# NOTE(review): nothing in this file reads them; confirm they are still needed.
ENV pyspark=/home/$USER/anaconda3/bin:$pyspark
ENV py4j=/home/$USER/anaconda3/bin:$py4j

# Make everything under the Anaconda bin directory world-accessible.
RUN chmod 777 -R /home/$USER/anaconda3/bin
# Name of the conda environment created below.
ENV CONDA_ENV_NAME=mynewenv

# Create the environment with Python 3.7.11 and clean conda's caches in the
# same layer. --yes answers the confirmation prompt explicitly, so the build
# does not depend on how conda behaves without a terminal.
RUN /home/$USER/anaconda3/bin/conda create --yes -q --name $CONDA_ENV_NAME python=3.7.11 && \
    /home/$USER/anaconda3/bin/conda clean --yes --all

# PATH order: Anaconda base bin, Java bin, the new environment's bin, then the
# inherited PATH.
# NOTE(review): the base bin directory precedes the environment's, so a plain
# `python3` resolves to the base interpreter; confirm that is intended.
ENV PATH=/home/$USER/anaconda3/bin:$JAVA_HOME/bin:/home/$USER/anaconda3/envs/$CONDA_ENV_NAME/bin:$PATH

# Write conda's shell hook into the user's bash startup file. Sourcing that
# file in a separate RUN has no lasting effect (each RUN starts a fresh shell),
# so that step is not performed.
RUN conda init bash
#RUN bash conda activate base
# Application files.
# COPY creates files owned by root unless --chown is given, and the build is
# running as the unprivileged user by now, so the chmod below would be refused
# on a root-owned file. Ownership is therefore handed to the user during the copy.
COPY --chown=$UID:$GID spark.py /home/$USER/anaconda3/bin/
RUN chmod 777 /home/$USER/anaconda3/bin/spark.py
COPY app.py ./

# Documents the port the container is expected to serve on.
EXPOSE 8080

# Interpreter for the Spark driver and workers. These are set with ENV because
# only the last ENTRYPOINT in a Dockerfile takes effect: `ENTRYPOINT export ...`
# lines placed before it never run, so the variables would never be set.
ENV PYSPARK_PYTHON=/home/$USER/anaconda3/bin/python3 \
    PYSPARK_DRIVER_PYTHON=/home/$USER/anaconda3/bin/python3

#ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "myenv", "python3", "app.py"]
# NOTE(review): any CMD defined for this image is appended to this exec-form
# ENTRYPOINT as arguments to app.py; confirm app.py tolerates that.
ENTRYPOINT ["python3", "app.py"]