Added Dockerfiles to build pnda docker images
Showing 29 changed files with 1,288 additions and 3 deletions.
@@ -0,0 +1,60 @@
FROM alpine:3.7 as platformlibs

LABEL maintainer="[email protected]"
LABEL organization="gradiant.org"

COPY docker/hdfs_root_uri_conf.diff /
RUN apk add --no-cache git bash python py2-pip && pip install setuptools
RUN git clone https://github.com/pndaproject/platform-libraries.git
RUN cd platform-libraries && git checkout tags/release/4.0 && \
    export VERSION=$(git describe --tags) && \
    git apply /hdfs_root_uri_conf.diff && \
    python setup.py bdist_egg

FROM alpine:3.7

COPY --from=platformlibs /platform-libraries/dist/platformlibs-0.1.5-py2.7.egg /
COPY docker /
ENV SPARK_HOME=/opt/spark

RUN apk add --no-cache bash python2 py2-pip postgresql-dev libpng-dev freetype-dev ca-certificates build-base python2-dev krb5-dev libffi-dev cyrus-sasl-dev nodejs shadow python3 python3-dev openjdk8-jre && \
    echo 'Installing python2 requirements' && \
    pip2 install -r /requirements/requirements-jupyter.txt && \
    pip2 install -r /requirements/app-packages-requirements.txt && pip2 install j2cli && \
    /usr/bin/python2 -m ipykernel.kernelspec --name python2 --display-name "Python 2" && \
    echo 'Installing python3 requirements' && \
    pip3 install -r /requirements/requirements-jupyter.txt && \
    /usr/bin/python3 -m ipykernel.kernelspec --name python3 --display-name "Python 3" && \
    echo 'Adding pyspark2 support' && \
    mkdir -p /usr/local/share/jupyter/kernels/pyspark2 && mkdir -p /opt && \
    wget -O- https://archive.apache.org/dist/spark/spark-2.3.0/spark-2.3.0-bin-hadoop2.7.tgz | tar -xvz -C /tmp && \
    mv /tmp/spark-2.3.0-bin-hadoop2.7 /opt/spark && \
    echo 'Adding jupyter-scala_extension_spark' && \
    jupyter nbextension enable --py widgetsnbextension --system && \
    jupyter-kernelspec install /usr/lib/python3.6/site-packages/sparkmagic/kernels/sparkkernel && \
    jupyter serverextension enable --py sparkmagic && \
    echo 'Adding jupyter-extensions' && \
    apk add --no-cache libxml2-dev libxslt-dev && \
    pip3 install -r /requirements/requirements-jupyter-extensions.txt && \
    jupyter serverextension enable --py jupyter_spark --system && \
    jupyter nbextension install --py jupyter_spark --system && \
    jupyter nbextension enable --py jupyter_spark --system && \
    jupyter nbextension enable --py widgetsnbextension --system && \
    echo 'Adding jupyterhub' && \
    pip3 install -r /requirements/requirements-jupyterhub.txt && \
    npm install -g configurable-http-proxy && mkdir -p /var/log/pnda && \
    echo 'auth required pam_exec.so debug log=/var/log/pnda/login.log /create_notebook_dir.sh' >> /etc/pam.d/login

RUN echo 'Adding pnda platform-libraries' && \
    mkdir /etc/platformlibs && /usr/bin/python2 -m easy_install /platformlibs-0.1.5-py2.7.egg && \
    adduser -D pnda && echo "pnda:pnda" | chpasswd && \
    mkdir -p /opt/pnda && mv /notebooks /opt/pnda/jupyter_notebooks && \
    echo 'auth required pam_listfile.so item=user sense=deny file=/etc/login.deny onerr=succeed' >> /etc/pam.d/login && \
    echo 'root' >> /etc/login.deny

RUN wget http://central.maven.org/maven2/org/apache/spark/spark-sql-kafka-0-10_2.11/2.3.0/spark-sql-kafka-0-10_2.11-2.3.0.jar \
        -O /opt/spark/jars/spark-sql-kafka-0-10_2.11-2.3.0.jar && \
    wget http://central.maven.org/maven2/org/apache/kafka/kafka-clients/1.0.0/kafka-clients-1.0.0.jar \
        -O /opt/spark/jars/kafka-clients-1.0.0.jar

ENTRYPOINT /entrypoint.sh
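The build script below tags this image as pnda/jupyter:4.4.0. A quick smoke test of a freshly built image might look like this (publishing port 8000 is an assumption — it is JupyterHub's default listen port, not something set in the Dockerfile):

# Start the container; /entrypoint.sh renders the templates and launches JupyterHub.
# Port 8000 is JupyterHub's default (assumed here); log in with a PAM user such as pnda.
docker run -d --name pnda-jupyter -p 8000:8000 pnda/jupyter:4.4.0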
@@ -0,0 +1,4 @@
#!/bin/bash

VERSION=4.4.0
docker build -t pnda/jupyter:$VERSION .
@@ -0,0 +1,114 @@
#!/usr/bin/python

import argparse
import subprocess
import json
import avro.schema
import datetime
import uuid
import time
import sys

from random import randint
from avro.datafile import DataFileWriter
from avro.io import DatumWriter
from argparse import RawTextHelpFormatter


def generate_sample_datasets(host_ips, metric_ids, year, month, day, hour):
    avro_schema = ''
    # load the dataset schema from HDFS
    cat = subprocess.Popen(['sudo', '-u', 'hdfs', 'hadoop', 'fs', '-cat',
                            '/user/pnda/PNDA_datasets/datasets/.metadata/schema.avsc'],
                           stdout=subprocess.PIPE)
    for line in cat.stdout:
        avro_schema = avro_schema + line
    schema = avro.schema.parse(avro_schema)
    # create the HDFS folder structure
    dirpath = create_hdfs_dirs(year, month, day, hour)
    filename = str(uuid.uuid4()) + '.avro'
    filepath = dirpath + filename
    tmp_file = '/tmp/' + filename

    # Avro container files are binary, so open the temp file in 'wb' mode
    writer = DataFileWriter(open(tmp_file, "wb"), DatumWriter(), schema)

    start_dt = datetime.datetime(year, month, day, hour, 0, 0)
    start_ts = int(time.mktime(start_dt.timetuple()))
    # use a timedelta so hour 23 rolls over to the next day instead of raising
    end_dt = start_dt + datetime.timedelta(hours=1)
    end_ts = int(time.mktime(end_dt.timetuple()))

    for ts in xrange(start_ts, end_ts, 1):
        # generate one random pnda record per host ip per second
        for host_ip in host_ips:
            record = {}
            record['timestamp'] = (ts * 1000)
            record['src'] = 'test'
            record['host_ip'] = host_ip
            record['rawdata'] = generate_random_metrics(metric_ids)
            writer.append(record)
    writer.close()
    # block until the upload finishes so the caller sees a complete file
    subprocess.check_call(['sudo', '-u', 'hdfs', 'hadoop', 'fs', '-copyFromLocal', tmp_file, dirpath])
    return filepath


def generate_random_metrics(metric_ids):
    '''
    generate a random raw_data element
    '''
    raw_data = {}
    for metric_id in metric_ids:
        raw_data[metric_id] = str(randint(0, 100))
    return json.dumps(raw_data).encode('utf-8')


def create_hdfs_dirs(year, month, day, hour):
    dirpath = "/user/pnda/PNDA_datasets/datasets/source=test/year=%0d/month=%02d/day=%02d/hour=%02d/" % (year, month, day, hour)
    # block until the directories exist before writing into them
    subprocess.check_call(['sudo', '-u', 'hdfs', 'hadoop', 'fs', '-mkdir', '-p', dirpath])
    return dirpath


def get_args():
    epilog = """ example:
  - create sample data sets
    data_generator.py --hosts '10.0.0.1, 10.0.0.2' --metrics 'a, b, c' --year 2016 --month 4 --day 27 --hour 14
  - create sample data sets using system datetime
    data_generator.py --hosts '10.0.0.1, 10.0.0.2' --metrics 'a, b, c'
"""

    dt = datetime.datetime.now()
    parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter,
                                     description='Sample datasets generator', epilog=epilog)
    parser.add_argument('--hosts', help='list of sample host ips separated by comma', default='')
    parser.add_argument('--metrics', help='list of metric ids separated by comma', default='')
    parser.add_argument('--year', type=int, help='year', default=dt.year)
    parser.add_argument('--month', type=int, help='month', default=dt.month)
    parser.add_argument('--day', type=int, help='day of the month', default=dt.day)
    parser.add_argument('--hour', type=int, help='hour of the day', default=dt.hour)
    args = parser.parse_args()
    return args


def main():
    args = get_args()
    hosts = args.hosts.strip()
    if not hosts:
        print 'mandatory arg --hosts missing (aborting).'
        sys.exit(1)

    host_ips = [x.strip() for x in hosts.split(",")]

    metrics = args.metrics.strip()
    if not metrics:
        print 'mandatory arg --metrics missing (aborting).'
        sys.exit(1)
    metric_ids = [x.strip() for x in metrics.split(",")]

    year = int(args.year)
    month = int(args.month)
    day = int(args.day)
    hour = int(args.hour)
    filepath = generate_sample_datasets(host_ips, metric_ids, year, month, day, hour)
    print "Success: generated file path at " + filepath


if __name__ == "__main__":
    main()
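To sanity-check a generated dataset, you can copy the file whose path the script prints back out of HDFS and dump a few records. This is a sketch, not part of the commit: it assumes the same Python 2 avro package used above is available on the host, and that the printed HDFS path is passed as the first argument.

#!/bin/sh
# Hypothetical verification helper: $1 is the HDFS path printed by the generator.
sudo -u hdfs hadoop fs -copyToLocal "$1" /tmp/sample.avro
python - <<'EOF'
from avro.datafile import DataFileReader
from avro.io import DatumReader
# print the first three records to confirm the schema and payload look sane
reader = DataFileReader(open('/tmp/sample.avro', 'rb'), DatumReader())
for i, record in enumerate(reader):
    if i >= 3:
        break
    print record
reader.close()
EOF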
@@ -0,0 +1,19 @@
#!/bin/sh

set -x

DIR=/home/$PAM_USER
if [ ! -d "$DIR" ]; then
  mkdir "$DIR"
  chmod 0755 "$DIR"
  chown "$PAM_USER:" "$DIR"
fi

DIR=$DIR/jupyter_notebooks
if [ ! -d "$DIR" ]; then
  mkdir "$DIR"
  cp -r /opt/pnda/jupyter_notebooks "$DIR/examples"
  chmod -R 0755 "$DIR"
  chown -R "$PAM_USER:" "$DIR"
fi
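Because this script is invoked by pam_exec at login (see the pam.d line in the Dockerfile above), it relies on PAM exporting PAM_USER. You can exercise it outside PAM by setting the variable by hand — a sketch, assuming the pnda account created in the Dockerfile:

# Simulate a PAM login for user 'pnda' and check the resulting directories.
PAM_USER=pnda /create_notebook_dir.sh
ls -ld /home/pnda /home/pnda/jupyter_notebooks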
@@ -0,0 +1,4 @@
#!/bin/sh
j2 /pyspark2_kernel.json.tpl > /usr/local/share/jupyter/kernels/pyspark2/kernel.json
j2 /platformlibs.ini.tpl > /etc/platformlibs/platformlibs.ini
/usr/bin/jupyterhub
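j2cli renders these templates from environment variables, so the rendered output can be inspected without starting JupyterHub by overriding the entrypoint. A sketch; the variable names the .tpl files expect are not shown in this commit, so pass whatever your templates reference via -e on docker run:

# Render the platformlibs template to stdout instead of writing /etc/platformlibs/.
docker run --rm --entrypoint sh pnda/jupyter:4.4.0 -c 'j2 /platformlibs.ini.tpl'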
@@ -0,0 +1,16 @@
diff --git a/platformlibs/data_handler.py b/platformlibs/data_handler.py
index 27a2ea5..7bc1ae3 100644
--- a/platformlibs/data_handler.py
+++ b/platformlibs/data_handler.py
@@ -63,7 +63,10 @@ class DataHandler(object):
         if self._hdfs_root_uri:
             return self._hdfs_root_uri
         cm_conf = read_config('/etc/platformlibs/platformlibs.ini')
-        self._hdfs_root_uri = get_hdfs_uri(cm_conf['cm_host'], cm_conf['cm_user'], cm_conf['cm_pass'], cm_conf['hadoop_distro'])
+        if 'hdfs_root_uri' in cm_conf:
+            self._hdfs_root_uri = cm_conf['hdfs_root_uri']
+        else:
+            self._hdfs_root_uri = get_hdfs_uri(cm_conf['cm_host'], cm_conf['cm_user'], cm_conf['cm_pass'], cm_conf['hadoop_distro'])
         return self._hdfs_root_uri

     @property
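The patch above makes platformlibs honour an hdfs_root_uri key in /etc/platformlibs/platformlibs.ini, bypassing the Cloudera Manager lookup (cm_host, cm_user, cm_pass, hadoop_distro) entirely. A minimal sketch of such a file, written inside the container — only the hdfs_root_uri key comes from the patch; the section name and URI are illustrative:

# Illustrative only: pin the HDFS root URI so the CM credentials are not needed.
cat > /etc/platformlibs/platformlibs.ini <<'EOF'
[hadoop]
hdfs_root_uri = hdfs://namenode:8020
EOF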