Adding all dockerfiles for PNDA services
Showing 83 changed files with 2,147 additions and 0 deletions.
@@ -0,0 +1,60 @@
# Stage 1: build the platform-libraries egg
FROM alpine:3.7 as platformlibs

LABEL maintainer="[email protected]"
LABEL organization="gradiant.org"

COPY docker/hdfs_root_uri_conf.diff /
RUN apk add --no-cache git bash python py2-pip && pip install setuptools
RUN git clone https://github.com/pndaproject/platform-libraries.git
RUN cd platform-libraries && git checkout tags/release/4.0 && \
    export VERSION=$(git describe --tags) && \
    git apply /hdfs_root_uri_conf.diff && \
    python setup.py bdist_egg

# Stage 2: runtime image with Jupyter, JupyterHub and Spark
FROM alpine:3.7

COPY --from=platformlibs /platform-libraries/dist/platformlibs-0.1.5-py2.7.egg /
COPY docker /
ENV SPARK_HOME=/opt/spark

RUN apk add --no-cache bash python2 py2-pip postgresql-dev libpng-dev freetype-dev ca-certificates build-base python2-dev krb5-dev libffi-dev cyrus-sasl-dev nodejs shadow python3 python3-dev openjdk8-jre && \
    echo 'Installing python2 requirements' && \
    pip2 install -r /requirements/requirements-jupyter.txt && \
    pip2 install -r /requirements/app-packages-requirements.txt && pip2 install j2cli && \
    /usr/bin/python2 -m ipykernel.kernelspec --name python2 --display-name "Python 2" && \
    echo 'Installing python3 requirements' && \
    pip3 install -r /requirements/requirements-jupyter.txt && \
    /usr/bin/python3 -m ipykernel.kernelspec --name python3 --display-name "Python 3" && \
    echo 'Adding pyspark2 support' && \
    mkdir -p /usr/local/share/jupyter/kernels/pyspark2 && mkdir -p /opt && \
    wget -O- https://archive.apache.org/dist/spark/spark-2.3.0/spark-2.3.0-bin-hadoop2.7.tgz | tar -xvz -C /tmp && \
    mv /tmp/spark-2.3.0-bin-hadoop2.7 /opt/spark && \
    echo 'Adding jupyter-scala_extension_spark' && \
    jupyter nbextension enable --py widgetsnbextension --system && \
    jupyter-kernelspec install /usr/lib/python3.6/site-packages/sparkmagic/kernels/sparkkernel && \
    jupyter serverextension enable --py sparkmagic && \
    echo 'Adding jupyter-extensions' && \
    apk add --no-cache libxml2-dev libxslt-dev && \
    pip3 install -r /requirements/requirements-jupyter-extensions.txt && \
    jupyter serverextension enable --py jupyter_spark --system && \
    jupyter nbextension install --py jupyter_spark --system && \
    jupyter nbextension enable --py jupyter_spark --system && \
    jupyter nbextension enable --py widgetsnbextension --system && \
    echo 'Adding jupyterhub' && \
    pip3 install -r /requirements/requirements-jupyterhub.txt && \
    npm install -g configurable-http-proxy && mkdir -p /var/log/pnda && \
    echo 'auth required pam_exec.so debug log=/var/log/pnda/login.log /create_notebook_dir.sh' >> /etc/pam.d/login

RUN echo 'Adding pnda platform-libraries' && \
    mkdir /etc/platformlibs && /usr/bin/python2 -m easy_install /platformlibs-0.1.5-py2.7.egg && \
    adduser -D pnda && echo "pnda:pnda" | chpasswd && \
    mkdir -p /opt/pnda && mv /notebooks /opt/pnda/jupyter_notebooks && \
    echo 'auth required pam_listfile.so item=user sense=deny file=/etc/login.deny onerr=succeed' >> /etc/pam.d/login && \
    echo 'root' >> /etc/login.deny

# Fetch the Kafka structured-streaming jars for Spark
RUN wget http://central.maven.org/maven2/org/apache/spark/spark-sql-kafka-0-10_2.11/2.3.0/spark-sql-kafka-0-10_2.11-2.3.0.jar \
        -O /opt/spark/jars/spark-sql-kafka-0-10_2.11-2.3.0.jar && \
    wget http://central.maven.org/maven2/org/apache/kafka/kafka-clients/1.0.0/kafka-clients-1.0.0.jar \
        -O /opt/spark/jars/kafka-clients-1.0.0.jar

ENTRYPOINT /entrypoint.sh
@@ -0,0 +1,4 @@
#!/bin/bash

VERSION=4.4.0
docker build -t pnda/jupyter:$VERSION .
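For a local smoke test, the resulting image can be run along these lines. This is a sketch rather than part of the commit: the published port is an assumption, since JupyterHub listens on 8000 by default and the Dockerfile declares no EXPOSE.

# Hypothetical local run; port 8000 is JupyterHub's default, not declared in the Dockerfile
docker run -d --name pnda-jupyter -p 8000:8000 pnda/jupyter:4.4.0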
@@ -0,0 +1,114 @@
#!/usr/bin/python

import argparse
import subprocess
import json
import avro.schema
import avro.io
import io
import datetime
import uuid
import time
import sys

from random import randint
from avro.datafile import DataFileWriter
from avro.io import DatumWriter
from argparse import RawTextHelpFormatter

def generate_sample_datasets(host_ips, metric_ids, year, month, day, hour):
    avro_schema = ''
    # load the dataset schema from HDFS
    cat = subprocess.Popen(['sudo', '-u', 'hdfs', 'hadoop', 'fs', '-cat', '/user/pnda/PNDA_datasets/datasets/.metadata/schema.avsc'], stdout=subprocess.PIPE)
    for line in cat.stdout:
        avro_schema = avro_schema + line
    schema = avro.schema.parse(avro_schema)
    bytes_writer = io.BytesIO()
    encoder = avro.io.BinaryEncoder(bytes_writer)
    # create the partitioned HDFS folder structure
    dir = create_hdfs_dirs(year, month, day, hour)
    filename = str(uuid.uuid4()) + '.avro'
    filepath = dir + filename
    tmp_file = '/tmp/' + filename

    # Avro container files are binary, so open the temp file in binary mode
    writer = DataFileWriter(open(tmp_file, "wb"), DatumWriter(), schema)

    start_dt = datetime.datetime(year, month, day, hour, 0, 0)
    start_ts = int(time.mktime(start_dt.timetuple()))
    end_dt = start_dt.replace(hour=hour+1)
    end_ts = int(time.mktime(end_dt.timetuple()))

    for ts in xrange(start_ts, end_ts, 1):
        # generate one random pnda record per host ip per second
        for host_ip in host_ips:
            record = {}
            record['timestamp'] = (ts * 1000)
            record['src'] = 'test'
            record['host_ip'] = host_ip
            record['rawdata'] = generate_random_metrics(metric_ids)
            writer.append(record)
    writer.close()
    # wait for the copy to complete so the caller sees a finished file
    subprocess.call(['sudo', '-u', 'hdfs', 'hadoop', 'fs', '-copyFromLocal', tmp_file, dir])
    return filepath

def generate_random_metrics(metric_ids):
    '''
    generate a random raw_data element
    '''
    raw_data = {}
    for metric_id in metric_ids:
        raw_data[metric_id] = str(randint(0, 100))
    return json.dumps(raw_data).encode('utf-8')

def create_hdfs_dirs(year, month, day, hour):
    dir = "/user/pnda/PNDA_datasets/datasets/source=test/year=%0d/month=%02d/day=%02d/hour=%02d/" % (year, month, day, hour)
    # wait for mkdir to complete before writing into the directory
    subprocess.call(['sudo', '-u', 'hdfs', 'hadoop', 'fs', '-mkdir', '-p', dir])
    return dir

def get_args():
    epilog = """ example:
  - create sample data sets
    data_generator.py --hosts '10.0.0.1, 10.0.0.2' --metrics 'a, b, c' --year 2016 --month 4 --day 27 --hour 14
  - create sample data sets using system datetime
    data_generator.py --hosts '10.0.0.1, 10.0.0.2' --metrics 'a, b, c'
"""

    dt = datetime.datetime.now()
    parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter, description='Sample datasets generator', epilog=epilog)
    parser.add_argument('--hosts', help='list of sample host ips separated by comma', default='')
    parser.add_argument('--metrics', help='list of metric ids separated by comma', default='')
    parser.add_argument('--year', type=int, help='year', default=dt.year)
    parser.add_argument('--month', type=int, help='month', default=dt.month)
    parser.add_argument('--day', type=int, help='day of the month', default=dt.day)
    parser.add_argument('--hour', type=int, help='hour of the day', default=dt.hour)
    args = parser.parse_args()
    return args

def main():
    args = get_args()
    hosts = args.hosts.strip()
    if not hosts:
        print 'mandatory arg --hosts missing (aborting).'
        sys.exit()

    host_ips = [x.strip() for x in hosts.split(",")]

    metrics = args.metrics.strip()
    if not metrics:
        print 'mandatory arg --metrics missing (aborting).'
        sys.exit()
    metric_ids = [x.strip() for x in metrics.split(",")]

    year = int(args.year)
    month = int(args.month)
    day = int(args.day)
    hour = int(args.hour)
    filepath = generate_sample_datasets(host_ips, metric_ids, year, month, day, hour)
    print "Success: generated file path at " + filepath

if __name__ == "__main__":
    main()
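As a sketch of how the generator is exercised end to end (assuming a host with HDFS access and sudo rights to the hdfs user), the invocation from the epilog can be paired with a listing of the partition it creates; note that year= is unpadded because the format string uses %0d:

# Hypothetical smoke test for data_generator.py
python data_generator.py --hosts '10.0.0.1, 10.0.0.2' --metrics 'a, b, c' --year 2016 --month 4 --day 27 --hour 14
sudo -u hdfs hadoop fs -ls /user/pnda/PNDA_datasets/datasets/source=test/year=2016/month=04/day=27/hour=14/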
@@ -0,0 +1,19 @@
#!/bin/sh

set -x

DIR=/home/$PAM_USER
if [ ! -d "$DIR" ]; then
  mkdir "$DIR"
  chmod 0755 "$DIR"
  chown $PAM_USER: "$DIR"
fi

DIR=$DIR/jupyter_notebooks
if [ ! -d "$DIR" ]; then
  mkdir "$DIR"
  cp -r /opt/pnda/jupyter_notebooks "$DIR/examples"
  chmod -R 0755 "$DIR"
  chown -R $PAM_USER: "$DIR"
fi
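Because pam_exec exports the authenticating user to the script via the PAM_USER environment variable, the hook can also be exercised by hand. A minimal sketch, run as root inside the container and reusing the pnda user created in the Dockerfile:

# Hypothetical manual invocation of the PAM login hook
PAM_USER=pnda /create_notebook_dir.sh
ls -ld /home/pnda/jupyter_notebooks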
@@ -0,0 +1,4 @@
#!/bin/sh
j2 /pyspark2_kernel.json.tpl > /usr/local/share/jupyter/kernels/pyspark2/kernel.json
j2 /platformlibs.ini.tpl > /etc/platformlibs/platformlibs.ini
/usr/bin/jupyterhub
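j2 (from the j2cli package installed in the Dockerfile) renders a Jinja2 template against the current environment when no data file is given, which is how these templates pick up container configuration at start-up. A minimal sketch with a hypothetical throwaway template, reusing the SPARK_HOME variable set in the Dockerfile:

# Hypothetical demonstration of j2 environment-based rendering
echo '{{ SPARK_HOME }}' > /tmp/demo.tpl
j2 /tmp/demo.tpl   # prints /opt/spark inside the container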
@@ -0,0 +1,16 @@
diff --git a/platformlibs/data_handler.py b/platformlibs/data_handler.py
index 27a2ea5..7bc1ae3 100644
--- a/platformlibs/data_handler.py
+++ b/platformlibs/data_handler.py
@@ -63,7 +63,10 @@ class DataHandler(object):
         if self._hdfs_root_uri:
             return self._hdfs_root_uri
         cm_conf = read_config('/etc/platformlibs/platformlibs.ini')
-        self._hdfs_root_uri = get_hdfs_uri(cm_conf['cm_host'], cm_conf['cm_user'], cm_conf['cm_pass'], cm_conf['hadoop_distro'])
+        if 'hdfs_root_uri' in cm_conf:
+            self._hdfs_root_uri = cm_conf['hdfs_root_uri']
+        else:
+            self._hdfs_root_uri = get_hdfs_uri(cm_conf['cm_host'], cm_conf['cm_user'], cm_conf['cm_pass'], cm_conf['hadoop_distro'])
         return self._hdfs_root_uri
 
     @property
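With this patch, platformlibs can be pointed at HDFS directly instead of querying a cluster manager for the URI. A hypothetical /etc/platformlibs/platformlibs.ini making use of the override; the key name comes from the patch above, but the file layout expected by read_config and the namenode address are assumptions:

# Sketch only: illustrative config, not part of the commit
cat > /etc/platformlibs/platformlibs.ini <<'EOF'
hdfs_root_uri = hdfs://namenode:8020
EOF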
docker/dockerfiles/jupyter/docker/notebooks/PNDA minimal SqlMagic notebook.ipynb
57 changes: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Minimal PNDA Jupyter SqlMagic notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "Use the following connection string to connect to a MySQL DB. Enter a valid username/password and the hostname/IP of the MySQL server.\n",
    "%load_ext sql\n",
    "%sql mysql+pymysql://username:password@hostname/dbname\n",
    "\n",
    "\n",
    "Use the following connection string to connect to PostgreSQL. Enter a valid username/password and the hostname/IP of the PostgreSQL server.\n",
    "%load_ext sql\n",
    "%sql postgresql://username:password@localhost/dbname\n",
    "\n",
    "Use the following connection string to connect to Impala (CDH distribution only). Enter a valid username/password and the hostname/IP of the Impala server.\n",
    "Note: the Impala connection through impyla requires autocommit to be disabled. Use %config SqlMagic to check the available configuration options.\n",
    "%load_ext sql\n",
    "%config SqlMagic.autocommit=False\n",
    "%sql impala://hostname:port/dbname\n",
    "'''\n",
    "%load_ext sql"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
docker/dockerfiles/jupyter/docker/notebooks/PNDA minimal notebook.ipynb
98 changes: 98 additions & 0 deletions
@@ -0,0 +1,98 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Minimal PNDA Jupyter notebook\n",
    "\n",
    "`%matplotlib notebook` must be set before `import matplotlib.pyplot as plt` or plotting with matplotlib will fail"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%matplotlib notebook\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import sys\n",
    "import pandas as pd\n",
    "import matplotlib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(u'▶ Python version ' + sys.version)\n",
    "print(u'▶ Pandas version ' + pd.__version__)\n",
    "print(u'▶ Matplotlib version ' + matplotlib.__version__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "values = np.random.rand(100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df = pd.DataFrame(data=values, columns=['RandomValue'])\n",
    "df.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df.plot()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "PySpark2/Python2",
   "language": "python",
   "name": "pyspark2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}