diff --git a/.gitignore b/.gitignore
index 9f6ecd7..a7f3e31 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 scripts/components/files/.ipynb_checkpoints/
 .vscode/
+docker/dm_keys/
diff --git a/docker/AUTHORS b/docker/AUTHORS
new file mode 100644
index 0000000..a7d61bf
--- /dev/null
+++ b/docker/AUTHORS
@@ -0,0 +1 @@
+Carlos Giraldo
\ No newline at end of file
diff --git a/docker/CONTRIBUTORS b/docker/CONTRIBUTORS
new file mode 100644
index 0000000..e69de29
diff --git a/docker/LICENSE b/docker/LICENSE
new file mode 100644
index 0000000..51da0cf
--- /dev/null
+++ b/docker/LICENSE
@@ -0,0 +1,15 @@
+Unless otherwise specified in the file, this software is:
+
+Copyright (c) 2018 Gradiant. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
\ No newline at end of file
diff --git a/docker/README.md b/docker/README.md
new file mode 100644
index 0000000..7a1599e
--- /dev/null
+++ b/docker/README.md
@@ -0,0 +1,173 @@
+# red-pnda
+
+Red PNDA logo
+
+This framework provisions a minimal set of the PNDA ([pnda.io](http://pnda.io)) components so that developers writing apps targeted at the full PNDA stack can experiment with the PNDA components in a smaller, lightweight environment. Data exploration and app prototyping are supported using Jupyter and Apache Spark.
+
+**Note**:
+
+* Package and application support is not available on red-pnda. The respective tabs will **not** work on the PNDA console and will throw an error message.
+
+* This framework is not implemented with scalability or HA in mind and is hence unsuited for running production workloads. If these are requirements, one of the core PNDA flavors should be used - see the PNDA [Guide](http://pnda.io/guide).
+
+
+The Red PNDA framework is intended as a platform for experimentation and is NOT formally supported at this point in time. Any issues encountered with the system can be reported to the standard PNDA support forums for informational purposes only.
+
+## Acknowledgement
+
+This work has been inspired by an initial concept created by Maros Marsalek ([https://github.com/marosmars](https://github.com/marosmars)) and Nick Hall ([https://github.com/cloudwiser](https://github.com/cloudwiser)).
+
+## Prerequisites
+
+Tested on Ubuntu 18.04.
+
+Docker Engine (tested with docker version **18.05.0-ce**)
+
+docker-compose (tested with docker-compose version **1.21.2**)
+
+
+Minimum amount of RAM / VCPU / Storage: 4GB / 2 / 16GB
+Recommended amount of RAM / VCPU / Storage: 16GB / 4 / 60GB
+
+These values are illustrative, since they depend on the data analytics application to be run on PNDA.
+
+## Deploying red-PNDA as docker containers
+
+The `deploy.sh` script starts the containers and performs several post-deploy tasks (e.g., creating users, initializing database tables, etc.). Inspect the script for more information.
+
+To access the PNDA services from the host, the script appends `service-name IP-address` entries to the /etc/hosts file.
+
+After deployment, access the [PNDA console-frontend web UI](http://console-frontend).
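+
+These appended entries are what make hostnames such as `console-frontend` resolve from the host. They have the form shown below; the IP addresses here are examples only, as the real ones are assigned by Docker at deploy time:
+
+    172.18.0.2  console-frontend
+    172.18.0.3  grafana
+    172.18.0.4  kafka-manager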
+
+The default user is `pnda` and the password is `pnda`.
+
+Other service web UIs:
+
+* [Spark](http://spark-master:8080)
+* [Kafka-manager](http://kafka-manager:10900)
+* [HDFS](http://hdfs-namenode:50070)
+* [HBASE](http://hbase-master:60010)
+* [Jupyter](http://jupyter:8000)
+* [Grafana](http://grafana:3000)
+* [OpenTSDB](http://opentsdb:4242)
+
+### Terminal access to running containers
+
+You should be able to access a bash terminal in any of the running
+containers through the `docker exec -ti CONTAINER_NAME /bin/bash` command.
+
+### Access to service logs
+
+You should be able to get the logs of any of the running
+containers through the `docker logs CONTAINER_NAME` command.
+
+## Red-PNDA components
+
+Red-PNDA makes use of the following open source components:
+
+* Console Frontend - [https://github.com/pndaproject/platform-console-frontend](https://github.com/pndaproject/platform-console-frontend)
+* Console Backend - [https://github.com/pndaproject/platform-console-backend](https://github.com/pndaproject/platform-console-backend)
+* Platform Testing - [https://github.com/pndaproject/platform-testing](https://github.com/pndaproject/platform-testing)
+* Platform Libraries - [https://github.com/pndaproject/platform-libraries](https://github.com/pndaproject/platform-libraries)
+* Kafka 1.0.0 - [http://kafka.apache.org](http://kafka.apache.org)
+* Jupyter Notebook - [http://jupyter.org](http://jupyter.org)
+* Apache Spark 2.3.1 - [http://spark.apache.org](http://spark.apache.org)
+* Apache Hbase 2.0.1 - [http://hbase.apache.org](http://hbase.apache.org)
+* OpenTSDB 2.3.1 - [http://opentsdb.net](http://opentsdb.net)
+* Grafana 5.0.3 - [https://grafana.com](https://grafana.com)
+* Kafka Manager 1.3.3.17 - [https://github.com/yahoo/kafka-manager](https://github.com/yahoo/kafka-manager)
+* Example Kafka Clients - [https://github.com/pndaproject/example-kafka-clients](https://github.com/pndaproject/example-kafka-clients)
+* Jmxproxy 3.2.0 - [https://github.com/mk23/jmxproxy](https://github.com/mk23/jmxproxy)
+
+## Data Ingestion
+
+For instructions on how to use logstash to ingest data, refer to this [guide](../logstash_guide.md).
+
+For detailed instructions on different data ingress methods, refer to this [guide](http://pnda.io/pnda-guide/producer/).
+
+### Kafka
+
+#### How to connect to the red-pnda Kafka instance?
+
+To connect to the red-pnda Kafka instance, connect to the broker at `kafka:9092`.
+
+#### Are there any default topics which I can use?
+
+By default, there are two Kafka topics created for easy usage:
+
+1. raw.log.localtest
+2. avro.log.localtest
+
+The `raw.log.localtest` topic is a generic topic; you can use it to ingest any type of data.
+
+The `avro.log.localtest` topic can be used to ingest PNDA Avro-encoded data.
+
+Note that if you use the `raw.log.localtest` topic, data is written to the disk of the VM.
+
+By default, data is stored in the `/data` directory of the VM's file system using a system-timestamp directory hierarchy.
+
+For example, if you streamed data on 20th June 2017 at 5PM, your data will be stored in...
+
+    /data/year=2017/month=6/day=20/hour=17/dump.json
+
+#### Sample Kafka Producer
+
+We have also provided a sample Kafka producer in Python. This will send one JSON event to the `raw.log.localtest` topic per execution, so feel free to play around with it.
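+
+As a rough sketch of what such a producer involves, the snippet below is illustrative only (it assumes the `kafka-python` client and the broker address used above; the bundled `producer.py` may be implemented differently):
+
+    # illustrative only: send one JSON event to the raw.log.localtest topic
+    import json
+    import time
+    from kafka import KafkaProducer
+
+    producer = KafkaProducer(bootstrap_servers='kafka:9092')
+    event = {'timestamp': int(time.time() * 1000), 'message': 'hello red-pnda'}
+    producer.send('raw.log.localtest', json.dumps(event).encode('utf-8'))
+    producer.flush()
+
+To run the bundled producer: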
+
+    cd /opt/pnda
+    python producer.py
+
+Depending on what time you send the data, it will be stored in
+
+    /data/year=yyyy/month=mm/day=dd/hour=hh/dump.json
+
+Where yyyy, mm, dd and hh can be retrieved by using the system `date` command:
+
+    date
+
+
+## Jupyter Notebooks
+
+The [Jupyter Notebook](http://jupyter.org) is a web application that allows you to create and share documents that contain live code, equations, visualizations and explanatory text. In Red PNDA, it supports exploration and presentation of data from the local file system.
+
+The default password for the Jupyter Notebook is `pnda`.
+
+Please refer to our [Jupyter Guide](../jupyter_guide.md) for steps on how to use Jupyter.
+
+For those who are new to PNDA, there's a network-related dataset (BGP updates from the Internet) and an accompanying tutorial Jupyter notebook named `Introduction to Big Data Analytics.ipynb` to help you get started.
+
+Also, there's a sample tutorial named `tutorial.ipynb` provided to do some basic analysis, using Spark DataFrames, on data dumped to disk via Kafka.
+
+If you are interested in data mining or anomaly detection, take a look at `red-pnda-anom-detect.ipynb`, where we work with telemetry data and try to detect unintentional traffic loss in the network.
+
+## Grafana Server
+
+The default login credentials for Grafana are `pnda/pnda`.
+
+
+## Shutdown
+
+To stop the docker PNDA services, run `docker-compose down`.
+
+To remove the docker PNDA containers, run `docker-compose rm`.
+
+To delete the PNDA docker persistent volumes, run `./delete_volumes.sh`.
+
+
+## General Troubleshooting
+
+Please refer to our [Troubleshooting guide](../General_Troubleshooting.md) for tips if you encounter any problems.
+
+
+## Further Reading
+
+For a deeper dive into the various components, use this as an entry point.
+
+* Jupyter Notebooks: this guide, which contains a nice intro to Jupyter as well: [https://github.com/jakevdp/PythonDataScienceHandbook](https://github.com/jakevdp/PythonDataScienceHandbook)
+
+* OpenTSDB: [http://opentsdb.net/docs/build/html/user_guide/quickstart.html](http://opentsdb.net/docs/build/html/user_guide/quickstart.html)
+
+* Grafana: [http://docs.grafana.org/guides/getting_started/](http://docs.grafana.org/guides/getting_started/)
+
+* Kafka Manager: [https://github.com/yahoo/kafka-manager](https://github.com/yahoo/kafka-manager)
+
+* Apache Spark: [https://spark.apache.org/docs/1.6.1/quick-start.html](https://spark.apache.org/docs/1.6.1/quick-start.html)
diff --git a/docker/delete_volumes.sh b/docker/delete_volumes.sh
new file mode 100755
index 0000000..0ae4ad6
--- /dev/null
+++ b/docker/delete_volumes.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+
+docker volume rm $(docker volume ls -f name=red-pnda -q)
diff --git a/docker/deploy.sh b/docker/deploy.sh
new file mode 100755
index 0000000..638b780
--- /dev/null
+++ b/docker/deploy.sh
@@ -0,0 +1,106 @@
+#!/bin/bash
+
+echo "---------------- STARTING HDFS and HBASE ----------------"
+docker-compose up -d zookeeper
+docker-compose up -d hdfs-namenode
+docker-compose up -d hdfs-datanode
+while ! 
docker exec -ti hdfs-namenode nc -vz hdfs-namenode:8020 ; do + echo "waiting for hdfs-namenode to start" + sleep 2 +done +docker-compose up -d hbase-master +docker-compose up -d hbase-region + +echo "---------------- ADDING users to HDFS ----------------" +echo "adding hdfs as admin superuser" +docker exec -ti hdfs-namenode adduser --system --gecos "" --ingroup=root --shell /bin/bash --disabled-password hdfs +echo "adding pnda user" +PNDA_USER=pnda +PNDA_GROUP=pnda +docker exec -ti hdfs-namenode addgroup $PNDA_GROUP +docker exec -ti hdfs-namenode adduser --gecos "" --ingroup=$PNDA_GROUP --shell /bin/bash --disabled-password $PNDA_USER +docker exec -ti hdfs-namenode hdfs dfs -mkdir -p /user/$PNDA_USER +docker exec -ti hdfs-namenode hdfs dfs -chown $PNDA_USER:$PNDA_GROUP /user/$PNDA_USER +docker exec -ti hdfs-namenode hdfs dfs -chmod 770 /user/$PNDA_USER + + +echo "---------------- ADDING KITE_TOOLS to HDFS NAMENODE AND INITIALIZE PNDA REPOs ----------------" +docker cp hdfs/kite-files/pnda.avsc hdfs-namenode:/tmp/pnda.avsc +docker cp hdfs/kite-files/pnda_kite_partition.json hdfs-namenode:/tmp/pnda_kite_partition.json +docker exec -i hdfs-namenode apk add --no-cache curl +docker exec -i hdfs-namenode /bin/bash < hdfs/add_kite_tools_and_create_db.sh + +echo "---------------- CREATING HBASE TABLES for OPENTSDB ----------------" +docker exec -i hbase-master /bin/bash < opentsdb/create_opentsdb_hbase_tables.sh + +echo "---------------- ENABLING THRIFT API in HBASE MASTER ----------------" +docker exec -d hbase-master hbase thrift start -p 9090 +while ! docker exec -ti hbase-master nc -vz hbase-master:9090 ; do + echo "waiting for hbase thrift api to start" + sleep 2 +done +echo "---------------- STARTING THE REST OF THE SERVICES ----------------" +docker-compose up -d +echo "---------------- CREATING pnda user in services ----------------" +docker exec deployment-manager sh -c 'adduser -D pnda && echo "pnda:pnda" | chpasswd' +docker exec jupyter-ssh sh -c 'adduser -D pnda && echo "pnda:pnda" | chpasswd' + +echo "---------------- ADDING ssh keys to dm_keys volume ----------------" +mkdir -p dm_keys +echo "Generating SSH Keys for Deployment Manager connections" + ssh-keygen -b 2048 -t rsa -f dm_keys/dm -q -N "" +cp dm_keys/dm dm_keys/dm.pem + +docker cp dm_keys/ deployment-manager:/opt/pnda/ +docker exec -ti deployment-manager chown -R root:root /opt/pnda/dm_keys/ +docker exec -ti deployment-manager chmod 644 /opt/pnda/dm_keys/dm.pub +docker exec -ti deployment-manager chmod 600 /opt/pnda/dm_keys/dm.pem +docker exec -ti deployment-manager chmod 600 /opt/pnda/dm_keys/dm + + +echo "---------------- ADDING Public key to jupyter-ssh ----------------" +docker exec jupyter-ssh mkdir -p /home/pnda/.ssh +docker cp dm_keys/dm.pub jupyter-ssh:/home/pnda/.ssh/authorized_keys +docker exec jupyter-ssh chmod 644 /home/pnda/.ssh/authorized_keys +docker exec jupyter-ssh chown -R pnda:pnda /home/pnda/.ssh +docker exec jupyter-ssh mkdir -p /root/.ssh +docker cp dm_keys/dm.pub jupyter-ssh:/root/.ssh/authorized_keys +docker exec jupyter-ssh chmod 644 /root/.ssh/authorized_keys +docker exec jupyter-ssh chown -R root:root /root/.ssh +echo "---------------- ADDING Public key to deployment-manager-ssh ----------------" +#docker exec deployment-manager-ssh mkdir -p /root/.ssh +#docker cp dm_keys/dm.pub deployment-manager-ssh:/root/.ssh/authorized_keys +#docker exec deployment-manager-ssh chmod 644 /root/.ssh/authorized_keys +#docker exec deployment-manager-ssh chown -R root:root /root/.ssh + +./register_hostnames.sh + 
+#echo "---------------- OOZIE create sharelib in HDFS ----------------" +#docker exec oozie oozie-setup.sh sharelib create -fs hdfs://hdfs-namenode:8020 +echo "---------------- KAFKA-MANAGER CONFIGURATION ----------------" +curl -X POST \ + http://kafka-manager:10900/clusters \ + -H 'content-type: application/x-www-form-urlencoded' \ + -d 'name=PNDA&zkHosts=zookeeper%3A2181&kafkaVersion=1.0.0&jmxEnabled=true&jmxUser=&jmxPass=&activeOffsetCacheEnabled=true&securityProtocol=PLAINTEXT' &>/dev/null + +echo "---------------- GRAFANA: importing data sources and dashboards ----------------" +timeout 10s bash -c 'while [[ $(curl -s -o /dev/null -w %{http_code} http://grafana:3000/login) != 200 ]]; do sleep 1; done; echo OK' || echo TIMEOUT + +curl -H "Content-Type: application/json" -X POST \ +-d '{"name":"PNDA OpenTSDB","type":"opentsdb","url":"http://localhost:4242","access":"proxy","basicAuth": false,"isDefault": true }' \ +http://pnda:pnda@grafana:3000/api/datasources +curl -H "Content-Type: application/json" -X POST \ +-d '{"name":"PNDA Graphite","type":"graphite","url":"http://$GRAPHITE_HOST:$GRAPHITE_PORT","access":"proxy","basicAuth":false,"isDefault":false}' \ +http://pnda:pnda@grafana:3000/api/datasources +./grafana/grafana-import-dashboards.sh grafana/PNDA.json +./grafana/grafana-import-dashboards.sh grafana/PNDA-DM.json +./grafana/grafana-import-dashboards.sh grafana/PNDA-Hadoop.json +./grafana/grafana-import-dashboards.sh grafana/PNDA-Kafka.json +echo "red-PNDA Deployment Finished - Opening console-frontend web ui" +xdg-open http://console-frontend + + + + + + diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml new file mode 100644 index 0000000..619b99f --- /dev/null +++ b/docker/docker-compose.yml @@ -0,0 +1,304 @@ +version: '3.4' +services: + gobblin: + container_name: gobblin + hostname: gobblin + image: pnda/gobblin:0.11.0-0.1.0 + build: + context: ./dockerfiles/platform-gobblin-modules + args: + version: 0.1.0 + environment: + - HDFS_URL=hdfs://hdfs-namenode:8020 + - MASTER_DATASET_DIRECTORY=/user/pnda/PNDA_datasets/datasets + - MASTER_DATASET_QUARANTINE_DIRECTORY=/user/pnda/PNDA_datasets/quarantine + - MAX_MAPPERS=4 + - KAFKA_BROKERS=kafka:9092 + spark-master: + container_name: spark-master + hostname: spark-master + image: gradiant/spark:2.3.0 + command: + - master + environment: + - SPARK_MASTER_HOST=spark-master + - SPARK_MASTER_PORT=7077 + - SPARK_MASTER_WEBUI_PORT=8080 + spark-worker: + container_name: spark-worker + hostname: spark-worker + image: gradiant/spark:2.3.0 + command: + - worker + - spark://spark-master:7077 + flink-master: + container_name: flink-master + hostname: flink-master + image: flink:1.6.0-hadoop27-scala_2.11-alpine + command: + - jobmanager + flink-worker: + container_name: flink-worker + hostname: flink-worker + image: flink:1.6.0-hadoop27-scala_2.11-alpine + depends_on: + - flink-master + command: + - taskmanager + environment: + - JOB_MANAGER_RPC_ADDRESS=flink-master + jupyter: + container_name: jupyter + hostname: jupyter + image: pnda/jupyter:4.4.0 + build: + context: ./dockerfiles/jupyter + args: + version: 4.4.0 + volumes: + - jupyter-home:/home + environment: + - SPARK_MASTER_URL=spark://spark-master:7077 + - HADOOP_DISTRO=env + - HDFS_ROOT_URI=hdfs://hdfs-namenode:8020 + jupyter-ssh: + container_name: jupyter-ssh + image: gradiant/openssh-server + network_mode: service:jupyter + volumes: + - jupyter-home:/home + depends_on: + - jupyter + grafana: + container_name: grafana + hostname: grafana + image: grafana/grafana:5.0.3 + 
environment: + - GF_SECURITY_ADMIN_USER=pnda + - GF_SECURITY_ADMIN_PASSWORD=pnda + ports: + - 3000:3000 + volumes: + - grafana:/var/lib/grafana + deployment-manager: + container_name: deployment-manager + hostname: deployment-manager + image: pnda/deployment-manager:1.0.0 + build: + context: ./dockerfiles/platform-deployment-manager + args: + version: 1.0.0 + environment: + - JUPYTER_HOST=jupyter + - DATA_LOGGER_URL=http://console-backend:3001 #data-logger uses the data-manager network stack + - PACKAGE_REPOSITORY_URL=http://package-repository:8888 + - HADOOP_DISTRO=env + - HDFS_ROOT_URI=hdfs://hdfs-namenode + - SPARK_MASTER=spark://spark-master:7077 + - YARN_NODE_MANAGERS=localhost + - ZOOKEEPER_QUORUM=zookeeper + - LOG_LEVEL=DEBUG + volumes: + - dm_keys:/opt/pnda/dm_keys + kafka-manager: + container_name: kafka-manager + hostname: kafka-manager + image: gradiant/kafka-manager:1.3.3.17 + environment: + - ZK_HOSTS=zookeeper:2181 + - KM_ARGS=-Dhttp.port=10900 + ports: + - 10900:10900 + package-repository: + container_name: package-repository + hostname: package-repository + image: pnda/package-repository:0.3.2 + build: + context: ./dockerfiles/platform-package-repository + args: + version: 0.3.2 + environment: + - FS_LOCATION_PATH=/mnt/packages + - DATA_LOGGER_URL=http://console-backend:3001 #data-logger uses the data-manager network stack + volumes: + - package-repository:/mnt/packages + hdfs-namenode: + container_name: hdfs-namenode + hostname: hdfs-namenode + image: gradiant/hdfs-namenode:2.7.7 + environment: + - CLUSTER_NAME=hdfs-pnda + - CORE_CONF_hadoop_proxyuser_root_hosts=* + - CORE_CONF_hadoop_proxyuser_root_groups=* + - HDFS_CONF_dfs_permissions_superusergroup=root + - HDFS_CONF_dfs_replication=1 + volumes: + - hdfs-name:/hadoop/dfs/name + hdfs-datanode: + container_name: hdfs-datanode + hostname: hdfs-datanode + image: gradiant/hdfs-datanode:2.7.7 + environment: + - CORE_CONF_fs_defaultFS=hdfs://hdfs-namenode:8020 + - CORE_CONF_hadoop_proxyuser_root_hosts=* + - CORE_CONF_hadoop_proxyuser_root_groups=* + - HDFS_CONF_dfs_permissions_superusergroup=root + - HDFS_CONF_dfs_replication=1 + - HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data + volumes: + - hdfs-data-0:/hadoop/dfs/data + hbase-master: + container_name: hbase-master + hostname: hbase-master + image: gradiant/hbase-master:2.0.1 + environment: + HBASE_CONF_hbase_rootdir: hdfs://hdfs-namenode:8020/hbase + HBASE_CONF_hbase_cluster_distributed: "true" + HBASE_CONF_hbase_zookeeper_quorum: zookeeper + HBASE_CONF_hbase_master: hbase-master:60000 + HBASE_CONF_hbase_master_hostname: hbase-master + HBASE_CONF_hbase_master_port: "60000" + HBASE_CONF_hbase_master_info_port: "60010" + HBASE_CONF_hbase_regionserver_port: "60020" + HBASE_CONF_hbase_regionserver_info_port: "60030" + hbase-region: + container_name: hbase-region + network_mode: "service:hdfs-datanode" + image: gradiant/hbase-region:2.0.1 + environment: +# # For now this only works with 1 instance of hdfs-datanode + HBASE_CONF_hbase_regionserver_hostname: hdfs-datanode + HBASE_CONF_hbase_rootdir: hdfs://hdfs-namenode:8020/hbase + HBASE_CONF_hbase_cluster_distributed: "true" + HBASE_CONF_hbase_zookeeper_quorum: zookeeper + HBASE_CONF_hbase_master: hbase-master:60000 + HBASE_CONF_hbase_master_hostname: hbase-master + HBASE_CONF_hbase_master_port: "60000" + HBASE_CONF_hbase_master_info_port: "60010" + HBASE_CONF_hbase_regionserver_port: "60020" + HBASE_CONF_hbase_regionserver_info_port: "60030" + opentsdb: + container_name: opentsdb + hostname: opentsdb + image: 
gradiant/opentsdb:2.3.1 + environment: + - ZKQUORUM=zookeeper:2181 + - ZKBASEDIR=/hbase + - TSDB_OPTS="--read-only" + - TSDB_PORT=4242 + volumes: + - tsdb-bin:/usr/share/opentsdb/bin + kafka: + container_name: kafka + hostname: kafka + image: confluentinc/cp-kafka:4.0.0 + environment: + - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 + - KAFKA_INTER_BROKER_LISTENER_NAME=REPLICATION + - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=INGEST:PLAINTEXT,REPLICATION:PLAINTEXT,INTERNAL_PLAINTEXT:PLAINTEXT + - KAFKA_LISTENERS=INGEST://kafka:9094,REPLICATION://kafka:9093,INTERNAL_PLAINTEXT://kafka:9092 + - KAFKA_ADVERTISED_LISTENERS=INGEST://kafka:9094,REPLICATION://kafka:9093,INTERNAL_PLAINTEXT://kafka:9092 + - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1 + - JMX_PORT=9050 + ports: + - 9092:9092 + - 9050:9050 + volumes: + - kafka-data:/var/lib/kafka/data + zookeeper: + container_name: zookeeper + hostname: zookeeper + image: zookeeper:3.4 + volumes: + - zk-data:/data + - zk-datalog:/datalog + platform-testing: + container_name: platform-testing + hostname: platform-testing + image: pnda/platform-testing:0.5.0 + build: + context: ./dockerfiles/platform-testing + args: + version: 0.5.0 + environment: + - CONSOLE_HOSTS=console-backend:3001 + - ZOOKEEPERS=zookeeper:2181 + - KAFKA_BROKERS=kafka:9050 + jmxproxy: + container_name: jmxproxy + network_mode: service:platform-testing + image: gradiant/jmxproxy:3.2.0 + depends_on: + - platform-testing + console-frontend: + container_name: console-frontend + hostname: console-frontend + image: pnda/console-frontend:1.0.0 + build: + context: ./dockerfiles/platform-console-frontend + args: + version: 1.0.0 + environment: + - DATA_MANAGER_HOST=console-backend + - DATA_MANAGER_PORT=3123 + - KAFKA_MANAGER_URL=http://kafka-manager:10900 + - FLINK_URL=http://flink-master:8081 + - OPENTSDB_URL=http://opentsdb:4242 + ports: + - 80:80 + console-backend: + container_name: console-backend + hostname: console-backend + image: pnda/console-backend-data-manager:1.0.0 + build: + context: ./dockerfiles/platform-console-backend + args: + version: 1.0.0 + target: console-backend-data-manager + environment: + - CONSOLE_FRONTEND_HOSTS_CSV=console-frontend + - DATASET_MANAGER_URL=http://data-service:7000 + - DEPLOYMENT_MANAGER_URL=http://deployment-manager:5000 + console-backend-data-logger: + container_name: console-backend-data-logger + network_mode: service:console-backend + image: pnda/console-backend-data-logger:1.0.0 + build: + context: ./dockerfiles/platform-console-backend + args: + version: 1.0.0 + target: console-backend-data-logger + redis: + container_name: redis + network_mode: service:console-backend + image: redis:3.2.11-alpine + data-service: + container_name: data-service + hostname: data-service + image: pnda/data-service:0.2.2 + build: + context: ./dockerfiles/platform-data-mgmnt + args: + version: 0.2.2 + target: data-service + environment: + - LOCATION=/user/pnda/PNDA_datasets/datasets + - HADOOP_DISTRO=env + - HDFS_URL=hdfs-namenode:50070 + - HBASE_HOST=hbase-master + - CM_HOST=ambari-server + - CM_USER=scm + - CM_PASSWORD=scm + depends_on: + - hdfs-namenode +volumes: + jupyter-home: + dm_keys: + package-repository: + hdfs-name: + hdfs-data-0: + zk-data: + zk-datalog: + kafka-data: + grafana: + tsdb-bin: diff --git a/docker/dockerfiles/jupyter/Dockerfile b/docker/dockerfiles/jupyter/Dockerfile new file mode 100644 index 0000000..54618f9 --- /dev/null +++ b/docker/dockerfiles/jupyter/Dockerfile @@ -0,0 +1,60 @@ +FROM alpine:3.7 as platformlibs + +LABEL 
maintainer="cgiraldo@gradiant.org" +LABEL organization="gradiant.org" + +COPY docker/hdfs_root_uri_conf.diff / +RUN apk add --no-cache git bash python py2-pip && pip install setuptools +RUN git clone https://github.com/pndaproject/platform-libraries.git +RUN cd platform-libraries && git checkout tags/release/4.0 && \ + export VERSION=$(git describe --tags) && \ + git apply /hdfs_root_uri_conf.diff && \ + python setup.py bdist_egg + +FROM alpine:3.7 + +COPY --from=platformlibs /platform-libraries/dist/platformlibs-0.1.5-py2.7.egg / +COPY docker / +ENV SPARK_HOME=/opt/spark + +RUN apk add --no-cache bash python2 py2-pip postgresql-dev libpng-dev freetype-dev ca-certificates build-base python2-dev krb5-dev libffi-dev cyrus-sasl-dev nodejs shadow python3 python3-dev openjdk8-jre && \ + echo 'Installing python2 requirements' && \ + pip2 install -r /requirements/requirements-jupyter.txt && \ + pip2 install -r /requirements/app-packages-requirements.txt && pip2 install j2cli && \ + /usr/bin/python2 -m ipykernel.kernelspec --name python2 --display-name "Python 2" && \ + echo 'Instaling python3 requirements' && \ + pip3 install -r /requirements/requirements-jupyter.txt && \ + /usr/bin/python3 -m ipykernel.kernelspec --name python3 --display-name "Python 3" && \ + echo 'Adding pyspark2 support' && \ + mkdir -p /usr/local/share/jupyter/kernels/pyspark2 && mkdir -p /opt && \ + wget -O- https://archive.apache.org/dist/spark/spark-2.3.0/spark-2.3.0-bin-hadoop2.7.tgz | tar -xvz -C /tmp && \ + mv /tmp/spark-2.3.0-bin-hadoop2.7 /opt/spark && \ + echo 'Adding jupyter-scala_extension_spark' && \ + jupyter nbextension enable --py widgetsnbextension --system && \ + jupyter-kernelspec install /usr/lib/python3.6/site-packages/sparkmagic/kernels/sparkkernel && \ + jupyter serverextension enable --py sparkmagic && \ + echo 'Adding jupyter-extensions' && \ + apk add --no-cache libxml2-dev libxslt-dev && \ + pip3 install -r /requirements/requirements-jupyter-extensions.txt && \ + jupyter serverextension enable --py jupyter_spark --system && \ + jupyter nbextension install --py jupyter_spark --system && \ + jupyter nbextension enable --py jupyter_spark --system && \ + jupyter nbextension enable --py widgetsnbextension --system && \ + echo 'Adding jupyterhub' && \ + pip3 install -r /requirements/requirements-jupyterhub.txt && \ + npm install -g configurable-http-proxy && mkdir -p /var/log/pnda && \ + echo 'auth required pam_exec.so debug log=/var/log/pnda/login.log /create_notebook_dir.sh' >> /etc/pam.d/login +RUN echo 'Adding pnda platform-libraries' && \ + mkdir /etc/platformlibs && /usr/bin/python2 -m easy_install /platformlibs-0.1.5-py2.7.egg && \ + adduser -D pnda && echo "pnda:pnda" | chpasswd && \ + mkdir -p /opt/pnda && mv /notebooks /opt/pnda/jupyter_notebooks && \ + echo 'auth required pam_listfile.so item=user sense=deny file=/etc/login.deny onerr=succeed' >> /etc/pam.d/login && \ + echo 'root' >> /etc/login.deny + +RUN wget http://central.maven.org/maven2/org/apache/spark/spark-sql-kafka-0-10_2.11/2.3.0/spark-sql-kafka-0-10_2.11-2.3.0.jar \ +-O /opt/spark/jars/spark-sql-kafka-0-10_2.11-2.3.0.jar && \ +wget http://central.maven.org/maven2/org/apache/kafka/kafka-clients/1.0.0/kafka-clients-1.0.0.jar \ +-O /opt/spark/jars/kafka-clients-1.0.0.jar + +ENTRYPOINT /entrypoint.sh + diff --git a/docker/dockerfiles/jupyter/build-docker.sh b/docker/dockerfiles/jupyter/build-docker.sh new file mode 100755 index 0000000..f1acc33 --- /dev/null +++ b/docker/dockerfiles/jupyter/build-docker.sh @@ -0,0 +1,4 @@ +#!/bin/bash 
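+# Builds the pnda/jupyter image referenced in docker-compose.yml (assumes the script is run from this directory)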
+ +VERSION=4.4.0 +docker build -t pnda/jupyter:$VERSION . diff --git a/docker/dockerfiles/jupyter/data_generator.py b/docker/dockerfiles/jupyter/data_generator.py new file mode 100644 index 0000000..c428a86 --- /dev/null +++ b/docker/dockerfiles/jupyter/data_generator.py @@ -0,0 +1,114 @@ +#!/usr/bin/python + +import argparse +import subprocess +import json +import avro.schema +import avro.io +import io +import datetime +import uuid +import time +import sys + +from random import randint +from avro.datafile import DataFileWriter +from avro.io import DatumWriter +from argparse import RawTextHelpFormatter + +def generate_sample_datasets (host_ips, metric_ids, year, month, day, hour): + avro_schema = '' + #load data from hdfs + cat = subprocess.Popen(['sudo', '-u', 'hdfs', 'hadoop', 'fs', '-cat', '/user/pnda/PNDA_datasets/datasets/.metadata/schema.avsc'], stdout=subprocess.PIPE) + for line in cat.stdout: + avro_schema = avro_schema + line + schema = avro.schema.parse(avro_schema) + bytes_writer = io.BytesIO() + encoder = avro.io.BinaryEncoder(bytes_writer) + #create hdfs folder structure + dir = create_hdfs_dirs (year, month, day, hour) + filename = str(uuid.uuid4()) + '.avro' + filepath = dir + filename + tmp_file = '/tmp/' + filename + + writer = DataFileWriter(open(tmp_file, "w"), DatumWriter(), schema) + + start_dt = datetime.datetime(year, month, day, hour, 0, 0) + start_ts = int(time.mktime(start_dt.timetuple())) + end_dt = start_dt.replace(hour=hour+1) + end_ts = int(time.mktime(end_dt.timetuple())) + + for ts in xrange(start_ts, end_ts, 1): + #generate random pnda record on per host ip basis + for host_ip in host_ips: + record = {} + record['timestamp'] = (ts * 1000) + record['src'] = 'test' + record['host_ip'] = host_ip + record['rawdata'] = generate_random_metrics(metric_ids) + #encode avro + writer.append(record) + writer.close() + subprocess.Popen(['sudo', '-u', 'hdfs', 'hadoop', 'fs', '-copyFromLocal', tmp_file, dir]) + return filepath + +def generate_random_metrics (metric_ids): + ''' + generate random raw_data elementTon + ''' + raw_data = {} + for id in metric_ids: + raw_data[id] = str(randint(0, 100)) + return json.dumps(raw_data).encode('utf-8') + +def create_hdfs_dirs (year, month, day, hour): + dir = "/user/pnda/PNDA_datasets/datasets/source=test/year=%0d/month=%02d/day=%02d/hour=%02d/" % (year, month, day, hour) + subprocess.Popen(['sudo', '-u', 'hdfs', 'hadoop', 'fs', '-mkdir', '-p', dir]) + return dir + +def get_args(): + epilog = """ example: + - create sample data sets + data_generator.py --hosts '10.0.0.1, 10.0.0.2' --metrics 'a, b, c' --year 2016 --month 4 --day 27 --hour 14 + - create sample data sets using system datetime + data_generator.py --hosts '10.0.0.1, 10.0.0.2' --metrics 'a, b, c' + """ + + dt = datetime.datetime.now() + parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter, description='Sample datasets generator', epilog=epilog) + parser.add_argument('--hosts', help='list of sample host ips separated by comma', default='') + parser.add_argument('--metrics', help='list of metrics ids', default='') + parser.add_argument('--year', type=int, help='year', default=dt.year) + parser.add_argument('--month', type=int, help='month', default=dt.month) + parser.add_argument('--day', type=int, help='day of the month', default=dt.day) + parser.add_argument('--hour', help='hour of the day', default=dt.hour) + args = parser.parse_args() + return args + +def main(): + args = get_args() + hosts = args.hosts.strip() + if not hosts: + print 'mandatory arg 
--hosts missing (aborting).' + sys.exit() + + host_ips = [x.strip() for x in hosts.split(",")] + + metrics = args.metrics.strip() + if not metrics: + print 'mandatory arg --metrics missing (aborting).' + sys.exit() + metric_ids = [x.strip() for x in metrics.split(",")] + + year = int(args.year) + month = int(args.month) + day = int(args.day) + hour = int(args.hour) + filepath = generate_sample_datasets(host_ips, metric_ids, year, month, day, hour) + print "Success: generated file path at " + filepath + +if __name__ == "__main__": + main() + + + \ No newline at end of file diff --git a/docker/dockerfiles/jupyter/docker/create_notebook_dir.sh b/docker/dockerfiles/jupyter/docker/create_notebook_dir.sh new file mode 100755 index 0000000..fb501c5 --- /dev/null +++ b/docker/dockerfiles/jupyter/docker/create_notebook_dir.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +set -x + +DIR=/home/$PAM_USER +if [ ! -d $DIR ]; then + mkdir $DIR + chmod 0755 $DIR + chown $PAM_USER: $DIR +fi + +DIR=$DIR/jupyter_notebooks +if [ ! -d $DIR ]; then + mkdir $DIR + cp -r /opt/pnda/jupyter_notebooks $DIR/examples + chmod -R 0755 $DIR + chown -R $PAM_USER: $DIR +fi + diff --git a/docker/dockerfiles/jupyter/docker/entrypoint.sh b/docker/dockerfiles/jupyter/docker/entrypoint.sh new file mode 100755 index 0000000..e9108dd --- /dev/null +++ b/docker/dockerfiles/jupyter/docker/entrypoint.sh @@ -0,0 +1,4 @@ +#/bin/sh +j2 /pyspark2_kernel.json.tpl > /usr/local/share/jupyter/kernels/pyspark2/kernel.json +j2 /platformlibs.ini.tpl > /etc/platformlibs/platformlibs.ini +/usr/bin/jupyterhub diff --git a/docker/dockerfiles/jupyter/docker/hdfs_root_uri_conf.diff b/docker/dockerfiles/jupyter/docker/hdfs_root_uri_conf.diff new file mode 100644 index 0000000..9c83a5c --- /dev/null +++ b/docker/dockerfiles/jupyter/docker/hdfs_root_uri_conf.diff @@ -0,0 +1,16 @@ +diff --git a/platformlibs/data_handler.py b/platformlibs/data_handler.py +index 27a2ea5..7bc1ae3 100644 +--- a/platformlibs/data_handler.py ++++ b/platformlibs/data_handler.py +@@ -63,7 +63,10 @@ class DataHandler(object): + if self._hdfs_root_uri: + return self._hdfs_root_uri + cm_conf = read_config('/etc/platformlibs/platformlibs.ini') +- self._hdfs_root_uri = get_hdfs_uri(cm_conf['cm_host'], cm_conf['cm_user'], cm_conf['cm_pass'], cm_conf['hadoop_distro']) ++ if 'hdfs_root_uri' in cm_conf: ++ self._hdfs_root_uri = cm_conf['hdfs_root_uri'] ++ else: ++ self._hdfs_root_uri = get_hdfs_uri(cm_conf['cm_host'], cm_conf['cm_user'], cm_conf['cm_pass'], cm_conf['hadoop_distro']) + return self._hdfs_root_uri + + @property diff --git a/docker/dockerfiles/jupyter/docker/notebooks/PNDA minimal SqlMagic notebook.ipynb b/docker/dockerfiles/jupyter/docker/notebooks/PNDA minimal SqlMagic notebook.ipynb new file mode 100644 index 0000000..75c74d5 --- /dev/null +++ b/docker/dockerfiles/jupyter/docker/notebooks/PNDA minimal SqlMagic notebook.ipynb @@ -0,0 +1,57 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Minimal PNDA Jupyter SqlMagic notebook" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "'''\n", + "Use following connection string to connect to MySQL DB. Enter valid username/password and hostname/IP of mysql server. \n", + "%load_ext sql\n", + "%sql mysql+pymysql://username:password@hostname/dbname\n", + "\n", + "\n", + "Use following connection string to connect to Postregsql. 
Enter valid username/password and hostname/IP of postgresql server.\n", + "%load_ext sql\n", + "%sql postgresql://username:password@localhost/dbname\n", + "\n", + "Use following connection string to connect to Impala (CDH distribution only). Enter valid username/password and hostname/IP of impala server.\n", + "Note : Impala connection through impyla requires to disable autocommit. Use %config SqlMagic to check various configurations available.\n", + "%load_ext sql\n", + "%config SqlMagic.autocommit=False\n", + "%sql impala://hostname:port/dbname\n", + "'''\n", + "%load_ext sql" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/docker/dockerfiles/jupyter/docker/notebooks/PNDA minimal notebook.ipynb b/docker/dockerfiles/jupyter/docker/notebooks/PNDA minimal notebook.ipynb new file mode 100644 index 0000000..61fd532 --- /dev/null +++ b/docker/dockerfiles/jupyter/docker/notebooks/PNDA minimal notebook.ipynb @@ -0,0 +1,98 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Minimal PNDA Jupyter notebook\n", + "\n", + "`%matplotlib notebook` must be set before `import matplotlib.pyplot as plt` or plotting with matplotlib will fail " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "%matplotlib notebook\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import sys\n", + "import pandas as pd\n", + "import matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(u'▶ Python version ' + sys.version)\n", + "print(u'▶ Pandas version ' + pd.__version__)\n", + "print(u'▶ Matplotlib version ' + matplotlib.__version__)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "values = np.random.rand(100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df = pd.DataFrame(data=values, columns=['RandomValue'])\n", + "df.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df.plot()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "PySpark2/Python2", + "language": "python", + "name": "pyspark2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/docker/dockerfiles/jupyter/docker/notebooks/tutorial/Example Platform-library PySpark Notebook.ipynb b/docker/dockerfiles/jupyter/docker/notebooks/tutorial/Example Platform-library PySpark Notebook.ipynb new file mode 100644 index 0000000..7dda082 --- /dev/null +++ b/docker/dockerfiles/jupyter/docker/notebooks/tutorial/Example Platform-library PySpark Notebook.ipynb @@ -0,0 +1,659 @@ +{ + 
"cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "
\n", + "\n", + "\n", + "
\n", + "\n", + "# Welcome to Example Platform-library PySpark Notebook\n", + "\n", + "It's a shared Jupyter server for you to learn and try out Jupyter notebook and perform interactive data analytics using PNDA platform libraries.\n", + "\n", + "In this example notebook, **JsonDataHandler**, a data handler implementation based on the assumption that the 'rawdata' field wrapped in Pnda avro record is a well-formatted in JSON.

\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Instructions\n", + "\n", + "To run the codes below:\n", + "\n", + "1. Click on the cell to select it.\n", + "2. Press `SHIFT+ENTER` on your keyboard or press the play button () in the toolbar above." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generate sample datasets ###\n", + "\n", + "If you don't have existed datasets, there are two ways to generate sample datasets:\n", + " * use data generation tool\n", + " * use embedded cell in this notebook\n", + "\n", + "Data generation tool is pre-installed on this node at `/home/cloud-user/data_generator.py`. \n", + "\n", + "
\n", + "

** Usage **

\n", + "```\n", + "./data_generator.py --hosts ','\\\n", + " --metrics ','\\\n", + " --year \\\n", + " --month \\\n", + " --day \\\n", + " --hour \n", + "```\n", + "

\n", + "[NOTE: if year|month|day|hour option is ignored, the script will extract values from current system time.]\n", + "

\n", + "
\n", + "\n", + "Alternative, you can simply run the cell below to generate sample network usage datesets." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "** Example 1: ** Generate sample network usage datasets " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/user/pnda/PNDA_datasets/datasets/source=test/year=2016/month=04/day=26/hour=16/f8236764-222c-4fd1-a873-61a71c28f6a3.avro'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import subprocess\n", + "import json\n", + "import avro.schema\n", + "import avro.io\n", + "import io\n", + "import datetime\n", + "import uuid\n", + "import time\n", + "import sys\n", + "import pyhdfs\n", + "\n", + "from random import randint\n", + "from avro.datafile import DataFileWriter\n", + "from avro.io import DatumWriter\n", + "from argparse import RawTextHelpFormatter\n", + "\n", + "fs = pyhdfs.HdfsClient(hosts='hdfs-namenode:50070', user_name='pnda')\n", + "\n", + "def generate_sample_datasets (host_ips, metric_ids, year, month, day, hour):\n", + " avro_schema = ''\n", + " #load data from hdfs\n", + " with fs.open('/user/pnda/PNDA_datasets/datasets/.metadata/schema.avsc') as f:\n", + " avro_schema = f.read()\n", + " schema = avro.schema.parse(avro_schema)\n", + " bytes_writer = io.BytesIO()\n", + " encoder = avro.io.BinaryEncoder(bytes_writer)\n", + " #create hdfs folder structure\n", + " dir = create_hdfs_dirs (year, month, day, hour)\n", + " filename = str(uuid.uuid4()) + '.avro'\n", + " filepath = dir + filename\n", + " tmp_file = '/tmp/' + filename\n", + " writer = DataFileWriter(open(tmp_file, \"w\"), DatumWriter(), schema)\n", + " start_dt = datetime.datetime(year, month, day, hour, 0, 0) \n", + " start_ts = int(time.mktime(start_dt.timetuple()))\n", + " end_dt = start_dt.replace(hour=hour+1)\n", + " end_ts = int(time.mktime(end_dt.timetuple()))\n", + "\n", + " for ts in xrange(start_ts, end_ts, 1):\n", + " #generate random pnda record on per host ip basis\n", + " for host_ip in host_ips:\n", + " record = {}\n", + " record['timestamp'] = (ts * 1000)\n", + " record['src'] = 'test'\n", + " record['host_ip'] = host_ip\n", + " record['rawdata'] = generate_random_metrics(metric_ids)\n", + " #encode avro\n", + " writer.append(record)\n", + " writer.close()\n", + " fs.copy_from_local(tmp_file, dir+filename)\n", + " return filepath\n", + "\n", + "def generate_random_metrics (metric_ids):\n", + " raw_data = {}\n", + " for id in metric_ids:\n", + " raw_data[id] = str(randint(0, 100))\n", + " return json.dumps(raw_data).encode('utf-8')\n", + "\n", + "def create_hdfs_dirs (year, month, day, hour):\n", + " dir = \"/user/pnda/PNDA_datasets/datasets/source=test/year=%0d/month=%02d/day=%02d/hour=%02d/\" % (year, month, day, hour)\n", + " fs.mkdirs(dir)\n", + " return dir \n", + "\n", + "#example host ips (update as you wish)\n", + "host_ips = ['10.0.0.1', '10.0.0.2', '10.0.0.3']\n", + "#example metric list (update as you wish)\n", + "metrics=['in_bytes', 'out_bytes', 'in_pks', 'out_pks']\n", + "#generate example datasets (update year, month, day, and hour as you wish)\n", + "generate_sample_datasets(host_ips, metrics, 2016, 4, 26, 16)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Play with RDD ###\n", + "RDD can be created automatically using PNDA platform libary. 
This allows data exploration using low-level RDD APIs.\n", + "\n", + "** Example 2: ** Create an instance of JsonDataHandler" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'cm_host'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m\u001b[0m", + "\u001b[0;31mKeyError\u001b[0mTraceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mplatformlibs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjson_data_handler\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mJsonDataHandler\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mhandler\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mJsonDataHandler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"test\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"year=2016/month=04/day=26/hour=16\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/usr/lib/python2.7/site-packages/platformlibs-0.6.8-py2.7.egg/platformlibs/json_data_handler.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, spark_context, datasource, path)\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[0mspark_context\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 37\u001b[0m \u001b[0mdatasource\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 38\u001b[0;31m path)\n\u001b[0m\u001b[1;32m 39\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mstaticmethod\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/lib/python2.7/site-packages/platformlibs-0.6.8-py2.7.egg/platformlibs/data_handler.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, spark_context, datasource, path)\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_rdd\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 37\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_hdfs_root_uri\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 38\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mschema\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_load_schema\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 39\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mstaticmethod\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/lib/python2.7/site-packages/platformlibs-0.6.8-py2.7.egg/platformlibs/data_handler.pyc\u001b[0m in \u001b[0;36m_load_schema\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 106\u001b[0m \"\"\"\n\u001b[1;32m 107\u001b[0m \u001b[0mschema_path\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'{}/user/pnda/PNDA_datasets/datasets/'\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 108\u001b[0;31m \u001b[0;34m'.metadata/schema.avsc'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhdfs_root_uri\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 109\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mspark_context\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtextFile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mschema_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcollect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/lib/python2.7/site-packages/platformlibs-0.6.8-py2.7.egg/platformlibs/data_handler.pyc\u001b[0m in \u001b[0;36mhdfs_root_uri\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_hdfs_root_uri\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0mcm_conf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mread_config\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'/etc/platformlibs/platformlibs.ini'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 66\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_hdfs_root_uri\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_hdfs_uri\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcm_conf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cm_host'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcm_conf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cm_user'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcm_conf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cm_pass'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcm_conf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'hadoop_distro'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 67\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_hdfs_root_uri\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 'cm_host'" + ] + } + ], + "source": [ + "from platformlibs.json_data_handler import JsonDataHandler\n", + "handler = JsonDataHandler(sc, \"test\", \"year=2016/month=04/day=26/hour=16\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "** Example 3: ** Simple RDD operations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import pprint\n", + "\n", + "rdd = handler.rdd\n", + "# print total nubmer of records\n", + "print rdd.count()\n", + "\n", + "# print one record\n", + "pprint.pprint(rdd.take(1))\n", + "\n", + "# use MapR function to print list of unique router ips\n", + "host_ips = rdd.map(lambda x: x['host_ip']).distinct().collect()\n", + "pprint.pprint(host_ips)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "** Challenge 1: ** How many unique metrics of all routers have been collected? What are they?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Speculate your anwser here\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualize high-level statistics ###\n", + "PNDA platform library provide functions to return high-level statistics on per host basis using `list_host_ips()` and on per metric basis using `list_metric_ids()`.\n", + "\n", + "** Example 4: ** Plot a bar chart to show the total number of records per host" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# plot simple bar chart\n", + "%matplotlib inline \n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "# query host IPs\n", + "host_stats = handler.list_host_ips()\n", + "host_ips = []\n", + "counts = []\n", + "for stat in host_stats:\n", + " host_ips.append(stat[0])\n", + " counts.append(stat[1])\n", + "\n", + "fig, ax = plt.subplots(figsize=(15, 8))\n", + "x = np.arange(len(host_ips))\n", + "rects = ax.bar(x, counts, color='y')\n", + "plt.xticks(x+0.5, host_ips, rotation=45) \n", + "\n", + "def autolabel(rects):\n", + " # attach 'counts' labels\n", + " for rect in rects:\n", + " height = rect.get_height()\n", + " ax.text(rect.get_x() + rect.get_width()/2., 1.05*height,\n", + " '%d' % int(height),\n", + " ha='center', va='bottom')\n", + "autolabel(rects) # add label on bar\n", + "plt.ylabel('counts')\n", + "plt.title('Statistics of hosts')\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "** Challenge 2: ** Generate a bar chart to show total number of records per metric of host 10.0.0.1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Speculate your anwser here\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Introducing interactive UI ###\n", + "Interactivity introduction to your notebook can be done by adding widgets provided in the `ipywidgets` package. Each widget consists of two parts: the UI element (e.g. Text Input, sliding bar, etc.) and an event handler. 
\n", + "\n", + "** Example 5: ** Interactive visualization of total number of records per metric of a particular host" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "from ipywidgets import *\n", + "from IPython.display import display\n", + "import matplotlib.patches as mpatches\n", + "\n", + "options= ['--select--'] + sorted(host_ips)\n", + "selected_host = \"--select--\"\n", + "\n", + "host_ip_widget = widgets.Dropdown(description='Host IP:', width=100, options=options)\n", + "display(host_ip_widget)\n", + "# diplaying the limits input widget:\n", + "limits_input = widgets.Text(description=\"limits :\", width=200)\n", + "display(limits_input)\n", + "# preparing a container to put in the created checkbox per host ip\n", + "checkboxes = []\n", + "cb_container=widgets.HBox()\n", + "display(cb_container)\n", + "\n", + "# add button that updates the graph based on the checkboxes\n", + "button = widgets.Button(description=\"submit\")\n", + "display(button)\n", + "\n", + "def on_button_clicked(b):\n", + " selected_host = host_ip_widget.value\n", + " \n", + " def autolabel(rects):\n", + " # attach 'counts' labels\n", + " for rect in rects:\n", + " height = rect.get_height()\n", + " ax.text(rect.get_x() + rect.get_width()/2., 1.05*height,\n", + " '%d' % int(height),\n", + " ha='center', va='bottom')\n", + " limit = -1\n", + " if limits_input.value:\n", + " limit = int(limits_input.value)\n", + " \n", + " filters={}\n", + " metrics = None\n", + " if selected_host != \"--select--\":\n", + " filters['host_ips']=[selected_host] \n", + " metrics = handler.list_metric_ids(limit=limit, filters=filters)\n", + " if len(metrics) > 0:\n", + " host_ip = metrics[0][0]\n", + " metric_stats = metrics[0][1]\n", + "\n", + " metric_ids=[]\n", + " metric_counts=[]\n", + " for stat in metric_stats:\n", + " metric_ids.append(stat[0])\n", + " metric_counts.append(stat[1])\n", + " x = np.arange(len(metric_ids))\n", + " fig, ax = plt.subplots(figsize=(15, 8))\n", + " metric_rects = ax.bar(x, metric_counts, color='y')\n", + " plt.xticks(x+0.5, metric_ids, rotation='vertical') \n", + " plt.ylabel ('counts')\n", + " patch = mpatches.Patch(color='y', label=host_ip)\n", + " plt.legend(handles=[patch])\n", + " autolabel(metric_rects)\n", + " plt.draw()\n", + " else:\n", + " print \"Please select a host ip from dropdown list.\"\n", + " \n", + "button.on_click(on_button_clicked)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "** Example 6: ** Interactive time series visualization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import datetime\n", + "from ipywidgets import *\n", + "from operator import add\n", + "from IPython.display import display\n", + "import calendar\n", + "import time\n", + "\n", + "dateFormatString = '%Y-%m-%d %H:%M:%S'\n", + "\n", + "colors=['b', 'c', 'y', 'm', 'r']\n", + "\n", + "# displaying the metric id input widget\n", + "metric_id_input = widgets.Text(description=\"metric id:\", width=200)\n", + "display(metric_id_input)\n", + "\n", + "host_ip_input = widgets.Text(description=\"host ip:\", width=200, value='edit and hit to add')\n", + "display(host_ip_input)\n", + "\n", + "#preparing the plot \n", + "plots = dict() \n", + "\n", + "#preparing a container to put in 
created checkbox per host ip\n", + "checkboxes = [] \n", + "cb_container = widgets.HBox() \n", + "display(cb_container)\n", + "\n", + "#preparing update button\n", + "update_button = widgets.Button(description=\"Update\")\n", + "\n", + "#normalise data with 5-min interval\n", + "def post_process(data):\n", + " def f(x): \n", + " sum_val = 0\n", + " for val in x:\n", + " sum_val = sum_val + x[0][1]\n", + " return sum_val\n", + " data_rdd = sc.parallelize(data).map(lambda x: (x[0], int(x[1]))).foldByKey(0, add).sortBy(lambda x: x[0]).groupBy(lambda x : (calendar.timegm(time.strptime(datetime.datetime.fromtimestamp(x[0]/1000).strftime('%Y-%m-%d %H:%M:%S'), dateFormatString))/(5*60))).map(lambda x : (x[0],list(x[1]))).mapValues(f).map(lambda x: (datetime.datetime.fromtimestamp(x[0] * 6*50), x[1]))\n", + " return data_rdd.keys().collect(), data_rdd.values().collect()\n", + "\n", + "#function to deal with the added host ip\n", + "def handle_submit(sender): \n", + " exists = False\n", + " for cb in checkboxes:\n", + " if cb.description is host_ip_input.value:\n", + " exists = True\n", + " if not exists and len(checkboxes)<5:\n", + " #add a new checkbox for the new host ip\n", + " checkboxes.append(widgets.Checkbox(description = host_ip_input.value, value=True, width=90))\n", + " cb_container.children=[i for i in checkboxes]\n", + " if len(checkboxes) == 1:\n", + " display(update_button)\n", + "\n", + "#function to deal with the checkbox update button \n", + "def on_button_clicked(b): \n", + " filters = {}\n", + " filters['metrics']=[metric_id_input.value]\n", + " host_ips = []\n", + " for c in cb_container.children:\n", + " if c.value:\n", + " host_ips.append(c.description) \n", + " filters['host_ips'] = host_ips\n", + "\n", + " results = handler.execute_query(filters=filters)\n", + "\n", + " i=0\n", + " if len(results) > 0:\n", + " # Plot things...\n", + " fig = plt.figure(figsize=(15, 8))\n", + " ax=fig.add_subplot(111)\n", + " for result in results:\n", + " label = result[0][1]\n", + " timestamps, values = post_process(result[1])\n", + " ax.plot_date(timestamps, values, c=colors[i], label=label)\n", + " i=i+1\n", + " ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter(\"%H:%M:%S\"))\n", + " plt.ylabel(metric_id_input.value)\n", + " plt.xlabel(\"time of the day\")\n", + " plt.legend(loc='upper right')\n", + " plt.gray() \n", + " plt.show()\n", + " \n", + "update_button.on_click(on_button_clicked) \n", + "host_ip_input.on_submit(handle_submit) \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "** Challenge 3: ** generate scatter plots to show packet/bytes drops (e.g. 
use in_byte metric) of a partiular host" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Speculate your anwser here" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "PySpark2/Python2", + "language": "python", + "name": "pyspark2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.14" + }, + "widgets": { + "state": { + "1557e450223e4dde8c2413a5ee83e705": { + "views": [ + { + "cell_index": 20 + } + ] + }, + "1afdbacd1b0b478aa01978edac6c190a": { + "views": [ + { + "cell_index": 20 + } + ] + }, + "1ba2b95d0e4d4a26a7aa819dc8534c6a": { + "views": [] + }, + "2db1cd0c66dd4e8196a900db826d64c8": { + "views": [] + }, + "38670bab911f412191a9b3deb84babc7": { + "views": [] + }, + "461bb60055dd4397b778a407640d1d99": { + "views": [] + }, + "533f372af51e4cb79e2122e986d701df": { + "views": [] + }, + "561a2c0073014a1d8cad39649bc96573": { + "views": [] + }, + "58ce38b80de34d6e82a4d160febff999": { + "views": [ + { + "cell_index": 18 + } + ] + }, + "633ca203e85541b59f9131e83f6b12bb": { + "views": [] + }, + "634e89cc27f14c5eb125d8294ab0e262": { + "views": [] + }, + "63bf2949f1ab4cdbb73d1d0d6d20dbd0": { + "views": [] + }, + "657e67c80c174b04bbd1f8cee14674ef": { + "views": [] + }, + "6b7ff28cf96145abbcb000a6d784b58c": { + "views": [] + }, + "7cf1b6bb25d841b0b83bf337300df6fd": { + "views": [] + }, + "810c709e9cc84968a1c20d1d13db8acf": { + "views": [] + }, + "a2439b82434d41a3b63efdbf3eee6519": { + "views": [ + { + "cell_index": 16 + } + ] + }, + "a4848088511d42379c3daf05b7150343": { + "views": [] + }, + "ac64a66ddb0e4cf9bd3cd37cf4275b28": { + "views": [] + }, + "ade75b3524bc42d39979108791ac27e0": { + "views": [] + }, + "aeac437c13634b02be44b86ce14a16d7": { + "views": [ + { + "cell_index": 18 + } + ] + }, + "b2d5229dfa0c4bc093922e6fd9afbd7a": { + "views": [] + }, + "b81aa794d7d74402bb68519be24bb444": { + "views": [] + }, + "bd62dae252ac444fbc377bec6458eb6b": { + "views": [] + }, + "c6d9fa2d6f2a4f0fb0b662d6bd89c6a4": { + "views": [] + }, + "cc93c8dda54d4f139f41b42c3dba67ad": { + "views": [ + { + "cell_index": 16 + } + ] + }, + "ce3a67a85dcf4cefbe5fa3e9e27644d8": { + "views": [] + }, + "df0a9835ec7c49ffaa7992c919a6ddcf": { + "views": [] + }, + "e61981d0808e4ce1ac91ad16d8f3b55f": { + "views": [] + }, + "e87b1311dbde4cd0bc020eb484d72f5b": { + "views": [] + }, + "e9353bafbf6a4a9b8b9de0747078720f": { + "views": [] + }, + "f0874b40c3da49fbb514e3f9f2047df3": { + "views": [] + }, + "f2bb960e4fc2454fa1be3b679c7fd078": { + "views": [] + }, + "fe575e9009c54084bd0a27bdd4c03382": { + "views": [] + } + }, + "version": "1.1.2" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/docker/dockerfiles/jupyter/docker/platform-libraries-env-conf.diff b/docker/dockerfiles/jupyter/docker/platform-libraries-env-conf.diff new file mode 100644 index 0000000..c8a1b39 --- /dev/null +++ b/docker/dockerfiles/jupyter/docker/platform-libraries-env-conf.diff @@ -0,0 +1,23 @@ +--- common_helpers.py.orig 2018-04-20 10:37:55.033647186 +0200 ++++ common_helpers.py 2018-04-20 10:50:57.434402052 +0200 +@@ -14,6 +14,7 @@ + Purpose: Utility library that defines common helper functions + """ + import requests ++import os + from cm_api.api_client import ApiResource + + def flatten_dict(input_d, result=None): +@@ -118,8 +119,10 @@ + - hadoop_distro: 'CDH' or 'HDP' + 
''' + hdfs_uri = '' +- +- if hadoop_distro == 'CDH': ++ ++ if hadoop_distro == 'env': ++ hdfs_uri = os.getenv('HDFS_ROOT_URI') ++ elif hadoop_distro == 'CDH': + api = connect_cm(cm_host, cm_user, cm_pass) + + for cluster_detail in api.get_all_clusters(): diff --git a/docker/dockerfiles/jupyter/docker/platformlibs.ini.tpl b/docker/dockerfiles/jupyter/docker/platformlibs.ini.tpl new file mode 100644 index 0000000..7206110 --- /dev/null +++ b/docker/dockerfiles/jupyter/docker/platformlibs.ini.tpl @@ -0,0 +1,6 @@ +[cm] +hadoop_distro={{ HADOOP_DISTRO | default('HDP') }} +hdfs_root_uri={{ HDFS_ROOT_URI | default('hdfs://hdfs-namenode:8020') }} +cm_host={{ CM_HOST | default('cm') }} +cm_user={{ CM_USER | default('scm') }} +cm_pass={{ CM_PASSWORD | default('scm') }} diff --git a/docker/dockerfiles/jupyter/docker/pyspark2_kernel.json.tpl b/docker/dockerfiles/jupyter/docker/pyspark2_kernel.json.tpl new file mode 100644 index 0000000..5428176 --- /dev/null +++ b/docker/dockerfiles/jupyter/docker/pyspark2_kernel.json.tpl @@ -0,0 +1,21 @@ +{ + "display_name": "PySpark2/Python2", + "language": "python", + "argv": [ + "/usr/bin/python2", + "-m", + "ipykernel", + "-f", + "{connection_file}" + ], + "env": { + "HADOOP_CONF_DIR":"{{HADOOP_CONF_DIR | default('/')}}", + "PYSPARK_PYTHON":"/usr/bin/python2", + "SPARK_MAJOR_VERSION":"2", + "SPARK_HOME": "/opt/spark", + "WRAPPED_SPARK_HOME": "/usr/", + "PYTHONPATH": "/usr/lib/python2.7/site-packages:/opt/spark/python:/opt/spark/python/lib/py4j-0.10.6-src.zip", + "PYTHONSTARTUP": "/opt/spark/python/pyspark/shell.py", + "PYSPARK_SUBMIT_ARGS": "--master {{SPARK_MASTER_URL | default('spark://spark-master:7077')}} --jars /opt/spark/examples/jars/spark-examples_2.11-2.3.0.jar pyspark-shell" + } +} diff --git a/docker/dockerfiles/jupyter/docker/requirements/app-packages-requirements.txt b/docker/dockerfiles/jupyter/docker/requirements/app-packages-requirements.txt new file mode 100644 index 0000000..9c0a454 --- /dev/null +++ b/docker/dockerfiles/jupyter/docker/requirements/app-packages-requirements.txt @@ -0,0 +1,14 @@ +# App dependency packages to be installed for use in pyspark and Jupyter +avro==1.8.1 +cm-api==14.0.0 +fastavro==0.17.9 +happybase==1.0.0 +kafka-python==1.3.5 +PyHDFS==0.1.2 +pykafka==2.7.0 +pywebhdfs==0.4.1 +PyYAML==3.12 +thrift==0.9.3 +thrift_sasl==0.2.1 +thriftpy==0.3.9 +xmltodict==0.11.0 diff --git a/docker/dockerfiles/jupyter/docker/requirements/requirements-jupyter-extensions.txt b/docker/dockerfiles/jupyter/docker/requirements/requirements-jupyter-extensions.txt new file mode 100644 index 0000000..735db94 --- /dev/null +++ b/docker/dockerfiles/jupyter/docker/requirements/requirements-jupyter-extensions.txt @@ -0,0 +1,3 @@ +lxml==3.6.4 +ipywidgets==6.0.0 +widgetsnbextension==2.0.0 diff --git a/docker/dockerfiles/jupyter/docker/requirements/requirements-jupyter.txt b/docker/dockerfiles/jupyter/docker/requirements/requirements-jupyter.txt new file mode 100644 index 0000000..7323cf8 --- /dev/null +++ b/docker/dockerfiles/jupyter/docker/requirements/requirements-jupyter.txt @@ -0,0 +1,44 @@ +backports-abc==0.5 +bleach==1.5.0 +decorator==4.0.10 +entrypoints==0.2.2 +https://github.com/klyr/jupyter-spark/releases/download/0.3.0-patch/jupyter-spark-0.3.0-patch.tar.gz +html5lib==0.9999999 +impyla==0.14.0 +ipykernel==4.5.2 +ipython==5.1.0 +ipython-genutils==0.1.0 +ipython-sql==0.3.8 +Jinja2==2.8 +jsonschema==2.5.1 +jupyter==1.0.0 +jupyter-client==4.4.0 +jupyter-console==5.0.0 +jupyter-core==4.4.0 +MarkupSafe==0.23 +mistune==0.7.3 +nbconvert==5.0.0 
+nbformat==4.2.0 +notebook==4.3.1 +pandocfilters==1.4.1 +pexpect==4.2.1 +pickleshare==0.7.4 +prompt-toolkit==1.0.9 +ptyprocess==0.5.1 +Pygments==2.1.3 +pymysql==0.7.11 +psycopg2==2.7.3.2 +pyzmq==16.0.2 +qtconsole==4.2.1 +simplegeneric==0.8.1 +six==1.10.0 +sparkmagic==0.12.4 +sql-magic==0.0.3 +terminado==0.6 +testpath==0.3 +thrift==0.9.3 +tornado==4.4.2 +traitlets==4.3.1 +wcwidth==0.1.7 +widgetsnbextension==2.0.0 +matplotlib==2.1.2 diff --git a/docker/dockerfiles/jupyter/docker/requirements/requirements-jupyterhub.txt b/docker/dockerfiles/jupyter/docker/requirements/requirements-jupyterhub.txt new file mode 100644 index 0000000..10113a8 --- /dev/null +++ b/docker/dockerfiles/jupyter/docker/requirements/requirements-jupyterhub.txt @@ -0,0 +1,15 @@ +alembic==0.8.9 +backports-abc==0.5 +decorator==4.0.10 +ipython-genutils==0.1.0 +Jinja2==2.8 +jupyterhub==0.7.0 +Mako==1.0.6 +MarkupSafe==0.23 +pamela==0.3.0 +python-editor==1.0.3 +requests==2.12.4 +six==1.10.0 +SQLAlchemy==1.1.4 +tornado==4.4.2 +traitlets==4.3.1 diff --git a/docker/dockerfiles/platform-console-backend/.jscsrc b/docker/dockerfiles/platform-console-backend/.jscsrc new file mode 100644 index 0000000..5f890b5 --- /dev/null +++ b/docker/dockerfiles/platform-console-backend/.jscsrc @@ -0,0 +1,28 @@ +{ + "preset": "./codestyle/PNDA.json", + "fileExtensions": [".js", "jscs"], + + "disallowSpacesInsideObjectBrackets": null, + "disallowImplicitTypeConversion": ["string"], + + "safeContextKeyword": "_this", + + "jsDoc": { + "checkAnnotations": "closurecompiler", + "checkParamNames": true, + "requireParamTypes": true, + "checkRedundantParams": true, + "checkReturnTypes": true, + "checkRedundantReturns": true, + "requireReturnTypes": true, + "checkTypes": "capitalizedNativeCase", + "checkRedundantAccess": true, + "requireNewlineAfterDescription": true, + "requireCamelCaseOrUpperCaseIdentifiers": false + }, + + "excludeFiles": [ + "test/data/**", + "patterns/*" + ] +} diff --git a/docker/dockerfiles/platform-console-backend/Dockerfile b/docker/dockerfiles/platform-console-backend/Dockerfile new file mode 100644 index 0000000..3383866 --- /dev/null +++ b/docker/dockerfiles/platform-console-backend/Dockerfile @@ -0,0 +1,42 @@ +FROM node:7.3.0-alpine as builder +LABEL maintainer="cgiraldo@gradiant.org" +LABEL organization="gradiant.org" +ARG version +ENV VERSION $version +RUN apk add --no-cache bash build-base python linux-pam-dev ca-certificates wget +RUN wget -qO- https://github.com/pndaproject/platform-console-backend/archive/$VERSION.tar.gz | tar -xvz && \ + mv platform-console-backend-$VERSION src +RUN npm install -g grunt-cli +RUN sed -i 's/grunt-cli v1.2/grunt-cli v1./g' /src/build.sh +RUN cd src && ./build.sh $VERSION + + +FROM node:7.3.0-alpine as console-backend-data-logger +LABEL maintainer="cgiraldo@gradiant.org" +LABEL organization="gradiant.org" +ARG version +ENV VERSION $version +COPY --from=builder /src/pnda-build/console-backend-data-logger-$VERSION.tar.gz /src/pnda-build/console-backend-utils-$VERSION.tar.gz / +COPY console-backend-data-logger / +RUN apk add --no-cache py-pip tar bash && pip install j2cli +RUN tar -xzf /console-backend-data-logger-$VERSION.tar.gz && mv /console-backend-data-logger-$VERSION /console-backend-data-logger +RUN tar -xzf /console-backend-utils-$VERSION.tar.gz && mv /console-backend-utils-$VERSION /console-backend-utils +ENTRYPOINT /entrypoint.sh + +FROM node:7.3.0-alpine as console-backend-data-manager +LABEL maintainer="cgiraldo@gradiant.org" +LABEL organization="gradiant.org" +ARG version +ENV 
VERSION $version +COPY --from=builder /src/pnda-build/console-backend-data-manager-$VERSION.tar.gz /src/pnda-build/console-backend-utils-$VERSION.tar.gz / +COPY console-backend-data-manager / +RUN apk add --no-cache py-pip build-base linux-pam-dev tar bash && pip install j2cli && \ + tar -xzf /console-backend-data-manager-$VERSION.tar.gz && \ + mv /console-backend-data-manager-$VERSION /console-backend-data-manager && \ + tar -xzf /console-backend-utils-$VERSION.tar.gz && \ + mv /console-backend-utils-$VERSION /console-backend-utils && \ + adduser -D pnda && echo "pnda:pnda" | chpasswd && \ + echo 'auth required pam_listfile.so item=user sense=deny file=/etc/login.deny onerr=succeed' >> /etc/pam.d/login && \ + echo 'root' >> /etc/login.deny +ENTRYPOINT /entrypoint.sh + diff --git a/docker/dockerfiles/platform-console-backend/build-docker.sh b/docker/dockerfiles/platform-console-backend/build-docker.sh new file mode 100755 index 0000000..1366412 --- /dev/null +++ b/docker/dockerfiles/platform-console-backend/build-docker.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +VERSION=1.0.0 +docker build --build-arg version=$VERSION --target console-backend-data-logger -t pnda/console-backend-data-logger:$VERSION . +docker build --build-arg version=$VERSION --target console-backend-data-manager -t pnda/console-backend-data-manager:$VERSION . diff --git a/docker/dockerfiles/platform-console-backend/console-backend-data-logger/entrypoint.sh b/docker/dockerfiles/platform-console-backend/console-backend-data-logger/entrypoint.sh new file mode 100755 index 0000000..2b0aba2 --- /dev/null +++ b/docker/dockerfiles/platform-console-backend/console-backend-data-logger/entrypoint.sh @@ -0,0 +1,3 @@ +#/bin/sh +j2 /logger.json.tpl > /console-backend-data-logger/conf/logger.json +node /console-backend-data-logger/app.js diff --git a/docker/dockerfiles/platform-console-backend/console-backend-data-logger/logger.json.tpl b/docker/dockerfiles/platform-console-backend/console-backend-data-logger/logger.json.tpl new file mode 100644 index 0000000..b4ae086 --- /dev/null +++ b/docker/dockerfiles/platform-console-backend/console-backend-data-logger/logger.json.tpl @@ -0,0 +1,4 @@ +{ + "log_file": "{{ LOG_FILE |default('/var/log/pnda/console/data-logger.log')}}", + "log_level": "{{ LOG_LEVEL|default('debug') }}" +} diff --git a/docker/dockerfiles/platform-console-backend/console-backend-data-manager/backend_data_manager_conf.js.tpl b/docker/dockerfiles/platform-console-backend/console-backend-data-manager/backend_data_manager_conf.js.tpl new file mode 100644 index 0000000..4b0d2f1 --- /dev/null +++ b/docker/dockerfiles/platform-console-backend/console-backend-data-manager/backend_data_manager_conf.js.tpl @@ -0,0 +1,30 @@ +/* This file was generated from a template */ + +var hosts = '{{ CONSOLE_FRONTEND_HOSTS_CSV |default('127.0.0.1') }}'; +var port = '{{ CONSOLE_FRONTEND_PORT|default('None') }}'; +var whitelist = hosts.split(','); +whitelist.forEach(function(p, i, a) { + a[i] = "http://"+a[i]+ ((port=='None')?'':':'+port); +}); +module.exports = { + whitelist: whitelist, + deployment_manager: { + host: "{{DEPLOYMENT_MANAGER_URL|default('http://127.0.0.1:5000')}}", + API: { + endpoints: "/environment/endpoints", + packages_available: "/repository/packages?recency=999", + packages: "/packages", + applications: "/applications" + } + }, + dataset_manager: { + host: "{{DATASET_MANAGER_URL|default('http://127.0.0.1:7000')}}", + API: { + datasets: "/api/v1/datasets" + } + }, + session: { + secret: "data-manager-secret", + max_age: 86400000 + 
} +}; diff --git a/docker/dockerfiles/platform-console-backend/console-backend-data-manager/entrypoint.sh b/docker/dockerfiles/platform-console-backend/console-backend-data-manager/entrypoint.sh new file mode 100755 index 0000000..f66d3ea --- /dev/null +++ b/docker/dockerfiles/platform-console-backend/console-backend-data-manager/entrypoint.sh @@ -0,0 +1,3 @@ +#/bin/sh +j2 /backend_data_manager_conf.js.tpl > /console-backend-data-manager/conf/config.js +node /console-backend-data-manager/app.js diff --git a/docker/dockerfiles/platform-console-frontend/Dockerfile b/docker/dockerfiles/platform-console-frontend/Dockerfile new file mode 100644 index 0000000..bb26c24 --- /dev/null +++ b/docker/dockerfiles/platform-console-frontend/Dockerfile @@ -0,0 +1,23 @@ +FROM node:7.3.0-alpine as builder +LABEL maintainer="cgiraldo@gradiant.org" +LABEL organization="gradiant.org" +ARG version +ENV VERSION $version +RUN apk add --no-cache bash ca-certificates wget +RUN wget -qO- https://github.com/pndaproject/platform-console-frontend/archive/$VERSION.tar.gz | tar -xvz && \ + mv platform-console-frontend-$VERSION src +RUN sed -i 's/grunt-cli v1.2/grunt-cli v1./g' /src/build.sh +RUN npm install -g grunt-cli && cd src && ./build.sh $VERSION + + +FROM nginx:1.13.9-alpine +LABEL maintainer="cgiraldo@gradiant.org" +LABEL organization="gradiant.org" +ARG version +ENV VERSION $version +COPY --from=builder /src/pnda-build/console-frontend-$VERSION.tar.gz / +COPY . / +ENTRYPOINT /entrypoint.sh +RUN apk add --no-cache py2-pip bash && pip install j2cli && \ + tar -xzf /console-frontend-$VERSION.tar.gz --strip 1 -C /usr/share/nginx/html/ + diff --git a/docker/dockerfiles/platform-console-frontend/PNDA.json.tpl b/docker/dockerfiles/platform-console-frontend/PNDA.json.tpl new file mode 100644 index 0000000..4aeace4 --- /dev/null +++ b/docker/dockerfiles/platform-console-frontend/PNDA.json.tpl @@ -0,0 +1,45 @@ +{ + "hadoop_distro": "{{HADOOP_DISTRO | default('hadoop') }}", + "clustername": "{{CLUSTERNAME | default('pnda')}}", + "edge_node": "{{ EDGE_NODE | default('pnda') }}", + "user_interfaces": [ + { + "name": "Hadoop Cluster Manager", + "link": "{{ HADOOP_MANAGER_URL |default('hadoop-manager')}}" + }, + { + "name": "Kafka Manager", + "link": "{{ KAFKA_MANAGER_URL |default('http://kafka-manager:10900')}}" + }, + { + "name": "OpenTSDB", + "link": "{{ OPENTSDB_URL |default('http://opentsdb:4242') }}" + }, + { + "name": "Grafana", + "link": "{{ GRAFANA_URL | default('http://grafana:3000')}}" + }, + { + "name": "PNDA logserver", + "link": "{{ KIBANA_URL | default('pnda')}}" + }, + { + "name": "Jupyter", + "link": "{{ JUPYTER_URL | default('http://jupyter:8000')}}" + }, + { + "name": "Flink", + "link": "{{ FLINK_URL | default('pnda')}}" + } + ], + "frontend": { + "version": "{{ VERSION | default('1.0.0')}}" + }, + "backend": { + "data-manager": { + "version": "{{DATA_MANAGER_VERSION| default('1.0.0')}}", + "host": "{{DATA_MANAGER_HOST| default('127.0.0.1')}}", "port": "{{DATA_MANAGER_PORT| default('3123')}}" + } + }, + "login_mode": "{{ LOGIN_MODE | default('PAM')}}" +} diff --git a/docker/dockerfiles/platform-console-frontend/build-docker.sh b/docker/dockerfiles/platform-console-frontend/build-docker.sh new file mode 100755 index 0000000..ecaafa5 --- /dev/null +++ b/docker/dockerfiles/platform-console-frontend/build-docker.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +VERSION=$(git describe --tags) +docker build --build-arg version=$VERSION -t pnda/console-frontend:$VERSION . 
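A minimal usage sketch, not part of the patch itself: the console-frontend entrypoint (shown below) renders PNDA.json.tpl with j2cli, which reads plain environment variables, so any value left unset falls back to the default(...) values in the template above. The image tag, container name and Docker network here are illustrative assumptions only.

    # build the image (1.0.0 is an assumed release tag of platform-console-frontend)
    docker build --build-arg version=1.0.0 -t pnda/console-frontend:1.0.0 .
    # run it, overriding a couple of console links; unset variables keep the template defaults
    docker run -d --name console-frontend --network red-pnda \
      -e GRAFANA_URL=http://grafana:3000 \
      -e JUPYTER_URL=http://jupyter:8000 \
      -e DATA_MANAGER_HOST=console-backend-data-manager \
      pnda/console-frontend:1.0.0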
diff --git a/docker/dockerfiles/platform-console-frontend/entrypoint.sh b/docker/dockerfiles/platform-console-frontend/entrypoint.sh new file mode 100755 index 0000000..edeb321 --- /dev/null +++ b/docker/dockerfiles/platform-console-frontend/entrypoint.sh @@ -0,0 +1,4 @@ +#/bin/sh +j2 /PNDA.json.tpl > /usr/share/nginx/html/conf/PNDA.json +j2 /nginx.conf.tpl > /etc/nginx/conf.d/default.conf +nginx -g 'daemon off;' diff --git a/docker/dockerfiles/platform-console-frontend/nginx.conf.tpl b/docker/dockerfiles/platform-console-frontend/nginx.conf.tpl new file mode 100644 index 0000000..a0453e7 --- /dev/null +++ b/docker/dockerfiles/platform-console-frontend/nginx.conf.tpl @@ -0,0 +1,15 @@ +server { + listen {{CONSOLE_FRONTEND_PORT | default('80') }}; + server_name {{CONSOLE_FRONTEND_HOST | default('localhost') }}; + location / { + root /usr/share/nginx/html; + index index.html index.htm; + } + # redirect server error pages to the static page /50x.html + # + error_page 500 502 503 504 /50x.html; + location = /50x.html { + root /usr/share/nginx/html; + } +} + diff --git a/docker/dockerfiles/platform-data-mgmnt/Dockerfile b/docker/dockerfiles/platform-data-mgmnt/Dockerfile new file mode 100644 index 0000000..c357c5d --- /dev/null +++ b/docker/dockerfiles/platform-data-mgmnt/Dockerfile @@ -0,0 +1,45 @@ +FROM alpine:3.7 as builder +LABEL maintainer="cgiraldo@gradiant.org" +LABEL organization="gradiant.org" +ARG version +ENV VERSION $version +RUN apk add --no-cache bash patch build-base maven=3.5.2-r0 grep bc python2-dev py2-nose py2-pip linux-headers ca-certificates wget && \ + ln -s /usr/bin/nosetests-2.7 /usr/bin/nosetests +RUN wget -qO- https://github.com/pndaproject/platform-data-mgmnt/archive/$VERSION.tar.gz | tar -xvz && \ + mv platform-data-mgmnt-$VERSION src +RUN pip install pylint==1.6.4 mock==2.0.0 && \ + find /src -name requirements.txt -exec pip install -r '{}' \; +#pnda.io platform-testing search for Maven 3.0.5. 
We patch this to use Maven 3.5 +RUN sed -i 's/Apache Maven 3.0.5/Apache Maven 3.5/g' /src/build.sh +COPY hdfs-cleaner-env-conf.diff /src/ +COPY data-manager-env-conf.diff /src/ +RUN cd /src && \ + patch -p1 < hdfs-cleaner-env-conf.diff && \ + patch -p1 < data-manager-env-conf.diff && \ + ./build.sh $VERSION + +FROM alpine:3.7 as data-service +LABEL maintainer="cgiraldo@gradiant.org" +ARG version +ENV VERSION $version +COPY --from=builder /src/pnda-build/data-service-$VERSION.tar.gz /src/data-service/src/requirements.txt / +COPY data-service / +RUN apk add --no-cache tar bash py2-pip build-base python2-dev linux-headers && pip install j2cli && pip install -r /requirements.txt +RUN tar -xzf /data-service-$VERSION.tar.gz && mv /data-service-$VERSION /data-service +ENV HDFS_URL hdfs-namenode:50070 +ENV HBASE_HOST hbase-master +ENTRYPOINT /entrypoint.sh + +FROM alpine:3.7 as hdfs-cleaner +LABEL maintainer="cgiraldo@gradiant.org" +LABEL organization="gradiant.org" +ARG version +ENV VERSION $version +COPY --from=builder /src/pnda-build/hdfs-cleaner-$VERSION.tar.gz /src/hdfs-cleaner/src/requirements.txt / +COPY hdfs-cleaner / +RUN apk add --no-cache py-pip build-base linux-pam-dev python2-dev linux-headers tar bash && pip install j2cli && pip install -r /requirements.txt +RUN tar -xzf /hdfs-cleaner-$VERSION.tar.gz && mv /hdfs-cleaner-$VERSION /hdfs-cleaner +ENV HDFS_URL hdfs-namenode:50070 +ENV HBASE_HOST hbase-master +ENTRYPOINT /entrypoint.sh + diff --git a/docker/dockerfiles/platform-data-mgmnt/build-docker.sh b/docker/dockerfiles/platform-data-mgmnt/build-docker.sh new file mode 100755 index 0000000..9cc345e --- /dev/null +++ b/docker/dockerfiles/platform-data-mgmnt/build-docker.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +VERSION=0.2.2 +docker build --build-arg version=$VERSION --target data-service -t pnda/data-service:$VERSION . +docker build --build-arg version=$VERSION --target hdfs-cleaner -t pnda/hdfs-cleaner:$VERSION . diff --git a/docker/dockerfiles/platform-data-mgmnt/data-manager-env-conf.diff b/docker/dockerfiles/platform-data-mgmnt/data-manager-env-conf.diff new file mode 100644 index 0000000..5084762 --- /dev/null +++ b/docker/dockerfiles/platform-data-mgmnt/data-manager-env-conf.diff @@ -0,0 +1,42 @@ +diff --git a/data-service/src/main/resources/endpoint.py b/data-service/src/main/resources/endpoint.py +index 0fda1de..ae945c9 100644 +--- a/data-service/src/main/resources/endpoint.py ++++ b/data-service/src/main/resources/endpoint.py +@@ -14,11 +14,14 @@ + Purpose: Discover API endpoints of a cluster. 
+ """ + import requests ++import os + + from cm_api.api_client import ApiResource + + CLOUDERA = "CDH" + HORTONWORKS = "HDP" ++ENVIRONMENT = "env" ++ + + class Endpoint(object): + """ +@@ -74,6 +77,8 @@ class Platform(object): + return Hortonworks() + elif distribution == "Local": + return Local() ++ elif distribution == "env": ++ return Environment() + + + def connect_cm(cm_host, cm_username, cm_password): +@@ -174,3 +179,12 @@ class Local(Platform): + endpoints = {"HDFS": Endpoint("HDFS", "192.168.33.10:50070"), + 'HBASE': Endpoint("HBASE", "192.168.33.10")} + return endpoints ++ ++class Environment(Platform): ++ """ ++ Platform instance used for testing purpose ++ """ ++ def discover(self, properties): ++ endpoints = {"HDFS": Endpoint("HDFS", os.getenv("HDFS_URL")), ++ 'HBASE': Endpoint("HBASE", os.getenv("HBASE_HOST"))} ++ return endpoints +\ No newline at end of file diff --git a/docker/dockerfiles/platform-data-mgmnt/data-service/entrypoint.sh b/docker/dockerfiles/platform-data-mgmnt/data-service/entrypoint.sh new file mode 100755 index 0000000..65eccbb --- /dev/null +++ b/docker/dockerfiles/platform-data-mgmnt/data-service/entrypoint.sh @@ -0,0 +1,4 @@ +#!/bin/sh +j2 /server.conf.tpl > /data-service/server.conf +cd /data-service +python apiserver.py diff --git a/docker/dockerfiles/platform-data-mgmnt/data-service/server.conf.tpl b/docker/dockerfiles/platform-data-mgmnt/data-service/server.conf.tpl new file mode 100644 index 0000000..04f6d63 --- /dev/null +++ b/docker/dockerfiles/platform-data-mgmnt/data-service/server.conf.tpl @@ -0,0 +1,10 @@ +ports = [7000, 7001] +bind_address = '0.0.0.0' +sync_period = 5000 +datasets_table = "platform_datasets" +data_repo = "{{ LOCATION | default('/user/pnda/PNDA_datasets/datasets') }}" +hadoop_distro = "{{ HADOOP_DISTRO | default('env') }}" +cm_host = "{{ CM_HOST | default('cm') }}" +cm_user = "{{ CM_USER | default('scm') }}" +cm_pass = "{{ CM_PASSWORD | default('scm') }}" +log_file_prefix = "log" diff --git a/docker/dockerfiles/platform-data-mgmnt/hdfs-cleaner-env-conf.diff b/docker/dockerfiles/platform-data-mgmnt/hdfs-cleaner-env-conf.diff new file mode 100644 index 0000000..94b6c48 --- /dev/null +++ b/docker/dockerfiles/platform-data-mgmnt/hdfs-cleaner-env-conf.diff @@ -0,0 +1,54 @@ +diff --git a/hdfs-cleaner/src/main/resources/endpoint.py b/hdfs-cleaner/src/main/resources/endpoint.py +index 0efe71d..268c388 100644 +--- a/hdfs-cleaner/src/main/resources/endpoint.py ++++ b/hdfs-cleaner/src/main/resources/endpoint.py +@@ -14,11 +14,13 @@ + Purpose: Discover API endpoints of a cluster. 
+ """ + import requests ++import os + + from cm_api.api_client import ApiResource + + CLOUDERA = "CDH" + HORTONWORKS = "HDP" ++ENVIRONMENT = "env" + + + class Endpoint(object): +@@ -75,6 +77,8 @@ class Platform(object): + return Hortonworks() + elif distribution == "Local": + return Local() ++ elif distribution == "env": ++ return Environment() + + + def connect_cm(cm_host, cm_username, cm_password): +@@ -170,3 +174,12 @@ class Local(Platform): + endpoints = {"HDFS": Endpoint("HDFS", "192.168.33.10:50070"), + 'HBASE': Endpoint("HBASE", "192.168.33.10")} + return endpoints ++ ++class Environment(Platform): ++ """ ++ Platform instance used for testing purpose ++ """ ++ def discover(self, properties): ++ endpoints = {"HDFS": Endpoint("HDFS", os.getenv("HDFS_URL")), ++ 'HBASE': Endpoint("HBASE", os.getenv("HBASE_HOST"))} ++ return endpoints +\ No newline at end of file +diff --git a/hdfs-cleaner/src/main/resources/hdfs-cleaner.py b/hdfs-cleaner/src/main/resources/hdfs-cleaner.py +index 0107983..ce54005 100644 +--- a/hdfs-cleaner/src/main/resources/hdfs-cleaner.py ++++ b/hdfs-cleaner/src/main/resources/hdfs-cleaner.py +@@ -342,7 +342,7 @@ def main(): + aws_access_key_id=properties['s3_archive_access_key'], + aws_secret_access_key=properties['s3_archive_secret_access_key']) + s3conn.create_bucket(properties['container_name'], location=properties['s3_archive_region']) +- else: ++ elif properties['swift_account'] != '': + container_type = 'swift' + swift_conn = swiftclient.client.Connection(auth_version='2', + user=properties['swift_user'], diff --git a/docker/dockerfiles/platform-data-mgmnt/hdfs-cleaner/entrypoint.sh b/docker/dockerfiles/platform-data-mgmnt/hdfs-cleaner/entrypoint.sh new file mode 100755 index 0000000..960cfd1 --- /dev/null +++ b/docker/dockerfiles/platform-data-mgmnt/hdfs-cleaner/entrypoint.sh @@ -0,0 +1,7 @@ +#!/bin/sh +j2 /hdfs-cleaner/properties.json.tpl > /hdfs-cleaner/properties.json +cd /hdfs-cleaner +NAMENODE_HOST=${NAMENODE_HOST:-hdfs-namenode} +HBASE_HOST=${HBASE_HOST:-hbase-master} + +python hdfs-cleaner.py diff --git a/docker/dockerfiles/platform-data-mgmnt/hdfs-cleaner/properties.json.tpl b/docker/dockerfiles/platform-data-mgmnt/hdfs-cleaner/properties.json.tpl new file mode 100644 index 0000000..c5e3070 --- /dev/null +++ b/docker/dockerfiles/platform-data-mgmnt/hdfs-cleaner/properties.json.tpl @@ -0,0 +1,19 @@ +{ + "hadoop_distro":"{{ HADOOP_DISTRO | default('env') }}", + "cm_host":"{{ CM_HOST | default('cm') }}", + "cm_user":"{{ CM_USER | default('scm') }}", + "cm_pass":"{{ CM_PASSWORD | default('scm') }}", + "datasets_table":"platform_datasets", + "spark_streaming_dirs_to_clean": [], + "general_dirs_to_clean": [ "/user/history/done/" ], + "old_dirs_to_clean": [], + "swift_repo": "swift://{{ ARCHIVE_CONTAINER | default('archive') }}.pnda/{{ CLUSTER_NAME | default('pnda')}}", + "container_name": "{{ ARCHIVE_CONTAINER | default('archive') }}", + "s3_archive_region": "{{ AWS_ARCHIVE_REGION | default('') }}", + "s3_archive_access_key": "{{ AWS_ARCHIVE_KEY | default('') }}", + "s3_archive_secret_access_key": "{{ AWS_ARCHIVE_SECRET | default('') }}", + "swift_account":"{{ KEYSTONE_TENTANT | default('') }}", + "swift_user": "{{ KEYSTONE_USER | default('') }}", + "swift_key": "{{ KEYSYONE_PASSWORD | default('') }}", + "swift_auth_url": "{{ KEYSTONE_AUTH_URL | default('') }}" +} diff --git a/docker/dockerfiles/platform-deployment-manager/Dockerfile b/docker/dockerfiles/platform-deployment-manager/Dockerfile new file mode 100644 index 0000000..f04908a --- /dev/null +++ 
b/docker/dockerfiles/platform-deployment-manager/Dockerfile @@ -0,0 +1,34 @@ +FROM alpine:3.7 as builder +LABEL maintainer="cgiraldo@gradiant.org" +LABEL organization="gradiant.org" +ARG version +ENV VERSION $version +RUN apk add --no-cache bash patch build-base maven=3.5.2-r0 grep bc libffi-dev openssl-dev cyrus-sasl-dev python2-dev py2-nose py2-pip linux-headers ca-certificates wget && \ + ln -s /usr/bin/nosetests-2.7 /usr/bin/nosetests && \ + pip install pylint==1.6.4 mock==2.0.0 +RUN wget -qO- https://github.com/pndaproject/platform-deployment-manager/archive/$VERSION.tar.gz | tar -xvz && \ + mv platform-deployment-manager-$VERSION /src +RUN find /src -name requirements.txt -exec pip install -r '{}' \; +COPY opentsdb_with_cli.diff add_env_config.diff /src/ +#pnda.io platform-testing search for Maven 3.0.5. We patch this to use Maven 3.5 +RUN sed -i 's/Apache Maven 3.0.5/Apache Maven 3.5/g' /src/build.sh +RUN cd /src && patch -p1 < add_env_config.diff && \ + patch -p1 < opentsdb_with_cli.diff && \ + ./build.sh $VERSION + +FROM openjdk:8u171-jdk-alpine3.8 as deployment-manager +LABEL maintainer="cgiraldo@gradiant.org" +LABEL organization="gradiant.org" +ARG version +ENV VERSION $version +COPY --from=builder /src/pnda-build/deployment-manager-$VERSION.tar.gz /src/api/src/main/resources/requirements.txt / +COPY usr/ opt/ entrypoint.sh dm-config.json.tpl / +RUN apk add --no-cache sudo tar bash py2-pip build-base python2-dev libffi-dev openssl-dev cyrus-sasl-dev linux-headers openssh-client && pip install j2cli && pip install -r /requirements.txt +RUN tar -xzf /deployment-manager-$VERSION.tar.gz && mv /deployment-manager-$VERSION /deployment-manager +ENTRYPOINT /entrypoint.sh + +ENV SPARK_HOME=/opt/spark +RUN mkdir -p /opt && \ + wget -O- https://archive.apache.org/dist/spark/spark-2.3.0/spark-2.3.0-bin-hadoop2.7.tgz | tar -xvz -C /tmp && \ + mv /tmp/spark-2.3.0-bin-hadoop2.7 /opt/spark +ENV PATH=$PATH:$SPARK_HOME/bin diff --git a/docker/dockerfiles/platform-deployment-manager/add_env_config.diff b/docker/dockerfiles/platform-deployment-manager/add_env_config.diff new file mode 100644 index 0000000..e22221b --- /dev/null +++ b/docker/dockerfiles/platform-deployment-manager/add_env_config.diff @@ -0,0 +1,15 @@ +diff --git a/api/src/main/resources/deployer_utils.py b/api/src/main/resources/deployer_utils.py +index 214a3dc..cca96fc 100644 +--- a/api/src/main/resources/deployer_utils.py ++++ b/api/src/main/resources/deployer_utils.py +@@ -65,7 +65,9 @@ def update_hadoop_env(env): + # 3. push the temporary values into the main descriptor + tmp_env = dict(env) + logging.debug('Updating environment descriptor') +- if env['hadoop_distro'] == 'CDH': ++ if env['hadoop_distro'] == 'env': ++ logging.info('hadoop info already in environment descriptor') ++ elif env['hadoop_distro'] == 'CDH': + fill_hadoop_env_cdh(tmp_env) + else: + fill_hadoop_env_hdp(tmp_env) diff --git a/docker/dockerfiles/platform-deployment-manager/build-docker.sh b/docker/dockerfiles/platform-deployment-manager/build-docker.sh new file mode 100755 index 0000000..24daa73 --- /dev/null +++ b/docker/dockerfiles/platform-deployment-manager/build-docker.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +VERSION=1.0.0 +docker build --build-arg version=$VERSION -t pnda/deployment-manager:$VERSION . 
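A hedged run example rather than part of the patch: with the add_env_config.diff above applied and HADOOP_DISTRO left at its 'env' default, the deployment manager takes its Hadoop endpoints straight from environment variables rendered into dm-config.json (template shown below) instead of discovering them through Cloudera Manager or Ambari. The network name, key-directory mount and published port are assumptions for illustration; the image tag matches build-docker.sh above.

    docker run -d --name deployment-manager --network red-pnda \
      -e HDFS_ROOT_URI=hdfs://hdfs-namenode:8020 \
      -e WEBHDFS_HOST=hdfs-namenode \
      -e KAFKA_BROKERS=kafka:9092 \
      -v $(pwd)/dm_keys:/opt/pnda/dm_keys \
      -p 5000:5000 \
      pnda/deployment-manager:1.0.0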
diff --git a/docker/dockerfiles/platform-deployment-manager/dm-config.json.tpl b/docker/dockerfiles/platform-deployment-manager/dm-config.json.tpl new file mode 100644 index 0000000..e6a4c3b --- /dev/null +++ b/docker/dockerfiles/platform-deployment-manager/dm-config.json.tpl @@ -0,0 +1,41 @@ + +{ + "environment": { + "hadoop_distro":"{{ HADOOP_DISTRO | default('env') }}", + "hadoop_manager_host" : "{{ CM_HOST_IP | default('ambari-server') }}", + "hadoop_manager_username" : "{{ CM_USERNAME | default('admin') }}", + "hadoop_manager_password" : "{{ CM_PASSWORD | default('admin') }}", + "cluster_root_user" : "{{ OS_USER | default('root') }}", + "cluster_private_key" : "{{ KEYS_DIRECTORY | default('/opt/pnda/dm_keys') }}/dm.pem", + "kafka_zookeeper" : "{{ ZOOKEEPERS| default('zookeeper:2181') }}", + "kafka_brokers" : "{{ KAFKA_BROKERS|default('kafka:9092') }}", + "opentsdb" : "{{ OPENTSDB| default('opentsdb:4242')}}", + "kafka_manager" : "{{ KAFKA_MANAGER_URL | default('http://kafka-manager:10900') }}", + "namespace": "platform_app", + "metric_logger_url": "{{ DATA_LOGGER_URL |default('console_backend_data_logger:3001') }}/metrics", + "jupyter_host": "{{ JUPYTER_HOST | default('jupyter_host') }}", + "jupyter_notebook_directory": "{{ JUPYTER_NOTEBOOK_DIRECTORY | default('jupyter_notebooks') }}", + "app_packages_hdfs_path":"{{ APP_PACKAGES_HDFS_PATH | default('/pnda/deployment/app_packages') }}", + "queue_policy": "{{ POLICY_FILE_LINK | default('/opt/pnda/rm-wrapper/yarn-policy.sh') }}", + "name_node":"{{ HDFS_ROOT_URI | default('hdfs://hdfs-namenode') }}", + "webhdfs_host":"{{ WEBHDFS_HOST | default('hdfs-namenode') }}", + "webhdfs_port":"{{ WEBHDFS_PORT | default('50070') }}", + "hbase_thrift_server":"{{ HBASE_THRIFT_SERVER | default('hbase-master') }}", + "yarn_node_managers":"{{ YARN_NODE_MANAGERS | default('yarn-node-manager') }}", + "yarn_resource_manager_host":"{{ YARN_RESOURCE_MANAGER_HOST | default('') }}", + "yarn_resource_manager_port":"{{ YARN_RESOURCE_MANAGER_PORT | default('') }}", + "yarn_resource_manager_mr_port":"{{ YARN_RESOURCE_MANAGER_MR_PORT | default('') }}", + "zookeeper_quorum":"{{ ZOOKEEPER_QUORUM | default('zookeeper') }}", + "oozie_uri":"{{ OOZIE_URI | default('http://oozie:11000') }}" + }, + "config": { + "stage_root": "stage", + "plugins_path": "plugins", + "log_level": "{{ LOG_LEVEL | default('INFO') }}", + "deployer_thread_limit": 100, + "environment_sync_interval": 120, + "package_callback": "{{ DATA_LOGGER_URL |default('console_backend_data_logger:3001')}}/packages", + "application_callback": "{{ DATA_LOGGER_URL|default('console_backend_data_logger:3001') }}/applications", + "package_repository": "{{ PACKAGE_REPOSITORY_URL |default('http://package-repository:8888') }}" + } +} diff --git a/docker/dockerfiles/platform-deployment-manager/entrypoint.sh b/docker/dockerfiles/platform-deployment-manager/entrypoint.sh new file mode 100755 index 0000000..8151764 --- /dev/null +++ b/docker/dockerfiles/platform-deployment-manager/entrypoint.sh @@ -0,0 +1,4 @@ +#/bin/sh +j2 /dm-config.json.tpl > /deployment-manager/dm-config.json +cd /deployment-manager/ +python app.py diff --git a/docker/dockerfiles/platform-deployment-manager/opentsdb_with_cli.diff b/docker/dockerfiles/platform-deployment-manager/opentsdb_with_cli.diff new file mode 100644 index 0000000..154dd33 --- /dev/null +++ b/docker/dockerfiles/platform-deployment-manager/opentsdb_with_cli.diff @@ -0,0 +1,92 @@ +diff --git a/api/src/main/resources/opentsdb_descriptor.py b/api/src/main/resources/opentsdb_descriptor.py 
+index c77f344..b4581c4 100644 +--- a/api/src/main/resources/opentsdb_descriptor.py ++++ b/api/src/main/resources/opentsdb_descriptor.py +@@ -23,7 +23,7 @@ either express or implied. + + import json + import logging +-import deployer_utils ++import subprocess + + + def create(descriptor_path, environment): +@@ -33,12 +33,9 @@ def create(descriptor_path, environment): + + logging.debug('_deploy_opentsdb: %s', descriptor) + +- cmds = [] ++ zkquorum = environment['zookeeper_quorum'] ++ + for element in descriptor: + if 'name' in element: +- cmds.append('sudo /usr/share/opentsdb/bin/tsdb mkmetric %s' % element['name']) +- +- key_file = environment['cluster_private_key'] +- root_user = environment['cluster_root_user'] +- target_host = environment['opentsdb'].split(':')[0] +- deployer_utils.exec_ssh(target_host, root_user, key_file, cmds) ++ subprocess.check_call(['/usr/share/opentsdb/bin/tsdb', 'mkmetric', element['name'], ++ '--zkquorum', zkquorum], shell=True) +diff --git a/api/src/main/resources/test_application_creator.py b/api/src/main/resources/test_application_creator.py +index b6f8425..629913a 100644 +--- a/api/src/main/resources/test_application_creator.py ++++ b/api/src/main/resources/test_application_creator.py +@@ -46,7 +46,8 @@ class ApplicationCreatorTests(unittest.TestCase): + 'hive_server': 'hivehost', + 'hive_port': '124', + 'queue_policy': 'echo dev', +- 'opentsdb': '1.2.3.5:1234' ++ 'opentsdb': '1.2.3.5:1234', ++ 'zookeeper_quorum': 'zookeeper' + } + service = 'ns' + package_metadata = { +@@ -201,8 +202,8 @@ class ApplicationCreatorTests(unittest.TestCase): + creator.create_application('abcd', self.package_metadata, 'aname', self.property_overrides) + print post_mock.call_args_list + # pylint: disable=line-too-long +- post_mock.assert_any_call('oozie/v1/jobs', data='environment_cluster_private_keykeyfile.pemenvironment_hbase_thrift_serverhbasehostenvironment_webhdfs_hostwebhdfshostenvironment_opentsdb1.2.3.5:1234environment_yarn_node_managersnm1,nm2environment_webhdfs_portwebhdfsportenvironment_hbase_rest_serverhbasehostenvironment_oozie_urioozieenvironment_hbase_rest_port123environment_cluster_root_userroot_userenvironment_hive_port124environment_queue_policyecho devenvironment_name_nodenamenodeenvironment_hive_serverhivehostcomponent_property33component_property4ninecomponent_applicationanamecomponent_namecomponentAcomponent_job_nameaname-componentA-jobcomponent_hdfs_root/pnda/system/deployment-manager/applications/root/aname/componentAapplication_userrootdeployment_start2013-01-01T00:02Zdeployment_end2013-01-08T00:02Zuser.namerootoozie.use.system.libpathtrueoozie.libpath/pnda/deployment/platformmapreduce.job.queuenamedevoozie.wf.application.pathnamenode/pnda/system/deployment-manager/applications/root/aname/componentA', headers={'Content-Type': 'application/xml'}) +- post_mock.assert_any_call('oozie/v1/jobs', data='environment_cluster_private_keykeyfile.pemenvironment_hbase_thrift_serverhbasehostenvironment_webhdfs_hostwebhdfshostenvironment_opentsdb1.2.3.5:1234environment_yarn_node_managersnm1,nm2environment_webhdfs_portwebhdfsportenvironment_hbase_rest_serverhbasehostenvironment_oozie_urioozieenvironment_hbase_rest_port123environment_cluster_root_userroot_userenvironment_hive_port124environment_queue_policyecho 
devenvironment_name_nodenamenodeenvironment_hive_serverhivehostcomponent_applicationanamecomponent_namecomponentBcomponent_job_nameaname-componentB-jobcomponent_hdfs_root/pnda/system/deployment-manager/applications/root/aname/componentBapplication_userrootdeployment_start2013-01-01T00:02Zdeployment_end2013-01-08T00:02Zuser.namerootoozie.use.system.libpathtrueoozie.libpath/pnda/deployment/platformmapreduce.job.queuenamedevoozie.wf.application.pathnamenode/pnda/system/deployment-manager/applications/root/aname/componentB', headers={'Content-Type': 'application/xml'}) ++ post_mock.assert_any_call('oozie/v1/jobs', data='environment_cluster_private_keykeyfile.pemenvironment_hbase_thrift_serverhbasehostenvironment_webhdfs_hostwebhdfshostenvironment_opentsdb1.2.3.5:1234environment_yarn_node_managersnm1,nm2environment_webhdfs_portwebhdfsportenvironment_hbase_rest_serverhbasehostenvironment_oozie_urioozieenvironment_hbase_rest_port123environment_cluster_root_userroot_userenvironment_hive_port124environment_queue_policyecho devenvironment_name_nodenamenodeenvironment_zookeeper_quorumzookeeperenvironment_hive_serverhivehostcomponent_property33component_property4ninecomponent_applicationanamecomponent_namecomponentAcomponent_job_nameaname-componentA-jobcomponent_hdfs_root/pnda/system/deployment-manager/applications/root/aname/componentAapplication_userrootdeployment_start2013-01-01T00:02Zdeployment_end2013-01-08T00:02Zuser.namerootoozie.use.system.libpathtrueoozie.libpath/pnda/deployment/platformmapreduce.job.queuenamedevoozie.wf.application.pathnamenode/pnda/system/deployment-manager/applications/root/aname/componentA', headers={'Content-Type': 'application/xml'}) ++ post_mock.assert_any_call('oozie/v1/jobs', data='environment_cluster_private_keykeyfile.pemenvironment_hbase_thrift_serverhbasehostenvironment_webhdfs_hostwebhdfshostenvironment_opentsdb1.2.3.5:1234environment_yarn_node_managersnm1,nm2environment_webhdfs_portwebhdfsportenvironment_hbase_rest_serverhbasehostenvironment_oozie_urioozieenvironment_hbase_rest_port123environment_cluster_root_userroot_userenvironment_hive_port124environment_queue_policyecho devenvironment_name_nodenamenodeenvironment_zookeeper_quorumzookeeperenvironment_hive_serverhivehostcomponent_applicationanamecomponent_namecomponentBcomponent_job_nameaname-componentB-jobcomponent_hdfs_root/pnda/system/deployment-manager/applications/root/aname/componentBapplication_userrootdeployment_start2013-01-01T00:02Zdeployment_end2013-01-08T00:02Zuser.namerootoozie.use.system.libpathtrueoozie.libpath/pnda/deployment/platformmapreduce.job.queuenamedevoozie.wf.application.pathnamenode/pnda/system/deployment-manager/applications/root/aname/componentB', headers={'Content-Type': 'application/xml'}) + + put_mock.assert_any_call('oozie/v1/job/someid?action=suspend&user.name=root') + +diff --git a/api/src/main/resources/test_opentsdb_descriptor.py b/api/src/main/resources/test_opentsdb_descriptor.py +index cefa39c..6870cb5 100644 +--- a/api/src/main/resources/test_opentsdb_descriptor.py ++++ b/api/src/main/resources/test_opentsdb_descriptor.py +@@ -22,9 +22,9 @@ either express or implied. 
+ """ + + import unittest +-from mock import patch, mock_open ++from mock import patch, mock_open, call + +-import deployer_utils ++import subprocess + import opentsdb_descriptor + + +@@ -33,13 +33,14 @@ class TestOpentsdbDescriptor(unittest.TestCase): + my_text = '[{"name":"my.metric"},{"name":"another.metic"}]' + mocked_open_function = mock_open(read_data=my_text) + +- with patch.object(deployer_utils, 'exec_ssh', return_value=None) as exec_ssh,\ ++ with patch.object(subprocess, 'check_call', return_value=None) as check_call,\ + patch("__builtin__.open", mocked_open_function): + environment = { +- 'cluster_private_key': 'key name', +- 'cluster_root_user': 'root user', +- 'opentsdb': '1.2.3.4:9999,5.6.7.8:8888'} ++ 'zookeeper_quorum': 'myzookeeper'} + opentsdb_descriptor.create("", environment) + +- expected_cmds = ['sudo /usr/share/opentsdb/bin/tsdb mkmetric my.metric', 'sudo /usr/share/opentsdb/bin/tsdb mkmetric another.metic'] +- exec_ssh.assert_called_once_with("1.2.3.4", "root user", "key name", expected_cmds) ++ expected_calls = [call(['/usr/share/opentsdb/bin/tsdb', 'mkmetric', 'my.metric', ++ '--zkquorum', 'myzookeeper'], shell=True), ++ call(['/usr/share/opentsdb/bin/tsdb', 'mkmetric', 'another.metic', ++ '--zkquorum', 'myzookeeper'], shell=True)] ++ check_call.assert_has_calls(expected_calls) diff --git a/docker/dockerfiles/platform-deployment-manager/opt/pnda/rm-wrapper/yarn-policy.sh b/docker/dockerfiles/platform-deployment-manager/opt/pnda/rm-wrapper/yarn-policy.sh new file mode 100755 index 0000000..9453ff9 --- /dev/null +++ b/docker/dockerfiles/platform-deployment-manager/opt/pnda/rm-wrapper/yarn-policy.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +echo "default" +exit 0 diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/bin/mygnuplot.sh b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/bin/mygnuplot.sh new file mode 100755 index 0000000..a3723ce --- /dev/null +++ b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/bin/mygnuplot.sh @@ -0,0 +1,8 @@ +#!/bin/sh +# Because !@#$%^ Java can't fucking do this without a bazillion lines of codes. +set -e +stdout=$1 +shift +stderr=$1 +shift +exec nice gnuplot "$@" >"$stdout" 2>"$stderr" diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/bin/tsdb b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/bin/tsdb new file mode 100755 index 0000000..1cbd8e4 --- /dev/null +++ b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/bin/tsdb @@ -0,0 +1,118 @@ +#!/usr/bin/env bash + +set -e +me=`basename "$0"` +mydir=`dirname "$0"` +# Either: +# abs_srcdir and abs_builddir are set: we're running in a dev tree +# or pkgdatadir is set: we've been installed, we respect that. +abs_srcdir='' +abs_builddir='' +pkgdatadir='/usr/share/opentsdb' +configdir='/etc/opentsdb' +# Either we've been installed and pkgdatadir exists, or we haven't been +# installed and abs_srcdir / abs_builddir aren't empty. +test -d "$pkgdatadir" || test -n "$abs_srcdir$abs_builddir" || { + echo >&2 "$me: Uh-oh, \`$pkgdatadir' doesn't exist, is OpenTSDB properly installed?" 
+ exit 1 +} + +if test -n "$pkgdatadir"; then + localdir="$pkgdatadir/bin" + for jar in "$pkgdatadir"/*.jar; do + CLASSPATH="$CLASSPATH:$jar" + done + # Add pkgdatadir itself so we can find logback.xml + CLASSPATH="$CLASSPATH:$pkgdatadir" + + if test -d "$pkgdatadir/bin"; then + CLASSPATH="$CLASSPATH:$pkgdatadir/bin" + fi + + if test -d "$pkgdatadir/lib"; then + for jar in "$pkgdatadir"/lib/*.jar; do + CLASSPATH="$CLASSPATH:$jar" + done + fi + + if test -n "$configdir" && test -d "$configdir"; then + CLASSPATH="$CLASSPATH:$configdir" + fi +else + localdir="$abs_builddir" + # If we're running out of the build tree, it's especially important that we + # know exactly what jars we need to build the CLASSPATH. Otherwise people + # cannot easily pick up new dependencies as we might mix multiple versions + # of the same dependencies on the CLASSPATH, which is bad. Looking for a + # specific version of each jar prevents this problem. + # TODO(tsuna): Once we jarjar all the dependencies together, this will no + # longer be an issue. See issue #23. + for jar in `make -C "$abs_builddir" printdeps | sed '/third_party.*jar/!d'`; do + for dir in "$abs_builddir" "$abs_srcdir"; do + test -f "$dir/$jar" && CLASSPATH="$CLASSPATH:$dir/$jar" && continue 2 + done + echo >&2 "$me: error: Couldn't find \`$jar' either under \`$abs_builddir' or \`$abs_srcdir'." + exit 2 + done + # Add the src dir so we can find logback.xml + CLASSPATH="$CLASSPATH:$abs_srcdir/src" +fi +# Remove any leading colon. +CLASSPATH="${CLASSPATH#:}" + +usage() { + echo >&2 "usage: $me [args]" + echo 'Valid commands: fsck, import, mkmetric, query, tsd, scan, search, uid, version' + exit 1 +} + +case $1 in + (fsck) + MAINCLASS=Fsck + ;; + (import) + MAINCLASS=TextImporter + ;; + (mkmetric) + shift + set uid assign metrics "$@" + MAINCLASS=UidManager + ;; + (query) + MAINCLASS=CliQuery + ;; + (tsd) + MAINCLASS=TSDMain + ;; + (scan) + MAINCLASS=DumpSeries + ;; + (search) + MAINCLASS=Search + ;; + (uid) + MAINCLASS=UidManager + ;; + (version) + MAINCLASS=BuildData + ;; + (*) + echo >&2 "$me: error: unknown command '$1'" + usage + ;; +esac +shift + +JAVA=${JAVA-'java'} +JVMARGS=${JVMARGS-'-enableassertions -enablesystemassertions'} +test -r "$localdir/tsdb.local" && . 
"$localdir/tsdb.local" + +if [[ $CLASSPATH == *"asyncbigtable"* ]] +then + USE_BIGTABLE=1 + echo "Running OpenTSDB with Bigtable support" + + exec $JAVA $JVMARGS -classpath "$CLASSPATH:$HBASE_CONF" net.opentsdb.tools.$MAINCLASS "$@" +else + exec $JAVA $JVMARGS -classpath "$CLASSPATH" net.opentsdb.tools.$MAINCLASS "$@" +fi diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/async-1.4.0.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/async-1.4.0.jar new file mode 100644 index 0000000..7fa1692 Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/async-1.4.0.jar differ diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/asynchbase-1.8.2.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/asynchbase-1.8.2.jar new file mode 100644 index 0000000..79916dc Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/asynchbase-1.8.2.jar differ diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/commons-jexl-2.1.1.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/commons-jexl-2.1.1.jar new file mode 100644 index 0000000..d160a23 Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/commons-jexl-2.1.1.jar differ diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/commons-logging-1.1.1.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/commons-logging-1.1.1.jar new file mode 100644 index 0000000..1deef14 Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/commons-logging-1.1.1.jar differ diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/commons-math3-3.4.1.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/commons-math3-3.4.1.jar new file mode 100644 index 0000000..cf9f519 Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/commons-math3-3.4.1.jar differ diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/guava-18.0.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/guava-18.0.jar new file mode 100644 index 0000000..8f89e49 Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/guava-18.0.jar differ diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/jackson-annotations-2.9.5.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/jackson-annotations-2.9.5.jar new file mode 100644 index 0000000..98ea004 Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/jackson-annotations-2.9.5.jar differ diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/jackson-core-2.9.5.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/jackson-core-2.9.5.jar new file mode 100644 index 0000000..b70d1ef Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/jackson-core-2.9.5.jar differ diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/jackson-databind-2.9.5.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/jackson-databind-2.9.5.jar new file mode 100644 index 0000000..7a95150 
Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/jackson-databind-2.9.5.jar differ diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/javacc-6.1.2.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/javacc-6.1.2.jar new file mode 100644 index 0000000..a896f6d Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/javacc-6.1.2.jar differ diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/jgrapht-core-0.9.1.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/jgrapht-core-0.9.1.jar new file mode 100644 index 0000000..f491e25 Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/jgrapht-core-0.9.1.jar differ diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/log4j-over-slf4j-1.7.7.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/log4j-over-slf4j-1.7.7.jar new file mode 100644 index 0000000..d2a102e Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/log4j-over-slf4j-1.7.7.jar differ diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/logback-classic-1.0.13.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/logback-classic-1.0.13.jar new file mode 100644 index 0000000..80bf5d1 Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/logback-classic-1.0.13.jar differ diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/logback-core-1.0.13.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/logback-core-1.0.13.jar new file mode 100644 index 0000000..568ccfa Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/logback-core-1.0.13.jar differ diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/netty-3.10.6.Final.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/netty-3.10.6.Final.jar new file mode 100644 index 0000000..b0a1bda Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/netty-3.10.6.Final.jar differ diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/protobuf-java-2.5.0.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/protobuf-java-2.5.0.jar new file mode 100644 index 0000000..4c4e686 Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/protobuf-java-2.5.0.jar differ diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/slf4j-api-1.7.7.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/slf4j-api-1.7.7.jar new file mode 100644 index 0000000..bebabd9 Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/slf4j-api-1.7.7.jar differ diff --git a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/tsdb-2.3.1.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/tsdb-2.3.1.jar new file mode 100644 index 0000000..fa28e82 Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/tsdb-2.3.1.jar differ diff --git 
a/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/zookeeper-3.4.6.jar b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/zookeeper-3.4.6.jar new file mode 100644 index 0000000..7c340be Binary files /dev/null and b/docker/dockerfiles/platform-deployment-manager/usr/share/opentsdb/lib/zookeeper-3.4.6.jar differ diff --git a/docker/dockerfiles/platform-gobblin-modules/.gradle/2.13/taskArtifacts/cache.properties b/docker/dockerfiles/platform-gobblin-modules/.gradle/2.13/taskArtifacts/cache.properties new file mode 100644 index 0000000..7302f73 --- /dev/null +++ b/docker/dockerfiles/platform-gobblin-modules/.gradle/2.13/taskArtifacts/cache.properties @@ -0,0 +1 @@ +#Fri Jun 29 07:53:14 CEST 2018 diff --git a/docker/dockerfiles/platform-gobblin-modules/.gradle/2.13/taskArtifacts/cache.properties.lock b/docker/dockerfiles/platform-gobblin-modules/.gradle/2.13/taskArtifacts/cache.properties.lock new file mode 100644 index 0000000..b09ca20 Binary files /dev/null and b/docker/dockerfiles/platform-gobblin-modules/.gradle/2.13/taskArtifacts/cache.properties.lock differ diff --git a/docker/dockerfiles/platform-gobblin-modules/.gradle/2.13/taskArtifacts/fileHashes.bin b/docker/dockerfiles/platform-gobblin-modules/.gradle/2.13/taskArtifacts/fileHashes.bin new file mode 100644 index 0000000..9264c22 Binary files /dev/null and b/docker/dockerfiles/platform-gobblin-modules/.gradle/2.13/taskArtifacts/fileHashes.bin differ diff --git a/docker/dockerfiles/platform-gobblin-modules/.gradle/2.13/taskArtifacts/fileSnapshots.bin b/docker/dockerfiles/platform-gobblin-modules/.gradle/2.13/taskArtifacts/fileSnapshots.bin new file mode 100644 index 0000000..6203a72 Binary files /dev/null and b/docker/dockerfiles/platform-gobblin-modules/.gradle/2.13/taskArtifacts/fileSnapshots.bin differ diff --git a/docker/dockerfiles/platform-gobblin-modules/.gradle/2.13/taskArtifacts/taskArtifacts.bin b/docker/dockerfiles/platform-gobblin-modules/.gradle/2.13/taskArtifacts/taskArtifacts.bin new file mode 100644 index 0000000..98d1f5c Binary files /dev/null and b/docker/dockerfiles/platform-gobblin-modules/.gradle/2.13/taskArtifacts/taskArtifacts.bin differ diff --git a/docker/dockerfiles/platform-gobblin-modules/Dockerfile b/docker/dockerfiles/platform-gobblin-modules/Dockerfile new file mode 100644 index 0000000..02b017f --- /dev/null +++ b/docker/dockerfiles/platform-gobblin-modules/Dockerfile @@ -0,0 +1,37 @@ +FROM openjdk:8u171-jdk-alpine3.7 as builder +ARG version +ENV VERSION $version +RUN apk add --no-cache bash ca-certificates wget +# build pnda-gobblin modules +RUN wget -qO- https://github.com/pndaproject/platform-gobblin-modules/archive/$VERSION.tar.gz | tar -xvz && \ + mv platform-gobblin-modules-$VERSION src +RUN cd src && ./build.sh $VERSION + + + +FROM gradiant/gobblin:0.11.0 as pnda-gobblin +LABEL maintainer="cgiraldo@gradiant.org" +LABEL organization="gradiant.org" +ARG version +ENV VERSION $version + +ENV GOBBLIN_VERSION=0.11.0 +ENV HDFS_URL=hdfs://hdfs-namenode:8020 +ENV MAX_MAPPERS=4 +ENV KAFKA_BROKERS=kafka:9092 +ENV MASTER_DATASET_DIRECTORY=/user/pnda/PNDA_datasets/datasets +ENV MASTER_DATASET_QUARANTINE_DIRECTORY=/user/pnda/PNDA_datasets/quarantine +ENV MASTER_DATASET_COMPACTION_DIRECTORY=/user/pnda/PNDA_datasets/compaction +ENV DATASET_COMPACTION_PATTERN=d + +ENV HADOOP_HOME=/ + +COPY --from=builder /src/pnda-build/gobblin-PNDA-$VERSION.tar.gz / +COPY entrypoint.sh mr.compact.tpl mr.pull.tpl / + +RUN apk add --no-cache py2-pip && pip install j2cli && \ + tar -xvf 
gobblin-PNDA-$VERSION.tar.gz -C /gobblin-dist/lib/ + +ENTRYPOINT ["/entrypoint.sh"] + + diff --git a/docker/dockerfiles/platform-gobblin-modules/build-docker.sh b/docker/dockerfiles/platform-gobblin-modules/build-docker.sh new file mode 100755 index 0000000..d4d3390 --- /dev/null +++ b/docker/dockerfiles/platform-gobblin-modules/build-docker.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +VERSION=0.1.0 +docker build --build-arg version=$VERSION -t pnda/gobblin:0.11.0-$VERSION . diff --git a/docker/dockerfiles/platform-gobblin-modules/entrypoint.sh b/docker/dockerfiles/platform-gobblin-modules/entrypoint.sh new file mode 100755 index 0000000..ccec1fd --- /dev/null +++ b/docker/dockerfiles/platform-gobblin-modules/entrypoint.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +j2 /mr.pull.tpl > /etc/gobblin/mr.pull +j2 /mr.compact.tpl > /etc/gobblin/mr.compact +/gobblin-dist/bin/gobblin-standalone.sh start \ No newline at end of file diff --git a/docker/dockerfiles/platform-gobblin-modules/mr.compact.tpl b/docker/dockerfiles/platform-gobblin-modules/mr.compact.tpl new file mode 100644 index 0000000..8e575f9 --- /dev/null +++ b/docker/dockerfiles/platform-gobblin-modules/mr.compact.tpl @@ -0,0 +1,45 @@ + +############################################################################### +###################### Gobblin Compaction Job configurations ################## +############################################################################### + +{%- set compaction_pattern = DATASET_COMPACTION_PATTERN | default('d') %} +{%- if compaction_pattern == 'H' %} +{%- set folder_pattern="'year='YYYY/'month='MM/'day='dd/'hour='HH" %} +{%- set time_ago='1d' %} +{% elif compaction_pattern == 'd' %} +{%- set folder_pattern="'year='YYYY/'month='MM/'day='dd" %} +{%- set time_ago='1d2h' %} +{% elif compaction_pattern == 'M' %} +{%- set folder_pattern="'year='YYYY/'month='MM" %} +{%- set time_ago='1m2h' %} +{% elif compaction_pattern == 'Y' %} +{%- set folder_pattern="'year='YYYY" %} +{%- set time_ago='12m2h' %} +{%- endif %} + +# File system URIs +fs.uri={{ HDFS_URL }}/ +writer.fs.uri=${fs.uri} + +job.name=CompactKafkaMR +job.group=PNDA + +#mr.job.max.mappers={{ MAX_MAPPERS | default('4') }} + +compaction.datasets.finder=gobblin.compaction.dataset.TimeBasedSubDirDatasetsFinder +compaction.input.dir={{ MASTER_DATASET_DIRECTORY | default('/user/pnda/PNDA_datasets/datasets') }} +compaction.dest.dir={{ MASTER_DATASET_COMPACTION_DIRECTORY | default('/user/pnda/PNDA_datasets/compacted') }} +compaction.input.subdir=. +compaction.dest.subdir=. 
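+# NOTE: folder_pattern and time_ago are set by the DATASET_COMPACTION_PATTERN switch above; with the default 'd' the two settings below render to 'year='YYYY/'month='MM/'day='dd and a max.time.ago of 1d2h.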
+compaction.timebased.folder.pattern={{ folder_pattern }} +compaction.timebased.max.time.ago={{ time_ago }} +compaction.timebased.min.time.ago=1h +compaction.input.deduplicated=false +compaction.output.deduplicated=false +compaction.jobprops.creator.class=gobblin.compaction.mapreduce.MRCompactorTimeBasedJobPropCreator +compaction.job.runner.class=gobblin.compaction.mapreduce.avro.MRCompactorAvroKeyDedupJobRunner +compaction.timezone=UTC +compaction.job.overwrite.output.dir=true +compaction.recompact.from.input.for.late.data=true +mapred.output.compress=true diff --git a/docker/dockerfiles/platform-gobblin-modules/mr.pull.tpl b/docker/dockerfiles/platform-gobblin-modules/mr.pull.tpl new file mode 100644 index 0000000..8db7030 --- /dev/null +++ b/docker/dockerfiles/platform-gobblin-modules/mr.pull.tpl @@ -0,0 +1,56 @@ +############################################################################### +###################### Gobblin MapReduce configurations ####################### +############################################################################### + +# File system URIs +fs.uri={{ HDFS_URL }}/ +writer.fs.uri=${fs.uri} + +job.name=PullFromKafka +job.group=PNDA +job.description=Pulls data from all kafka topics to HDFS + +#mr.job.max.mappers={{ MAX_MAPPERS | default('4') }} + +#Java Null pointer error if job.lock.enabled +job.lock.enabled=false + +# ==== Kafka Source ==== +source.class=gobblin.source.extractor.extract.kafka.KafkaDeserializerSource +source.timezone=UTC +source.schema={"namespace": "pnda.entity", \ + "type": "record", \ + "name": "event", \ + "fields": [ \ + {"name": "timestamp", "type": "long"}, \ + {"name": "src", "type": "string"}, \ + {"name": "host_ip", "type": "string"}, \ + {"name": "rawdata", "type": "bytes"} \ + ] \ + } + +kafka.deserializer.type=BYTE_ARRAY +kafka.workunit.packer.type=BI_LEVEL + +kafka.brokers={{ KAFKA_BROKERS }} +bootstrap.with.offset=earliest + +# ==== Converter ==== +converter.classes=gobblin.pnda.PNDAConverter +PNDA.quarantine.dataset.uri=dataset:{{ HDFS_URL }}{{ MASTER_DATASET_QUARANTINE_DIRECTORY }} + + +# ==== Writer ==== +writer.builder.class=gobblin.pnda.PNDAKiteWriterBuilder +kite.writer.dataset.uri=dataset:{{ HDFS_URL }}{{ MASTER_DATASET_DIRECTORY }} + +# ==== Metrics ==== +metrics.enabled=true +metrics.reporting.file.enabled=true + +# ==== Blacklist topics ==== +# Recent Kafka version uses internal __consumer_offsets topic, which we don't +# want to ingest +# Don't ingest the avro.internal.testbot topic as it's only an internal PNDA +# testing topic +topic.blacklist=__consumer_offsets,avro.internal.testbot \ No newline at end of file diff --git a/docker/dockerfiles/platform-package-repository/Dockerfile b/docker/dockerfiles/platform-package-repository/Dockerfile new file mode 100644 index 0000000..228c04a --- /dev/null +++ b/docker/dockerfiles/platform-package-repository/Dockerfile @@ -0,0 +1,27 @@ +FROM alpine:3.7 as builder +LABEL maintainer="cgiraldo@gradiant.org" +LABEL organization="gradiant.org" +ARG version +ENV VERSION $version +RUN wget -qO- https://github.com/pndaproject/platform-package-repository/archive/$VERSION.tar.gz | tar -xvz && \ + mv platform-package-repository-$VERSION src +RUN apk add --no-cache bash build-base maven=3.5.2-r0 grep bc python2-dev py2-nose py2-pip linux-headers && \ + ln -s /usr/bin/nosetests-2.7 /usr/bin/nosetests && \ + pip install pylint==1.6.4 mock==2.0.0 && \ + find /src -name requirements.txt -exec pip install -r '{}' \; +#pnda.io platform-package repository search for Maven 3.0.5. 
We patch this to use Maven 3.5 +RUN sed -i 's/Apache Maven 3.0.5/Apache Maven 3.5/g' /src/build.sh +RUN cd /src && ./build.sh $VERSION + +FROM alpine:3.7 as package-repository +LABEL maintainer="cgiraldo@gradiant.org" +LABEL organization="gradiant.org" +ARG version +ENV VERSION $version +COPY --from=builder /src/pnda-build/package-repository-$VERSION.tar.gz /src/api/src/main/resources/requirements.txt / +COPY entrypoint.sh pr-config.json.tpl / +RUN apk add --no-cache tar bash py2-pip build-base python2-dev linux-headers && pip install j2cli && pip install -r /requirements.txt +RUN tar -xzf /package-repository-$VERSION.tar.gz && mv /package-repository-$VERSION /package-repository +ENTRYPOINT /entrypoint.sh + + diff --git a/docker/dockerfiles/platform-package-repository/build-docker.sh b/docker/dockerfiles/platform-package-repository/build-docker.sh new file mode 100755 index 0000000..91d88a0 --- /dev/null +++ b/docker/dockerfiles/platform-package-repository/build-docker.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +VERSION=0.3.2 +docker build --build-arg version=$VERSION -t pnda/package-repository:$VERSION . diff --git a/docker/dockerfiles/platform-package-repository/entrypoint.sh b/docker/dockerfiles/platform-package-repository/entrypoint.sh new file mode 100755 index 0000000..9cf0508 --- /dev/null +++ b/docker/dockerfiles/platform-package-repository/entrypoint.sh @@ -0,0 +1,4 @@ +#/bin/sh +j2 /pr-config.json.tpl > /package-repository/pr-config.json +cd /package-repository/ +python package_repository_rest_server.py diff --git a/docker/dockerfiles/platform-package-repository/pr-config.json.tpl b/docker/dockerfiles/platform-package-repository/pr-config.json.tpl new file mode 100644 index 0000000..7a90c35 --- /dev/null +++ b/docker/dockerfiles/platform-package-repository/pr-config.json.tpl @@ -0,0 +1,13 @@ +{ + "FsRepository": { + "location": { + "path": "{{ FS_LOCATION_PATH |default('/mnt/packages') }}" + } + }, + "config": { + "log_level":"INFO", + "package_callback": "{{ DATA_LOGGER_URL |default('console_backend_data_logger:3001')}}/packages" + } + +} + diff --git a/docker/dockerfiles/platform-testing/Dockerfile b/docker/dockerfiles/platform-testing/Dockerfile new file mode 100644 index 0000000..e3903e7 --- /dev/null +++ b/docker/dockerfiles/platform-testing/Dockerfile @@ -0,0 +1,33 @@ +FROM alpine:3.7 as builder +LABEL maintainer="cgiraldo@gradiant.org" +LABEL organization="gradiant.org" +ARG version +ENV VERSION $version +RUN apk add --no-cache git bash python build-base linux-pam-dev maven=3.5.2-r0 bc grep python2-dev py2-nose py2-pip cyrus-sasl-dev ca-certificates wget\ +&& pip install spur==0.3.12 starbase==0.3.2 happybase==1.0.0 pyhs2==0.6.0 pywebhdfs==0.4.0 PyHDFS==0.1.2 cm-api==8.0.0 shyaml==0.4.1 \ +nose==1.3.7 mock==2.0.0 pylint==1.6.4 python-swiftclient==3.1.0 tornado==4.4.2 tornado-cors==0.6.0 Tornado-JSON==1.2.2 boto==2.40.0 \ +setuptools==28.8.0 --upgrade impyla==0.13.8 eventlet==0.19.0 kazoo==2.2.1 avro==1.8.1 kafka-python==1.3.5 prettytable==0.7.2 \ +pyhive==0.2.1 thrift_sasl==0.2.1 JayDeBeApi==1.1.1 \ +&& ln -s /usr/bin/nosetests-2.7 /usr/bin/nosetests + +RUN wget -qO- https://github.com/pndaproject/platform-testing/archive/$VERSION.tar.gz | tar -xvz && \ + mv platform-testing-$VERSION src + +#pnda.io platform-testing search for Maven 3.0.5. 
We patch this to use Maven 3.5 +RUN sed -i 's/Apache Maven 3.0.5/Apache Maven 3.5/g' /src/build.sh +RUN cd src && ./build.sh $VERSION + + +FROM alpine:3.7 as platform-testing +LABEL maintainer="cgiraldo@gradiant.org" +LABEL organization="gradiant.org" +ARG version +ENV VERSION $version +COPY --from=builder /src/pnda-build / +COPY jinja_entrypoint.sh entrypoint.sh.tpl hbase_spark_metric.py / +ENTRYPOINT /jinja_entrypoint.sh +RUN apk add --no-cache bash py2-pip tar && tar -xzf /platform-testing-general-${VERSION}.tar.gz \ +&& mv /platform-testing-general-${VERSION} /platform-testing-general \ +&& pip install j2cli \ +&& find /platform-testing-general -name requirements.txt -exec pip install -r '{}' \; + diff --git a/docker/dockerfiles/platform-testing/build-docker.sh b/docker/dockerfiles/platform-testing/build-docker.sh new file mode 100755 index 0000000..9f2af26 --- /dev/null +++ b/docker/dockerfiles/platform-testing/build-docker.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +VERSION=0.5.0 +docker build --build-arg version=$VERSION -t pnda/testing:$VERSION . diff --git a/docker/dockerfiles/platform-testing/entrypoint.sh.tpl b/docker/dockerfiles/platform-testing/entrypoint.sh.tpl new file mode 100644 index 0000000..b747886 --- /dev/null +++ b/docker/dockerfiles/platform-testing/entrypoint.sh.tpl @@ -0,0 +1,16 @@ +#/bin/sh + +while true +do +python /platform-testing-general/monitor.py --plugin zookeeper \ +--postjson http://{{ CONSOLE_HOSTS | default('console-backend-data-logger:3001') }}/metrics \ +--extra "--zconnect {{ ZOOKEEPERS | default('zookeeper:2181') }}" + +python /platform-testing-general/monitor.py --plugin kafka \ +--postjson http://{{ CONSOLE_HOSTS | default('console-backend-data-logger:3001') }}/metrics \ +--extra "--brokerlist {{ KAFKA_BROKERS | default('kafka:9092') }} \ +--zkconnect {{ ZOOKEEPERS | default('zookeeper:2181') }} --prod2cons" + +python /hbase_spark_metric.py http://{{ CONSOLE_HOSTS | default('console-backend-data-logger:3001') }}/metrics +sleep 60 +done diff --git a/docker/dockerfiles/platform-testing/hbase_spark_metric.py b/docker/dockerfiles/platform-testing/hbase_spark_metric.py new file mode 100644 index 0000000..2aceec8 --- /dev/null +++ b/docker/dockerfiles/platform-testing/hbase_spark_metric.py @@ -0,0 +1,26 @@ +import requests +import json +import time +import sys + +# a python script to return OK status for hbase and spark components. 
Hard-coded to return OK on every run - need to change this + +TIMESTAMP_MILLIS = lambda: int(time.time() * 1000) +components = {'hbase01': 'hadoop.HBASE.health', 'spark_on_yarn': 'hadoop.SPARK_ON_YARN.health'} +if len(sys.argv) > 2: + print ("usage: hbase_spark_metric.py [console_backend_data_manager_metrics_endpoint]") + sys.exit() +elif len(sys.argv) == 2: + host = sys.argv[1] +else: + host = "http://127.0.0.1:3001/metrics" + +for key, value in components.iteritems(): + json_data = {"data": [{"source": key, "metric": value, "value": "OK", "causes": "[]", "timestamp": TIMESTAMP_MILLIS()}], "timestamp": TIMESTAMP_MILLIS()} + try: + headers = {'Content-Type': 'application/json', 'Connection':'close'} + response = requests.post(host, data=json.dumps(json_data), headers=headers) + if response.status_code != 200: + print "_send failed: %s" % response.status_code + except requests.exceptions.RequestException as ex: + print "_send failed: %s" % ex diff --git a/docker/dockerfiles/platform-testing/jinja_entrypoint.sh b/docker/dockerfiles/platform-testing/jinja_entrypoint.sh new file mode 100755 index 0000000..b8474b2 --- /dev/null +++ b/docker/dockerfiles/platform-testing/jinja_entrypoint.sh @@ -0,0 +1,4 @@ +#!/bin/sh +j2 /entrypoint.sh.tpl > /entrypoint.sh +chmod +x /entrypoint.sh +/entrypoint.sh diff --git a/docker/grafana/PNDA-DM.json b/docker/grafana/PNDA-DM.json new file mode 100644 index 0000000..4d87f7a --- /dev/null +++ b/docker/grafana/PNDA-DM.json @@ -0,0 +1,515 @@ +{ + "__inputs": [ + { + "name": "DS_PNDA_GRAPHITE", + "label": "PNDA Graphite", + "description": "", + "type": "datasource", + "pluginId": "graphite", + "pluginName": "Graphite" + } + ], + "__requires": [ + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "3.1.1" + }, + { + "type": "datasource", + "id": "graphite", + "name": "Graphite", + "version": "1.0.0" + } + ], + "id": null, + "title": "PNDA Deployment Manager", + "tags": [ + "PNDA", + "deployment manager" + ], + "style": "dark", + "timezone": "browser", + "editable": true, + "hideControls": true, + "sharedCrosshair": false, + "rows": [ + { + "collapse": false, + "editable": true, + "height": "100px", + "panels": [ + { + "content": "
\n PNDA Deployment Manager metrics\n
", + "editable": true, + "error": false, + "height": "50", + "id": 5, + "isNew": true, + "links": [], + "mode": "html", + "span": 8, + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "100", + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "metric": "", + "refId": "A", + "target": "deployment-manager.packages_available_count" + } + ], + "thresholds": "", + "title": "Available Packages", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "100", + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "refId": "A", + "target": "deployment-manager.packages_deployed_count" + } + ], + "thresholds": "", + "title": "Deployed Packages", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "fill": 0, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + 
"linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "refId": "A", + "target": "alias(smartSummarize(deployment-manager.packages_available_time_ms, '$interval', 'avg'), 'Response Time')", + "textEditor": false + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Available Packages response time ($interval average)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "ms", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "fill": 0, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "height": "", + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "refId": "A", + "target": "alias(smartSummarize(deployment-manager.packages_deployed_time_ms, '$interval', 'avg'), 'Response time')" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Deployed Packages response time ($interval average)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "ms", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "New row" + } + ], + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "collapse": false, + "enable": true, + "notice": false, + "now": true, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "status": "Stable", + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ], + "type": "timepicker" + }, + "templating": { + "list": [ + { + "auto": true, + "auto_count": 30, + "current": { + "tags": [], + "text": "auto", + "value": "$__auto_interval" + }, + "datasource": null, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + 
"value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 0, + "type": "interval" + } + ] + }, + "annotations": { + "list": [] + }, + "refresh": false, + "schemaVersion": 12, + "version": 1, + "links": [], + "gnetId": null +} diff --git a/docker/grafana/PNDA-Hadoop.json b/docker/grafana/PNDA-Hadoop.json new file mode 100644 index 0000000..60a2ea7 --- /dev/null +++ b/docker/grafana/PNDA-Hadoop.json @@ -0,0 +1,1047 @@ +{ + "__inputs": [ + { + "name": "DS_PNDA_GRAPHITE", + "label": "PNDA Graphite", + "description": "", + "type": "datasource", + "pluginId": "graphite", + "pluginName": "Graphite" + } + ], + "__requires": [ + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "3.1.1" + }, + { + "type": "datasource", + "id": "graphite", + "name": "Graphite", + "version": "1.0.0" + } + ], + "id": null, + "title": "PNDA Hadoop", + "tags": [ + "PNDA", + "hbase", + "hdfs", + "hive", + "impala" + ], + "style": "dark", + "timezone": "browser", + "editable": true, + "hideControls": true, + "sharedCrosshair": false, + "rows": [ + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "content": "
\n PNDA HBASE metrics\n
", + "editable": true, + "error": false, + "height": "50", + "id": 11, + "isNew": true, + "links": [], + "mode": "html", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": "", + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "DROP", + "transform": "negative-Y" + } + ], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "refId": "A", + "target": "alias(smartSummarize(hadoop.HBASE.create_table_time_ms, '$interval', 'avg'), 'CREATE')", + "textEditor": true + }, + { + "refId": "B", + "target": "alias(smartSummarize(hadoop.HBASE.drop_table_time_ms, '$interval', 'avg'), 'DROP')", + "textEditor": true + } + ], + "timeFrom": null, + "timeShift": null, + "title": "HBASE table CREATE/DROP time ($interval average)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "ms", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)", + "thresholdLine": false + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "WRITE", + "transform": "negative-Y" + } + ], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "refId": "A", + "target": "alias(smartSummarize(hadoop.HBASE.read_time_ms, '$interval', 'avg'), 'READ')", + "textEditor": true + }, + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "refId": "B", + "target": "alias(smartSummarize(hadoop.HBASE.write_time_ms, '$interval', 'avg'), 'WRITE')", + "textEditor": true + } + ], + "timeFrom": null, + "timeShift": null, + "title": "HBASE Read/Write latency ($interval average)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "ms", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": false, + "title": "Row" + }, + { + "collapse": false, + 
"editable": true, + "height": "250px", + "panels": [ + { + "content": "
\n PNDA HDFS Metrics\n
", + "editable": true, + "error": false, + "height": "50", + "id": 12, + "isNew": true, + "links": [], + "mode": "html", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "refId": "A", + "target": "alias(smartSummarize(hadoop.HDFS.capacity_remaining, '$interval', 'sum'), 'Free Space')" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "HDFS Free Space", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bytes", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "", + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": " live datanode(s)", + "postfixFontSize": "80%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "refId": "A", + "target": "hadoop.HDFS.live_datanodes" + } + ], + "thresholds": "", + "title": "", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "There are no", + "value": "0" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "", + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + 
"nullPointMode": "connected", + "nullText": null, + "postfix": " dead datanode(s)", + "postfixFontSize": "80%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "refId": "A", + "target": "hadoop.HDFS.dead_datanodes" + } + ], + "thresholds": "", + "title": "", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "There are no", + "value": "0" + } + ], + "valueName": "current" + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "content": "
\n PNDA Hive Metrics\n
", + "editable": true, + "error": false, + "height": "50", + "id": 13, + "isNew": true, + "links": [], + "mode": "html", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "fill": 0, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "refId": "A", + "target": "alias(smartSummarize(hadoop.HIVE.create_metadata_time_ms, '$interval', 'avg'), 'Creation time')" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Hive metadata creation time ($interval average)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "ms", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "fill": 0, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "refId": "A", + "target": "alias(smartSummarize(hadoop.HIVE.drop_table_time_ms, '$interval', 'avg'), 'Drop time')" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Hive table drop time ($interval average)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "ms", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "fill": 0, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + 
"span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "refId": "A", + "target": "alias(smartSummarize(hadoop.HIVE.connection_time_ms, '$interval', 'avg'), 'Connection time')", + "textEditor": false + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Hive connection time ($interval average)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "ms", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "content": "
\n PNDA Impala metrics\n
", + "editable": true, + "error": false, + "height": "50", + "id": 14, + "isNew": true, + "links": [], + "mode": "html", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "fill": 0, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "refId": "A", + "target": "alias(smartSummarize(hadoop.IMPALA.read_time_ms, '$interval', 'avg'), 'Read time')" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Impala read time ($interval average)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "ms", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "fill": 0, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "refId": "A", + "target": "alias(smartSummarize(hadoop.IMPALA.connection_time_ms, '$interval', 'avg'), 'Connection time')", + "textEditor": false + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Impala connection time ($interval average)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "ms", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "New row" + } + ], + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "collapse": false, + "enable": true, + "notice": false, + "now": true, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "status": "Stable", + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ], + "type": "timepicker" + }, + "templating": { + "list": [ + { + "auto": true, + "auto_count": 30, + "auto_min": "10s", + "current": { + "tags": [], + "text": "auto", + "value": "$__auto_interval" + }, + 
"datasource": null, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 0, + "type": "interval" + } + ] + }, + "annotations": { + "list": [] + }, + "schemaVersion": 12, + "version": 1, + "links": [], + "gnetId": null +} diff --git a/docker/grafana/PNDA-Kafka.json b/docker/grafana/PNDA-Kafka.json new file mode 100644 index 0000000..2e7af3c --- /dev/null +++ b/docker/grafana/PNDA-Kafka.json @@ -0,0 +1,447 @@ +{ + "__inputs": [ + { + "name": "DS_PNDA_GRAPHITE", + "label": "PNDA Graphite", + "description": "", + "type": "datasource", + "pluginId": "graphite", + "pluginName": "Graphite" + } + ], + "__requires": [ + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "3.1.1" + }, + { + "type": "datasource", + "id": "graphite", + "name": "Graphite", + "version": "1.0.0" + } + ], + "id": null, + "title": "PNDA Kafka Brokers", + "tags": [ + "PNDA", + "kafka" + ], + "style": "dark", + "timezone": "browser", + "editable": true, + "hideControls": true, + "sharedCrosshair": false, + "rows": [ + { + "collapse": false, + "editable": true, + "height": "100px", + "panels": [ + { + "content": "
\n PNDA Kafka Metrics\n
", + "editable": true, + "error": false, + "height": "50", + "id": 4, + "isNew": true, + "links": [], + "mode": "html", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "fill": 0, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "refId": "A", + "target": "alias(smartSummarize(kafka.brokers.*.system.SystemLoadAverage, '$interval', 'avg'), 'Load Average')" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Load Average ($interval average)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "Row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": "", + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "hide": false, + "refId": "A", + "target": "alias(kafka.brokers.*.UnderReplicatedPartitions, 'UnderReplicated partitions')", + "textEditor": false + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Underreplicated partitions", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, 
+ "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/BytesOutPerSec/", + "transform": "negative-Y" + } + ], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "refId": "A", + "target": "aliasByNode(smartSummarize(derivative(kafka.brokers.*.topics.avro.*.*.BytesInPerSec.Count), '$interval', 'avg'), 2, 7)", + "textEditor": false + }, + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "errors": {}, + "refId": "B", + "target": "aliasByNode(smartSummarize(derivative(kafka.brokers.*.topics.avro.*.*.BytesOutPerSec.Count), '$interval', 'avg'), 2, 7)" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes ($interval average)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "New row" + } + ], + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "collapse": false, + "enable": true, + "notice": false, + "now": true, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "status": "Stable", + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ], + "type": "timepicker" + }, + "templating": { + "list": [ + { + "auto": true, + "auto_count": 30, + "current": { + "tags": [], + "text": "auto", + "value": "$__auto_interval" + }, + "datasource": null, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 0, + "type": "interval" + } + ] + }, + "annotations": { + "list": [] + }, + "schemaVersion": 12, + "version": 1, + "links": [], + "gnetId": null +} diff --git a/docker/grafana/PNDA.json b/docker/grafana/PNDA.json new file mode 100644 index 0000000..5cee508 --- /dev/null +++ b/docker/grafana/PNDA.json @@ -0,0 +1,625 @@ +{ + "__inputs": [ + { + "name": "DS_PNDA_GRAPHITE", + "label": "PNDA Graphite", + "description": "", + "type": "datasource", + "pluginId": "graphite", + "pluginName": "Graphite" + } + ], + "__requires": [ + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "dashlist", + "name": "Dashboard 
list", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "3.1.1" + }, + { + "type": "datasource", + "id": "graphite", + "name": "Graphite", + "version": "1.0.0" + } + ], + "id": null, + "title": "PNDA", + "tags": [], + "style": "dark", + "timezone": "browser", + "editable": true, + "hideControls": true, + "sharedCrosshair": false, + "rows": [ + { + "collapse": false, + "editable": true, + "height": "100px", + "panels": [ + { + "content": "
\n PNDA Metrics Dashboard List\n
", + "editable": true, + "error": false, + "id": 3, + "isNew": true, + "links": [], + "mode": "html", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + } + ], + "showTitle": false, + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "100px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 4, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "downsampleFillPolicy": "none", + "refId": "A", + "target": "deployment-manager.packages_available_count" + } + ], + "thresholds": "", + "title": "Available packages", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 5, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "downsampleFillPolicy": "none", + "refId": "A", + "target": "deployment-manager.packages_deployed_count" + } + ], + "thresholds": "", + "title": "Deployed packages", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 6, + "interval": null, + "isNew": 
true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "downsampleFillPolicy": "none", + "refId": "A", + "target": "hadoop.HDFS.live_datanodes" + } + ], + "thresholds": "", + "title": "Available Hadoop Datanodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "downsampleFillPolicy": "none", + "refId": "A", + "target": "hadoop.HDFS.dead_datanodes" + } + ], + "thresholds": "1", + "title": "Dead Hadoop Datanodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 8, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "downsampleFillPolicy": "none", + "refId": "A", + "target": "kafka.nodes.ok", + "textEditor": false + } + ], + "thresholds": "", + "title": "Alive 
Kafka Brokers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PNDA_GRAPHITE}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 9, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "aggregator": "sum", + "downsampleAggregator": "avg", + "downsampleFillPolicy": "none", + "refId": "A", + "target": "kafka.nodes.ko", + "textEditor": false + } + ], + "thresholds": "", + "title": "Dead Kafka Brokers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": 95.25, + "panels": [ + { + "editable": true, + "error": false, + "headings": false, + "id": 1, + "isNew": true, + "limit": 10, + "links": [ + { + "type": "dashboard" + } + ], + "query": "", + "recent": false, + "search": true, + "span": 12, + "starred": false, + "tags": [ + "PNDA" + ], + "title": "PNDA Metrics", + "type": "dashlist" + } + ], + "showTitle": false, + "title": "Row" + } + ], + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "templating": { + "list": [] + }, + "annotations": { + "list": [] + }, + "schemaVersion": 12, + "version": 1, + "links": [], + "gnetId": null +} diff --git a/docker/grafana/grafana-import-dashboards.sh b/docker/grafana/grafana-import-dashboards.sh new file mode 100755 index 0000000..08f52b4 --- /dev/null +++ b/docker/grafana/grafana-import-dashboards.sh @@ -0,0 +1,23 @@ +#!/bin/bash +set -e + +[ "$#" -ne 1 ] && echo "Missing template filename" && exit 1 + +TEMP_FILE="/tmp/grafana-dashboard.json" + +JSON_PREFIX='{ "inputs": [ + { + "type": "datasource", + "pluginId": "graphite", + "name": "DS_PNDA_GRAPHITE", + "value": "PNDA Graphite" + } + ], + "dashboard": ' +JSON_SUFFIX=', "overwrite": true }' + +echo "${JSON_PREFIX}" > "${TEMP_FILE}" +cat "$1" >> "${TEMP_FILE}" +echo "${JSON_SUFFIX}" >> "${TEMP_FILE}" + +curl -H "Content-Type: application/json" -X POST -d @"${TEMP_FILE}" http://pnda:pnda@grafana:3000/api/dashboards/import diff --git a/docker/hdfs/add_kite_tools_and_create_db.sh b/docker/hdfs/add_kite_tools_and_create_db.sh new file mode 100755 index 0000000..73c0bd6 --- /dev/null +++ b/docker/hdfs/add_kite_tools_and_create_db.sh @@ -0,0 +1,10 @@ +#!/bin/bash +export HADOOP_HOME=$HADOOP_PREFIX +export 
HADOOP_MAPRED_HOME=$HADOOP_PREFIX
+export HIVE_HOME=/opt/apache-hive-2.1.0-bin
+mkdir -p $HIVE_HOME/lib
+curl http://central.maven.org/maven2/org/apache/hive/hive-common/2.1.0/hive-common-2.1.0.jar -o $HIVE_HOME/lib/hive-common-2.1.0.jar
+curl http://central.maven.org/maven2/org/kitesdk/kite-tools/1.1.0/kite-tools-1.1.0-binary.jar -o /usr/local/bin/kite-dataset
+chmod +x /usr/local/bin/kite-dataset
+su pnda -c "kite-dataset create --schema /tmp/pnda.avsc \
+  dataset:hdfs://hdfs-namenode/user/pnda/PNDA_datasets/datasets --partition-by /tmp/pnda_kite_partition.json"
diff --git a/docker/hdfs/kite-files/pnda.avsc b/docker/hdfs/kite-files/pnda.avsc
new file mode 100644
index 0000000..1a0f866
--- /dev/null
+++ b/docker/hdfs/kite-files/pnda.avsc
@@ -0,0 +1,10 @@
+{"namespace": "pnda.entity",
+ "type": "record",
+ "name": "event",
+ "fields": [
+    {"name": "timestamp", "type": "long"},
+    {"name": "src", "type": "string"},
+    {"name": "host_ip", "type": "string"},
+    {"name": "rawdata", "type": "bytes"}
+ ]
+}
diff --git a/docker/hdfs/kite-files/pnda_kite_partition.json b/docker/hdfs/kite-files/pnda_kite_partition.json
new file mode 100644
index 0000000..b430c3a
--- /dev/null
+++ b/docker/hdfs/kite-files/pnda_kite_partition.json
@@ -0,0 +1,7 @@
+[
+  {"type": "identity", "source": "src", "name": "source"},
+  {"type": "year", "source": "timestamp"},
+  {"type": "month", "source": "timestamp"},
+  {"type": "day", "source": "timestamp"},
+  {"type": "hour", "source": "timestamp"}
+]
diff --git a/docker/opentsdb/create_opentsdb_hbase_tables.sh b/docker/opentsdb/create_opentsdb_hbase_tables.sh
new file mode 100755
index 0000000..4f132ce
--- /dev/null
+++ b/docker/opentsdb/create_opentsdb_hbase_tables.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+HBASE_HOME=/opt/hbase
+COMPRESSION=GZ
+# Small script to setup the HBase tables used by OpenTSDB.
+test -n "$HBASE_HOME" || {
+  echo >&2 'The environment variable HBASE_HOME must be set'
+  exit 1
+}
+test -d "$HBASE_HOME" || {
+  echo >&2 "No such directory: HBASE_HOME=$HBASE_HOME"
+  exit 1
+}
+
+TSDB_TABLE=${TSDB_TABLE-'tsdb'}
+UID_TABLE=${UID_TABLE-'tsdb-uid'}
+TREE_TABLE=${TREE_TABLE-'tsdb-tree'}
+META_TABLE=${META_TABLE-'tsdb-meta'}
+BLOOMFILTER=${BLOOMFILTER-'ROW'}
+# LZO requires lzo2 64bit to be installed + the hadoop-gpl-compression jar.
+COMPRESSION=${COMPRESSION-'LZO'}
+# All compression codec names are upper case (NONE, LZO, SNAPPY, etc).
+COMPRESSION=`echo "$COMPRESSION" | tr a-z A-Z`
+
+case $COMPRESSION in
+  (NONE|GZ|LZO|GZIP|SNAPPY) :;;  # Known good.
+  (*)
+    echo >&2 "warning: compression codec '$COMPRESSION' might not be supported."
+    ;;
+esac
+
+# HBase scripts also use a variable named `HBASE_HOME', and having this
+# variable in the environment with a value somewhat different from what
+# they expect can confuse them in some cases. So rename the variable.
+hbh=$HBASE_HOME
+unset HBASE_HOME
+exec "$hbh/bin/hbase" shell <<EOF
+create '$UID_TABLE',
+  {NAME => 'id', COMPRESSION => '$COMPRESSION', BLOOMFILTER => '$BLOOMFILTER'},
+  {NAME => 'name', COMPRESSION => '$COMPRESSION', BLOOMFILTER => '$BLOOMFILTER'}
+
+create '$TSDB_TABLE',
+  {NAME => 't', VERSIONS => 1, COMPRESSION => '$COMPRESSION', BLOOMFILTER => '$BLOOMFILTER'}
+
+create '$TREE_TABLE',
+  {NAME => 't', VERSIONS => 1, COMPRESSION => '$COMPRESSION', BLOOMFILTER => '$BLOOMFILTER'}
+
+create '$META_TABLE',
+  {NAME => 'name', COMPRESSION => '$COMPRESSION', BLOOMFILTER => '$BLOOMFILTER'}
+EOF
diff --git a/docker/register_hostnames.sh b/docker/register_hostnames.sh
new file mode 100755
index 0000000..69866c8
--- /dev/null
+++ b/docker/register_hostnames.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+function add_hostname () {
+HOST_IP=$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' $1)
+if grep -q "$1$" /etc/hosts
+then
+ echo "Updating $1 IP in /etc/hosts"
+ sudo sed -i "s/.* $1$/$HOST_IP $1/" /etc/hosts
+else
+ echo "creating a $1 entry in /etc/hosts"
+ echo "$HOST_IP $1" |sudo tee --append "/etc/hosts"
+fi
+}
+
+echo "---------------- ADDING services naming resolution through /etc/hosts ----------------"
+add_hostname console-backend
+add_hostname console-frontend
+add_hostname kafka-manager
+add_hostname jupyter
+add_hostname grafana
+add_hostname opentsdb
+add_hostname data-service
+add_hostname package-repository
+add_hostname grafana
+add_hostname spark-master
+add_hostname spark-worker
+add_hostname flink-master
+add_hostname kafka
+add_hostname hbase-master
+add_hostname hdfs-namenode
+add_hostname hdfs-datanode
+
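A quick post-deploy sanity check can confirm that `register_hostnames.sh` wrote the expected `/etc/hosts` entries and that the main web UIs answer. This is a rough sketch to run on the docker host after `deploy.sh`; the Grafana and OpenTSDB health endpoints are assumptions based on the versions listed in the README:

    # check the name resolution entries written by register_hostnames.sh
    for h in console-frontend grafana opentsdb kafka hbase-master hdfs-namenode; do
      getent hosts "$h" > /dev/null || echo "missing /etc/hosts entry for $h"
    done
    # probe a few service endpoints (HTTP status codes only)
    curl -s -o /dev/null -w "console-frontend %{http_code}\n" http://console-frontend/
    curl -s -o /dev/null -w "grafana          %{http_code}\n" http://grafana:3000/api/health
    curl -s -o /dev/null -w "opentsdb         %{http_code}\n" http://opentsdb:4242/version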
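Similarly, once `create_opentsdb_hbase_tables.sh` has created the `tsdb*` tables, pushing a single test datapoint through the OpenTSDB HTTP API exercises the HBase wiring end to end. A minimal sketch, assuming `tsd.core.auto_create_metrics` is enabled on OpenTSDB; the metric name below is made up for illustration:

    curl -s -X POST 'http://opentsdb:4242/api/put?details' \
      -H 'Content-Type: application/json' \
      -d "[{\"metric\":\"redpnda.smoketest\",\"timestamp\":$(date +%s),\"value\":1,\"tags\":{\"source\":\"smoketest\"}}]"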