diff --git a/demos/gpu/README.ipynb b/demos/gpu/README.ipynb index 1ea9aa58..36596ff6 100644 --- a/demos/gpu/README.ipynb +++ b/demos/gpu/README.ipynb @@ -25,16 +25,11 @@ "- A **horovod** directory with applications that use Uber's [Horovod](https://eng.uber.com/horovod/) distributed deep-learning framework, which can be used to convert a single-GPU TensorFlow, Keras, or PyTorch model-training program to a distributed program that trains the model simultaneously over multiple GPUs.\n", " The objective is to speed up your model training with minimal changes to your existing single-GPU code and without complicating the execution.\n", " Horovod code can also run over CPUs with only minor modifications.\n", - " For more information and examples, see the [Horovod GitHub repository](https://github.com/horovod/horovod).\n", - " \n", " The Horovod tutorials include the following:\n", - "\n", - " - An image-recognition demo application for execution over GPUs (**image-classification**).\n", - " - A slightly modified version of the GPU image-classification demo application for execution over CPUs (**cpu/image-classification**).\n", " - Benchmark tests (**benchmark-tf.ipynb**, which executes **tf_cnn_benchmarks.py**).\n", + " - Note that under the demo folder you will find an image classification demo that also runs with Horovod and can be set to run on GPUs.
\n", "\n", "- A **rapids** directory with applications that use NVIDIA's [RAPIDS](https://rapids.ai/) open-source libraries suite for executing end-to-end data science and analytics pipelines entirely on GPUs.\n", - "\n", " The RAPIDS tutorials include the following:\n", "\n", " - Demo applications that use the [cuDF](https://rapidsai.github.io/projects/cudf/en/latest/index.html) RAPIDS GPU DataFrame library to perform batching and aggregation of data that's read from a Kafaka stream, and then write the results to a Parquet file.
\n", diff --git a/demos/gpu/horovod/cpu/image-classification/01-load-data-cats-n-dogs.ipynb b/demos/gpu/horovod/cpu/image-classification/01-load-data-cats-n-dogs.ipynb deleted file mode 100644 index d08237a5..00000000 --- a/demos/gpu/horovod/cpu/image-classification/01-load-data-cats-n-dogs.ipynb +++ /dev/null @@ -1,284 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Load Cats and Dogs Images" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "_uuid": "fe76d1d1ded592430e7548feacfa38dc42f085d9" - }, - "source": [ - "## Install Packages" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install --upgrade keras==2.2.4\n", - "!pip install --upgrade tensorflow==1.13.1 \n", - "!pip install --upgrade 'numpy<1.15.0'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> **Note:** After running the pip command you should restart the Jupyter kernel.
\n", - "> To restart the kernel, click on the kernel-restart button in the notebook menu toolbar (the refresh icon next to the **Code** button)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import Library" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", - "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "# This Python 3 environment comes with many helpful analytics libraries installed.\n", - "# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python.\n", - "# For example, here are several helpful packages to load:\n", - "\n", - "import numpy as np # linear algebra\n", - "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", - "from keras.preprocessing.image import load_img\n", - "\n", - "# Input data files are available in the \"../input/\" directory.\n", - "# For example, running the following (by selecting 'Run' or pressing Shift+Enter) will list the files in the input directory:\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import random\n", - "\n", - "import os\n", - "import zipfile\n", - "\n", - "# Define locations\n", - "BASE_PATH = os.getcwd()\n", - "DATA_PATH = BASE_PATH + \"/cats_and_dogs_filtered/\"\n", - "!mkdir model\n", - "MODEL_PATH = BASE_PATH + '/model/'\n", - "\n", - "# Define image parameters\n", - "FAST_RUN = False\n", - "IMAGE_WIDTH=128\n", - "IMAGE_HEIGHT=128\n", - "IMAGE_SIZE=(IMAGE_WIDTH, IMAGE_HEIGHT)\n", - "IMAGE_CHANNELS=3 # RGB color\n", - "\n", - "# Any results you write to the current directory are saved as output." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "DATA_PATH + 'catsndogs.zip'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Download the Data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!mkdir cats_and_dogs_filtered\n", - "# Download a sample stocks file from Iguazio demo bucket in AWS S3\n", - "!curl -L \"iguazio-sample-data.s3.amazonaws.com/catsndogs.zip\" > ./cats_and_dogs_filtered/catsndogs.zip" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "zip_ref = zipfile.ZipFile(DATA_PATH + 'catsndogs.zip', 'r')\n", - "zip_ref.extractall('cats_and_dogs_filtered')\n", - "zip_ref.close()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "_uuid": "7335a579cc0268fba5d34d6f7558f33c187eedb3" - }, - "source": [ - "## Prepare the Traning Data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def build_prediction_map(categories_map):\n", - " return {v:k for k ,v in categories_map.items()}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", - "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a" - }, - "outputs": [], - "source": [ - "# Create a file-names list (JPG image-files only)\n", - "filenames = [file for file in os.listdir(DATA_PATH+\"/cats_n_dogs\") if file.endswith('jpg')]\n", - "categories = []\n", - "\n", - "# Categories and prediction-classes map\n", - "categories_map = {\n", - " 'dog': 1,\n", - " 'cat': 0,\n", - "}\n", - "prediction_map = build_prediction_map(categories_map)\n", - "with open(MODEL_PATH + 'prediction_classes_map.json', 'w') as f:\n", - " 
json.dump(prediction_map, f)\n", - "\n", - "# Create a pandas DataFrame for the full sample\n", - "for filename in filenames:\n", - " category = filename.split('.')[0]\n", - " categories.append([categories_map[category]])\n", - "\n", - "df = pd.DataFrame({\n", - " 'filename': filenames,\n", - " 'category': categories\n", - "})\n", - "df['category'] = df['category'].astype('str');" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "_uuid": "915bb9ba7063ab4d5c07c542419ae119003a5f98" - }, - "outputs": [], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "_uuid": "72bf69e817f67f5a2eaff8561217e22077248553" - }, - "outputs": [], - "source": [ - "df.tail()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "_uuid": "a999484fc35b73373fafe2253ae9db7ff46fdb90" - }, - "source": [ - "## Check the Total Image Count\n", - "\n", - "Check the total image count for each category.
\n", - "The data set has 12,000 cat images and 12,000 dog images." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "_uuid": "fa26f0bc7a6d835a24989790b20f3c6f32946f45" - }, - "outputs": [], - "source": [ - "df['category'].value_counts().plot.bar()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "_uuid": "400a293df3c8499059d9175f3915187074efd971" - }, - "source": [ - "## Display the Sample Image" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "_uuid": "602b40f7353871cb161c60b5237f0da0096b2f47" - }, - "outputs": [], - "source": [ - "sample = random.choice(filenames)\n", - "image = load_img(DATA_PATH+\"/cats_n_dogs/\"+sample)\n", - "plt.imshow(image)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/demos/gpu/horovod/cpu/image-classification/02-train-with-horovod-cats-n-dogs.ipynb b/demos/gpu/horovod/cpu/image-classification/02-train-with-horovod-cats-n-dogs.ipynb deleted file mode 100644 index 281e82a7..00000000 --- a/demos/gpu/horovod/cpu/image-classification/02-train-with-horovod-cats-n-dogs.ipynb +++ /dev/null @@ -1,216 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install git+https://github.com/v3io/v3io-gputils" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "!rm -f /User/demos/gpu/horovod/cpu/image-classification/cats_dogs.hd5\n", - "!mkdir /User/demos/gpu/horovod/cpu/image-classification/checkpoints" - ] - }, - { - "cell_type": "code", - 
"execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "HOROVOD_JOB_NAME = \"horovod-cats-n-dogs\"" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'apiVersion': 'kubeflow.org/v1alpha1',\n", - " 'kind': 'MPIJob',\n", - " 'metadata': {'creationTimestamp': '2019-07-02T07:41:59Z',\n", - " 'generation': 1,\n", - " 'name': 'horovod-cats-n-dogs',\n", - " 'namespace': 'default-tenant',\n", - " 'resourceVersion': '1391131',\n", - " 'selfLink': '/apis/kubeflow.org/v1alpha1/namespaces/default-tenant/mpijobs/horovod-cats-n-dogs',\n", - " 'uid': 'df9b08a1-9c9c-11e9-98d3-d8c4972b0204'},\n", - " 'spec': {'replicas': 8,\n", - " 'template': {'spec': {'containers': [{'command': ['mpirun',\n", - " 'python',\n", - " '/User/demos/gpu/horovod/cpu/image-classification/horovod_train_cats_n_dogs.py',\n", - " '/User/demos/gpu/horovod/cpu/image-classification/cats_and_dogs_filtered',\n", - " '/User/demos/gpu/horovod/cpu/image-classification'],\n", - " 'image': 'iguaziodocker/horovod-cpu:0.0.1',\n", - " 'name': 'horovod-cats-n-dogs',\n", - " 'resources': {'limits': {'nvidia.com/gpu': 0}},\n", - " 'securityContext': {'capabilities': {'add': ['IPC_LOCK']}},\n", - " 'volumeMounts': [{'mountPath': '/User',\n", - " 'name': 'v3io'}]}],\n", - " 'volumes': [{'flexVolume': {'driver': 'v3io/fuse',\n", - " 'options': {'accessKey': '1e52ff93-a541-4880-abf1-d9b948af77de',\n", - " 'container': 'users',\n", - " 'subPath': '/iguazio'}},\n", - " 'name': 'v3io'}]}}}}\n" - ] - } - ], - "source": [ - "from v3io_gputils.mpijob import MpiJob\n", - "\n", - "job = MpiJob(HOROVOD_JOB_NAME, 'iguaziodocker/horovod-cpu:0.0.1', ['/User/demos/gpu/horovod/cpu/image-classification/horovod_train_cats_n_dogs.py',\n", - " '/User/demos/gpu/horovod/cpu/image-classification/cats_and_dogs_filtered',\n", - " '/User/demos/gpu/horovod/cpu/image-classification'])\n", - "\n", - "job.replicas(2).gpus(0)\n", 
- "job.submit()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "horovod-cats-n-dogs-launcher-8kg8c 1/1 Running 0 75s\n", - "horovod-cats-n-dogs-worker-0 1/1 Running 0 83s\n", - "horovod-cats-n-dogs-worker-1 1/1 Running 0 83s\n", - "horovod-cats-n-dogs-worker-2 1/1 Running 0 83s\n", - "horovod-cats-n-dogs-worker-3 1/1 Running 0 83s\n", - "horovod-cats-n-dogs-worker-4 1/1 Running 0 83s\n", - "horovod-cats-n-dogs-worker-5 1/1 Running 0 83s\n", - "horovod-cats-n-dogs-worker-6 1/1 Running 0 83s\n", - "horovod-cats-n-dogs-worker-7 1/1 Running 0 83s\n" - ] - } - ], - "source": [ - "\n", - "!kubectl get pods | grep $HOROVOD_JOB_NAME" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "apiVersion: kubeflow.org/v1alpha1\n", - "kind: MPIJob\n", - "metadata:\n", - " creationTimestamp: 2019-07-02T06:49:07Z\n", - " generation: 1\n", - " name: horovod-cats-n-dogs\n", - " namespace: default-tenant\n", - " resourceVersion: \"1386982\"\n", - " selfLink: /apis/kubeflow.org/v1alpha1/namespaces/default-tenant/mpijobs/horovod-cats-n-dogs\n", - " uid: 7d0cd80c-9c95-11e9-98d3-d8c4972b0204\n", - "spec:\n", - " backoffLimit: 6\n", - " replicas: 8\n", - " template:\n", - " metadata:\n", - " creationTimestamp: null\n", - " spec:\n", - " containers:\n", - " - command:\n", - " - mpirun\n", - " - python\n", - " - /User/demos/gpu/horovod/cpu/image-classification/horovod_train_cats_n_dogs.py\n", - " - /User/demos/gpu/horovod/cpu/image-classification/cats_and_dogs_filtered\n", - " - /User/demos/gpu/horovod/cpu/image-classification\n", - " image: iguaziodocker/cpu/horovod-cpu:0.1.1\n", - " name: horovod-cats-n-dogs\n", - " resources:\n", - " limits:\n", - " nvidia.com/gpu: \"1\"\n", - " securityContext:\n", - " capabilities:\n", - " add:\n", - " - IPC_LOCK\n", - " volumeMounts:\n", - 
" - mountPath: /User\n", - " name: v3io\n", - " volumes:\n", - " - flexVolume:\n", - " driver: v3io/fuse\n", - " options:\n", - " accessKey: 1e52ff93-a541-4880-abf1-d9b948af77de\n", - " container: users\n", - " subPath: /iguazio\n", - " name: v3io\n", - "status:\n", - " completionTime: 2019-07-02T06:56:20Z\n", - " launcherStatus: Succeeded\n", - " startTime: 2019-07-02T06:49:14Z\n" - ] - } - ], - "source": [ - "!kubectl get mpijob $HOROVOD_JOB_NAME -o yaml" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'apiVersion': 'v1',\n", - " 'details': {'group': 'kubeflow.org',\n", - " 'kind': 'mpijobs',\n", - " 'name': 'horovod-cats-n-dogs',\n", - " 'uid': '1b58dd58-9c97-11e9-98d3-d8c4972b0204'},\n", - " 'kind': 'Status',\n", - " 'metadata': {},\n", - " 'status': 'Success'}\n" - ] - } - ], - "source": [ - "job.delete()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/demos/gpu/horovod/cpu/image-classification/03-infer.ipynb b/demos/gpu/horovod/cpu/image-classification/03-infer.ipynb deleted file mode 100644 index 995a17ba..00000000 --- a/demos/gpu/horovod/cpu/image-classification/03-infer.ipynb +++ /dev/null @@ -1,548 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Create and Test a Model-Serving Nuclio Function\n", - "\n", - "This notebook demonstrates how to write an inference server, test it, and turn it into an auto-scaling Nuclio serverless function.\n", - "\n", - "- [Initialize Nuclio Emulation, Environment Variables, and 
Configuration](#image-class-infer-init-func)\n", - "- [Create and Load the Model and Set Up the Function Handler](#image-class-infer-create-n-load-model-n-set-up-func-handler)\n", - "- [Trigger the Function](#image-class-infer-func-trigger)\n", - "- [Prepare to Deploy the Function](#image-class-infer-func-deploy-prepare)\n", - "- [Deploy the Function](#image-class-infer-func-deploy)\n", - "- [Test the Function](#image-class-infer-func-test)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Initialize Nuclio Emulation, Environment Variables, and Configuration\n", - "\n", - "> **Note:** Use `# nuclio: ignore` for sections that don't need to be copied to the function." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# nuclio: ignore\n", - "import nuclio\n", - "import random\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "%nuclio: setting 'IMAGE_WIDTH' environment variable\n", - "%nuclio: setting 'IMAGE_HEIGHT' environment variable\n", - "%nuclio: setting 'version' environment variable\n" - ] - } - ], - "source": [ - "%%nuclio env\n", - "IMAGE_WIDTH = 128\n", - "IMAGE_HEIGHT = 128\n", - "version = 1.0" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "%nuclio: setting 'MODEL_PATH' environment variable\n", - "%nuclio: setting 'PREDICTION_MAP_PATH' environment variable\n" - ] - } - ], - "source": [ - "%nuclio env -c MODEL_PATH=/model/\n", - "%nuclio env -l MODEL_PATH=/User/demos/gpu/horovod/cpu/image-classification/cats_dogs.hd5\n", - "%nuclio env -l PREDICTION_MAP_PATH=./model/prediction_classes_map.json" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - 
"%%nuclio cmd -c\n", - "pip install keras==2.2.4\n", - "pip install tensorflow==1.13.1 \n", - "pip install 'numpy<1.15.0'\n", - "pip install requests\n", - "pip install pillow" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'\n" - ] - } - ], - "source": [ - "%%nuclio config \n", - "spec.build.baseImage = \"python:3.6-jessie\"" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "mounting volume path /model as ~/demos/gpu/horovod/cpu/image-classification/cats_dogs/model\n" - ] - } - ], - "source": [ - "%nuclio mount /model ~/demos/gpu/horovod/cpu/image-classification/cats_dogs/model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Create and Load the Model and Set Up the Function Handler" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "import numpy as np \n", - "from tensorflow import keras\n", - "from keras.models import load_model\n", - "from keras.preprocessing import image\n", - "from keras.preprocessing.image import load_img\n", - "import json\n", - "import requests\n", - "\n", - "import os\n", - "from os import environ, path\n", - "from tempfile import mktemp" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "model_file = environ['MODEL_PATH']\n", - "prediction_map_file = environ['PREDICTION_MAP_PATH']\n", - "\n", - "# Set image parameters\n", - "IMAGE_WIDTH = int(environ['IMAGE_WIDTH'])\n", - "IMAGE_HEIGHT = int(environ['IMAGE_HEIGHT'])\n", - "\n", - "# load model\n", - "def init_context(context): \n", - " context.model = 
load_model(model_file)\n", - " with open(prediction_map_file, 'r') as f:\n", - " context.prediction_map = json.load(f)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "def download_file(context, url, target_path):\n", - " with requests.get(url, stream=True) as response:\n", - " response.raise_for_status()\n", - " with open(target_path, 'wb') as f:\n", - " for chunk in response.iter_content(chunk_size=8192):\n", - " if chunk:\n", - " f.write(chunk)\n", - "\n", - " context.logger.info_with('Downloaded file',url=url)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "def handler(context, event):\n", - " tmp_file = mktemp()\n", - " image_url = event.body.decode('utf-8').strip()\n", - " download_file(context, image_url, tmp_file)\n", - " \n", - " img = load_img(tmp_file, target_size=(IMAGE_WIDTH, IMAGE_HEIGHT))\n", - " x = image.img_to_array(img)\n", - " x = np.expand_dims(x, axis=0)\n", - "\n", - " images = np.vstack([x])\n", - " predicted_probability = context.model.predict_proba(images, batch_size=10)\n", - " predicted_class = list(zip(predicted_probability, map(lambda x: '1' if x >= 0.5 else '0', predicted_probability)))\n", - " actual_class = [(context.prediction_map[x[1]],x[0][0]) for x in predicted_class] \n", - " os.remove(tmp_file)\n", - " result = {'class':actual_class[0][0], 'dog-probability':float(actual_class[0][1])}\n", - " return json.dumps(result)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Trigger the Function" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING: Logging before flag parsing goes to stderr.\n", - "W0702 07:05:39.756876 140252009395584 deprecation_wrapper.py:119] From /User/.pythonlibs/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:529: The 
name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.\n", - "\n", - "W0702 07:05:39.784578 140252009395584 deprecation_wrapper.py:119] From /User/.pythonlibs/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:4420: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.\n", - "\n", - "W0702 07:05:39.814317 140252009395584 deprecation_wrapper.py:119] From /User/.pythonlibs/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:250: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.\n", - "\n", - "W0702 07:05:39.814856 140252009395584 deprecation_wrapper.py:119] From /User/.pythonlibs/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:178: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.\n", - "\n", - "W0702 07:05:39.815345 140252009395584 deprecation_wrapper.py:119] From /User/.pythonlibs/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:185: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.\n", - "\n", - "W0702 07:05:39.920376 140252009395584 deprecation_wrapper.py:119] From /User/.pythonlibs/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:2029: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.\n", - "\n", - "W0702 07:05:39.987011 140252009395584 deprecation_wrapper.py:119] From /User/.pythonlibs/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:4255: The name tf.nn.max_pool is deprecated. 
Please use tf.nn.max_pool2d instead.\n", - "\n", - "W0702 07:05:39.994644 140252009395584 deprecation.py:506] From /User/.pythonlibs/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:3721: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n", - "W0702 07:05:40.865994 140252009395584 deprecation_wrapper.py:119] From /User/.pythonlibs/lib/python3.6/site-packages/keras/optimizers.py:793: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.\n", - "\n", - "W0702 07:05:40.875694 140252009395584 deprecation.py:323] From /User/.pythonlibs/lib/python3.6/site-packages/tensorflow/python/ops/nn_impl.py:180: add_dispatch_support..wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Use tf.where in 2.0, which has the same broadcast rule as np.where\n" - ] - } - ], - "source": [ - "# nuclio: ignore\n", - "init_context(context)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Python> 2019-07-02 07:06:07,287 [info] Downloaded file: {'url': 'https://s3.amazonaws.com/iguazio-sample-data/images/catanddog/dog.391.jpg'}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "I0702 07:06:07.287323 140252009395584 logger.py:100] Downloaded file\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"class\": \"dog\", \"dog-probability\": 1.0}\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "# nuclio: ignore\n", - "# Select a sample for the test.\n", - "# Set both the local path for the test and the URL for downloading the sample from AWS S3.\n", - "DATA_LOCATION = \"./cats_and_dogs_filtered/\"\n", - "sample = random.choice(os.listdir(DATA_LOCATION+\"/cats_n_dogs\"))\n", - "image_local = DATA_LOCATION + \"cats_n_dogs/\"+sample # Temporary location for downloading the file \n", - "image_url = 'https://s3.amazonaws.com/iguazio-sample-data/images/catanddog/' + sample \n", - "\n", - "# Show the image\n", - "img = load_img(image_local, target_size=(IMAGE_WIDTH, IMAGE_HEIGHT))\n", - "plt.imshow(img)\n", - "\n", - "event = nuclio.Event(body=bytes(image_url, 'utf-8'))\n", - "output = handler(context, event)\n", - "print(output)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "%nuclio: notebook infer exported\n", - "Config:\n", - "apiVersion: nuclio.io/v1\n", - "kind: Function\n", - "metadata:\n", - " annotations:\n", - " nuclio.io/generated_by: function generated at 02-07-2019 by iguazio from /User/demos/image-classification/infer.ipynb\n", - " labels: {}\n", - " name: infer\n", - "spec:\n", - " build:\n", - " baseImage: python:3.6-jessie\n", - " commands:\n", - " - pip install git+https://github.com/fchollet/keras\n", - " - pip install tensorflow\n", - " - pip install numpy\n", - " - pip install requests\n", - " - pip install pillow\n", - " functionSourceCode: 
IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlciBvbiAyMDE5LTA3LTAyIDA3OjA2CgppbXBvcnQgbnVtcHkgYXMgbnAgCmZyb20gdGVuc29yZmxvdyBpbXBvcnQga2VyYXMKZnJvbSBrZXJhcy5tb2RlbHMgaW1wb3J0IGxvYWRfbW9kZWwKZnJvbSBrZXJhcy5wcmVwcm9jZXNzaW5nIGltcG9ydCBpbWFnZQpmcm9tIGtlcmFzLnByZXByb2Nlc3NpbmcuaW1hZ2UgaW1wb3J0IGxvYWRfaW1nCmltcG9ydCBqc29uCmltcG9ydCByZXF1ZXN0cwoKaW1wb3J0IG9zCmZyb20gb3MgaW1wb3J0IGVudmlyb24sIHBhdGgKZnJvbSB0ZW1wZmlsZSBpbXBvcnQgbWt0ZW1wCgptb2RlbF9maWxlID0gZW52aXJvblsnTU9ERUxfUEFUSCddCnByZWRpY3Rpb25fbWFwX2ZpbGUgPSBlbnZpcm9uWydQUkVESUNUSU9OX01BUF9QQVRIJ10KCklNQUdFX1dJRFRIID0gaW50KGVudmlyb25bJ0lNQUdFX1dJRFRIJ10pCklNQUdFX0hFSUdIVCA9IGludChlbnZpcm9uWydJTUFHRV9IRUlHSFQnXSkKCmRlZiBpbml0X2NvbnRleHQoY29udGV4dCk6IAogICAgY29udGV4dC5tb2RlbCA9IGxvYWRfbW9kZWwobW9kZWxfZmlsZSkKICAgIHdpdGggb3BlbihwcmVkaWN0aW9uX21hcF9maWxlLCAncicpIGFzIGY6CiAgICAgICAgY29udGV4dC5wcmVkaWN0aW9uX21hcCA9IGpzb24ubG9hZChmKQoKZGVmIGRvd25sb2FkX2ZpbGUoY29udGV4dCwgdXJsLCB0YXJnZXRfcGF0aCk6CiAgICB3aXRoIHJlcXVlc3RzLmdldCh1cmwsIHN0cmVhbT1UcnVlKSBhcyByZXNwb25zZToKICAgICAgICByZXNwb25zZS5yYWlzZV9mb3Jfc3RhdHVzKCkKICAgICAgICB3aXRoIG9wZW4odGFyZ2V0X3BhdGgsICd3YicpIGFzIGY6CiAgICAgICAgICAgIGZvciBjaHVuayBpbiByZXNwb25zZS5pdGVyX2NvbnRlbnQoY2h1bmtfc2l6ZT04MTkyKToKICAgICAgICAgICAgICAgIGlmIGNodW5rOgogICAgICAgICAgICAgICAgICAgIGYud3JpdGUoY2h1bmspCgogICAgY29udGV4dC5sb2dnZXIuaW5mb193aXRoKCdEb3dubG9hZGVkIGZpbGUnLHVybD11cmwpCgpkZWYgaGFuZGxlcihjb250ZXh0LCBldmVudCk6CiAgICB0bXBfZmlsZSA9IG1rdGVtcCgpCiAgICBpbWFnZV91cmwgPSBldmVudC5ib2R5LmRlY29kZSgndXRmLTgnKS5zdHJpcCgpCiAgICBkb3dubG9hZF9maWxlKGNvbnRleHQsIGltYWdlX3VybCwgdG1wX2ZpbGUpCiAgICAKICAgIGltZyA9IGxvYWRfaW1nKHRtcF9maWxlLCB0YXJnZXRfc2l6ZT0oSU1BR0VfV0lEVEgsIElNQUdFX0hFSUdIVCkpCiAgICB4ID0gaW1hZ2UuaW1nX3RvX2FycmF5KGltZykKICAgIHggPSBucC5leHBhbmRfZGltcyh4LCBheGlzPTApCgogICAgaW1hZ2VzID0gbnAudnN0YWNrKFt4XSkKICAgIHByZWRpY3RlZF9wcm9iYWJpbGl0eSA9IGNvbnRleHQubW9kZWwucHJlZGljdF9wcm9iYShpbWFnZXMsIGJhdGNoX3NpemU9MTApCiAgICBwcmVkaWN0ZWRfY2xhc3MgPSBsaXN0KHppcChwcmVkaWN0ZWRfcHJvYmFiaWxpdHksIG1hcChsYW1iZGEg
eDogJzEnIGlmIHggPj0gMC41IGVsc2UgJzAnLCBwcmVkaWN0ZWRfcHJvYmFiaWxpdHkpKSkKICAgIGFjdHVhbF9jbGFzcyA9IFsoY29udGV4dC5wcmVkaWN0aW9uX21hcFt4WzFdXSx4WzBdWzBdKSBmb3IgeCBpbiBwcmVkaWN0ZWRfY2xhc3NdICAgCiAgICBvcy5yZW1vdmUodG1wX2ZpbGUpCiAgICByZXN1bHQgPSB7J2NsYXNzJzphY3R1YWxfY2xhc3NbMF1bMF0sICdkb2ctcHJvYmFiaWxpdHknOmZsb2F0KGFjdHVhbF9jbGFzc1swXVsxXSl9CiAgICByZXR1cm4ganNvbi5kdW1wcyhyZXN1bHQpCgo=\n", - " noBaseImagesPull: true\n", - " env:\n", - " - name: IMAGE_WIDTH\n", - " value: '128'\n", - " - name: IMAGE_HEIGHT\n", - " value: '128'\n", - " - name: version\n", - " value: '1.0'\n", - " - name: MODEL_PATH\n", - " value: /model/\n", - " handler: infer:handler\n", - " runtime: python:3.6\n", - " volumes:\n", - " - volume:\n", - " flexVolume:\n", - " driver: v3io/fuse\n", - " options:\n", - " accessKey: 1e52ff93-a541-4880-abf1-d9b948af77de\n", - " container: users\n", - " subPath: /iguazio/demos/gpu/horovod/cpu/image-classification/cats_dogs/model\n", - " name: fs\n", - " volumeMount:\n", - " mountPath: /model\n", - " name: fs\n", - "\n", - "Code:\n", - "# Generated by nuclio.export.NuclioExporter on 2019-07-02 07:06\n", - "\n", - "import numpy as np \n", - "from tensorflow import keras\n", - "from keras.models import load_model\n", - "from keras.preprocessing import image\n", - "from keras.preprocessing.image import load_img\n", - "import json\n", - "import requests\n", - "\n", - "import os\n", - "from os import environ, path\n", - "from tempfile import mktemp\n", - "\n", - "model_file = environ['MODEL_PATH']\n", - "prediction_map_file = environ['PREDICTION_MAP_PATH']\n", - "\n", - "IMAGE_WIDTH = int(environ['IMAGE_WIDTH'])\n", - "IMAGE_HEIGHT = int(environ['IMAGE_HEIGHT'])\n", - "\n", - "def init_context(context): \n", - " context.model = load_model(model_file)\n", - " with open(prediction_map_file, 'r') as f:\n", - " context.prediction_map = json.load(f)\n", - "\n", - "def download_file(context, url, target_path):\n", - " with requests.get(url, stream=True) as response:\n", - " 
response.raise_for_status()\n", - " with open(target_path, 'wb') as f:\n", - " for chunk in response.iter_content(chunk_size=8192):\n", - " if chunk:\n", - " f.write(chunk)\n", - "\n", - " context.logger.info_with('Downloaded file',url=url)\n", - "\n", - "def handler(context, event):\n", - " tmp_file = mktemp()\n", - " image_url = event.body.decode('utf-8').strip()\n", - " download_file(context, image_url, tmp_file)\n", - " \n", - " img = load_img(tmp_file, target_size=(IMAGE_WIDTH, IMAGE_HEIGHT))\n", - " x = image.img_to_array(img)\n", - " x = np.expand_dims(x, axis=0)\n", - "\n", - " images = np.vstack([x])\n", - " predicted_probability = context.model.predict_proba(images, batch_size=10)\n", - " predicted_class = list(zip(predicted_probability, map(lambda x: '1' if x >= 0.5 else '0', predicted_probability)))\n", - " actual_class = [(context.prediction_map[x[1]],x[0][0]) for x in predicted_class] \n", - " os.remove(tmp_file)\n", - " result = {'class':actual_class[0][0], 'dog-probability':float(actual_class[0][1])}\n", - " return json.dumps(result)\n", - "\n", - "\n" - ] - } - ], - "source": [ - "%nuclio show" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Prepare to Deploy the Function\n", - "\n", - "Before you deploy the function, open a Jupyter terminal and run the following command:\n", - "\n", - "`pip install --upgrade nuclio-jupyter`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Deploy the Function\n", - "\n", - "Run the following command to deploy the function:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[nuclio.deploy] 2019-07-02 07:07:08,424 project name not found created new (ai)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "I0702 07:07:08.424754 140252009395584 deploy.py:317] project name not found created new (ai)\n" - ] - }, 
- { - "name": "stdout", - "output_type": "stream", - "text": [ - "[nuclio.deploy] 2019-07-02 07:07:09,507 (info) Building processor image\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "I0702 07:07:09.507237 140252009395584 deploy.py:274] (info) Building processor image\n" - ] - } - ], - "source": [ - "%nuclio deploy -n cats-dogs -p ai -c" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Test the Function" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Run a test with the new function. Replace \"function URL:port\" with the actual URL and port number.\n", - "# To get the function's URL, in the platform dashboard, navigate to the function page - Functions > ai > cats-dogs - and select the 'Status' tab.\n", - "!curl -X POST -d \"https://s3.amazonaws.com/iguazio-sample-data/images/catanddog/cat.123.jpg\" " - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/demos/gpu/horovod/cpu/image-classification/horovod_train_cats_n_dogs.py b/demos/gpu/horovod/cpu/image-classification/horovod_train_cats_n_dogs.py deleted file mode 100644 index f00b9973..00000000 --- a/demos/gpu/horovod/cpu/image-classification/horovod_train_cats_n_dogs.py +++ /dev/null @@ -1,177 +0,0 @@ -from __future__ import print_function -import os -import sys -import json -import keras -from keras.datasets import mnist -from keras.models import Sequential -from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization -from keras.preprocessing.image 
import ImageDataGenerator -from keras import backend as K -import tensorflow as tf -import horovod.keras as hvd -import pandas as pd -from sklearn.model_selection import train_test_split - -# Get the images path -DATA_PATH = sys.argv[1] -HOROVOD_DIR = sys.argv[2] - -epochs = 6 -batch_size = 64 -os.environ["CUDA_VISIBLE_DEVICES"]="-1" - -# Define image parameters -IMAGE_WIDTH=128 -IMAGE_HEIGHT=128 -IMAGE_SIZE=(IMAGE_WIDTH, IMAGE_HEIGHT) -IMAGE_CHANNELS=3 # RGB color - -# Create a file-names list (JPG image-files only) -filenames = [file for file in os.listdir(DATA_PATH + "/cats_n_dogs/") if file.endswith('jpg')] -categories = [] - -# Create a categories and prediction classes map -categories_map = { - 'dog': 1, - 'cat': 0, -} - -# Create a pandas DataFrame for the full sample -for filename in filenames: - category = filename.split('.')[0] - categories.append([categories_map[category]]) - -df = pd.DataFrame({ - 'filename': filenames, - 'category': categories -}) -df['category'] = df['category'].astype('str'); - -# Prepare, test, and train the data -train_df, validate_df = train_test_split(df, test_size=0.20, random_state=42) -train_df = train_df.reset_index(drop=True) -validate_df = validate_df.reset_index(drop=True) -train_df['category'] = train_df['category'].astype('str'); -total_train = train_df.shape[0] -total_validate = validate_df.shape[0] - -total_train = train_df.shape[0] -total_validate = validate_df.shape[0] - -# Horovod: initialize Horovod. -hvd.init() - -# Horovod: pin GPU to be used to process local rank (one GPU per process). 
-config = tf.ConfigProto() -K.set_session(tf.Session(config=config)) - - -model = Sequential() - -model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS))) -model.add(BatchNormalization()) -model.add(MaxPooling2D(pool_size=(2, 2))) -model.add(Dropout(0.25)) - -model.add(Conv2D(64, (3, 3), activation='relu')) -model.add(BatchNormalization()) -model.add(MaxPooling2D(pool_size=(2, 2))) -model.add(Dropout(0.25)) - -model.add(Conv2D(128, (3, 3), activation='relu')) -model.add(BatchNormalization()) -model.add(MaxPooling2D(pool_size=(2, 2))) -model.add(Dropout(0.25)) - -model.add(Flatten()) -model.add(Dense(512, activation='relu')) -model.add(BatchNormalization()) -model.add(Dropout(0.5)) -model.add(Dense(1, activation='sigmoid')) - - -# Horovod: adjust learning rate based on number of GPUs. -opt = keras.optimizers.Adadelta(lr=1.0 * hvd.size()) - -# Horovod: add Horovod Distributed Optimizer. -opt = hvd.DistributedOptimizer(opt) - -model.compile(loss='binary_crossentropy', - optimizer=opt, - metrics=['accuracy']) - -model.summary() - -callbacks = [ - # Horovod: broadcast initial variable states from rank 0 to all other processes. - # This is necessary to ensure consistent initialization of all workers when - # training is started with random weights or restored from a checkpoint. - hvd.callbacks.BroadcastGlobalVariablesCallback(0), - - # Horovod: average metrics among workers at the end of every epoch. - # Note: This callback must be in the list before the ReduceLROnPlateau, - # TensorBoard or other metrics-based callbacks. - hvd.callbacks.MetricAverageCallback(), - - # Horovod: using `lr = 1.0 * hvd.size()` from the very beginning leads to worse final - # accuracy. Scale the learning rate `lr = 1.0` ---> `lr = 1.0 * hvd.size()` during - # the first five epochs. See https://arxiv.org/abs/1706.02677 for details. 
- hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=1), - - # Reduce the learning rate if training plateaues. - keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1), -] - -# Horovod: save checkpoints only on worker 0 to prevent other workers from corrupting them. -if hvd.rank() == 0: - callbacks.append(keras.callbacks.ModelCheckpoint(HOROVOD_DIR + '/checkpoints/checkpoint-{epoch}.h5')) - -# Set up ImageDataGenerators to do data augmentation for the training images. -train_datagen = ImageDataGenerator( - rotation_range=15, - rescale=1./255, - shear_range=0.1, - zoom_range=0.2, - horizontal_flip=True, - width_shift_range=0.1, - height_shift_range=0.1 -) -train_generator = train_datagen.flow_from_dataframe( - train_df, - DATA_PATH + "/cats_n_dogs/", - x_col = 'filename', - y_col = 'category', - target_size = IMAGE_SIZE, - class_mode = 'binary', - batch_size = batch_size -) - -validation_datagen = ImageDataGenerator(rescale=1./255) -validation_generator = validation_datagen.flow_from_dataframe( - validate_df, - DATA_PATH + "/cats_n_dogs/", - x_col = 'filename', - y_col = 'category', - target_size = IMAGE_SIZE, - class_mode = 'binary', - batch_size = batch_size -) - -# Train the model -history = model.fit_generator( - train_generator, - steps_per_epoch=total_train // batch_size, - callbacks=callbacks, - epochs=epochs, - verbose=1, - validation_data=validation_generator, - validation_steps=total_validate // batch_size -) - -#save the model only on worker 0 to prevent failures ("cannot lock file") -if hvd.rank() == 0: - model.save(HOROVOD_DIR + '/cats_dogs.hd5') - -print(pd.DataFrame(history.history)) - diff --git a/demos/gpu/horovod/image-classification/01-load-data-cats-n-dogs.ipynb b/demos/gpu/horovod/image-classification/01-load-data-cats-n-dogs.ipynb deleted file mode 100644 index 9081de7b..00000000 --- a/demos/gpu/horovod/image-classification/01-load-data-cats-n-dogs.ipynb +++ /dev/null @@ -1,520 +0,0 @@ -{ - "cells": [ - { - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "# Load Cats and Dogs Images" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "_uuid": "fe76d1d1ded592430e7548feacfa38dc42f085d9" - }, - "source": [ - "## Install Packages" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install --upgrade keras==2.2.4\n", - "!pip install --upgrade tensorflow==1.13.1 \n", - "!pip install --upgrade 'numpy<1.15.0'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> **Note:** After running the pip command you should restart the Jupyter kernel.
\n", - "> To restart the kernel, click on the kernel-restart button in the notebook menu toolbar (the refresh icon next to the **Code** button)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import Library" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", - "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n", - "/User/.pythonlibs/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n", - "/User/.pythonlibs/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n", - "/User/.pythonlibs/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n", - "/User/.pythonlibs/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n", - "/User/.pythonlibs/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future 
version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n", - "/User/.pythonlibs/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n", - "/User/.pythonlibs/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n", - "/User/.pythonlibs/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n", - "/User/.pythonlibs/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n", - "/User/.pythonlibs/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n", - "/User/.pythonlibs/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / 
'(1,)type'.\n", - " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n", - "/User/.pythonlibs/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n" - ] - } - ], - "source": [ - "# This Python 3 environment comes with many helpful analytics libraries installed.\n", - "# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python.\n", - "# For example, here are several helpful packages to load:\n", - "\n", - "import numpy as np # linear algebra\n", - "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", - "from keras.preprocessing.image import load_img\n", - "\n", - "# Input data files are available in the \"../input/\" directory.\n", - "# For example, running the following (by selecting 'Run' or pressing Shift+Enter) will list the files in the input directory:\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import random\n", - "\n", - "import os\n", - "import zipfile\n", - "\n", - "# Define locations\n", - "BASE_PATH = os.getcwd()\n", - "DATA_PATH = BASE_PATH + \"/cats_and_dogs_filtered/\"\n", - "!mkdir model\n", - "MODEL_PATH = BASE_PATH + '/model/'\n", - "\n", - "# Define image parameters\n", - "FAST_RUN = False\n", - "IMAGE_WIDTH=128\n", - "IMAGE_HEIGHT=128\n", - "IMAGE_SIZE=(IMAGE_WIDTH, IMAGE_HEIGHT)\n", - "IMAGE_CHANNELS=3 # RGB color\n", - "\n", - "# Any results you write to the current directory are saved as output." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/User/demos/gpu/horovod/image-classification/cats_and_dogs_filtered/catsndogs.zip'" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "DATA_PATH + 'catsndogs.zip'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Download the Data" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " % Total % Received % Xferd Average Speed Time Time Time Current\n", - " Dload Upload Total Spent Left Speed\n", - "100 65.2M 100 65.2M 0 0 13.9M 0 0:00:04 0:00:04 --:--:-- 15.3M\n" - ] - } - ], - "source": [ - "!mkdir cats_and_dogs_filtered\n", - "# Download a sample stocks file from Iguazio demo bucket in AWS S3\n", - "!curl -L \"iguazio-sample-data.s3.amazonaws.com/catsndogs.zip\" > ./cats_and_dogs_filtered/catsndogs.zip" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "zip_ref = zipfile.ZipFile(DATA_PATH + 'catsndogs.zip', 'r')\n", - "zip_ref.extractall('cats_and_dogs_filtered')\n", - "zip_ref.close()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "_uuid": "7335a579cc0268fba5d34d6f7558f33c187eedb3" - }, - "source": [ - "## Prepare the Traning Data" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "import json" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "def build_prediction_map(categories_map):\n", - " return {v:k for k ,v in categories_map.items()}" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", - "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a" - }, - "outputs": [], - 
"source": [ - "# Create a file-names list (JPG image-files only)\n", - "filenames = [file for file in os.listdir(DATA_PATH+\"/cats_n_dogs\") if file.endswith('jpg')]\n", - "categories = []\n", - "\n", - "# Categories and prediction-classes map\n", - "categories_map = {\n", - " 'dog': 1,\n", - " 'cat': 0,\n", - "}\n", - "prediction_map = build_prediction_map(categories_map)\n", - "with open(MODEL_PATH + 'prediction_classes_map.json', 'w') as f:\n", - " json.dump(prediction_map, f)\n", - "\n", - "# Create a pandas DataFrame for the full sample\n", - "for filename in filenames:\n", - " category = filename.split('.')[0]\n", - " categories.append([categories_map[category]])\n", - "\n", - "df = pd.DataFrame({\n", - " 'filename': filenames,\n", - " 'category': categories\n", - "})\n", - "df['category'] = df['category'].astype('str');" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "_uuid": "915bb9ba7063ab4d5c07c542419ae119003a5f98" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
filenamecategory
0cat.0.jpg[0]
1cat.1.jpg[0]
2cat.10.jpg[0]
3cat.100.jpg[0]
4cat.101.jpg[0]
\n", - "
" - ], - "text/plain": [ - " filename category\n", - "0 cat.0.jpg [0]\n", - "1 cat.1.jpg [0]\n", - "2 cat.10.jpg [0]\n", - "3 cat.100.jpg [0]\n", - "4 cat.101.jpg [0]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "_uuid": "72bf69e817f67f5a2eaff8561217e22077248553" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
filenamecategory
1995dog.995.jpg[1]
1996dog.996.jpg[1]
1997dog.997.jpg[1]
1998dog.998.jpg[1]
1999dog.999.jpg[1]
\n", - "
" - ], - "text/plain": [ - " filename category\n", - "1995 dog.995.jpg [1]\n", - "1996 dog.996.jpg [1]\n", - "1997 dog.997.jpg [1]\n", - "1998 dog.998.jpg [1]\n", - "1999 dog.999.jpg [1]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.tail()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "_uuid": "a999484fc35b73373fafe2253ae9db7ff46fdb90" - }, - "source": [ - "## Check the Total Image Count\n", - "\n", - "Check the total image count for each category.
\n", - "The data set has 12,000 cat images and 12,000 dog images." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "_uuid": "fa26f0bc7a6d835a24989790b20f3c6f32946f45" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD9CAYAAABQvqc9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAANK0lEQVR4nO3df6zdd13H8eeL1aGMuI7tsoy2s9MVEU0My82ckhBDDTAwdjEsGSHSzCb9Zyg4E1f9ZxH/2RLjcIlZbNiwKAGWSdIGp2QpEGMMkw7IYFRsM2G9trKL66YyF5i8/eN+Ctfb21/3tOfCfT8fSXO+38/3c8753OTkeb/93nPuTVUhSerhJau9AEnS9Bh9SWrE6EtSI0Zfkhox+pLUiNGXpEbWrfYCTueKK66ozZs3r/YyJOmHymOPPfbNqppZ7tgPdPQ3b97MgQMHVnsZkvRDJcnXT3XMyzuS1IjRl6RGjL4kNWL0JakRoy9JjZwx+kkeSPJ0ki8vGntFkkeSHBq3l43xJLk3yeEkjye5btF9to/5h5JsvzBfjiTpdM7mTP8vgLcsGdsF7K+qLcD+sQ9wI7Bl/NsJ3AcL3ySAO4FfAK4H7jzxjUKSND1njH5V/T3wzJLhbcCesb0HuGnR+IdqwWeB9UmuAt4MPFJVz1TVceARTv5GIkm6wFb64awrq+oYQFUdS/LKMb4BOLJo3twYO9X4SZLsZOF/CVx99dUrXN50bd71N6u9hDXla3e9bbWXsKb4+jx/1sJr83z/IDfLjNVpxk8erNpdVbNVNTszs+yniCVJK7TS6H9jXLZh3D49xueATYvmbQSOnmZckjRFK43+PuDEO3C2A3sXjb9rvIvnBuC5cRnok8Cbklw2foD7pjEmSZqiM17TT/IR4JeBK5LMsfAunLuAB5PsAJ4Cbh7THwbeChwGngduBaiqZ5L8EfC5Me99VbX0h8OSpAvsjNGvqnec4tDWZeYWcNspHucB4IFzWp0k6bzyE7mS1IjRl6RGjL4kNWL0JakRoy9JjRh9SWrE6EtSI0Zfkhox+pLUiNGXpEaMviQ1YvQlqRGjL0mNGH1JasToS1IjRl+SGjH6ktSI0ZekRoy+JDVi9CWpEaMvSY0YfUlqxOhLUiNGX5IaMfqS1IjRl6RGjL4kNWL0JakRoy9JjRh9SWrE6EtSI0ZfkhqZKPpJfifJE0m+nOQjSX40yTVJHk1yKMnHklw85r507B8exzefjy9AknT2Vhz9JBuA3wZmq+rngIuAW4C7gXuqagtwHNgx7rIDOF5V1wL3jHmSpCma9PLOOuDHkqwDXgYcA94IPDSO7wFuGtvbxj7j+NYkmfD5JUnnYMXRr6p/A/4YeIqF2D8HPAY8W1UvjmlzwIaxvQE4Mu774ph/+UqfX5J07ia5vHMZC2fv1wCvAi4Bblxmap24y2mOLX7cnUkOJDkwPz+/0uVJkpYxyeWdXwH+tarmq+o7wMeBXwLWj8s9ABuBo2N7DtgEMI5fCjyz9EGrandVzVbV7MzMzATLkyQtNUn0nwJuSPKycW1+K/AV4NPA28ec7cDesb1v7DOOf6qqTjrTlyRdOJNc03+UhR/Ifh74
0nis3cAdwO1JDrNwzf7+cZf7gcvH+O3ArgnWLUlagXVnnnJqVXUncOeS4SeB65eZ+wJw8yTPJ0majJ/IlaRGjL4kNWL0JakRoy9JjRh9SWrE6EtSI0Zfkhox+pLUiNGXpEaMviQ1YvQlqRGjL0mNGH1JasToS1IjRl+SGjH6ktSI0ZekRoy+JDVi9CWpEaMvSY0YfUlqxOhLUiNGX5IaMfqS1IjRl6RGjL4kNWL0JakRoy9JjRh9SWrE6EtSI0Zfkhox+pLUyETRT7I+yUNJ/jnJwSS/mOQVSR5JcmjcXjbmJsm9SQ4neTzJdefnS5Akna1Jz/T/FPi7qnoN8PPAQWAXsL+qtgD7xz7AjcCW8W8ncN+Ezy1JOkcrjn6SHwfeANwPUFXfrqpngW3AnjFtD3DT2N4GfKgWfBZYn+SqFa9cknTOJjnT/0lgHvhgki8k+UCSS4Arq+oYwLh95Zi/ATiy6P5zY0ySNCWTRH8dcB1wX1W9DvgW37+Us5wsM1YnTUp2JjmQ5MD8/PwEy5MkLTVJ9OeAuap6dOw/xMI3gW+cuGwzbp9eNH/TovtvBI4ufdCq2l1Vs1U1OzMzM8HyJElLrTj6VfXvwJEkPz2GtgJfAfYB28fYdmDv2N4HvGu8i+cG4LkTl4EkSdOxbsL7/xbw4SQXA08Ct7LwjeTBJDuAp4Cbx9yHgbcCh4Hnx1xJ0hRNFP2q+iIwu8yhrcvMLeC2SZ5PkjQZP5ErSY0YfUlqxOhLUiNGX5IaMfqS1IjRl6RGjL4kNWL0JakRoy9JjRh9SWrE6EtSI0Zfkhox+pLUiNGXpEaMviQ1YvQlqRGjL0mNGH1JasToS1IjRl+SGjH6ktSI0ZekRoy+JDVi9CWpEaMvSY0YfUlqxOhLUiNGX5IaMfqS1IjRl6RGjL4kNWL0JakRoy9JjUwc/SQXJflCkk+M/WuSPJrkUJKPJbl4jL907B8exzdP+tySpHNzPs703wMcXLR/N3BPVW0BjgM7xvgO4HhVXQvcM+ZJkqZoougn2Qi8DfjA2A/wRuChMWUPcNPY3jb2Gce3jvmSpCmZ9Ez//cDvAd8d+5cDz1bVi2N/DtgwtjcARwDG8efGfEnSlKw4+kl+FXi6qh5bPLzM1DqLY4sfd2eSA0kOzM/Pr3R5kqRlTHKm/3rg15J8DfgoC5d13g+sT7JuzNkIHB3bc8AmgHH8UuCZpQ9aVburaraqZmdmZiZYniRpqRVHv6p+v6o2VtVm4BbgU1X1TuDTwNvHtO3A3rG9b+wzjn+qqk4605ckXTgX4n36dwC3JznMwjX7+8f4/cDlY/x2YNcFeG5J0mmsO/OUM6uqzwCfGdtPAtcvM+cF4Obz8XySpJXxE7mS1IjRl6RGjL4kNWL0JakRoy9JjRh9SWrE6EtSI0Zfkhox+pLUiNGXpEaMviQ1YvQlqRGjL0mNGH1JasToS1IjRl+SGjH6ktSI0ZekRoy+JDVi9CWpEaMvSY0YfUlqxOhLUiNGX5IaMfqS1IjRl6RGjL4kNWL0JakRoy9JjRh9SWrE6EtSI0ZfkhpZcfSTbEry6SQHkzyR5D1j/BVJHklyaNxeNsaT5N4kh5M8nuS68/VFSJLOziRn+i8Cv1tVPwPcANyW5LXALmB/VW0B9o99gBuBLePfTuC+CZ5bkrQCK45+VR2rqs+P7f8CDgIbgG3AnjFtD3DT2N4GfKgWfBZYn+SqFa9cknTOzss1/SSbgdcBjwJXVtUxWPjGALxyTNsAHFl0t7kxJkmakomjn+TlwF8D762q/zzd1GXGapnH25nkQJID8/Pzky5PkrTIRNFP8iMsBP/DVfXxMfyNE5dtxu3TY3wO2LTo7huBo0sfs6p2V9VsVc3OzMxMsjxJ0hKTvHsnwP3Awar6k0WH9gHbx/Z2YO+i8XeNd/HcADx34jKQJGk61k1w39cDvwF8KckXx9gfAHcBDybZATwF3DyOPQy8FTgMPA/cOsFzS5JWYMXRr6p/YPnr
9ABbl5lfwG0rfT5J0uT8RK4kNWL0JakRoy9JjRh9SWrE6EtSI0Zfkhox+pLUiNGXpEaMviQ1YvQlqRGjL0mNGH1JasToS1IjRl+SGjH6ktSI0ZekRoy+JDVi9CWpEaMvSY0YfUlqxOhLUiNGX5IaMfqS1IjRl6RGjL4kNWL0JakRoy9JjRh9SWrE6EtSI0Zfkhox+pLUiNGXpEaMviQ1MvXoJ3lLkq8mOZxk17SfX5I6m2r0k1wE/BlwI/Ba4B1JXjvNNUhSZ9M+078eOFxVT1bVt4GPAtumvAZJamva0d8AHFm0PzfGJElTsG7Kz5dlxur/TUh2AjvH7n8n+eoFX1UfVwDfXO1FnEnuXu0VaBX42jy/fuJUB6Yd/Tlg06L9jcDRxROqajewe5qL6iLJgaqaXe11SEv52pyeaV/e+RywJck1SS4GbgH2TXkNktTWVM/0q+rFJO8GPglcBDxQVU9Mcw2S1Nm0L+9QVQ8DD0/7eQV42Uw/uHxtTkmq6syzJElrgr+GQZIaMfqS1IjRl6RGpv6DXE1PktvPYtq3qurPL/hipEWS/PpZTHthvPFD55E/yF3DkhwD7mP5T0Kf8M6qevWUliQBkOQ/gL2c/rX5hqr6qSktqQ3P9Ne2v6yq951uQpJLprUYaZG/rarfPN2EJH81rcV04pm+JDXimX5TSW6tqg+u9jrUV5LXsPCr1Tew8IsXjwL7qurgqi5sjfPdO3394WovQH0luYOFv6cR4J9Y+L1cAT7iX9S7sLy8s4YlefxUh4BXV9VLp7ke6YQk/wL8bFV9Z8n4xcATVbVldVa29nl5Z227EngzcHzJeIB/nP5ypO/5LvAq4OtLxq8ax3SBGP217RPAy6vqi0sPJPnM9Jcjfc97gf1JDvH9v6Z3NXAt8O5VW1UDXt6RtCqSvISFv5u9gYX/fc4Bn6uq/13Vha1xRn8NS/L5qrpu0jnS+eZrc/UY/TUsyf8Ah043Bbi0qq6e0pIkwNfmavKa/tr2mrOY43+ltRp8ba4Sz/QlqRE/nCVJjRh9SWrE6EtSI0Zfkhox+pLUyP8B6MsDCi7ECA8AAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "df['category'].value_counts().plot.bar()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "_uuid": "400a293df3c8499059d9175f3915187074efd971" - }, - "source": [ - "## Display the Sample Image" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "_uuid": "602b40f7353871cb161c60b5237f0da0096b2f47" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "sample = random.choice(filenames)\n", - "image = load_img(DATA_PATH+\"/cats_n_dogs/\"+sample)\n", - "plt.imshow(image)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/demos/gpu/horovod/image-classification/02-train-with-horovod-cats-n-dogs.ipynb b/demos/gpu/horovod/image-classification/02-train-with-horovod-cats-n-dogs.ipynb deleted file mode 100644 index 7cb681f7..00000000 --- a/demos/gpu/horovod/image-classification/02-train-with-horovod-cats-n-dogs.ipynb +++ /dev/null @@ -1,212 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install git+https://github.com/v3io/v3io-gputils" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "!rm -f {os.path.join(os.getcwd(), 'model', 'cats_dogs.hd5')}\n", - "!mkdir {os.path.join(os.getcwd(), 'checkpoints')}" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "HOROVOD_JOB_NAME = \"horovod-cats-n-dogs\"" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'apiVersion': 'kubeflow.org/v1alpha1',\n", - " 'kind': 'MPIJob',\n", - " 'metadata': {'creationTimestamp': '2019-09-18T11:59:45Z',\n", - " 
'generation': 1,\n", - " 'name': 'horovod-cats-n-dogs',\n", - " 'namespace': 'default-tenant',\n", - " 'resourceVersion': '2002259',\n", - " 'selfLink': '/apis/kubeflow.org/v1alpha1/namespaces/default-tenant/mpijobs/horovod-cats-n-dogs',\n", - " 'uid': 'ce99b2ad-da0b-11e9-a796-02e0e01c020a'},\n", - " 'spec': {'replicas': 1,\n", - " 'template': {'spec': {'containers': [{'command': ['mpirun',\n", - " 'python',\n", - " '/User/demos/gpu/horovod/image-classification/horovod_train_cats_n_dogs.py',\n", - " '/User/demos/gpu/horovod/image-classification/cats_and_dogs_filtered',\n", - " '/User/demos/gpu/horovod/image-classification'],\n", - " 'image': 'iguaziodocker/horovod:0.1.1',\n", - " 'name': 'horovod-cats-n-dogs',\n", - " 'resources': {'limits': {'nvidia.com/gpu': 1}},\n", - " 'securityContext': {'capabilities': {'add': ['IPC_LOCK']}},\n", - " 'volumeMounts': [{'mountPath': '/User',\n", - " 'name': 'v3io'}],\n", - " 'workingDir': '/User'}],\n", - " 'volumes': [{'flexVolume': {'driver': 'v3io/fuse',\n", - " 'options': {'accessKey': 'bd182781-6b24-4899-b2b7-a84608931aeb',\n", - " 'container': 'users',\n", - " 'subPath': '/iguazio'}},\n", - " 'name': 'v3io'}]}}}}\n" - ] - } - ], - "source": [ - "from v3io_gputils.mpijob import MpiJob\n", - "\n", - "job = MpiJob(HOROVOD_JOB_NAME, 'iguaziodocker/horovod:0.1.1', [os.path.join(os.getcwd(), 'horovod_train_cats_n_dogs.py'),\n", - " os.path.join(os.getcwd(), 'cats_and_dogs_filtered'),\n", - " os.getcwd()])\n", - "\n", - "job.replicas(1).gpus(1)\n", - "job.submit()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "horovod-cats-n-dogs-launcher-ghqw6 0/1 PodInitializing 0 4s\n", - "horovod-cats-n-dogs-worker-0 1/1 Running 0 8s\n" - ] - } - ], - "source": [ - "\n", - "!kubectl get pods | grep $HOROVOD_JOB_NAME" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": 
"stdout", - "output_type": "stream", - "text": [ - "apiVersion: kubeflow.org/v1alpha1\n", - "kind: MPIJob\n", - "metadata:\n", - " creationTimestamp: 2019-09-18T11:59:45Z\n", - " generation: 4\n", - " name: horovod-cats-n-dogs\n", - " namespace: default-tenant\n", - " resourceVersion: \"2002304\"\n", - " selfLink: /apis/kubeflow.org/v1alpha1/namespaces/default-tenant/mpijobs/horovod-cats-n-dogs\n", - " uid: ce99b2ad-da0b-11e9-a796-02e0e01c020a\n", - "spec:\n", - " backoffLimit: 6\n", - " replicas: 1\n", - " template:\n", - " metadata:\n", - " creationTimestamp: null\n", - " spec:\n", - " containers:\n", - " - command:\n", - " - mpirun\n", - " - python\n", - " - /User/demos/gpu/horovod/image-classification/horovod_train_cats_n_dogs.py\n", - " - /User/demos/gpu/horovod/image-classification/cats_and_dogs_filtered\n", - " - /User/demos/gpu/horovod/image-classification\n", - " image: iguaziodocker/horovod:0.1.1\n", - " name: horovod-cats-n-dogs\n", - " resources:\n", - " limits:\n", - " nvidia.com/gpu: \"1\"\n", - " securityContext:\n", - " capabilities:\n", - " add:\n", - " - IPC_LOCK\n", - " volumeMounts:\n", - " - mountPath: /User\n", - " name: v3io\n", - " workingDir: /User\n", - " volumes:\n", - " - flexVolume:\n", - " driver: v3io/fuse\n", - " options:\n", - " accessKey: bd182781-6b24-4899-b2b7-a84608931aeb\n", - " container: users\n", - " subPath: /iguazio\n", - " name: v3io\n", - "status:\n", - " launcherStatus: Active\n", - " startTime: 2019-09-18T11:59:49Z\n", - " workerReplicas: 1\n" - ] - } - ], - "source": [ - "!kubectl get mpijob $HOROVOD_JOB_NAME -o yaml" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'apiVersion': 'v1',\n", - " 'details': {'group': 'kubeflow.org',\n", - " 'kind': 'mpijobs',\n", - " 'name': 'horovod-cats-n-dogs',\n", - " 'uid': '1b58dd58-9c97-11e9-98d3-d8c4972b0204'},\n", - " 'kind': 'Status',\n", - " 'metadata': {},\n", - " 
'status': 'Success'}\n" - ] - } - ], - "source": [ - "job.delete()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/demos/gpu/horovod/image-classification/03-infer.ipynb b/demos/gpu/horovod/image-classification/03-infer.ipynb deleted file mode 100644 index 115b2af6..00000000 --- a/demos/gpu/horovod/image-classification/03-infer.ipynb +++ /dev/null @@ -1,548 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Create and Test a Model-Serving Nuclio Function\n", - "\n", - "This notebook demonstrates how to write an inference server, test it, and turn it into an auto-scaling Nuclio serverless function.\n", - "\n", - "- [Initialize Nuclio Emulation, Environment Variables, and Configuration](#image-class-infer-init-func)\n", - "- [Create and Load the Model and Set Up the Function Handler](#image-class-infer-create-n-load-model-n-set-up-func-handler)\n", - "- [Trigger the Function](#image-class-infer-func-trigger)\n", - "- [Prepare to Deploy the Function](#image-class-infer-func-deploy-prepare)\n", - "- [Deploy the Function](#image-class-infer-func-deploy)\n", - "- [Test the Function](#image-class-infer-func-test)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Initialize Nuclio Emulation, Environment Variables, and Configuration\n", - "\n", - "> **Note:** Use `# nuclio: ignore` for sections that don't need to be copied to the function." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# nuclio: ignore\n", - "import nuclio\n", - "import random\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "%nuclio: setting 'IMAGE_WIDTH' environment variable\n", - "%nuclio: setting 'IMAGE_HEIGHT' environment variable\n", - "%nuclio: setting 'version' environment variable\n" - ] - } - ], - "source": [ - "%%nuclio env\n", - "IMAGE_WIDTH = 128\n", - "IMAGE_HEIGHT = 128\n", - "version = 1.0" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "%nuclio: setting 'MODEL_PATH' environment variable\n", - "%nuclio: setting 'PREDICTION_MAP_PATH' environment variable\n" - ] - } - ], - "source": [ - "%nuclio env -c MODEL_PATH=/model/\n", - "%nuclio env -l MODEL_PATH=/User/demos/gpu/horovod/image-classification/cats_dogs.hd5\n", - "%nuclio env -l PREDICTION_MAP_PATH=./model/prediction_classes_map.json" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "%%nuclio cmd -c\n", - "pip install keras==2.2.4\n", - "pip install tensorflow==1.13.1 \n", - "pip install 'numpy<1.15.0'\n", - "pip install requests\n", - "pip install pillow" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'\n" - ] - } - ], - "source": [ - "%%nuclio config \n", - "spec.build.baseImage = \"python:3.6-jessie\"" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "mounting volume path /model as 
~/demos/gpu/horovod/image-classification/cats_dogs/model\n" - ] - } - ], - "source": [ - "%nuclio mount /model ~/demos/gpu/horovod/image-classification/cats_dogs/model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Create and Load the Model and Set Up the Function Handler" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "import numpy as np \n", - "from tensorflow import keras\n", - "from keras.models import load_model\n", - "from keras.preprocessing import image\n", - "from keras.preprocessing.image import load_img\n", - "import json\n", - "import requests\n", - "\n", - "import os\n", - "from os import environ, path\n", - "from tempfile import mktemp" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "model_file = environ['MODEL_PATH']\n", - "prediction_map_file = environ['PREDICTION_MAP_PATH']\n", - "\n", - "# Set image parameters\n", - "IMAGE_WIDTH = int(environ['IMAGE_WIDTH'])\n", - "IMAGE_HEIGHT = int(environ['IMAGE_HEIGHT'])\n", - "\n", - "# load model\n", - "def init_context(context): \n", - " context.model = load_model(model_file)\n", - " with open(prediction_map_file, 'r') as f:\n", - " context.prediction_map = json.load(f)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "def download_file(context, url, target_path):\n", - " with requests.get(url, stream=True) as response:\n", - " response.raise_for_status()\n", - " with open(target_path, 'wb') as f:\n", - " for chunk in response.iter_content(chunk_size=8192):\n", - " if chunk:\n", - " f.write(chunk)\n", - "\n", - " context.logger.info_with('Downloaded file',url=url)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - 
"def handler(context, event):\n", - " tmp_file = mktemp()\n", - " image_url = event.body.decode('utf-8').strip()\n", - " download_file(context, image_url, tmp_file)\n", - " \n", - " img = load_img(tmp_file, target_size=(IMAGE_WIDTH, IMAGE_HEIGHT))\n", - " x = image.img_to_array(img)\n", - " x = np.expand_dims(x, axis=0)\n", - "\n", - " images = np.vstack([x])\n", - " predicted_probability = context.model.predict_proba(images, batch_size=10)\n", - " predicted_class = list(zip(predicted_probability, map(lambda x: '1' if x >= 0.5 else '0', predicted_probability)))\n", - " actual_class = [(context.prediction_map[x[1]],x[0][0]) for x in predicted_class] \n", - " os.remove(tmp_file)\n", - " result = {'class':actual_class[0][0], 'dog-probability':float(actual_class[0][1])}\n", - " return json.dumps(result)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Trigger the Function" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING: Logging before flag parsing goes to stderr.\n", - "W0702 07:05:39.756876 140252009395584 deprecation_wrapper.py:119] From /User/.pythonlibs/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:529: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.\n", - "\n", - "W0702 07:05:39.784578 140252009395584 deprecation_wrapper.py:119] From /User/.pythonlibs/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:4420: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.\n", - "\n", - "W0702 07:05:39.814317 140252009395584 deprecation_wrapper.py:119] From /User/.pythonlibs/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:250: The name tf.get_default_graph is deprecated. 
Please use tf.compat.v1.get_default_graph instead.\n", - "\n", - "W0702 07:05:39.814856 140252009395584 deprecation_wrapper.py:119] From /User/.pythonlibs/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:178: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.\n", - "\n", - "W0702 07:05:39.815345 140252009395584 deprecation_wrapper.py:119] From /User/.pythonlibs/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:185: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.\n", - "\n", - "W0702 07:05:39.920376 140252009395584 deprecation_wrapper.py:119] From /User/.pythonlibs/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:2029: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.\n", - "\n", - "W0702 07:05:39.987011 140252009395584 deprecation_wrapper.py:119] From /User/.pythonlibs/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:4255: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.\n", - "\n", - "W0702 07:05:39.994644 140252009395584 deprecation.py:506] From /User/.pythonlibs/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:3721: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n", - "W0702 07:05:40.865994 140252009395584 deprecation_wrapper.py:119] From /User/.pythonlibs/lib/python3.6/site-packages/keras/optimizers.py:793: The name tf.train.Optimizer is deprecated. 
Please use tf.compat.v1.train.Optimizer instead.\n", - "\n", - "W0702 07:05:40.875694 140252009395584 deprecation.py:323] From /User/.pythonlibs/lib/python3.6/site-packages/tensorflow/python/ops/nn_impl.py:180: add_dispatch_support..wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Use tf.where in 2.0, which has the same broadcast rule as np.where\n" - ] - } - ], - "source": [ - "# nuclio: ignore\n", - "init_context(context)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Python> 2019-07-02 07:06:07,287 [info] Downloaded file: {'url': 'https://s3.amazonaws.com/iguazio-sample-data/images/catanddog/dog.391.jpg'}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "I0702 07:06:07.287323 140252009395584 logger.py:100] Downloaded file\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"class\": \"dog\", \"dog-probability\": 1.0}\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "# nuclio: ignore\n", - "# Select a sample for the test.\n", - "# Set both the local path for the test and the URL for downloading the sample from AWS S3.\n", - "DATA_LOCATION = \"./cats_and_dogs_filtered/\"\n", - "sample = random.choice(os.listdir(DATA_LOCATION+\"/cats_n_dogs\"))\n", - "image_local = DATA_LOCATION + \"cats_n_dogs/\"+sample # Temporary location for downloading the file \n", - "image_url = 'https://s3.amazonaws.com/iguazio-sample-data/images/catanddog/' + sample \n", - "\n", - "# Show the image\n", - "img = load_img(image_local, target_size=(IMAGE_WIDTH, IMAGE_HEIGHT))\n", - "plt.imshow(img)\n", - "\n", - "event = nuclio.Event(body=bytes(image_url, 'utf-8'))\n", - "output = handler(context, event)\n", - "print(output)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "%nuclio: notebook infer exported\n", - "Config:\n", - "apiVersion: nuclio.io/v1\n", - "kind: Function\n", - "metadata:\n", - " annotations:\n", - " nuclio.io/generated_by: function generated at 02-07-2019 by iguazio from /User/demos/image-classification/infer.ipynb\n", - " labels: {}\n", - " name: infer\n", - "spec:\n", - " build:\n", - " baseImage: python:3.6-jessie\n", - " commands:\n", - " - pip install keras==2.2.4\n", - " - pip install tensorflow==1.13.1\n", - " - pip install 'numpy<1.15.0'\n", - " - pip install requests\n", - " - pip install pillow\n", - " functionSourceCode: 
IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlciBvbiAyMDE5LTA3LTAyIDA3OjA2CgppbXBvcnQgbnVtcHkgYXMgbnAgCmZyb20gdGVuc29yZmxvdyBpbXBvcnQga2VyYXMKZnJvbSBrZXJhcy5tb2RlbHMgaW1wb3J0IGxvYWRfbW9kZWwKZnJvbSBrZXJhcy5wcmVwcm9jZXNzaW5nIGltcG9ydCBpbWFnZQpmcm9tIGtlcmFzLnByZXByb2Nlc3NpbmcuaW1hZ2UgaW1wb3J0IGxvYWRfaW1nCmltcG9ydCBqc29uCmltcG9ydCByZXF1ZXN0cwoKaW1wb3J0IG9zCmZyb20gb3MgaW1wb3J0IGVudmlyb24sIHBhdGgKZnJvbSB0ZW1wZmlsZSBpbXBvcnQgbWt0ZW1wCgptb2RlbF9maWxlID0gZW52aXJvblsnTU9ERUxfUEFUSCddCnByZWRpY3Rpb25fbWFwX2ZpbGUgPSBlbnZpcm9uWydQUkVESUNUSU9OX01BUF9QQVRIJ10KCklNQUdFX1dJRFRIID0gaW50KGVudmlyb25bJ0lNQUdFX1dJRFRIJ10pCklNQUdFX0hFSUdIVCA9IGludChlbnZpcm9uWydJTUFHRV9IRUlHSFQnXSkKCmRlZiBpbml0X2NvbnRleHQoY29udGV4dCk6IAogICAgY29udGV4dC5tb2RlbCA9IGxvYWRfbW9kZWwobW9kZWxfZmlsZSkKICAgIHdpdGggb3BlbihwcmVkaWN0aW9uX21hcF9maWxlLCAncicpIGFzIGY6CiAgICAgICAgY29udGV4dC5wcmVkaWN0aW9uX21hcCA9IGpzb24ubG9hZChmKQoKZGVmIGRvd25sb2FkX2ZpbGUoY29udGV4dCwgdXJsLCB0YXJnZXRfcGF0aCk6CiAgICB3aXRoIHJlcXVlc3RzLmdldCh1cmwsIHN0cmVhbT1UcnVlKSBhcyByZXNwb25zZToKICAgICAgICByZXNwb25zZS5yYWlzZV9mb3Jfc3RhdHVzKCkKICAgICAgICB3aXRoIG9wZW4odGFyZ2V0X3BhdGgsICd3YicpIGFzIGY6CiAgICAgICAgICAgIGZvciBjaHVuayBpbiByZXNwb25zZS5pdGVyX2NvbnRlbnQoY2h1bmtfc2l6ZT04MTkyKToKICAgICAgICAgICAgICAgIGlmIGNodW5rOgogICAgICAgICAgICAgICAgICAgIGYud3JpdGUoY2h1bmspCgogICAgY29udGV4dC5sb2dnZXIuaW5mb193aXRoKCdEb3dubG9hZGVkIGZpbGUnLHVybD11cmwpCgpkZWYgaGFuZGxlcihjb250ZXh0LCBldmVudCk6CiAgICB0bXBfZmlsZSA9IG1rdGVtcCgpCiAgICBpbWFnZV91cmwgPSBldmVudC5ib2R5LmRlY29kZSgndXRmLTgnKS5zdHJpcCgpCiAgICBkb3dubG9hZF9maWxlKGNvbnRleHQsIGltYWdlX3VybCwgdG1wX2ZpbGUpCiAgICAKICAgIGltZyA9IGxvYWRfaW1nKHRtcF9maWxlLCB0YXJnZXRfc2l6ZT0oSU1BR0VfV0lEVEgsIElNQUdFX0hFSUdIVCkpCiAgICB4ID0gaW1hZ2UuaW1nX3RvX2FycmF5KGltZykKICAgIHggPSBucC5leHBhbmRfZGltcyh4LCBheGlzPTApCgogICAgaW1hZ2VzID0gbnAudnN0YWNrKFt4XSkKICAgIHByZWRpY3RlZF9wcm9iYWJpbGl0eSA9IGNvbnRleHQubW9kZWwucHJlZGljdF9wcm9iYShpbWFnZXMsIGJhdGNoX3NpemU9MTApCiAgICBwcmVkaWN0ZWRfY2xhc3MgPSBsaXN0KHppcChwcmVkaWN0ZWRfcHJvYmFiaWxpdHksIG1hcChsYW1iZGEg
eDogJzEnIGlmIHggPj0gMC41IGVsc2UgJzAnLCBwcmVkaWN0ZWRfcHJvYmFiaWxpdHkpKSkKICAgIGFjdHVhbF9jbGFzcyA9IFsoY29udGV4dC5wcmVkaWN0aW9uX21hcFt4WzFdXSx4WzBdWzBdKSBmb3IgeCBpbiBwcmVkaWN0ZWRfY2xhc3NdICAgCiAgICBvcy5yZW1vdmUodG1wX2ZpbGUpCiAgICByZXN1bHQgPSB7J2NsYXNzJzphY3R1YWxfY2xhc3NbMF1bMF0sICdkb2ctcHJvYmFiaWxpdHknOmZsb2F0KGFjdHVhbF9jbGFzc1swXVsxXSl9CiAgICByZXR1cm4ganNvbi5kdW1wcyhyZXN1bHQpCgo=\n", - " noBaseImagesPull: true\n", - " env:\n", - " - name: IMAGE_WIDTH\n", - " value: '128'\n", - " - name: IMAGE_HEIGHT\n", - " value: '128'\n", - " - name: version\n", - " value: '1.0'\n", - " - name: MODEL_PATH\n", - " value: /model/\n", - " handler: infer:handler\n", - " runtime: python:3.6\n", - " volumes:\n", - " - volume:\n", - " flexVolume:\n", - " driver: v3io/fuse\n", - " options:\n", - " accessKey: 1e52ff93-a541-4880-abf1-d9b948af77de\n", - " container: users\n", - " subPath: /iguazio/demos/gpu/horovod/image-classification/cats_dogs/model\n", - " name: fs\n", - " volumeMount:\n", - " mountPath: /model\n", - " name: fs\n", - "\n", - "Code:\n", - "# Generated by nuclio.export.NuclioExporter on 2019-07-02 07:06\n", - "\n", - "import numpy as np \n", - "from tensorflow import keras\n", - "from keras.models import load_model\n", - "from keras.preprocessing import image\n", - "from keras.preprocessing.image import load_img\n", - "import json\n", - "import requests\n", - "\n", - "import os\n", - "from os import environ, path\n", - "from tempfile import mktemp\n", - "\n", - "model_file = environ['MODEL_PATH']\n", - "prediction_map_file = environ['PREDICTION_MAP_PATH']\n", - "\n", - "IMAGE_WIDTH = int(environ['IMAGE_WIDTH'])\n", - "IMAGE_HEIGHT = int(environ['IMAGE_HEIGHT'])\n", - "\n", - "def init_context(context): \n", - " context.model = load_model(model_file)\n", - " with open(prediction_map_file, 'r') as f:\n", - " context.prediction_map = json.load(f)\n", - "\n", - "def download_file(context, url, target_path):\n", - " with requests.get(url, stream=True) as response:\n", - " 
response.raise_for_status()\n", - " with open(target_path, 'wb') as f:\n", - " for chunk in response.iter_content(chunk_size=8192):\n", - " if chunk:\n", - " f.write(chunk)\n", - "\n", - " context.logger.info_with('Downloaded file',url=url)\n", - "\n", - "def handler(context, event):\n", - " tmp_file = mktemp()\n", - " image_url = event.body.decode('utf-8').strip()\n", - " download_file(context, image_url, tmp_file)\n", - " \n", - " img = load_img(tmp_file, target_size=(IMAGE_WIDTH, IMAGE_HEIGHT))\n", - " x = image.img_to_array(img)\n", - " x = np.expand_dims(x, axis=0)\n", - "\n", - " images = np.vstack([x])\n", - " predicted_probability = context.model.predict_proba(images, batch_size=10)\n", - " predicted_class = list(zip(predicted_probability, map(lambda x: '1' if x >= 0.5 else '0', predicted_probability)))\n", - " actual_class = [(context.prediction_map[x[1]],x[0][0]) for x in predicted_class] \n", - " os.remove(tmp_file)\n", - " result = {'class':actual_class[0][0], 'dog-probability':float(actual_class[0][1])}\n", - " return json.dumps(result)\n", - "\n", - "\n" - ] - } - ], - "source": [ - "%nuclio show" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Prepare to Deploy the Function\n", - "\n", - "Before you deploy the function, open a Jupyter terminal and run the following command:\n", - "\n", - "`pip install --upgrade nuclio-jupyter`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Deploy the Function\n", - "\n", - "Run the following command to deploy the function:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[nuclio.deploy] 2019-07-02 07:07:08,424 project name not found created new (ai)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "I0702 07:07:08.424754 140252009395584 deploy.py:317] project name not found created new (ai)\n" - ] - }, 
- { - "name": "stdout", - "output_type": "stream", - "text": [ - "[nuclio.deploy] 2019-07-02 07:07:09,507 (info) Building processor image\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "I0702 07:07:09.507237 140252009395584 deploy.py:274] (info) Building processor image\n" - ] - } - ], - "source": [ - "%nuclio deploy -n cats-dogs -p ai -c" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Test the Function" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Run a test with the new function. Replace \"function URL:port\" with the actual URL and port number.\n", - "# To get the function's URL, in the platform dashboard, navigate to the function page - Functions > ai > cats-dogs - and select the 'Status' tab.\n", - "!curl -X POST -d \"https://s3.amazonaws.com/iguazio-sample-data/images/catanddog/cat.123.jpg\" " - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/demos/gpu/horovod/image-classification/horovod_train_cats_n_dogs.py b/demos/gpu/horovod/image-classification/horovod_train_cats_n_dogs.py deleted file mode 100644 index e2b3ea2c..00000000 --- a/demos/gpu/horovod/image-classification/horovod_train_cats_n_dogs.py +++ /dev/null @@ -1,178 +0,0 @@ -from __future__ import print_function -import os -import sys -import json -import keras -from keras.datasets import mnist -from keras.models import Sequential -from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization -from keras.preprocessing.image import 
ImageDataGenerator -from keras import backend as K -import tensorflow as tf -import horovod.keras as hvd -import pandas as pd -from sklearn.model_selection import train_test_split - -# Get the images path -DATA_PATH = sys.argv[1] -HOROVOD_DIR = sys.argv[2] - -epochs = 6 -batch_size = 64 - -# Define image parameters -IMAGE_WIDTH=128 -IMAGE_HEIGHT=128 -IMAGE_SIZE=(IMAGE_WIDTH, IMAGE_HEIGHT) -IMAGE_CHANNELS=3 # RGB color - -# Create a file-names list (JPG image-files only) -filenames = [file for file in os.listdir(DATA_PATH + "/cats_n_dogs/") if file.endswith('jpg')] -categories = [] - -# Create a categories and prediction classes map -categories_map = { - 'dog': 1, - 'cat': 0, -} - -# Create a pandas DataFrame for the full sample -for filename in filenames: - category = filename.split('.')[0] - categories.append([categories_map[category]]) - -df = pd.DataFrame({ - 'filename': filenames, - 'category': categories -}) -df['category'] = df['category'].astype('str'); - -# Prepare, test, and train the data -train_df, validate_df = train_test_split(df, test_size=0.20, random_state=42) -train_df = train_df.reset_index(drop=True) -validate_df = validate_df.reset_index(drop=True) -train_df['category'] = train_df['category'].astype('str'); -total_train = train_df.shape[0] -total_validate = validate_df.shape[0] - -total_train = train_df.shape[0] -total_validate = validate_df.shape[0] - -# Horovod: initialize Horovod. -hvd.init() - -# Horovod: pin GPU to be used to process local rank (one GPU per process). 
-config = tf.ConfigProto() -config.gpu_options.allow_growth = True -config.gpu_options.visible_device_list = str(hvd.local_rank()) -K.set_session(tf.Session(config=config)) - - -model = Sequential() - -model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS))) -model.add(BatchNormalization()) -model.add(MaxPooling2D(pool_size=(2, 2))) -model.add(Dropout(0.25)) - -model.add(Conv2D(64, (3, 3), activation='relu')) -model.add(BatchNormalization()) -model.add(MaxPooling2D(pool_size=(2, 2))) -model.add(Dropout(0.25)) - -model.add(Conv2D(128, (3, 3), activation='relu')) -model.add(BatchNormalization()) -model.add(MaxPooling2D(pool_size=(2, 2))) -model.add(Dropout(0.25)) - -model.add(Flatten()) -model.add(Dense(512, activation='relu')) -model.add(BatchNormalization()) -model.add(Dropout(0.5)) -model.add(Dense(1, activation='sigmoid')) - - -# Horovod: adjust learning rate based on number of GPUs. -opt = keras.optimizers.Adadelta(lr=1.0 * hvd.size()) - -# Horovod: add Horovod Distributed Optimizer. -opt = hvd.DistributedOptimizer(opt) - -model.compile(loss='binary_crossentropy', - optimizer=opt, - metrics=['accuracy']) - -model.summary() - -callbacks = [ - # Horovod: broadcast initial variable states from rank 0 to all other processes. - # This is necessary to ensure consistent initialization of all workers when - # training is started with random weights or restored from a checkpoint. - hvd.callbacks.BroadcastGlobalVariablesCallback(0), - - # Horovod: average metrics among workers at the end of every epoch. - # Note: This callback must be in the list before the ReduceLROnPlateau, - # TensorBoard or other metrics-based callbacks. - hvd.callbacks.MetricAverageCallback(), - - # Horovod: using `lr = 1.0 * hvd.size()` from the very beginning leads to worse final - # accuracy. Scale the learning rate `lr = 1.0` ---> `lr = 1.0 * hvd.size()` during - # the first five epochs. See https://arxiv.org/abs/1706.02677 for details. 
- hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=1), - - # Reduce the learning rate if training plateaues. - keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1), -] - -# Horovod: save checkpoints only on worker 0 to prevent other workers from corrupting them. -if hvd.rank() == 0: - callbacks.append(keras.callbacks.ModelCheckpoint(HOROVOD_DIR + '/checkpoints/checkpoint-{epoch}.h5')) - -# Set up ImageDataGenerators to do data augmentation for the training images. -train_datagen = ImageDataGenerator( - rotation_range=15, - rescale=1./255, - shear_range=0.1, - zoom_range=0.2, - horizontal_flip=True, - width_shift_range=0.1, - height_shift_range=0.1 -) -train_generator = train_datagen.flow_from_dataframe( - train_df, - DATA_PATH + "/cats_n_dogs/", - x_col = 'filename', - y_col = 'category', - target_size = IMAGE_SIZE, - class_mode = 'binary', - batch_size = batch_size -) - -validation_datagen = ImageDataGenerator(rescale=1./255) -validation_generator = validation_datagen.flow_from_dataframe( - validate_df, - DATA_PATH + "/cats_n_dogs/", - x_col = 'filename', - y_col = 'category', - target_size = IMAGE_SIZE, - class_mode = 'binary', - batch_size = batch_size -) - -# Train the model -history = model.fit_generator( - train_generator, - steps_per_epoch=total_train // batch_size, - callbacks=callbacks, - epochs=epochs, - verbose=1, - validation_data=validation_generator, - validation_steps=total_validate // batch_size -) - -#save the model only on worker 0 to prevent failures ("cannot lock file") -if hvd.rank() == 0: - model.save(HOROVOD_DIR + '/cats_dogs.hd5') - -print(pd.DataFrame(history.history)) -