diff --git a/examples/getting-started-movielens/01-Download-Convert.ipynb b/examples/getting-started-movielens/01-Download-Convert.ipynb index 3ad89c9c2..23d9d445d 100644 --- a/examples/getting-started-movielens/01-Download-Convert.ipynb +++ b/examples/getting-started-movielens/01-Download-Convert.ipynb @@ -33,7 +33,7 @@ "\n", "# Getting Started MovieLens: Download and Convert\n", "\n", - "This notebook is created using the latest stable [merlin-hugectr](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr/tags), [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags), or [merlin-pytorch](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch/tags) container.\n", + "This notebook is created using the latest stable [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags) or [merlin-pytorch](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch/tags) container.\n", "\n", "## MovieLens25M\n", "\n", @@ -97,8 +97,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "downloading ml-25m.zip: 262MB [01:22, 3.18MB/s] \n", - "unzipping files: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:04<00:00, 2.00files/s]\n" + "downloading ml-25m.zip: 262MB [05:38, 774kB/s] \n", + "unzipping files: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:04<00:00, 1.80files/s]\n" ] } ], @@ -483,13 +483,6 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" - }, - "merlin": { - "containers": [ - "nvcr.io/nvidia/merlin/merlin-hugectr:latest", - "nvcr.io/nvidia/merlin/merlin-tensorflow:latest", - "nvcr.io/nvidia/merlin/merlin-pytorch:latest" - ] } }, "nbformat": 4, diff --git a/examples/getting-started-movielens/02-ETL-with-NVTabular.ipynb b/examples/getting-started-movielens/02-ETL-with-NVTabular.ipynb index 46d795b31..607c4c1a8 100644 --- a/examples/getting-started-movielens/02-ETL-with-NVTabular.ipynb +++ b/examples/getting-started-movielens/02-ETL-with-NVTabular.ipynb @@ -33,7 +33,7 @@ "\n", "# Getting Started MovieLens: ETL with NVTabular\n", "\n", - "This notebook is created using the latest stable [merlin-hugectr](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr/tags), [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags), or [merlin-pytorch](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch/tags) container. \n", + "This notebook is created using the latest stable [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags) or [merlin-pytorch](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch/tags) container. 
\n", "\n", "## Overview\n", "\n", @@ -84,10 +84,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/tf.py:52: UserWarning: Tensorflow dtype mappings did not load successfully due to an error: No module named 'tensorflow'\n", - " warn(f\"Tensorflow dtype mappings did not load successfully due to an error: {exc.msg}\")\n", - "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" + "2023-05-09 22:44:30.289311: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n" ] } ], @@ -234,16 +234,6 @@ "execution_count": 5, "metadata": {}, "outputs": [], - "source": [ - "CATEGORICAL_COLUMNS = [\"userId\", \"movieId\"]\n", - "LABEL_COLUMNS = [\"rating\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], "source": [ "userId = [\"userId\"] >> TagAsUserID()\n", "movieId = [\"movieId\"] >> TagAsItemID()\n", @@ -260,7 +250,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -280,113 +270,113 @@ "\n", "\n", "0\n", - "\n", - "TagAsUserID\n", + "\n", + "JoinExternal\n", "\n", - "\n", - "\n", - "1\n", - "\n", - "+\n", + "\n", + "\n", + "6\n", + "\n", + "output cols\n", "\n", - "\n", - "\n", - "0->1\n", - "\n", - "\n", + "\n", + "\n", + "0->6\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "3\n", - "\n", - "SelectionOp\n", + "5\n", + "\n", + "+\n", "\n", - "\n", + "\n", "\n", - "3->0\n", - "\n", - "\n", + "5->0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "1\n", + "\n", + "SelectionOp\n", "\n", "\n", "\n", "2\n", - "\n", - "JoinExternal\n", + "\n", + "TagAsItemID\n", "\n", "\n", - "\n", + "\n", "1->2\n", - "\n", - "\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "4\n", - "\n", - "TagAsItemID\n", + "1_selector\n", + "\n", + "['movieId']\n", "\n", - "\n", - "\n", - "4->1\n", + "\n", + "\n", + "1_selector->1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "2->5\n", "\n", "\n", "\n", - "\n", - "\n", - "6\n", - "\n", - "output cols\n", + "\n", + "\n", + "3\n", + "\n", + "SelectionOp\n", "\n", - "\n", - "\n", - "2->6\n", - "\n", - "\n", + "\n", + "\n", + "4\n", + "\n", + "TagAsUserID\n", + "\n", + "\n", + "\n", + "3->4\n", + "\n", + "\n", "\n", "\n", - "\n", + "\n", "3_selector\n", "\n", "['userId']\n", "\n", "\n", - "\n", + "\n", "3_selector->3\n", "\n", "\n", "\n", - "\n", - "\n", - "5\n", - "\n", - "SelectionOp\n", - "\n", - "\n", + "\n", "\n", - "5->4\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "5_selector\n", - "\n", - "['movieId']\n", - "\n", - "\n", - "\n", - "5_selector->5\n", - "\n", - "\n", + "4->5\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -410,7 +400,7 
@@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -426,7 +416,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -435,7 +425,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -455,185 +445,185 @@ "\n", "\n", "0\n", - "\n", - "TagAsUserID\n", + "\n", + "AddTags\n", "\n", - "\n", - "\n", - "3\n", - "\n", - "+\n", + "\n", + "\n", + "5\n", + "\n", + "+\n", "\n", - "\n", - "\n", - "0->3\n", - "\n", - "\n", + "\n", + "\n", + "0->5\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "7\n", - "\n", - "SelectionOp\n", + "10\n", + "\n", + "LambdaOp(lambda col: (col > 3).astype("int8"))\n", "\n", - "\n", + "\n", "\n", - "7->0\n", - "\n", - "\n", + "10->0\n", + "\n", + "\n", "\n", "\n", "\n", "1\n", - "\n", - "AddTags\n", + "\n", + "JoinExternal\n", "\n", - "\n", - "\n", - "2\n", - "\n", - "+\n", + "\n", + "\n", + "7\n", + "\n", + "Categorify\n", "\n", - "\n", - "\n", - "1->2\n", - "\n", - "\n", + "\n", + "\n", + "1->7\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "9\n", - "\n", - "LambdaOp(lambda col: (col > 3).astype("int8"))\n", + "2\n", + "\n", + "+\n", "\n", - "\n", + "\n", "\n", - "9->1\n", - "\n", - "\n", + "2->1\n", + "\n", + "\n", "\n", - "\n", - "\n", - "11\n", - "\n", - "output cols\n", + "\n", + "\n", + "9\n", + "\n", + "TagAsUserID\n", "\n", - "\n", - "\n", - "2->11\n", - "\n", - "\n", + "\n", + "\n", + "9->2\n", + "\n", + "\n", "\n", "\n", "\n", "6\n", - "\n", - "Categorify\n", + "\n", + "TagAsItemID\n", "\n", "\n", - "\n", + "\n", "6->2\n", - "\n", - "\n", + "\n", + "\n", "\n", - "\n", - "\n", - "5\n", - "\n", - "JoinExternal\n", + "\n", + "\n", + "3\n", + "\n", + "SelectionOp\n", "\n", - "\n", - "\n", - "3->5\n", - "\n", - "\n", + "\n", + "\n", + "3->6\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "8\n", - "\n", - "TagAsItemID\n", + "3_selector\n", + "\n", + "['movieId']\n", "\n", - "\n", - "\n", - "8->3\n", - "\n", - "\n", + "\n", + "\n", + "3_selector->3\n", + "\n", + "\n", "\n", "\n", "\n", "4\n", - "\n", - "SelectionOp\n", + "\n", + "SelectionOp\n", "\n", - "\n", - "\n", - "4->9\n", - "\n", - "\n", + "\n", + "\n", + "4->10\n", + "\n", + "\n", "\n", "\n", "\n", "4_selector\n", - "\n", - "['rating']\n", + "\n", + "['rating']\n", "\n", "\n", - "\n", + "\n", "4_selector->4\n", - "\n", - "\n", + "\n", + "\n", "\n", - "\n", - "\n", - "5->6\n", - "\n", - "\n", + "\n", + "\n", + "11\n", + "\n", + "output cols\n", "\n", - "\n", - "\n", - "7_selector\n", - "\n", - "['userId']\n", + "\n", + "\n", + "5->11\n", + "\n", + "\n", "\n", - "\n", - "\n", - "7_selector->7\n", - "\n", - "\n", + "\n", + "\n", + "7->5\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "10\n", - "\n", - "SelectionOp\n", + "8\n", + "\n", + "SelectionOp\n", "\n", - "\n", - "\n", - "10->8\n", - "\n", - "\n", + "\n", + "\n", + "8->9\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "10_selector\n", - "\n", - "['movieId']\n", + "8_selector\n", + "\n", + "['userId']\n", "\n", - "\n", - "\n", - "10_selector->10\n", - "\n", - "\n", + "\n", + "\n", + "8_selector->8\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 10, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -652,7 +642,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -675,31 +665,9 @@ "NVTabular solves this by 
providing the `Dataset` class, which breaks a set of parquet or csv files into into a collection of `cudf.DataFrame` chunks that can fit in device memory. The main purpose of this class is to abstract away the raw format of the data, and to allow other NVTabular classes to reliably materialize a `dask_cudf.DataFrame` collection (and/or collection-based iterator) on demand. Under the hood, the data decomposition corresponds to the construction of a [dask_cudf.DataFrame](https://docs.rapids.ai/api/cudf/stable/) object. By representing our dataset as a lazily-evaluated [Dask](https://www.dask.org/) collection, we can handle the calculation of complex global statistics (and later, can also iterate over the partitions while feeding data into a neural network). `part_size` defines the size read into GPU-memory at once." ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now instantiate dataset iterators to loop through our dataset (which we couldn't fit into GPU memory). HugeCTR expect the categorical input columns as `int64` and continuous/label columns as `float32` We need to enforce the required HugeCTR data types, so we set them in a dictionary and give as an argument when creating our dataset." - ] - }, { "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "dict_dtypes = {}\n", - "\n", - "for col in CATEGORICAL_COLUMNS:\n", - " dict_dtypes[col] = np.int64\n", - "\n", - "for col in LABEL_COLUMNS:\n", - " dict_dtypes[col] = np.float32" - ] - }, - { - "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -716,16 +684,32 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.USER_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.USER_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.USER_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.USER_ID have been deprecated and will be removed in a future version. 
Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.USER_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.USER_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", " warnings.warn(\n" ] }, @@ -733,17 +717,27 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 692 ms, sys: 354 ms, total: 1.05 s\n", - "Wall time: 1.06 s\n" + "CPU times: user 1.16 s, sys: 303 ms, total: 1.46 s\n", + "Wall time: 1.46 s\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.USER_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 14, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -762,7 +756,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -777,19 +771,19 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We transform our workflow with `.transform`. We are going to add `'userId', 'movieId', 'genres'` columns to `_metadata.json`, because this json file will be needed for HugeCTR training to obtain the required information from all the rows in each parquet file." + "We run the workflow and transform our datasets with `.transform`." 
] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 0 ns, sys: 1e+03 ns, total: 1e+03 ns\n", + "CPU times: user 1 µs, sys: 0 ns, total: 1 µs\n", "Wall time: 2.86 µs\n" ] } @@ -798,26 +792,21 @@ "%time\n", "workflow.transform(train_dataset).to_parquet(\n", " output_path=os.path.join(INPUT_DATA_DIR, \"train\"),\n", - " shuffle=nvt.io.Shuffle.PER_PARTITION,\n", - " cats=[\"userId\", \"movieId\", \"genres\"],\n", - " labels=[\"rating\"],\n", - " dtypes=dict_dtypes,\n", - " write_hugectr_keyset=True # only needed if using this ETL Notebook for training with HugeCTR\n", - " # should be removed otherwise to speed up computation\n", + " shuffle=nvt.io.Shuffle.PER_PARTITION\n", ")" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1 µs, sys: 0 ns, total: 1 µs\n", - "Wall time: 2.62 µs\n" + "CPU times: user 1e+03 ns, sys: 0 ns, total: 1e+03 ns\n", + "Wall time: 2.86 µs\n" ] } ], @@ -825,12 +814,7 @@ "%time\n", "workflow.transform(valid_dataset).to_parquet(\n", " output_path=os.path.join(INPUT_DATA_DIR, \"valid\"),\n", - " shuffle=False,\n", - " cats=[\"userId\", \"movieId\", \"genres\"],\n", - " labels=[\"rating\"],\n", - " dtypes=dict_dtypes,\n", - " write_hugectr_keyset=True # only needed if using this ETL Notebook for training with HugeCTR\n", - " # should be removed otherwise to speed up computation\n", + " shuffle=False\n", ")" ] }, @@ -843,7 +827,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -882,6 +866,8 @@ " properties.domain.name\n", " properties.embedding_sizes.cardinality\n", " properties.embedding_sizes.dimension\n", + " properties.value_count.min\n", + " properties.value_count.max\n", " \n", " \n", " \n", @@ -902,6 +888,8 @@ " userId\n", " 162542.0\n", " 512.0\n", + " NaN\n", + " NaN\n", " \n", " \n", " 1\n", @@ -916,10 +904,12 @@ " 0.0\n", " .//categories/unique.movieId.parquet\n", " 0.0\n", - " 56658.0\n", + " 56622.0\n", " movieId\n", - " 56659.0\n", + " 56623.0\n", " 512.0\n", + " NaN\n", + " NaN\n", " \n", " \n", " 2\n", @@ -938,6 +928,8 @@ " genres\n", " 21.0\n", " 16.0\n", + " 0.0\n", + " NaN\n", " \n", " \n", " 3\n", @@ -956,16 +948,18 @@ " NaN\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", "\n", "" ], "text/plain": [ - "[{'name': 'userId', 'tags': {}, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'start_index': 0, 'cat_path': './/categories/unique.userId.parquet', 'domain': {'min': 0, 'max': 162541, 'name': 'userId'}, 'embedding_sizes': {'cardinality': 162542, 'dimension': 512}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True), 'is_list': False, 'is_ragged': False}, {'name': 'movieId', 'tags': {}, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'start_index': 0, 'cat_path': './/categories/unique.movieId.parquet', 'domain': {'min': 0, 'max': 56658, 'name': 'movieId'}, 'embedding_sizes': {'cardinality': 56659, 'dimension': 512}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True), 'is_list': False, 'is_ragged': False}, {'name': 'genres', 'tags': {}, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'start_index': 0, 'cat_path': './/categories/unique.genres.parquet', 'domain': {'min': 0, 
'max': 20, 'name': 'genres'}, 'embedding_sizes': {'cardinality': 21, 'dimension': 16}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True), 'is_list': True, 'is_ragged': True}, {'name': 'rating', 'tags': {}, 'properties': {}, 'dtype': DType(name='int8', element_type=, element_size=8, element_unit=None, signed=True), 'is_list': False, 'is_ragged': False}]" + "[{'name': 'userId', 'tags': {}, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'start_index': 0, 'cat_path': './/categories/unique.userId.parquet', 'domain': {'min': 0, 'max': 162541, 'name': 'userId'}, 'embedding_sizes': {'cardinality': 162542, 'dimension': 512}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'movieId', 'tags': {}, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'start_index': 0, 'cat_path': './/categories/unique.movieId.parquet', 'domain': {'min': 0, 'max': 56622, 'name': 'movieId'}, 'embedding_sizes': {'cardinality': 56623, 'dimension': 512}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'genres', 'tags': {}, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'start_index': 0, 'cat_path': './/categories/unique.genres.parquet', 'domain': {'min': 0, 'max': 20, 'name': 'genres'}, 'embedding_sizes': {'cardinality': 21, 'dimension': 16}, 'value_count': {'min': 0, 'max': None}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=0, max=None)))), 'is_list': True, 'is_ragged': True}, {'name': 'rating', 'tags': {}, 'properties': {}, 'dtype': DType(name='int8', element_type=, element_size=8, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}]" ] }, - "execution_count": 18, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -991,7 +985,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -1001,7 +995,7 @@ " ['/workspace/nvt-examples/movielens/data/valid/part_0.parquet'])" ] }, - "execution_count": 19, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1023,7 +1017,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -1056,53 +1050,53 @@ " \n", " \n", " 0\n", - " 1691\n", - " 332\n", - " [1]\n", - " 1.0\n", + " 47641\n", + " 288\n", + " [1, 11, 6, 4]\n", + " 1\n", " \n", " \n", " 1\n", - " 1001\n", - " 154\n", - " [2, 6]\n", - " 1.0\n", + " 8745\n", + " 562\n", + " [3, 13, 2]\n", + " 1\n", " \n", " \n", " 2\n", - " 967\n", - " 245\n", - " [3, 2]\n", - " 0.0\n", + " 29817\n", + " 1118\n", + " [5, 13, 10, 2, 16]\n", + " 1\n", " \n", " \n", " 3\n", - " 150851\n", - " 622\n", - " [3, 5, 7, 15]\n", - " 1.0\n", + " 15207\n", + " 120\n", + " [3, 1, 14]\n", + " 1\n", " \n", " \n", " 4\n", - " 39553\n", - " 1146\n", - " [3, 1]\n", - " 1.0\n", + " 16\n", + " 843\n", + " [1]\n", + " 0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " userId movieId genres rating\n", - "0 1691 332 [1] 1.0\n", - "1 1001 154 [2, 6] 1.0\n", - "2 967 245 [3, 2] 0.0\n", - "3 150851 622 [3, 5, 7, 15] 1.0\n", - "4 39553 1146 
[3, 1] 1.0" + " userId movieId genres rating\n", + "0 47641 288 [1, 11, 6, 4] 1\n", + "1 8745 562 [3, 13, 2] 1\n", + "2 29817 1118 [5, 13, 10, 2, 16] 1\n", + "3 15207 120 [3, 1, 14] 1\n", + "4 16 843 [1] 0" ] }, - "execution_count": 20, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1127,7 +1121,7 @@ "- [Dataset](https://nvidia-merlin.github.io/core/main/api/merlin.io.html#merlin.io.Dataset)\n", "\n", "The next step for learning to use Merlin for creating a recommender system is to train a model.\n", - "Refer to [Training with TensorFlow](./03-Training-with-TF.ipynb), [Training with HugeCTR](./03-Training-with-HugeCTR.ipynb), or [Training with PyTorch](./03-Training-with-PyTorch.ipynb)." + "Refer to [Training with TensorFlow](./03-Training-with-TF.ipynb) or [Training with PyTorch](./03-Training-with-PyTorch.ipynb)." ] } ], @@ -1148,18 +1142,6 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" - }, - "merlin": { - "containers": [ - "nvcr.io/nvidia/merlin/merlin-hugectr:latest", - "nvcr.io/nvidia/merlin/merlin-tensorflow:latest", - "nvcr.io/nvidia/merlin/merlin-pytorch:latest" - ] - }, - "vscode": { - "interpreter": { - "hash": "ac3998f1d5f7dccf01e3ccfbb15b61acbc5b8a94bc76660c3c9107b3b4437593" - } } }, "nbformat": 4, diff --git a/examples/getting-started-movielens/03-Training-with-HugeCTR.ipynb b/examples/getting-started-movielens/03-Training-with-HugeCTR.ipynb deleted file mode 100644 index 292187fa2..000000000 --- a/examples/getting-started-movielens/03-Training-with-HugeCTR.ipynb +++ /dev/null @@ -1,458 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "afe54ce9", - "metadata": {}, - "outputs": [], - "source": [ - "# Copyright 2021 NVIDIA Corporation. All Rights Reserved.\n", - "#\n", - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# http://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License.\n", - "# ==============================================================================\n", - "\n", - "# Each user is responsible for checking the content of datasets and the\n", - "# applicable licenses and determining if suitable for the intended use." - ] - }, - { - "cell_type": "markdown", - "id": "83fad6cb", - "metadata": {}, - "source": [ - "\n", - "\n", - "# Getting Started MovieLens: Training with HugeCTR\n", - "\n", - "This notebook is created using the latest stable [merlin-hugectr](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr/tags) container.\n", - "\n", - "## Overview\n", - "\n", - "In this notebook, we want to provide an overview what HugeCTR framework is, its features and benefits. 
We will use HugeCTR to train a basic neural network architecture.\n", - "\n", - "Learning Objectives:\n", - "* Adopt NVTabular workflow to provide input files to HugeCTR\n", - "* Define HugeCTR neural network architecture\n", - "* Train a deep learning model with HugeCTR" - ] - }, - { - "cell_type": "markdown", - "id": "16956c69", - "metadata": {}, - "source": [ - "### Why use HugeCTR?\n", - "\n", - "HugeCTR is a GPU-accelerated recommender framework designed to distribute training across multiple GPUs and nodes and estimate Click-Through Rates (CTRs).
\n", - "\n", - "HugeCTR offers multiple advantages to train deep learning recommender systems:\n", - "1. **Speed**: HugeCTR is a highly efficient framework written in C++. We experienced up to 10x speed up. HugeCTR on a NVIDIA DGX A100 system proved to be the fastest commercially available solution for training the architecture Deep Learning Recommender Model (DLRM) developed by Facebook.\n", - "2. **Scale**: HugeCTR supports model parallel scaling. It distributes the large embedding tables over multiple GPUs or multiple nodes. \n", - "3. **Easy-to-use**: Easy-to-use Python API similar to Keras. Examples for popular deep learning recommender systems architectures (Wide&Deep, DLRM, DCN, DeepFM) are available." - ] - }, - { - "cell_type": "markdown", - "id": "a7e099b5", - "metadata": {}, - "source": [ - "### Other Features of HugeCTR\n", - "\n", - "HugeCTR is designed to scale deep learning models for recommender systems. It provides a list of other important features:\n", - "* Proficiency in oversubscribing models to train embedding tables with single nodes that don’t fit within the GPU or CPU memory (only required embeddings are prefetched from a parameter server per batch)\n", - "* Asynchronous and multithreaded data pipelines\n", - "* A highly optimized data loader.\n", - "* Supported data formats such as parquet and binary\n", - "* Integration with Triton Inference Server for deployment to production" - ] - }, - { - "cell_type": "markdown", - "id": "e8c0c88f", - "metadata": {}, - "source": [ - "### Getting Started" - ] - }, - { - "cell_type": "markdown", - "id": "065096f1", - "metadata": {}, - "source": [ - "In this example, we will train a neural network with HugeCTR. We will use preprocessed datasets generated via NVTabular in `02-ETL-with-NVTabular` notebook." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "add3372c", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], - "source": [ - "# External dependencies\n", - "import os\n", - "import nvtabular as nvt" - ] - }, - { - "cell_type": "markdown", - "id": "ac22a3ba", - "metadata": {}, - "source": [ - "We define our base directory, containing the data." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "b81cacce", - "metadata": {}, - "outputs": [], - "source": [ - "# path to preprocessed data\n", - "INPUT_DATA_DIR = os.environ.get(\n", - " \"INPUT_DATA_DIR\", os.path.expanduser(\"/workspace/nvt-examples/movielens/data/\")\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "6750ce01", - "metadata": {}, - "source": [ - "Let's load our saved workflow from the `02-ETL-with-NVTabular` notebook." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "5ab5923c", - "metadata": {}, - "outputs": [], - "source": [ - "workflow = nvt.Workflow.load(os.path.join(INPUT_DATA_DIR, \"workflow\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "aa5405c1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'userId': dtype('int64'),\n", - " 'movieId': dtype('int64'),\n", - " 'genres': dtype('int64'),\n", - " 'rating': dtype('int8')}" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "workflow.output_dtypes" - ] - }, - { - "cell_type": "markdown", - "id": "94bef620", - "metadata": {}, - "source": [ - "Note: We do not have numerical output columns" - ] - }, - { - "cell_type": "markdown", - "id": "9e8ad562", - "metadata": {}, - "source": [ - "Let's clear existing directory should it exist from previous runs and create the output folders." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "0c678d48", - "metadata": {}, - "outputs": [], - "source": [ - "MODEL_DIR = os.path.join(INPUT_DATA_DIR, \"model/movielens_hugectr/\")\n", - "!rm -rf {MODEL_DIR}\n", - "!mkdir -p {MODEL_DIR}\"1\"" - ] - }, - { - "cell_type": "markdown", - "id": "435c7e86", - "metadata": {}, - "source": [ - "## Scaling Accelerated training with HugeCTR" - ] - }, - { - "cell_type": "markdown", - "id": "5b76f6ea", - "metadata": {}, - "source": [ - "HugeCTR is a deep learning framework dedicated to recommendation systems. It is written in CUDA C++. As HugeCTR optimizes the training in CUDA++, we need to define the training pipeline and model architecture and execute it via the commandline. We will use the Python API, which is similar to Keras models.\n", - "\n", - "For more information on HugeCTR please consult the [HugeCTR repository](https://github.com/NVIDIA-Merlin/HugeCTR)." - ] - }, - { - "cell_type": "markdown", - "id": "0624ca30", - "metadata": {}, - "source": [ - "## Let's define our model\n", - "\n", - "Let's define our model. We will write the model to `./train_hugeCTR.py` and execute it afterwards." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "baaf3563", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Overwriting train_hugeCTR.py\n" - ] - } - ], - "source": [ - "%%writefile train_hugeCTR.py\n", - "\n", - "# External dependencies\n", - "import os\n", - "import nvtabular as nvt\n", - "from nvtabular.ops import get_embedding_sizes\n", - "import hugectr\n", - "from mpi4py import MPI \n", - "\n", - "# path to preprocessed data\n", - "INPUT_DATA_DIR = os.environ.get(\n", - " \"INPUT_DATA_DIR\", os.path.expanduser(\"/workspace/nvt-examples/movielens/data/\")\n", - ")\n", - "\n", - "MODEL_DIR = os.path.join(INPUT_DATA_DIR, \"model/movielens_hugectr/\")\n", - "\n", - "workflow = nvt.Workflow.load(os.path.join(INPUT_DATA_DIR, \"workflow\"))\n", - "\n", - "embeddings = get_embedding_sizes(workflow)\n", - "\n", - "solver = hugectr.CreateSolver(\n", - " vvgpu=[[0]],\n", - " batchsize=2048,\n", - " batchsize_eval=2048,\n", - " max_eval_batches=160,\n", - " i64_input_key=True,\n", - " use_mixed_precision=False,\n", - " repeat_dataset=True,\n", - ")\n", - "optimizer = hugectr.CreateOptimizer(optimizer_type=hugectr.Optimizer_t.Adam)\n", - "reader = hugectr.DataReaderParams(\n", - " data_reader_type=hugectr.DataReaderType_t.Parquet,\n", - " source=[INPUT_DATA_DIR + \"train/_file_list.txt\"],\n", - " eval_source=INPUT_DATA_DIR + \"valid/_file_list.txt\",\n", - " check_type=hugectr.Check_t.Non,\n", - " slot_size_array=[162542, 56586, 21],\n", - ")\n", - "\n", - "\n", - "model = hugectr.Model(solver, reader, optimizer)\n", - "\n", - "model.add(\n", - " hugectr.Input(\n", - " label_dim=1,\n", - " label_name=\"label\",\n", - " dense_dim=0,\n", - " dense_name=\"dense\",\n", - " data_reader_sparse_param_array=[\n", - " hugectr.DataReaderSparseParam(\"data1\", nnz_per_slot=10, is_fixed_length=False, slot_num=3)\n", - " ],\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.SparseEmbedding(\n", - " embedding_type=hugectr.Embedding_t.LocalizedSlotSparseEmbeddingHash,\n", - " workspace_size_per_gpu_in_mb=200,\n", - " embedding_vec_size=16,\n", - " combiner=\"sum\",\n", - " sparse_embedding_name=\"sparse_embedding1\",\n", - " bottom_name=\"data1\",\n", - " optimizer=optimizer,\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(\n", - " layer_type=hugectr.Layer_t.Reshape,\n", - " bottom_names=[\"sparse_embedding1\"],\n", - " top_names=[\"reshape1\"],\n", - " leading_dim=48,\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(\n", - " layer_type=hugectr.Layer_t.InnerProduct,\n", - " bottom_names=[\"reshape1\"],\n", - " top_names=[\"fc1\"],\n", - " num_output=128,\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(\n", - " layer_type=hugectr.Layer_t.ReLU,\n", - " bottom_names=[\"fc1\"],\n", - " top_names=[\"relu1\"],\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(\n", - " layer_type=hugectr.Layer_t.InnerProduct,\n", - " bottom_names=[\"relu1\"],\n", - " top_names=[\"fc2\"],\n", - " num_output=128,\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(\n", - " layer_type=hugectr.Layer_t.ReLU,\n", - " bottom_names=[\"fc2\"],\n", - " top_names=[\"relu2\"],\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(\n", - " layer_type=hugectr.Layer_t.InnerProduct,\n", - " bottom_names=[\"relu2\"],\n", - " top_names=[\"fc3\"],\n", - " num_output=1,\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(\n", - " layer_type=hugectr.Layer_t.BinaryCrossEntropyLoss,\n", - " 
bottom_names=[\"fc3\", \"label\"],\n", - " top_names=[\"loss\"],\n", - " )\n", - ")\n", - "\n", - "model.compile()\n", - "model.summary()\n", - "model.fit(max_iter=2000, display=100, eval_interval=200, snapshot=1900)\n", - "model.graph_to_json(graph_config_file=MODEL_DIR + \"1/movielens.json\")" - ] - }, - { - "cell_type": "markdown", - "id": "e177dee5", - "metadata": {}, - "source": [ - "Now please run the script we outputted above in the terminal using the following command:\n", - "\n", - "```python train_hugeCTR.py```" - ] - }, - { - "cell_type": "markdown", - "id": "2ae972f8", - "metadata": {}, - "source": [ - "After training terminates, we can see that multiple `.model` files and folders are generated." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "9a6a869f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0_opt_sparse_1900.model _dense_1900.model _opt_dense_1900.model\r\n", - "\r\n", - "0_sparse_1900.model:\r\n", - "emb_vector key slot_id\r\n" - ] - } - ], - "source": [ - "ls *.model" - ] - }, - { - "cell_type": "markdown", - "id": "2d7dace8", - "metadata": {}, - "source": [ - "Let's move these files into the `movielens_hugectr` folder. When we start the Triton Inference Server, we will be able to point it to that directory and ask it to load our model using the files we move there below." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "b085ceb4", - "metadata": {}, - "outputs": [], - "source": [ - "!mv *.model {MODEL_DIR}" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - }, - "merlin": { - "containers": [ - "nvcr.io/nvidia/merlin/merlin-hugectr:latest" - ] - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/getting-started-movielens/03-Training-with-TF.ipynb b/examples/getting-started-movielens/03-Training-with-TF.ipynb index aa385e25b..9f92725d6 100644 --- a/examples/getting-started-movielens/03-Training-with-TF.ipynb +++ b/examples/getting-started-movielens/03-Training-with-TF.ipynb @@ -69,7 +69,18 @@ "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-05-09 22:46:08.000436: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n" + ] + } + ], "source": [ "# External dependencies\n", "import os\n", @@ -104,39 +115,49 @@ "execution_count": 4, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. 
Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" + ] + }, { "name": "stderr", "output_type": "stream", "text": [ - "2023-01-20 11:26:39.373230: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + "2023-05-09 22:46:09.294968: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 22:46:09.295315: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 22:46:09.295465: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" + "[INFO]: sparse_operation_kit is imported\n", + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. 
The old module will be deleted in version 2.11.\n", + "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n", + "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n", + "[SOK INFO] Initialize finished, communication tool: horovod\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2023-01-20 11:26:40.514976: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-01-20 11:26:40.515413: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-01-20 11:26:40.515595: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-01-20 11:26:40.727125: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-05-09 22:46:10.932696: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-01-20 11:26:40.728141: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-01-20 11:26:40.728351: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-01-20 11:26:40.728508: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-01-20 11:26:41.475009: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-01-20 11:26:41.475231: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-01-20 11:26:41.475394: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-01-20 11:26:41.475508: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", - "2023-01-20 11:26:41.475570: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 
7.5\n", - "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + "2023-05-09 22:46:10.933669: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 22:46:10.933873: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 22:46:10.934029: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 22:46:11.056253: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 22:46:11.056472: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 22:46:11.056632: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 22:46:11.056905: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", + "2023-05-09 22:46:11.056916: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-05-09 22:46:11.056987: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } @@ -275,17 +296,27 @@ "execution_count": 8, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-05-09 22:46:16.890983: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x11d82730 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n", + "2023-05-09 22:46:16.891012: I tensorflow/compiler/xla/service/service.cc:181] StreamExecutor device (0): Quadro RTX 8000, Compute Capability 7.5\n", + "2023-05-09 22:46:16.904028: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n", + "2023-05-09 22:46:17.039510: I tensorflow/compiler/jit/xla_compilation_cache.cc:480] Compiled cluster using XLA! 
This line is logged at most once for the lifetime of the process.\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "1221/1221 [==============================] - 9s 6ms/step - loss: 0.6609 - auc: 0.5281 - regularization_loss: 0.0000e+00 - loss_batch: 0.6609 - val_loss: 0.6588 - val_auc: 0.5626 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 0.6537\n" + "1221/1221 [==============================] - 55s 42ms/step - loss: 0.6611 - auc: 0.5247 - regularization_loss: 0.0000e+00 - loss_batch: 0.6611 - val_loss: 0.6596 - val_auc: 0.5521 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 0.6610\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 8, @@ -333,50 +364,29 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 13, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Unsupported signature for serialization: ((Prediction(outputs={'rating/binary_output': TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs/outputs/rating/binary_output')}, targets={'rating/binary_output': TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs/targets/rating/binary_output')}, sample_weight={'rating/binary_output': None}, features=None, negative_candidate_ids=None), ), {}).\n" - ] - }, { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:absl:Function `_wrapped_model` contains input name(s) movieId, userId with unsupported characters which will be renamed to movieid, userid in the SavedModel.\n" + "WARNING:absl:Function `_wrapped_model` contains input name(s) movieId, userId with unsupported characters which will be renamed to movieid, userid in the SavedModel.\n", + "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, prepare_list_features_layer_call_fn, prepare_list_features_layer_call_and_return_conditional_losses, dense_6_layer_call_fn while saving (showing 5 of 48). These functions will not be directly callable after loading.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "INFO:tensorflow:Unsupported signature for serialization: ((Prediction(outputs={'rating/binary_output': TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs/outputs/rating/binary_output')}, targets={'rating/binary_output': TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs/targets/rating/binary_output')}, sample_weight={'rating/binary_output': None}, features=None, negative_candidate_ids=None), ), {}).\n" + "INFO:tensorflow:Assets written to: /tmp/tmp00m5bvqr/model.savedmodel/assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:tensorflow:Unsupported signature for serialization: ((Prediction(outputs={'rating/binary_output': TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs/outputs/rating/binary_output')}, targets={'rating/binary_output': TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs/targets/rating/binary_output')}, sample_weight={'rating/binary_output': None}, features=None, negative_candidate_ids=None), ), {}).\n", - "WARNING:absl:Found untraced functions such as train_compute_metrics, model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, dense_6_layer_call_fn, dense_6_layer_call_and_return_conditional_losses while saving (showing 5 of 47). 
These functions will not be directly callable after loading.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmp5eyrbewk/model.savedmodel/assets\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmp5eyrbewk/model.savedmodel/assets\n" + "INFO:tensorflow:Assets written to: /tmp/tmp00m5bvqr/model.savedmodel/assets\n" ] }, { @@ -404,37 +414,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Unsupported signature for serialization: ((Prediction(outputs={'rating/binary_output': TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs/outputs/rating/binary_output')}, targets={'rating/binary_output': TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs/targets/rating/binary_output')}, sample_weight={'rating/binary_output': None}, features=None, negative_candidate_ids=None), ), {}).\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Unsupported signature for serialization: ((Prediction(outputs={'rating/binary_output': TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs/outputs/rating/binary_output')}, targets={'rating/binary_output': TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs/targets/rating/binary_output')}, sample_weight={'rating/binary_output': None}, features=None, negative_candidate_ids=None), ), {}).\n", - "WARNING:absl:Function `_wrapped_model` contains input name(s) movieId, userId with unsupported characters which will be renamed to movieid, userid in the SavedModel.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Unsupported signature for serialization: ((Prediction(outputs={'rating/binary_output': TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs/outputs/rating/binary_output')}, targets={'rating/binary_output': TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs/targets/rating/binary_output')}, sample_weight={'rating/binary_output': None}, features=None, negative_candidate_ids=None), ), {}).\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:tensorflow:Unsupported signature for serialization: ((Prediction(outputs={'rating/binary_output': TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs/outputs/rating/binary_output')}, targets={'rating/binary_output': TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs/targets/rating/binary_output')}, sample_weight={'rating/binary_output': None}, features=None, negative_candidate_ids=None), ), {}).\n", - "WARNING:absl:Found untraced functions such as train_compute_metrics, model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, dense_6_layer_call_fn, dense_6_layer_call_and_return_conditional_losses while saving (showing 5 of 47). These functions will not be directly callable after loading.\n" + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. 
Compile it manually.\n", + "WARNING:absl:Function `_wrapped_model` contains input name(s) movieId, userId with unsupported characters which will be renamed to movieid, userid in the SavedModel.\n", + "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, prepare_list_features_layer_call_fn, prepare_list_features_layer_call_and_return_conditional_losses, dense_6_layer_call_fn while saving (showing 5 of 48). These functions will not be directly callable after loading.\n" ] }, { diff --git a/examples/getting-started-movielens/04-Triton-Inference-with-HugeCTR.ipynb b/examples/getting-started-movielens/04-Triton-Inference-with-HugeCTR.ipynb deleted file mode 100644 index 2f98e09c9..000000000 --- a/examples/getting-started-movielens/04-Triton-Inference-with-HugeCTR.ipynb +++ /dev/null @@ -1,559 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "d813a4ce", - "metadata": {}, - "outputs": [], - "source": [ - "# Copyright 2021 NVIDIA Corporation. All Rights Reserved.\n", - "#\n", - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# http://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License.\n", - "# ===================================\n", - "\n", - "# Each user is responsible for checking the content of datasets and the\n", - "# applicable licenses and determining if suitable for the intended use." - ] - }, - { - "cell_type": "markdown", - "id": "260dbfff", - "metadata": {}, - "source": [ - "\n", - "\n", - "# Serve Recommendations from the HugeCTR Model\n", - "\n", - "This notebook is created using the latest stable [merlin-hugectr](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr/tags) container. \n", - "\n", - "## Overview\n", - "\n", - "In this notebook, we will show how we do inference with our trained deep learning recommender model using Triton Inference Server. In this example, we deploy the NVTabular workflow and HugeCTR model with Triton Inference Server. We deploy them as an ensemble. For each request, Triton Inference Server will feed the input data through the NVTabular workflow and its output through the HugeCR model." - ] - }, - { - "cell_type": "markdown", - "id": "e0157e1c", - "metadata": {}, - "source": [ - "## Getting Started" - ] - }, - { - "cell_type": "markdown", - "id": "71304a10", - "metadata": {}, - "source": [ - "We need to write configuration files with the stored model weights and model configuration.\n", - "\n", - "Let us first move all of our model files to a directory that we will be able to access from the scripts that we will generate." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "c2efc44e", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import json\n", - "\n", - "# path to preprocessed data\n", - "INPUT_DATA_DIR = os.environ.get(\n", - " \"INPUT_DATA_DIR\", os.path.expanduser(\"/workspace/nvt-examples/movielens/data/\")\n", - ")\n", - "\n", - "# path to saved model\n", - "MODEL_DIR = os.path.join(INPUT_DATA_DIR, \"model/movielens_hugectr\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "6a9fbb6d", - "metadata": { - "tags": [ - "flake8-noqa-cell" - ] - }, - "outputs": [], - "source": [ - "file_to_write = \"\"\"\n", - "name: \"movielens_hugectr\"\n", - "backend: \"hugectr\"\n", - "max_batch_size: 64\n", - "input [\n", - " {\n", - " name: \"DES\"\n", - " data_type: TYPE_FP32\n", - " dims: [ -1 ]\n", - " },\n", - " {\n", - " name: \"CATCOLUMN\"\n", - " data_type: TYPE_INT64\n", - " dims: [ -1 ]\n", - " },\n", - " {\n", - " name: \"ROWINDEX\"\n", - " data_type: TYPE_INT32\n", - " dims: [ -1 ]\n", - " }\n", - "]\n", - "output [\n", - " {\n", - " name: \"OUTPUT0\"\n", - " data_type: TYPE_FP32\n", - " dims: [ -1 ]\n", - " }\n", - "]\n", - "instance_group [\n", - " {\n", - " count: 1\n", - " kind : KIND_GPU\n", - " gpus:[0]\n", - " }\n", - "]\n", - "\n", - "parameters [\n", - " {\n", - " key: \"config\"\n", - " value: { string_value: \"$MODEL_DIR/1/movielens.json\" }\n", - " },\n", - " {\n", - " key: \"gpucache\"\n", - " value: { string_value: \"true\" }\n", - " },\n", - " {\n", - " key: \"hit_rate_threshold\"\n", - " value: { string_value: \"0.8\" }\n", - " },\n", - " {\n", - " key: \"gpucacheper\"\n", - " value: { string_value: \"0.5\" }\n", - " },\n", - " {\n", - " key: \"label_dim\"\n", - " value: { string_value: \"1\" }\n", - " },\n", - " {\n", - " key: \"slots\"\n", - " value: { string_value: \"3\" }\n", - " },\n", - " {\n", - " key: \"cat_feature_num\"\n", - " value: { string_value: \"4\" }\n", - " },\n", - " {\n", - " key: \"des_feature_num\"\n", - " value: { string_value: \"0\" }\n", - " },\n", - " {\n", - " key: \"max_nnz\"\n", - " value: { string_value: \"2\" }\n", - " },\n", - " {\n", - " key: \"embedding_vector_size\"\n", - " value: { string_value: \"16\" }\n", - " },\n", - " {\n", - " key: \"embeddingkey_long_type\"\n", - " value: { string_value: \"true\" }\n", - " }\n", - "]\n", - "\"\"\"\n", - "\n", - "with open(os.path.join(MODEL_DIR, \"config.pbtxt\"), \"w\", encoding=\"utf-8\") as f:\n", - " f.write(file_to_write.replace(\"$MODEL_DIR\", MODEL_DIR))" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "0a23cb52", - "metadata": { - "tags": [ - "flake8-noqa-cell" - ] - }, - "outputs": [], - "source": [ - "config = json.dumps(\n", - "{\n", - " \"supportlonglong\": True,\n", - " \"models\": [\n", - " {\n", - " \"model\": \"movielens_hugectr\",\n", - " \"sparse_files\": [f\"{MODEL_DIR}/0_sparse_1900.model\"],\n", - " \"dense_file\": f\"{MODEL_DIR}/_dense_1900.model\",\n", - " \"network_file\": f\"{MODEL_DIR}/1/movielens.json\",\n", - " \"num_of_worker_buffer_in_pool\": \"1\",\n", - " \"num_of_refresher_buffer_in_pool\": \"1\",\n", - " \"cache_refresh_percentage_per_iteration\": \"0.2\",\n", - " \"deployed_device_list\": [\"0\"],\n", - " \"max_batch_size\": \"64\",\n", - " \"default_value_for_each_table\": [\"0.0\",\"0.0\"],\n", - " \"hit_rate_threshold\": \"0.9\",\n", - " \"gpucacheper\": \"0.5\",\n", - " \"maxnum_catfeature_query_per_table_per_sample\": [\"162542\", \"56632\",\"12\"],\n", - " \"embedding_vecsize_per_table\": 
[\"16\",\"16\",\"16\"],\n", - " \"gpucache\":\"true\"\n", - " }\n", - " ] \n", - "})\n", - "\n", - "config = json.loads(config)\n", - "with open(os.path.join(MODEL_DIR, \"ps.json\"), \"w\", encoding=\"utf-8\") as f:\n", - " json.dump(config, f)" - ] - }, - { - "cell_type": "markdown", - "id": "5eb3627f", - "metadata": {}, - "source": [ - "Let's import required libraries." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "f5b54092", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], - "source": [ - "import tritonclient.grpc as httpclient\n", - "import cudf\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "id": "4e4592a9", - "metadata": {}, - "source": [ - "### Load Models on Triton Server" - ] - }, - { - "cell_type": "markdown", - "id": "c6f50e9e", - "metadata": {}, - "source": [ - "In the running docker container, you can start triton server with the command below:" - ] - }, - { - "cell_type": "markdown", - "id": "bc8aa849", - "metadata": {}, - "source": [ - "```\n", - "tritonserver --model-repository= --backend-config=hugectr,ps=/ps.json --model-control-mode=explicit\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "a0626de4", - "metadata": {}, - "source": [ - "Since we add `--model-control-mode=explicit` flag, the model wont be loaded at this step, we will load the model below." - ] - }, - { - "cell_type": "markdown", - "id": "9b7de550", - "metadata": {}, - "source": [ - "Note: The model-repository path is `/root/nvt-examples/movielens/data/model`. The models haven't been loaded, yet. We can request triton server to load the saved ensemble. We initialize a triton client. The path for the json file is `/root/nvt-examples/movielens/data/model/movielens_hugectr/ps.json`." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "a9d1c74a", - "metadata": {}, - "outputs": [], - "source": [ - "# disable warnings\n", - "import warnings\n", - "\n", - "warnings.filterwarnings(\"ignore\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "f86290af", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "client created.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/tritonhttpclient/__init__.py:31: DeprecationWarning: The package `tritonhttpclient` is deprecated and will be removed in a future version. 
Please use instead `tritonclient.http`\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "import tritonhttpclient\n", - "\n", - "try:\n", - " triton_client = tritonhttpclient.InferenceServerClient(url=\"localhost:8000\", verbose=True)\n", - " print(\"client created.\")\n", - "except Exception as e:\n", - " print(\"channel creation failed: \" + str(e))" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "a2a2bed5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "GET /v2/health/live, headers None\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "triton_client.is_server_live()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "dac3dd79", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "POST /v2/repository/index, headers None\n", - "\n", - "\n", - "bytearray(b'[{\"name\":\"movielens_hugectr\"}]')\n" - ] - }, - { - "data": { - "text/plain": [ - "[{'name': 'movielens_hugectr'}]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "triton_client.get_model_repository_index()" - ] - }, - { - "cell_type": "markdown", - "id": "23b2df62", - "metadata": {}, - "source": [ - "Let's load our model to Triton Server." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "2a1ec18b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "POST /v2/repository/models/movielens_hugectr/load, headers None\n", - "{}\n", - "\n", - "Loaded model 'movielens_hugectr'\n", - "CPU times: user 3.99 ms, sys: 0 ns, total: 3.99 ms\n", - "Wall time: 1.04 s\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "triton_client.load_model(model_name=\"movielens_hugectr\")" - ] - }, - { - "cell_type": "markdown", - "id": "eec2d617", - "metadata": {}, - "source": [ - "Let's send a request to Inference Server and print out the response. Since in our example above we do not have continuous columns, below our only inputs are categorical columns." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "8d78ad75", - "metadata": {}, - "outputs": [], - "source": [ - "file_to_write = f\"\"\"\n", - "from tritonclient.utils import *\n", - "import tritonclient.http as httpclient\n", - "import numpy as np\n", - "import pandas as pd\n", - "import sys\n", - "\n", - "model_name = 'movielens_hugectr'\n", - "CATEGORICAL_COLUMNS = [\"userId\", \"movieId\", \"genres\"]\n", - "CONTINUOUS_COLUMNS = []\n", - "LABEL_COLUMNS = ['label']\n", - "emb_size_array = [162542, 29434, 20]\n", - "shift = np.insert(np.cumsum(emb_size_array), 0, 0)[:-1]\n", - "df = pd.read_parquet('{INPUT_DATA_DIR}/valid/part_0.parquet')\n", - "test_df = df.head(10)\n", - "\n", - "rp_lst = [0]\n", - "cur = 0\n", - "for i in range(1, 31):\n", - " if i % 3 == 0:\n", - " cur += 2\n", - " rp_lst.append(cur)\n", - " else:\n", - " cur += 1\n", - " rp_lst.append(cur)\n", - "\n", - "with httpclient.InferenceServerClient(\"localhost:8000\") as client:\n", - " test_df.iloc[:, :2] = test_df.iloc[:, :2] + shift[:2]\n", - " test_df.iloc[:, 2] = test_df.iloc[:, 2].apply(lambda x: [e + shift[2] for e in x])\n", - " embedding_columns = np.array([list(np.hstack(np.hstack(test_df[CATEGORICAL_COLUMNS].values)))], dtype='int64')\n", - " dense_features = np.array([[]], dtype='float32')\n", - " row_ptrs = np.array([rp_lst], dtype='int32')\n", - "\n", - " inputs = [httpclient.InferInput(\"DES\", dense_features.shape, np_to_triton_dtype(dense_features.dtype)),\n", - " httpclient.InferInput(\"CATCOLUMN\", embedding_columns.shape, np_to_triton_dtype(embedding_columns.dtype)),\n", - " httpclient.InferInput(\"ROWINDEX\", row_ptrs.shape, np_to_triton_dtype(row_ptrs.dtype))]\n", - "\n", - " inputs[0].set_data_from_numpy(dense_features)\n", - " inputs[1].set_data_from_numpy(embedding_columns)\n", - " inputs[2].set_data_from_numpy(row_ptrs)\n", - " outputs = [httpclient.InferRequestedOutput(\"OUTPUT0\")]\n", - "\n", - " response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs)\n", - "\n", - " result = response.get_response()\n", - " print(result)\n", - " print(\"Prediction Result:\")\n", - " print(response.as_numpy(\"OUTPUT0\"))\n", - "\"\"\"\n", - "\n", - "with open(\"wdl2predict.py\", \"w\", encoding=\"utf-8\") as f:\n", - " f.write(file_to_write)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "339340c6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/pandas/core/indexing.py:1851: SettingWithCopyWarning: \r\n", - "A value is trying to be set on a copy of a slice from a DataFrame.\r\n", - "Try using .loc[row_indexer,col_indexer] = value instead\r\n", - "\r\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\r\n", - " self._setitem_single_column(loc, val, pi)\r\n", - "/usr/local/lib/python3.8/dist-packages/pandas/core/indexing.py:1773: SettingWithCopyWarning: \r\n", - "A value is trying to be set on a copy of a slice from a DataFrame.\r\n", - "Try using .loc[row_indexer,col_indexer] = value instead\r\n", - "\r\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\r\n", - " self._setitem_single_column(ilocs[0], value, pi)\r\n", - "{'id': '1', 'model_name': 'movielens_hugectr', 'model_version': '1', 'parameters': {'NumSample': 10, 'DeviceID': 0}, 'outputs': [{'name': 'OUTPUT0', 
'datatype': 'FP32', 'shape': [10], 'parameters': {'binary_data_size': 40}}]}\r\n", - "Prediction Result:\r\n", - "[0.5346206 0.49736455 0.2987379 0.6282493 0.7548654 0.59079504\r\n", - " 0.55132014 0.90419775 0.47409508 0.5124942 ]\r\n" - ] - } - ], - "source": [ - "!python3 ./wdl2predict.py" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - }, - "merlin": { - "containers": [ - "nvcr.io/nvidia/merlin/merlin-hugectr:latest" - ] - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/getting-started-movielens/README.md b/examples/getting-started-movielens/README.md index d50ad3459..88b275ad4 100644 --- a/examples/getting-started-movielens/README.md +++ b/examples/getting-started-movielens/README.md @@ -8,9 +8,7 @@ Most users are familiar with the dataset and we will teach the basic concepts of - Use single-hot/multi-hot categorical input features with NVTabular. - Train a Merlin Model with Tensorflow. - Use the Merlin Dataloader with PyTorch. -- Train a HugeCTR model. - Serve recommendations from the Tensorflow model with the Triton Inference Server. -- Serve recommendations from the HugeCTR model with the Triton Inference Server. Explore the following notebooks: @@ -18,6 +16,4 @@ Explore the following notebooks: - [Feature Engineering with NVTabular](02-ETL-with-NVTabular.ipynb) - [Training with TensorFlow](03-Training-with-TF.ipynb) - [Training with PyTorch](03-Training-with-PyTorch.ipynb) -- [Training with HugeCTR](03-Training-with-HugeCTR.ipynb) - [Serve Recommendations with Triton Inference Server (Tensorflow)](04-Triton-Inference-with-TF.ipynb) -- [Serve Recommendations with Triton Inference Server (HugeCTR)](04-Triton-Inference-with-HugeCTR.ipynb) diff --git a/examples/scaling-criteo/01-Download-Convert.ipynb b/examples/scaling-criteo/01-Download-Convert.ipynb index 4abd1f90e..abed7b5d6 100644 --- a/examples/scaling-criteo/01-Download-Convert.ipynb +++ b/examples/scaling-criteo/01-Download-Convert.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -33,7 +33,7 @@ "\n", "# Scaling Criteo: Download and Convert\n", "\n", - "This notebook is created using the latest stable [merlin-hugectr](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr/tags), [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags), or [merlin-pytorch](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch/tags) container. \n", + "This notebook is created using the latest stable [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags) or [merlin-pytorch](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch/tags) container. 
\n", "\n", "## Criteo 1TB Click Logs dataset\n", "\n", @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -74,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -151,7 +151,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -177,7 +177,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -196,7 +196,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -227,7 +227,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -277,7 +277,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -297,28 +297,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "python3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - }, - "merlin": { - "containers": [ - "nvcr.io/nvidia/merlin/merlin-hugectr:latest", - "nvcr.io/nvidia/merlin/merlin-tensorflow:latest", - "nvcr.io/nvidia/merlin/merlin-pytorch:latest" - ] } }, "nbformat": 4, diff --git a/examples/scaling-criteo/02-ETL-with-NVTabular.ipynb b/examples/scaling-criteo/02-ETL-with-NVTabular.ipynb index 2ba7a196d..1d4ab60e5 100644 --- a/examples/scaling-criteo/02-ETL-with-NVTabular.ipynb +++ b/examples/scaling-criteo/02-ETL-with-NVTabular.ipynb @@ -2,12 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, - "metadata": { - "jupyter": { - "outputs_hidden": false - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "# Copyright 2021 NVIDIA Corporation. 
All Rights Reserved.\n", @@ -37,7 +33,7 @@ "\n", "# Scaling Criteo: ETL with NVTabular\n", "\n", - "This notebook is created using the latest stable [merlin-hugectr](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr/tags), [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags), or [merlin-pytorch](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch/tags) container.\n", + "This notebook is created using the latest stable [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags) or [merlin-pytorch](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch/tags) container.\n", "\n", "## Overview\n", "\n", @@ -80,12 +76,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "jupyter": { - "outputs_hidden": false - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "# Standard Libraries\n", @@ -122,19 +114,14 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": { - "jupyter": { - "outputs_hidden": false - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "# define some information about where to get our data\n", "BASE_DIR = os.environ.get(\"BASE_DIR\", \"/raid/data/criteo\")\n", "INPUT_DATA_DIR = os.environ.get(\"INPUT_DATA_DIR\", BASE_DIR + \"/converted/criteo\")\n", "OUTPUT_DATA_DIR = os.environ.get(\"OUTPUT_DATA_DIR\", BASE_DIR + \"/test_dask/output\")\n", - "USE_HUGECTR = bool(os.environ.get(\"USE_HUGECTR\", \"\"))\n", "stats_path = os.path.join(OUTPUT_DATA_DIR, \"test_dask/stats\")\n", "dask_workdir = os.path.join(OUTPUT_DATA_DIR, \"test_dask/workdir\")\n", "\n", @@ -163,7 +150,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -179,12 +166,8 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": { - "jupyter": { - "outputs_hidden": false - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -216,12 +199,8 @@ }, { "cell_type": "code", - "execution_count": 9, - "metadata": { - "jupyter": { - "outputs_hidden": false - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stderr", @@ -465,7 +444,7 @@ "" ] }, - "execution_count": 9, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -537,12 +516,8 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": { - "jupyter": { - "outputs_hidden": false - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "# define our dataset schema\n", @@ -568,24 +543,19 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We need to enforce the required HugeCTR data types, so we set them in a dictionary and give as an argument when creating our dataset. The dictionary defines the output datatypes of our datasets." + "Optionally, we can define the output datatypes of our datasets." 
] }, { "cell_type": "code", - "execution_count": 11, - "metadata": { - "jupyter": { - "outputs_hidden": false - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "dict_dtypes = {}\n", "\n", - "# The environment variable USE_HUGECTR defines, if we want to use the output for HugeCTR or another framework\n", "for col in CATEGORICAL_COLUMNS:\n", - " dict_dtypes[col] = np.int64 if USE_HUGECTR else np.int32\n", + " dict_dtypes[col] = np.int32\n", "\n", "for col in CONTINUOUS_COLUMNS:\n", " dict_dtypes[col] = np.float32\n", @@ -603,12 +573,8 @@ }, { "cell_type": "code", - "execution_count": 13, - "metadata": { - "jupyter": { - "outputs_hidden": false - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "train_dataset = nvt.Dataset(train_paths, engine=\"parquet\", part_size=part_size)\n", @@ -624,12 +590,8 @@ }, { "cell_type": "code", - "execution_count": 14, - "metadata": { - "jupyter": { - "outputs_hidden": false - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "output_train_dir = os.path.join(OUTPUT_DATA_DIR, \"train/\")\n", @@ -647,7 +609,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -661,10 +623,10 @@ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 15, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -676,12 +638,8 @@ }, { "cell_type": "code", - "execution_count": 16, - "metadata": { - "jupyter": { - "outputs_hidden": false - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -708,12 +666,8 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": { - "jupyter": { - "outputs_hidden": false - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -745,7 +699,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -754,35 +708,11 @@ } ], "metadata": { - "file_extension": ".py", "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "python3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - }, - "merlin": { - "containers": [ - "nvcr.io/nvidia/merlin/merlin-hugectr:latest", - "nvcr.io/nvidia/merlin/merlin-tensorflow:latest", - "nvcr.io/nvidia/merlin/merlin-pytorch:latest" - ] - }, - "mimetype": "text/x-python", - "npconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": 3 + } }, "nbformat": 4, "nbformat_minor": 4 diff --git a/examples/scaling-criteo/03-Training-with-HugeCTR.ipynb b/examples/scaling-criteo/03-Training-with-HugeCTR.ipynb deleted file mode 100644 index 3093b279a..000000000 --- a/examples/scaling-criteo/03-Training-with-HugeCTR.ipynb +++ /dev/null @@ -1,402 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Copyright 2021 NVIDIA Corporation. 
All Rights Reserved.\n", - "#\n", - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# http://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License.\n", - "# ==============================================================================\n", - "\n", - "# Each user is responsible for checking the content of datasets and the\n", - "# applicable licenses and determining if suitable for the intended use." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "# Scaling Criteo: Training with HugeCTR\n", - "\n", - "This notebook is created using the latest stable [merlin-hugectr](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr/tags) container.\n", - "\n", - "## Overview\n", - "\n", - "HugeCTR is an open-source framework to accelerate the training of CTR estimation models on NVIDIA GPUs. It is written in CUDA C++ and highly exploits GPU-accelerated libraries such as cuBLAS, cuDNN, and NCCL.

\n", - "HugeCTR offers multiple advantages to train deep learning recommender systems:\n", - "\n", - "1. **Speed**: HugeCTR is a highly efficient framework written in C++. We experienced up to 10x speed up. HugeCTR on a NVIDIA DGX A100 system proved to be the fastest commercially available solution for training the architecture Deep Learning Recommender Model (DLRM) developed by Facebook.\n", - "2. **Scale**: HugeCTR supports model parallel scaling. It distributes the large embedding tables over multiple GPUs or multiple nodes. \n", - "3. **Easy-to-use**: Easy-to-use Python API similar to Keras. Examples for popular deep learning recommender systems architectures (Wide&Deep, DLRM, DCN, DeepFM) are available.\n", - "\n", - "HugeCTR is able to train recommender system models with larger-than-memory embedding tables by leveraging a parameter server. \n", - "\n", - "You can find more information about HugeCTR from the [GitHub repository](https://github.com/NVIDIA-Merlin/HugeCTR).\n", - "\n", - "### Learning objectives\n", - "\n", - "In this notebook, we learn how to to use HugeCTR for training recommender system models\n", - "\n", - "- Use **HugeCTR** to define a recommender system model\n", - "- Train Facebook's [Deep Learning Recommendation Model](https://arxiv.org/pdf/1906.00091.pdf) with HugeCTR" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Training with HugeCTR" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As HugeCTR optimizes the training in CUDA++, we need to define the training pipeline and model architecture and execute it via the commandline. We will use the Python API, which is similar to Keras models." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you are not familiar with HugeCTR's Python API and parameters, you can read more in its GitHub repository:\n", - "- [HugeCTR User Guide](https://nvidia-merlin.github.io/HugeCTR/master/hugectr_user_guide.html)\n", - "- [HugeCTR Python API](https://nvidia-merlin.github.io/HugeCTR/master/api/python_interface.html)\n", - "- [HugeCTR example architectures](https://github.com/NVIDIA/HugeCTR/tree/main/samples)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "BASE_DIR = os.environ.get(\"BASE_DIR\", \"/raid/data/criteo\")\n", - "OUTPUT_DATA_DIR = os.environ.get(\"OUTPUT_DATA_DIR\", BASE_DIR + \"/test_dask/output\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, we clean the output directory." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "os.system(\"rm -rf \" + os.path.join(OUTPUT_DATA_DIR, \"criteo_hugectr/\"))\n", - "os.system(\"mkdir -p \" + os.path.join(OUTPUT_DATA_DIR, \"criteo_hugectr/1/\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We write the code to a `./model.py` file and execute it. The code creates snapshots, which we will use for inference in the next notebook. We use `graph_to_json` to convert the model to a JSON configuration, required for the inference." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "file_to_write = f\"\"\"\n", - "import hugectr\n", - "from mpi4py import MPI # noqa\n", - "\n", - "# HugeCTR\n", - "solver = hugectr.CreateSolver(\n", - " vvgpu=[[0]],\n", - " max_eval_batches=100,\n", - " batchsize_eval=2720,\n", - " batchsize=2720,\n", - " i64_input_key=True,\n", - " use_mixed_precision=False,\n", - " repeat_dataset=True,\n", - ")\n", - "optimizer = hugectr.CreateOptimizer(optimizer_type=hugectr.Optimizer_t.SGD)\n", - "reader = hugectr.DataReaderParams(\n", - " data_reader_type=hugectr.DataReaderType_t.Parquet,\n", - " source=[\"{os.path.join(OUTPUT_DATA_DIR, \"train/_file_list.txt\")}\"],\n", - " eval_source=\"{os.path.join(OUTPUT_DATA_DIR, \"valid/_file_list.txt\")}\",\n", - " check_type=hugectr.Check_t.Non,\n", - " slot_size_array=[\n", - " 10000000,\n", - " 10000000,\n", - " 3014529,\n", - " 400781,\n", - " 11,\n", - " 2209,\n", - " 11869,\n", - " 148,\n", - " 4,\n", - " 977,\n", - " 15,\n", - " 38713,\n", - " 10000000,\n", - " 10000000,\n", - " 10000000,\n", - " 584616,\n", - " 12883,\n", - " 109,\n", - " 37,\n", - " 17177,\n", - " 7425,\n", - " 20266,\n", - " 4,\n", - " 7085,\n", - " 1535,\n", - " 64,\n", - " ],\n", - ")\n", - "model = hugectr.Model(solver, reader, optimizer)\n", - "model.add(\n", - " hugectr.Input(\n", - " label_dim=1,\n", - " label_name=\"label\",\n", - " dense_dim=13,\n", - " dense_name=\"dense\",\n", - " data_reader_sparse_param_array=[hugectr.DataReaderSparseParam(\"data1\", 1, False, 26)],\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.SparseEmbedding(\n", - " embedding_type=hugectr.Embedding_t.LocalizedSlotSparseEmbeddingHash,\n", - " workspace_size_per_gpu_in_mb=6000,\n", - " embedding_vec_size=128,\n", - " combiner=\"sum\",\n", - " sparse_embedding_name=\"sparse_embedding1\",\n", - " bottom_name=\"data1\",\n", - " optimizer=optimizer,\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(\n", - " layer_type=hugectr.Layer_t.InnerProduct,\n", - " bottom_names=[\"dense\"],\n", - " top_names=[\"fc1\"],\n", - " num_output=512,\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(layer_type=hugectr.Layer_t.ReLU, bottom_names=[\"fc1\"], top_names=[\"relu1\"])\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(\n", - " layer_type=hugectr.Layer_t.InnerProduct,\n", - " bottom_names=[\"relu1\"],\n", - " top_names=[\"fc2\"],\n", - " num_output=256,\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(layer_type=hugectr.Layer_t.ReLU, bottom_names=[\"fc2\"], top_names=[\"relu2\"])\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(\n", - " layer_type=hugectr.Layer_t.InnerProduct,\n", - " bottom_names=[\"relu2\"],\n", - " top_names=[\"fc3\"],\n", - " num_output=128,\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(layer_type=hugectr.Layer_t.ReLU, bottom_names=[\"fc3\"], top_names=[\"relu3\"])\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(\n", - " layer_type=hugectr.Layer_t.Interaction,\n", - " bottom_names=[\"relu3\", \"sparse_embedding1\"],\n", - " top_names=[\"interaction1\"],\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(\n", - " layer_type=hugectr.Layer_t.InnerProduct,\n", - " bottom_names=[\"interaction1\"],\n", - " top_names=[\"fc4\"],\n", - " num_output=1024,\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(layer_type=hugectr.Layer_t.ReLU, bottom_names=[\"fc4\"], top_names=[\"relu4\"])\n", - ")\n", - "model.add(\n", - " 
hugectr.DenseLayer(\n", - " layer_type=hugectr.Layer_t.InnerProduct,\n", - " bottom_names=[\"relu4\"],\n", - " top_names=[\"fc5\"],\n", - " num_output=1024,\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(layer_type=hugectr.Layer_t.ReLU, bottom_names=[\"fc5\"], top_names=[\"relu5\"])\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(\n", - " layer_type=hugectr.Layer_t.InnerProduct,\n", - " bottom_names=[\"relu5\"],\n", - " top_names=[\"fc6\"],\n", - " num_output=512,\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(layer_type=hugectr.Layer_t.ReLU, bottom_names=[\"fc6\"], top_names=[\"relu6\"])\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(\n", - " layer_type=hugectr.Layer_t.InnerProduct,\n", - " bottom_names=[\"relu6\"],\n", - " top_names=[\"fc7\"],\n", - " num_output=256,\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(layer_type=hugectr.Layer_t.ReLU, bottom_names=[\"fc7\"], top_names=[\"relu7\"])\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(\n", - " layer_type=hugectr.Layer_t.InnerProduct,\n", - " bottom_names=[\"relu7\"],\n", - " top_names=[\"fc8\"],\n", - " num_output=1,\n", - " )\n", - ")\n", - "model.add(\n", - " hugectr.DenseLayer(\n", - " layer_type=hugectr.Layer_t.BinaryCrossEntropyLoss,\n", - " bottom_names=[\"fc8\", \"label\"],\n", - " top_names=[\"loss\"],\n", - " )\n", - ")\n", - "\n", - "MAX_ITER = 10000\n", - "EVAL_INTERVAL = 3200\n", - "model.compile()\n", - "model.summary()\n", - "model.fit(max_iter=MAX_ITER, eval_interval=EVAL_INTERVAL, display=1000, snapshot=3200)\n", - "model.graph_to_json(graph_config_file=\"{os.path.join(OUTPUT_DATA_DIR, \"criteo_hugectr/1/\", \"criteo.json\")}\")\n", - "\"\"\"\n", - "with open('./model.py', 'w', encoding='utf-8') as fi:\n", - " fi.write(file_to_write)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can execute the training process." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "HugeCTR Version: 4.1\n", - "====================================================Model Init=====================================================\n", - "[HCTR][13:56:03.374][WARNING][RK0][main]: The model name is not specified when creating the solver.\n", - "[HCTR][13:56:03.374][INFO][RK0][main]: Global seed is 2831956451\n", - "[HCTR][13:56:03.376][INFO][RK0][main]: Device to NUMA mapping:\n", - " GPU 0 -> node 0\n", - "[HCTR][13:56:06.490][WARNING][RK0][main]: Peer-to-peer access cannot be fully enabled.\n", - "[HCTR][13:56:06.490][INFO][RK0][main]: Start all2all warmup\n", - "[HCTR][13:56:06.490][INFO][RK0][main]: End all2all warmup\n", - "[HCTR][13:56:06.491][INFO][RK0][main]: Using All-reduce algorithm: NCCL\n", - "[HCTR][13:56:06.493][INFO][RK0][main]: Device 0: Tesla V100-SXM2-32GB-LS\n", - "[HCTR][13:56:06.493][INFO][RK0][main]: num of DataReader workers for train: 1\n", - "[HCTR][13:56:06.493][INFO][RK0][main]: num of DataReader workers for eval: 1\n", - "[HCTR][13:56:06.540][INFO][RK0][main]: Vocabulary size: 54120457\n", - "[HCTR][13:56:06.541][INFO][RK0][main]: max_vocabulary_size_per_gpu_=12288000\n", - "[HCTR][13:56:06.562][INFO][RK0][main]: Graph analysis to resolve tensor dependency\n", - "===================================================Model Compile===================================================\n", - "[HCTR][13:56:24.055][INFO][RK0][main]: gpu0 start to init embedding\n", - "[HCTR][13:56:24.107][INFO][RK0][main]: gpu0 init embedding done\n", - "[HCTR][13:56:24.111][INFO][RK0][main]: Starting AUC NCCL warm-up\n", - "[HCTR][13:56:24.113][INFO][RK0][main]: Warm-up done\n", - "===================================================Model Summary===================================================\n", - "run_time: 125.9122965335846\n" - ] - } - ], - "source": [ - "import time\n", - "\n", - "start = time.time()\n", - "os.system('python model.py')\n", - "end = time.time() - start\n", - "print(f\"run_time: {end}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We trained the model and created snapshots." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - }, - "merlin": { - "containers": [ - "nvcr.io/nvidia/merlin/merlin-hugectr:latest" - ] - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/examples/scaling-criteo/04-Triton-Inference-with-HugeCTR.ipynb b/examples/scaling-criteo/04-Triton-Inference-with-HugeCTR.ipynb deleted file mode 100644 index 0d875274e..000000000 --- a/examples/scaling-criteo/04-Triton-Inference-with-HugeCTR.ipynb +++ /dev/null @@ -1,639 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Copyright 2021 NVIDIA Corporation. 
All Rights Reserved.\n", - "#\n", - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# http://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License.\n", - "# ==============================================================================\n", - "\n", - "# Each user is responsible for checking the content of datasets and the\n", - "# applicable licenses and determining if suitable for the intended use." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "# Scaling Criteo: Triton Inference with HugeCTR\n", - "\n", - "This notebook is created using the latest stable [merlin-hugectr](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr/tags) container. \n", - "\n", - "## Overview\n", - "\n", - "The last step is to deploy the ETL workflow and saved model to production. In the production setting, we want to transform the input data as during training (ETL). We need to apply the same mean/std for continuous features and use the same categorical mapping to convert the categories to continuous integer before we use the deep learning model for a prediction. Therefore, we deploy the NVTabular workflow with the HugeCTR model as an ensemble model to Triton Inference. The ensemble model guarantees that the same transformation are applied to the raw inputs.\n", - "\n", - "\n", - "\n", - "### Learning objectives\n", - "\n", - "In this notebook, we learn how to deploy our models to production:\n", - "\n", - "- Use **NVTabular** to generate config and model files for Triton Inference Server\n", - "- Deploy an ensemble of NVTabular workflow and HugeCTR model\n", - "- Send example request to Triton Inference Server" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Deploying Ensemble to Triton Inference Server\n", - "\n", - "First, we need to generate the Triton Inference Server configurations and save the models in the correct format. In the previous notebooks [02-ETL-with-NVTabular](./02-ETL-with-NVTabular.ipynb) and [03-Training-with-HugeCTR](./03-Training-with-HugeCTR.ipynb) we saved the NVTabular workflow and HugeCTR model to disk. We will load them." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "After training terminates, we can see that two `.model` files are generated. We need to move them inside a temporary folder, like `criteo_hugectr/1`. Let's create these folders." 
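The folders referred to here were created in the (also removed) training notebook. For completeness, a one-line equivalent using the same `OUTPUT_DATA_DIR` default that the next cell defines, offered only as a sketch:

```python
# Sketch: recreate the temporary folder that the trained *.model files are moved into.
import os

BASE_DIR = os.environ.get("BASE_DIR", "/raid/data/criteo")
OUTPUT_DATA_DIR = os.environ.get("OUTPUT_DATA_DIR", BASE_DIR + "/test_dask/output")
os.makedirs(os.path.join(OUTPUT_DATA_DIR, "criteo_hugectr", "1"), exist_ok=True)
```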
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (1.26.12) or chardet (3.0.4) doesn't match a supported version!\n", - " warnings.warn(\"urllib3 ({}) or chardet ({}) doesn't match a supported \"\n" - ] - } - ], - "source": [ - "import os\n", - "import glob\n", - "import json\n", - "\n", - "import numpy as np\n", - "import nvtabular as nvt\n", - "import tritonclient.grpc as grpcclient\n", - "\n", - "from merlin.core.dispatch import get_lib\n", - "from merlin.systems.triton import convert_df_to_triton_input\n", - "from nvtabular.inference.triton import export_hugectr_ensemble\n", - "\n", - "BASE_DIR = os.environ.get(\"BASE_DIR\", \"/raid/data/criteo\")\n", - "OUTPUT_DATA_DIR = os.environ.get(\"OUTPUT_DATA_DIR\", BASE_DIR + \"/test_dask/output\")\n", - "original_data_path = os.environ.get(\"INPUT_FOLDER\", BASE_DIR + \"/converted/criteo\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we move our saved `.model` files inside 1 folder. We use only the last snapshot after `9600` iterations." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "os.system(\"mv *9600.model \" + os.path.join(OUTPUT_DATA_DIR, \"criteo_hugectr/1/\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We need to load the NVTabular workflow first" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "workflow = nvt.Workflow.load(os.path.join(OUTPUT_DATA_DIR, \"workflow\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's clear the directory" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "os.system(\"rm -rf \" + os.path.join(OUTPUT_DATA_DIR, \"model_inference\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Export artifacts\n", - "\n", - "Now, we can save our models for use later during the inference stage. To do so we use export_hugectr_ensemble method below. With this method, we can generate the `config.pbtxt` files automatically for each model.

\n", - "The script below creates an ensemble triton server model where\n", - "- workflow is the the nvtabular workflow used in preprocessing,\n", - "- hugectr_model_path is the HugeCTR model that should be served. This path includes the model files.\n", - "- name is the base name of the various triton models.\n", - "- output_path is the path where is model will be saved to.\n", - "- cats are the categorical column names\n", - "- conts are the continuous column names" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "hugectr_params = dict()\n", - "# Config File in the final directory for serving\n", - "hugectr_params[\"config\"] = os.path.join(OUTPUT_DATA_DIR, \"model_inference\", \"criteo/1/criteo.json\")\n", - "hugectr_params[\"slots\"] = 26\n", - "hugectr_params[\"max_nnz\"] = 1\n", - "hugectr_params[\"embedding_vector_size\"] = 128\n", - "hugectr_params[\"n_outputs\"] = 1\n", - "export_hugectr_ensemble(\n", - " workflow=workflow,\n", - " # Current directory with model weights and config file\n", - " hugectr_model_path=os.path.join(OUTPUT_DATA_DIR, \"criteo_hugectr/1/\"),\n", - " hugectr_params=hugectr_params,\n", - " name=\"criteo\",\n", - " # Base directory for serving\n", - " output_path=os.path.join(OUTPUT_DATA_DIR, \"model_inference\"),\n", - " label_columns=[\"label\"],\n", - " cats=[\"C\" + str(x) for x in range(1, 27)],\n", - " conts=[\"I\" + str(x) for x in range(1, 14)],\n", - " max_batch_size=64,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can take a look at the generated files." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[01;34m/tmp/test_merlin_criteo_hugectr/output/criteo//model_inference\u001b[00m\r\n", - "├── \u001b[01;34mcriteo\u001b[00m\r\n", - "│   ├── \u001b[01;34m1\u001b[00m\r\n", - "│   │   ├── \u001b[01;34m0_sparse_9600.model\u001b[00m\r\n", - "│   │   │   ├── emb_vector\r\n", - "│   │   │   ├── key\r\n", - "│   │   │   └── slot_id\r\n", - "│   │   ├── _dense_9600.model\r\n", - "│   │   ├── _opt_dense_9600.model\r\n", - "│   │   └── criteo.json\r\n", - "│   └── config.pbtxt\r\n", - "├── \u001b[01;34mcriteo_ens\u001b[00m\r\n", - "│   ├── \u001b[01;34m1\u001b[00m\r\n", - "│   └── config.pbtxt\r\n", - "├── \u001b[01;34mcriteo_nvt\u001b[00m\r\n", - "│   ├── \u001b[01;34m1\u001b[00m\r\n", - "│   │   ├── \u001b[01;34m__pycache__\u001b[00m\r\n", - "│   │   │   └── model.cpython-38.pyc\r\n", - "│   │   ├── model.py\r\n", - "│   │   └── \u001b[01;34mworkflow\u001b[00m\r\n", - "│   │   ├── \u001b[01;34mcategories\u001b[00m\r\n", - "│   │   │   ├── unique.C1.parquet\r\n", - "│   │   │   ├── unique.C10.parquet\r\n", - "│   │   │   ├── unique.C11.parquet\r\n", - "│   │   │   ├── unique.C12.parquet\r\n", - "│   │   │   ├── unique.C13.parquet\r\n", - "│   │   │   ├── unique.C14.parquet\r\n", - "│   │   │   ├── unique.C15.parquet\r\n", - "│   │   │   ├── unique.C16.parquet\r\n", - "│   │   │   ├── unique.C17.parquet\r\n", - "│   │   │   ├── unique.C18.parquet\r\n", - "│   │   │   ├── unique.C19.parquet\r\n", - "│   │   │   ├── unique.C2.parquet\r\n", - "│   │   │   ├── unique.C20.parquet\r\n", - "│   │   │   ├── unique.C21.parquet\r\n", - "│   │   │   ├── unique.C22.parquet\r\n", - "│   │   │   ├── unique.C23.parquet\r\n", - "│   │   │   ├── unique.C24.parquet\r\n", - "│   │   │   ├── unique.C25.parquet\r\n", - "│   │   │   ├── unique.C26.parquet\r\n", - "│  
 │   │   ├── unique.C3.parquet\r\n", - "│   │   │   ├── unique.C4.parquet\r\n", - "│   │   │   ├── unique.C5.parquet\r\n", - "│   │   │   ├── unique.C6.parquet\r\n", - "│   │   │   ├── unique.C7.parquet\r\n", - "│   │   │   ├── unique.C8.parquet\r\n", - "│   │   │   └── unique.C9.parquet\r\n", - "│   │   ├── metadata.json\r\n", - "│   │   └── workflow.pkl\r\n", - "│   └── config.pbtxt\r\n", - "└── ps.json\r\n", - "\r\n", - "10 directories, 40 files\r\n" - ] - } - ], - "source": [ - "!tree $OUTPUT_DATA_DIR/model_inference" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We need to write a configuration file with the stored model weights and model configuration." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "tags": [ - "flake8-noqa-cell" - ] - }, - "outputs": [], - "source": [ - "config = json.dumps(\n", - "{\n", - " \"supportlonglong\": \"true\",\n", - " \"models\": [\n", - " {\n", - " \"model\": \"criteo\",\n", - " \"sparse_files\": [os.path.join(OUTPUT_DATA_DIR, \"model_inference\", \"criteo/1/0_sparse_9600.model\")],\n", - " \"dense_file\": os.path.join(OUTPUT_DATA_DIR, \"model_inference\", \"criteo/1/_dense_9600.model\"),\n", - " \"network_file\": os.path.join(OUTPUT_DATA_DIR, \"model_inference\", \"criteo/1/criteo.json\"),\n", - " \"max_batch_size\": \"64\",\n", - " \"gpucache\": \"true\",\n", - " \"hit_rate_threshold\": \"0.9\",\n", - " \"gpucacheper\": \"0.5\",\n", - " \"num_of_worker_buffer_in_pool\": \"4\",\n", - " \"num_of_refresher_buffer_in_pool\": \"1\",\n", - " \"cache_refresh_percentage_per_iteration\": 0.2,\n", - " \"deployed_device_list\": [\"0\"],\n", - " \"default_value_for_each_table\": [\"0.0\", \"0.0\"],\n", - " \"maxnum_catfeature_query_per_table_per_sample\": [2, 26],\n", - " \"embedding_vecsize_per_table\": [16 for x in range(26)],\n", - " }\n", - " ],\n", - "}\n", - ")\n", - "\n", - "config = json.loads(config)\n", - "with open(os.path.join(OUTPUT_DATA_DIR, \"model_inference\", \"ps.json\"), \"w\", encoding=\"utf-8\") as f:\n", - " json.dump(config, f)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Start Triton Inference Server\n", - "\n", - "After we export the ensemble, we are ready to start the Triton Inference Server. The server is installed in the merlin-tensorflow-container. If you are not using one of our containers, then ensure it is installed in your environment. For more information, see the Triton Inference Server [documentation](https://github.com/triton-inference-server/server/blob/r22.03/README.md#documentation). \n", - "\n", - "You can start the server by running the following command:\n", - "\n", - "```shell\n", - "tritonserver --model-repository= --backend-config=hugectr,ps=\n", - "```\n", - "\n", - "For the `--model-repository` argument, specify the same value as `os.path.join(OUTPUT_DATA_DIR, \"model_inference\"` that you specified previously in `export_hugectr_ensemble` for `output_path`.\n", - "For `ps=` argument, specify the same value as `os.path.join(OUTPUT_DATA_DIR, \"model_inference\", \"ps.json)` the file for ps.json." 
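The next cell only prints the two paths; assembling the full launch command from them saves a copy-paste step. A small convenience sketch (not part of the original notebook) that assumes the same environment-variable defaults:

```python
# Sketch: build the tritonserver command for this example's model repository and
# parameter-server file, to be run in a shell inside the serving container.
import os

BASE_DIR = os.environ.get("BASE_DIR", "/raid/data/criteo")
OUTPUT_DATA_DIR = os.environ.get("OUTPUT_DATA_DIR", BASE_DIR + "/test_dask/output")

model_repository = os.path.join(OUTPUT_DATA_DIR, "model_inference")
ps_json = os.path.join(model_repository, "ps.json")
print(f"tritonserver --model-repository={model_repository} --backend-config=hugectr,ps={ps_json}")
```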
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/tmp/test_merlin_criteo_hugectr/output/criteo/model_inference\n", - "/tmp/test_merlin_criteo_hugectr/output/criteo/model_inference/ps.json\n" - ] - } - ], - "source": [ - "print(os.path.join(OUTPUT_DATA_DIR, \"model_inference\"))\n", - "print(os.path.join(OUTPUT_DATA_DIR, \"model_inference\", \"ps.json\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Get prediction from Triton Inference Server\n", - "\n", - "We have saved the models for Triton Inference Server. We started Triton Inference Server and the models are loaded. Now, we can send raw data as a request and receive the predictions.\n", - "\n", - "We read 3 example rows from the last parquet file from the raw data. We drop the target column, `label`, from the dataframe, as the information is not available at inference time." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
C1C2C3C4C5C6C7C8C9C10...I4I5I6I7I8I9I10I11I12I13
700002714039294011146411229355263701010371865651...0.2082150.9526710.9558720.9449220.1393800.9940920.0561030.5474730.7094420.930728
70001351429927259807239593611544862113292987...0.1717090.7595260.7950190.7163660.1349640.5167370.0655770.1297820.4713610.386101
70002130457752877367203328992712640366415968...0.8800280.3477010.2078920.7539500.3710130.7595020.2014770.1924470.0858930.957961
\n", - "

3 rows × 39 columns

\n", - "
" - ], - "text/plain": [ - " C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 ... \\\n", - "70000 2714039 29401 11464 1122 9355 2 6370 1010 37 1865651 ... \n", - "70001 3514299 27259 8072 395 9361 1 544 862 11 3292987 ... \n", - "70002 1304577 5287 7367 2033 2899 2 712 640 36 6415968 ... \n", - "\n", - " I4 I5 I6 I7 I8 I9 I10 \\\n", - "70000 0.208215 0.952671 0.955872 0.944922 0.139380 0.994092 0.056103 \n", - "70001 0.171709 0.759526 0.795019 0.716366 0.134964 0.516737 0.065577 \n", - "70002 0.880028 0.347701 0.207892 0.753950 0.371013 0.759502 0.201477 \n", - "\n", - " I11 I12 I13 \n", - "70000 0.547473 0.709442 0.930728 \n", - "70001 0.129782 0.471361 0.386101 \n", - "70002 0.192447 0.085893 0.957961 \n", - "\n", - "[3 rows x 39 columns]" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_lib = get_lib()\n", - "input_cols = workflow.input_schema.column_names\n", - "# read in data for request\n", - "data = df_lib.read_parquet(\n", - " os.path.join(sorted(glob.glob(original_data_path + \"/*.parquet\"))[-1]),\n", - " columns=input_cols\n", - ")\n", - "batch = data[:3]\n", - "batch = batch[[x for x in batch.columns if x not in ['label']]]\n", - "batch" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We generate a Triton Inference Server request object. \n", - "\n", - "Currently, `NA` and `None` values are not supported for `int32` columns. As a workaround, we will `NA` values with `0`. The output of the HugeCTR model is called `OUTPUT0`. For the same reason of dropping the target column, we need to remove it from the input schema, as well." - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "input_schema = workflow.input_schema.remove_col('label')\n", - "inputs = convert_df_to_triton_input(\n", - " input_schema, \n", - " batch.fillna(0), \n", - " grpcclient.InferInput\n", - ")\n", - "output_cols = ['OUTPUT0']\n", - "outputs = [\n", - " grpcclient.InferRequestedOutput(col)\n", - " for col in output_cols\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We send the request to Triton Inference Server." - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "# send request to tritonserver\n", - "with grpcclient.InferenceServerClient(\"localhost:8001\") as client:\n", - " response = client.infer(\"criteo_ens\", inputs, request_id=\"1\", outputs=outputs)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We print out the predictions. The outputs are the probability scores, predicted by our model, how likely the ad will be clicked." - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "OUTPUT0 [0.52164096 0.50390565 0.4957397 ] (3,)\n" - ] - } - ], - "source": [ - "for col in output_cols:\n", - " print(col, response[col], response[col].shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Summary\n", - "\n", - "In this example, we deployed a recommender system pipeline as an ensemble. First, NVTabular created features and afterwards, HugeCTR predicted the processed data. 
This process ensures that the training and production environments use the same feature engineering.\n", - "\n", - "## Next steps\n", - "\n", - "There is more detailed information in the [API documentation](https://nvidia-merlin.github.io/HugeCTR/main/hugectr_user_guide.html) and [more examples](https://nvidia-merlin.github.io/HugeCTR/main/notebooks/index.html) in the [HugeCTR repository](https://github.com/NVIDIA-Merlin/HugeCTR)." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - }, - "merlin": { - "containers": [ - "nvcr.io/nvidia/merlin/merlin-hugectr:latest" - ] - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/examples/scaling-criteo/README.md b/examples/scaling-criteo/README.md index feed55e04..26e3ca28a 100644 --- a/examples/scaling-criteo/README.md +++ b/examples/scaling-criteo/README.md @@ -6,8 +6,7 @@ We demonstrate how to scale NVTabular, as well as: - Use multiple GPUs and nodes with NVTabular for feature engineering. - Train recommender system models with the Merlin Models for TensorFlow. -- Train recommender system models with HugeCTR using multiple GPUs. -- Inference with the Triton Inference Server and Merlin Models for TensorFlow or HugeCTR. +- Inference with the Triton Inference Server and Merlin Models for TensorFlow. Our recommendation is to use our latest stable [Merlin containers](https://catalog.ngc.nvidia.com/containers?filters=&orderBy=dateModifiedDESC&query=merlin) for the examples. Each notebook provides the required container. @@ -18,9 +17,3 @@ Training and Deployment with **TensorFlow**: - [Feature Engineering with NVTabular](02-ETL-with-NVTabular.ipynb) - [Training with TensorFlow](03-Training-with-Merlin-Models-TensorFlow.ipynb) - [Deploy the TensorFlow Model with Triton Inference Server](04-Triton-Inference-with-Merlin-Models-TensorFlow.ipynb) - -Training and Deployment with **HugeCTR**: -- [Download and Convert](01-Download-Convert.ipynb) -- [Feature Engineering with NVTabular](02-ETL-with-NVTabular.ipynb) -- [Training with HugeCTR](03-Training-with-HugeCTR.ipynb) -- [Deploy the HugeCTR Model with Triton Inference Server](04-Triton-Inference-with-HugeCTR.ipynb)