From 4371b7e1d245778e222aad8ebfc96c9be3c197c0 Mon Sep 17 00:00:00 2001 From: Radek Osmulski Date: Fri, 14 Apr 2023 14:14:09 +1000 Subject: [PATCH] update --- ...nd_save_model_for_benchmarking-Copy1.ipynb | 1975 +++++++++++++++++ 1 file changed, 1975 insertions(+) create mode 100644 T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb diff --git a/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb b/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb new file mode 100644 index 0000000000..74b19fa9d3 --- /dev/null +++ b/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb @@ -0,0 +1,1975 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "d062ceda", + "metadata": {}, + "outputs": [], + "source": [ + "# %%bash\n", + "\n", + "# # cd /models && git fetch origin && git checkout origin/tf/transformer-api && pip install .\n", + "# cd /models && git checkout main && git pull origin main && pip install .\n", + "# cd /core && git checkout main && git pull origin main && pip install .\n", + "# cd /nvtabular && git checkout main && git pull origin main && pip install .\n", + "# cd /systems && git checkout main && git pull origin main && pip install .\n", + "# cd /dataloader && git checkout main && git pull origin main && pip install .\n", + "\n", + "# ---\n", + "# pip install matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e9929dc8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: gdown in /usr/local/lib/python3.8/dist-packages (4.7.1)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.64.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.9.0)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.11.1)\n", + "Requirement already satisfied: six in /usr/lib/python3/dist-packages (from gdown) (1.14.0)\n", + "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.8/dist-packages (from gdown) (2.28.1)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.3.2.post1)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.26.13)\n", + "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (2.1.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2.8)\n", + "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6; extra == \"socks\" in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.7.1)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading...\n", + "From (uriginal): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", + "From (redirected): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV&confirm=t&uuid=0dd96474-79af-47bb-9148-b96d64204e14\n", + "To: /workspace/T4Rec_repro/rees46_ecom_dataset_small_for_ci.zip\n", + "100%|██████████| 43.4M/43.4M [00:12<00:00, 3.62MB/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease\n", + "Hit:2 http://archive.ubuntu.com/ubuntu focal InRelease\n", + "Hit:3 http://security.ubuntu.com/ubuntu focal-security InRelease\n", + "Hit:4 http://archive.ubuntu.com/ubuntu focal-updates InRelease\n", + "Hit:5 http://archive.ubuntu.com/ubuntu focal-backports InRelease\n", + "Reading package lists...\n", + "Reading package lists...\n", + "Building dependency tree...\n", + "Reading state information...\n", + "unzip is already the newest version (6.0-25ubuntu1.1).\n", + "0 upgraded, 0 newly installed, 0 to remove and 98 not upgraded.\n", + "Archive: rees46_ecom_dataset_small_for_ci.zip\n", + " creating: ecom_dataset/0001/\n", + " inflating: ecom_dataset/0001/valid.parquet \n", + " extracting: ecom_dataset/0001/.zip \n", + " inflating: ecom_dataset/0001/train.parquet \n", + " inflating: ecom_dataset/0001/test.parquet \n", + " creating: ecom_dataset/0002/\n", + " inflating: ecom_dataset/0002/valid.parquet \n", + " inflating: ecom_dataset/0002/train.parquet \n", + " inflating: ecom_dataset/0002/test.parquet \n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "rm -rf ecom_dataset\n", + "mkdir -p ecom_dataset\n", + "\n", + "pip install gdown\n", + "# gdown https://drive.google.com/uc?id=1BvCHc4eXComuNK93bKhRM6cbg9y5p350 # <-- full dataset\n", + "gdown https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", + "apt-get update -y\n", + "apt-get install unzip -y\n", + "unzip -d ecom_dataset \"rees46_ecom_dataset_small_for_ci.zip\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4a0105a7", + "metadata": {}, + "outputs": [], + "source": [ + "# !cd /dataloader && git checkout main && git pull origin main && git checkout ce2215d8f871d0fb8c71900f7b914a226aea7c24 && pip install ." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8101aa27", + "metadata": {}, + "outputs": [], + "source": [ + "# !cd /core && git checkout main && git pull origin main && pip install ." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "0f799172", + "metadata": {}, + "outputs": [], + "source": [ + "# %%writefile /core/merlin/dag/graph.py\n", + "\n", + "# #\n", + "# # Copyright (c) 2022, NVIDIA CORPORATION.\n", + "# #\n", + "# # Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# # you may not use this file except in compliance with the License.\n", + "# # You may obtain a copy of the License at\n", + "# #\n", + "# # http://www.apache.org/licenses/LICENSE-2.0\n", + "# #\n", + "# # Unless required by applicable law or agreed to in writing, software\n", + "# # distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# # See the License for the specific language governing permissions and\n", + "# # limitations under the License.\n", + "# #\n", + "\n", + "# import logging\n", + "# from collections import deque\n", + "# from typing import Dict, Optional\n", + "\n", + "# from merlin.dag.node import (\n", + "# Node,\n", + "# _combine_schemas,\n", + "# iter_nodes,\n", + "# postorder_iter_nodes,\n", + "# preorder_iter_nodes,\n", + "# )\n", + "# from merlin.schema import Schema\n", + "\n", + "# LOG = logging.getLogger(\"merlin\")\n", + "\n", + "\n", + "# class Graph:\n", + "# \"\"\"\n", + "# Represents an DAG composed of Nodes, each of which contains an operator that\n", + "# transforms dataframes or dataframe-like data\n", + "# \"\"\"\n", + "\n", + "# def __init__(self, output_node: Node, subgraphs: Optional[Dict[str, Node]] = None):\n", + "# self.output_node = output_node\n", + "# self.subgraphs = subgraphs or {}\n", + "\n", + "# parents_with_deps = self.output_node.parents_with_dependencies\n", + "# parents_with_deps.append(output_node)\n", + "\n", + "# for name, sg in self.subgraphs.items():\n", + "# if sg not in parents_with_deps:\n", + "# raise ValueError(\n", + "# f\"The output node of subgraph {name} does not exist in the provided graph.\"\n", + "# )\n", + "\n", + "# def subgraph(self, name: str) -> \"Graph\":\n", + "# if name not in self.subgraphs.keys():\n", + "# raise ValueError(f\"No subgraph named {name}. Options are: {self.subgraphs.keys()}\")\n", + "# return Graph(self.subgraphs[name])\n", + "\n", + "# @property\n", + "# def input_dtypes(self):\n", + "# if self.input_schema:\n", + "# return {\n", + "# name: col_schema.dtype\n", + "# for name, col_schema in self.input_schema.column_schemas.items()\n", + "# }\n", + "# else:\n", + "# return {}\n", + "\n", + "# @property\n", + "# def output_dtypes(self):\n", + "# if self.output_schema:\n", + "# return {\n", + "# name: col_schema.dtype\n", + "# for name, col_schema in self.output_schema.column_schemas.items()\n", + "# }\n", + "# else:\n", + "# return {}\n", + "\n", + "# @property\n", + "# def column_mapping(self):\n", + "# nodes = preorder_iter_nodes(self.output_node)\n", + "# column_mapping = self.output_node.column_mapping\n", + "# for node in list(nodes)[1:]:\n", + "# node_map = node.column_mapping\n", + "# for output_col, input_cols in column_mapping.items():\n", + "# early_inputs = []\n", + "# for input_col in input_cols:\n", + "# early_inputs += node_map.get(input_col, [input_col])\n", + "# column_mapping[output_col] = early_inputs\n", + "\n", + "# return column_mapping\n", + "\n", + "# def construct_schema(self, root_schema: Schema, preserve_dtypes=False) -> \"Graph\":\n", + "# \"\"\"\n", + "# Given the schema of a dataset to transform, determine the output schema of the graph\n", + "\n", + "# Parameters\n", + "# ----------\n", + "# root_schema : Schema\n", + "# The schema of a dataset to be transformed with this DAG\n", + "# preserve_dtypes : bool, optional\n", + "# Whether to keep any dtypes that may already be present in the schemas, by default False\n", + "\n", + "# Returns\n", + "# -------\n", + "# Graph\n", + "# This DAG after the schemas have been filled in\n", + "# \"\"\"\n", + "# nodes = list(postorder_iter_nodes(self.output_node))\n", + "\n", + "# self._compute_node_schemas(root_schema, nodes, preserve_dtypes)\n", + "# # self._validate_node_schemas(root_schema, nodes, preserve_dtypes)\n", + "\n", + "# return self\n", + "\n", + "# def _compute_node_schemas(self, root_schema, nodes, preserve_dtypes=False):\n", + "# for node in nodes:\n", + "# node.compute_schemas(root_schema, preserve_dtypes=preserve_dtypes)\n", + "\n", + "# def _validate_node_schemas(self, root_schema, nodes, strict_dtypes=False):\n", + "# for node in nodes:\n", + "# node.validate_schemas(root_schema, strict_dtypes=strict_dtypes)\n", + "\n", + "# @property\n", + "# def input_schema(self):\n", + "# # leaf_node input and output schemas are the same (aka selection)\n", + "# return _combine_schemas(self.leaf_nodes)\n", + "\n", + "# @property\n", + "# def leaf_nodes(self):\n", + "# return [node for node in postorder_iter_nodes(self.output_node) if not node.parents]\n", + "\n", + "# @property\n", + "# def output_schema(self):\n", + "# return self.output_node.output_schema\n", + "\n", + "# def _input_columns(self):\n", + "# input_cols = []\n", + "# for node in iter_nodes([self.output_node]):\n", + "# upstream_output_cols = []\n", + "\n", + "# for upstream_node in node.parents_with_dependencies:\n", + "# upstream_output_cols += upstream_node.output_columns.names\n", + "\n", + "# upstream_output_cols = _get_unique(upstream_output_cols)\n", + "# input_cols += list(set(node.input_columns.names) - set(upstream_output_cols))\n", + "\n", + "# return _get_unique(input_cols)\n", + "\n", + "# def remove_inputs(self, to_remove):\n", + "# \"\"\"\n", + "# Removes columns from a Graph\n", + "\n", + "# Starting at the leaf nodes, trickle down looking for columns to remove,\n", + "# when found remove but then must propagate the removal of any other\n", + "# output columns derived from that column.\n", + "\n", + "# Parameters\n", + "# -----------\n", + "# graph : Graph\n", + "# The graph to remove columns from\n", + "# to_remove : array_like\n", + "# A list of input column names to remove from the graph\n", + "\n", + "# Returns\n", + "# -------\n", + "# Graph\n", + "# The same graph with columns removed\n", + "# \"\"\"\n", + "# nodes_to_process = deque([(node, to_remove) for node in self.leaf_nodes])\n", + "\n", + "# while nodes_to_process:\n", + "# node, columns_to_remove = nodes_to_process.popleft()\n", + "# if node.input_schema and len(node.input_schema):\n", + "# output_columns_to_remove = node.remove_inputs(columns_to_remove)\n", + "\n", + "# for child in node.children:\n", + "# nodes_to_process.append(\n", + "# (child, list(set(to_remove + output_columns_to_remove)))\n", + "# )\n", + "\n", + "# if not len(node.input_schema):\n", + "# node.remove_child(child)\n", + "\n", + "# # remove any dependencies that do not have an output schema\n", + "# node.dependencies = [\n", + "# dep for dep in node.dependencies if dep.output_schema and len(dep.output_schema)\n", + "# ]\n", + "\n", + "# if not node.input_schema or not len(node.input_schema):\n", + "# for parent in node.parents:\n", + "# parent.remove_child(node)\n", + "# for dependency in node.dependencies:\n", + "# dependency.remove_child(node)\n", + "# del node\n", + "\n", + "# return self\n", + "\n", + "# @classmethod\n", + "# def get_nodes_by_op_type(cls, nodes, op_type):\n", + "# return set(node for node in iter_nodes(nodes) if isinstance(node.op, op_type))\n", + "\n", + "\n", + "# def _get_schemaless_nodes(nodes):\n", + "# schemaless_nodes = []\n", + "# for node in iter_nodes(nodes):\n", + "# if node.input_schema is None:\n", + "# schemaless_nodes.append(node)\n", + "\n", + "# return set(schemaless_nodes)\n", + "\n", + "\n", + "# def _get_unique(cols):\n", + "# # Need to preserve order in unique-column list\n", + "# return list({x: x for x in cols}.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ab4f272d", + "metadata": {}, + "outputs": [], + "source": [ + "# !cd /core && pip install ." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ceb3ae93", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-04-13 11:21:28.090236: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "2023-04-13 11:21:30.471061: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:21:30.471514: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:21:30.471678: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[INFO]: sparse_operation_kit is imported\n", + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11.\n", + "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n", + "[SOK INFO] Initialize finished, communication tool: horovod\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-04-13 11:21:30.757567: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-04-13 11:21:30.758435: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:21:30.758639: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:21:30.758792: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:21:31.508591: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:21:31.508802: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:21:31.508961: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:21:31.509071: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", + "2023-04-13 11:21:31.509079: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-04-13 11:21:31.509140: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import os\n", + "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", + "import gc\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from merlin.schema.tags import Tags\n", + "from merlin.io.dataset import Dataset\n", + "import merlin.models.tf as mm" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "11647dd3", + "metadata": {}, + "outputs": [], + "source": [ + "train = Dataset(\"ecom_dataset/0001/train.parquet\")\n", + "valid = Dataset(\"ecom_dataset/0002/test.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "4ab4e0fb", + "metadata": {}, + "outputs": [], + "source": [ + "target = 'sess_pid_seq'\n", + "seq_name = target" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "8d9903e6", + "metadata": {}, + "outputs": [], + "source": [ + "# a couple of starter hyperparams\n", + "\n", + "d_model = 192\n", + "n_layer = 3\n", + "n_head = 16\n", + "batch_size = 128\n", + "learning_rate = 0.0006667377132554976\n", + "n_epoch = 1\n", + "item_embedding_dim = 448 \n", + "item_id_embeddings_init_std = 3" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "410ea223", + "metadata": {}, + "outputs": [], + "source": [ + "# seq_name = 'seq'\n", + "# target = seq_name" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "4328f03a", + "metadata": {}, + "outputs": [], + "source": [ + "from nvtabular.inference.triton import export_tensorflow_ensemble\n", + "from nvtabular import Workflow\n", + "from nvtabular.ops import Categorify, Rename" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4571b92b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "d5a9dd50", + "metadata": {}, + "outputs": [], + "source": [ + "ops = ['sess_pid_seq'] >> Categorify(dtype=np.int32) #>> Rename(name=seq_name)\n", + "\n", + "wf = Workflow(ops)\n", + "\n", + "train = wf.fit_transform(train)\n", + "valid = wf.transform(valid)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "3116726e", + "metadata": {}, + "outputs": [], + "source": [ + "# cat rees46_schema_modified.pbtxt" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "69e8f95c", + "metadata": {}, + "outputs": [], + "source": [ + "# %%writefile rees46_schema_modified_2.pbtxt\n", + "\n", + "# feature {\n", + "# name: \"seq\"\n", + "# value_count {\n", + "# min: 2\n", + "# }\n", + "# type: INT\n", + "# int_domain {\n", + "# name: \"seq\"\n", + "# min: 1\n", + "# max: 390000\n", + "# is_categorical: true\n", + "# }\n", + "# annotation {\n", + "# tag: \"item_id\"\n", + "# tag: \"list\"\n", + "# tag: \"categorical\"\n", + "# tag: \"item\"\n", + "# }\n", + "# }" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "a6ade14a", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", + "\n", + "def get_model():\n", + " mlp_block = mm.MLPBlock(\n", + " [d_model],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " schema = TensorflowMetadata.from_proto_text_file(\n", + " './',\n", + " file_name='rees46_schema_modified.pbtxt'\n", + " ).to_merlin_schema()\n", + "\n", + " train.schema = schema\n", + " \n", + " schema_model = schema.select_by_tag(Tags.ITEM_ID)\n", + " input_block = mm.InputBlockV2(\n", + " schema_model,\n", + " categorical=mm.Embeddings(\n", + " schema_model.select_by_tag(Tags.CATEGORICAL),\n", + " dim=item_embedding_dim,\n", + " sequence_combiner=None,\n", + " )\n", + " )\n", + "\n", + " train.schema = train.schema.select_by_name(seq_name)\n", + "\n", + " xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)\n", + "\n", + " dense_block = mm.SequentialBlock(\n", + " input_block,\n", + " mlp_block,\n", + " xlnet_block\n", + " )\n", + "\n", + " mlp_block2 = mm.MLPBlock(\n", + " [item_embedding_dim],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " prediction_task = mm.CategoricalOutput(\n", + " to_call=input_block[\"categorical\"][target],\n", + " )\n", + "\n", + " model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)\n", + "\n", + " optimizer = tf.keras.optimizers.Adam(\n", + " learning_rate=learning_rate,\n", + " )\n", + "\n", + " model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", + " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[20])\n", + " )\n", + " return model_transformer, xlnet_block" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "523fe2ac", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n", + "2023-04-13 11:21:38.342588: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:436: UserWarning: Converting sparse IndexedSlices to a dense Tensor with 174720448 elements. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 106s 144ms/step - loss: 7.3129 - recall_at_20: 0.1424 - mrr_at_20: 0.0802 - ndcg_at_20: 0.0939 - map_at_20: 0.0802 - precision_at_20: 0.0071 - regularization_loss: 0.0000e+00 - loss_batch: 7.3149\n", + "84/84 [==============================] - 4s 27ms/step - loss: 8.5848 - recall_at_20: 0.2229 - mrr_at_20: 0.0736 - ndcg_at_20: 0.1066 - map_at_20: 0.0736 - precision_at_20: 0.0111 - regularization_loss: 0.0000e+00 - loss_batch: 8.5971\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.584781646728516,\n", + " 'recall_at_20': 0.2308632731437683,\n", + " 'mrr_at_20': 0.07471762597560883,\n", + " 'ndcg_at_20': 0.10908268392086029,\n", + " 'map_at_20': 0.07471762597560883,\n", + " 'precision_at_20': 0.011543160304427147,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 9.130510330200195}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")\n", + "\n", + "# model_transformer.save('t4rec_model')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "5bd66ba8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 114). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpkph1f3_r/model.savedmodel/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpkph1f3_r/model.savedmodel/assets\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:100: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", + "/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " return generic_utils.serialize_keras_object(obj)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + ] + } + ], + "source": [ + "from merlin.systems.dag.ops.workflow import TransformWorkflow\n", + "from merlin.systems.dag.ops.tensorflow import PredictTensorflow\n", + "\n", + "serving_operators = [seq_name] >> TransformWorkflow(wf) >> PredictTensorflow(model_transformer)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "3ef1e5fc", + "metadata": {}, + "outputs": [], + "source": [ + "# import merlin.models.tf as mm\n", + "# import tensorflow as tf\n", + "# tf_model_path = os.path.join('t4rec_model')\n", + "\n", + "# model = tf.keras.models.load_model(tf_model_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "e2a7b6ee", + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "\n", + "rm -rf /workspace/models_for_benchmarking\n", + "mkdir -p /workspace/models_for_benchmarking" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "55ad012c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nametagsdtypeis_listis_raggedproperties.domain.minproperties.domain.maxproperties.domain.nameproperties.value_count.minproperties.value_count.max
0sess_pid_seq(Tags.CATEGORICAL, Tags.ITEM, Tags.ID, Tags.IT...DType(name='int64', element_type=<ElementType....TrueTrue1390000sess_pid_seq2None
\n", + "
" + ], + "text/plain": [ + "[{'name': 'sess_pid_seq', 'tags': {, , , , }, 'properties': {'domain': {'min': 1, 'max': 390000, 'name': 'sess_pid_seq'}, 'value_count': {'min': 2, 'max': None}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=2, max=None)))), 'is_list': True, 'is_ragged': True}]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.schema.select_by_name('sess_pid_seq')" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "1a39b4f8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 114). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel/assets\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:100: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", + "/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " return generic_utils.serialize_keras_object(obj)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + ] + } + ], + "source": [ + "from merlin.systems.dag.ensemble import Ensemble\n", + "\n", + "ensemble = Ensemble(serving_operators, wf.input_schema)\n", + "ens_conf, node_confs = ensemble.export(\"/workspace/models_for_benchmarking\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "1720a5af", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access '/workspace/models_for_benchmarking/1': No such file or directory\r\n" + ] + } + ], + "source": [ + "ls /workspace/models_for_benchmarking/1" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "d7cdc6cc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "I0413 11:24:28.716029 1527 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7f7f2a000000' with size 268435456\n", + "I0413 11:24:28.716361 1527 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864\n", + "I0413 11:24:28.718446 1527 model_lifecycle.cc:459] loading: 0_transformworkflowtriton:1\n", + "I0413 11:24:28.718465 1527 model_lifecycle.cc:459] loading: 1_predicttensorflowtriton:1\n", + "I0413 11:24:28.718478 1527 model_lifecycle.cc:459] loading: executor_model:1\n", + "I0413 11:24:28.924940 1527 tensorflow.cc:2536] TRITONBACKEND_Initialize: tensorflow\n", + "I0413 11:24:28.924955 1527 tensorflow.cc:2546] Triton TRITONBACKEND API version: 1.10\n", + "I0413 11:24:28.924960 1527 tensorflow.cc:2552] 'tensorflow' TRITONBACKEND API version: 1.10\n", + "I0413 11:24:28.924962 1527 tensorflow.cc:2576] backend configuration:\n", + "{\"cmdline\":{\"auto-complete-config\":\"true\",\"min-compute-capability\":\"6.000000\",\"backend-directory\":\"/opt/tritonserver/backends\",\"default-max-batch-size\":\"4\"}}\n", + "2023-04-13 11:24:30.207841: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-04-13 11:24:32.085748: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:32.086174: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:32.086365: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "I0413 11:24:33.803267 1527 python_be.cc:1856] TRITONBACKEND_ModelInstanceInitialize: executor_model_0 (GPU device 0)\n", + "2023-04-13 11:24:35.316462: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-04-13 11:24:37.126873: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:37.127251: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:37.127427: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "I0413 11:24:37.157059 1527 tensorflow.cc:2642] TRITONBACKEND_ModelInitialize: 1_predicttensorflowtriton (version 1)\n", + "I0413 11:24:37.157179 1527 model_lifecycle.cc:694] successfully loaded 'executor_model' version 1\n", + "2023-04-13 11:24:37.157805: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-04-13 11:24:37.178699: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }\n", + "2023-04-13 11:24:37.178742: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-04-13 11:24:37.178876: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-04-13 11:24:37.179781: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:37.196068: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:37.196289: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:37.196570: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:37.196747: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:37.196909: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:37.197031: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-04-13 11:24:37.203975: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38618 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", + "2023-04-13 11:24:37.262568: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled\n", + "2023-04-13 11:24:37.271889: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.\n", + "2023-04-13 11:24:37.678751: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-04-13 11:24:37.745105: I tensorflow/cc/saved_model/loader.cc:325] SavedModel load for tags { serve }; Status: success: OK. Took 587310 microseconds.\n", + "2023-04-13 11:24:39.105154: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-04-13 11:24:40.997532: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:40.997994: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:40.998186: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "I0413 11:24:42.684588 1527 tensorflow.cc:2691] TRITONBACKEND_ModelInstanceInitialize: 1_predicttensorflowtriton_0 (GPU device 0)\n", + "2023-04-13 11:24:42.684902: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-04-13 11:24:42.702205: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }\n", + "2023-04-13 11:24:42.702239: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-04-13 11:24:42.702447: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:42.702659: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:42.702822: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:42.703025: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:42.703189: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:42.703311: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38618 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", + "2023-04-13 11:24:42.742722: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-04-13 11:24:43.330311: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-04-13 11:24:43.395816: I tensorflow/cc/saved_model/loader.cc:325] SavedModel load for tags { serve }; Status: success: OK. Took 710922 microseconds.\n", + "I0413 11:24:43.395921 1527 python_be.cc:1856] TRITONBACKEND_ModelInstanceInitialize: 0_transformworkflowtriton_0 (GPU device 0)\n", + "I0413 11:24:43.396107 1527 model_lifecycle.cc:694] successfully loaded '1_predicttensorflowtriton' version 1\n", + "2023-04-13 11:24:44.668497: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-04-13 11:24:46.525315: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:46.525768: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-04-13 11:24:46.525978: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "I0413 11:24:46.583396 1527 model_lifecycle.cc:694] successfully loaded '0_transformworkflowtriton' version 1\n", + "I0413 11:24:46.583508 1527 server.cc:563] \n", + "+------------------+------+\n", + "| Repository Agent | Path |\n", + "+------------------+------+\n", + "+------------------+------+\n", + "\n", + "I0413 11:24:46.583587 1527 server.cc:590] \n", + "+------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "| Backend | Path | Config |\n", + "+------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "| python | /opt/tritonserver/backends/python/libtriton_python.so | {\"cmdline\":{\"auto-complete-config\":\"true\",\"min-compute-capability\":\"6.000000\",\"backend-directory\":\"/opt/tritonserver/backends\",\"default-max-batch-size\":\"4\"}} |\n", + "| tensorflow | /opt/tritonserver/backends/tensorflow2/libtriton_tensorflow2.so | {\"cmdline\":{\"auto-complete-config\":\"true\",\"min-compute-capability\":\"6.000000\",\"backend-directory\":\"/opt/tritonserver/backends\",\"default-max-batch-size\":\"4\"}} |\n", + "+------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n", + "I0413 11:24:46.583634 1527 server.cc:633] \n", + "+---------------------------+---------+--------+\n", + "| Model | Version | Status |\n", + "+---------------------------+---------+--------+\n", + "| 0_transformworkflowtriton | 1 | READY |\n", + "| 1_predicttensorflowtriton | 1 | READY |\n", + "| executor_model | 1 | READY |\n", + "+---------------------------+---------+--------+\n", + "\n", + "I0413 11:24:46.610538 1527 metrics.cc:864] Collecting metrics for GPU 0: Quadro RTX 8000\n", + "I0413 11:24:46.610778 1527 metrics.cc:757] Collecting CPU metrics\n", + "I0413 11:24:46.610913 1527 tritonserver.cc:2264] \n", + "+----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "| Option | Value |\n", + "+----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "| server_id | triton |\n", + "| server_version | 2.28.0 |\n", + "| server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data statistics trace logging |\n", + "| model_repository_path[0] | /workspace/models_for_benchmarking/ |\n", + "| model_control_mode | MODE_NONE |\n", + "| strict_model_config | 0 |\n", + "| rate_limit | OFF |\n", + "| pinned_memory_pool_byte_size | 268435456 |\n", + "| cuda_memory_pool_byte_size{0} | 67108864 |\n", + "| response_cache_byte_size | 0 |\n", + "| min_supported_compute_capability | 6.0 |\n", + "| strict_readiness | 1 |\n", + "| exit_timeout | 30 |\n", + "+----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n", + "I0413 11:24:46.611676 1527 grpc_server.cc:4819] Started GRPCInferenceService at 0.0.0.0:8001\n", + "I0413 11:24:46.611833 1527 http_server.cc:3477] Started HTTPService at 0.0.0.0:8000\n", + "I0413 11:24:46.652586 1527 http_server.cc:184] Started Metrics Service at 0.0.0.0:8002\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-04-13 11:25:37.504455: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" + ] + } + ], + "source": [ + "import nvtabular.inference.triton as nvt_triton\n", + "import tritonclient.grpc as grpcclient\n", + "import subprocess\n", + "\n", + "subprocess.Popen(['tritonserver', '--model-repository=/workspace/models_for_benchmarking/'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f63b425", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a772eeb", + "metadata": {}, + "outputs": [], + "source": [ + "# !pkill triton" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6ed7b5a", + "metadata": {}, + "outputs": [], + "source": [ + "import tritonhttpclient\n", + "try:\n", + " triton_client = tritonhttpclient.InferenceServerClient(url=\"localhost:8000\", verbose=True)\n", + " print(\"client created.\")\n", + "except Exception as e:\n", + " print(\"channel creation failed: \" + str(e))\n", + "triton_client.is_server_live()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10c2a62e", + "metadata": {}, + "outputs": [], + "source": [ + "validation_data.iloc[]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c2723e9", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.systems.triton import convert_df_to_triton_input\n", + "\n", + "validation_data = valid.compute()\n", + "inputs = convert_df_to_triton_input(wf.input_schema, validation_data.iloc[:1])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa9fc0dd", + "metadata": {}, + "outputs": [], + "source": [ + "inputs[0].name()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ae7eb08", + "metadata": {}, + "outputs": [], + "source": [ + "inputs[0].shape()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac3596c3", + "metadata": {}, + "outputs": [], + "source": [ + "inputs[1].name()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18f8e77d", + "metadata": {}, + "outputs": [], + "source": [ + "inputs[1].shape()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "292b58da", + "metadata": {}, + "outputs": [], + "source": [ + "validation_data.iloc[:1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8e1fd90", + "metadata": {}, + "outputs": [], + "source": [ + "wf.input_schema" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a79c58f", + "metadata": {}, + "outputs": [], + "source": [ + "import tritonclient.grpc as grpcclient\n", + "\n", + "with grpcclient.InferenceServerClient(\"localhost:8001\") as client:\n", + " response = client.infer('1_predicttensorflowtriton', inputs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6dd51a6", + "metadata": {}, + "outputs": [], + "source": [ + "response.get_output('sess_pid_seq/categorical_output')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba6712bb", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "637eb3f0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd62f641", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "d1bc6530", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:absl:Function `_wrapped_model` contains input name(s) sess_pid_seq with unsupported characters which will be renamed to sess_pid_seq_1 in the SavedModel.\n", + "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 110). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /workspace/models_for_benchmarking/0_predicttensorflowtriton/1/model.savedmodel/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /workspace/models_for_benchmarking/0_predicttensorflowtriton/1/model.savedmodel/assets\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:83: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", + "/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " return generic_utils.serialize_keras_object(obj)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + ] + } + ], + "source": [ + "from merlin.systems.dag.ensemble import Ensemble\n", + "\n", + "ensemble = Ensemble(serving_operators, train.schema)\n", + "ens_conf, node_confs = ensemble.export(\"/workspace/models_for_benchmarking\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "8d390999", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "name: \"0_predicttensorflowtriton\"\r\n", + "platform: \"tensorflow_savedmodel\"\r\n", + "input {\r\n", + " name: \"sess_pid_seq\"\r\n", + " data_type: TYPE_INT32\r\n", + " dims: -1\r\n", + " dims: 1\r\n", + "}\r\n", + "input {\r\n", + " name: \"sess_pid_seq_1\"\r\n", + " data_type: TYPE_INT32\r\n", + " dims: -1\r\n", + " dims: 1\r\n", + "}\r\n", + "output {\r\n", + " name: \"sess_pid_seq/categorical_output\"\r\n", + " data_type: TYPE_FP32\r\n", + " dims: -1\r\n", + " dims: 390001\r\n", + "}\r\n", + "parameters {\r\n", + " key: \"TF_GRAPH_TAG\"\r\n", + " value {\r\n", + " string_value: \"serve\"\r\n", + " }\r\n", + "}\r\n", + "parameters {\r\n", + " key: \"TF_SIGNATURE_DEF\"\r\n", + " value {\r\n", + " string_value: \"serving_default\"\r\n", + " }\r\n", + "}\r\n", + "backend: \"tensorflow\"\r\n" + ] + } + ], + "source": [ + "cat /workspace/models_for_benchmarking/0_predicttensorflowtriton/config.pbtxt" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "f7fe741c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting /workspace/models_for_benchmarking/0_predicttensorflowtriton/config.pbtxt\n" + ] + } + ], + "source": [ + "%%writefile /workspace/models_for_benchmarking/0_predicttensorflowtriton/config.pbtxt\n", + "\n", + "name: \"0_predicttensorflowtriton\"\n", + "platform: \"tensorflow_savedmodel\"\n", + "input {\n", + " name: \"sess_pid_seq\"\n", + " data_type: TYPE_INT32\n", + " dims: -1\n", + " dims: 1\n", + "}\n", + "input {\n", + " name: \"sess_pid_seq_1\"\n", + " data_type: TYPE_INT32\n", + " dims: -1\n", + " dims: 1\n", + "}\n", + "output {\n", + " name: \"sess_pid_seq/categorical_output\"\n", + " data_type: TYPE_FP32\n", + " dims: -1\n", + " dims: 390001\n", + "}\n", + "parameters {\n", + " key: \"TF_GRAPH_TAG\"\n", + " value {\n", + " string_value: \"serve\"\n", + " }\n", + "}\n", + "parameters {\n", + " key: \"TF_SIGNATURE_DEF\"\n", + " value {\n", + " string_value: \"serving_default\"\n", + " }\n", + "}\n", + "backend: \"tensorflow\"" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "9cfe8bca", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "name: \"executor_model\"\r\n", + "platform: \"merlin_executor\"\r\n", + "input {\r\n", + " name: \"sess_pid_seq__values\"\r\n", + " data_type: TYPE_INT64\r\n", + " dims: -1\r\n", + " dims: -1\r\n", + "}\r\n", + "input {\r\n", + " name: \"sess_pid_seq__lengths\"\r\n", + " data_type: TYPE_INT32\r\n", + " dims: -1\r\n", + " dims: -1\r\n", + "}\r\n", + "output {\r\n", + " name: \"sess_pid_seq/categorical_output\"\r\n", + " data_type: TYPE_FP32\r\n", + " dims: -1\r\n", + " dims: 390001\r\n", + "}\r\n", + "backend: \"python\"\r\n" + ] + } + ], + "source": [ + "cat /workspace/models_for_benchmarking/executor_model/config.pbtxt" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "a659255d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting /workspace/models_for_benchmarking/executor_model/config.pbtxt\n" + ] + } + ], + "source": [ + "%%writefile /workspace/models_for_benchmarking/executor_model/config.pbtxt\n", + "\n", + "name: \"executor_model\"\n", + "platform: \"merlin_executor\"\n", + "input {\n", + " name: \"sess_pid_seq__values\"\n", + " data_type: TYPE_INT64\n", + " dims: -1\n", + " dims: -1\n", + "}\n", + "input {\n", + " name: \"sess_pid_seq__nnzs\"\n", + " data_type: TYPE_INT64\n", + " dims: -1\n", + " dims: -1\n", + "}\n", + "output {\n", + " name: \"sess_pid_seq/categorical_output\"\n", + " data_type: TYPE_FP32\n", + " dims: -1\n", + " dims: 390001\n", + "}\n", + "backend: \"python\"" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "ddf2dc55", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.\r\n", + "#\r\n", + "# Redistribution and use in source and binary forms, with or without\r\n", + "# modification, are permitted provided that the following conditions\r\n", + "# are met:\r\n", + "# * Redistributions of source code must retain the above copyright\r\n", + "# notice, this list of conditions and the following disclaimer.\r\n", + "# * Redistributions in binary form must reproduce the above copyright\r\n", + "# notice, this list of conditions and the following disclaimer in the\r\n", + "# documentation and/or other materials provided with the distribution.\r\n", + "# * Neither the name of NVIDIA CORPORATION nor the names of its\r\n", + "# contributors may be used to endorse or promote products derived\r\n", + "# from this software without specific prior written permission.\r\n", + "#\r\n", + "# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY\r\n", + "# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r\n", + "# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\r\n", + "# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r\n", + "# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r\n", + "# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r\n", + "# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r\n", + "# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY\r\n", + "# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\r\n", + "# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\r\n", + "# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r\n", + "import pathlib\r\n", + "from pathlib import Path\r\n", + "\r\n", + "from merlin.dag import postorder_iter_nodes\r\n", + "from merlin.systems.dag import Ensemble\r\n", + "from merlin.systems.dag.runtimes.triton import TritonExecutorRuntime\r\n", + "from merlin.systems.triton.conversions import (\r\n", + " dict_array_to_triton_response,\r\n", + " triton_request_to_dict_array,\r\n", + ")\r\n", + "from merlin.systems.triton.utils import triton_error_handling, triton_multi_request\r\n", + "\r\n", + "\r\n", + "class TritonPythonModel:\r\n", + " \"\"\"Model for Triton Python Backend.\r\n", + "\r\n", + " Every Python model must have \"TritonPythonModel\" as the class name\r\n", + " \"\"\"\r\n", + "\r\n", + " def initialize(self, args):\r\n", + " \"\"\"Called only once when the model is being loaded. Allowing\r\n", + " the model to initialize any state associated with this model.\r\n", + "\r\n", + " Parameters\r\n", + " ----------\r\n", + " args : dict\r\n", + " Both keys and values are strings. The dictionary keys and values are:\r\n", + " * model_config: A JSON string containing the model configuration\r\n", + " * model_instance_kind: A string containing model instance kind\r\n", + " * model_instance_device_id: A string containing model instance device ID\r\n", + " * model_repository: Model repository path\r\n", + " * model_version: Model version\r\n", + " * model_name: Model name\r\n", + " \"\"\"\r\n", + " # Arg parsing\r\n", + " model_repo = args[\"model_repository\"]\r\n", + " repository_path = _parse_model_repository(model_repo)\r\n", + "\r\n", + " ensemble_path = (\r\n", + " Path(repository_path) / args[\"model_name\"] / str(args[\"model_version\"]) / \"ensemble\"\r\n", + " )\r\n", + "\r\n", + " self.ensemble = Ensemble.load(str(ensemble_path))\r\n", + "\r\n", + " for node in list(postorder_iter_nodes(self.ensemble.graph.output_node)):\r\n", + " if hasattr(node.op, \"load_artifacts\"):\r\n", + " node.op.load_artifacts(str(ensemble_path))\r\n", + "\r\n", + " @triton_multi_request\r\n", + " @triton_error_handling\r\n", + " def execute(self, request):\r\n", + " \"\"\"Receives a list of pb_utils.InferenceRequest as the only argument. This\r\n", + " function is called when an inference is requested for this model. Depending on the\r\n", + " batching configuration (e.g. Dynamic Batching) used, `requests` may contain\r\n", + " multiple requests. Every Python model, must create one pb_utils.InferenceResponse\r\n", + " for every pb_utils.InferenceRequest in `requests`. If there is an error, you can\r\n", + " set the error argument when creating a pb_utils.InferenceResponse.\r\n", + "\r\n", + " Parameters\r\n", + " ----------\r\n", + " requests : list\r\n", + " A list of pb_utils.InferenceRequest\r\n", + "\r\n", + " Returns\r\n", + " -------\r\n", + " list\r\n", + " A list of pb_utils.InferenceResponse. The length of this list must\r\n", + " be the same as `requests`\r\n", + " \"\"\"\r\n", + " inputs = triton_request_to_dict_array(request, self.ensemble.input_schema.column_names)\r\n", + " outputs = self.ensemble.transform(inputs, runtime=TritonExecutorRuntime())\r\n", + " return dict_array_to_triton_response(outputs)\r\n", + "\r\n", + "\r\n", + "def _parse_model_repository(model_repository: str) -> str:\r\n", + " \"\"\"\r\n", + " Extract the model repository path from the model_repository value\r\n", + " passed to the TritonPythonModel initialize method.\r\n", + " \"\"\"\r\n", + " # Handle bug in Tritonserver 22.06\r\n", + " # model_repository argument became path to model.py\r\n", + " # instead of path to model directory within the model repository\r\n", + " if model_repository.endswith(\".py\"):\r\n", + " return str(pathlib.Path(model_repository).parent.parent.parent)\r\n", + " else:\r\n", + " return str(pathlib.Path(model_repository).parent)\r\n" + ] + } + ], + "source": [ + "cat /workspace/models_for_benchmarking/executor_model/1/model.py" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "3d21ce62", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"versions\": {\"python\": \"3.8.10 (default, Nov 14 2022, 12:59:47) \\n[GCC 9.4.0]\"}, \"generated_timestamp\": 1679017581}" + ] + } + ], + "source": [ + "cat /workspace/models_for_benchmarking/executor_model/1/ensemble/metadata.json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7998b835", + "metadata": {}, + "outputs": [], + "source": [ + "# %%writefile /workspace/models_for_benchmarking/t4r_pytorch_pt/config.pbtxt\n", + "\n", + "# name: \"t4r_pytorch_pt\"\n", + "# input {\n", + "# name: \"sess_pid_seq__values\"\n", + "# data_type: TYPE_INT64\n", + "# dims: -1\n", + "# dims: 1\n", + "# }\n", + "# input {\n", + "# name: \"sess_pid_seq__nnzs\"\n", + "# data_type: TYPE_INT64\n", + "# dims: -1\n", + "# dims: 1\n", + "# }\n", + "# output {\n", + "# name: \"output\"\n", + "# data_type: TYPE_FP32\n", + "# dims: -1\n", + "# dims: 20\n", + "# }\n", + "# backend: \"python\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}