From 4371b7e1d245778e222aad8ebfc96c9be3c197c0 Mon Sep 17 00:00:00 2001
From: Radek Osmulski <rosmulski@gmail.com>
Date: Fri, 14 Apr 2023 14:14:09 +1000
Subject: [PATCH] Add T4Rec_repro train_and_save_model_for_benchmarking-Copy1 notebook

---
 ...nd_save_model_for_benchmarking-Copy1.ipynb | 1975 +++++++++++++++++
 1 file changed, 1975 insertions(+)
 create mode 100644 T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb

diff --git a/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb b/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb
new file mode 100644
index 0000000000..74b19fa9d3
--- /dev/null
+++ b/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb
@@ -0,0 +1,1975 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "d062ceda",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# %%bash\n",
+    "\n",
+    "# # cd /models && git fetch origin && git checkout origin/tf/transformer-api && pip install .\n",
+    "# cd /models && git checkout main && git pull origin main && pip install .\n",
+    "# cd /core && git checkout main && git pull origin main && pip install .\n",
+    "# cd /nvtabular && git checkout main && git pull origin main && pip install .\n",
+    "# cd /systems && git checkout main && git pull origin main && pip install .\n",
+    "# cd /dataloader && git checkout main && git pull origin main && pip install .\n",
+    "\n",
+    "# ---\n",
+    "# pip install matplotlib"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "e9929dc8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Requirement already satisfied: gdown in /usr/local/lib/python3.8/dist-packages (4.7.1)\n",
+      "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.64.1)\n",
+      "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.9.0)\n",
+      "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.11.1)\n",
+      "Requirement already satisfied: six in /usr/lib/python3/dist-packages (from gdown) (1.14.0)\n",
+      "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.8/dist-packages (from gdown) (2.28.1)\n",
+      "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.3.2.post1)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n",
+      "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.26.13)\n",
+      "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (2.1.1)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2.8)\n",
+      "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6; extra == \"socks\" in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.7.1)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Downloading...\n",
+      "From (uriginal): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n",
+      "From (redirected): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV&confirm=t&uuid=0dd96474-79af-47bb-9148-b96d64204e14\n",
+      "To: /workspace/T4Rec_repro/rees46_ecom_dataset_small_for_ci.zip\n",
+      "100%|██████████| 43.4M/43.4M [00:12<00:00, 3.62MB/s]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64  InRelease\n",
+      "Hit:2 http://archive.ubuntu.com/ubuntu focal InRelease\n",
+      "Hit:3 http://security.ubuntu.com/ubuntu focal-security InRelease\n",
+      "Hit:4 http://archive.ubuntu.com/ubuntu focal-updates InRelease\n",
+      "Hit:5 http://archive.ubuntu.com/ubuntu focal-backports InRelease\n",
+      "Reading package lists...\n",
+      "Reading package lists...\n",
+      "Building dependency tree...\n",
+      "Reading state information...\n",
+      "unzip is already the newest version (6.0-25ubuntu1.1).\n",
+      "0 upgraded, 0 newly installed, 0 to remove and 98 not upgraded.\n",
+      "Archive:  rees46_ecom_dataset_small_for_ci.zip\n",
+      "   creating: ecom_dataset/0001/\n",
+      "  inflating: ecom_dataset/0001/valid.parquet  \n",
+      " extracting: ecom_dataset/0001/.zip  \n",
+      "  inflating: ecom_dataset/0001/train.parquet  \n",
+      "  inflating: ecom_dataset/0001/test.parquet  \n",
+      "   creating: ecom_dataset/0002/\n",
+      "  inflating: ecom_dataset/0002/valid.parquet  \n",
+      "  inflating: ecom_dataset/0002/train.parquet  \n",
+      "  inflating: ecom_dataset/0002/test.parquet  \n"
+     ]
+    }
+   ],
+   "source": [
+    "%%bash\n",
+    "\n",
+    "rm -rf ecom_dataset\n",
+    "mkdir -p ecom_dataset\n",
+    "\n",
+    "pip install gdown\n",
+    "# gdown https://drive.google.com/uc?id=1BvCHc4eXComuNK93bKhRM6cbg9y5p350  # <-- full dataset\n",
+    "gdown https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n",
+    "apt-get update -y\n",
+    "apt-get install unzip -y\n",
+    "unzip -d ecom_dataset \"rees46_ecom_dataset_small_for_ci.zip\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "4a0105a7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# !cd /dataloader && git checkout main && git pull origin main && git checkout ce2215d8f871d0fb8c71900f7b914a226aea7c24 && pip install ."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "8101aa27",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# !cd /core && git checkout main && git pull origin main && pip install ."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "0f799172",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# %%writefile /core/merlin/dag/graph.py\n",
+    "\n",
+    "# #\n",
+    "# # Copyright (c) 2022, NVIDIA CORPORATION.\n",
+    "# #\n",
+    "# # Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+    "# # you may not use this file except in compliance with the License.\n",
+    "# # You may obtain a copy of the License at\n",
+    "# #\n",
+    "# #     http://www.apache.org/licenses/LICENSE-2.0\n",
+    "# #\n",
+    "# # Unless required by applicable law or agreed to in writing, software\n",
+    "# # distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+    "# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+    "# # See the License for the specific language governing permissions and\n",
+    "# # limitations under the License.\n",
+    "# #\n",
+    "\n",
+    "# import logging\n",
+    "# from collections import deque\n",
+    "# from typing import Dict, Optional\n",
+    "\n",
+    "# from merlin.dag.node import (\n",
+    "#     Node,\n",
+    "#     _combine_schemas,\n",
+    "#     iter_nodes,\n",
+    "#     postorder_iter_nodes,\n",
+    "#     preorder_iter_nodes,\n",
+    "# )\n",
+    "# from merlin.schema import Schema\n",
+    "\n",
+    "# LOG = logging.getLogger(\"merlin\")\n",
+    "\n",
+    "\n",
+    "# class Graph:\n",
+    "#     \"\"\"\n",
+    "#     Represents a DAG composed of Nodes, each of which contains an operator that\n",
+    "#     transforms dataframes or dataframe-like data\n",
+    "#     \"\"\"\n",
+    "\n",
+    "#     def __init__(self, output_node: Node, subgraphs: Optional[Dict[str, Node]] = None):\n",
+    "#         self.output_node = output_node\n",
+    "#         self.subgraphs = subgraphs or {}\n",
+    "\n",
+    "#         parents_with_deps = self.output_node.parents_with_dependencies\n",
+    "#         parents_with_deps.append(output_node)\n",
+    "\n",
+    "#         for name, sg in self.subgraphs.items():\n",
+    "#             if sg not in parents_with_deps:\n",
+    "#                 raise ValueError(\n",
+    "#                     f\"The output node of subgraph {name} does not exist in the provided graph.\"\n",
+    "#                 )\n",
+    "\n",
+    "#     def subgraph(self, name: str) -> \"Graph\":\n",
+    "#         if name not in self.subgraphs.keys():\n",
+    "#             raise ValueError(f\"No subgraph named {name}. Options are: {self.subgraphs.keys()}\")\n",
+    "#         return Graph(self.subgraphs[name])\n",
+    "\n",
+    "#     @property\n",
+    "#     def input_dtypes(self):\n",
+    "#         if self.input_schema:\n",
+    "#             return {\n",
+    "#                 name: col_schema.dtype\n",
+    "#                 for name, col_schema in self.input_schema.column_schemas.items()\n",
+    "#             }\n",
+    "#         else:\n",
+    "#             return {}\n",
+    "\n",
+    "#     @property\n",
+    "#     def output_dtypes(self):\n",
+    "#         if self.output_schema:\n",
+    "#             return {\n",
+    "#                 name: col_schema.dtype\n",
+    "#                 for name, col_schema in self.output_schema.column_schemas.items()\n",
+    "#             }\n",
+    "#         else:\n",
+    "#             return {}\n",
+    "\n",
+    "#     @property\n",
+    "#     def column_mapping(self):\n",
+    "#         nodes = preorder_iter_nodes(self.output_node)\n",
+    "#         column_mapping = self.output_node.column_mapping\n",
+    "#         for node in list(nodes)[1:]:\n",
+    "#             node_map = node.column_mapping\n",
+    "#             for output_col, input_cols in column_mapping.items():\n",
+    "#                 early_inputs = []\n",
+    "#                 for input_col in input_cols:\n",
+    "#                     early_inputs += node_map.get(input_col, [input_col])\n",
+    "#                 column_mapping[output_col] = early_inputs\n",
+    "\n",
+    "#         return column_mapping\n",
+    "\n",
+    "#     def construct_schema(self, root_schema: Schema, preserve_dtypes=False) -> \"Graph\":\n",
+    "#         \"\"\"\n",
+    "#         Given the schema of a dataset to transform, determine the output schema of the graph\n",
+    "\n",
+    "#         Parameters\n",
+    "#         ----------\n",
+    "#         root_schema : Schema\n",
+    "#             The schema of a dataset to be transformed with this DAG\n",
+    "#         preserve_dtypes : bool, optional\n",
+    "#             Whether to keep any dtypes that may already be present in the schemas, by default False\n",
+    "\n",
+    "#         Returns\n",
+    "#         -------\n",
+    "#         Graph\n",
+    "#             This DAG after the schemas have been filled in\n",
+    "#         \"\"\"\n",
+    "#         nodes = list(postorder_iter_nodes(self.output_node))\n",
+    "\n",
+    "#         self._compute_node_schemas(root_schema, nodes, preserve_dtypes)\n",
+    "#         # self._validate_node_schemas(root_schema, nodes, preserve_dtypes)\n",
+    "\n",
+    "#         return self\n",
+    "\n",
+    "#     def _compute_node_schemas(self, root_schema, nodes, preserve_dtypes=False):\n",
+    "#         for node in nodes:\n",
+    "#             node.compute_schemas(root_schema, preserve_dtypes=preserve_dtypes)\n",
+    "\n",
+    "#     def _validate_node_schemas(self, root_schema, nodes, strict_dtypes=False):\n",
+    "#         for node in nodes:\n",
+    "#             node.validate_schemas(root_schema, strict_dtypes=strict_dtypes)\n",
+    "\n",
+    "#     @property\n",
+    "#     def input_schema(self):\n",
+    "#         # leaf_node input and output schemas are the same (aka selection)\n",
+    "#         return _combine_schemas(self.leaf_nodes)\n",
+    "\n",
+    "#     @property\n",
+    "#     def leaf_nodes(self):\n",
+    "#         return [node for node in postorder_iter_nodes(self.output_node) if not node.parents]\n",
+    "\n",
+    "#     @property\n",
+    "#     def output_schema(self):\n",
+    "#         return self.output_node.output_schema\n",
+    "\n",
+    "#     def _input_columns(self):\n",
+    "#         input_cols = []\n",
+    "#         for node in iter_nodes([self.output_node]):\n",
+    "#             upstream_output_cols = []\n",
+    "\n",
+    "#             for upstream_node in node.parents_with_dependencies:\n",
+    "#                 upstream_output_cols += upstream_node.output_columns.names\n",
+    "\n",
+    "#             upstream_output_cols = _get_unique(upstream_output_cols)\n",
+    "#             input_cols += list(set(node.input_columns.names) - set(upstream_output_cols))\n",
+    "\n",
+    "#         return _get_unique(input_cols)\n",
+    "\n",
+    "#     def remove_inputs(self, to_remove):\n",
+    "#         \"\"\"\n",
+    "#         Removes columns from a Graph\n",
+    "\n",
+    "#         Starting at the leaf nodes, trickle down looking for columns to remove.\n",
+    "#         When one is found, remove it, then propagate the removal to any other\n",
+    "#         output columns derived from that column.\n",
+    "\n",
+    "#         Parameters\n",
+    "#         -----------\n",
+    "#         graph : Graph\n",
+    "#             The graph to remove columns from\n",
+    "#         to_remove : array_like\n",
+    "#             A list of input column names to remove from the graph\n",
+    "\n",
+    "#         Returns\n",
+    "#         -------\n",
+    "#         Graph\n",
+    "#             The same graph with columns removed\n",
+    "#         \"\"\"\n",
+    "#         nodes_to_process = deque([(node, to_remove) for node in self.leaf_nodes])\n",
+    "\n",
+    "#         while nodes_to_process:\n",
+    "#             node, columns_to_remove = nodes_to_process.popleft()\n",
+    "#             if node.input_schema and len(node.input_schema):\n",
+    "#                 output_columns_to_remove = node.remove_inputs(columns_to_remove)\n",
+    "\n",
+    "#                 for child in node.children:\n",
+    "#                     nodes_to_process.append(\n",
+    "#                         (child, list(set(to_remove + output_columns_to_remove)))\n",
+    "#                     )\n",
+    "\n",
+    "#                     if not len(node.input_schema):\n",
+    "#                         node.remove_child(child)\n",
+    "\n",
+    "#             # remove any dependencies that do not have an output schema\n",
+    "#             node.dependencies = [\n",
+    "#                 dep for dep in node.dependencies if dep.output_schema and len(dep.output_schema)\n",
+    "#             ]\n",
+    "\n",
+    "#             if not node.input_schema or not len(node.input_schema):\n",
+    "#                 for parent in node.parents:\n",
+    "#                     parent.remove_child(node)\n",
+    "#                 for dependency in node.dependencies:\n",
+    "#                     dependency.remove_child(node)\n",
+    "#                 del node\n",
+    "\n",
+    "#         return self\n",
+    "\n",
+    "#     @classmethod\n",
+    "#     def get_nodes_by_op_type(cls, nodes, op_type):\n",
+    "#         return set(node for node in iter_nodes(nodes) if isinstance(node.op, op_type))\n",
+    "\n",
+    "\n",
+    "# def _get_schemaless_nodes(nodes):\n",
+    "#     schemaless_nodes = []\n",
+    "#     for node in iter_nodes(nodes):\n",
+    "#         if node.input_schema is None:\n",
+    "#             schemaless_nodes.append(node)\n",
+    "\n",
+    "#     return set(schemaless_nodes)\n",
+    "\n",
+    "\n",
+    "# def _get_unique(cols):\n",
+    "#     # Need to preserve order in unique-column list\n",
+    "#     return list({x: x for x in cols}.keys())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "ab4f272d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# !cd /core && pip install ."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "ceb3ae93",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-04-13 11:21:28.090236: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX\n",
+      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n",
+      "  warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n",
+      "2023-04-13 11:21:30.471061: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:21:30.471514: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:21:30.471678: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[INFO]: sparse_operation_kit is imported\n",
+      "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11.\n",
+      "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n",
+      "[SOK INFO] Initialize finished, communication tool: horovod\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-04-13 11:21:30.757567: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX\n",
+      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+      "2023-04-13 11:21:30.758435: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:21:30.758639: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:21:30.758792: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:21:31.508591: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:21:31.508802: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:21:31.508961: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:21:31.509071: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n",
+      "2023-04-13 11:21:31.509079: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n",
+      "2023-04-13 11:21:31.509140: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory:  -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n",
+      "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n",
+    "import gc\n",
+    "import numpy as np\n",
+    "\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "from merlin.schema.tags import Tags\n",
+    "from merlin.io.dataset import Dataset\n",
+    "import merlin.models.tf as mm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "11647dd3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train = Dataset(\"ecom_dataset/0001/train.parquet\")\n",
+    "valid = Dataset(\"ecom_dataset/0002/test.parquet\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "4ab4e0fb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "target = 'sess_pid_seq'\n",
+    "seq_name = target"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "8d9903e6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# a couple of starter hyperparams\n",
+    "\n",
+    "d_model = 192\n",
+    "n_layer = 3\n",
+    "n_head = 16\n",
+    "batch_size = 128\n",
+    "learning_rate = 0.0006667377132554976\n",
+    "n_epoch = 1\n",
+    "item_embedding_dim = 448 \n",
+    "item_id_embeddings_init_std = 3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "410ea223",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# seq_name = 'seq'\n",
+    "# target = seq_name"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "4328f03a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from nvtabular.inference.triton import export_tensorflow_ensemble\n",
+    "from nvtabular import Workflow\n",
+    "from nvtabular.ops import Categorify, Rename"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4571b92b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "d5a9dd50",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ops = ['sess_pid_seq'] >> Categorify(dtype=np.int32) #>> Rename(name=seq_name)\n",
+    "\n",
+    "wf = Workflow(ops)\n",
+    "\n",
+    "train = wf.fit_transform(train)\n",
+    "valid = wf.transform(valid)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "3116726e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# cat rees46_schema_modified.pbtxt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "69e8f95c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# %%writefile rees46_schema_modified_2.pbtxt\n",
+    "\n",
+    "# feature {\n",
+    "#   name: \"seq\"\n",
+    "#   value_count {\n",
+    "#     min: 2\n",
+    "#   }\n",
+    "#   type: INT\n",
+    "#   int_domain {\n",
+    "#     name: \"seq\"\n",
+    "#     min: 1\n",
+    "#     max: 390000\n",
+    "#     is_categorical: true\n",
+    "#   }\n",
+    "#   annotation {\n",
+    "#     tag: \"item_id\"\n",
+    "#     tag: \"list\"\n",
+    "#     tag: \"categorical\"\n",
+    "#     tag: \"item\"\n",
+    "#   }\n",
+    "# }"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "a6ade14a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n",
+    "\n",
+    "def get_model():\n",
+    "    mlp_block = mm.MLPBlock(\n",
+    "                    [d_model],\n",
+    "                    activation='relu',\n",
+    "                    no_activation_last_layer=True,\n",
+    "                )\n",
+    "\n",
+    "    schema = TensorflowMetadata.from_proto_text_file(\n",
+    "        './',\n",
+    "        file_name='rees46_schema_modified.pbtxt'\n",
+    "    ).to_merlin_schema()\n",
+    "\n",
+    "    train.schema = schema\n",
+    "    \n",
+    "    schema_model = schema.select_by_tag(Tags.ITEM_ID)\n",
+    "    input_block = mm.InputBlockV2(\n",
+    "        schema_model,\n",
+    "        categorical=mm.Embeddings(\n",
+    "                schema_model.select_by_tag(Tags.CATEGORICAL),\n",
+    "                dim=item_embedding_dim,\n",
+    "                sequence_combiner=None,\n",
+    "            )\n",
+    "        )\n",
+    "\n",
+    "    train.schema = train.schema.select_by_name(seq_name)\n",
+    "\n",
+    "    xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)\n",
+    "\n",
+    "    dense_block = mm.SequentialBlock(\n",
+    "        input_block,\n",
+    "        mlp_block,\n",
+    "        xlnet_block\n",
+    "    )\n",
+    "\n",
+    "    mlp_block2 = mm.MLPBlock(\n",
+    "                    [item_embedding_dim],\n",
+    "                    activation='relu',\n",
+    "                    no_activation_last_layer=True,\n",
+    "                )\n",
+    "\n",
+    "    prediction_task = mm.CategoricalOutput(\n",
+    "        to_call=input_block[\"categorical\"][target],\n",
+    "    )\n",
+    "\n",
+    "    model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)\n",
+    "\n",
+    "    optimizer = tf.keras.optimizers.Adam(\n",
+    "        learning_rate=learning_rate,\n",
+    "    )\n",
+    "\n",
+    "    model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n",
+    "                  metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[20])\n",
+    "                 )\n",
+    "    return model_transformer, xlnet_block"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "523fe2ac",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [<Tags.ITEM: 'item'>, <Tags.ID: 'id'>].\n",
+      "  warnings.warn(\n",
+      "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values  each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n",
+      "  warnings.warn(\n",
+      "2023-04-13 11:21:38.342588: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n",
+      "  warnings.warn(\n",
+      "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:436: UserWarning: Converting sparse IndexedSlices to a dense Tensor with 174720448 elements. This may consume a large amount of memory.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n",
+      "677/677 [==============================] - 106s 144ms/step - loss: 7.3129 - recall_at_20: 0.1424 - mrr_at_20: 0.0802 - ndcg_at_20: 0.0939 - map_at_20: 0.0802 - precision_at_20: 0.0071 - regularization_loss: 0.0000e+00 - loss_batch: 7.3149\n",
+      "84/84 [==============================] - 4s 27ms/step - loss: 8.5848 - recall_at_20: 0.2229 - mrr_at_20: 0.0736 - ndcg_at_20: 0.1066 - map_at_20: 0.0736 - precision_at_20: 0.0111 - regularization_loss: 0.0000e+00 - loss_batch: 8.5971\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'loss': 8.584781646728516,\n",
+       " 'recall_at_20': 0.2308632731437683,\n",
+       " 'mrr_at_20': 0.07471762597560883,\n",
+       " 'ndcg_at_20': 0.10908268392086029,\n",
+       " 'map_at_20': 0.07471762597560883,\n",
+       " 'precision_at_20': 0.011543160304427147,\n",
+       " 'regularization_loss': 0.0,\n",
+       " 'loss_batch': 9.130510330200195}"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model_transformer, xlnet_block = get_model()\n",
+    "model_transformer.fit(\n",
+    "    train,\n",
+    "    batch_size=batch_size,\n",
+    "    epochs=n_epoch,\n",
+    "    pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n",
+    ")\n",
+    "\n",
+    "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n",
+    "model_transformer.evaluate(\n",
+    "    valid,\n",
+    "    batch_size=batch_size,\n",
+    "    pre=predict_last,\n",
+    "    return_dict=True\n",
+    ")\n",
+    "\n",
+    "# model_transformer.save('t4rec_model')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "5bd66ba8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n",
+      "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n",
+      "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n",
+      "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 114). These functions will not be directly callable after loading.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:tensorflow:Assets written to: /tmp/tmpkph1f3_r/model.savedmodel/assets\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:tensorflow:Assets written to: /tmp/tmpkph1f3_r/model.savedmodel/assets\n",
+      "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:100: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n",
+      "  config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n",
+      "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n",
+      "  config[i] = tf.keras.utils.serialize_keras_object(layer)\n",
+      "/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n",
+      "  return generic_utils.serialize_keras_object(obj)\n",
+      "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [<Tags.ITEM: 'item'>, <Tags.ID: 'id'>].\n",
+      "  warnings.warn(\n",
+      "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values  each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n",
+      "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [<Tags.ITEM: 'item'>, <Tags.ID: 'id'>].\n",
+      "  warnings.warn(\n",
+      "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values  each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n"
+     ]
+    }
+   ],
+   "source": [
+    "from merlin.systems.dag.ops.workflow import TransformWorkflow\n",
+    "from merlin.systems.dag.ops.tensorflow import PredictTensorflow\n",
+    "\n",
+    "serving_operators = [seq_name] >> TransformWorkflow(wf) >> PredictTensorflow(model_transformer)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "3ef1e5fc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import merlin.models.tf as mm\n",
+    "# import tensorflow as tf\n",
+    "# tf_model_path = os.path.join('t4rec_model')\n",
+    "\n",
+    "# model = tf.keras.models.load_model(tf_model_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "e2a7b6ee",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "\n",
+    "rm -rf /workspace/models_for_benchmarking\n",
+    "mkdir -p /workspace/models_for_benchmarking"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "55ad012c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>tags</th>\n",
+       "      <th>dtype</th>\n",
+       "      <th>is_list</th>\n",
+       "      <th>is_ragged</th>\n",
+       "      <th>properties.domain.min</th>\n",
+       "      <th>properties.domain.max</th>\n",
+       "      <th>properties.domain.name</th>\n",
+       "      <th>properties.value_count.min</th>\n",
+       "      <th>properties.value_count.max</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>sess_pid_seq</td>\n",
+       "      <td>(Tags.CATEGORICAL, Tags.ITEM, Tags.ID, Tags.IT...</td>\n",
+       "      <td>DType(name='int64', element_type=&lt;ElementType....</td>\n",
+       "      <td>True</td>\n",
+       "      <td>True</td>\n",
+       "      <td>1</td>\n",
+       "      <td>390000</td>\n",
+       "      <td>sess_pid_seq</td>\n",
+       "      <td>2</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "[{'name': 'sess_pid_seq', 'tags': {<Tags.CATEGORICAL: 'categorical'>, <Tags.ITEM: 'item'>, <Tags.ID: 'id'>, <Tags.ITEM_ID: 'item_id'>, <Tags.LIST: 'list'>}, 'properties': {'domain': {'min': 1, 'max': 390000, 'name': 'sess_pid_seq'}, 'value_count': {'min': 2, 'max': None}}, 'dtype': DType(name='int64', element_type=<ElementType.Int: 'int'>, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=2, max=None)))), 'is_list': True, 'is_ragged': True}]"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "train.schema.select_by_name('sess_pid_seq')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "1a39b4f8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n",
+      "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 114). These functions will not be directly callable after loading.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:tensorflow:Assets written to: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel/assets\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:tensorflow:Assets written to: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel/assets\n",
+      "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:100: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n",
+      "  config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n",
+      "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n",
+      "  config[i] = tf.keras.utils.serialize_keras_object(layer)\n",
+      "/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n",
+      "  return generic_utils.serialize_keras_object(obj)\n",
+      "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [<Tags.ITEM: 'item'>, <Tags.ID: 'id'>].\n",
+      "  warnings.warn(\n",
+      "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values  each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n"
+     ]
+    }
+   ],
+   "source": [
+    "from merlin.systems.dag.ensemble import Ensemble\n",
+    "\n",
+    "ensemble = Ensemble(serving_operators, wf.input_schema)\n",
+    "ens_conf, node_confs = ensemble.export(\"/workspace/models_for_benchmarking\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "1720a5af",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ls: cannot access '/workspace/models_for_benchmarking/1': No such file or directory\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "ls /workspace/models_for_benchmarking/1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "d7cdc6cc",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<subprocess.Popen at 0x7fd4c501f0a0>"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "I0413 11:24:28.716029 1527 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7f7f2a000000' with size 268435456\n",
+      "I0413 11:24:28.716361 1527 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864\n",
+      "I0413 11:24:28.718446 1527 model_lifecycle.cc:459] loading: 0_transformworkflowtriton:1\n",
+      "I0413 11:24:28.718465 1527 model_lifecycle.cc:459] loading: 1_predicttensorflowtriton:1\n",
+      "I0413 11:24:28.718478 1527 model_lifecycle.cc:459] loading: executor_model:1\n",
+      "I0413 11:24:28.924940 1527 tensorflow.cc:2536] TRITONBACKEND_Initialize: tensorflow\n",
+      "I0413 11:24:28.924955 1527 tensorflow.cc:2546] Triton TRITONBACKEND API version: 1.10\n",
+      "I0413 11:24:28.924960 1527 tensorflow.cc:2552] 'tensorflow' TRITONBACKEND API version: 1.10\n",
+      "I0413 11:24:28.924962 1527 tensorflow.cc:2576] backend configuration:\n",
+      "{\"cmdline\":{\"auto-complete-config\":\"true\",\"min-compute-capability\":\"6.000000\",\"backend-directory\":\"/opt/tritonserver/backends\",\"default-max-batch-size\":\"4\"}}\n",
+      "2023-04-13 11:24:30.207841: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX\n",
+      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+      "2023-04-13 11:24:32.085748: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:32.086174: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:32.086365: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n",
+      "  warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n",
+      "I0413 11:24:33.803267 1527 python_be.cc:1856] TRITONBACKEND_ModelInstanceInitialize: executor_model_0 (GPU device 0)\n",
+      "2023-04-13 11:24:35.316462: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX\n",
+      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+      "2023-04-13 11:24:37.126873: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:37.127251: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:37.127427: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "I0413 11:24:37.157059 1527 tensorflow.cc:2642] TRITONBACKEND_ModelInitialize: 1_predicttensorflowtriton (version 1)\n",
+      "I0413 11:24:37.157179 1527 model_lifecycle.cc:694] successfully loaded 'executor_model' version 1\n",
+      "2023-04-13 11:24:37.157805: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n",
+      "2023-04-13 11:24:37.178699: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }\n",
+      "2023-04-13 11:24:37.178742: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n",
+      "2023-04-13 11:24:37.178876: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX\n",
+      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+      "2023-04-13 11:24:37.179781: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:37.196068: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:37.196289: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:37.196570: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:37.196747: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:37.196909: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:37.197031: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n",
+      "2023-04-13 11:24:37.203975: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38618 MB memory:  -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n",
+      "2023-04-13 11:24:37.262568: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled\n",
+      "2023-04-13 11:24:37.271889: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.\n",
+      "2023-04-13 11:24:37.678751: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n",
+      "2023-04-13 11:24:37.745105: I tensorflow/cc/saved_model/loader.cc:325] SavedModel load for tags { serve }; Status: success: OK. Took 587310 microseconds.\n",
+      "2023-04-13 11:24:39.105154: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX\n",
+      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+      "2023-04-13 11:24:40.997532: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:40.997994: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:40.998186: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n",
+      "  warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n",
+      "I0413 11:24:42.684588 1527 tensorflow.cc:2691] TRITONBACKEND_ModelInstanceInitialize: 1_predicttensorflowtriton_0 (GPU device 0)\n",
+      "2023-04-13 11:24:42.684902: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n",
+      "2023-04-13 11:24:42.702205: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }\n",
+      "2023-04-13 11:24:42.702239: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n",
+      "2023-04-13 11:24:42.702447: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:42.702659: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:42.702822: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:42.703025: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:42.703189: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:42.703311: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38618 MB memory:  -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n",
+      "2023-04-13 11:24:42.742722: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-04-13 11:24:43.330311: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n",
+      "2023-04-13 11:24:43.395816: I tensorflow/cc/saved_model/loader.cc:325] SavedModel load for tags { serve }; Status: success: OK. Took 710922 microseconds.\n",
+      "I0413 11:24:43.395921 1527 python_be.cc:1856] TRITONBACKEND_ModelInstanceInitialize: 0_transformworkflowtriton_0 (GPU device 0)\n",
+      "I0413 11:24:43.396107 1527 model_lifecycle.cc:694] successfully loaded '1_predicttensorflowtriton' version 1\n",
+      "2023-04-13 11:24:44.668497: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX\n",
+      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+      "2023-04-13 11:24:46.525315: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:46.525768: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-04-13 11:24:46.525978: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "I0413 11:24:46.583396 1527 model_lifecycle.cc:694] successfully loaded '0_transformworkflowtriton' version 1\n",
+      "I0413 11:24:46.583508 1527 server.cc:563] \n",
+      "+------------------+------+\n",
+      "| Repository Agent | Path |\n",
+      "+------------------+------+\n",
+      "+------------------+------+\n",
+      "\n",
+      "I0413 11:24:46.583587 1527 server.cc:590] \n",
+      "+------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+\n",
+      "| Backend    | Path                                                            | Config                                                                                                                                                        |\n",
+      "+------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+\n",
+      "| python     | /opt/tritonserver/backends/python/libtriton_python.so           | {\"cmdline\":{\"auto-complete-config\":\"true\",\"min-compute-capability\":\"6.000000\",\"backend-directory\":\"/opt/tritonserver/backends\",\"default-max-batch-size\":\"4\"}} |\n",
+      "| tensorflow | /opt/tritonserver/backends/tensorflow2/libtriton_tensorflow2.so | {\"cmdline\":{\"auto-complete-config\":\"true\",\"min-compute-capability\":\"6.000000\",\"backend-directory\":\"/opt/tritonserver/backends\",\"default-max-batch-size\":\"4\"}} |\n",
+      "+------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+\n",
+      "\n",
+      "I0413 11:24:46.583634 1527 server.cc:633] \n",
+      "+---------------------------+---------+--------+\n",
+      "| Model                     | Version | Status |\n",
+      "+---------------------------+---------+--------+\n",
+      "| 0_transformworkflowtriton | 1       | READY  |\n",
+      "| 1_predicttensorflowtriton | 1       | READY  |\n",
+      "| executor_model            | 1       | READY  |\n",
+      "+---------------------------+---------+--------+\n",
+      "\n",
+      "I0413 11:24:46.610538 1527 metrics.cc:864] Collecting metrics for GPU 0: Quadro RTX 8000\n",
+      "I0413 11:24:46.610778 1527 metrics.cc:757] Collecting CPU metrics\n",
+      "I0413 11:24:46.610913 1527 tritonserver.cc:2264] \n",
+      "+----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n",
+      "| Option                           | Value                                                                                                                                                                                                |\n",
+      "+----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n",
+      "| server_id                        | triton                                                                                                                                                                                               |\n",
+      "| server_version                   | 2.28.0                                                                                                                                                                                               |\n",
+      "| server_extensions                | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data statistics trace logging |\n",
+      "| model_repository_path[0]         | /workspace/models_for_benchmarking/                                                                                                                                                                  |\n",
+      "| model_control_mode               | MODE_NONE                                                                                                                                                                                            |\n",
+      "| strict_model_config              | 0                                                                                                                                                                                                    |\n",
+      "| rate_limit                       | OFF                                                                                                                                                                                                  |\n",
+      "| pinned_memory_pool_byte_size     | 268435456                                                                                                                                                                                            |\n",
+      "| cuda_memory_pool_byte_size{0}    | 67108864                                                                                                                                                                                             |\n",
+      "| response_cache_byte_size         | 0                                                                                                                                                                                                    |\n",
+      "| min_supported_compute_capability | 6.0                                                                                                                                                                                                  |\n",
+      "| strict_readiness                 | 1                                                                                                                                                                                                    |\n",
+      "| exit_timeout                     | 30                                                                                                                                                                                                   |\n",
+      "+----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n",
+      "\n",
+      "I0413 11:24:46.611676 1527 grpc_server.cc:4819] Started GRPCInferenceService at 0.0.0.0:8001\n",
+      "I0413 11:24:46.611833 1527 http_server.cc:3477] Started HTTPService at 0.0.0.0:8000\n",
+      "I0413 11:24:46.652586 1527 http_server.cc:184] Started Metrics Service at 0.0.0.0:8002\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-04-13 11:25:37.504455: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n"
+     ]
+    }
+   ],
+   "source": [
+    "import nvtabular.inference.triton as nvt_triton\n",
+    "import tritonclient.grpc as grpcclient\n",
+    "import subprocess\n",
+    "\n",
+    "subprocess.Popen(['tritonserver', '--model-repository=/workspace/models_for_benchmarking/'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6f63b425",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4a772eeb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# !pkill triton"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f6ed7b5a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tritonhttpclient\n",
+    "try:\n",
+    "    triton_client = tritonhttpclient.InferenceServerClient(url=\"localhost:8000\", verbose=True)\n",
+    "    print(\"client created.\")\n",
+    "except Exception as e:\n",
+    "    raise RuntimeError(\"channel creation failed: \" + str(e)) from e\n",
+    "triton_client.is_server_live()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "10c2a62e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "valid.compute().iloc[:1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2c2723e9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from merlin.systems.triton import convert_df_to_triton_input\n",
+    "\n",
+    "validation_data = valid.compute()\n",
+    "inputs = convert_df_to_triton_input(wf.input_schema, validation_data.iloc[:1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fa9fc0dd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "inputs[0].name()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6ae7eb08",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "inputs[0].shape()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ac3596c3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "inputs[1].name()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "18f8e77d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "inputs[1].shape()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "292b58da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "validation_data.iloc[:1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f8e1fd90",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wf.input_schema"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5a79c58f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tritonclient.grpc as grpcclient\n",
+    "\n",
+    "with grpcclient.InferenceServerClient(\"localhost:8001\") as client:\n",
+    "    response = client.infer('1_predicttensorflowtriton', inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b6dd51a6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "response.get_output('sess_pid_seq/categorical_output')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ba6712bb",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "637eb3f0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fd62f641",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "d1bc6530",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [<Tags.ITEM: 'item'>, <Tags.ID: 'id'>].\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n",
+      "  (_feature_shapes): Dict(\n",
+      "    (sess_pid_seq): TensorShape([128, None, 1])\n",
+      "  )\n",
+      "  (_feature_dtypes): Dict(\n",
+      "    (sess_pid_seq): tf.int32\n",
+      "  )\n",
+      "), because it is not built.\n",
+      "WARNING:absl:Function `_wrapped_model` contains input name(s) sess_pid_seq with unsupported characters which will be renamed to sess_pid_seq_1 in the SavedModel.\n",
+      "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 110). These functions will not be directly callable after loading.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:tensorflow:Assets written to: /workspace/models_for_benchmarking/0_predicttensorflowtriton/1/model.savedmodel/assets\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:tensorflow:Assets written to: /workspace/models_for_benchmarking/0_predicttensorflowtriton/1/model.savedmodel/assets\n",
+      "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:83: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n",
+      "  config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n",
+      "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n",
+      "  config[i] = tf.keras.utils.serialize_keras_object(layer)\n",
+      "/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n",
+      "  return generic_utils.serialize_keras_object(obj)\n",
+      "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [<Tags.ITEM: 'item'>, <Tags.ID: 'id'>].\n",
+      "  warnings.warn(\n",
+      "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values  each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n"
+     ]
+    }
+   ],
+   "source": [
+    "from merlin.systems.dag.ensemble import Ensemble\n",
+    "\n",
+    "ensemble = Ensemble(serving_operators, train.schema)\n",
+    "ens_conf, node_confs = ensemble.export(\"/workspace/models_for_benchmarking\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "8d390999",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "name: \"0_predicttensorflowtriton\"\r\n",
+      "platform: \"tensorflow_savedmodel\"\r\n",
+      "input {\r\n",
+      "  name: \"sess_pid_seq\"\r\n",
+      "  data_type: TYPE_INT32\r\n",
+      "  dims: -1\r\n",
+      "  dims: 1\r\n",
+      "}\r\n",
+      "input {\r\n",
+      "  name: \"sess_pid_seq_1\"\r\n",
+      "  data_type: TYPE_INT32\r\n",
+      "  dims: -1\r\n",
+      "  dims: 1\r\n",
+      "}\r\n",
+      "output {\r\n",
+      "  name: \"sess_pid_seq/categorical_output\"\r\n",
+      "  data_type: TYPE_FP32\r\n",
+      "  dims: -1\r\n",
+      "  dims: 390001\r\n",
+      "}\r\n",
+      "parameters {\r\n",
+      "  key: \"TF_GRAPH_TAG\"\r\n",
+      "  value {\r\n",
+      "    string_value: \"serve\"\r\n",
+      "  }\r\n",
+      "}\r\n",
+      "parameters {\r\n",
+      "  key: \"TF_SIGNATURE_DEF\"\r\n",
+      "  value {\r\n",
+      "    string_value: \"serving_default\"\r\n",
+      "  }\r\n",
+      "}\r\n",
+      "backend: \"tensorflow\"\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "cat /workspace/models_for_benchmarking/0_predicttensorflowtriton/config.pbtxt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "f7fe741c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Overwriting /workspace/models_for_benchmarking/0_predicttensorflowtriton/config.pbtxt\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile /workspace/models_for_benchmarking/0_predicttensorflowtriton/config.pbtxt\n",
+    "\n",
+    "name: \"0_predicttensorflowtriton\"\n",
+    "platform: \"tensorflow_savedmodel\"\n",
+    "input {\n",
+    "  name: \"sess_pid_seq\"\n",
+    "  data_type: TYPE_INT32\n",
+    "  dims: -1\n",
+    "  dims: 1\n",
+    "}\n",
+    "input {\n",
+    "  name: \"sess_pid_seq_1\"\n",
+    "  data_type: TYPE_INT32\n",
+    "  dims: -1\n",
+    "  dims: 1\n",
+    "}\n",
+    "output {\n",
+    "  name: \"sess_pid_seq/categorical_output\"\n",
+    "  data_type: TYPE_FP32\n",
+    "  dims: -1\n",
+    "  dims: 390001\n",
+    "}\n",
+    "parameters {\n",
+    "  key: \"TF_GRAPH_TAG\"\n",
+    "  value {\n",
+    "    string_value: \"serve\"\n",
+    "  }\n",
+    "}\n",
+    "parameters {\n",
+    "  key: \"TF_SIGNATURE_DEF\"\n",
+    "  value {\n",
+    "    string_value: \"serving_default\"\n",
+    "  }\n",
+    "}\n",
+    "backend: \"tensorflow\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "9cfe8bca",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "name: \"executor_model\"\r\n",
+      "platform: \"merlin_executor\"\r\n",
+      "input {\r\n",
+      "  name: \"sess_pid_seq__values\"\r\n",
+      "  data_type: TYPE_INT64\r\n",
+      "  dims: -1\r\n",
+      "  dims: -1\r\n",
+      "}\r\n",
+      "input {\r\n",
+      "  name: \"sess_pid_seq__lengths\"\r\n",
+      "  data_type: TYPE_INT32\r\n",
+      "  dims: -1\r\n",
+      "  dims: -1\r\n",
+      "}\r\n",
+      "output {\r\n",
+      "  name: \"sess_pid_seq/categorical_output\"\r\n",
+      "  data_type: TYPE_FP32\r\n",
+      "  dims: -1\r\n",
+      "  dims: 390001\r\n",
+      "}\r\n",
+      "backend: \"python\"\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "cat /workspace/models_for_benchmarking/executor_model/config.pbtxt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "a659255d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Overwriting /workspace/models_for_benchmarking/executor_model/config.pbtxt\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile /workspace/models_for_benchmarking/executor_model/config.pbtxt\n",
+    "\n",
+    "name: \"executor_model\"\n",
+    "platform: \"merlin_executor\"\n",
+    "input {\n",
+    "  name: \"sess_pid_seq__values\"\n",
+    "  data_type: TYPE_INT64\n",
+    "  dims: -1\n",
+    "  dims: -1\n",
+    "}\n",
+    "input {\n",
+    "  name: \"sess_pid_seq__nnzs\"\n",
+    "  data_type: TYPE_INT64\n",
+    "  dims: -1\n",
+    "  dims: -1\n",
+    "}\n",
+    "output {\n",
+    "  name: \"sess_pid_seq/categorical_output\"\n",
+    "  data_type: TYPE_FP32\n",
+    "  dims: -1\n",
+    "  dims: 390001\n",
+    "}\n",
+    "backend: \"python\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "ddf2dc55",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.\r\n",
+      "#\r\n",
+      "# Redistribution and use in source and binary forms, with or without\r\n",
+      "# modification, are permitted provided that the following conditions\r\n",
+      "# are met:\r\n",
+      "#  * Redistributions of source code must retain the above copyright\r\n",
+      "#    notice, this list of conditions and the following disclaimer.\r\n",
+      "#  * Redistributions in binary form must reproduce the above copyright\r\n",
+      "#    notice, this list of conditions and the following disclaimer in the\r\n",
+      "#    documentation and/or other materials provided with the distribution.\r\n",
+      "#  * Neither the name of NVIDIA CORPORATION nor the names of its\r\n",
+      "#    contributors may be used to endorse or promote products derived\r\n",
+      "#    from this software without specific prior written permission.\r\n",
+      "#\r\n",
+      "# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY\r\n",
+      "# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r\n",
+      "# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\r\n",
+      "# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r\n",
+      "# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r\n",
+      "# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r\n",
+      "# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r\n",
+      "# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY\r\n",
+      "# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\r\n",
+      "# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\r\n",
+      "# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r\n",
+      "import pathlib\r\n",
+      "from pathlib import Path\r\n",
+      "\r\n",
+      "from merlin.dag import postorder_iter_nodes\r\n",
+      "from merlin.systems.dag import Ensemble\r\n",
+      "from merlin.systems.dag.runtimes.triton import TritonExecutorRuntime\r\n",
+      "from merlin.systems.triton.conversions import (\r\n",
+      "    dict_array_to_triton_response,\r\n",
+      "    triton_request_to_dict_array,\r\n",
+      ")\r\n",
+      "from merlin.systems.triton.utils import triton_error_handling, triton_multi_request\r\n",
+      "\r\n",
+      "\r\n",
+      "class TritonPythonModel:\r\n",
+      "    \"\"\"Model for Triton Python Backend.\r\n",
+      "\r\n",
+      "    Every Python model must have \"TritonPythonModel\" as the class name\r\n",
+      "    \"\"\"\r\n",
+      "\r\n",
+      "    def initialize(self, args):\r\n",
+      "        \"\"\"Called only once when the model is being loaded. Allowing\r\n",
+      "        the model to initialize any state associated with this model.\r\n",
+      "\r\n",
+      "        Parameters\r\n",
+      "        ----------\r\n",
+      "        args : dict\r\n",
+      "          Both keys and values are strings. The dictionary keys and values are:\r\n",
+      "          * model_config: A JSON string containing the model configuration\r\n",
+      "          * model_instance_kind: A string containing model instance kind\r\n",
+      "          * model_instance_device_id: A string containing model instance device ID\r\n",
+      "          * model_repository: Model repository path\r\n",
+      "          * model_version: Model version\r\n",
+      "          * model_name: Model name\r\n",
+      "        \"\"\"\r\n",
+      "        # Arg parsing\r\n",
+      "        model_repo = args[\"model_repository\"]\r\n",
+      "        repository_path = _parse_model_repository(model_repo)\r\n",
+      "\r\n",
+      "        ensemble_path = (\r\n",
+      "            Path(repository_path) / args[\"model_name\"] / str(args[\"model_version\"]) / \"ensemble\"\r\n",
+      "        )\r\n",
+      "\r\n",
+      "        self.ensemble = Ensemble.load(str(ensemble_path))\r\n",
+      "\r\n",
+      "        for node in list(postorder_iter_nodes(self.ensemble.graph.output_node)):\r\n",
+      "            if hasattr(node.op, \"load_artifacts\"):\r\n",
+      "                node.op.load_artifacts(str(ensemble_path))\r\n",
+      "\r\n",
+      "    @triton_multi_request\r\n",
+      "    @triton_error_handling\r\n",
+      "    def execute(self, request):\r\n",
+      "        \"\"\"Receives a list of pb_utils.InferenceRequest as the only argument. This\r\n",
+      "        function is called when an inference is requested for this model. Depending on the\r\n",
+      "        batching configuration (e.g. Dynamic Batching) used, `requests` may contain\r\n",
+      "        multiple requests. Every Python model, must create one pb_utils.InferenceResponse\r\n",
+      "        for every pb_utils.InferenceRequest in `requests`. If there is an error, you can\r\n",
+      "        set the error argument when creating a pb_utils.InferenceResponse.\r\n",
+      "\r\n",
+      "        Parameters\r\n",
+      "        ----------\r\n",
+      "        requests : list\r\n",
+      "          A list of pb_utils.InferenceRequest\r\n",
+      "\r\n",
+      "        Returns\r\n",
+      "        -------\r\n",
+      "        list\r\n",
+      "          A list of pb_utils.InferenceResponse. The length of this list must\r\n",
+      "          be the same as `requests`\r\n",
+      "        \"\"\"\r\n",
+      "        inputs = triton_request_to_dict_array(request, self.ensemble.input_schema.column_names)\r\n",
+      "        outputs = self.ensemble.transform(inputs, runtime=TritonExecutorRuntime())\r\n",
+      "        return dict_array_to_triton_response(outputs)\r\n",
+      "\r\n",
+      "\r\n",
+      "def _parse_model_repository(model_repository: str) -> str:\r\n",
+      "    \"\"\"\r\n",
+      "    Extract the model repository path from the model_repository value\r\n",
+      "    passed to the TritonPythonModel initialize method.\r\n",
+      "    \"\"\"\r\n",
+      "    # Handle bug in Tritonserver 22.06\r\n",
+      "    # model_repository argument became path to model.py\r\n",
+      "    # instead of path to model directory within the model repository\r\n",
+      "    if model_repository.endswith(\".py\"):\r\n",
+      "        return str(pathlib.Path(model_repository).parent.parent.parent)\r\n",
+      "    else:\r\n",
+      "        return str(pathlib.Path(model_repository).parent)\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "cat /workspace/models_for_benchmarking/executor_model/1/model.py"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "3d21ce62",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{\"versions\": {\"python\": \"3.8.10 (default, Nov 14 2022, 12:59:47) \\n[GCC 9.4.0]\"}, \"generated_timestamp\": 1679017581}"
+     ]
+    }
+   ],
+   "source": [
+    "cat /workspace/models_for_benchmarking/executor_model/1/ensemble/metadata.json"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7998b835",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# %%writefile /workspace/models_for_benchmarking/t4r_pytorch_pt/config.pbtxt\n",
+    "\n",
+    "# name: \"t4r_pytorch_pt\"\n",
+    "# input {\n",
+    "#   name: \"sess_pid_seq__values\"\n",
+    "#   data_type: TYPE_INT64\n",
+    "#   dims: -1\n",
+    "#   dims: 1\n",
+    "# }\n",
+    "# input {\n",
+    "#   name: \"sess_pid_seq__nnzs\"\n",
+    "#   data_type: TYPE_INT64\n",
+    "#   dims: -1\n",
+    "#   dims: 1\n",
+    "# }\n",
+    "# output {\n",
+    "#   name: \"output\"\n",
+    "#   data_type: TYPE_FP32\n",
+    "#   dims: -1\n",
+    "#   dims: 20\n",
+    "# }\n",
+    "# backend: \"python\""
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}