diff --git a/examples/distributed_data_classification_examples/test_domain.ipynb b/examples/distributed_data_classification_examples/test_domain.ipynb
index 45bedbd85..be36c450b 100644
--- a/examples/distributed_data_classification_examples/test_domain.ipynb
+++ b/examples/distributed_data_classification_examples/test_domain.ipynb
@@ -15,13 +15,14 @@
    ],
    "source": [
     "%env PYTHONWARNINGS=ignore\n",
+    "\n",
     "import warnings\n",
     "warnings.filterwarnings(\"ignore\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -34,7 +35,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -44,7 +45,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -79,20 +80,26 @@
     "\n",
     "model_file_name = \"/home/nfs/syurick/LLM_domain_classifier_inference/GoogleDebertaAgree_v3b_bce_maxlen512_bs64_noRef_best.pth\"\n",
     "# Input can be a string or list\n",
-    "input_file_path = \"/home/nfs/syurick/LLM_domain_classifier_inference/4360_results_jsonl_dir/\"\n",
-    "output_file_path = \"/raid/vjawa/output_file.json\"\n"
+    "input_file_path = \"/raid/vjawa/prospector-lm/subset_CC-MAIN-2023-14_english\"\n",
+    "output_file_path = \"/raid/vjawa/output_subset_CC-MAIN-2023-14_english\"\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Reading 16 files\n",
+      "Reading 50 files\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
       "Starting domain classifier inference\n"
      ]
     },
@@ -100,31 +107,159 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "GPU: 0, Part: 14: 100%|██████████| 937/937 [00:14<00:00, 64.41it/s] \n",
-      "GPU: 0, Part: 13: 100%|██████████| 937/937 [00:16<00:00, 57.34it/s]\n",
-      "GPU: 0, Part: 12: 100%|██████████| 937/937 [00:14<00:00, 63.48it/s] \n",
-      "GPU: 0, Part: 5: 100%|██████████| 938/938 [00:14<00:00, 62.78it/s] \n",
-      "GPU: 0, Part: 9: 100%|██████████| 937/937 [00:15<00:00, 60.63it/s]\n",
-      "GPU: 0, Part: 10: 100%|██████████| 937/937 [00:15<00:00, 61.35it/s]\n",
-      "GPU: 0, Part: 6: 100%|██████████| 938/938 [00:15<00:00, 59.92it/s]\n",
-      "GPU: 0, Part: 4: 100%|██████████| 938/938 [00:15<00:00, 61.30it/s]\n",
-      "GPU: 0, Part: 15: 100%|██████████| 937/937 [00:15<00:00, 61.20it/s]\n",
-      "GPU: 0, Part: 0: 100%|██████████| 938/938 [00:15<00:00, 61.44it/s]\n",
-      "GPU: 0, Part: 2: 100%|██████████| 938/938 [00:15<00:00, 61.10it/s]\n",
-      "GPU: 0, Part: 1: 100%|██████████| 938/938 [00:15<00:00, 60.69it/s]\n",
-      "GPU: 0, Part: 8: 100%|██████████| 937/937 [00:15<00:00, 60.11it/s]\n",
-      "GPU: 0, Part: 7: 100%|██████████| 937/937 [00:15<00:00, 60.54it/s]\n",
-      "GPU: 0, Part: 3: 100%|██████████| 938/938 [00:15<00:00, 60.71it/s]\n",
-      "GPU: 0, Part: 11: 100%|██████████| 937/937 [00:15<00:00, 60.47it/s]\n"
+      "GPU: 0, Part: 9: 100%|██████████| 9995/9995 [00:42<00:00, 233.16it/s]/s]\n",
+      "GPU: 0, Part: 44: 100%|██████████| 9793/9793 [00:41<00:00, 235.01it/s]\n",
+      "GPU: 0, Part: 7: 100%|██████████| 9956/9956 [00:42<00:00, 233.63it/s]]\n",
+      "GPU: 0, Part: 8: 100%|██████████| 10093/10093 [00:44<00:00, 226.58it/s]\n",
+      "GPU: 0, Part: 47: 100%|██████████| 10100/10100 [00:43<00:00, 234.62it/s]\n",
+      "GPU: 0, Part: 6: 100%|██████████| 10088/10088 [00:42<00:00, 236.62it/s]\n",
+      "GPU: 0, Part: 48: 100%|██████████| 10021/10021 [00:42<00:00, 235.82it/s]\n",
+      "GPU: 0, Part: 49: 100%|██████████| 10200/10200 [00:42<00:00, 238.71it/s]\n",
+      "GPU: 0, Part: 4: 100%|██████████| 9747/9747 [00:42<00:00, 227.39it/s]]\n",
+      "GPU: 0, Part: 40: 100%|██████████| 9999/9999 [00:42<00:00, 236.56it/s]\n",
+      "GPU: 0, Part: 46: 100%|██████████| 9994/9994 [00:43<00:00, 230.24it/s]\n",
+      "GPU: 0, Part: 41: 100%|██████████| 9938/9938 [00:43<00:00, 228.66it/s]\n",
+      "GPU: 0, Part: 45: 100%|██████████| 9832/9832 [00:43<00:00, 225.21it/s]\n",
+      "GPU: 0, Part: 42: 100%|██████████| 9985/9985 [00:42<00:00, 232.65it/s]\n",
+      "GPU: 0, Part: 5: 100%|██████████| 9873/9873 [00:43<00:00, 225.99it/s]\n",
+      "GPU: 0, Part: 43: 100%|██████████| 9933/9933 [00:44<00:00, 221.88it/s]\n",
+      "GPU: 0, Part: 39: 100%|██████████| 10075/10075 [00:31<00:00, 316.80it/s]\n",
+      "GPU: 0, Part: 3: 100%|██████████| 9714/9714 [00:33<00:00, 293.11it/s]s]\n",
+      "GPU: 0, Part: 26: 100%|██████████| 10090/10090 [00:31<00:00, 316.52it/s]\n",
+      "GPU: 0, Part: 36: 100%|██████████| 10019/10019 [00:32<00:00, 307.36it/s]\n",
+      "GPU: 0, Part: 33: 100%|██████████| 10187/10187 [00:32<00:00, 313.55it/s]\n",
+      "GPU: 0, Part: 27: 100%|██████████| 9941/9941 [00:32<00:00, 310.53it/s]\n",
+      "GPU: 0, Part: 28: 100%|██████████| 10095/10095 [00:32<00:00, 315.00it/s]\n",
+      "GPU: 0, Part: 31: 100%|██████████| 10150/10150 [00:32<00:00, 311.38it/s]\n",
+      "GPU: 0, Part: 34: 100%|██████████| 9934/9934 [00:33<00:00, 294.76it/s]s]\n",
+      "GPU: 0, Part: 37: 100%|██████████| 9939/9939 [00:33<00:00, 293.81it/s]\n",
+      "GPU: 0, Part: 32: 100%|██████████| 9959/9959 [00:33<00:00, 294.59it/s]s]\n",
+      "GPU: 0, Part: 25: 100%|██████████| 10204/10204 [00:34<00:00, 298.84it/s]\n",
+      "GPU: 0, Part: 35: 100%|██████████| 9861/9861 [00:33<00:00, 292.10it/s]s]\n",
+      "GPU: 0, Part: 30: 100%|██████████| 10031/10031 [00:34<00:00, 294.82it/s]\n",
+      "GPU: 0, Part: 38: 100%|██████████| 9988/9988 [00:35<00:00, 284.55it/s]\n",
+      "GPU: 0, Part: 29: 100%|██████████| 10027/10027 [00:34<00:00, 291.28it/s]\n",
+      "GPU: 0, Part: 24: 100%|██████████| 9799/9799 [00:31<00:00, 312.35it/s]]\n",
+      "GPU: 0, Part: 23: 100%|██████████| 9905/9905 [00:32<00:00, 306.16it/s]]\n",
+      "GPU: 0, Part: 22: 100%|██████████| 9658/9658 [00:30<00:00, 316.38it/s]]\n",
+      "GPU: 0, Part: 17: 100%|██████████| 9986/9986 [00:31<00:00, 320.21it/s]]]\n",
+      "GPU: 0, Part: 12: 100%|██████████| 10014/10014 [00:31<00:00, 322.65it/s]\n",
+      "GPU: 0, Part: 20: 100%|██████████| 9421/9421 [00:31<00:00, 300.02it/s]]\n",
+      "GPU: 0, Part: 18: 100%|██████████| 9658/9658 [00:32<00:00, 298.53it/s]]\n",
+      "GPU: 0, Part: 21: 100%|██████████| 9559/9559 [00:30<00:00, 308.45it/s]]\n",
+      "GPU: 0, Part: 19: 100%|██████████| 9664/9664 [00:31<00:00, 304.16it/s]s]\n",
+      "GPU: 0, Part: 15: 100%|██████████| 9860/9860 [00:31<00:00, 315.41it/s]]]\n",
+      "GPU: 0, Part: 14: 100%|██████████| 10159/10159 [00:32<00:00, 314.31it/s]\n",
+      "GPU: 0, Part: 13: 100%|██████████| 10260/10260 [00:34<00:00, 300.39it/s]\n",
+      "GPU: 0, Part: 10: 100%|██████████| 10061/10061 [00:31<00:00, 314.94it/s]\n",
+      "GPU: 0, Part: 16: 100%|██████████| 10009/10009 [00:34<00:00, 292.21it/s]\n",
+      "GPU: 0, Part: 11: 100%|██████████| 10284/10284 [00:34<00:00, 298.47it/s]\n",
+      "GPU: 0, Part: 2: 100%|██████████| 9684/9684 [00:35<00:00, 273.30it/s]\n",
+      "GPU: 0, Part: 1: 100%|██████████| 9698/9698 [00:30<00:00, 315.30it/s]\n",
+      "GPU: 0, Part: 0: 100%|██████████| 9729/9729 [00:31<00:00, 309.69it/s]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2024-05-09 01:05:38,916 - tornado.application - ERROR - Uncaught exception GET /status/ws (10.2.226.46)\n",
+      "HTTPServerRequest(protocol='http', host='10.120.104.12:8787', method='GET', uri='/status/ws', version='HTTP/1.1', remote_ip='10.2.226.46')\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/web.py\", line 1790, in _execute\n",
+      "    result = await result\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/websocket.py\", line 273, in get\n",
+      "    await self.ws_connection.accept_connection(self)\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/websocket.py\", line 863, in accept_connection\n",
+      "    await self._accept_connection(handler)\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/websocket.py\", line 946, in _accept_connection\n",
+      "    await self._receive_frame_loop()\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/websocket.py\", line 1105, in _receive_frame_loop\n",
+      "    self.handler.on_ws_connection_close(self.close_code, self.close_reason)\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/websocket.py\", line 571, in on_ws_connection_close\n",
+      "    self.on_connection_close()\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/websocket.py\", line 563, in on_connection_close\n",
+      "    self.on_close()\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/bokeh/server/views/ws.py\", line 308, in on_close\n",
+      "    self.connection.session.notify_connection_lost()\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/bokeh/server/connection.py\", line 65, in session\n",
+      "    assert self._session is not None\n",
+      "AssertionError\n",
+      "2024-05-09 01:05:43,132 - bokeh.server.protocol_handler - ERROR - error handling message\n",
+      " message: Message 'PATCH-DOC' content: {'events': [{'kind': 'ModelChanged', 'model': {'id': 'p9330'}, 'attr': 'inner_width', 'new': 834}, {'kind': 'ModelChanged', 'model': {'id': 'p9330'}, 'attr': 'inner_height', 'new': 863}, {'kind': 'ModelChanged', 'model': {'id': 'p9330'}, 'attr': 'outer_width', 'new': 854}, {'kind': 'ModelChanged', 'model': {'id': 'p9330'}, 'attr': 'outer_height', 'new': 895}]} \n",
+      " error: AssertionError()\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/bokeh/server/protocol_handler.py\", line 97, in handle\n",
+      "    work = await handler(message, connection)\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/bokeh/server/session.py\", line 295, in patch\n",
+      "    return connection.session._handle_patch(message, connection)\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/bokeh/server/connection.py\", line 65, in session\n",
+      "    assert self._session is not None\n",
+      "AssertionError\n",
+      "2024-05-09 01:05:44,167 - bokeh.server.protocol_handler - ERROR - error handling message\n",
+      " message: Message 'PATCH-DOC' content: {'events': [{'kind': 'MessageSent', 'msg_type': 'bokeh_event', 'msg_data': {'type': 'event', 'name': 'document_ready', 'values': {'type': 'map'}}}]} \n",
+      " error: AssertionError()\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/bokeh/server/protocol_handler.py\", line 97, in handle\n",
+      "    work = await handler(message, connection)\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/bokeh/server/session.py\", line 295, in patch\n",
+      "    return connection.session._handle_patch(message, connection)\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/bokeh/server/connection.py\", line 65, in session\n",
+      "    assert self._session is not None\n",
+      "AssertionError\n",
+      "2024-05-09 01:05:44,171 - tornado.application - ERROR - Uncaught exception GET /gpu/ws (10.2.226.46)\n",
+      "HTTPServerRequest(protocol='http', host='10.120.104.12:8787', method='GET', uri='/gpu/ws', version='HTTP/1.1', remote_ip='10.2.226.46')\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/web.py\", line 1790, in _execute\n",
+      "    result = await result\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/websocket.py\", line 273, in get\n",
+      "    await self.ws_connection.accept_connection(self)\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/websocket.py\", line 863, in accept_connection\n",
+      "    await self._accept_connection(handler)\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/websocket.py\", line 946, in _accept_connection\n",
+      "    await self._receive_frame_loop()\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/websocket.py\", line 1105, in _receive_frame_loop\n",
+      "    self.handler.on_ws_connection_close(self.close_code, self.close_reason)\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/websocket.py\", line 571, in on_ws_connection_close\n",
+      "    self.on_connection_close()\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/websocket.py\", line 563, in on_connection_close\n",
+      "    self.on_close()\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/bokeh/server/views/ws.py\", line 308, in on_close\n",
+      "    self.connection.session.notify_connection_lost()\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/bokeh/server/connection.py\", line 65, in session\n",
+      "    assert self._session is not None\n",
+      "AssertionError\n",
+      "2024-05-09 01:05:55,279 - tornado.application - ERROR - Uncaught exception GET /status/ws (10.2.226.46)\n",
+      "HTTPServerRequest(protocol='http', host='10.120.104.12:8787', method='GET', uri='/status/ws', version='HTTP/1.1', remote_ip='10.2.226.46')\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/web.py\", line 1790, in _execute\n",
+      "    result = await result\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/websocket.py\", line 273, in get\n",
+      "    await self.ws_connection.accept_connection(self)\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/websocket.py\", line 863, in accept_connection\n",
+      "    await self._accept_connection(handler)\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/websocket.py\", line 946, in _accept_connection\n",
+      "    await self._receive_frame_loop()\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/websocket.py\", line 1105, in _receive_frame_loop\n",
+      "    self.handler.on_ws_connection_close(self.close_code, self.close_reason)\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/websocket.py\", line 571, in on_ws_connection_close\n",
+      "    self.on_connection_close()\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/tornado/websocket.py\", line 563, in on_connection_close\n",
+      "    self.on_close()\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/bokeh/server/views/ws.py\", line 308, in on_close\n",
+      "    self.connection.session.notify_connection_lost()\n",
+      "  File \"/datasets/vjawa/miniconda3/envs/NeMo-Curator-env-2/lib/python3.10/site-packages/bokeh/server/connection.py\", line 65, in session\n",
+      "    assert self._session is not None\n",
+      "AssertionError\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Writing to disk complete for 16 partitions\n",
-      "CPU times: user 11.4 s, sys: 11.4 s, total: 22.7 s\n",
-      "Wall time: 49.7 s\n"
+      "Writing to disk complete for 50 partitions\n",
+      "CPU times: user 39.5 s, sys: 27.2 s, total: 1min 6s\n",
+      "Wall time: 3min 14s\n"
      ]
     }
    ],
@@ -138,7 +273,7 @@
     "domain_classifier = DomainClassifier(\n",
     "    model_file_name=model_file_name,\n",
     "    labels=labels,\n",
-    "    batch_size=256,\n",
+    "    batch_size=1024,\n",
     ")\n",
     "result_dataset = domain_classifier(dataset=input_dataset)\n",
     "result_dataset.to_json(output_file_dir=output_file_path, write_to_filename=True)"
@@ -146,14 +281,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Reading 16 files\n"
+      "Reading 50 files\n"
      ]
     }
    ],
@@ -163,7 +298,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'/raid/vjawa/output_subset_CC-MAIN-2023-14_english'"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "output_file_path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -190,16 +345,16 @@
        "      <th></th>\n",
        "      <th>adlr_id</th>\n",
        "      <th>filename</th>\n",
-       "      <th>id</th>\n",
        "      <th>labels</th>\n",
-       "      <th>pred</th>\n",
+       "      <th>langid_score</th>\n",
+       "      <th>language</th>\n",
        "      <th>source_id</th>\n",
-       "      <th>split_id</th>\n",
        "      <th>text</th>\n",
        "      <th>url</th>\n",
+       "      <th>warc_id</th>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>npartitions=16</th>\n",
+       "      <th>npartitions=50</th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
@@ -217,7 +372,7 @@
        "      <td>object</td>\n",
        "      <td>object</td>\n",
        "      <td>object</td>\n",
-       "      <td>object</td>\n",
+       "      <td>float64</td>\n",
        "      <td>object</td>\n",
        "      <td>object</td>\n",
        "      <td>object</td>\n",
@@ -278,10 +433,10 @@
        "<div>Dask Name: read_single_partition, 1 graph layer</div>"
       ],
       "text/plain": [
-       "<dask_cudf.DataFrame | 16 tasks | 16 npartitions>"
+       "<dask_cudf.DataFrame | 50 tasks | 50 npartitions>"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -292,7 +447,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -318,203 +473,215 @@
        "      <th></th>\n",
        "      <th>adlr_id</th>\n",
        "      <th>filename</th>\n",
-       "      <th>id</th>\n",
        "      <th>labels</th>\n",
-       "      <th>pred</th>\n",
+       "      <th>langid_score</th>\n",
+       "      <th>language</th>\n",
        "      <th>source_id</th>\n",
-       "      <th>split_id</th>\n",
        "      <th>text</th>\n",
        "      <th>url</th>\n",
+       "      <th>warc_id</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>cc-2022-40-0431053204</td>\n",
-       "      <td>00.jsonl</td>\n",
-       "      <td>a8083fe4-525d-4888-8513-b91f43bd8ee1</td>\n",
-       "      <td>Online_Communities</td>\n",
-       "      <td>Online_Communities</td>\n",
-       "      <td>crawl-data-CC-MAIN-2022-40-segments-1664030336...</td>\n",
-       "      <td>lambada-0003225258-0000</td>\n",
-       "      <td>Having been a community leader—and member—for ...</td>\n",
-       "      <td>https://lisalarter.com/7-tips-for-building-ste...</td>\n",
+       "      <td>cc-2023-14-0001622299</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>Jobs_and_Education</td>\n",
+       "      <td>0.946693</td>\n",
+       "      <td>EN</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>Neighborhood Street Fund Application: plans fo...</td>\n",
+       "      <td>http://12thaveseattle.com/blog/2013/02/11/12th...</td>\n",
+       "      <td>6dd74af8-669e-4aaf-b5f8-e2a44f03574b</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>cc-2022-40-0510168267</td>\n",
-       "      <td>00.jsonl</td>\n",
-       "      <td>559febdc-cb7f-4217-897a-c8dac325123b</td>\n",
-       "      <td>Finance</td>\n",
-       "      <td>Finance</td>\n",
-       "      <td>crawl-data-CC-MAIN-2022-40-segments-1664030337...</td>\n",
-       "      <td>lambada-0003918122-0000</td>\n",
-       "      <td>Zelle is a way of sending money to almost anyo...</td>\n",
-       "      <td>https://oregonmassageandwellnessclinic.com/app...</td>\n",
+       "      <td>cc-2023-14-0001622300</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>Computers_and_Electronics</td>\n",
+       "      <td>0.918942</td>\n",
+       "      <td>EN</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>Main navigation\\n\\nProject Assistance, Managem...</td>\n",
+       "      <td>http://1kenthomas.com/slides/drupal-project-as...</td>\n",
+       "      <td>eb065ae1-4737-4557-b040-96a1ecf67db4</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>cc-2022-40-0695312978</td>\n",
-       "      <td>00.jsonl</td>\n",
-       "      <td>b1ec1a9f-693e-4672-b485-54f48a3dfdb6</td>\n",
-       "      <td>Arts_and_Entertainment</td>\n",
-       "      <td>Arts_and_Entertainment</td>\n",
-       "      <td>crawl-data-CC-MAIN-2022-40-segments-1664030337...</td>\n",
-       "      <td>lambada-0005286343-0000</td>\n",
-       "      <td>Nicole Scherzinger and Enrique Lglesias Get St...</td>\n",
-       "      <td>https://menzmag.com/entertainment/celebrity-go...</td>\n",
+       "      <td>cc-2023-14-0001622301</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>Autos_and_Vehicles</td>\n",
+       "      <td>0.937426</td>\n",
+       "      <td>EN</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>RENDI AUTO\\n\\nBMW X5\\n\\nPeriod\\n\\n-\\n\\nName\\n\\...</td>\n",
+       "      <td>http://1rendiauto.ee/en/autod-2/?auto_id=13</td>\n",
+       "      <td>9f69f52e-350d-4677-8609-d7f3c8d759c6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>cc-2022-40-0318121708</td>\n",
-       "      <td>00.jsonl</td>\n",
-       "      <td>f1217f04-58d3-4c88-8d33-250401b219f6</td>\n",
-       "      <td>Internet_and_Telecom</td>\n",
-       "      <td>Internet_and_Telecom</td>\n",
-       "      <td>crawl-data-CC-MAIN-2022-40-segments-1664030335...</td>\n",
-       "      <td>lambada-0002386272-0000</td>\n",
-       "      <td>Thanksgiving 2021 WhatsApp Status Video to Dow...</td>\n",
-       "      <td>https://nonstop-news.com/lifestyle/thanksgivin...</td>\n",
+       "      <td>cc-2023-14-0001622302</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>Sensitive_Subjects</td>\n",
+       "      <td>0.976790</td>\n",
+       "      <td>EN</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>Now based on multiple underwater UFO encounter...</td>\n",
+       "      <td>http://24newstodays.com/2022/12/23/weird-encou...</td>\n",
+       "      <td>256174a9-506e-4b23-9580-de10e3ab4590</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>cc-2022-40-0602859436</td>\n",
-       "      <td>00.jsonl</td>\n",
-       "      <td>d255ebe4-0601-469b-a5d3-c4102d83dabd</td>\n",
-       "      <td>Games</td>\n",
-       "      <td>Games</td>\n",
-       "      <td>crawl-data-CC-MAIN-2022-40-segments-1664030337...</td>\n",
-       "      <td>lambada-0004541139-0000</td>\n",
-       "      <td>Lakeside Inn And Casino Lake Tahoe – Online si...</td>\n",
-       "      <td>https://psplondon.com/lakeside-inn-and-casino-...</td>\n",
+       "      <td>cc-2023-14-0001622303</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>Jobs_and_Education</td>\n",
+       "      <td>0.906984</td>\n",
+       "      <td>EN</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>We are commited to providing the highest level...</td>\n",
+       "      <td>http://434caaeea2929142-u.edu-newsletters.com/...</td>\n",
+       "      <td>f87fd3c6-1450-4260-93e5-bed50ea608e4</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
-       "      <td>cc-2022-40-0025406361</td>\n",
-       "      <td>00.jsonl</td>\n",
-       "      <td>5d598bfa-ca17-4203-800c-4d02072c3b87</td>\n",
-       "      <td>Books_and_Literature</td>\n",
-       "      <td>Books_and_Literature</td>\n",
-       "      <td>crawl-data-CC-MAIN-2022-40-segments-1664030334...</td>\n",
-       "      <td>lambada-0000190248-0000</td>\n",
-       "      <td>A THOUSAND WORDS - Alex Waterhouse-Hayward's b...</td>\n",
-       "      <td>http://blog.alexwaterhousehayward.com/2006/03/...</td>\n",
+       "      <td>cc-2023-14-0001622304</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>Sports</td>\n",
+       "      <td>0.977782</td>\n",
+       "      <td>EN</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>football\\n\\nThe football season in Bulgaria ha...</td>\n",
+       "      <td>http://4liberty.eu/tag/football/</td>\n",
+       "      <td>933d5938-d5b7-4745-ba69-db153fb25173</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
-       "      <td>cc-2022-40-0605292636</td>\n",
-       "      <td>00.jsonl</td>\n",
-       "      <td>42ced198-6cdb-4ef2-bf4d-dc1254da0da6</td>\n",
-       "      <td>Shopping</td>\n",
-       "      <td>Beauty_and_Fitness</td>\n",
-       "      <td>crawl-data-CC-MAIN-2022-40-segments-1664030337...</td>\n",
-       "      <td>lambada-0004601177-0000</td>\n",
-       "      <td>Search our store\\n\\nCLOCKWORK ORANGE OUTFIT\\n\\...</td>\n",
-       "      <td>https://dressx.com/products/clockwork-orange-o...</td>\n",
+       "      <td>cc-2023-14-0001622305</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>Pets_and_Animals</td>\n",
+       "      <td>0.942719</td>\n",
+       "      <td>EN</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>As leaders of the No Kill Movement on Maui, we...</td>\n",
+       "      <td>http://9thlifehawaii.org/site/Spay-amp-Neuter-...</td>\n",
+       "      <td>c1a83c12-0fbb-4fc9-8c68-4a109d71048e</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
-       "      <td>cc-2022-40-0270701137</td>\n",
-       "      <td>00.jsonl</td>\n",
-       "      <td>a0bbffa6-670d-43e4-8027-9fc0862df95f</td>\n",
-       "      <td>News</td>\n",
-       "      <td>News</td>\n",
-       "      <td>crawl-data-CC-MAIN-2022-40-segments-1664030335...</td>\n",
-       "      <td>lambada-0002122651-0000</td>\n",
-       "      <td>The Democrat Police State Imposes its Tyranny\\...</td>\n",
-       "      <td>https://www.paulcraigroberts.org/2022/08/13/th...</td>\n",
+       "      <td>cc-2023-14-0001622306</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>Internet_and_Telecom</td>\n",
+       "      <td>0.948170</td>\n",
+       "      <td>EN</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>The IP address for this domain may have change...</td>\n",
+       "      <td>http://a1levelingcleveland.com/cgi-sys/default...</td>\n",
+       "      <td>48829d73-dc4b-4423-8dd5-db32ac6a4349</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
-       "      <td>cc-2022-40-0130518751</td>\n",
-       "      <td>00.jsonl</td>\n",
-       "      <td>80948f1a-0970-4bc4-879a-22725a388d62</td>\n",
-       "      <td>Games</td>\n",
-       "      <td>Games</td>\n",
-       "      <td>crawl-data-CC-MAIN-2022-40-segments-1664030334...</td>\n",
-       "      <td>lambada-0000961821-0000</td>\n",
-       "      <td>How to Play the Lottery Online\\n\\nThe lottery ...</td>\n",
-       "      <td>https://moellerdog.com/index.php/2022/09/16/ho...</td>\n",
+       "      <td>cc-2023-14-0001622307</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>Food_and_Drink</td>\n",
+       "      <td>0.971278</td>\n",
+       "      <td>EN</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>A\\nvegetarian since the age of 15, Abbey Levin...</td>\n",
+       "      <td>http://abbeysvegetarianrecipes.com/abbey.html</td>\n",
+       "      <td>223a469b-6001-4820-aa2d-7eadb6afa6c4</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
-       "      <td>cc-2022-40-0430464926</td>\n",
-       "      <td>00.jsonl</td>\n",
-       "      <td>234d085c-f735-4b2e-bcfd-edc65fb4ed22</td>\n",
-       "      <td>Beauty_and_Fitness</td>\n",
-       "      <td>Beauty_and_Fitness</td>\n",
-       "      <td>crawl-data-CC-MAIN-2022-40-segments-1664030336...</td>\n",
-       "      <td>lambada-0003227706-0000</td>\n",
-       "      <td>LASER LIPOSUCTION\\n\\nLaser Liposuction works b...</td>\n",
-       "      <td>https://shapechicagoland.com/services/body-tre...</td>\n",
+       "      <td>cc-2023-14-0001622308</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>Online_Communities</td>\n",
+       "      <td>0.864449</td>\n",
+       "      <td>EN</td>\n",
+       "      <td>crawl-data-CC-MAIN-2023-14-segments-1679296943...</td>\n",
+       "      <td>Search This Blog\\n\\nDisclaimer\\n\\nThe owners, ...</td>\n",
+       "      <td>http://abeckslife.blogspot.com/2011/12/woodwri...</td>\n",
+       "      <td>9c57d2eb-dd01-4cc6-a473-7ae30ba26e3e</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "                 adlr_id  filename                                    id  \\\n",
-       "0  cc-2022-40-0431053204  00.jsonl  a8083fe4-525d-4888-8513-b91f43bd8ee1   \n",
-       "1  cc-2022-40-0510168267  00.jsonl  559febdc-cb7f-4217-897a-c8dac325123b   \n",
-       "2  cc-2022-40-0695312978  00.jsonl  b1ec1a9f-693e-4672-b485-54f48a3dfdb6   \n",
-       "3  cc-2022-40-0318121708  00.jsonl  f1217f04-58d3-4c88-8d33-250401b219f6   \n",
-       "4  cc-2022-40-0602859436  00.jsonl  d255ebe4-0601-469b-a5d3-c4102d83dabd   \n",
-       "5  cc-2022-40-0025406361  00.jsonl  5d598bfa-ca17-4203-800c-4d02072c3b87   \n",
-       "6  cc-2022-40-0605292636  00.jsonl  42ced198-6cdb-4ef2-bf4d-dc1254da0da6   \n",
-       "7  cc-2022-40-0270701137  00.jsonl  a0bbffa6-670d-43e4-8027-9fc0862df95f   \n",
-       "8  cc-2022-40-0130518751  00.jsonl  80948f1a-0970-4bc4-879a-22725a388d62   \n",
-       "9  cc-2022-40-0430464926  00.jsonl  234d085c-f735-4b2e-bcfd-edc65fb4ed22   \n",
+       "                 adlr_id                                           filename  \\\n",
+       "0  cc-2023-14-0001622299  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
+       "1  cc-2023-14-0001622300  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
+       "2  cc-2023-14-0001622301  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
+       "3  cc-2023-14-0001622302  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
+       "4  cc-2023-14-0001622303  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
+       "5  cc-2023-14-0001622304  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
+       "6  cc-2023-14-0001622305  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
+       "7  cc-2023-14-0001622306  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
+       "8  cc-2023-14-0001622307  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
+       "9  cc-2023-14-0001622308  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
        "\n",
-       "                   labels                    pred  \\\n",
-       "0      Online_Communities      Online_Communities   \n",
-       "1                 Finance                 Finance   \n",
-       "2  Arts_and_Entertainment  Arts_and_Entertainment   \n",
-       "3    Internet_and_Telecom    Internet_and_Telecom   \n",
-       "4                   Games                   Games   \n",
-       "5    Books_and_Literature    Books_and_Literature   \n",
-       "6                Shopping      Beauty_and_Fitness   \n",
-       "7                    News                    News   \n",
-       "8                   Games                   Games   \n",
-       "9      Beauty_and_Fitness      Beauty_and_Fitness   \n",
+       "                      labels  langid_score language  \\\n",
+       "0         Jobs_and_Education      0.946693       EN   \n",
+       "1  Computers_and_Electronics      0.918942       EN   \n",
+       "2         Autos_and_Vehicles      0.937426       EN   \n",
+       "3         Sensitive_Subjects      0.976790       EN   \n",
+       "4         Jobs_and_Education      0.906984       EN   \n",
+       "5                     Sports      0.977782       EN   \n",
+       "6           Pets_and_Animals      0.942719       EN   \n",
+       "7       Internet_and_Telecom      0.948170       EN   \n",
+       "8             Food_and_Drink      0.971278       EN   \n",
+       "9         Online_Communities      0.864449       EN   \n",
        "\n",
-       "                                           source_id                 split_id  \\\n",
-       "0  crawl-data-CC-MAIN-2022-40-segments-1664030336...  lambada-0003225258-0000   \n",
-       "1  crawl-data-CC-MAIN-2022-40-segments-1664030337...  lambada-0003918122-0000   \n",
-       "2  crawl-data-CC-MAIN-2022-40-segments-1664030337...  lambada-0005286343-0000   \n",
-       "3  crawl-data-CC-MAIN-2022-40-segments-1664030335...  lambada-0002386272-0000   \n",
-       "4  crawl-data-CC-MAIN-2022-40-segments-1664030337...  lambada-0004541139-0000   \n",
-       "5  crawl-data-CC-MAIN-2022-40-segments-1664030334...  lambada-0000190248-0000   \n",
-       "6  crawl-data-CC-MAIN-2022-40-segments-1664030337...  lambada-0004601177-0000   \n",
-       "7  crawl-data-CC-MAIN-2022-40-segments-1664030335...  lambada-0002122651-0000   \n",
-       "8  crawl-data-CC-MAIN-2022-40-segments-1664030334...  lambada-0000961821-0000   \n",
-       "9  crawl-data-CC-MAIN-2022-40-segments-1664030336...  lambada-0003227706-0000   \n",
+       "                                           source_id  \\\n",
+       "0  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
+       "1  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
+       "2  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
+       "3  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
+       "4  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
+       "5  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
+       "6  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
+       "7  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
+       "8  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
+       "9  crawl-data-CC-MAIN-2023-14-segments-1679296943...   \n",
        "\n",
        "                                                text  \\\n",
-       "0  Having been a community leader—and member—for ...   \n",
-       "1  Zelle is a way of sending money to almost anyo...   \n",
-       "2  Nicole Scherzinger and Enrique Lglesias Get St...   \n",
-       "3  Thanksgiving 2021 WhatsApp Status Video to Dow...   \n",
-       "4  Lakeside Inn And Casino Lake Tahoe – Online si...   \n",
-       "5  A THOUSAND WORDS - Alex Waterhouse-Hayward's b...   \n",
-       "6  Search our store\\n\\nCLOCKWORK ORANGE OUTFIT\\n\\...   \n",
-       "7  The Democrat Police State Imposes its Tyranny\\...   \n",
-       "8  How to Play the Lottery Online\\n\\nThe lottery ...   \n",
-       "9  LASER LIPOSUCTION\\n\\nLaser Liposuction works b...   \n",
+       "0  Neighborhood Street Fund Application: plans fo...   \n",
+       "1  Main navigation\\n\\nProject Assistance, Managem...   \n",
+       "2  RENDI AUTO\\n\\nBMW X5\\n\\nPeriod\\n\\n-\\n\\nName\\n\\...   \n",
+       "3  Now based on multiple underwater UFO encounter...   \n",
+       "4  We are commited to providing the highest level...   \n",
+       "5  football\\n\\nThe football season in Bulgaria ha...   \n",
+       "6  As leaders of the No Kill Movement on Maui, we...   \n",
+       "7  The IP address for this domain may have change...   \n",
+       "8  A\\nvegetarian since the age of 15, Abbey Levin...   \n",
+       "9  Search This Blog\\n\\nDisclaimer\\n\\nThe owners, ...   \n",
+       "\n",
+       "                                                 url  \\\n",
+       "0  http://12thaveseattle.com/blog/2013/02/11/12th...   \n",
+       "1  http://1kenthomas.com/slides/drupal-project-as...   \n",
+       "2        http://1rendiauto.ee/en/autod-2/?auto_id=13   \n",
+       "3  http://24newstodays.com/2022/12/23/weird-encou...   \n",
+       "4  http://434caaeea2929142-u.edu-newsletters.com/...   \n",
+       "5                   http://4liberty.eu/tag/football/   \n",
+       "6  http://9thlifehawaii.org/site/Spay-amp-Neuter-...   \n",
+       "7  http://a1levelingcleveland.com/cgi-sys/default...   \n",
+       "8      http://abbeysvegetarianrecipes.com/abbey.html   \n",
+       "9  http://abeckslife.blogspot.com/2011/12/woodwri...   \n",
        "\n",
-       "                                                 url  \n",
-       "0  https://lisalarter.com/7-tips-for-building-ste...  \n",
-       "1  https://oregonmassageandwellnessclinic.com/app...  \n",
-       "2  https://menzmag.com/entertainment/celebrity-go...  \n",
-       "3  https://nonstop-news.com/lifestyle/thanksgivin...  \n",
-       "4  https://psplondon.com/lakeside-inn-and-casino-...  \n",
-       "5  http://blog.alexwaterhousehayward.com/2006/03/...  \n",
-       "6  https://dressx.com/products/clockwork-orange-o...  \n",
-       "7  https://www.paulcraigroberts.org/2022/08/13/th...  \n",
-       "8  https://moellerdog.com/index.php/2022/09/16/ho...  \n",
-       "9  https://shapechicagoland.com/services/body-tre...  "
+       "                                warc_id  \n",
+       "0  6dd74af8-669e-4aaf-b5f8-e2a44f03574b  \n",
+       "1  eb065ae1-4737-4557-b040-96a1ecf67db4  \n",
+       "2  9f69f52e-350d-4677-8609-d7f3c8d759c6  \n",
+       "3  256174a9-506e-4b23-9580-de10e3ab4590  \n",
+       "4  f87fd3c6-1450-4260-93e5-bed50ea608e4  \n",
+       "5  933d5938-d5b7-4745-ba69-db153fb25173  \n",
+       "6  c1a83c12-0fbb-4fc9-8c68-4a109d71048e  \n",
+       "7  48829d73-dc4b-4423-8dd5-db32ac6a4349  \n",
+       "8  223a469b-6001-4820-aa2d-7eadb6afa6c4  \n",
+       "9  9c57d2eb-dd01-4cc6-a473-7ae30ba26e3e  "
       ]
      },
-     "execution_count": 9,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }