From 9c671571014c60e678d1e993e4a0fc73b7c77b2b Mon Sep 17 00:00:00 2001
From: Roman Bredehoft <roman.bredehoft@zama.ai>
Date: Mon, 5 Aug 2024 16:49:23 +0200
Subject: [PATCH] chore: add simulation execution

---
 .../workflows/run_one_use_cases_example.yaml  |    2 +-
 .../lora_finetune/gpt2_finetune_hybrid.ipynb  | 1572 +----------------
 2 files changed, 47 insertions(+), 1527 deletions(-)

diff --git a/.github/workflows/run_one_use_cases_example.yaml b/.github/workflows/run_one_use_cases_example.yaml
index d3941490ec..2072fd6953 100644
--- a/.github/workflows/run_one_use_cases_example.yaml
+++ b/.github/workflows/run_one_use_cases_example.yaml
@@ -25,7 +25,7 @@ on:
           - titanic
           # --- refresh_use_cases_list.py: refresh list of use cases currently available [END] ---
       push_changes:
-        description: 'Push the refreshed notebook(s)'
+        description: 'Push refreshed notebook(s)'
         required: false
         type: boolean
         default: false
diff --git a/use_case_examples/lora_finetune/gpt2_finetune_hybrid.ipynb b/use_case_examples/lora_finetune/gpt2_finetune_hybrid.ipynb
index 3ad79a27a2..0ba0e8bc44 100644
--- a/use_case_examples/lora_finetune/gpt2_finetune_hybrid.ipynb
+++ b/use_case_examples/lora_finetune/gpt2_finetune_hybrid.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -44,7 +44,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -75,7 +75,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [
     {
@@ -95,7 +95,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -112,7 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -140,7 +140,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -209,7 +209,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -220,7 +220,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -242,7 +242,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -250,7 +250,7 @@
     "\n",
     "data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)\n",
     "\n",
-    "EPOCHS = 100\n",
+    "EPOCHS = 2\n",
     "PER_DEVICE_TRAIN_BATCH_SIZE = 4\n",
     "\n",
     "training_args = TrainingArguments(\n",
@@ -273,7 +273,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -296,7 +296,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -305,7 +305,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -333,7 +333,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -349,7 +349,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -364,7 +364,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -433,1537 +433,57 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\r\n",
-      "Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:   1%|          | 1/100 [00:01<01:56,  1.17s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 1/100, Loss: 1.5326, grad norm: 0.6547388434410095, lr: 0.000495\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:   2%|▏         | 2/100 [00:01<01:10,  1.39it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 2/100, Loss: 1.5092, grad norm: 0.5734729170799255, lr: 0.00049\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:   3%|▎         | 3/100 [00:01<00:54,  1.77it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 3/100, Loss: 1.4762, grad norm: 0.4197540581226349, lr: 0.00048499999999999997\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:   4%|▍         | 4/100 [00:02<00:47,  2.02it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 4/100, Loss: 1.5085, grad norm: 0.569969117641449, lr: 0.00048\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:   5%|▌         | 5/100 [00:02<00:42,  2.21it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 5/100, Loss: 1.4667, grad norm: 0.5897998213768005, lr: 0.000475\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:   6%|▌         | 6/100 [00:03<00:40,  2.34it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 6/100, Loss: 1.4486, grad norm: 0.44352057576179504, lr: 0.00047\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:   7%|▋         | 7/100 [00:03<00:38,  2.42it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 7/100, Loss: 1.4159, grad norm: 0.506279706954956, lr: 0.000465\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:   8%|▊         | 8/100 [00:03<00:37,  2.48it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 8/100, Loss: 1.4051, grad norm: 0.6538838148117065, lr: 0.00046\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:   9%|▉         | 9/100 [00:04<00:36,  2.52it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 9/100, Loss: 1.3889, grad norm: 0.6592888236045837, lr: 0.000455\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  10%|█         | 10/100 [00:04<00:35,  2.55it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 10/100, Loss: 1.3730, grad norm: 0.6219719052314758, lr: 0.00045000000000000004\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  11%|█         | 11/100 [00:05<00:34,  2.57it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 11/100, Loss: 1.3494, grad norm: 0.6324566602706909, lr: 0.00044500000000000003\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  12%|█▏        | 12/100 [00:05<00:34,  2.59it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 12/100, Loss: 1.3169, grad norm: 0.5421789288520813, lr: 0.00044\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  13%|█▎        | 13/100 [00:05<00:34,  2.56it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 13/100, Loss: 1.3013, grad norm: 0.5423504114151001, lr: 0.000435\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  14%|█▍        | 14/100 [00:06<00:33,  2.59it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 14/100, Loss: 1.3303, grad norm: 0.6302087903022766, lr: 0.00043\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  15%|█▌        | 15/100 [00:06<00:32,  2.59it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 15/100, Loss: 1.2771, grad norm: 0.5095004439353943, lr: 0.000425\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  16%|█▌        | 16/100 [00:06<00:32,  2.59it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 16/100, Loss: 1.2506, grad norm: 0.5400538444519043, lr: 0.00042\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  17%|█▋        | 17/100 [00:07<00:31,  2.60it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 17/100, Loss: 1.2341, grad norm: 0.5874373316764832, lr: 0.000415\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  18%|█▊        | 18/100 [00:07<00:31,  2.61it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 18/100, Loss: 1.2215, grad norm: 0.5731167793273926, lr: 0.00041\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  19%|█▉        | 19/100 [00:08<00:30,  2.62it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 19/100, Loss: 1.1856, grad norm: 0.5122016072273254, lr: 0.00040500000000000003\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  20%|██        | 20/100 [00:08<00:30,  2.62it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 20/100, Loss: 1.1938, grad norm: 0.5971183180809021, lr: 0.0004\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  21%|██        | 21/100 [00:08<00:30,  2.62it/s]"
+      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+      "To disable this warning, you can either:\n",
+      "\t- Avoid using `tokenizers` before the fork if possible\n",
+      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
+      "Training Progress:  50%|█████     | 1/2 [02:53<02:53, 173.99s/it]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 21/100, Loss: 1.1668, grad norm: 0.6376621127128601, lr: 0.000395\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  22%|██▏       | 22/100 [00:09<00:29,  2.60it/s]"
+      "Epoch 1/2, Loss: 1.8678, grad norm: 0.20942462980747223, lr: 0.00025\n"
      ]
     },
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 22/100, Loss: 1.1436, grad norm: 0.5452390909194946, lr: 0.00039000000000000005\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  23%|██▎       | 23/100 [00:09<00:29,  2.62it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 23/100, Loss: 1.1361, grad norm: 0.5471293330192566, lr: 0.00038500000000000003\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  24%|██▍       | 24/100 [00:10<00:29,  2.58it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 24/100, Loss: 1.1000, grad norm: 0.6130229234695435, lr: 0.00038\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  25%|██▌       | 25/100 [00:10<00:28,  2.60it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 25/100, Loss: 1.0795, grad norm: 0.6525614261627197, lr: 0.000375\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  26%|██▌       | 26/100 [00:10<00:28,  2.57it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 26/100, Loss: 1.0930, grad norm: 0.9915198683738708, lr: 0.00037\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  27%|██▋       | 27/100 [00:11<00:28,  2.57it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 27/100, Loss: 1.0531, grad norm: 0.590857207775116, lr: 0.000365\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  28%|██▊       | 28/100 [00:11<00:28,  2.55it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 28/100, Loss: 1.0564, grad norm: 0.8754357695579529, lr: 0.00035999999999999997\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  29%|██▉       | 29/100 [00:11<00:27,  2.58it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 29/100, Loss: 1.0520, grad norm: 0.8149130344390869, lr: 0.000355\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  30%|███       | 30/100 [00:12<00:27,  2.59it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 30/100, Loss: 1.0313, grad norm: 0.5920228958129883, lr: 0.00035\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  31%|███       | 31/100 [00:12<00:26,  2.58it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 31/100, Loss: 1.0182, grad norm: 0.6779032349586487, lr: 0.000345\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  32%|███▏      | 32/100 [00:13<00:26,  2.58it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 32/100, Loss: 0.9980, grad norm: 0.5544361472129822, lr: 0.00034\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  33%|███▎      | 33/100 [00:13<00:26,  2.58it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 33/100, Loss: 0.9932, grad norm: 0.7196674942970276, lr: 0.000335\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  34%|███▍      | 34/100 [00:13<00:25,  2.60it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 34/100, Loss: 0.9819, grad norm: 0.7083548903465271, lr: 0.00033\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  35%|███▌      | 35/100 [00:14<00:24,  2.62it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 35/100, Loss: 0.9250, grad norm: 0.7313346266746521, lr: 0.00032500000000000004\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  36%|███▌      | 36/100 [00:14<00:24,  2.63it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 36/100, Loss: 0.9198, grad norm: 0.6564635634422302, lr: 0.00032\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  37%|███▋      | 37/100 [00:15<00:23,  2.64it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 37/100, Loss: 0.9157, grad norm: 0.7937288880348206, lr: 0.000315\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  38%|███▊      | 38/100 [00:15<00:23,  2.65it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 38/100, Loss: 0.8932, grad norm: 0.6338443756103516, lr: 0.00031\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  39%|███▉      | 39/100 [00:15<00:23,  2.65it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 39/100, Loss: 0.9295, grad norm: 0.8935690522193909, lr: 0.000305\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  40%|████      | 40/100 [00:16<00:22,  2.65it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 40/100, Loss: 0.8730, grad norm: 0.7592346668243408, lr: 0.0003\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  41%|████      | 41/100 [00:16<00:22,  2.64it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 41/100, Loss: 0.8485, grad norm: 0.7101594805717468, lr: 0.000295\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  42%|████▏     | 42/100 [00:16<00:21,  2.65it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 42/100, Loss: 0.8411, grad norm: 0.6478201150894165, lr: 0.00029\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  43%|████▎     | 43/100 [00:17<00:21,  2.65it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 43/100, Loss: 0.8544, grad norm: 0.7164880037307739, lr: 0.000285\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  44%|████▍     | 44/100 [00:17<00:21,  2.64it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 44/100, Loss: 0.8414, grad norm: 0.7436962127685547, lr: 0.00028000000000000003\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  45%|████▌     | 45/100 [00:18<00:20,  2.63it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 45/100, Loss: 0.8121, grad norm: 0.9844059944152832, lr: 0.000275\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  46%|████▌     | 46/100 [00:18<00:20,  2.62it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 46/100, Loss: 0.8048, grad norm: 0.9871523976325989, lr: 0.00027\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  47%|████▋     | 47/100 [00:18<00:20,  2.62it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 47/100, Loss: 0.8153, grad norm: 0.8394853472709656, lr: 0.00026500000000000004\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  48%|████▊     | 48/100 [00:19<00:19,  2.62it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 48/100, Loss: 0.8046, grad norm: 0.9217925667762756, lr: 0.00026000000000000003\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  49%|████▉     | 49/100 [00:19<00:19,  2.61it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 49/100, Loss: 0.7614, grad norm: 1.015302062034607, lr: 0.000255\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  50%|█████     | 50/100 [00:19<00:19,  2.60it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 50/100, Loss: 0.7760, grad norm: 0.9043252468109131, lr: 0.00025\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  51%|█████     | 51/100 [00:20<00:18,  2.60it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 51/100, Loss: 0.7693, grad norm: 0.8068227767944336, lr: 0.000245\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  52%|█████▏    | 52/100 [00:20<00:18,  2.61it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 52/100, Loss: 0.7422, grad norm: 0.9263298511505127, lr: 0.00024\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  53%|█████▎    | 53/100 [00:21<00:17,  2.61it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 53/100, Loss: 0.7486, grad norm: 1.0840318202972412, lr: 0.000235\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  54%|█████▍    | 54/100 [00:21<00:17,  2.61it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 54/100, Loss: 0.7469, grad norm: 0.8277450799942017, lr: 0.00023\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  55%|█████▌    | 55/100 [00:21<00:17,  2.61it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 55/100, Loss: 0.7148, grad norm: 0.8486602306365967, lr: 0.00022500000000000002\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  56%|█████▌    | 56/100 [00:22<00:16,  2.61it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 56/100, Loss: 0.7018, grad norm: 0.9315493106842041, lr: 0.00022\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  57%|█████▋    | 57/100 [00:22<00:16,  2.61it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 57/100, Loss: 0.6978, grad norm: 0.8715642690658569, lr: 0.000215\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  58%|█████▊    | 58/100 [00:23<00:16,  2.61it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 58/100, Loss: 0.6954, grad norm: 0.9117729067802429, lr: 0.00021\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  59%|█████▉    | 59/100 [00:23<00:15,  2.61it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 59/100, Loss: 0.6805, grad norm: 0.8932844996452332, lr: 0.000205\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  60%|██████    | 60/100 [00:23<00:15,  2.60it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 60/100, Loss: 0.6801, grad norm: 1.0779385566711426, lr: 0.0002\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  61%|██████    | 61/100 [00:24<00:14,  2.61it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 61/100, Loss: 0.6582, grad norm: 0.9519742131233215, lr: 0.00019500000000000002\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  62%|██████▏   | 62/100 [00:24<00:14,  2.60it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 62/100, Loss: 0.6777, grad norm: 1.0926264524459839, lr: 0.00019\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  63%|██████▎   | 63/100 [00:24<00:14,  2.59it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 63/100, Loss: 0.6813, grad norm: 1.2714309692382812, lr: 0.000185\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  64%|██████▍   | 64/100 [00:25<00:13,  2.60it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 64/100, Loss: 0.6696, grad norm: 1.0693631172180176, lr: 0.00017999999999999998\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  65%|██████▌   | 65/100 [00:25<00:13,  2.59it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 65/100, Loss: 0.6621, grad norm: 1.1618248224258423, lr: 0.000175\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  66%|██████▌   | 66/100 [00:26<00:13,  2.59it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 66/100, Loss: 0.6314, grad norm: 0.9860178232192993, lr: 0.00017\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  67%|██████▋   | 67/100 [00:26<00:12,  2.59it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 67/100, Loss: 0.6267, grad norm: 1.0095081329345703, lr: 0.000165\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  68%|██████▊   | 68/100 [00:26<00:12,  2.57it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 68/100, Loss: 0.6275, grad norm: 0.9747483134269714, lr: 0.00016\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  69%|██████▉   | 69/100 [00:27<00:12,  2.58it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 69/100, Loss: 0.6391, grad norm: 1.1718988418579102, lr: 0.000155\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  70%|███████   | 70/100 [00:27<00:11,  2.54it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 70/100, Loss: 0.6172, grad norm: 0.8902360796928406, lr: 0.00015\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  71%|███████   | 71/100 [00:28<00:11,  2.55it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 71/100, Loss: 0.6180, grad norm: 1.0743216276168823, lr: 0.000145\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  72%|███████▏  | 72/100 [00:28<00:10,  2.56it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 72/100, Loss: 0.6124, grad norm: 1.4731453657150269, lr: 0.00014000000000000001\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  73%|███████▎  | 73/100 [00:28<00:10,  2.57it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 73/100, Loss: 0.5906, grad norm: 1.2012979984283447, lr: 0.000135\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  74%|███████▍  | 74/100 [00:29<00:10,  2.57it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 74/100, Loss: 0.5892, grad norm: 1.3028196096420288, lr: 0.00013000000000000002\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  75%|███████▌  | 75/100 [00:29<00:09,  2.58it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 75/100, Loss: 0.5887, grad norm: 1.0304925441741943, lr: 0.000125\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  76%|███████▌  | 76/100 [00:30<00:09,  2.58it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 76/100, Loss: 0.5687, grad norm: 0.9565426707267761, lr: 0.00012\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  77%|███████▋  | 77/100 [00:30<00:09,  2.55it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 77/100, Loss: 0.5913, grad norm: 1.1523699760437012, lr: 0.000115\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  78%|███████▊  | 78/100 [00:30<00:08,  2.52it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 78/100, Loss: 0.5948, grad norm: 1.1738296747207642, lr: 0.00011\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  79%|███████▉  | 79/100 [00:31<00:08,  2.49it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 79/100, Loss: 0.5651, grad norm: 1.260327696800232, lr: 0.000105\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  80%|████████  | 80/100 [00:31<00:08,  2.50it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 80/100, Loss: 0.5798, grad norm: 1.1174153089523315, lr: 0.0001\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  81%|████████  | 81/100 [00:32<00:07,  2.39it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 81/100, Loss: 0.5539, grad norm: 0.9862734079360962, lr: 9.5e-05\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  82%|████████▏ | 82/100 [00:32<00:07,  2.45it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 82/100, Loss: 0.5794, grad norm: 0.9966534972190857, lr: 8.999999999999999e-05\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  83%|████████▎ | 83/100 [00:32<00:06,  2.48it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 83/100, Loss: 0.5628, grad norm: 1.0192633867263794, lr: 8.5e-05\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  84%|████████▍ | 84/100 [00:33<00:06,  2.50it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 84/100, Loss: 0.5633, grad norm: 0.9687011241912842, lr: 8e-05\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  85%|████████▌ | 85/100 [00:33<00:05,  2.55it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 85/100, Loss: 0.5684, grad norm: 1.0944551229476929, lr: 7.5e-05\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  86%|████████▌ | 86/100 [00:33<00:05,  2.61it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 86/100, Loss: 0.5491, grad norm: 1.0738519430160522, lr: 7.000000000000001e-05\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  87%|████████▋ | 87/100 [00:34<00:04,  2.69it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 87/100, Loss: 0.5521, grad norm: 1.282754898071289, lr: 6.500000000000001e-05\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  88%|████████▊ | 88/100 [00:34<00:04,  2.76it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 88/100, Loss: 0.5486, grad norm: 0.9802149534225464, lr: 6e-05\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  89%|████████▉ | 89/100 [00:35<00:03,  2.81it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 89/100, Loss: 0.5413, grad norm: 1.0144387483596802, lr: 5.5e-05\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  90%|█████████ | 90/100 [00:35<00:03,  2.85it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 90/100, Loss: 0.5474, grad norm: 1.223002552986145, lr: 5e-05\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  91%|█████████ | 91/100 [00:35<00:03,  2.87it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 91/100, Loss: 0.5436, grad norm: 1.1522656679153442, lr: 4.4999999999999996e-05\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  92%|█████████▏| 92/100 [00:36<00:02,  2.88it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 92/100, Loss: 0.5376, grad norm: 1.1914536952972412, lr: 4e-05\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  93%|█████████▎| 93/100 [00:36<00:02,  2.90it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 93/100, Loss: 0.5398, grad norm: 1.0207066535949707, lr: 3.5000000000000004e-05\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  94%|█████████▍| 94/100 [00:36<00:02,  2.91it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 94/100, Loss: 0.5116, grad norm: 1.0995105504989624, lr: 3e-05\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  95%|█████████▌| 95/100 [00:37<00:01,  3.06it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 95/100, Loss: 0.5242, grad norm: 1.0830743312835693, lr: 2.5e-05\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  96%|█████████▌| 96/100 [00:37<00:01,  3.00it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 96/100, Loss: 0.5603, grad norm: 1.2351734638214111, lr: 2e-05\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  97%|█████████▋| 97/100 [00:37<00:01,  2.97it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 97/100, Loss: 0.5152, grad norm: 0.9580557346343994, lr: 1.5e-05\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  98%|█████████▊| 98/100 [00:38<00:00,  2.96it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 98/100, Loss: 0.5217, grad norm: 0.9174291491508484, lr: 1e-05\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress:  99%|█████████▉| 99/100 [00:38<00:00,  2.95it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 99/100, Loss: 0.5333, grad norm: 1.0415540933609009, lr: 5e-06\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress: 100%|██████████| 100/100 [00:38<00:00,  2.94it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 100/100, Loss: 0.5352, grad norm: 0.9800519943237305, lr: 0.0\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "Training Progress: 100%|██████████| 100/100 [00:38<00:00,  2.56it/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[35], line 3\u001b[0m\n\u001b[1;32m      1\u001b[0m torch\u001b[38;5;241m.\u001b[39mmanual_seed(SEED)\n\u001b[0;32m----> 3\u001b[0m \u001b[43mtrain_custom_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhybrid_model\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrain_dataloader\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtraining_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfhe\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msimulate\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
+      "Cell \u001b[0;32mIn[34], line 35\u001b[0m, in \u001b[0;36mtrain_custom_model\u001b[0;34m(hybrid_model, train_dataloader, training_args, fhe)\u001b[0m\n\u001b[1;32m     31\u001b[0m run_optimizer \u001b[38;5;241m=\u001b[39m is_last_batch_step \u001b[38;5;129;01mor\u001b[39;00m accumulate_gradients\n\u001b[1;32m     33\u001b[0m hybrid_model\u001b[38;5;241m.\u001b[39mmodel\u001b[38;5;241m.\u001b[39mtoggle_run_optimizer(enable\u001b[38;5;241m=\u001b[39mrun_optimizer)\n\u001b[0;32m---> 35\u001b[0m loss, grad_norm \u001b[38;5;241m=\u001b[39m \u001b[43mhybrid_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbatch\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43minput_ids\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbatch\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlabels\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfhe\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfhe\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     37\u001b[0m total_loss \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m loss\u001b[38;5;241m.\u001b[39mitem()\n\u001b[1;32m     39\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m grad_norm \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
+      "File \u001b[0;32m~/Documents/concrete-ml/.venv/lib/python3.10/site-packages/concrete/ml/torch/hybrid_model.py:419\u001b[0m, in \u001b[0;36mHybridFHEModel.__call__\u001b[0;34m(self, x, fhe)\u001b[0m\n\u001b[1;32m    417\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mremote_modules\u001b[38;5;241m.\u001b[39mvalues():\n\u001b[1;32m    418\u001b[0m     module\u001b[38;5;241m.\u001b[39mfhe_local_mode \u001b[38;5;241m=\u001b[39m HybridFHEMode(fhe)\n\u001b[0;32m--> 419\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    420\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m x\n",
+      "File \u001b[0;32m~/Documents/concrete-ml/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1194\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m   1190\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1191\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1192\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1193\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1194\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1195\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m   1196\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
+      "Cell \u001b[0;32mIn[25], line 34\u001b[0m, in \u001b[0;36mLoraTraining.forward\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m     31\u001b[0m loss \u001b[38;5;241m=\u001b[39m loss \u001b[38;5;241m/\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgradient_accumulation_steps\n\u001b[1;32m     33\u001b[0m \u001b[38;5;66;03m# Update gradients\u001b[39;00m\n\u001b[0;32m---> 34\u001b[0m \u001b[43mloss\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     36\u001b[0m grad_norm \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m     37\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcalibrate \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrun_optimizer:\n",
+      "File \u001b[0;32m~/Documents/concrete-ml/.venv/lib/python3.10/site-packages/torch/_tensor.py:488\u001b[0m, in \u001b[0;36mTensor.backward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m    478\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_torch_function_unary(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m    479\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m handle_torch_function(\n\u001b[1;32m    480\u001b[0m         Tensor\u001b[38;5;241m.\u001b[39mbackward,\n\u001b[1;32m    481\u001b[0m         (\u001b[38;5;28mself\u001b[39m,),\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    486\u001b[0m         inputs\u001b[38;5;241m=\u001b[39minputs,\n\u001b[1;32m    487\u001b[0m     )\n\u001b[0;32m--> 488\u001b[0m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mautograd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    489\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgradient\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\n\u001b[1;32m    490\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Documents/concrete-ml/.venv/lib/python3.10/site-packages/torch/autograd/__init__.py:197\u001b[0m, in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m    192\u001b[0m     retain_graph \u001b[38;5;241m=\u001b[39m create_graph\n\u001b[1;32m    194\u001b[0m \u001b[38;5;66;03m# The reason we repeat same the comment below is that\u001b[39;00m\n\u001b[1;32m    195\u001b[0m \u001b[38;5;66;03m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[1;32m    196\u001b[0m \u001b[38;5;66;03m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[0;32m--> 197\u001b[0m \u001b[43mVariable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execution_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_backward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m  \u001b[49m\u001b[38;5;66;43;03m# Calls into the C++ engine to run the backward pass\u001b[39;49;00m\n\u001b[1;32m    198\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtensors\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrad_tensors_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    199\u001b[0m \u001b[43m    \u001b[49m\u001b[43mallow_unreachable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maccumulate_grad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Documents/concrete-ml/.venv/lib/python3.10/site-packages/torch/autograd/function.py:267\u001b[0m, in \u001b[0;36mBackwardCFunction.apply\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m    263\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mImplementing both \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbackward\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m and \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mvjp\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m for a custom \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    264\u001b[0m                        \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFunction is not allowed. You should only implement one \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    265\u001b[0m                        \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mof them.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    266\u001b[0m user_fn \u001b[38;5;241m=\u001b[39m vjp_fn \u001b[38;5;28;01mif\u001b[39;00m vjp_fn \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m Function\u001b[38;5;241m.\u001b[39mvjp \u001b[38;5;28;01melse\u001b[39;00m backward_fn\n\u001b[0;32m--> 267\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43muser_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Documents/concrete-ml/use_case_examples/lora_finetune/custom_module.py:36\u001b[0m, in \u001b[0;36mForwardBackwardModule.backward\u001b[0;34m(ctx, grad_output)\u001b[0m\n\u001b[1;32m     33\u001b[0m \u001b[38;5;129m@staticmethod\u001b[39m\n\u001b[1;32m     34\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mbackward\u001b[39m(ctx, grad_output):\n\u001b[1;32m     35\u001b[0m     backward_module \u001b[38;5;241m=\u001b[39m ctx\u001b[38;5;241m.\u001b[39mbackward_module\n\u001b[0;32m---> 36\u001b[0m     grad_input \u001b[38;5;241m=\u001b[39m \u001b[43mbackward_module\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrad_output\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     38\u001b[0m     \u001b[38;5;66;03m# grad_weight and grad_bias are not needed when computing the backward for lora\u001b[39;00m\n\u001b[1;32m     39\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m grad_input, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m\n",
+      "File \u001b[0;32m~/Documents/concrete-ml/.venv/lib/python3.10/site-packages/concrete/ml/torch/hybrid_model.py:253\u001b[0m, in \u001b[0;36mRemoteModule.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m    244\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfhe_local_mode \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m {\n\u001b[1;32m    245\u001b[0m     HybridFHEMode\u001b[38;5;241m.\u001b[39mDISABLE,\n\u001b[1;32m    246\u001b[0m     HybridFHEMode\u001b[38;5;241m.\u001b[39mCALIBRATE,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    249\u001b[0m }:\n\u001b[1;32m    250\u001b[0m     \u001b[38;5;66;03m# Using quantized module\u001b[39;00m\n\u001b[1;32m    251\u001b[0m     \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprivate_q_module \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    252\u001b[0m     y \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mTensor(\n\u001b[0;32m--> 253\u001b[0m         \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprivate_q_module\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdetach\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnumpy\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfhe\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfhe_local_mode\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    254\u001b[0m     )\n\u001b[1;32m    256\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfhe_local_mode \u001b[38;5;241m==\u001b[39m HybridFHEMode\u001b[38;5;241m.\u001b[39mDISABLE:\n\u001b[1;32m    257\u001b[0m     \u001b[38;5;66;03m# Calling torch\u001b[39;00m\n\u001b[1;32m    258\u001b[0m     \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprivate_module \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+      "File \u001b[0;32m~/Documents/concrete-ml/.venv/lib/python3.10/site-packages/concrete/ml/quantization/quantized_module.py:443\u001b[0m, in \u001b[0;36mQuantizedModule.forward\u001b[0;34m(self, fhe, debug, *x)\u001b[0m\n\u001b[1;32m    440\u001b[0m     y_pred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdequantize_output(\u001b[38;5;241m*\u001b[39mto_tuple(q_y_pred))\n\u001b[1;32m    441\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m y_pred, debug_value_tracker\n\u001b[0;32m--> 443\u001b[0m q_y_pred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquantized_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mq_x\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfhe\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfhe\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    445\u001b[0m \u001b[38;5;66;03m# De-quantize the output predicted values\u001b[39;00m\n\u001b[1;32m    446\u001b[0m y_pred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdequantize_output(\u001b[38;5;241m*\u001b[39mto_tuple(q_y_pred))\n",
+      "File \u001b[0;32m~/Documents/concrete-ml/.venv/lib/python3.10/site-packages/concrete/ml/quantization/quantized_module.py:486\u001b[0m, in \u001b[0;36mQuantizedModule.quantized_forward\u001b[0;34m(self, fhe, *q_x)\u001b[0m\n\u001b[1;32m    484\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_clear_forward(\u001b[38;5;241m*\u001b[39mq_x)\n\u001b[1;32m    485\u001b[0m simulate \u001b[38;5;241m=\u001b[39m fhe \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msimulate\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m--> 486\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fhe_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mq_x\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msimulate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msimulate\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Documents/concrete-ml/.venv/lib/python3.10/site-packages/concrete/ml/quantization/quantized_module.py:651\u001b[0m, in \u001b[0;36mQuantizedModule._fhe_forward\u001b[0;34m(self, simulate, *q_x)\u001b[0m\n\u001b[1;32m    648\u001b[0m     predict_method \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfhe_circuit\u001b[38;5;241m.\u001b[39mencrypt_run_decrypt\n\u001b[1;32m    650\u001b[0m \u001b[38;5;66;03m# Execute the forward pass in FHE or with simulation\u001b[39;00m\n\u001b[0;32m--> 651\u001b[0m q_result \u001b[38;5;241m=\u001b[39m to_tuple(\u001b[43mpredict_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mq_input\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m    653\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(q_result) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mlen\u001b[39m(q_result_by_output), (\n\u001b[1;32m    654\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNumber of outputs does not match the number of output quantizers.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    655\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(q_result)\u001b[38;5;132;01m=}\u001b[39;00m\u001b[38;5;124m!=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutput_quantizers)\u001b[38;5;132;01m=}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    656\u001b[0m )\n\u001b[1;32m    657\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m elt_index, elt \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(q_result):\n",
+      "File \u001b[0;32m~/Documents/concrete-ml/.venv/lib/python3.10/site-packages/concrete/fhe/compilation/circuit.py:168\u001b[0m, in \u001b[0;36mCircuit.simulate\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m    165\u001b[0m ordered_validated_args \u001b[38;5;241m=\u001b[39m validate_input_args(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msimulator\u001b[38;5;241m.\u001b[39mclient_specs, \u001b[38;5;241m*\u001b[39margs)\n\u001b[1;32m    167\u001b[0m exporter \u001b[38;5;241m=\u001b[39m SimulatedValueExporter\u001b[38;5;241m.\u001b[39mnew(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msimulator\u001b[38;5;241m.\u001b[39mclient_specs\u001b[38;5;241m.\u001b[39mclient_parameters)\n\u001b[0;32m--> 168\u001b[0m exported \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m    169\u001b[0m     (\n\u001b[1;32m    170\u001b[0m         \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    171\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m arg \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    172\u001b[0m         \u001b[38;5;28;01melse\u001b[39;00m Value(\n\u001b[1;32m    173\u001b[0m             exporter\u001b[38;5;241m.\u001b[39mexport_tensor(position, arg\u001b[38;5;241m.\u001b[39mflatten()\u001b[38;5;241m.\u001b[39mtolist(), \u001b[38;5;28mlist\u001b[39m(arg\u001b[38;5;241m.\u001b[39mshape))\n\u001b[1;32m    174\u001b[0m             \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arg, np\u001b[38;5;241m.\u001b[39mndarray) \u001b[38;5;129;01mand\u001b[39;00m arg\u001b[38;5;241m.\u001b[39mshape \u001b[38;5;241m!=\u001b[39m ()\n\u001b[1;32m    175\u001b[0m             \u001b[38;5;28;01melse\u001b[39;00m exporter\u001b[38;5;241m.\u001b[39mexport_scalar(position, \u001b[38;5;28mint\u001b[39m(arg))\n\u001b[1;32m    176\u001b[0m         )\n\u001b[1;32m    177\u001b[0m     )\n\u001b[1;32m    178\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m position, arg \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(ordered_validated_args)\n\u001b[1;32m    179\u001b[0m ]\n\u001b[1;32m    181\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msimulator\u001b[38;5;241m.\u001b[39mrun(\u001b[38;5;241m*\u001b[39mexported)\n\u001b[1;32m    182\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(results, \u001b[38;5;28mtuple\u001b[39m):\n",
+      "File \u001b[0;32m~/Documents/concrete-ml/.venv/lib/python3.10/site-packages/concrete/fhe/compilation/circuit.py:173\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    165\u001b[0m ordered_validated_args \u001b[38;5;241m=\u001b[39m validate_input_args(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msimulator\u001b[38;5;241m.\u001b[39mclient_specs, \u001b[38;5;241m*\u001b[39margs)\n\u001b[1;32m    167\u001b[0m exporter \u001b[38;5;241m=\u001b[39m SimulatedValueExporter\u001b[38;5;241m.\u001b[39mnew(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msimulator\u001b[38;5;241m.\u001b[39mclient_specs\u001b[38;5;241m.\u001b[39mclient_parameters)\n\u001b[1;32m    168\u001b[0m exported \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m    169\u001b[0m     (\n\u001b[1;32m    170\u001b[0m         \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    171\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m arg \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    172\u001b[0m         \u001b[38;5;28;01melse\u001b[39;00m Value(\n\u001b[0;32m--> 173\u001b[0m             \u001b[43mexporter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexport_tensor\u001b[49m\u001b[43m(\u001b[49m\u001b[43mposition\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43marg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mflatten\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtolist\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43marg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    174\u001b[0m             \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arg, np\u001b[38;5;241m.\u001b[39mndarray) \u001b[38;5;129;01mand\u001b[39;00m arg\u001b[38;5;241m.\u001b[39mshape \u001b[38;5;241m!=\u001b[39m ()\n\u001b[1;32m    175\u001b[0m             \u001b[38;5;28;01melse\u001b[39;00m exporter\u001b[38;5;241m.\u001b[39mexport_scalar(position, \u001b[38;5;28mint\u001b[39m(arg))\n\u001b[1;32m    176\u001b[0m         )\n\u001b[1;32m    177\u001b[0m     )\n\u001b[1;32m    178\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m position, arg \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(ordered_validated_args)\n\u001b[1;32m    179\u001b[0m ]\n\u001b[1;32m    181\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msimulator\u001b[38;5;241m.\u001b[39mrun(\u001b[38;5;241m*\u001b[39mexported)\n\u001b[1;32m    182\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(results, \u001b[38;5;28mtuple\u001b[39m):\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
      ]
     }
    ],
    "source": [
     "torch.manual_seed(SEED)\n",
     "\n",
-    "train_custom_model(hybrid_model, train_dataloader, training_args, fhe=\"disable\")"
+    "train_custom_model(hybrid_model, train_dataloader, training_args, fhe=\"simulate\")"
    ]
   },
   {