
Commit

deploy: b691824
waleko committed Sep 16, 2023
1 parent d8eb968 commit 8e6a5ff
Showing 5 changed files with 596 additions and 586 deletions.
71 changes: 22 additions & 49 deletions _sources/notebooks/2_inference.ipynb
@@ -15,21 +15,12 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 57,
"id": "initial_id",
"metadata": {
"collapsed": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
@@ -58,24 +49,12 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 58,
"id": "ad4d16d13804be69",
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading (…)okenizer_config.json: 100%|██████████| 1.29k/1.29k [00:00<00:00, 169kB/s]\n",
"Downloading (…)olve/main/vocab.json: 100%|██████████| 575k/575k [00:00<00:00, 5.89MB/s]\n",
"Downloading (…)olve/main/merges.txt: 100%|██████████| 294k/294k [00:00<00:00, 1.09MB/s]\n",
"Downloading (…)in/added_tokens.json: 100%|██████████| 1.87k/1.87k [00:00<00:00, 1.13MB/s]\n",
"Downloading (…)cial_tokens_map.json: 100%|██████████| 913/913 [00:00<00:00, 515kB/s]\n"
]
}
],
"outputs": [],
"source": [
"# download tokenizer from huggingface\n",
"tokenizer = AutoTokenizer.from_pretrained(\"microsoft/codereviewer\")\n",
@@ -86,7 +65,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 59,
"id": "cb003d6d8f578da1",
"metadata": {
"collapsed": false
@@ -120,7 +99,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 60,
"id": "d06f51b2150c61c4",
"metadata": {
"collapsed": false
@@ -167,7 +146,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 61,
"id": "7a5b97449733bbc6",
"metadata": {
"collapsed": false
@@ -192,11 +171,11 @@
" )\n",
" # decode the predictions\n",
" preds_np = preds.detach().cpu().numpy()\n",
" preds_decoded = np.apply_along_axis(lambda row: tokenizer.decode(\n",
" row[2:], skip_special_tokens=True, clean_up_tokenization_spaces=False\n",
" ), 1, preds_np)\n",
" preds_decoded = [tokenizer.decode(row[2:],\n",
" skip_special_tokens=True,\n",
" clean_up_tokenization_spaces=False) for row in preds_np]\n",
" # add the decoded predictions to the result\n",
" result += list(preds_decoded)\n",
" result += preds_decoded\n",
" return result"
]
},
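
The substantive change in this hunk replaces np.apply_along_axis with a plain list comprehension for decoding the generated ids. One plausible motivation (my inference, not stated in the commit): apply_along_axis allocates a fixed-width string array sized from the first decoded row, so longer comments later in the batch can be silently truncated, while a list comprehension keeps full Python strings and needs no list(...) conversion afterwards. A minimal sketch of the new decoding step under these assumptions; decode_batch is a hypothetical helper name, and the dummy tensor only stands in for real model.generate() output:

```python
import torch
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/codereviewer")

def decode_batch(preds, tokenizer):
    """Decode one batch of generated token ids into review-comment strings."""
    preds_np = preds.detach().cpu().numpy()
    # row[2:] skips the first two generated ids before decoding,
    # mirroring the slicing used in the notebook.
    return [
        tokenizer.decode(row[2:],
                         skip_special_tokens=True,
                         clean_up_tokenization_spaces=False)
        for row in preds_np
    ]

# Stand-in for model.generate() output: a (batch, seq_len) tensor of ids.
dummy_preds = torch.tensor([[0, 0, 2094, 1036], [0, 0, 4521, 259]])
print(decode_batch(dummy_preds, tokenizer))
```
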
@@ -215,7 +194,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 62,
"id": "c508661efcdcad40",
"metadata": {
"collapsed": false
@@ -225,13 +204,10 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading (…)lve/main/config.json: 100%|██████████| 2.13k/2.13k [00:00<00:00, 478kB/s]\n",
"Downloading pytorch_model.bin: 100%|██████████| 892M/892M [00:02<00:00, 306MB/s] \n",
"Downloading (…)neration_config.json: 100%|██████████| 168/168 [00:00<00:00, 36.7kB/s]\n",
"100%|██████████| 636/636 [11:08<00:00, 1.05s/it]\n",
"100%|██████████| 63/63 [03:31<00:00, 3.35s/it]\n",
"100%|██████████| 63/63 [01:57<00:00, 1.87s/it]\n",
"100%|██████████| 63/63 [02:39<00:00, 2.53s/it]\n"
"100%|██████████| 636/636 [11:27<00:00, 1.08s/it]\n",
"100%|██████████| 63/63 [03:37<00:00, 3.45s/it]\n",
"100%|██████████| 63/63 [02:01<00:00, 1.93s/it]\n",
"100%|██████████| 63/63 [02:46<00:00, 2.64s/it]\n"
]
}
],
@@ -270,7 +246,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 63,
"id": "851255e54c49484a",
"metadata": {
"collapsed": false
@@ -280,16 +256,13 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading (…)lve/main/config.json: 100%|██████████| 2.13k/2.13k [00:00<00:00, 290kB/s]\n",
"Downloading pytorch_model.bin: 100%|██████████| 892M/892M [00:22<00:00, 39.4MB/s] \n",
"Some weights of the model checkpoint at waleko/codereviewer-finetuned-msg were not used when initializing T5ForConditionalGeneration: ['cls_head.bias', 'cls_head.weight']\n",
"Some weights of the model checkpoint at waleko/codereviewer-finetuned-msg were not used when initializing T5ForConditionalGeneration: ['cls_head.weight', 'cls_head.bias']\n",
"- This IS expected if you are initializing T5ForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing T5ForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Downloading (…)neration_config.json: 100%|██████████| 168/168 [00:00<00:00, 49.2kB/s]\n",
"100%|██████████| 636/636 [15:58<00:00, 1.51s/it]\n",
"100%|██████████| 63/63 [01:41<00:00, 1.61s/it]\n",
"100%|██████████| 63/63 [01:32<00:00, 1.47s/it]\n",
"100%|██████████| 63/63 [01:27<00:00, 1.39s/it]\n"
"100%|██████████| 636/636 [15:51<00:00, 1.50s/it]\n",
"100%|██████████| 63/63 [01:40<00:00, 1.59s/it]\n",
"100%|██████████| 63/63 [01:32<00:00, 1.48s/it]\n",
"100%|██████████| 63/63 [01:26<00:00, 1.38s/it]\n"
]
}
],
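
The warning kept in this hunk ("Some weights of the model checkpoint at waleko/codereviewer-finetuned-msg were not used when initializing T5ForConditionalGeneration: ['cls_head.weight', 'cls_head.bias']") is expected: the checkpoint carries an extra classification head, presumably from another training objective, and the seq2seq generation class simply discards it. A minimal, hedged sketch of loading the fine-tuned model; the device handling and eval() call are illustrative additions, not taken from the notebook:

```python
import torch
from transformers import T5ForConditionalGeneration

# Loading the fine-tuned checkpoint triggers the "cls_head" warning shown
# above; the unused classification-head weights are dropped, which is
# harmless when the model is only used for generation.
model = T5ForConditionalGeneration.from_pretrained("waleko/codereviewer-finetuned-msg")

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()
```
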
