From de727fdf8fb283b0f6d2feb2bf6bfc877d48756f Mon Sep 17 00:00:00 2001
From: Ellington
Date: Wed, 10 Jan 2024 07:05:00 +0100
Subject: [PATCH] fix accuracy metric and add test_with_y

---
 Finetune_GLUE.ipynb | 32 ++++++++++++++++++++------------
 finetune.py         |  9 +++++----
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/Finetune_GLUE.ipynb b/Finetune_GLUE.ipynb
index 7869c2f..b0eabe5 100644
--- a/Finetune_GLUE.ipynb
+++ b/Finetune_GLUE.ipynb
@@ -187,10 +187,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "accuracy_metric = AvgMetric(lambda inp,targ: torch.eq(torch.tensor(inp.argmax(dim=-1)), torch.tensor(targ)).float().mean())\n",
     "METRICS = {\n",
     "  **{ task:[MatthewsCorrCoef()] for task in ['cola']},\n",
-    "  **{ task:[accuracy] for task in ['sst2', 'mnli', 'qnli', 'rte', 'wnli', 'snli','ax']},\n",
-    "  **{ task:[F1Score(), accuracy] for task in ['mrpc', 'qqp']}, \n",
+    "  **{ task:[accuracy_metric] for task in ['sst2', 'mnli', 'qnli', 'rte', 'wnli', 'snli','ax']},\n",
+    "  **{ task:[F1Score(), accuracy_metric] for task in ['mrpc', 'qqp']}, \n",
     "  **{ task:[PearsonCorrCoef(), SpearmanCorrCoef()] for task in ['stsb']}\n",
     "}\n",
     "NUM_CLASS = {\n",
@@ -319,7 +320,7 @@
     "    glue_dsets[task]['train'] = datasets.concatenate_datasets([glue_dsets[task]['train'], swapped_train])\n",
     "\n",
     "  # Load / Make dataloaders\n",
-    "  hf_dsets = HF_Datasets(glue_dsets[task], hf_toker=hf_tokenizer, n_inp=3,\n",
+    "  hf_dsets = HF_Datasets(glue_dsets[task], hf_toker=hf_tokenizer, n_inp=3, test_with_y=True,\n",
     "                         cols={'inp_ids':TensorText, 'attn_mask':noop, 'token_type_ids':noop, 'label':TensorCategory})\n",
     "  if c.double_unordered and task in ['mrpc', 'stsb']:\n",
     "    dl_kwargs = {'train': {'cache_name': f\"double_dl_{c.max_length}_train.json\"}}\n",
@@ -342,7 +343,7 @@
     "  glue_dsets['wnli'] = wsc.my_map(partial(wsc_trick_process, hf_toker=hf_tokenizer),\n",
     "                                  cache_file_names=\"tricked_{split}.arrow\")\n",
     "  cols={'prefix':TensorText,'suffix':TensorText,'cands':TensorText,'cand_lens':noop,'label':TensorCategory}\n",
-    "  glue_dls['wnli'] = HF_Datasets(glue_dsets['wnli'], hf_toker=hf_tokenizer, n_inp=4, \n",
+    "  glue_dls['wnli'] = HF_Datasets(glue_dsets['wnli'], hf_toker=hf_tokenizer, n_inp=4, test_with_y=True,\n",
     "                                 cols=cols).dataloaders(bs=32, cache_name=\"dl_tricked_{split}.json\")"
    ]
   },
@@ -718,17 +719,24 @@
    },
    "outputs": [
     {
-     "output_type": "stream",
      "name": "stdout",
-     "text": "wnli\nSome weights of the model checkpoint at google/electra-large-discriminator were not used when initializing ElectraForPreTraining: ['electra.embeddings_project.weight', 'electra.embeddings_project.bias']\n- This IS expected if you are initializing ElectraForPreTraining from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).\n- This IS NOT expected if you are initializing ElectraForPreTraining from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
+     "output_type": "stream",
+     "text": [
+      "wnli\n",
+      "Some weights of the model checkpoint at google/electra-large-discriminator were not used when initializing ElectraForPreTraining: ['electra.embeddings_project.weight', 'electra.embeddings_project.bias']\n",
+      "- This IS expected if you are initializing ElectraForPreTraining from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).\n",
+      "- This IS NOT expected if you are initializing ElectraForPreTraining from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
+     ]
     },
     {
-     "output_type": "display_data",
      "data": {
-      "text/plain": "",
-      "text/html": ""
+      "text/html": [],
+      "text/plain": [
+       ""
+      ]
      },
-     "metadata": {}
+     "metadata": {},
+     "output_type": "display_data"
     }
    ],
    "source": [
@@ -778,7 +786,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.7-final"
+   "version": "3.10.11"
   },
   "widgets": {
    "application/vnd.jupyter.widget-state+json": {
@@ -1277,4 +1285,4 @@
  },
 "nbformat": 4,
 "nbformat_minor": 1
-}
\ No newline at end of file
+}
diff --git a/finetune.py b/finetune.py
index 222ea8a..35b76e9 100644
--- a/finetune.py
+++ b/finetune.py
@@ -149,10 +149,11 @@ def after_fit(self):
 
 
 # %%
+accuracy_metric = AvgMetric(lambda inp,targ: torch.eq(torch.tensor(inp.argmax(dim=-1)), torch.tensor(targ)).float().mean())
 METRICS = {
   **{ task:[MatthewsCorrCoef()] for task in ['cola']},
-  **{ task:[accuracy] for task in ['sst2', 'mnli', 'qnli', 'rte', 'wnli', 'snli','ax']},
-  **{ task:[F1Score(), accuracy] for task in ['mrpc', 'qqp']},
+  **{ task:[accuracy_metric] for task in ['sst2', 'mnli', 'qnli', 'rte', 'wnli', 'snli','ax']},
+  **{ task:[F1Score(), accuracy_metric] for task in ['mrpc', 'qqp']},
   **{ task:[PearsonCorrCoef(), SpearmanCorrCoef()] for task in ['stsb']}
 }
 NUM_CLASS = {
@@ -229,7 +230,7 @@ def tokenize_sents_max_len(example, cols, max_len, swap=False):
     glue_dsets[task]['train'] = datasets.concatenate_datasets([glue_dsets[task]['train'], swapped_train])
 
   # Load / Make dataloaders
-  hf_dsets = HF_Datasets(glue_dsets[task], hf_toker=hf_tokenizer, n_inp=3,
+  hf_dsets = HF_Datasets(glue_dsets[task], hf_toker=hf_tokenizer, n_inp=3, test_with_y=True,
                          cols={'inp_ids':TensorText, 'attn_mask':noop, 'token_type_ids':noop, 'label':TensorCategory})
   if c.double_unordered and task in ['mrpc', 'stsb']:
     dl_kwargs = {'train': {'cache_name': f"double_dl_{c.max_length}_train.json"}}
@@ -245,7 +246,7 @@ def tokenize_sents_max_len(example, cols, max_len, swap=False):
   glue_dsets['wnli'] = wsc.my_map(partial(wsc_trick_process, hf_toker=hf_tokenizer),
                                   cache_file_names="tricked_{split}.arrow")
   cols={'prefix':TensorText,'suffix':TensorText,'cands':TensorText,'cand_lens':noop,'label':TensorCategory}
-  glue_dls['wnli'] = HF_Datasets(glue_dsets['wnli'], hf_toker=hf_tokenizer, n_inp=4,
+  glue_dls['wnli'] = HF_Datasets(glue_dsets['wnli'], hf_toker=hf_tokenizer, n_inp=4, test_with_y=True,
                                  cols=cols).dataloaders(bs=32, cache_name="dl_tricked_{split}.json")
 
 # %% [markdown]
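
Note: below is a minimal standalone sanity check of the metric function this patch wraps in fastai's AvgMetric. It uses plain PyTorch only, and the logits/targets are made-up illustrative values, not data from the GLUE tasks above.

    import torch

    # Same function the patch passes to AvgMetric: argmax over the class
    # dimension, compare with the targets, and average the matches.
    # torch.tensor() on an existing tensor only emits a copy-construct
    # UserWarning; the computed value is unaffected.
    metric_fn = lambda inp, targ: torch.eq(torch.tensor(inp.argmax(dim=-1)),
                                           torch.tensor(targ)).float().mean()

    logits = torch.tensor([[0.2, 0.8],   # -> predicts class 1
                           [0.9, 0.1],   # -> predicts class 0
                           [0.3, 0.7]])  # -> predicts class 1
    targets = torch.tensor([1, 0, 0])

    print(metric_fn(logits, targets))  # tensor(0.6667): 2 of 3 predictions correct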