diff --git a/fine_tuning_ALBERT.ipynb b/fine_tuning_ALBERT.ipynb
deleted file mode 100644
index 143f5c9..0000000
--- a/fine_tuning_ALBERT.ipynb
+++ /dev/null
@@ -1,1971 +0,0 @@
-{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "name": "fine-tuning ALBERT.ipynb",
- "provenance": [],
- "authorship_tag": "ABX9TyPQ0neoGAreJPtIYFOz3s34",
- "include_colab_link": true
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "accelerator": "GPU"
- },
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "view-in-github",
- "colab_type": "text"
- },
- "source": [
- ""
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "9ulHtYtNrQ8r",
- "outputId": "4d47a5fc-ce15-49bd-89cc-c86e8d6debe9",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 1000
- }
- },
- "source": [
- "!pip install keras\n",
- "!pip install tensorflow\n",
- "!pip install transformers\n",
- "!pip3 install albert-tensorflow\n",
- "!pip install torch\n",
- "!pip install sentencepiece"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: keras in /usr/local/lib/python3.6/dist-packages (2.4.3)\n",
- "Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from keras) (3.13)\n",
- "Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from keras) (2.10.0)\n",
- "Requirement already satisfied: numpy>=1.9.1 in /usr/local/lib/python3.6/dist-packages (from keras) (1.18.5)\n",
- "Requirement already satisfied: scipy>=0.14 in /usr/local/lib/python3.6/dist-packages (from keras) (1.4.1)\n",
- "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from h5py->keras) (1.15.0)\n",
- "Requirement already satisfied: tensorflow in /usr/local/lib/python3.6/dist-packages (2.3.0)\n",
- "Requirement already satisfied: protobuf>=3.9.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (3.12.4)\n",
- "Requirement already satisfied: h5py<2.11.0,>=2.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (2.10.0)\n",
- "Requirement already satisfied: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.10.0)\n",
- "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.1.0)\n",
- "Requirement already satisfied: gast==0.3.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.3.3)\n",
- "Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.32.0)\n",
- "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (3.3.0)\n",
- "Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.15.0)\n",
- "Requirement already satisfied: keras-preprocessing<1.2,>=1.1.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.1.2)\n",
- "Requirement already satisfied: scipy==1.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.4.1)\n",
- "Requirement already satisfied: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.12.1)\n",
- "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.35.1)\n",
- "Requirement already satisfied: google-pasta>=0.1.8 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.2.0)\n",
- "Requirement already satisfied: numpy<1.19.0,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.18.5)\n",
- "Requirement already satisfied: astunparse==1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.6.3)\n",
- "Requirement already satisfied: tensorboard<3,>=2.3.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (2.3.0)\n",
- "Requirement already satisfied: tensorflow-estimator<2.4.0,>=2.3.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (2.3.0)\n",
- "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.9.2->tensorflow) (50.3.0)\n",
- "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (3.2.2)\n",
- "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (1.7.0)\n",
- "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (0.4.1)\n",
- "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (2.23.0)\n",
- "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (1.0.1)\n",
- "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (1.17.2)\n",
- "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from markdown>=2.6.8->tensorboard<3,>=2.3.0->tensorflow) (2.0.0)\n",
- "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<3,>=2.3.0->tensorflow) (1.3.0)\n",
- "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (2020.6.20)\n",
- "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (1.24.3)\n",
- "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (3.0.4)\n",
- "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (2.10)\n",
- "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (0.2.8)\n",
- "Requirement already satisfied: rsa<5,>=3.1.4; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (4.6)\n",
- "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (4.1.1)\n",
- "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->markdown>=2.6.8->tensorboard<3,>=2.3.0->tensorflow) (3.2.0)\n",
- "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<3,>=2.3.0->tensorflow) (3.1.0)\n",
- "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (0.4.8)\n",
- "Collecting transformers\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/19/22/aff234f4a841f8999e68a7a94bdd4b60b4cebcfeca5d67d61cd08c9179de/transformers-3.3.1-py3-none-any.whl (1.1MB)\n",
- "\u001b[K |████████████████████████████████| 1.1MB 2.8MB/s \n",
- "\u001b[?25hCollecting sacremoses\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)\n",
- "\u001b[K |████████████████████████████████| 890kB 17.4MB/s \n",
- "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from transformers) (20.4)\n",
- "Collecting sentencepiece!=0.1.92\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/d4/a4/d0a884c4300004a78cca907a6ff9a5e9fe4f090f5d95ab341c53d28cbc58/sentencepiece-0.1.91-cp36-cp36m-manylinux1_x86_64.whl (1.1MB)\n",
- "\u001b[K |████████████████████████████████| 1.1MB 18.1MB/s \n",
- "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers) (3.0.12)\n",
- "Requirement already satisfied: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers) (0.7)\n",
- "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from transformers) (4.41.1)\n",
- "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers) (2019.12.20)\n",
- "Collecting tokenizers==0.8.1.rc2\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/80/83/8b9fccb9e48eeb575ee19179e2bdde0ee9a1904f97de5f02d19016b8804f/tokenizers-0.8.1rc2-cp36-cp36m-manylinux1_x86_64.whl (3.0MB)\n",
- "\u001b[K |████████████████████████████████| 3.0MB 28.7MB/s \n",
- "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from transformers) (1.18.5)\n",
- "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from transformers) (2.23.0)\n",
- "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (1.15.0)\n",
- "Requirement already satisfied: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (7.1.2)\n",
- "Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (0.16.0)\n",
- "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->transformers) (2.4.7)\n",
- "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2.10)\n",
- "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2020.6.20)\n",
- "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (1.24.3)\n",
- "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (3.0.4)\n",
- "Building wheels for collected packages: sacremoses\n",
- " Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- " Created wheel for sacremoses: filename=sacremoses-0.0.43-cp36-none-any.whl size=893257 sha256=7aceb359875e5a113a3b4100f89e9d914f4e2a2ce05ed7eaaf52e10e7dfa0b06\n",
- " Stored in directory: /root/.cache/pip/wheels/29/3c/fd/7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45\n",
- "Successfully built sacremoses\n",
- "Installing collected packages: sacremoses, sentencepiece, tokenizers, transformers\n",
- "Successfully installed sacremoses-0.0.43 sentencepiece-0.1.91 tokenizers-0.8.1rc2 transformers-3.3.1\n",
- "Collecting albert-tensorflow\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/ba/1e/e776bb23e6f89a1f1d7d33b50d0bd9c2c7b24b39aa548f041827a9c00d73/albert_tensorflow-1.1-py3-none-any.whl (81kB)\n",
- "\u001b[K |████████████████████████████████| 81kB 2.3MB/s \n",
- "\u001b[?25hRequirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from albert-tensorflow) (1.15.0)\n",
- "Installing collected packages: albert-tensorflow\n",
- "Successfully installed albert-tensorflow-1.1\n",
- "Requirement already satisfied: torch in /usr/local/lib/python3.6/dist-packages (1.6.0+cu101)\n",
- "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from torch) (1.18.5)\n",
- "Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from torch) (0.16.0)\n",
- "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.6/dist-packages (0.1.91)\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "VPur-ModrqiE"
- },
- "source": [
- "# Check GPU"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "eEMLd2nzrtAr",
- "outputId": "ac45d376-2643-4643-873d-b887fa65f29f",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 34
- }
- },
- "source": [
- "import tensorflow as tf\n",
- "\n",
- "# Get the GPU device name.\n",
- "device_name = tf.test.gpu_device_name()\n",
- "\n",
- "# The device name should look like the following:\n",
- "if device_name == '/device:GPU:0':\n",
- " print('Found GPU at: {}'.format(device_name))\n",
- "else:\n",
- " raise SystemError('GPU device not found')"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Found GPU at: /device:GPU:0\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "aJUsW5-trxGv",
- "outputId": "5034d3c9-248b-4711-b07c-162d62d9de9b",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 51
- }
- },
- "source": [
- "import torch\n",
- "\n",
- "# If there's a GPU available...\n",
- "if torch.cuda.is_available(): \n",
- "\n",
- " # Tell PyTorch to use the GPU. \n",
- " device = torch.device(\"cuda\")\n",
- "\n",
- " print('There are %d GPU(s) available.' % torch.cuda.device_count())\n",
- "\n",
- " print('We will use the GPU:', torch.cuda.get_device_name(0))\n",
- "\n",
- "# If not...\n",
- "else:\n",
- " print('No GPU available, using the CPU instead.')\n",
- " device = torch.device(\"cpu\")"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "There are 1 GPU(s) available.\n",
- "We will use the GPU: Tesla P100-PCIE-16GB\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "sZuNMIWGs5L0"
- },
- "source": [
- "# Mounting"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "PGdZlz87rX7Q",
- "outputId": "4c0f3693-aaaa-4f16-d8de-3eaa8a2b1b90",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 34
- }
- },
- "source": [
- "from google.colab import drive\n",
- "drive.mount('/content/gdrive')"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Mounted at /content/gdrive\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "2lMkdNcrrbM0",
- "outputId": "52181ba1-f48d-4883-ac18-73d8e6f65b1c",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 136
- }
- },
- "source": [
- "!git clone https://github.com/mjag7682/ALBERT"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Cloning into 'ALBERT'...\n",
- "remote: Enumerating objects: 9, done.\u001b[K\n",
- "remote: Counting objects: 100% (9/9), done.\u001b[K\n",
- "remote: Compressing objects: 100% (9/9), done.\u001b[K\n",
- "remote: Total 362 (delta 2), reused 0 (delta 0), pack-reused 353\u001b[K\n",
- "Receiving objects: 100% (362/362), 244.39 KiB | 470.00 KiB/s, done.\n",
- "Resolving deltas: 100% (235/235), done.\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "skH_bDk2rdJ_",
- "outputId": "4137e476-8d92-444d-eb2b-3461194b4b68",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 1000
- }
- },
- "source": [
- "!pip install -r /content/ALBERT/requirements.txt "
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Collecting tensorflow==1.15.2\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/9a/d9/fd234c7bf68638423fb8e7f44af7fcfce3bcaf416b51e6d902391e47ec43/tensorflow-1.15.2-cp36-cp36m-manylinux2010_x86_64.whl (110.5MB)\n",
- "\u001b[K |████████████████████████████████| 110.5MB 65kB/s \n",
- "\u001b[?25hCollecting tensorflow_hub==0.7\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/00/0e/a91780d07592b1abf9c91344ce459472cc19db3b67fdf3a61dca6ebb2f5c/tensorflow_hub-0.7.0-py2.py3-none-any.whl (89kB)\n",
- "\u001b[K |████████████████████████████████| 92kB 9.6MB/s \n",
- "\u001b[?25hRequirement already satisfied: sentencepiece in /usr/local/lib/python3.6/dist-packages (from -r /content/ALBERT/requirements.txt (line 5)) (0.1.93)\n",
- "Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.15.0)\n",
- "Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.32.0)\n",
- "Collecting gast==0.2.2\n",
- " Downloading https://files.pythonhosted.org/packages/4e/35/11749bf99b2d4e3cceb4d55ca22590b0d7c2c62b9de38ac4a4a7f4687421/gast-0.2.2.tar.gz\n",
- "Requirement already satisfied: astor>=0.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (0.8.1)\n",
- "Requirement already satisfied: numpy<2.0,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.18.5)\n",
- "Requirement already satisfied: protobuf>=3.6.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (3.12.4)\n",
- "Collecting tensorboard<1.16.0,>=1.15.0\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/1e/e9/d3d747a97f7188f48aa5eda486907f3b345cd409f0a0850468ba867db246/tensorboard-1.15.0-py3-none-any.whl (3.8MB)\n",
- "\u001b[K |████████████████████████████████| 3.8MB 41.1MB/s \n",
- "\u001b[?25hRequirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (3.3.0)\n",
- "Requirement already satisfied: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (0.10.0)\n",
- "Collecting tensorflow-estimator==1.15.1\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/de/62/2ee9cd74c9fa2fa450877847ba560b260f5d0fb70ee0595203082dafcc9d/tensorflow_estimator-1.15.1-py2.py3-none-any.whl (503kB)\n",
- "\u001b[K |████████████████████████████████| 512kB 29.1MB/s \n",
- "\u001b[?25hRequirement already satisfied: wheel>=0.26; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (0.35.1)\n",
- "Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.1.2)\n",
- "Requirement already satisfied: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.12.1)\n",
- "Collecting keras-applications>=1.0.8\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/71/e3/19762fdfc62877ae9102edf6342d71b28fbfd9dea3d2f96a882ce099b03f/Keras_Applications-1.0.8-py3-none-any.whl (50kB)\n",
- "\u001b[K |████████████████████████████████| 51kB 5.6MB/s \n",
- "\u001b[?25hRequirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.1.0)\n",
- "Requirement already satisfied: google-pasta>=0.1.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (0.2.0)\n",
- "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.6.1->tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (50.3.0)\n",
- "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.16.0,>=1.15.0->tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.0.1)\n",
- "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.16.0,>=1.15.0->tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (3.2.2)\n",
- "Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from keras-applications>=1.0.8->tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (2.10.0)\n",
- "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from markdown>=2.6.8->tensorboard<1.16.0,>=1.15.0->tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (2.0.0)\n",
- "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->markdown>=2.6.8->tensorboard<1.16.0,>=1.15.0->tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (3.2.0)\n",
- "Building wheels for collected packages: gast\n",
- " Building wheel for gast (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- " Created wheel for gast: filename=gast-0.2.2-cp36-none-any.whl size=7542 sha256=da09531e91a678dff87f3e427990e2f4fae4e07e33f032b7f927656d5c7d3e26\n",
- " Stored in directory: /root/.cache/pip/wheels/5c/2e/7e/a1d4d4fcebe6c381f378ce7743a3ced3699feb89bcfbdadadd\n",
- "Successfully built gast\n",
- "\u001b[31mERROR: tensorflow-probability 0.11.0 has requirement gast>=0.3.2, but you'll have gast 0.2.2 which is incompatible.\u001b[0m\n",
- "Installing collected packages: gast, tensorboard, tensorflow-estimator, keras-applications, tensorflow, tensorflow-hub\n",
- " Found existing installation: gast 0.3.3\n",
- " Uninstalling gast-0.3.3:\n",
- " Successfully uninstalled gast-0.3.3\n",
- " Found existing installation: tensorboard 2.3.0\n",
- " Uninstalling tensorboard-2.3.0:\n",
- " Successfully uninstalled tensorboard-2.3.0\n",
- " Found existing installation: tensorflow-estimator 2.3.0\n",
- " Uninstalling tensorflow-estimator-2.3.0:\n",
- " Successfully uninstalled tensorflow-estimator-2.3.0\n",
- " Found existing installation: tensorflow 2.3.0\n",
- " Uninstalling tensorflow-2.3.0:\n",
- " Successfully uninstalled tensorflow-2.3.0\n",
- " Found existing installation: tensorflow-hub 0.9.0\n",
- " Uninstalling tensorflow-hub-0.9.0:\n",
- " Successfully uninstalled tensorflow-hub-0.9.0\n",
- "Successfully installed gast-0.2.2 keras-applications-1.0.8 tensorboard-1.15.0 tensorflow-1.15.2 tensorflow-estimator-1.15.1 tensorflow-hub-0.7.0\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "jwgFYW9dsFO1"
- },
- "source": [
- "# Fine Tune"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "VW_nppn5tbuc",
- "outputId": "30455f24-785e-42ea-cdd8-5eef33bcee93",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 854
- }
- },
- "source": [
- "!pip install --upgrade tensorflow"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Collecting tensorflow\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/ad/ad/769c195c72ac72040635c66cd9ba7b0f4b4fc1ac67e59b99fa6988446c22/tensorflow-2.3.1-cp36-cp36m-manylinux2010_x86_64.whl (320.4MB)\n",
- "\u001b[K |████████████████████████████████| 320.4MB 50kB/s \n",
- "\u001b[?25hRequirement already satisfied, skipping upgrade: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.1.0)\n",
- "Requirement already satisfied, skipping upgrade: astunparse==1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.6.3)\n",
- "Requirement already satisfied, skipping upgrade: gast==0.3.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.3.3)\n",
- "Requirement already satisfied, skipping upgrade: tensorboard<3,>=2.3.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (2.3.0)\n",
- "Requirement already satisfied, skipping upgrade: wheel>=0.26 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.35.1)\n",
- "Requirement already satisfied, skipping upgrade: keras-preprocessing<1.2,>=1.1.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.1.2)\n",
- "Requirement already satisfied, skipping upgrade: numpy<1.19.0,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.18.5)\n",
- "Requirement already satisfied, skipping upgrade: tensorflow-estimator<2.4.0,>=2.3.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (2.3.0)\n",
- "Requirement already satisfied, skipping upgrade: google-pasta>=0.1.8 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.2.0)\n",
- "Requirement already satisfied, skipping upgrade: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (3.3.0)\n",
- "Requirement already satisfied, skipping upgrade: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.32.0)\n",
- "Requirement already satisfied, skipping upgrade: six>=1.12.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.15.0)\n",
- "Requirement already satisfied, skipping upgrade: protobuf>=3.9.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (3.12.4)\n",
- "Requirement already satisfied, skipping upgrade: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.12.1)\n",
- "Requirement already satisfied, skipping upgrade: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.10.0)\n",
- "Requirement already satisfied, skipping upgrade: h5py<2.11.0,>=2.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (2.10.0)\n",
- "Requirement already satisfied, skipping upgrade: setuptools>=41.0.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (50.3.0)\n",
- "Requirement already satisfied, skipping upgrade: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (3.2.2)\n",
- "Requirement already satisfied, skipping upgrade: google-auth<2,>=1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (1.17.2)\n",
- "Requirement already satisfied, skipping upgrade: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (1.0.1)\n",
- "Requirement already satisfied, skipping upgrade: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (1.7.0)\n",
- "Requirement already satisfied, skipping upgrade: requests<3,>=2.21.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (2.23.0)\n",
- "Requirement already satisfied, skipping upgrade: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (0.4.1)\n",
- "Requirement already satisfied, skipping upgrade: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from markdown>=2.6.8->tensorboard<3,>=2.3.0->tensorflow) (2.0.0)\n",
- "Requirement already satisfied, skipping upgrade: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (0.2.8)\n",
- "Requirement already satisfied, skipping upgrade: rsa<5,>=3.1.4; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (4.6)\n",
- "Requirement already satisfied, skipping upgrade: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (4.1.1)\n",
- "Requirement already satisfied, skipping upgrade: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (2020.6.20)\n",
- "Requirement already satisfied, skipping upgrade: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (1.24.3)\n",
- "Requirement already satisfied, skipping upgrade: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (3.0.4)\n",
- "Requirement already satisfied, skipping upgrade: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (2.10)\n",
- "Requirement already satisfied, skipping upgrade: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<3,>=2.3.0->tensorflow) (1.3.0)\n",
- "Requirement already satisfied, skipping upgrade: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->markdown>=2.6.8->tensorboard<3,>=2.3.0->tensorflow) (3.2.0)\n",
- "Requirement already satisfied, skipping upgrade: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (0.4.8)\n",
- "Requirement already satisfied, skipping upgrade: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<3,>=2.3.0->tensorflow) (3.1.0)\n",
- "Installing collected packages: tensorflow\n",
- " Found existing installation: tensorflow 2.3.0\n",
- " Uninstalling tensorflow-2.3.0:\n",
- " Successfully uninstalled tensorflow-2.3.0\n",
- "Successfully installed tensorflow-2.3.1\n"
- ],
- "name": "stdout"
- },
- {
- "output_type": "display_data",
- "data": {
- "application/vnd.colab-display-data+json": {
- "pip_warning": {
- "packages": [
- "tensorflow"
- ]
- }
- }
- },
- "metadata": {
- "tags": []
- }
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "oBKiimp4YxOf",
- "outputId": "58f244b6-afb1-45b9-c9c8-060ae48116ce",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 51
- }
- },
- "source": [
- "# !pip install modeling"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "\u001b[31mERROR: Could not find a version that satisfies the requirement modeling (from versions: none)\u001b[0m\n",
- "\u001b[31mERROR: No matching distribution found for modeling\u001b[0m\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "5rYCZ946YvOj"
- },
- "source": [
- "from tensorflow.python.compiler.tensorrt import trt_convert as trt\n",
- "import tensorflow as tf\n",
- "# from albert import modeling\n",
- "# import tokenization\n",
- "# import optimization\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "from keras.preprocessing.sequence import pad_sequences"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "dbAgsBcot4n9"
- },
- "source": [
- "from transformers import AlbertTokenizer\n",
- "from transformers.modeling_albert import AlbertModel, load_tf_weights_in_albert, AlbertPreTrainedModel\n",
- "from transformers import AlbertForSequenceClassification,AlbertConfig\n",
- "from transformers.tokenization_bert import BertTokenizer\n",
- "import torch.nn as nn\n",
- "from torch.nn import CrossEntropyLoss\n",
- "VOCAB_FILE = \"/content/gdrive/My Drive/ALBERTimplementation/data/30k-clean.model\" # This is the vocab file output from Build Vocab step\n",
- "CONFIG_FILE = \"/content/gdrive/My Drive/ALBERTimplementation/AG_News/albert_config.json\"\n",
- "ALBERT_PRETRAIN_CHECKPOINT = \"/content/gdrive/My Drive/ALBERTimplementation/AG_News/model.ckpt-best.index\" # This is the model checkpoint output from Albert Pretrain step\n",
- "tokenizer = AlbertTokenizer(vocab_file=VOCAB_FILE)\n",
- "config = AlbertConfig.from_json_file(CONFIG_FILE)\n",
- "model = AlbertModel(config)\n",
- "model = load_tf_weights_in_albert(model, config,ALBERT_PRETRAIN_CHECKPOINT)"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "U6uLqvcVsJRY",
- "outputId": "9ba394cf-afea-439f-9ad7-8a3f59861df4",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 283
- }
- },
- "source": [
- "# from transformers import AlbertTokenizer\n",
- "# from transformers import AlbertForSequenceClassification,AlbertConfig\n",
- "# config = modeling.AlbertConfig.from_json_file(\"/content/gdrive/My Drive/ALBERTimplementation/model-fine/config.json\")\n",
- "# tokenizer = tokenization.FullTokenizer.from_scratch(vocab_file=\"/content/gdrive/My Drive/ALBERTimplementation/model-fine/vocab.txt\", do_lower_case=True, spm_model_file=None)\n",
- "# tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2', do_lower_case=True) \n",
- "# tokenizer = AlbertTokenizer.from_pretrained('/content/gdrive/My Drive/ALBERTimplementation/model-fine', do_lower_case=True) \n",
- "# tokenizer = AlbertTokenizer.from_pretrained('./content/drive/My Drive/Reuters_Dataset/reut2-021', do_lower_case=True) \n",
- "# PRE_TRAINED_MODEL_NAME_OR_PATH = '/content/gdrive/My Drive/ALBERTimplementation/model-fine'\n",
- "# model = AlbertForSequenceClassification.from_pretrained(PRE_TRAINED_MODEL_NAME_OR_PATH, num_labels = 2, output_attentions = False, output_hidden_states = False)"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "error",
- "ename": "AttributeError",
- "evalue": "ignored",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtransformers\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mAlbertForSequenceClassification\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mAlbertConfig\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mAlbertConfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_json_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"/content/gdrive/My Drive/ALBERTimplementation/model-fine/config.json\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mtokenizer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mAlbertTokenizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_scratch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvocab_file\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"/content/gdrive/My Drive/ALBERTimplementation/model-fine/vocab.txt\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdo_lower_case\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mspm_model_file\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;31m# tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2', do_lower_case=True)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;31m# tokenizer = AlbertTokenizer.from_pretrained('/content/gdrive/My Drive/ALBERTimplementation/model-fine', do_lower_case=True)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mAttributeError\u001b[0m: type object 'AlbertTokenizer' has no attribute 'from_scratch'"
- ]
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "eZYLfSvlsLBl"
- },
- "source": [
- "import pandas as pd\n",
- "import numpy as np\n",
- "import torch\n",
- "import tensorflow as tf"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "qZEJy5xu6Llu"
- },
- "source": [
- "train_data = pd.read_csv('/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/train.tsv', sep='\\t')\n",
- "for item in train_data.iterrows():\n",
- " print(item[1][1])\n",
- " if item[1][1] != 1 and item[1][1] != 0:\n",
- " print(item[1][1])"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "aLEK078bsRpD",
- "outputId": "621583cf-ec2c-4302-938d-fd12d55dd2d3",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 34
- }
- },
- "source": [
- "train_data = pd.read_csv('/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/train.tsv', sep='\\t')\n",
- "print(\"Number of training examples {}\".format(len(train_data)))\n",
- "num_examples = 100000\n",
- "train = train_data[:num_examples].text.values\n",
- "labels = train_data[:num_examples].label.values\n",
- "# train = train_data[]"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Number of training examples 131173\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "mNiWyaLD7Otu",
- "outputId": "b11835dc-c1f4-43c3-ab9a-4e3919bae840",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 51
- }
- },
- "source": [
- "print(len(train))\n",
- "print(len(labels))\n",
- "# print(labels[:100])\n",
- "for i in labels:\n",
- " if i!=0 and i!=1:\n",
- " print(i)"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "100000\n",
- "100000\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "G6MsmCDYsUC2",
- "outputId": "33b809fb-f668-483a-dfc4-9c61969fb847",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 88
- }
- },
- "source": [
- "# Print the original sentence.\n",
- "print(' Original: ', train[10])\n",
- "\n",
- "# Print the sentence split into tokens.\n",
- "print('Tokenized: ', tokenizer.tokenize(train[10]))\n",
- "\n",
- "# Print the sentence mapped to token ids.\n",
- "print('Token IDs: ', tokenizer.convert_tokens_to_ids(tokenizer.tokenize(train[10])))"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- " Original: vix spy new high vix new low spy calls versus lod\n",
- "Tokenized: ['▁vi', 'x', '▁spy', '▁new', '▁high', '▁vi', 'x', '▁new', '▁low', '▁spy', '▁call', 's', '▁vers', 'us', '▁lo', 'd']\n",
- "Token IDs: [1847, 782, 1181, 30, 141, 1847, 782, 30, 385, 1181, 172, 12, 3770, 595, 2947, 27]\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "_z9qjkZ9sVzV",
- "outputId": "58b18382-f1d7-461c-9cd7-4f2f8740aa46",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 51
- }
- },
- "source": [
- "print(train_data.text.apply(lambda x: len(x)).quantile([0.9]))\n",
- "MAX_LEN = 160"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "0.9 160.0\n",
- "Name: text, dtype: float64\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "ArJMjNxmMoHU",
- "outputId": "483535e7-53bc-4490-fb97-5227dbaf3c2c",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 34
- }
- },
- "source": [
- "m_l = 0\n",
- "for x in train:\n",
- " if len(x)>m_l:\n",
- " m_l = len(x)\n",
- "print(m_l)"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "1892\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "JBfPIx0HsXpY",
- "outputId": "4fd20283-60bf-4fcf-ac7a-236914066094",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 326
- }
- },
- "source": [
- "# Tokenize all of the sentences and map the tokens to thier word IDs.\n",
- "input_ids = []\n",
- "attention_masks = []\n",
- "\n",
- "# For every sentence...\n",
- "for text in train:\n",
- " # `encode_plus` will:\n",
- " # (1) Tokenize the sentence.\n",
- " # (2) Prepend the `[CLS]` token to the start.\n",
- " # (3) Append the `[SEP]` token to the end.\n",
- " # (4) Map tokens to their IDs.\n",
- " # (5) Pad or truncate the sentence to `max_length`\n",
- " # (6) Create attention masks for [PAD] tokens.\n",
- " encoded_dict = tokenizer.encode_plus(\n",
- " text, # Sentence to encode.\n",
- " add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n",
- " max_length = MAX_LEN, # Pad & truncate all sentences.\n",
- " pad_to_max_length = True,\n",
- " return_attention_mask = True, # Construct attn. masks.\n",
- " return_tensors = 'pt', # Return pytorch tensors.\n",
- " truncation = True\n",
- " )\n",
- " \n",
- " # Add the encoded sentence to the list. \n",
- " input_ids.append(encoded_dict['input_ids'])\n",
- " \n",
- " # And its attention mask (simply differentiates padding from non-padding).\n",
- " attention_masks.append(encoded_dict['attention_mask'])\n",
- "\n",
- "# Convert the lists into tensors.\n",
- "input_ids = torch.cat(input_ids, dim=0)\n",
- "attention_masks = torch.cat(attention_masks, dim=0)\n",
- "labels = torch.tensor(labels)\n",
- "\n",
- "# Print sentence 0, now as a list of IDs.\n",
- "print('Original: ', train[10])\n",
- "print('Token IDs:', input_ids[10])"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "/usr/local/lib/python3.6/dist-packages/transformers/tokenization_utils_base.py:1773: FutureWarning: The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).\n",
- " FutureWarning,\n"
- ],
- "name": "stderr"
- },
- {
- "output_type": "stream",
- "text": [
- "Original: vix spy new high vix new low spy calls versus lod\n",
- "Token IDs: tensor([ 2, 1847, 782, 1181, 30, 141, 1847, 782, 30, 385, 1181, 172,\n",
- " 12, 3770, 595, 2947, 27, 3, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0])\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "fZ9Gi4GDsesz",
- "outputId": "2d759eba-1357-4351-d7be-5bd1c97f0f17",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 51
- }
- },
- "source": [
- "#training & validation split\n",
- "from torch.utils.data import TensorDataset, random_split\n",
- "\n",
- "\n",
- "# Combine the training inputs into a TensorDataset.\n",
- "dataset = TensorDataset(input_ids, attention_masks, labels)\n",
- "\n",
- "# Create a 90-10 train-validation split.\n",
- "\n",
- "# Calculate the number of samples to include in each set.\n",
- "train_size = int(0.9 * len(dataset))\n",
- "val_size = len(dataset) - train_size\n",
- "\n",
- "# Divide the dataset by randomly selecting samples.\n",
- "train_dataset, val_dataset = random_split(dataset, [train_size, val_size])\n",
- "\n",
- "print('{} training samples'.format(train_size))\n",
- "print('{} validation samples'.format(val_size))"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "90000 training samples\n",
- "10000 validation samples\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "etq7ymGTshCN"
- },
- "source": [
- "def flat_accuracy(preds, labels):\n",
- " pred_flat = np.argmax(preds, axis=1).flatten()\n",
- " labels_flat = labels.flatten()\n",
- " return np.sum(pred_flat == labels_flat) / len(labels_flat)\n",
- "\n",
- "import time\n",
- "import datetime\n",
- "\n",
- "def format_time(elapsed):\n",
- " '''\n",
- " Takes a time in seconds and returns a string hh:mm:ss\n",
- " '''\n",
- " # Round to the nearest second.\n",
- " elapsed_rounded = int(round((elapsed)))\n",
- " \n",
- " # Format as hh:mm:ss\n",
- " return str(datetime.timedelta(seconds=elapsed_rounded))\n",
- "\n",
- "# Set the seed value all over the place to make this reproducible.\n",
- "import random\n",
- "def set_random(seed_val):\n",
- " random.seed(seed_val)\n",
- " np.random.seed(seed_val)\n",
- " torch.manual_seed(seed_val)\n",
- " torch.cuda.manual_seed_all(seed_val)"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "I4MJwmeAsi9g"
- },
- "source": [
- "def train_model(train_dataloader, optimizer, epochs):\n",
- " \n",
- " # We'll store a number of quantities such as training and validation loss, \n",
- " # validation accuracy, and timings.\n",
- " training_stats = []\n",
- "\n",
- " # Measure the total training time for the whole run.\n",
- " total_t0 = time.time()\n",
- "\n",
- " # For each epoch...\n",
- " for epoch_i in range(0, epochs):\n",
- "\n",
- " # ========================================\n",
- " # Training\n",
- " # ========================================\n",
- "\n",
- " # Perform one full pass over the training set.\n",
- "\n",
- " print(\"\")\n",
- " print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))\n",
- " print('Training...')\n",
- "\n",
- " # Measure how long the training epoch takes.\n",
- " t0 = time.time()\n",
- "\n",
- " # Reset the total loss for this epoch.\n",
- " total_train_loss = 0\n",
- "\n",
- " # Put the model into training mode. Don't be mislead--the call to \n",
- " # `train` just changes the *mode*, it doesn't *perform* the training.\n",
- " # `dropout` and `batchnorm` layers behave differently during training\n",
- " # vs. test (source: https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch)\n",
- " model.train()\n",
- "\n",
- " # For each batch of training data...\n",
- " for step, batch in enumerate(train_dataloader):\n",
- "\n",
- " # Progress update every 40 batches.\n",
- " if step % 40 == 0 and not step == 0:\n",
- " # Calculate elapsed time in minutes.\n",
- " elapsed = format_time(time.time() - t0)\n",
- "\n",
- " # Report progress.\n",
- " print(' Batch {:>5,} of {:>5,}. Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))\n",
- "\n",
- " # Unpack this training batch from our dataloader. \n",
- " #\n",
- " # As we unpack the batch, we'll also copy each tensor to the GPU using the \n",
- " # `to` method.\n",
- " #\n",
- " # `batch` contains three pytorch tensors:\n",
- " # [0]: input ids \n",
- " # [1]: attention masks\n",
- " # [2]: labels \n",
- " b_input_ids = batch[0].to(device)\n",
- " b_input_mask = batch[1].to(device)\n",
- " b_labels = batch[2].to(device)\n",
- "\n",
- " # Always clear any previously calculated gradients before performing a\n",
- " # backward pass. PyTorch doesn't do this automatically because \n",
- " # accumulating the gradients is \"convenient while training RNNs\". \n",
- " # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)\n",
- " model.zero_grad() \n",
- "\n",
- " # Perform a forward pass (evaluate the model on this training batch).\n",
- " # The documentation for this `model` function is here: \n",
- " # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n",
- " # It returns different numbers of parameters depending on what arguments\n",
- " # arge given and what flags are set. For our useage here, it returns\n",
- " # the loss (because we provided labels) and the \"logits\"--the model\n",
- " # outputs prior to activation.\n",
- " loss, logits = model(b_input_ids, \n",
- " attention_mask=b_input_mask, \n",
- " labels=b_labels)\n",
- "\n",
- " # Accumulate the training loss over all of the batches so that we can\n",
- " # calculate the average loss at the end. `loss` is a Tensor containing a\n",
- " # single value; the `.item()` function just returns the Python value \n",
- " # from the tensor.\n",
- " total_train_loss += loss.item()\n",
- "\n",
- " # Perform a backward pass to calculate the gradients.\n",
- " loss.backward()\n",
- "\n",
- " # Clip the norm of the gradients to 1.0.\n",
- " # This is to help prevent the \"exploding gradients\" problem.\n",
- " torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n",
- "\n",
- " # Update parameters and take a step using the computed gradient.\n",
- " # The optimizer dictates the \"update rule\"--how the parameters are\n",
- " # modified based on their gradients, the learning rate, etc.\n",
- " optimizer.step()\n",
- "\n",
- " # Update the learning rate.\n",
- " scheduler.step()\n",
- "\n",
- " # Calculate the average loss over all of the batches.\n",
- " avg_train_loss = total_train_loss / len(train_dataloader) \n",
- "\n",
- " # Measure how long this epoch took.\n",
- " training_time = format_time(time.time() - t0)\n",
- "\n",
- " print(\"\")\n",
- " print(\" Average training loss: {0:.2f}\".format(avg_train_loss))\n",
- " print(\" Training epcoh took: {:}\".format(training_time))\n",
- "\n",
- " # ========================================\n",
- " # Validation\n",
- " # ========================================\n",
- " # After the completion of each training epoch, measure our performance on\n",
- " # our validation set.\n",
- "\n",
- " print(\"\")\n",
- " print(\"Running Validation...\")\n",
- "\n",
- " t0 = time.time()\n",
- "\n",
- " # Put the model in evaluation mode--the dropout layers behave differently\n",
- " # during evaluation.\n",
- " model.eval()\n",
- "\n",
- " # Tracking variables \n",
- " total_eval_accuracy = 0\n",
- " total_eval_loss = 0\n",
- " nb_eval_steps = 0\n",
- "\n",
- " # Evaluate data for one epoch\n",
- " for batch in validation_dataloader:\n",
- "\n",
- " # Unpack this training batch from our dataloader. \n",
- " #\n",
- " # As we unpack the batch, we'll also copy each tensor to the GPU using \n",
- " # the `to` method.\n",
- " #\n",
- " # `batch` contains three pytorch tensors:\n",
- " # [0]: input ids \n",
- " # [1]: attention masks\n",
- " # [2]: labels \n",
- " b_input_ids = batch[0].to(device)\n",
- " b_input_mask = batch[1].to(device)\n",
- " b_labels = batch[2].to(device)\n",
- "\n",
- " # Tell pytorch not to bother with constructing the compute graph during\n",
- " # the forward pass, since this is only needed for backprop (training).\n",
- " with torch.no_grad(): \n",
- "\n",
- " # Forward pass, calculate logit predictions.\n",
- " # token_type_ids is the same as the \"segment ids\", which \n",
- " # differentiates sentence 1 and 2 in 2-sentence tasks.\n",
- " # The documentation for this `model` function is here: \n",
- " # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n",
- " # Get the \"logits\" output by the model. The \"logits\" are the output\n",
- " # values prior to applying an activation function like the softmax.\n",
- " (loss, logits) = model(b_input_ids, \n",
- " attention_mask=b_input_mask,\n",
- " labels=b_labels)\n",
- "\n",
- " # Accumulate the validation loss.\n",
- " total_eval_loss += loss.item()\n",
- "\n",
- " # Move logits and labels to CPU\n",
- " logits = logits.detach().cpu().numpy()\n",
- " label_ids = b_labels.to('cpu').numpy()\n",
- "\n",
- " # Calculate the accuracy for this batch of test sentences, and\n",
- " # accumulate it over all batches.\n",
- " total_eval_accuracy += flat_accuracy(logits, label_ids)\n",
- "\n",
- "\n",
- " # Report the final accuracy for this validation run.\n",
- " avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)\n",
- " print(\" Accuracy: {0:.2f}\".format(avg_val_accuracy))\n",
- "\n",
- " # Calculate the average loss over all of the batches.\n",
- " avg_val_loss = total_eval_loss / len(validation_dataloader)\n",
- "\n",
- " # Measure how long the validation run took.\n",
- " validation_time = format_time(time.time() - t0)\n",
- "\n",
- " print(\" Validation Loss: {0:.2f}\".format(avg_val_loss))\n",
- " print(\" Validation took: {:}\".format(validation_time))\n",
- "\n",
- " # Record all statistics from this epoch.\n",
- " training_stats.append(\n",
- " {\n",
- " 'epoch': epoch_i + 1,\n",
- " 'Training Loss': avg_train_loss,\n",
- " 'Valid. Loss': avg_val_loss,\n",
- " 'Valid. Accur.': avg_val_accuracy,\n",
- " 'Training Time': training_time,\n",
- " 'Validation Time': validation_time\n",
- " }\n",
- " )\n",
- "\n",
- " print(\"\")\n",
- " print(\"Training complete!\")\n",
- "\n",
- " print(\"Total training took {:} (h:mm:ss)\".format(format_time(time.time()-total_t0)))\n",
- " \n",
- " return training_stats\n",
- "\n",
- "def print_training_stats(training_stats):\n",
- " # Display floats with two decimal places.\n",
- " pd.set_option('precision', 2)\n",
- "\n",
- " # Create a DataFrame from our training statistics.\n",
- " df_stats = pd.DataFrame(data=training_stats)\n",
- "\n",
- " # Use the 'epoch' as the row index.\n",
- " df_stats = df_stats.set_index('epoch')\n",
- "\n",
- " # A hack to force the column headers to wrap.\n",
- " #df = df.style.set_table_styles([dict(selector=\"th\",props=[('max-width', '70px')])])\n",
- "\n",
- " # Display the table.\n",
- " print(df_stats)"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "Tzz2H0Jpsmtk",
- "outputId": "d982e8a8-82ec-4a1f-9fc5-afc2e2fcb7d7",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 1000
- }
- },
- "source": [
- "from torch.utils.data import DataLoader, RandomSampler, SequentialSampler\n",
- "from transformers import get_linear_schedule_with_warmup, AdamW\n",
- "\n",
- "from transformers import AlbertForSequenceClassification,AlbertConfig\n",
- "# from transformers import DistilBertForSequenceClassification, AdamW, DistilBertConfig\n",
- "# from transformers import BertForSequenceClassification, BertConfig\n",
- "# from transformers import ElectraForSequenceClassification\n",
- "\n",
- "# ADJUST lr_s and batch_sizes\n",
- "lr_s = [2e-5]\n",
- "batch_sizes = [32]\n",
- "from itertools import product\n",
- "hyperparameters = list(product(*[lr_s, batch_sizes]))\n",
- "print(hyperparameters)\n",
- "training_statistics = []\n",
- "for lr, batch_size in hyperparameters:\n",
- " # config = AlbertConfig.from_json_file(CONFIG_FILE)\n",
- " # model = AlbertModel(config)\n",
- " # model = load_tf_weights_in_albert(model, config,ALBERT_PRETRAIN_CHECKPOINT)\n",
- " PRE_TRAINED_MODEL_NAME_OR_PATH = '/content/gdrive/My Drive/ALBERTimplementation/AGnewsmodel'\n",
- " model = AlbertForSequenceClassification.from_pretrained(PRE_TRAINED_MODEL_NAME_OR_PATH, num_labels = 2, output_attentions = False, output_hidden_states = False)\n",
- " # if MODEL_CHOICE == ModelChoice.BERT:\n",
- " # model = BertForSequenceClassification.from_pretrained(\"bert-base-uncased\",num_labels = 2, output_attentions = False, output_hidden_states = False)\n",
- " # elif MODEL_CHOICE == ModelChoice.DISTILBERT:\n",
- " # model = DistilBertForSequenceClassification.from_pretrained(\"distilbert-base-uncased\",num_labels = 2,output_attentions = False,output_hidden_states = False)\n",
- " # elif MODEL_CHOICE == ModelChoice.ALBERT:\n",
- " # model = AlbertForSequenceClassification.from_pretrained(\"albert-base-v2\", num_labels = 2, output_attentions = False, output_hidden_states = False)\n",
- " # elif MODEL_CHOICE == ModelChoice.ELECTRA:\n",
- " # model = ElectraForSequenceClassification.from_pretrained(\"google/electra-base-discriminator\",num_labels = 2, output_attentions = False, output_hidden_states = False)\n",
- " # else:\n",
- " # print(\"Choose proper model!\")\n",
- " \n",
- " print('START----',model,'END---')\n",
- " \n",
- " # Tell pytorch to run this model on the GPU.\n",
- " model.cuda()\n",
- "\n",
- " # The DataLoader needs to know our batch size for training, so we specify it \n",
- " # here. For fine-tuning ALBERT on a specific task, the authors recommend a batch \n",
- " # size of 16 or 32.\n",
- "\n",
- " # Create the DataLoaders for our training and validation sets.\n",
- " # We'll take training samples in random order. \n",
- " train_dataloader = DataLoader(\n",
- " train_dataset, # The training samples.\n",
- " sampler = RandomSampler(train_dataset), # Select batches randomly\n",
- " batch_size = batch_size # Trains with this batch size.\n",
- " )\n",
- "\n",
- " # For validation the order doesn't matter, so we'll just read them sequentially.\n",
- " validation_dataloader = DataLoader(\n",
- " val_dataset, # The validation samples.\n",
- " sampler = SequentialSampler(val_dataset), # Pull out batches sequentially.\n",
- " batch_size = batch_size # Evaluate with this batch size.\n",
- " )\n",
- " \n",
- " # Note: AdamW is a class from the huggingface library (as opposed to pytorch) \n",
- " # I believe the 'W' stands for 'Weight Decay fix\"\n",
- " optimizer = AdamW(model.parameters(),\n",
- " lr = lr, # args.learning_rate - default is 5e-5, our notebook had 2e-5\n",
- " eps = 1e-8 # args.adam_epsilon - default is 1e-8.\n",
- " )\n",
- " \n",
- "\n",
- " # Number of training epochs. The BERT authors recommend between 2 and 4. \n",
- " # We chose to run for 4, but we'll see later that this may be over-fitting the\n",
- " # training data.\n",
- " epochs = 3\n",
- "\n",
- " # Total number of training steps is [number of batches] x [number of epochs]. \n",
- " # (Note that this is not the same as the number of training samples).\n",
- " total_steps = len(train_dataloader) * epochs\n",
- "\n",
- " # Create the learning rate scheduler.\n",
- " scheduler = get_linear_schedule_with_warmup(optimizer, \n",
- " num_warmup_steps = 0, # Default value in run_glue.py\n",
- " num_training_steps = total_steps)\n",
- "\n",
- " seed_val = 42\n",
- " set_random(seed_val)\n",
- " \n",
- " print(\"Training with hyperparameters: batch size={}, lr={}\".format(batch_size, lr))\n",
- " training_stats = train_model(train_dataloader, optimizer, epochs)\n",
- " training_statistics.append(training_stats)\n",
- " print_training_stats(training_stats)"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "[(2e-05, 32)]\n"
- ],
- "name": "stdout"
- },
- {
- "output_type": "stream",
- "text": [
- "Some weights of the model checkpoint at /content/gdrive/My Drive/ALBERTimplementation/AGnewsmodel were not used when initializing AlbertForSequenceClassification: ['predictions.bias', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias', 'predictions.dense.weight', 'predictions.dense.bias', 'predictions.decoder.weight', 'predictions.decoder.bias', 'sop_classifier.classifier.weight', 'sop_classifier.classifier.bias']\n",
- "- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).\n",
- "- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
- "Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at /content/gdrive/My Drive/ALBERTimplementation/AGnewsmodel and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
- "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
- ],
- "name": "stderr"
- },
- {
- "output_type": "stream",
- "text": [
- "START---- AlbertForSequenceClassification(\n",
- " (albert): AlbertModel(\n",
- " (embeddings): AlbertEmbeddings(\n",
- " (word_embeddings): Embedding(20001, 128, padding_idx=0)\n",
- " (position_embeddings): Embedding(512, 128)\n",
- " (token_type_embeddings): Embedding(2, 128)\n",
- " (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)\n",
- " (dropout): Dropout(p=0, inplace=False)\n",
- " )\n",
- " (encoder): AlbertTransformer(\n",
- " (embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)\n",
- " (albert_layer_groups): ModuleList(\n",
- " (0): AlbertLayerGroup(\n",
- " (albert_layers): ModuleList(\n",
- " (0): AlbertLayer(\n",
- " (full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
- " (attention): AlbertAttention(\n",
- " (query): Linear(in_features=768, out_features=768, bias=True)\n",
- " (key): Linear(in_features=768, out_features=768, bias=True)\n",
- " (value): Linear(in_features=768, out_features=768, bias=True)\n",
- " (attention_dropout): Dropout(p=0, inplace=False)\n",
- " (output_dropout): Dropout(p=0, inplace=False)\n",
- " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
- " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
- " )\n",
- " (ffn): Linear(in_features=768, out_features=3072, bias=True)\n",
- " (ffn_output): Linear(in_features=3072, out_features=768, bias=True)\n",
- " (dropout): Dropout(p=0, inplace=False)\n",
- " )\n",
- " )\n",
- " )\n",
- " )\n",
- " )\n",
- " (pooler): Linear(in_features=768, out_features=768, bias=True)\n",
- " (pooler_activation): Tanh()\n",
- " )\n",
- " (dropout): Dropout(p=0.1, inplace=False)\n",
- " (classifier): Linear(in_features=768, out_features=2, bias=True)\n",
- ") END---\n",
- "Training with hyperparameters: batch size=32, lr=2e-05\n",
- "\n",
- "======== Epoch 1 / 3 ========\n",
- "Training...\n",
- " Batch 40 of 2,813. Elapsed: 0:00:19.\n",
- " Batch 80 of 2,813. Elapsed: 0:00:38.\n",
- " Batch 120 of 2,813. Elapsed: 0:00:56.\n",
- " Batch 160 of 2,813. Elapsed: 0:01:15.\n",
- " Batch 200 of 2,813. Elapsed: 0:01:33.\n",
- " Batch 240 of 2,813. Elapsed: 0:01:52.\n",
- " Batch 280 of 2,813. Elapsed: 0:02:11.\n",
- " Batch 320 of 2,813. Elapsed: 0:02:29.\n",
- " Batch 360 of 2,813. Elapsed: 0:02:48.\n",
- " Batch 400 of 2,813. Elapsed: 0:03:06.\n",
- " Batch 440 of 2,813. Elapsed: 0:03:25.\n",
- " Batch 480 of 2,813. Elapsed: 0:03:44.\n",
- " Batch 520 of 2,813. Elapsed: 0:04:02.\n",
- " Batch 560 of 2,813. Elapsed: 0:04:21.\n",
- " Batch 600 of 2,813. Elapsed: 0:04:39.\n",
- " Batch 640 of 2,813. Elapsed: 0:04:58.\n",
- " Batch 680 of 2,813. Elapsed: 0:05:17.\n",
- " Batch 720 of 2,813. Elapsed: 0:05:35.\n",
- " Batch 760 of 2,813. Elapsed: 0:05:54.\n",
- " Batch 800 of 2,813. Elapsed: 0:06:13.\n",
- " Batch 840 of 2,813. Elapsed: 0:06:31.\n",
- " Batch 880 of 2,813. Elapsed: 0:06:50.\n",
- " Batch 920 of 2,813. Elapsed: 0:07:08.\n",
- " Batch 960 of 2,813. Elapsed: 0:07:27.\n",
- " Batch 1,000 of 2,813. Elapsed: 0:07:46.\n",
- " Batch 1,040 of 2,813. Elapsed: 0:08:04.\n",
- " Batch 1,080 of 2,813. Elapsed: 0:08:23.\n",
- " Batch 1,120 of 2,813. Elapsed: 0:08:41.\n",
- " Batch 1,160 of 2,813. Elapsed: 0:09:00.\n",
- " Batch 1,200 of 2,813. Elapsed: 0:09:19.\n",
- " Batch 1,240 of 2,813. Elapsed: 0:09:37.\n",
- " Batch 1,280 of 2,813. Elapsed: 0:09:56.\n",
- " Batch 1,320 of 2,813. Elapsed: 0:10:14.\n",
- " Batch 1,360 of 2,813. Elapsed: 0:10:33.\n",
- " Batch 1,400 of 2,813. Elapsed: 0:10:52.\n",
- " Batch 1,440 of 2,813. Elapsed: 0:11:10.\n",
- " Batch 1,480 of 2,813. Elapsed: 0:11:29.\n",
- " Batch 1,520 of 2,813. Elapsed: 0:11:48.\n",
- " Batch 1,560 of 2,813. Elapsed: 0:12:06.\n",
- " Batch 1,600 of 2,813. Elapsed: 0:12:25.\n",
- " Batch 1,640 of 2,813. Elapsed: 0:12:43.\n",
- " Batch 1,680 of 2,813. Elapsed: 0:13:02.\n",
- " Batch 1,720 of 2,813. Elapsed: 0:13:21.\n",
- " Batch 1,760 of 2,813. Elapsed: 0:13:39.\n",
- " Batch 1,800 of 2,813. Elapsed: 0:13:58.\n",
- " Batch 1,840 of 2,813. Elapsed: 0:14:16.\n",
- " Batch 1,880 of 2,813. Elapsed: 0:14:35.\n",
- " Batch 1,920 of 2,813. Elapsed: 0:14:54.\n",
- " Batch 1,960 of 2,813. Elapsed: 0:15:12.\n",
- " Batch 2,000 of 2,813. Elapsed: 0:15:31.\n",
- " Batch 2,040 of 2,813. Elapsed: 0:15:49.\n",
- " Batch 2,080 of 2,813. Elapsed: 0:16:08.\n",
- " Batch 2,120 of 2,813. Elapsed: 0:16:27.\n",
- " Batch 2,160 of 2,813. Elapsed: 0:16:45.\n",
- " Batch 2,200 of 2,813. Elapsed: 0:17:04.\n",
- " Batch 2,240 of 2,813. Elapsed: 0:17:23.\n",
- " Batch 2,280 of 2,813. Elapsed: 0:17:41.\n",
- " Batch 2,320 of 2,813. Elapsed: 0:18:00.\n",
- " Batch 2,360 of 2,813. Elapsed: 0:18:18.\n",
- " Batch 2,400 of 2,813. Elapsed: 0:18:37.\n",
- " Batch 2,440 of 2,813. Elapsed: 0:18:56.\n",
- " Batch 2,480 of 2,813. Elapsed: 0:19:14.\n",
- " Batch 2,520 of 2,813. Elapsed: 0:19:33.\n",
- " Batch 2,560 of 2,813. Elapsed: 0:19:51.\n",
- " Batch 2,600 of 2,813. Elapsed: 0:20:10.\n",
- " Batch 2,640 of 2,813. Elapsed: 0:20:29.\n",
- " Batch 2,680 of 2,813. Elapsed: 0:20:47.\n",
- " Batch 2,720 of 2,813. Elapsed: 0:21:06.\n",
- " Batch 2,760 of 2,813. Elapsed: 0:21:24.\n",
- " Batch 2,800 of 2,813. Elapsed: 0:21:43.\n",
- "\n",
- " Average training loss: 0.69\n",
- " Training epcoh took: 0:21:49\n",
- "\n",
- "Running Validation...\n",
- " Accuracy: 0.56\n",
- " Validation Loss: 0.70\n",
- " Validation took: 0:00:51\n",
- "\n",
- "======== Epoch 2 / 3 ========\n",
- "Training...\n",
- " Batch 40 of 2,813. Elapsed: 0:00:19.\n",
- " Batch 80 of 2,813. Elapsed: 0:00:37.\n",
- " Batch 120 of 2,813. Elapsed: 0:00:56.\n",
- " Batch 160 of 2,813. Elapsed: 0:01:14.\n",
- " Batch 200 of 2,813. Elapsed: 0:01:33.\n",
- " Batch 240 of 2,813. Elapsed: 0:01:52.\n",
- " Batch 280 of 2,813. Elapsed: 0:02:10.\n",
- " Batch 320 of 2,813. Elapsed: 0:02:29.\n",
- " Batch 360 of 2,813. Elapsed: 0:02:47.\n",
- " Batch 400 of 2,813. Elapsed: 0:03:06.\n",
- " Batch 440 of 2,813. Elapsed: 0:03:25.\n",
- " Batch 480 of 2,813. Elapsed: 0:03:43.\n",
- " Batch 520 of 2,813. Elapsed: 0:04:02.\n",
- " Batch 560 of 2,813. Elapsed: 0:04:21.\n",
- " Batch 600 of 2,813. Elapsed: 0:04:39.\n",
- " Batch 640 of 2,813. Elapsed: 0:04:58.\n",
- " Batch 680 of 2,813. Elapsed: 0:05:16.\n",
- " Batch 720 of 2,813. Elapsed: 0:05:35.\n",
- " Batch 760 of 2,813. Elapsed: 0:05:54.\n",
- " Batch 800 of 2,813. Elapsed: 0:06:12.\n",
- " Batch 840 of 2,813. Elapsed: 0:06:31.\n",
- " Batch 880 of 2,813. Elapsed: 0:06:49.\n",
- " Batch 920 of 2,813. Elapsed: 0:07:08.\n",
- " Batch 960 of 2,813. Elapsed: 0:07:27.\n",
- " Batch 1,000 of 2,813. Elapsed: 0:07:45.\n",
- " Batch 1,040 of 2,813. Elapsed: 0:08:04.\n",
- " Batch 1,080 of 2,813. Elapsed: 0:08:22.\n",
- " Batch 1,120 of 2,813. Elapsed: 0:08:41.\n",
- " Batch 1,160 of 2,813. Elapsed: 0:09:00.\n",
- " Batch 1,200 of 2,813. Elapsed: 0:09:18.\n",
- " Batch 1,240 of 2,813. Elapsed: 0:09:37.\n",
- " Batch 1,280 of 2,813. Elapsed: 0:09:55.\n",
- " Batch 1,320 of 2,813. Elapsed: 0:10:14.\n",
- " Batch 1,360 of 2,813. Elapsed: 0:10:33.\n",
- " Batch 1,400 of 2,813. Elapsed: 0:10:51.\n",
- " Batch 1,440 of 2,813. Elapsed: 0:11:10.\n",
- " Batch 1,480 of 2,813. Elapsed: 0:11:28.\n",
- " Batch 1,520 of 2,813. Elapsed: 0:11:47.\n",
- " Batch 1,560 of 2,813. Elapsed: 0:12:06.\n",
- " Batch 1,600 of 2,813. Elapsed: 0:12:24.\n",
- " Batch 1,640 of 2,813. Elapsed: 0:12:43.\n",
- " Batch 1,680 of 2,813. Elapsed: 0:13:01.\n",
- " Batch 1,720 of 2,813. Elapsed: 0:13:20.\n",
- " Batch 1,760 of 2,813. Elapsed: 0:13:39.\n",
- " Batch 1,800 of 2,813. Elapsed: 0:13:57.\n",
- " Batch 1,840 of 2,813. Elapsed: 0:14:16.\n",
- " Batch 1,880 of 2,813. Elapsed: 0:14:34.\n",
- " Batch 1,920 of 2,813. Elapsed: 0:14:53.\n",
- " Batch 1,960 of 2,813. Elapsed: 0:15:12.\n",
- " Batch 2,000 of 2,813. Elapsed: 0:15:30.\n",
- " Batch 2,040 of 2,813. Elapsed: 0:15:49.\n",
- " Batch 2,080 of 2,813. Elapsed: 0:16:07.\n",
- " Batch 2,120 of 2,813. Elapsed: 0:16:26.\n",
- " Batch 2,160 of 2,813. Elapsed: 0:16:45.\n",
- " Batch 2,200 of 2,813. Elapsed: 0:17:03.\n",
- " Batch 2,240 of 2,813. Elapsed: 0:17:22.\n",
- " Batch 2,280 of 2,813. Elapsed: 0:17:40.\n",
- " Batch 2,320 of 2,813. Elapsed: 0:17:59.\n",
- " Batch 2,360 of 2,813. Elapsed: 0:18:18.\n",
- " Batch 2,400 of 2,813. Elapsed: 0:18:36.\n",
- " Batch 2,440 of 2,813. Elapsed: 0:18:55.\n",
- " Batch 2,480 of 2,813. Elapsed: 0:19:13.\n",
- " Batch 2,520 of 2,813. Elapsed: 0:19:32.\n",
- " Batch 2,560 of 2,813. Elapsed: 0:19:51.\n",
- " Batch 2,600 of 2,813. Elapsed: 0:20:09.\n",
- " Batch 2,640 of 2,813. Elapsed: 0:20:28.\n",
- " Batch 2,680 of 2,813. Elapsed: 0:20:46.\n",
- " Batch 2,720 of 2,813. Elapsed: 0:21:05.\n",
- " Batch 2,760 of 2,813. Elapsed: 0:21:24.\n",
- " Batch 2,800 of 2,813. Elapsed: 0:21:42.\n",
- "\n",
- " Average training loss: 0.69\n",
- " Training epcoh took: 0:21:48\n",
- "\n",
- "Running Validation...\n",
- " Accuracy: 0.56\n",
- " Validation Loss: 0.69\n",
- " Validation took: 0:00:51\n",
- "\n",
- "======== Epoch 3 / 3 ========\n",
- "Training...\n",
- " Batch 40 of 2,813. Elapsed: 0:00:19.\n",
- " Batch 80 of 2,813. Elapsed: 0:00:37.\n",
- " Batch 120 of 2,813. Elapsed: 0:00:56.\n",
- " Batch 160 of 2,813. Elapsed: 0:01:14.\n",
- " Batch 200 of 2,813. Elapsed: 0:01:33.\n",
- " Batch 240 of 2,813. Elapsed: 0:01:52.\n",
- " Batch 280 of 2,813. Elapsed: 0:02:10.\n",
- " Batch 320 of 2,813. Elapsed: 0:02:29.\n",
- " Batch 360 of 2,813. Elapsed: 0:02:47.\n",
- " Batch 400 of 2,813. Elapsed: 0:03:06.\n",
- " Batch 440 of 2,813. Elapsed: 0:03:25.\n",
- " Batch 480 of 2,813. Elapsed: 0:03:43.\n",
- " Batch 520 of 2,813. Elapsed: 0:04:02.\n",
- " Batch 560 of 2,813. Elapsed: 0:04:21.\n",
- " Batch 600 of 2,813. Elapsed: 0:04:39.\n",
- " Batch 640 of 2,813. Elapsed: 0:04:58.\n",
- " Batch 680 of 2,813. Elapsed: 0:05:16.\n",
- " Batch 720 of 2,813. Elapsed: 0:05:35.\n",
- " Batch 760 of 2,813. Elapsed: 0:05:54.\n",
- " Batch 800 of 2,813. Elapsed: 0:06:12.\n",
- " Batch 840 of 2,813. Elapsed: 0:06:31.\n",
- " Batch 880 of 2,813. Elapsed: 0:06:49.\n",
- " Batch 920 of 2,813. Elapsed: 0:07:08.\n",
- " Batch 960 of 2,813. Elapsed: 0:07:27.\n",
- " Batch 1,000 of 2,813. Elapsed: 0:07:45.\n",
- " Batch 1,040 of 2,813. Elapsed: 0:08:04.\n",
- " Batch 1,080 of 2,813. Elapsed: 0:08:22.\n",
- " Batch 1,120 of 2,813. Elapsed: 0:08:41.\n",
- " Batch 1,160 of 2,813. Elapsed: 0:09:00.\n",
- " Batch 1,200 of 2,813. Elapsed: 0:09:18.\n",
- " Batch 1,240 of 2,813. Elapsed: 0:09:37.\n",
- " Batch 1,280 of 2,813. Elapsed: 0:09:55.\n",
- " Batch 1,320 of 2,813. Elapsed: 0:10:14.\n",
- " Batch 1,360 of 2,813. Elapsed: 0:10:33.\n",
- " Batch 1,400 of 2,813. Elapsed: 0:10:51.\n",
- " Batch 1,440 of 2,813. Elapsed: 0:11:10.\n",
- " Batch 1,480 of 2,813. Elapsed: 0:11:29.\n",
- " Batch 1,520 of 2,813. Elapsed: 0:11:47.\n",
- " Batch 1,560 of 2,813. Elapsed: 0:12:06.\n",
- " Batch 1,600 of 2,813. Elapsed: 0:12:24.\n",
- " Batch 1,640 of 2,813. Elapsed: 0:12:43.\n",
- " Batch 1,680 of 2,813. Elapsed: 0:13:02.\n",
- " Batch 1,720 of 2,813. Elapsed: 0:13:20.\n",
- " Batch 1,760 of 2,813. Elapsed: 0:13:39.\n",
- " Batch 1,800 of 2,813. Elapsed: 0:13:57.\n",
- " Batch 1,840 of 2,813. Elapsed: 0:14:16.\n",
- " Batch 1,880 of 2,813. Elapsed: 0:14:35.\n",
- " Batch 1,920 of 2,813. Elapsed: 0:14:53.\n",
- " Batch 1,960 of 2,813. Elapsed: 0:15:12.\n",
- " Batch 2,000 of 2,813. Elapsed: 0:15:30.\n",
- " Batch 2,040 of 2,813. Elapsed: 0:15:49.\n",
- " Batch 2,080 of 2,813. Elapsed: 0:16:08.\n",
- " Batch 2,120 of 2,813. Elapsed: 0:16:26.\n",
- " Batch 2,160 of 2,813. Elapsed: 0:16:45.\n",
- " Batch 2,200 of 2,813. Elapsed: 0:17:03.\n",
- " Batch 2,240 of 2,813. Elapsed: 0:17:22.\n",
- " Batch 2,280 of 2,813. Elapsed: 0:17:41.\n",
- " Batch 2,320 of 2,813. Elapsed: 0:17:59.\n",
- " Batch 2,360 of 2,813. Elapsed: 0:18:18.\n",
- " Batch 2,400 of 2,813. Elapsed: 0:18:37.\n",
- " Batch 2,440 of 2,813. Elapsed: 0:18:55.\n",
- " Batch 2,480 of 2,813. Elapsed: 0:19:14.\n",
- " Batch 2,520 of 2,813. Elapsed: 0:19:32.\n",
- " Batch 2,560 of 2,813. Elapsed: 0:19:51.\n",
- " Batch 2,600 of 2,813. Elapsed: 0:20:10.\n",
- " Batch 2,640 of 2,813. Elapsed: 0:20:28.\n",
- " Batch 2,680 of 2,813. Elapsed: 0:20:47.\n",
- " Batch 2,720 of 2,813. Elapsed: 0:21:05.\n",
- " Batch 2,760 of 2,813. Elapsed: 0:21:24.\n",
- " Batch 2,800 of 2,813. Elapsed: 0:21:43.\n",
- "\n",
- " Average training loss: 0.69\n",
- " Training epcoh took: 0:21:49\n",
- "\n",
- "Running Validation...\n",
- " Accuracy: 0.56\n",
- " Validation Loss: 0.69\n",
- " Validation took: 0:00:51\n",
- "\n",
- "Training complete!\n",
- "Total training took 1:07:58 (h:mm:ss)\n",
- " Training Loss Valid. Loss Valid. Accur. Training Time Validation Time\n",
- "epoch \n",
- "1 0.69 0.70 0.56 0:21:49 0:00:51\n",
- "2 0.69 0.69 0.56 0:21:48 0:00:51\n",
- "3 0.69 0.69 0.56 0:21:49 0:00:51\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "_K_vQ7feAS_z"
- },
- "source": [
- "model_save_name = 'finetuned_Albert.bin'\n",
- "path = F\"/content/gdrive/My Drive/ALBERTimplementation/model-fine-train/\"+model_save_name\n",
- "torch.save(model.state_dict(), path)"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "Lu-gJG4JbD37"
- },
- "source": [
- "# Li method"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "vh_aZP7kbH0E"
- },
- "source": [
- "# At the time of writing, Hugging face didnt provide the class object for \n",
- "# AlbertForTokenClassification, hence write your own defination below\n",
- "from transformers.modeling_albert import AlbertModel, load_tf_weights_in_albert, AlbertPreTrainedModel\n",
- "from transformers.configuration_albert import AlbertConfig\n",
- "from transformers.tokenization_bert import BertTokenizer\n",
- "import torch.nn as nn\n",
- "from torch.nn import CrossEntropyLoss\n",
- "class AlbertForTokenClassification(AlbertPreTrainedModel):\n",
- "\n",
- " def __init__(self, albert, config):\n",
- " super().__init__(config)\n",
- " self.num_labels = config.num_labels\n",
- "\n",
- " self.albert = albert\n",
- " self.dropout = nn.Dropout(config.hidden_dropout_prob)\n",
- " self.classifier = nn.Linear(config.hidden_size, config.num_labels)\n",
- "\n",
- " def forward(\n",
- " self,\n",
- " input_ids=None,\n",
- " attention_mask=None,\n",
- " token_type_ids=None,\n",
- " position_ids=None,\n",
- " head_mask=None,\n",
- " inputs_embeds=None,\n",
- " labels=None,\n",
- " ):\n",
- "\n",
- " outputs = self.albert(\n",
- " input_ids,\n",
- " attention_mask=attention_mask,\n",
- " token_type_ids=token_type_ids,\n",
- " position_ids=position_ids,\n",
- " head_mask=head_mask,\n",
- " inputs_embeds=inputs_embeds,\n",
- " )\n",
- "\n",
- " sequence_output = outputs[0]\n",
- "\n",
- " sequence_output = self.dropout(sequence_output)\n",
- " logits = self.classifier(sequence_output)\n",
- "\n",
- " return logits"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "k2W7RmZcbSwf"
- },
- "source": [
- "VOCAB_FILE = \"/content/gdrive/My Drive/ALBERTimplementation/AG_News/vocab.txt\" # This is the vocab file output from Build Vocab step\n",
- "CONFIG_FILE = \"/content/gdrive/My Drive/ALBERTimplementation/AG_News/albert_config.json\"\n",
- "ALBERT_PRETRAIN_CHECKPOINT = \"/content/gdrive/My Drive/ALBERTimplementation/AG_News/model.ckpt-best.index\" # This is the model checkpoint output from Albert Pretrain step\n",
- "tokenizer = BertTokenizer(vocab_file=VOCAB_FILE)\n",
- "config = AlbertConfig.from_json_file(CONFIG_FILE)\n",
- "model = AlbertModel(config)\n",
- "model = load_tf_weights_in_albert(model, config,ALBERT_PRETRAIN_CHECKPOINT)\n",
- "# If the variables not able to be initialized are only for the MLM and sequence order prediction task\n",
- "# Then the error could be ignored\n",
- "# As that is not required for the AlbertForTokenClassification we are trying to build here"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "WtE_Lb7josh4"
- },
- "source": [
- "# df = pd.read_csv('/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/validation.tsv', delimiter='\\t')\n",
- "# df.isna().values.any()\n",
- "# df['text'] = df['text'].fillna('0')\n",
- "# df.to_csv('/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/validation.tsv',sep='\\t' ,header=True, index=False)"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "vtZ6AzKxba9r",
- "outputId": "9ee3e15e-b857-422d-efaf-aafc86bdf7ce",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 460
- }
- },
- "source": [
- "# train_data = pd.read_csv('/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/train.tsv', sep='\\t')\n",
- "# print(\"Number of training examples {}\".format(len(train_data)))\n",
- "# num_examples = 100000\n",
- "# train = train_data[:num_examples].text.values\n",
- "# labels = train_data[:num_examples].label.values\n",
- "TRAIN_FILE = \"/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/train.tsv\"\n",
- "EVAL_FILE = \"/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/validation.tsv\"\n",
- "\n",
- "import numpy as np\n",
- "def label_sent(name_tokens, sent_tokens):\n",
- " label = []\n",
- " i = 0\n",
- " if len(name_tokens)>len(sent_tokens):\n",
- " label = np.zeros(len(sent_tokens))\n",
- " else:\n",
- " while i=len(sent_tokens)):\n",
- " return label\n",
- " if name_tokens[j+1] != sent_tokens[i+j+1]:\n",
- " found_match = False\n",
- " if found_match:\n",
- " label.extend(list(np.ones(len(name_tokens)).astype(int)))\n",
- " i = i + len(name_tokens)\n",
- " else: \n",
- " label.extend([0])\n",
- " i = i+ 1\n",
- " else:\n",
- " label.extend([0])\n",
- " i=i+1\n",
- " return label\n",
- "\n",
- "import pandas as pd\n",
- "df_data_train = pd.read_csv(TRAIN_FILE, sep='\\t')\n",
- "df_data_train['review_tokens'] = df_data_train.text.apply(tokenizer.tokenize)\n",
- "# df_data_train['dish_name_tokens'] = df_data_train.dish_name_tokens.apply(tokenizer.tokenize)\n",
- "# df_data_train['review_labels'] = df_data_train.apply(lambda row: label_sent(row['dish_name_tokens'] row['review_tokens']), axis=1)\n",
- "df_data_train['review_labels'] = df_data_train.label.values\n",
- "df_data_val = pd.read_csv(EVAL_FILE, sep='\\t')\n",
- "df_data_val['review_tokens'] = df_data_val.text.apply(tokenizer.tokenize)\n",
- "# df_data_val['dish_name_tokens'] = df_data_val.dish_name_tokens.apply(tokenizer.tokenize)\n",
- "# df_data_val['review_labels'] = df_data_val.apply(lambda row: label_sent(row['dish_name_tokens'] row['review_tokens']), axis=1)\n",
- "df_data_val['review_labels'] = df_data_val.label.values\n",
- "\n",
- "MAX_LEN = 50\n",
- "BATCH_SIZE = 1\n",
- "from keras.preprocessing.sequence import pad_sequences\n",
- "import torch\n",
- "from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler\n",
- "\n",
- "tr_inputs = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in df_data_train['review_tokens']],\n",
- " maxlen=MAX_LEN, dtype=\"long\", truncating=\"post\", padding=\"post\")\n",
- "tr_tags = pad_sequences(df_data_train['review_labels'],\n",
- " maxlen=MAX_LEN, padding=\"post\", #changed max len here --Muku\n",
- " dtype=\"long\", truncating=\"post\")\n",
- "# create the mask to ignore the padded elements in the sequences.\n",
- "tr_masks = [[float(i>0) for i in ii] for ii in tr_inputs]\n",
- "tr_inputs = torch.tensor(tr_inputs)\n",
- "tr_tags = torch.tensor(tr_tags)\n",
- "tr_masks = torch.tensor(tr_masks)\n",
- "train_data = TensorDataset(tr_inputs, tr_masks, tr_tags)\n",
- "train_sampler = RandomSampler(train_data)\n",
- "train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=BATCH_SIZE)\n",
- "\n",
- "\n",
- "val_inputs = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in df_data_val['review_tokens']],\n",
- " maxlen=MAX_LEN, dtype=\"long\", truncating=\"post\", padding=\"post\")\n",
- "val_tags = pad_sequences(df_data_val['review_labels'],\n",
- " maxlen=MAX_LEN, padding=\"post\",\n",
- " dtype=\"long\", truncating=\"post\")\n",
- "# create the mask to ignore the padded elements in the sequences.\n",
- "val_masks = [[float(i>0) for i in ii] for ii in val_inputs]\n",
- "val_inputs = torch.tensor(val_inputs)\n",
- "val_tags = torch.tensor(val_tags)\n",
- "val_masks = torch.tensor(val_masks)\n",
- "val_data = TensorDataset(val_inputs, val_masks, val_tags)\n",
- "val_sampler = RandomSampler(val_data)\n",
- "val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=BATCH_SIZE)"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "error",
- "ename": "ValueError",
- "evalue": "ignored",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras_preprocessing/sequence.py\u001b[0m in \u001b[0;36mpad_sequences\u001b[0;34m(sequences, maxlen, dtype, padding, truncating, value)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 68\u001b[0;31m \u001b[0mlengths\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 69\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mflag\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mTypeError\u001b[0m: object of type 'int' has no len()",
- "\nDuring handling of the above exception, another exception occurred:\n",
- "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 56\u001b[0m tr_tags = pad_sequences(df_data_train['review_labels'],\n\u001b[1;32m 57\u001b[0m \u001b[0mmaxlen\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mMAX_LEN\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpadding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"post\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m#changed max len here --Muku\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 58\u001b[0;31m dtype=\"long\", truncating=\"post\")\n\u001b[0m\u001b[1;32m 59\u001b[0m \u001b[0;31m# create the mask to ignore the padded elements in the sequences.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[0mtr_masks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mii\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mii\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtr_inputs\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/preprocessing/sequence.py\u001b[0m in \u001b[0;36mpad_sequences\u001b[0;34m(sequences, maxlen, dtype, padding, truncating, value)\u001b[0m\n",
- "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras_preprocessing/sequence.py\u001b[0m in \u001b[0;36mpad_sequences\u001b[0;34m(sequences, maxlen, dtype, padding, truncating, value)\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 73\u001b[0m raise ValueError('`sequences` must be a list of iterables. '\n\u001b[0;32m---> 74\u001b[0;31m 'Found non-iterable: ' + str(x))\n\u001b[0m\u001b[1;32m 75\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 76\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmaxlen\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mValueError\u001b[0m: `sequences` must be a list of iterables. Found non-iterable: 1"
- ]
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "UvGdYSVUbi6H"
- },
- "source": [
- "model_tokenclassification = AlbertForTokenClassification(model, config)\n",
- "from torch.optim import Adam\n",
- "LEARNING_RATE = 0.000001\n",
- "FULL_FINETUNING = True\n",
- "if FULL_FINETUNING:\n",
- " param_optimizer = list(model_tokenclassification.named_parameters())\n",
- " no_decay = ['bias', 'gamma', 'beta']\n",
- " optimizer_grouped_parameters = [\n",
- " {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],\n",
- " 'weight_decay_rate': 0.01},\n",
- " {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],\n",
- " 'weight_decay_rate': 0.0}\n",
- " ]\n",
- "else:\n",
- " param_optimizer = list(model_tokenclassification.classifier.named_parameters()) \n",
- " optimizer_grouped_parameters = [{\"params\": [p for n, p in param_optimizer]}]\n",
- "optimizer = Adam(optimizer_grouped_parameters, lr=LEARNING_RATE)"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "0a4vteOVbpkB"
- },
- "source": [
- "\n",
- "# from torch.utils.tensorboard import SummaryWriter\n",
- "import time\n",
- "import os.path\n",
- "import torch.nn as nn\n",
- "EPOCH = 5\n",
- "MAX_GRAD_NORM = 1.0\n",
- "ALBERT_FINETUNE_CHECKPOINT = \"outputs/finetune_checkpoint_5epoch_50neg_1e-5lr\"\n",
- "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
- "if torch.cuda.device_count() > 1:\n",
- " print(\"Let's use\", torch.cuda.device_count(), \"GPUs!\")\n",
- " model_tokenclassification = nn.DataParallel(model_tokenclassification)\n",
- "model_tokenclassification.to(device)\n",
- "if os.path.isfile(ALBERT_FINETUNE_CHECKPOINT):\n",
- " print(f\"--- Load from checkpoint ---\")\n",
- " checkpoint = torch.load(ALBERT_FINETUNE_CHECKPOINT)\n",
- " model_tokenclassification.load_state_dict(checkpoint['model_state_dict'])\n",
- " optimizer.load_state_dict(checkpoint['optimizer_state_dict'])\n",
- " epoch = checkpoint['epoch']\n",
- " loss = checkpoint['loss']\n",
- " train_losses = checkpoint['train_losses']\n",
- " train_acc = checkpoint['train_acc']\n",
- " val_losses = checkpoint['val_losses']\n",
- " val_acc = checkpoint['val_acc']\n",
- " \n",
- "else:\n",
- " epoch = -1\n",
- " train_losses, train_acc, val_losses, val_acc = [], [], [], []\n",
- "print(f\"--- Resume/Start training ---\") \n",
- "for i in range(epoch+1, EPOCH): \n",
- " print(f\"--- epoch: {i} ---\")\n",
- " start_time = time.time()\n",
- " \n",
- " # TRAIN loop\n",
- " model_tokenclassification.train()\n",
- " tr_loss, tr_acc, nb_tr_steps = 0, 0, 0\n",
- " for step, batch in enumerate(train_dataloader):\n",
- " # add batch to gpu\n",
- " batch = tuple(t.to(device) for t in batch)\n",
- " b_input_ids, b_input_mask, b_labels = batch\n",
- " # forward pass\n",
- " b_outputs = model_tokenclassification(b_input_ids, token_type_ids=None,\n",
- " attention_mask=b_input_mask, labels=b_labels)\n",
- " \n",
- " loss_fct = CrossEntropyLoss()\n",
- " # Only keep active parts of the loss\n",
- " b_active_loss = b_input_mask.view(-1) == 1\n",
- " b_active_logits = b_outputs.view(-1, config.num_labels)[b_active_loss]\n",
- " b_active_labels = b_labels.view(-1)[b_active_loss]\n",
- " loss = loss_fct(b_active_logits, b_active_labels)\n",
- " acc = torch.mean((torch.max(b_active_logits.detach(),1)[1] == b_active_labels.detach()).float())\n",
- " \n",
- " train_losses.append(loss.detach().item())\n",
- " train_acc.append(acc)\n",
- " # backward pass\n",
- " loss.backward()\n",
- " # track train loss\n",
- " tr_loss += loss.item()\n",
- " tr_acc += acc\n",
- " nb_tr_steps += 1\n",
- " # gradient clipping\n",
- " torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=MAX_GRAD_NORM)\n",
- " # update parameters\n",
- " optimizer.step()\n",
- " model.zero_grad()\n",
- "\n",
- " # print train loss per epoch\n",
- " print(f\"Train loss: {(tr_loss/nb_tr_steps)}\")\n",
- " print(f\"Train Accuracy: {(tr_acc/nb_tr_steps)}\")\n",
- " print(f\"Train Time: {(time.time()-start_time)/60} mins\")\n",
- "\n",
- " # VALIDATION on validation set\n",
- " start_time = time.time()\n",
- " model_tokenclassification.eval()\n",
- " eval_loss, eval_acc = 0, 0\n",
- " nb_eval_steps = 0\n",
- " for batch in val_dataloader:\n",
- " batch = tuple(t.to(device) for t in batch)\n",
- " b_input_ids, b_input_mask, b_labels = batch\n",
- "\n",
- " with torch.no_grad():\n",
- " \n",
- " b_outputs = model_tokenclassification(b_input_ids, token_type_ids=None,\n",
- " attention_mask=b_input_mask, labels=b_labels)\n",
- "\n",
- " loss_fct = CrossEntropyLoss()\n",
- " # Only keep active parts of the loss\n",
- " b_active_loss = b_input_mask.view(-1) == 1\n",
- " b_active_logits = b_outputs.view(-1, config.num_labels)[b_active_loss]\n",
- " b_active_labels = b_labels.view(-1)[b_active_loss]\n",
- " loss = loss_fct(b_active_logits, b_active_labels)\n",
- " acc = np.mean(np.argmax(b_active_logits.detach().cpu().numpy(), axis=1).flatten() == b_active_labels.detach().cpu().numpy().flatten())\n",
- "\n",
- " eval_loss += loss.mean().item()\n",
- " eval_acc += acc\n",
- " nb_eval_steps += 1 \n",
- " eval_loss = eval_loss/nb_eval_steps\n",
- " eval_acc = eval_acc/nb_eval_steps\n",
- " val_losses.append(eval_loss)\n",
- " val_acc.append(eval_acc)\n",
- " print(f\"Validation loss: {eval_loss}\")\n",
- " print(f\"Validation Accuracy: {(eval_acc)}\")\n",
- " print(f\"Validation Time: {(time.time()-start_time)/60} mins\") \n",
- " \n",
- " \n",
- " print(f\"--- Save to checkpoint ---\") \n",
- " torch.save({\n",
- " 'epoch': i,\n",
- " 'model_state_dict': model_tokenclassification.state_dict(),\n",
- " 'optimizer_state_dict': optimizer.state_dict(),\n",
- " 'loss': loss,\n",
- " 'train_losses': train_losses,\n",
- " 'train_acc': train_acc,\n",
- " 'val_losses': val_losses,\n",
- " 'val_acc': val_acc}\n",
- " , ALBERT_FINETUNE_CHECKPOINT)"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "skGJUzsJbwYn"
- },
- "source": [
- "\n",
- "def predict(texts):\n",
- " tokenized_texts = [tokenizer.tokenize(txt) for txt in texts]\n",
- " input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],\n",
- " maxlen=MAX_LEN, dtype=\"long\", truncating=\"post\", padding=\"post\")\n",
- " attention_mask = [[float(i>0) for i in ii] for ii in input_ids]\n",
- " \n",
- " input_ids = torch.tensor(input_ids)\n",
- " attention_mask = torch.tensor(attention_mask)\n",
- "\n",
- " dataset = TensorDataset(input_ids, attention_mask)\n",
- " datasampler = SequentialSampler(dataset)\n",
- " dataloader = DataLoader(dataset, sampler=datasampler, batch_size=BATCH_SIZE) \n",
- " \n",
- " predicted_labels = []\n",
- " \n",
- " for batch in dataloader:\n",
- " batch = tuple(t.to(device) for t in batch)\n",
- " b_input_ids, b_input_mask = batch\n",
- " \n",
- " with torch.no_grad():\n",
- " logits = model_tokenclassification(b_input_ids, token_type_ids=None,\n",
- " attention_mask=b_input_mask)\n",
- "\n",
- " predicted_labels.append(np.multiply(np.argmax(logits.detach().cpu().numpy(),axis=2), b_input_mask.detach().cpu().numpy()))\n",
- " # np.concatenate(predicted_labels), to flatten list of arrays of batch_size * max_len into list of arrays of max_len\n",
- " return np.concatenate(predicted_labels).astype(int), tokenized_texts\n",
- "\n",
- "texts = df_data_val.review.values\n",
- "predicted_labels, _ = predict(texts)\n",
- "df_data_val['predicted_review_label'] = list(predicted_labels)\n",
- "\n",
- "def get_dish_candidate_names(predicted_label, tokenized_text):\n",
- " name_lists = []\n",
- " if len(np.where(predicted_label>0)[0])>0:\n",
- " name_idx_combined = np.where(predicted_label>0)[0]\n",
- " name_idxs = np.split(name_idx_combined, np.where(np.diff(name_idx_combined) != 1)[0]+1)\n",
- " name_lists.append([\" \".join(np.take(tokenized_text,name_idx)) for name_idx in name_idxs])\n",
- " # If there duplicate names in the name_lists\n",
- " name_lists = np.unique(name_lists)\n",
- " return name_lists\n",
- " else:\n",
- " return None\n",
- "df_data_val['candidate_name']=df_data_val.apply(lambda row: get_dish_candidate_names(row.predicted_review_label, row.review_tokens)\n",
- " , axis=1)"
- ],
- "execution_count": null,
- "outputs": []
- }
- ]
-}
\ No newline at end of file