diff --git a/fine_tuning_ALBERT.ipynb b/fine_tuning_ALBERT.ipynb
deleted file mode 100644
index 143f5c9..0000000
--- a/fine_tuning_ALBERT.ipynb
+++ /dev/null
@@ -1,1971 +0,0 @@
-{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "name": "fine-tuning ALBERT.ipynb",
- "provenance": [],
- "authorship_tag": "ABX9TyPQ0neoGAreJPtIYFOz3s34",
- "include_colab_link": true
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "accelerator": "GPU"
- },
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "view-in-github",
- "colab_type": "text"
- },
- "source": [
- ""
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "9ulHtYtNrQ8r",
- "outputId": "4d47a5fc-ce15-49bd-89cc-c86e8d6debe9",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 1000
- }
- },
- "source": [
- "!pip install keras\n",
- "!pip install tensorflow\n",
- "!pip install transformers\n",
- "!pip3 install albert-tensorflow\n",
- "!pip install torch\n",
- "!pip install sentencepiece"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: keras in /usr/local/lib/python3.6/dist-packages (2.4.3)\n",
- "Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from keras) (3.13)\n",
- "Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from keras) (2.10.0)\n",
- "Requirement already satisfied: numpy>=1.9.1 in /usr/local/lib/python3.6/dist-packages (from keras) (1.18.5)\n",
- "Requirement already satisfied: scipy>=0.14 in /usr/local/lib/python3.6/dist-packages (from keras) (1.4.1)\n",
- "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from h5py->keras) (1.15.0)\n",
- "Requirement already satisfied: tensorflow in /usr/local/lib/python3.6/dist-packages (2.3.0)\n",
- "Requirement already satisfied: protobuf>=3.9.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (3.12.4)\n",
- "Requirement already satisfied: h5py<2.11.0,>=2.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (2.10.0)\n",
- "Requirement already satisfied: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.10.0)\n",
- "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.1.0)\n",
- "Requirement already satisfied: gast==0.3.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.3.3)\n",
- "Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.32.0)\n",
- "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (3.3.0)\n",
- "Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.15.0)\n",
- "Requirement already satisfied: keras-preprocessing<1.2,>=1.1.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.1.2)\n",
- "Requirement already satisfied: scipy==1.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.4.1)\n",
- "Requirement already satisfied: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.12.1)\n",
- "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.35.1)\n",
- "Requirement already satisfied: google-pasta>=0.1.8 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.2.0)\n",
- "Requirement already satisfied: numpy<1.19.0,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.18.5)\n",
- "Requirement already satisfied: astunparse==1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.6.3)\n",
- "Requirement already satisfied: tensorboard<3,>=2.3.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (2.3.0)\n",
- "Requirement already satisfied: tensorflow-estimator<2.4.0,>=2.3.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (2.3.0)\n",
- "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.9.2->tensorflow) (50.3.0)\n",
- "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (3.2.2)\n",
- "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (1.7.0)\n",
- "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (0.4.1)\n",
- "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (2.23.0)\n",
- "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (1.0.1)\n",
- "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (1.17.2)\n",
- "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from markdown>=2.6.8->tensorboard<3,>=2.3.0->tensorflow) (2.0.0)\n",
- "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<3,>=2.3.0->tensorflow) (1.3.0)\n",
- "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (2020.6.20)\n",
- "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (1.24.3)\n",
- "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (3.0.4)\n",
- "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (2.10)\n",
- "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (0.2.8)\n",
- "Requirement already satisfied: rsa<5,>=3.1.4; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (4.6)\n",
- "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (4.1.1)\n",
- "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->markdown>=2.6.8->tensorboard<3,>=2.3.0->tensorflow) (3.2.0)\n",
- "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<3,>=2.3.0->tensorflow) (3.1.0)\n",
- "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (0.4.8)\n",
- "Collecting transformers\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/19/22/aff234f4a841f8999e68a7a94bdd4b60b4cebcfeca5d67d61cd08c9179de/transformers-3.3.1-py3-none-any.whl (1.1MB)\n",
- "\u001b[K |████████████████████████████████| 1.1MB 2.8MB/s \n",
- "\u001b[?25hCollecting sacremoses\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)\n",
- "\u001b[K |████████████████████████████████| 890kB 17.4MB/s \n",
- "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from transformers) (20.4)\n",
- "Collecting sentencepiece!=0.1.92\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/d4/a4/d0a884c4300004a78cca907a6ff9a5e9fe4f090f5d95ab341c53d28cbc58/sentencepiece-0.1.91-cp36-cp36m-manylinux1_x86_64.whl (1.1MB)\n",
- "\u001b[K |████████████████████████████████| 1.1MB 18.1MB/s \n",
- "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers) (3.0.12)\n",
- "Requirement already satisfied: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers) (0.7)\n",
- "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from transformers) (4.41.1)\n",
- "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers) (2019.12.20)\n",
- "Collecting tokenizers==0.8.1.rc2\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/80/83/8b9fccb9e48eeb575ee19179e2bdde0ee9a1904f97de5f02d19016b8804f/tokenizers-0.8.1rc2-cp36-cp36m-manylinux1_x86_64.whl (3.0MB)\n",
- "\u001b[K |████████████████████████████████| 3.0MB 28.7MB/s \n",
- "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from transformers) (1.18.5)\n",
- "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from transformers) (2.23.0)\n",
- "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (1.15.0)\n",
- "Requirement already satisfied: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (7.1.2)\n",
- "Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (0.16.0)\n",
- "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->transformers) (2.4.7)\n",
- "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2.10)\n",
- "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2020.6.20)\n",
- "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (1.24.3)\n",
- "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (3.0.4)\n",
- "Building wheels for collected packages: sacremoses\n",
- " Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- " Created wheel for sacremoses: filename=sacremoses-0.0.43-cp36-none-any.whl size=893257 sha256=7aceb359875e5a113a3b4100f89e9d914f4e2a2ce05ed7eaaf52e10e7dfa0b06\n",
- " Stored in directory: /root/.cache/pip/wheels/29/3c/fd/7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45\n",
- "Successfully built sacremoses\n",
- "Installing collected packages: sacremoses, sentencepiece, tokenizers, transformers\n",
- "Successfully installed sacremoses-0.0.43 sentencepiece-0.1.91 tokenizers-0.8.1rc2 transformers-3.3.1\n",
- "Collecting albert-tensorflow\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/ba/1e/e776bb23e6f89a1f1d7d33b50d0bd9c2c7b24b39aa548f041827a9c00d73/albert_tensorflow-1.1-py3-none-any.whl (81kB)\n",
- "\u001b[K |████████████████████████████████| 81kB 2.3MB/s \n",
- "\u001b[?25hRequirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from albert-tensorflow) (1.15.0)\n",
- "Installing collected packages: albert-tensorflow\n",
- "Successfully installed albert-tensorflow-1.1\n",
- "Requirement already satisfied: torch in /usr/local/lib/python3.6/dist-packages (1.6.0+cu101)\n",
- "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from torch) (1.18.5)\n",
- "Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from torch) (0.16.0)\n",
- "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.6/dist-packages (0.1.91)\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "VPur-ModrqiE"
- },
- "source": [
- "# Check GPU"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "eEMLd2nzrtAr",
- "outputId": "ac45d376-2643-4643-873d-b887fa65f29f",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 34
- }
- },
- "source": [
- "import tensorflow as tf\n",
- "\n",
- "# Get the GPU device name.\n",
- "device_name = tf.test.gpu_device_name()\n",
- "\n",
- "# The device name should look like the following:\n",
- "if device_name == '/device:GPU:0':\n",
- " print('Found GPU at: {}'.format(device_name))\n",
- "else:\n",
- " raise SystemError('GPU device not found')"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Found GPU at: /device:GPU:0\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "aJUsW5-trxGv",
- "outputId": "5034d3c9-248b-4711-b07c-162d62d9de9b",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 51
- }
- },
- "source": [
- "import torch\n",
- "\n",
- "# If there's a GPU available...\n",
- "if torch.cuda.is_available(): \n",
- "\n",
- " # Tell PyTorch to use the GPU. \n",
- " device = torch.device(\"cuda\")\n",
- "\n",
- " print('There are %d GPU(s) available.' % torch.cuda.device_count())\n",
- "\n",
- " print('We will use the GPU:', torch.cuda.get_device_name(0))\n",
- "\n",
- "# If not...\n",
- "else:\n",
- " print('No GPU available, using the CPU instead.')\n",
- " device = torch.device(\"cpu\")"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "There are 1 GPU(s) available.\n",
- "We will use the GPU: Tesla P100-PCIE-16GB\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "sZuNMIWGs5L0"
- },
- "source": [
- "# Mounting"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "PGdZlz87rX7Q",
- "outputId": "4c0f3693-aaaa-4f16-d8de-3eaa8a2b1b90",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 34
- }
- },
- "source": [
- "from google.colab import drive\n",
- "drive.mount('/content/gdrive')"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Mounted at /content/gdrive\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "2lMkdNcrrbM0",
- "outputId": "52181ba1-f48d-4883-ac18-73d8e6f65b1c",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 136
- }
- },
- "source": [
- "!git clone https://github.com/mjag7682/ALBERT"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Cloning into 'ALBERT'...\n",
- "remote: Enumerating objects: 9, done.\u001b[K\n",
- "remote: Counting objects: 100% (9/9), done.\u001b[K\n",
- "remote: Compressing objects: 100% (9/9), done.\u001b[K\n",
- "remote: Total 362 (delta 2), reused 0 (delta 0), pack-reused 353\u001b[K\n",
- "Receiving objects: 100% (362/362), 244.39 KiB | 470.00 KiB/s, done.\n",
- "Resolving deltas: 100% (235/235), done.\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "skH_bDk2rdJ_",
- "outputId": "4137e476-8d92-444d-eb2b-3461194b4b68",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 1000
- }
- },
- "source": [
- "!pip install -r /content/ALBERT/requirements.txt "
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Collecting tensorflow==1.15.2\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/9a/d9/fd234c7bf68638423fb8e7f44af7fcfce3bcaf416b51e6d902391e47ec43/tensorflow-1.15.2-cp36-cp36m-manylinux2010_x86_64.whl (110.5MB)\n",
- "\u001b[K |████████████████████████████████| 110.5MB 65kB/s \n",
- "\u001b[?25hCollecting tensorflow_hub==0.7\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/00/0e/a91780d07592b1abf9c91344ce459472cc19db3b67fdf3a61dca6ebb2f5c/tensorflow_hub-0.7.0-py2.py3-none-any.whl (89kB)\n",
- "\u001b[K |████████████████████████████████| 92kB 9.6MB/s \n",
- "\u001b[?25hRequirement already satisfied: sentencepiece in /usr/local/lib/python3.6/dist-packages (from -r /content/ALBERT/requirements.txt (line 5)) (0.1.93)\n",
- "Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.15.0)\n",
- "Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.32.0)\n",
- "Collecting gast==0.2.2\n",
- " Downloading https://files.pythonhosted.org/packages/4e/35/11749bf99b2d4e3cceb4d55ca22590b0d7c2c62b9de38ac4a4a7f4687421/gast-0.2.2.tar.gz\n",
- "Requirement already satisfied: astor>=0.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (0.8.1)\n",
- "Requirement already satisfied: numpy<2.0,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.18.5)\n",
- "Requirement already satisfied: protobuf>=3.6.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (3.12.4)\n",
- "Collecting tensorboard<1.16.0,>=1.15.0\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/1e/e9/d3d747a97f7188f48aa5eda486907f3b345cd409f0a0850468ba867db246/tensorboard-1.15.0-py3-none-any.whl (3.8MB)\n",
- "\u001b[K |████████████████████████████████| 3.8MB 41.1MB/s \n",
- "\u001b[?25hRequirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (3.3.0)\n",
- "Requirement already satisfied: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (0.10.0)\n",
- "Collecting tensorflow-estimator==1.15.1\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/de/62/2ee9cd74c9fa2fa450877847ba560b260f5d0fb70ee0595203082dafcc9d/tensorflow_estimator-1.15.1-py2.py3-none-any.whl (503kB)\n",
- "\u001b[K |████████████████████████████████| 512kB 29.1MB/s \n",
- "\u001b[?25hRequirement already satisfied: wheel>=0.26; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (0.35.1)\n",
- "Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.1.2)\n",
- "Requirement already satisfied: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.12.1)\n",
- "Collecting keras-applications>=1.0.8\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/71/e3/19762fdfc62877ae9102edf6342d71b28fbfd9dea3d2f96a882ce099b03f/Keras_Applications-1.0.8-py3-none-any.whl (50kB)\n",
- "\u001b[K |████████████████████████████████| 51kB 5.6MB/s \n",
- "\u001b[?25hRequirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.1.0)\n",
- "Requirement already satisfied: google-pasta>=0.1.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (0.2.0)\n",
- "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.6.1->tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (50.3.0)\n",
- "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.16.0,>=1.15.0->tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.0.1)\n",
- "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.16.0,>=1.15.0->tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (3.2.2)\n",
- "Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from keras-applications>=1.0.8->tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (2.10.0)\n",
- "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from markdown>=2.6.8->tensorboard<1.16.0,>=1.15.0->tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (2.0.0)\n",
- "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->markdown>=2.6.8->tensorboard<1.16.0,>=1.15.0->tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (3.2.0)\n",
- "Building wheels for collected packages: gast\n",
- " Building wheel for gast (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- " Created wheel for gast: filename=gast-0.2.2-cp36-none-any.whl size=7542 sha256=da09531e91a678dff87f3e427990e2f4fae4e07e33f032b7f927656d5c7d3e26\n",
- " Stored in directory: /root/.cache/pip/wheels/5c/2e/7e/a1d4d4fcebe6c381f378ce7743a3ced3699feb89bcfbdadadd\n",
- "Successfully built gast\n",
- "\u001b[31mERROR: tensorflow-probability 0.11.0 has requirement gast>=0.3.2, but you'll have gast 0.2.2 which is incompatible.\u001b[0m\n",
- "Installing collected packages: gast, tensorboard, tensorflow-estimator, keras-applications, tensorflow, tensorflow-hub\n",
- " Found existing installation: gast 0.3.3\n",
- " Uninstalling gast-0.3.3:\n",
- " Successfully uninstalled gast-0.3.3\n",
- " Found existing installation: tensorboard 2.3.0\n",
- " Uninstalling tensorboard-2.3.0:\n",
- " Successfully uninstalled tensorboard-2.3.0\n",
- " Found existing installation: tensorflow-estimator 2.3.0\n",
- " Uninstalling tensorflow-estimator-2.3.0:\n",
- " Successfully uninstalled tensorflow-estimator-2.3.0\n",
- " Found existing installation: tensorflow 2.3.0\n",
- " Uninstalling tensorflow-2.3.0:\n",
- " Successfully uninstalled tensorflow-2.3.0\n",
- " Found existing installation: tensorflow-hub 0.9.0\n",
- " Uninstalling tensorflow-hub-0.9.0:\n",
- " Successfully uninstalled tensorflow-hub-0.9.0\n",
- "Successfully installed gast-0.2.2 keras-applications-1.0.8 tensorboard-1.15.0 tensorflow-1.15.2 tensorflow-estimator-1.15.1 tensorflow-hub-0.7.0\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "jwgFYW9dsFO1"
- },
- "source": [
- "# Fine Tune"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "VW_nppn5tbuc",
- "outputId": "30455f24-785e-42ea-cdd8-5eef33bcee93",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 854
- }
- },
- "source": [
- "!pip install --upgrade tensorflow"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Collecting tensorflow\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/ad/ad/769c195c72ac72040635c66cd9ba7b0f4b4fc1ac67e59b99fa6988446c22/tensorflow-2.3.1-cp36-cp36m-manylinux2010_x86_64.whl (320.4MB)\n",
- "\u001b[K |████████████████████████████████| 320.4MB 50kB/s \n",
- "\u001b[?25hRequirement already satisfied, skipping upgrade: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.1.0)\n",
- "Requirement already satisfied, skipping upgrade: astunparse==1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.6.3)\n",
- "Requirement already satisfied, skipping upgrade: gast==0.3.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.3.3)\n",
- "Requirement already satisfied, skipping upgrade: tensorboard<3,>=2.3.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (2.3.0)\n",
- "Requirement already satisfied, skipping upgrade: wheel>=0.26 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.35.1)\n",
- "Requirement already satisfied, skipping upgrade: keras-preprocessing<1.2,>=1.1.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.1.2)\n",
- "Requirement already satisfied, skipping upgrade: numpy<1.19.0,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.18.5)\n",
- "Requirement already satisfied, skipping upgrade: tensorflow-estimator<2.4.0,>=2.3.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (2.3.0)\n",
- "Requirement already satisfied, skipping upgrade: google-pasta>=0.1.8 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.2.0)\n",
- "Requirement already satisfied, skipping upgrade: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (3.3.0)\n",
- "Requirement already satisfied, skipping upgrade: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.32.0)\n",
- "Requirement already satisfied, skipping upgrade: six>=1.12.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.15.0)\n",
- "Requirement already satisfied, skipping upgrade: protobuf>=3.9.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (3.12.4)\n",
- "Requirement already satisfied, skipping upgrade: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.12.1)\n",
- "Requirement already satisfied, skipping upgrade: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.10.0)\n",
- "Requirement already satisfied, skipping upgrade: h5py<2.11.0,>=2.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (2.10.0)\n",
- "Requirement already satisfied, skipping upgrade: setuptools>=41.0.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (50.3.0)\n",
- "Requirement already satisfied, skipping upgrade: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (3.2.2)\n",
- "Requirement already satisfied, skipping upgrade: google-auth<2,>=1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (1.17.2)\n",
- "Requirement already satisfied, skipping upgrade: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (1.0.1)\n",
- "Requirement already satisfied, skipping upgrade: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (1.7.0)\n",
- "Requirement already satisfied, skipping upgrade: requests<3,>=2.21.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (2.23.0)\n",
- "Requirement already satisfied, skipping upgrade: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (0.4.1)\n",
- "Requirement already satisfied, skipping upgrade: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from markdown>=2.6.8->tensorboard<3,>=2.3.0->tensorflow) (2.0.0)\n",
- "Requirement already satisfied, skipping upgrade: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (0.2.8)\n",
- "Requirement already satisfied, skipping upgrade: rsa<5,>=3.1.4; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (4.6)\n",
- "Requirement already satisfied, skipping upgrade: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (4.1.1)\n",
- "Requirement already satisfied, skipping upgrade: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (2020.6.20)\n",
- "Requirement already satisfied, skipping upgrade: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (1.24.3)\n",
- "Requirement already satisfied, skipping upgrade: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (3.0.4)\n",
- "Requirement already satisfied, skipping upgrade: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (2.10)\n",
- "Requirement already satisfied, skipping upgrade: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<3,>=2.3.0->tensorflow) (1.3.0)\n",
- "Requirement already satisfied, skipping upgrade: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->markdown>=2.6.8->tensorboard<3,>=2.3.0->tensorflow) (3.2.0)\n",
- "Requirement already satisfied, skipping upgrade: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (0.4.8)\n",
- "Requirement already satisfied, skipping upgrade: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<3,>=2.3.0->tensorflow) (3.1.0)\n",
- "Installing collected packages: tensorflow\n",
- " Found existing installation: tensorflow 2.3.0\n",
- " Uninstalling tensorflow-2.3.0:\n",
- " Successfully uninstalled tensorflow-2.3.0\n",
- "Successfully installed tensorflow-2.3.1\n"
- ],
- "name": "stdout"
- },
- {
- "output_type": "display_data",
- "data": {
- "application/vnd.colab-display-data+json": {
- "pip_warning": {
- "packages": [
- "tensorflow"
- ]
- }
- }
- },
- "metadata": {
- "tags": []
- }
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "oBKiimp4YxOf",
- "outputId": "58f244b6-afb1-45b9-c9c8-060ae48116ce",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 51
- }
- },
- "source": [
- "# !pip install modeling"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "\u001b[31mERROR: Could not find a version that satisfies the requirement modeling (from versions: none)\u001b[0m\n",
- "\u001b[31mERROR: No matching distribution found for modeling\u001b[0m\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "5rYCZ946YvOj"
- },
- "source": [
- "from tensorflow.python.compiler.tensorrt import trt_convert as trt\n",
- "import tensorflow as tf\n",
- "# from albert import modeling\n",
- "# import tokenization\n",
- "# import optimization\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "from keras.preprocessing.sequence import pad_sequences"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "dbAgsBcot4n9"
- },
- "source": [
- "from transformers import AlbertTokenizer\n",
- "from transformers.modeling_albert import AlbertModel, load_tf_weights_in_albert, AlbertPreTrainedModel\n",
- "from transformers import AlbertForSequenceClassification,AlbertConfig\n",
- "from transformers.tokenization_bert import BertTokenizer\n",
- "import torch.nn as nn\n",
- "from torch.nn import CrossEntropyLoss\n",
- "VOCAB_FILE = \"/content/gdrive/My Drive/ALBERTimplementation/data/30k-clean.model\" # This is the vocab file output from Build Vocab step\n",
- "CONFIG_FILE = \"/content/gdrive/My Drive/ALBERTimplementation/AG_News/albert_config.json\"\n",
- "ALBERT_PRETRAIN_CHECKPOINT = \"/content/gdrive/My Drive/ALBERTimplementation/AG_News/model.ckpt-best.index\" # This is the model checkpoint output from Albert Pretrain step\n",
- "tokenizer = AlbertTokenizer(vocab_file=VOCAB_FILE)\n",
- "config = AlbertConfig.from_json_file(CONFIG_FILE)\n",
- "model = AlbertModel(config)\n",
- "model = load_tf_weights_in_albert(model, config,ALBERT_PRETRAIN_CHECKPOINT)"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "U6uLqvcVsJRY",
- "outputId": "9ba394cf-afea-439f-9ad7-8a3f59861df4",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 283
- }
- },
- "source": [
- "# from transformers import AlbertTokenizer\n",
- "# from transformers import AlbertForSequenceClassification,AlbertConfig\n",
- "# config = modeling.AlbertConfig.from_json_file(\"/content/gdrive/My Drive/ALBERTimplementation/model-fine/config.json\")\n",
- "# tokenizer = tokenization.FullTokenizer.from_scratch(vocab_file=\"/content/gdrive/My Drive/ALBERTimplementation/model-fine/vocab.txt\", do_lower_case=True, spm_model_file=None)\n",
- "# tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2', do_lower_case=True) \n",
- "# tokenizer = AlbertTokenizer.from_pretrained('/content/gdrive/My Drive/ALBERTimplementation/model-fine', do_lower_case=True) \n",
- "# tokenizer = AlbertTokenizer.from_pretrained('./content/drive/My Drive/Reuters_Dataset/reut2-021', do_lower_case=True) \n",
- "# PRE_TRAINED_MODEL_NAME_OR_PATH = '/content/gdrive/My Drive/ALBERTimplementation/model-fine'\n",
- "# model = AlbertForSequenceClassification.from_pretrained(PRE_TRAINED_MODEL_NAME_OR_PATH, num_labels = 2, output_attentions = False, output_hidden_states = False)"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "error",
- "ename": "AttributeError",
- "evalue": "ignored",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtransformers\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mAlbertForSequenceClassification\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mAlbertConfig\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mAlbertConfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_json_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"/content/gdrive/My Drive/ALBERTimplementation/model-fine/config.json\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mtokenizer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mAlbertTokenizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_scratch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvocab_file\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"/content/gdrive/My Drive/ALBERTimplementation/model-fine/vocab.txt\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdo_lower_case\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mspm_model_file\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;31m# tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2', do_lower_case=True)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;31m# tokenizer = AlbertTokenizer.from_pretrained('/content/gdrive/My Drive/ALBERTimplementation/model-fine', do_lower_case=True)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mAttributeError\u001b[0m: type object 'AlbertTokenizer' has no attribute 'from_scratch'"
- ]
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "eZYLfSvlsLBl"
- },
- "source": [
- "import pandas as pd\n",
- "import numpy as np\n",
- "import torch\n",
- "import tensorflow as tf"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "qZEJy5xu6Llu"
- },
- "source": [
- "train_data = pd.read_csv('/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/train.tsv', sep='\\t')\n",
- "for item in train_data.iterrows():\n",
- " print(item[1][1])\n",
- " if item[1][1] != 1 and item[1][1] != 0:\n",
- " print(item[1][1])"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "aLEK078bsRpD",
- "outputId": "621583cf-ec2c-4302-938d-fd12d55dd2d3",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 34
- }
- },
- "source": [
- "train_data = pd.read_csv('/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/train.tsv', sep='\\t')\n",
- "print(\"Number of training examples {}\".format(len(train_data)))\n",
- "num_examples = 100000\n",
- "train = train_data[:num_examples].text.values\n",
- "labels = train_data[:num_examples].label.values\n",
- "# train = train_data[]"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Number of training examples 131173\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "mNiWyaLD7Otu",
- "outputId": "b11835dc-c1f4-43c3-ab9a-4e3919bae840",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 51
- }
- },
- "source": [
- "print(len(train))\n",
- "print(len(labels))\n",
- "# print(labels[:100])\n",
- "for i in labels:\n",
- " if i!=0 and i!=1:\n",
- " print(i)"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "100000\n",
- "100000\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "G6MsmCDYsUC2",
- "outputId": "33b809fb-f668-483a-dfc4-9c61969fb847",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 88
- }
- },
- "source": [
- "# Print the original sentence.\n",
- "print(' Original: ', train[10])\n",
- "\n",
- "# Print the sentence split into tokens.\n",
- "print('Tokenized: ', tokenizer.tokenize(train[10]))\n",
- "\n",
- "# Print the sentence mapped to token ids.\n",
- "print('Token IDs: ', tokenizer.convert_tokens_to_ids(tokenizer.tokenize(train[10])))"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- " Original: vix spy new high vix new low spy calls versus lod\n",
- "Tokenized: ['▁vi', 'x', '▁spy', '▁new', '▁high', '▁vi', 'x', '▁new', '▁low', '▁spy', '▁call', 's', '▁vers', 'us', '▁lo', 'd']\n",
- "Token IDs: [1847, 782, 1181, 30, 141, 1847, 782, 30, 385, 1181, 172, 12, 3770, 595, 2947, 27]\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "_z9qjkZ9sVzV",
- "outputId": "58b18382-f1d7-461c-9cd7-4f2f8740aa46",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 51
- }
- },
- "source": [
- "print(train_data.text.apply(lambda x: len(x)).quantile([0.9]))\n",
- "MAX_LEN = 160"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "0.9 160.0\n",
- "Name: text, dtype: float64\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "ArJMjNxmMoHU",
- "outputId": "483535e7-53bc-4490-fb97-5227dbaf3c2c",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 34
- }
- },
- "source": [
- "m_l = 0\n",
- "for x in train:\n",
- " if len(x)>m_l:\n",
- " m_l = len(x)\n",
- "print(m_l)"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "1892\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "JBfPIx0HsXpY",
- "outputId": "4fd20283-60bf-4fcf-ac7a-236914066094",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 326
- }
- },
- "source": [
- "# Tokenize all of the sentences and map the tokens to thier word IDs.\n",
- "input_ids = []\n",
- "attention_masks = []\n",
- "\n",
- "# For every sentence...\n",
- "for text in train:\n",
- " # `encode_plus` will:\n",
- " # (1) Tokenize the sentence.\n",
- " # (2) Prepend the `[CLS]` token to the start.\n",
- " # (3) Append the `[SEP]` token to the end.\n",
- " # (4) Map tokens to their IDs.\n",
- " # (5) Pad or truncate the sentence to `max_length`\n",
- " # (6) Create attention masks for [PAD] tokens.\n",
- " encoded_dict = tokenizer.encode_plus(\n",
- " text, # Sentence to encode.\n",
- " add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n",
- " max_length = MAX_LEN, # Pad & truncate all sentences.\n",
- " pad_to_max_length = True,\n",
- " return_attention_mask = True, # Construct attn. masks.\n",
- " return_tensors = 'pt', # Return pytorch tensors.\n",
- " truncation = True\n",
- " )\n",
- " \n",
- " # Add the encoded sentence to the list. \n",
- " input_ids.append(encoded_dict['input_ids'])\n",
- " \n",
- " # And its attention mask (simply differentiates padding from non-padding).\n",
- " attention_masks.append(encoded_dict['attention_mask'])\n",
- "\n",
- "# Convert the lists into tensors.\n",
- "input_ids = torch.cat(input_ids, dim=0)\n",
- "attention_masks = torch.cat(attention_masks, dim=0)\n",
- "labels = torch.tensor(labels)\n",
- "\n",
- "# Print sentence 0, now as a list of IDs.\n",
- "print('Original: ', train[10])\n",
- "print('Token IDs:', input_ids[10])"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "/usr/local/lib/python3.6/dist-packages/transformers/tokenization_utils_base.py:1773: FutureWarning: The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).\n",
- " FutureWarning,\n"
- ],
- "name": "stderr"
- },
- {
- "output_type": "stream",
- "text": [
- "Original: vix spy new high vix new low spy calls versus lod\n",
- "Token IDs: tensor([ 2, 1847, 782, 1181, 30, 141, 1847, 782, 30, 385, 1181, 172,\n",
- " 12, 3770, 595, 2947, 27, 3, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
- " 0, 0, 0, 0])\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "fZ9Gi4GDsesz",
- "outputId": "2d759eba-1357-4351-d7be-5bd1c97f0f17",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 51
- }
- },
- "source": [
- "#training & validation split\n",
- "from torch.utils.data import TensorDataset, random_split\n",
- "\n",
- "\n",
- "# Combine the training inputs into a TensorDataset.\n",
- "dataset = TensorDataset(input_ids, attention_masks, labels)\n",
- "\n",
- "# Create a 90-10 train-validation split.\n",
- "\n",
- "# Calculate the number of samples to include in each set.\n",
- "train_size = int(0.9 * len(dataset))\n",
- "val_size = len(dataset) - train_size\n",
- "\n",
- "# Divide the dataset by randomly selecting samples.\n",
- "train_dataset, val_dataset = random_split(dataset, [train_size, val_size])\n",
- "\n",
- "print('{} training samples'.format(train_size))\n",
- "print('{} validation samples'.format(val_size))"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "90000 training samples\n",
- "10000 validation samples\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "etq7ymGTshCN"
- },
- "source": [
- "def flat_accuracy(preds, labels):\n",
- " pred_flat = np.argmax(preds, axis=1).flatten()\n",
- " labels_flat = labels.flatten()\n",
- " return np.sum(pred_flat == labels_flat) / len(labels_flat)\n",
- "\n",
- "import time\n",
- "import datetime\n",
- "\n",
- "def format_time(elapsed):\n",
- " '''\n",
- " Takes a time in seconds and returns a string hh:mm:ss\n",
- " '''\n",
- " # Round to the nearest second.\n",
- " elapsed_rounded = int(round((elapsed)))\n",
- " \n",
- " # Format as hh:mm:ss\n",
- " return str(datetime.timedelta(seconds=elapsed_rounded))\n",
- "\n",
- "# Set the seed value all over the place to make this reproducible.\n",
- "import random\n",
- "def set_random(seed_val):\n",
- " random.seed(seed_val)\n",
- " np.random.seed(seed_val)\n",
- " torch.manual_seed(seed_val)\n",
- " torch.cuda.manual_seed_all(seed_val)"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "I4MJwmeAsi9g"
- },
- "source": [
- "def train_model(train_dataloader, optimizer, epochs):\n",
- " \n",
- " # We'll store a number of quantities such as training and validation loss, \n",
- " # validation accuracy, and timings.\n",
- " training_stats = []\n",
- "\n",
- " # Measure the total training time for the whole run.\n",
- " total_t0 = time.time()\n",
- "\n",
- " # For each epoch...\n",
- " for epoch_i in range(0, epochs):\n",
- "\n",
- " # ========================================\n",
- " # Training\n",
- " # ========================================\n",
- "\n",
- " # Perform one full pass over the training set.\n",
- "\n",
- " print(\"\")\n",
- " print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))\n",
- " print('Training...')\n",
- "\n",
- " # Measure how long the training epoch takes.\n",
- " t0 = time.time()\n",
- "\n",
- " # Reset the total loss for this epoch.\n",
- " total_train_loss = 0\n",
- "\n",
- " # Put the model into training mode. Don't be mislead--the call to \n",
- " # `train` just changes the *mode*, it doesn't *perform* the training.\n",
- " # `dropout` and `batchnorm` layers behave differently during training\n",
- " # vs. test (source: https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch)\n",
- " model.train()\n",
- "\n",
- " # For each batch of training data...\n",
- " for step, batch in enumerate(train_dataloader):\n",
- "\n",
- " # Progress update every 40 batches.\n",
- " if step % 40 == 0 and not step == 0:\n",
- " # Calculate elapsed time in minutes.\n",
- " elapsed = format_time(time.time() - t0)\n",
- "\n",
- " # Report progress.\n",
- " print(' Batch {:>5,} of {:>5,}. Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))\n",
- "\n",
- " # Unpack this training batch from our dataloader. \n",
- " #\n",
- " # As we unpack the batch, we'll also copy each tensor to the GPU using the \n",
- " # `to` method.\n",
- " #\n",
- " # `batch` contains three pytorch tensors:\n",
- " # [0]: input ids \n",
- " # [1]: attention masks\n",
- " # [2]: labels \n",
- " b_input_ids = batch[0].to(device)\n",
- " b_input_mask = batch[1].to(device)\n",
- " b_labels = batch[2].to(device)\n",
- "\n",
- " # Always clear any previously calculated gradients before performing a\n",
- " # backward pass. PyTorch doesn't do this automatically because \n",
- " # accumulating the gradients is \"convenient while training RNNs\". \n",
- " # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)\n",
- " model.zero_grad() \n",
- "\n",
- " # Perform a forward pass (evaluate the model on this training batch).\n",
- " # The documentation for this `model` function is here: \n",
- " # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n",
- " # It returns different numbers of parameters depending on what arguments\n",
- " # arge given and what flags are set. For our useage here, it returns\n",
- " # the loss (because we provided labels) and the \"logits\"--the model\n",
- " # outputs prior to activation.\n",
- " loss, logits = model(b_input_ids, \n",
- " attention_mask=b_input_mask, \n",
- " labels=b_labels)\n",
- "\n",
- " # Accumulate the training loss over all of the batches so that we can\n",
- " # calculate the average loss at the end. `loss` is a Tensor containing a\n",
- " # single value; the `.item()` function just returns the Python value \n",
- " # from the tensor.\n",
- " total_train_loss += loss.item()\n",
- "\n",
- " # Perform a backward pass to calculate the gradients.\n",
- " loss.backward()\n",
- "\n",
- " # Clip the norm of the gradients to 1.0.\n",
- " # This is to help prevent the \"exploding gradients\" problem.\n",
- " torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n",
- "\n",
- " # Update parameters and take a step using the computed gradient.\n",
- " # The optimizer dictates the \"update rule\"--how the parameters are\n",
- " # modified based on their gradients, the learning rate, etc.\n",
- " optimizer.step()\n",
- "\n",
- " # Update the learning rate.\n",
- " scheduler.step()\n",
- "\n",
- " # Calculate the average loss over all of the batches.\n",
- " avg_train_loss = total_train_loss / len(train_dataloader) \n",
- "\n",
- " # Measure how long this epoch took.\n",
- " training_time = format_time(time.time() - t0)\n",
- "\n",
- " print(\"\")\n",
- " print(\" Average training loss: {0:.2f}\".format(avg_train_loss))\n",
- " print(\" Training epcoh took: {:}\".format(training_time))\n",
- "\n",
- " # ========================================\n",
- " # Validation\n",
- " # ========================================\n",
- " # After the completion of each training epoch, measure our performance on\n",
- " # our validation set.\n",
- "\n",
- " print(\"\")\n",
- " print(\"Running Validation...\")\n",
- "\n",
- " t0 = time.time()\n",
- "\n",
- " # Put the model in evaluation mode--the dropout layers behave differently\n",
- " # during evaluation.\n",
- " model.eval()\n",
- "\n",
- " # Tracking variables \n",
- " total_eval_accuracy = 0\n",
- " total_eval_loss = 0\n",
- " nb_eval_steps = 0\n",
- "\n",
- " # Evaluate data for one epoch\n",
- " for batch in validation_dataloader:\n",
- "\n",
- " # Unpack this training batch from our dataloader. \n",
- " #\n",
- " # As we unpack the batch, we'll also copy each tensor to the GPU using \n",
- " # the `to` method.\n",
- " #\n",
- " # `batch` contains three pytorch tensors:\n",
- " # [0]: input ids \n",
- " # [1]: attention masks\n",
- " # [2]: labels \n",
- " b_input_ids = batch[0].to(device)\n",
- " b_input_mask = batch[1].to(device)\n",
- " b_labels = batch[2].to(device)\n",
- "\n",
- " # Tell pytorch not to bother with constructing the compute graph during\n",
- " # the forward pass, since this is only needed for backprop (training).\n",
- " with torch.no_grad(): \n",
- "\n",
- " # Forward pass, calculate logit predictions.\n",
- " # token_type_ids is the same as the \"segment ids\", which \n",
- " # differentiates sentence 1 and 2 in 2-sentence tasks.\n",
- " # The documentation for this `model` function is here: \n",
- " # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n",
- " # Get the \"logits\" output by the model. The \"logits\" are the output\n",
- " # values prior to applying an activation function like the softmax.\n",
- " (loss, logits) = model(b_input_ids, \n",
- " attention_mask=b_input_mask,\n",
- " labels=b_labels)\n",
- "\n",
- " # Accumulate the validation loss.\n",
- " total_eval_loss += loss.item()\n",
- "\n",
- " # Move logits and labels to CPU\n",
- " logits = logits.detach().cpu().numpy()\n",
- " label_ids = b_labels.to('cpu').numpy()\n",
- "\n",
- " # Calculate the accuracy for this batch of test sentences, and\n",
- " # accumulate it over all batches.\n",
- " total_eval_accuracy += flat_accuracy(logits, label_ids)\n",
- "\n",
- "\n",
- " # Report the final accuracy for this validation run.\n",
- " avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)\n",
- " print(\" Accuracy: {0:.2f}\".format(avg_val_accuracy))\n",
- "\n",
- " # Calculate the average loss over all of the batches.\n",
- " avg_val_loss = total_eval_loss / len(validation_dataloader)\n",
- "\n",
- " # Measure how long the validation run took.\n",
- " validation_time = format_time(time.time() - t0)\n",
- "\n",
- " print(\" Validation Loss: {0:.2f}\".format(avg_val_loss))\n",
- " print(\" Validation took: {:}\".format(validation_time))\n",
- "\n",
- " # Record all statistics from this epoch.\n",
- " training_stats.append(\n",
- " {\n",
- " 'epoch': epoch_i + 1,\n",
- " 'Training Loss': avg_train_loss,\n",
- " 'Valid. Loss': avg_val_loss,\n",
- " 'Valid. Accur.': avg_val_accuracy,\n",
- " 'Training Time': training_time,\n",
- " 'Validation Time': validation_time\n",
- " }\n",
- " )\n",
- "\n",
- " print(\"\")\n",
- " print(\"Training complete!\")\n",
- "\n",
- " print(\"Total training took {:} (h:mm:ss)\".format(format_time(time.time()-total_t0)))\n",
- " \n",
- " return training_stats\n",
- "\n",
- "def print_training_stats(training_stats):\n",
- " # Display floats with two decimal places.\n",
- " pd.set_option('precision', 2)\n",
- "\n",
- " # Create a DataFrame from our training statistics.\n",
- " df_stats = pd.DataFrame(data=training_stats)\n",
- "\n",
- " # Use the 'epoch' as the row index.\n",
- " df_stats = df_stats.set_index('epoch')\n",
- "\n",
- " # A hack to force the column headers to wrap.\n",
- " #df = df.style.set_table_styles([dict(selector=\"th\",props=[('max-width', '70px')])])\n",
- "\n",
- " # Display the table.\n",
- " print(df_stats)"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "Tzz2H0Jpsmtk",
- "outputId": "d982e8a8-82ec-4a1f-9fc5-afc2e2fcb7d7",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 1000
- }
- },
- "source": [
- "from torch.utils.data import DataLoader, RandomSampler, SequentialSampler\n",
- "from transformers import get_linear_schedule_with_warmup, AdamW\n",
- "\n",
- "from transformers import AlbertForSequenceClassification,AlbertConfig\n",
- "# from transformers import DistilBertForSequenceClassification, AdamW, DistilBertConfig\n",
- "# from transformers import BertForSequenceClassification, BertConfig\n",
- "# from transformers import ElectraForSequenceClassification\n",
- "\n",
- "# ADJUST lr_s and batch_sizes\n",
- "lr_s = [2e-5]\n",
- "batch_sizes = [32]\n",
- "from itertools import product\n",
- "hyperparameters = list(product(*[lr_s, batch_sizes]))\n",
- "print(hyperparameters)\n",
- "training_statistics = []\n",
- "for lr, batch_size in hyperparameters:\n",
- " # config = AlbertConfig.from_json_file(CONFIG_FILE)\n",
- " # model = AlbertModel(config)\n",
- " # model = load_tf_weights_in_albert(model, config,ALBERT_PRETRAIN_CHECKPOINT)\n",
- " PRE_TRAINED_MODEL_NAME_OR_PATH = '/content/gdrive/My Drive/ALBERTimplementation/AGnewsmodel'\n",
- " model = AlbertForSequenceClassification.from_pretrained(PRE_TRAINED_MODEL_NAME_OR_PATH, num_labels = 2, output_attentions = False, output_hidden_states = False)\n",
- " # if MODEL_CHOICE == ModelChoice.BERT:\n",
- " # model = BertForSequenceClassification.from_pretrained(\"bert-base-uncased\",num_labels = 2, output_attentions = False, output_hidden_states = False)\n",
- " # elif MODEL_CHOICE == ModelChoice.DISTILBERT:\n",
- " # model = DistilBertForSequenceClassification.from_pretrained(\"distilbert-base-uncased\",num_labels = 2,output_attentions = False,output_hidden_states = False)\n",
- " # elif MODEL_CHOICE == ModelChoice.ALBERT:\n",
- " # model = AlbertForSequenceClassification.from_pretrained(\"albert-base-v2\", num_labels = 2, output_attentions = False, output_hidden_states = False)\n",
- " # elif MODEL_CHOICE == ModelChoice.ELECTRA:\n",
- " # model = ElectraForSequenceClassification.from_pretrained(\"google/electra-base-discriminator\",num_labels = 2, output_attentions = False, output_hidden_states = False)\n",
- " # else:\n",
- " # print(\"Choose proper model!\")\n",
- " \n",
- " print('START----',model,'END---')\n",
- " \n",
- " # Tell pytorch to run this model on the GPU.\n",
- " model.cuda()\n",
- "\n",
- " # The DataLoader needs to know our batch size for training, so we specify it \n",
- " # here. For fine-tuning ALBERT on a specific task, the authors recommend a batch \n",
- " # size of 16 or 32.\n",
- "\n",
- " # Create the DataLoaders for our training and validation sets.\n",
- " # We'll take training samples in random order. \n",
- " train_dataloader = DataLoader(\n",
- " train_dataset, # The training samples.\n",
- " sampler = RandomSampler(train_dataset), # Select batches randomly\n",
- " batch_size = batch_size # Trains with this batch size.\n",
- " )\n",
- "\n",
- " # For validation the order doesn't matter, so we'll just read them sequentially.\n",
- " validation_dataloader = DataLoader(\n",
- " val_dataset, # The validation samples.\n",
- " sampler = SequentialSampler(val_dataset), # Pull out batches sequentially.\n",
- " batch_size = batch_size # Evaluate with this batch size.\n",
- " )\n",
- " \n",
- " # Note: AdamW is a class from the huggingface library (as opposed to pytorch) \n",
- " # I believe the 'W' stands for 'Weight Decay fix\"\n",
- " optimizer = AdamW(model.parameters(),\n",
- " lr = lr, # args.learning_rate - default is 5e-5, our notebook had 2e-5\n",
- " eps = 1e-8 # args.adam_epsilon - default is 1e-8.\n",
- " )\n",
- " \n",
- "\n",
- " # Number of training epochs. The BERT authors recommend between 2 and 4. \n",
- " # We chose to run for 4, but we'll see later that this may be over-fitting the\n",
- " # training data.\n",
- " epochs = 3\n",
- "\n",
- " # Total number of training steps is [number of batches] x [number of epochs]. \n",
- " # (Note that this is not the same as the number of training samples).\n",
- " total_steps = len(train_dataloader) * epochs\n",
- "\n",
- " # Create the learning rate scheduler.\n",
- " scheduler = get_linear_schedule_with_warmup(optimizer, \n",
- " num_warmup_steps = 0, # Default value in run_glue.py\n",
- " num_training_steps = total_steps)\n",
- "\n",
- " seed_val = 42\n",
- " set_random(seed_val)\n",
- " \n",
- " print(\"Training with hyperparameters: batch size={}, lr={}\".format(batch_size, lr))\n",
- " training_stats = train_model(train_dataloader, optimizer, epochs)\n",
- " training_statistics.append(training_stats)\n",
- " print_training_stats(training_stats)"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "[(2e-05, 32)]\n"
- ],
- "name": "stdout"
- },
- {
- "output_type": "stream",
- "text": [
- "Some weights of the model checkpoint at /content/gdrive/My Drive/ALBERTimplementation/AGnewsmodel were not used when initializing AlbertForSequenceClassification: ['predictions.bias', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias', 'predictions.dense.weight', 'predictions.dense.bias', 'predictions.decoder.weight', 'predictions.decoder.bias', 'sop_classifier.classifier.weight', 'sop_classifier.classifier.bias']\n",
- "- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).\n",
- "- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
- "Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at /content/gdrive/My Drive/ALBERTimplementation/AGnewsmodel and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
- "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
- ],
- "name": "stderr"
- },
- {
- "output_type": "stream",
- "text": [
- "START---- AlbertForSequenceClassification(\n",
- " (albert): AlbertModel(\n",
- " (embeddings): AlbertEmbeddings(\n",
- " (word_embeddings): Embedding(20001, 128, padding_idx=0)\n",
- " (position_embeddings): Embedding(512, 128)\n",
- " (token_type_embeddings): Embedding(2, 128)\n",
- " (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)\n",
- " (dropout): Dropout(p=0, inplace=False)\n",
- " )\n",
- " (encoder): AlbertTransformer(\n",
- " (embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)\n",
- " (albert_layer_groups): ModuleList(\n",
- " (0): AlbertLayerGroup(\n",
- " (albert_layers): ModuleList(\n",
- " (0): AlbertLayer(\n",
- " (full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
- " (attention): AlbertAttention(\n",
- " (query): Linear(in_features=768, out_features=768, bias=True)\n",
- " (key): Linear(in_features=768, out_features=768, bias=True)\n",
- " (value): Linear(in_features=768, out_features=768, bias=True)\n",
- " (attention_dropout): Dropout(p=0, inplace=False)\n",
- " (output_dropout): Dropout(p=0, inplace=False)\n",
- " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
- " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
- " )\n",
- " (ffn): Linear(in_features=768, out_features=3072, bias=True)\n",
- " (ffn_output): Linear(in_features=3072, out_features=768, bias=True)\n",
- " (dropout): Dropout(p=0, inplace=False)\n",
- " )\n",
- " )\n",
- " )\n",
- " )\n",
- " )\n",
- " (pooler): Linear(in_features=768, out_features=768, bias=True)\n",
- " (pooler_activation): Tanh()\n",
- " )\n",
- " (dropout): Dropout(p=0.1, inplace=False)\n",
- " (classifier): Linear(in_features=768, out_features=2, bias=True)\n",
- ") END---\n",
- "Training with hyperparameters: batch size=32, lr=2e-05\n",
- "\n",
- "======== Epoch 1 / 3 ========\n",
- "Training...\n",
- " Batch 40 of 2,813. Elapsed: 0:00:19.\n",
- " Batch 80 of 2,813. Elapsed: 0:00:38.\n",
- " Batch 120 of 2,813. Elapsed: 0:00:56.\n",
- " Batch 160 of 2,813. Elapsed: 0:01:15.\n",
- " Batch 200 of 2,813. Elapsed: 0:01:33.\n",
- " Batch 240 of 2,813. Elapsed: 0:01:52.\n",
- " Batch 280 of 2,813. Elapsed: 0:02:11.\n",
- " Batch 320 of 2,813. Elapsed: 0:02:29.\n",
- " Batch 360 of 2,813. Elapsed: 0:02:48.\n",
- " Batch 400 of 2,813. Elapsed: 0:03:06.\n",
- " Batch 440 of 2,813. Elapsed: 0:03:25.\n",
- " Batch 480 of 2,813. Elapsed: 0:03:44.\n",
- " Batch 520 of 2,813. Elapsed: 0:04:02.\n",
- " Batch 560 of 2,813. Elapsed: 0:04:21.\n",
- " Batch 600 of 2,813. Elapsed: 0:04:39.\n",
- " Batch 640 of 2,813. Elapsed: 0:04:58.\n",
- " Batch 680 of 2,813. Elapsed: 0:05:17.\n",
- " Batch 720 of 2,813. Elapsed: 0:05:35.\n",
- " Batch 760 of 2,813. Elapsed: 0:05:54.\n",
- " Batch 800 of 2,813. Elapsed: 0:06:13.\n",
- " Batch 840 of 2,813. Elapsed: 0:06:31.\n",
- " Batch 880 of 2,813. Elapsed: 0:06:50.\n",
- " Batch 920 of 2,813. Elapsed: 0:07:08.\n",
- " Batch 960 of 2,813. Elapsed: 0:07:27.\n",
- " Batch 1,000 of 2,813. Elapsed: 0:07:46.\n",
- " Batch 1,040 of 2,813. Elapsed: 0:08:04.\n",
- " Batch 1,080 of 2,813. Elapsed: 0:08:23.\n",
- " Batch 1,120 of 2,813. Elapsed: 0:08:41.\n",
- " Batch 1,160 of 2,813. Elapsed: 0:09:00.\n",
- " Batch 1,200 of 2,813. Elapsed: 0:09:19.\n",
- " Batch 1,240 of 2,813. Elapsed: 0:09:37.\n",
- " Batch 1,280 of 2,813. Elapsed: 0:09:56.\n",
- " Batch 1,320 of 2,813. Elapsed: 0:10:14.\n",
- " Batch 1,360 of 2,813. Elapsed: 0:10:33.\n",
- " Batch 1,400 of 2,813. Elapsed: 0:10:52.\n",
- " Batch 1,440 of 2,813. Elapsed: 0:11:10.\n",
- " Batch 1,480 of 2,813. Elapsed: 0:11:29.\n",
- " Batch 1,520 of 2,813. Elapsed: 0:11:48.\n",
- " Batch 1,560 of 2,813. Elapsed: 0:12:06.\n",
- " Batch 1,600 of 2,813. Elapsed: 0:12:25.\n",
- " Batch 1,640 of 2,813. Elapsed: 0:12:43.\n",
- " Batch 1,680 of 2,813. Elapsed: 0:13:02.\n",
- " Batch 1,720 of 2,813. Elapsed: 0:13:21.\n",
- " Batch 1,760 of 2,813. Elapsed: 0:13:39.\n",
- " Batch 1,800 of 2,813. Elapsed: 0:13:58.\n",
- " Batch 1,840 of 2,813. Elapsed: 0:14:16.\n",
- " Batch 1,880 of 2,813. Elapsed: 0:14:35.\n",
- " Batch 1,920 of 2,813. Elapsed: 0:14:54.\n",
- " Batch 1,960 of 2,813. Elapsed: 0:15:12.\n",
- " Batch 2,000 of 2,813. Elapsed: 0:15:31.\n",
- " Batch 2,040 of 2,813. Elapsed: 0:15:49.\n",
- " Batch 2,080 of 2,813. Elapsed: 0:16:08.\n",
- " Batch 2,120 of 2,813. Elapsed: 0:16:27.\n",
- " Batch 2,160 of 2,813. Elapsed: 0:16:45.\n",
- " Batch 2,200 of 2,813. Elapsed: 0:17:04.\n",
- " Batch 2,240 of 2,813. Elapsed: 0:17:23.\n",
- " Batch 2,280 of 2,813. Elapsed: 0:17:41.\n",
- " Batch 2,320 of 2,813. Elapsed: 0:18:00.\n",
- " Batch 2,360 of 2,813. Elapsed: 0:18:18.\n",
- " Batch 2,400 of 2,813. Elapsed: 0:18:37.\n",
- " Batch 2,440 of 2,813. Elapsed: 0:18:56.\n",
- " Batch 2,480 of 2,813. Elapsed: 0:19:14.\n",
- " Batch 2,520 of 2,813. Elapsed: 0:19:33.\n",
- " Batch 2,560 of 2,813. Elapsed: 0:19:51.\n",
- " Batch 2,600 of 2,813. Elapsed: 0:20:10.\n",
- " Batch 2,640 of 2,813. Elapsed: 0:20:29.\n",
- " Batch 2,680 of 2,813. Elapsed: 0:20:47.\n",
- " Batch 2,720 of 2,813. Elapsed: 0:21:06.\n",
- " Batch 2,760 of 2,813. Elapsed: 0:21:24.\n",
- " Batch 2,800 of 2,813. Elapsed: 0:21:43.\n",
- "\n",
- " Average training loss: 0.69\n",
- " Training epcoh took: 0:21:49\n",
- "\n",
- "Running Validation...\n",
- " Accuracy: 0.56\n",
- " Validation Loss: 0.70\n",
- " Validation took: 0:00:51\n",
- "\n",
- "======== Epoch 2 / 3 ========\n",
- "Training...\n",
- " Batch 40 of 2,813. Elapsed: 0:00:19.\n",
- " Batch 80 of 2,813. Elapsed: 0:00:37.\n",
- " Batch 120 of 2,813. Elapsed: 0:00:56.\n",
- " Batch 160 of 2,813. Elapsed: 0:01:14.\n",
- " Batch 200 of 2,813. Elapsed: 0:01:33.\n",
- " Batch 240 of 2,813. Elapsed: 0:01:52.\n",
- " Batch 280 of 2,813. Elapsed: 0:02:10.\n",
- " Batch 320 of 2,813. Elapsed: 0:02:29.\n",
- " Batch 360 of 2,813. Elapsed: 0:02:47.\n",
- " Batch 400 of 2,813. Elapsed: 0:03:06.\n",
- " Batch 440 of 2,813. Elapsed: 0:03:25.\n",
- " Batch 480 of 2,813. Elapsed: 0:03:43.\n",
- " Batch 520 of 2,813. Elapsed: 0:04:02.\n",
- " Batch 560 of 2,813. Elapsed: 0:04:21.\n",
- " Batch 600 of 2,813. Elapsed: 0:04:39.\n",
- " Batch 640 of 2,813. Elapsed: 0:04:58.\n",
- " Batch 680 of 2,813. Elapsed: 0:05:16.\n",
- " Batch 720 of 2,813. Elapsed: 0:05:35.\n",
- " Batch 760 of 2,813. Elapsed: 0:05:54.\n",
- " Batch 800 of 2,813. Elapsed: 0:06:12.\n",
- " Batch 840 of 2,813. Elapsed: 0:06:31.\n",
- " Batch 880 of 2,813. Elapsed: 0:06:49.\n",
- " Batch 920 of 2,813. Elapsed: 0:07:08.\n",
- " Batch 960 of 2,813. Elapsed: 0:07:27.\n",
- " Batch 1,000 of 2,813. Elapsed: 0:07:45.\n",
- " Batch 1,040 of 2,813. Elapsed: 0:08:04.\n",
- " Batch 1,080 of 2,813. Elapsed: 0:08:22.\n",
- " Batch 1,120 of 2,813. Elapsed: 0:08:41.\n",
- " Batch 1,160 of 2,813. Elapsed: 0:09:00.\n",
- " Batch 1,200 of 2,813. Elapsed: 0:09:18.\n",
- " Batch 1,240 of 2,813. Elapsed: 0:09:37.\n",
- " Batch 1,280 of 2,813. Elapsed: 0:09:55.\n",
- " Batch 1,320 of 2,813. Elapsed: 0:10:14.\n",
- " Batch 1,360 of 2,813. Elapsed: 0:10:33.\n",
- " Batch 1,400 of 2,813. Elapsed: 0:10:51.\n",
- " Batch 1,440 of 2,813. Elapsed: 0:11:10.\n",
- " Batch 1,480 of 2,813. Elapsed: 0:11:28.\n",
- " Batch 1,520 of 2,813. Elapsed: 0:11:47.\n",
- " Batch 1,560 of 2,813. Elapsed: 0:12:06.\n",
- " Batch 1,600 of 2,813. Elapsed: 0:12:24.\n",
- " Batch 1,640 of 2,813. Elapsed: 0:12:43.\n",
- " Batch 1,680 of 2,813. Elapsed: 0:13:01.\n",
- " Batch 1,720 of 2,813. Elapsed: 0:13:20.\n",
- " Batch 1,760 of 2,813. Elapsed: 0:13:39.\n",
- " Batch 1,800 of 2,813. Elapsed: 0:13:57.\n",
- " Batch 1,840 of 2,813. Elapsed: 0:14:16.\n",
- " Batch 1,880 of 2,813. Elapsed: 0:14:34.\n",
- " Batch 1,920 of 2,813. Elapsed: 0:14:53.\n",
- " Batch 1,960 of 2,813. Elapsed: 0:15:12.\n",
- " Batch 2,000 of 2,813. Elapsed: 0:15:30.\n",
- " Batch 2,040 of 2,813. Elapsed: 0:15:49.\n",
- " Batch 2,080 of 2,813. Elapsed: 0:16:07.\n",
- " Batch 2,120 of 2,813. Elapsed: 0:16:26.\n",
- " Batch 2,160 of 2,813. Elapsed: 0:16:45.\n",
- " Batch 2,200 of 2,813. Elapsed: 0:17:03.\n",
- " Batch 2,240 of 2,813. Elapsed: 0:17:22.\n",
- " Batch 2,280 of 2,813. Elapsed: 0:17:40.\n",
- " Batch 2,320 of 2,813. Elapsed: 0:17:59.\n",
- " Batch 2,360 of 2,813. Elapsed: 0:18:18.\n",
- " Batch 2,400 of 2,813. Elapsed: 0:18:36.\n",
- " Batch 2,440 of 2,813. Elapsed: 0:18:55.\n",
- " Batch 2,480 of 2,813. Elapsed: 0:19:13.\n",
- " Batch 2,520 of 2,813. Elapsed: 0:19:32.\n",
- " Batch 2,560 of 2,813. Elapsed: 0:19:51.\n",
- " Batch 2,600 of 2,813. Elapsed: 0:20:09.\n",
- " Batch 2,640 of 2,813. Elapsed: 0:20:28.\n",
- " Batch 2,680 of 2,813. Elapsed: 0:20:46.\n",
- " Batch 2,720 of 2,813. Elapsed: 0:21:05.\n",
- " Batch 2,760 of 2,813. Elapsed: 0:21:24.\n",
- " Batch 2,800 of 2,813. Elapsed: 0:21:42.\n",
- "\n",
- " Average training loss: 0.69\n",
- " Training epcoh took: 0:21:48\n",
- "\n",
- "Running Validation...\n",
- " Accuracy: 0.56\n",
- " Validation Loss: 0.69\n",
- " Validation took: 0:00:51\n",
- "\n",
- "======== Epoch 3 / 3 ========\n",
- "Training...\n",
- " Batch 40 of 2,813. Elapsed: 0:00:19.\n",
- " Batch 80 of 2,813. Elapsed: 0:00:37.\n",
- " Batch 120 of 2,813. Elapsed: 0:00:56.\n",
- " Batch 160 of 2,813. Elapsed: 0:01:14.\n",
- " Batch 200 of 2,813. Elapsed: 0:01:33.\n",
- " Batch 240 of 2,813. Elapsed: 0:01:52.\n",
- " Batch 280 of 2,813. Elapsed: 0:02:10.\n",
- " Batch 320 of 2,813. Elapsed: 0:02:29.\n",
- " Batch 360 of 2,813. Elapsed: 0:02:47.\n",
- " Batch 400 of 2,813. Elapsed: 0:03:06.\n",
- " Batch 440 of 2,813. Elapsed: 0:03:25.\n",
- " Batch 480 of 2,813. Elapsed: 0:03:43.\n",
- " Batch 520 of 2,813. Elapsed: 0:04:02.\n",
- " Batch 560 of 2,813. Elapsed: 0:04:21.\n",
- " Batch 600 of 2,813. Elapsed: 0:04:39.\n",
- " Batch 640 of 2,813. Elapsed: 0:04:58.\n",
- " Batch 680 of 2,813. Elapsed: 0:05:16.\n",
- " Batch 720 of 2,813. Elapsed: 0:05:35.\n",
- " Batch 760 of 2,813. Elapsed: 0:05:54.\n",
- " Batch 800 of 2,813. Elapsed: 0:06:12.\n",
- " Batch 840 of 2,813. Elapsed: 0:06:31.\n",
- " Batch 880 of 2,813. Elapsed: 0:06:49.\n",
- " Batch 920 of 2,813. Elapsed: 0:07:08.\n",
- " Batch 960 of 2,813. Elapsed: 0:07:27.\n",
- " Batch 1,000 of 2,813. Elapsed: 0:07:45.\n",
- " Batch 1,040 of 2,813. Elapsed: 0:08:04.\n",
- " Batch 1,080 of 2,813. Elapsed: 0:08:22.\n",
- " Batch 1,120 of 2,813. Elapsed: 0:08:41.\n",
- " Batch 1,160 of 2,813. Elapsed: 0:09:00.\n",
- " Batch 1,200 of 2,813. Elapsed: 0:09:18.\n",
- " Batch 1,240 of 2,813. Elapsed: 0:09:37.\n",
- " Batch 1,280 of 2,813. Elapsed: 0:09:55.\n",
- " Batch 1,320 of 2,813. Elapsed: 0:10:14.\n",
- " Batch 1,360 of 2,813. Elapsed: 0:10:33.\n",
- " Batch 1,400 of 2,813. Elapsed: 0:10:51.\n",
- " Batch 1,440 of 2,813. Elapsed: 0:11:10.\n",
- " Batch 1,480 of 2,813. Elapsed: 0:11:29.\n",
- " Batch 1,520 of 2,813. Elapsed: 0:11:47.\n",
- " Batch 1,560 of 2,813. Elapsed: 0:12:06.\n",
- " Batch 1,600 of 2,813. Elapsed: 0:12:24.\n",
- " Batch 1,640 of 2,813. Elapsed: 0:12:43.\n",
- " Batch 1,680 of 2,813. Elapsed: 0:13:02.\n",
- " Batch 1,720 of 2,813. Elapsed: 0:13:20.\n",
- " Batch 1,760 of 2,813. Elapsed: 0:13:39.\n",
- " Batch 1,800 of 2,813. Elapsed: 0:13:57.\n",
- " Batch 1,840 of 2,813. Elapsed: 0:14:16.\n",
- " Batch 1,880 of 2,813. Elapsed: 0:14:35.\n",
- " Batch 1,920 of 2,813. Elapsed: 0:14:53.\n",
- " Batch 1,960 of 2,813. Elapsed: 0:15:12.\n",
- " Batch 2,000 of 2,813. Elapsed: 0:15:30.\n",
- " Batch 2,040 of 2,813. Elapsed: 0:15:49.\n",
- " Batch 2,080 of 2,813. Elapsed: 0:16:08.\n",
- " Batch 2,120 of 2,813. Elapsed: 0:16:26.\n",
- " Batch 2,160 of 2,813. Elapsed: 0:16:45.\n",
- " Batch 2,200 of 2,813. Elapsed: 0:17:03.\n",
- " Batch 2,240 of 2,813. Elapsed: 0:17:22.\n",
- " Batch 2,280 of 2,813. Elapsed: 0:17:41.\n",
- " Batch 2,320 of 2,813. Elapsed: 0:17:59.\n",
- " Batch 2,360 of 2,813. Elapsed: 0:18:18.\n",
- " Batch 2,400 of 2,813. Elapsed: 0:18:37.\n",
- " Batch 2,440 of 2,813. Elapsed: 0:18:55.\n",
- " Batch 2,480 of 2,813. Elapsed: 0:19:14.\n",
- " Batch 2,520 of 2,813. Elapsed: 0:19:32.\n",
- " Batch 2,560 of 2,813. Elapsed: 0:19:51.\n",
- " Batch 2,600 of 2,813. Elapsed: 0:20:10.\n",
- " Batch 2,640 of 2,813. Elapsed: 0:20:28.\n",
- " Batch 2,680 of 2,813. Elapsed: 0:20:47.\n",
- " Batch 2,720 of 2,813. Elapsed: 0:21:05.\n",
- " Batch 2,760 of 2,813. Elapsed: 0:21:24.\n",
- " Batch 2,800 of 2,813. Elapsed: 0:21:43.\n",
- "\n",
- " Average training loss: 0.69\n",
- " Training epcoh took: 0:21:49\n",
- "\n",
- "Running Validation...\n",
- " Accuracy: 0.56\n",
- " Validation Loss: 0.69\n",
- " Validation took: 0:00:51\n",
- "\n",
- "Training complete!\n",
- "Total training took 1:07:58 (h:mm:ss)\n",
- " Training Loss Valid. Loss Valid. Accur. Training Time Validation Time\n",
- "epoch \n",
- "1 0.69 0.70 0.56 0:21:49 0:00:51\n",
- "2 0.69 0.69 0.56 0:21:48 0:00:51\n",
- "3 0.69 0.69 0.56 0:21:49 0:00:51\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "_K_vQ7feAS_z"
- },
- "source": [
- "model_save_name = 'finetuned_Albert.bin'\n",
- "path = F\"/content/gdrive/My Drive/ALBERTimplementation/model-fine-train/\"+model_save_name\n",
- "torch.save(model.state_dict(), path)"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "Lu-gJG4JbD37"
- },
- "source": [
- "# Li method"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "vh_aZP7kbH0E"
- },
- "source": [
- "# At the time of writing, Hugging face didnt provide the class object for \n",
- "# AlbertForTokenClassification, hence write your own defination below\n",
- "from transformers.modeling_albert import AlbertModel, load_tf_weights_in_albert, AlbertPreTrainedModel\n",
- "from transformers.configuration_albert import AlbertConfig\n",
- "from transformers.tokenization_bert import BertTokenizer\n",
- "import torch.nn as nn\n",
- "from torch.nn import CrossEntropyLoss\n",
- "class AlbertForTokenClassification(AlbertPreTrainedModel):\n",
- "\n",
- " def __init__(self, albert, config):\n",
- " super().__init__(config)\n",
- " self.num_labels = config.num_labels\n",
- "\n",
- " self.albert = albert\n",
- " self.dropout = nn.Dropout(config.hidden_dropout_prob)\n",
- " self.classifier = nn.Linear(config.hidden_size, config.num_labels)\n",
- "\n",
- " def forward(\n",
- " self,\n",
- " input_ids=None,\n",
- " attention_mask=None,\n",
- " token_type_ids=None,\n",
- " position_ids=None,\n",
- " head_mask=None,\n",
- " inputs_embeds=None,\n",
- " labels=None,\n",
- " ):\n",
- "\n",
- " outputs = self.albert(\n",
- " input_ids,\n",
- " attention_mask=attention_mask,\n",
- " token_type_ids=token_type_ids,\n",
- " position_ids=position_ids,\n",
- " head_mask=head_mask,\n",
- " inputs_embeds=inputs_embeds,\n",
- " )\n",
- "\n",
- " sequence_output = outputs[0]\n",
- "\n",
- " sequence_output = self.dropout(sequence_output)\n",
- " logits = self.classifier(sequence_output)\n",
- "\n",
- " return logits"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "k2W7RmZcbSwf"
- },
- "source": [
- "VOCAB_FILE = \"/content/gdrive/My Drive/ALBERTimplementation/AG_News/vocab.txt\" # This is the vocab file output from Build Vocab step\n",
- "CONFIG_FILE = \"/content/gdrive/My Drive/ALBERTimplementation/AG_News/albert_config.json\"\n",
- "ALBERT_PRETRAIN_CHECKPOINT = \"/content/gdrive/My Drive/ALBERTimplementation/AG_News/model.ckpt-best.index\" # This is the model checkpoint output from Albert Pretrain step\n",
- "tokenizer = BertTokenizer(vocab_file=VOCAB_FILE)\n",
- "config = AlbertConfig.from_json_file(CONFIG_FILE)\n",
- "model = AlbertModel(config)\n",
- "model = load_tf_weights_in_albert(model, config,ALBERT_PRETRAIN_CHECKPOINT)\n",
- "# If the variables not able to be initialized are only for the MLM and sequence order prediction task\n",
- "# Then the error could be ignored\n",
- "# As that is not required for the AlbertForTokenClassification we are trying to build here"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "WtE_Lb7josh4"
- },
- "source": [
- "# df = pd.read_csv('/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/validation.tsv', delimiter='\\t')\n",
- "# df.isna().values.any()\n",
- "# df['text'] = df['text'].fillna('0')\n",
- "# df.to_csv('/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/validation.tsv',sep='\\t' ,header=True, index=False)"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "vtZ6AzKxba9r",
- "outputId": "9ee3e15e-b857-422d-efaf-aafc86bdf7ce",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 460
- }
- },
- "source": [
- "# train_data = pd.read_csv('/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/train.tsv', sep='\\t')\n",
- "# print(\"Number of training examples {}\".format(len(train_data)))\n",
- "# num_examples = 100000\n",
- "# train = train_data[:num_examples].text.values\n",
- "# labels = train_data[:num_examples].label.values\n",
- "TRAIN_FILE = \"/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/train.tsv\"\n",
- "EVAL_FILE = \"/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/validation.tsv\"\n",
- "\n",
- "import numpy as np\n",
- "def label_sent(name_tokens, sent_tokens):\n",
- " label = []\n",
- " i = 0\n",
- " if len(name_tokens)>len(sent_tokens):\n",
- " label = np.zeros(len(sent_tokens))\n",
- " else:\n",
- " while i=len(sent_tokens)):\n",
- " return label\n",
- " if name_tokens[j+1] != sent_tokens[i+j+1]:\n",
- " found_match = False\n",
- " if found_match:\n",
- " label.extend(list(np.ones(len(name_tokens)).astype(int)))\n",
- " i = i + len(name_tokens)\n",
- " else: \n",
- " label.extend([0])\n",
- " i = i+ 1\n",
- " else:\n",
- " label.extend([0])\n",
- " i=i+1\n",
- " return label\n",
- "\n",
- "import pandas as pd\n",
- "df_data_train = pd.read_csv(TRAIN_FILE, sep='\\t')\n",
- "df_data_train['review_tokens'] = df_data_train.text.apply(tokenizer.tokenize)\n",
- "# df_data_train['dish_name_tokens'] = df_data_train.dish_name_tokens.apply(tokenizer.tokenize)\n",
- "# df_data_train['review_labels'] = df_data_train.apply(lambda row: label_sent(row['dish_name_tokens'] row['review_tokens']), axis=1)\n",
- "df_data_train['review_labels'] = df_data_train.label.values\n",
- "df_data_val = pd.read_csv(EVAL_FILE, sep='\\t')\n",
- "df_data_val['review_tokens'] = df_data_val.text.apply(tokenizer.tokenize)\n",
- "# df_data_val['dish_name_tokens'] = df_data_val.dish_name_tokens.apply(tokenizer.tokenize)\n",
- "# df_data_val['review_labels'] = df_data_val.apply(lambda row: label_sent(row['dish_name_tokens'] row['review_tokens']), axis=1)\n",
- "df_data_val['review_labels'] = df_data_val.label.values\n",
- "\n",
- "MAX_LEN = 50\n",
- "BATCH_SIZE = 1\n",
- "from keras.preprocessing.sequence import pad_sequences\n",
- "import torch\n",
- "from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler\n",
- "\n",
- "tr_inputs = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in df_data_train['review_tokens']],\n",
- " maxlen=MAX_LEN, dtype=\"long\", truncating=\"post\", padding=\"post\")\n",
- "tr_tags = pad_sequences(df_data_train['review_labels'],\n",
- " maxlen=MAX_LEN, padding=\"post\", #changed max len here --Muku\n",
- " dtype=\"long\", truncating=\"post\")\n",
- "# create the mask to ignore the padded elements in the sequences.\n",
- "tr_masks = [[float(i>0) for i in ii] for ii in tr_inputs]\n",
- "tr_inputs = torch.tensor(tr_inputs)\n",
- "tr_tags = torch.tensor(tr_tags)\n",
- "tr_masks = torch.tensor(tr_masks)\n",
- "train_data = TensorDataset(tr_inputs, tr_masks, tr_tags)\n",
- "train_sampler = RandomSampler(train_data)\n",
- "train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=BATCH_SIZE)\n",
- "\n",
- "\n",
- "val_inputs = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in df_data_val['review_tokens']],\n",
- " maxlen=MAX_LEN, dtype=\"long\", truncating=\"post\", padding=\"post\")\n",
- "val_tags = pad_sequences(df_data_val['review_labels'],\n",
- " maxlen=MAX_LEN, padding=\"post\",\n",
- " dtype=\"long\", truncating=\"post\")\n",
- "# create the mask to ignore the padded elements in the sequences.\n",
- "val_masks = [[float(i>0) for i in ii] for ii in val_inputs]\n",
- "val_inputs = torch.tensor(val_inputs)\n",
- "val_tags = torch.tensor(val_tags)\n",
- "val_masks = torch.tensor(val_masks)\n",
- "val_data = TensorDataset(val_inputs, val_masks, val_tags)\n",
- "val_sampler = RandomSampler(val_data)\n",
- "val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=BATCH_SIZE)"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "error",
- "ename": "ValueError",
- "evalue": "ignored",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras_preprocessing/sequence.py\u001b[0m in \u001b[0;36mpad_sequences\u001b[0;34m(sequences, maxlen, dtype, padding, truncating, value)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 68\u001b[0;31m \u001b[0mlengths\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 69\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mflag\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mTypeError\u001b[0m: object of type 'int' has no len()",
- "\nDuring handling of the above exception, another exception occurred:\n",
- "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 56\u001b[0m tr_tags = pad_sequences(df_data_train['review_labels'],\n\u001b[1;32m 57\u001b[0m \u001b[0mmaxlen\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mMAX_LEN\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpadding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"post\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m#changed max len here --Muku\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 58\u001b[0;31m dtype=\"long\", truncating=\"post\")\n\u001b[0m\u001b[1;32m 59\u001b[0m \u001b[0;31m# create the mask to ignore the padded elements in the sequences.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[0mtr_masks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mii\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mii\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtr_inputs\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/preprocessing/sequence.py\u001b[0m in \u001b[0;36mpad_sequences\u001b[0;34m(sequences, maxlen, dtype, padding, truncating, value)\u001b[0m\n",
- "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras_preprocessing/sequence.py\u001b[0m in \u001b[0;36mpad_sequences\u001b[0;34m(sequences, maxlen, dtype, padding, truncating, value)\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 73\u001b[0m raise ValueError('`sequences` must be a list of iterables. '\n\u001b[0;32m---> 74\u001b[0;31m 'Found non-iterable: ' + str(x))\n\u001b[0m\u001b[1;32m 75\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 76\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmaxlen\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mValueError\u001b[0m: `sequences` must be a list of iterables. Found non-iterable: 1"
- ]
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "UvGdYSVUbi6H"
- },
- "source": [
- "model_tokenclassification = AlbertForTokenClassification(model, config)\n",
- "from torch.optim import Adam\n",
- "LEARNING_RATE = 0.000001\n",
- "FULL_FINETUNING = True\n",
- "if FULL_FINETUNING:\n",
- " param_optimizer = list(model_tokenclassification.named_parameters())\n",
- " no_decay = ['bias', 'gamma', 'beta']\n",
- " optimizer_grouped_parameters = [\n",
- " {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],\n",
- " 'weight_decay_rate': 0.01},\n",
- " {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],\n",
- " 'weight_decay_rate': 0.0}\n",
- " ]\n",
- "else:\n",
- " param_optimizer = list(model_tokenclassification.classifier.named_parameters()) \n",
- " optimizer_grouped_parameters = [{\"params\": [p for n, p in param_optimizer]}]\n",
- "optimizer = Adam(optimizer_grouped_parameters, lr=LEARNING_RATE)"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "0a4vteOVbpkB"
- },
- "source": [
- "\n",
- "# from torch.utils.tensorboard import SummaryWriter\n",
- "import time\n",
- "import os.path\n",
- "import torch.nn as nn\n",
- "EPOCH = 5\n",
- "MAX_GRAD_NORM = 1.0\n",
- "ALBERT_FINETUNE_CHECKPOINT = \"outputs/finetune_checkpoint_5epoch_50neg_1e-5lr\"\n",
- "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
- "if torch.cuda.device_count() > 1:\n",
- " print(\"Let's use\", torch.cuda.device_count(), \"GPUs!\")\n",
- " model_tokenclassification = nn.DataParallel(model_tokenclassification)\n",
- "model_tokenclassification.to(device)\n",
- "if os.path.isfile(ALBERT_FINETUNE_CHECKPOINT):\n",
- " print(f\"--- Load from checkpoint ---\")\n",
- " checkpoint = torch.load(ALBERT_FINETUNE_CHECKPOINT)\n",
- " model_tokenclassification.load_state_dict(checkpoint['model_state_dict'])\n",
- " optimizer.load_state_dict(checkpoint['optimizer_state_dict'])\n",
- " epoch = checkpoint['epoch']\n",
- " loss = checkpoint['loss']\n",
- " train_losses = checkpoint['train_losses']\n",
- " train_acc = checkpoint['train_acc']\n",
- " val_losses = checkpoint['val_losses']\n",
- " val_acc = checkpoint['val_acc']\n",
- " \n",
- "else:\n",
- " epoch = -1\n",
- " train_losses, train_acc, val_losses, val_acc = [], [], [], []\n",
- "print(f\"--- Resume/Start training ---\") \n",
- "for i in range(epoch+1, EPOCH): \n",
- " print(f\"--- epoch: {i} ---\")\n",
- " start_time = time.time()\n",
- " \n",
- " # TRAIN loop\n",
- " model_tokenclassification.train()\n",
- " tr_loss, tr_acc, nb_tr_steps = 0, 0, 0\n",
- " for step, batch in enumerate(train_dataloader):\n",
- " # add batch to gpu\n",
- " batch = tuple(t.to(device) for t in batch)\n",
- " b_input_ids, b_input_mask, b_labels = batch\n",
- " # forward pass\n",
- " b_outputs = model_tokenclassification(b_input_ids, token_type_ids=None,\n",
- " attention_mask=b_input_mask, labels=b_labels)\n",
- " \n",
- " loss_fct = CrossEntropyLoss()\n",
- " # Only keep active parts of the loss\n",
- " b_active_loss = b_input_mask.view(-1) == 1\n",
- " b_active_logits = b_outputs.view(-1, config.num_labels)[b_active_loss]\n",
- " b_active_labels = b_labels.view(-1)[b_active_loss]\n",
- " loss = loss_fct(b_active_logits, b_active_labels)\n",
- " acc = torch.mean((torch.max(b_active_logits.detach(),1)[1] == b_active_labels.detach()).float())\n",
- " \n",
- " train_losses.append(loss.detach().item())\n",
- " train_acc.append(acc)\n",
- " # backward pass\n",
- " loss.backward()\n",
- " # track train loss\n",
- " tr_loss += loss.item()\n",
- " tr_acc += acc\n",
- " nb_tr_steps += 1\n",
- " # gradient clipping\n",
- " torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=MAX_GRAD_NORM)\n",
- " # update parameters\n",
- " optimizer.step()\n",
- " model.zero_grad()\n",
- "\n",
- " # print train loss per epoch\n",
- " print(f\"Train loss: {(tr_loss/nb_tr_steps)}\")\n",
- " print(f\"Train Accuracy: {(tr_acc/nb_tr_steps)}\")\n",
- " print(f\"Train Time: {(time.time()-start_time)/60} mins\")\n",
- "\n",
- " # VALIDATION on validation set\n",
- " start_time = time.time()\n",
- " model_tokenclassification.eval()\n",
- " eval_loss, eval_acc = 0, 0\n",
- " nb_eval_steps = 0\n",
- " for batch in val_dataloader:\n",
- " batch = tuple(t.to(device) for t in batch)\n",
- " b_input_ids, b_input_mask, b_labels = batch\n",
- "\n",
- " with torch.no_grad():\n",
- " \n",
- " b_outputs = model_tokenclassification(b_input_ids, token_type_ids=None,\n",
- " attention_mask=b_input_mask, labels=b_labels)\n",
- "\n",
- " loss_fct = CrossEntropyLoss()\n",
- " # Only keep active parts of the loss\n",
- " b_active_loss = b_input_mask.view(-1) == 1\n",
- " b_active_logits = b_outputs.view(-1, config.num_labels)[b_active_loss]\n",
- " b_active_labels = b_labels.view(-1)[b_active_loss]\n",
- " loss = loss_fct(b_active_logits, b_active_labels)\n",
- " acc = np.mean(np.argmax(b_active_logits.detach().cpu().numpy(), axis=1).flatten() == b_active_labels.detach().cpu().numpy().flatten())\n",
- "\n",
- " eval_loss += loss.mean().item()\n",
- " eval_acc += acc\n",
- " nb_eval_steps += 1 \n",
- " eval_loss = eval_loss/nb_eval_steps\n",
- " eval_acc = eval_acc/nb_eval_steps\n",
- " val_losses.append(eval_loss)\n",
- " val_acc.append(eval_acc)\n",
- " print(f\"Validation loss: {eval_loss}\")\n",
- " print(f\"Validation Accuracy: {(eval_acc)}\")\n",
- " print(f\"Validation Time: {(time.time()-start_time)/60} mins\") \n",
- " \n",
- " \n",
- " print(f\"--- Save to checkpoint ---\") \n",
- " torch.save({\n",
- " 'epoch': i,\n",
- " 'model_state_dict': model_tokenclassification.state_dict(),\n",
- " 'optimizer_state_dict': optimizer.state_dict(),\n",
- " 'loss': loss,\n",
- " 'train_losses': train_losses,\n",
- " 'train_acc': train_acc,\n",
- " 'val_losses': val_losses,\n",
- " 'val_acc': val_acc}\n",
- " , ALBERT_FINETUNE_CHECKPOINT)"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "skGJUzsJbwYn"
- },
- "source": [
- "\n",
- "def predict(texts):\n",
- " tokenized_texts = [tokenizer.tokenize(txt) for txt in texts]\n",
- " input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],\n",
- " maxlen=MAX_LEN, dtype=\"long\", truncating=\"post\", padding=\"post\")\n",
- " attention_mask = [[float(i>0) for i in ii] for ii in input_ids]\n",
- " \n",
- " input_ids = torch.tensor(input_ids)\n",
- " attention_mask = torch.tensor(attention_mask)\n",
- "\n",
- " dataset = TensorDataset(input_ids, attention_mask)\n",
- " datasampler = SequentialSampler(dataset)\n",
- " dataloader = DataLoader(dataset, sampler=datasampler, batch_size=BATCH_SIZE) \n",
- " \n",
- " predicted_labels = []\n",
- " \n",
- " for batch in dataloader:\n",
- " batch = tuple(t.to(device) for t in batch)\n",
- " b_input_ids, b_input_mask = batch\n",
- " \n",
- " with torch.no_grad():\n",
- " logits = model_tokenclassification(b_input_ids, token_type_ids=None,\n",
- " attention_mask=b_input_mask)\n",
- "\n",
- " predicted_labels.append(np.multiply(np.argmax(logits.detach().cpu().numpy(),axis=2), b_input_mask.detach().cpu().numpy()))\n",
- " # np.concatenate(predicted_labels), to flatten list of arrays of batch_size * max_len into list of arrays of max_len\n",
- " return np.concatenate(predicted_labels).astype(int), tokenized_texts\n",
- "\n",
- "texts = df_data_val.review.values\n",
- "predicted_labels, _ = predict(texts)\n",
- "df_data_val['predicted_review_label'] = list(predicted_labels)\n",
- "\n",
- "def get_dish_candidate_names(predicted_label, tokenized_text):\n",
- " name_lists = []\n",
- " if len(np.where(predicted_label>0)[0])>0:\n",
- " name_idx_combined = np.where(predicted_label>0)[0]\n",
- " name_idxs = np.split(name_idx_combined, np.where(np.diff(name_idx_combined) != 1)[0]+1)\n",
- " name_lists.append([\" \".join(np.take(tokenized_text,name_idx)) for name_idx in name_idxs])\n",
- " # If there duplicate names in the name_lists\n",
- " name_lists = np.unique(name_lists)\n",
- " return name_lists\n",
- " else:\n",
- " return None\n",
- "df_data_val['candidate_name']=df_data_val.apply(lambda row: get_dish_candidate_names(row.predicted_review_label, row.review_tokens)\n",
- " , axis=1)"
- ],
- "execution_count": null,
- "outputs": []
- }
- ]
-}
\ No newline at end of file