diff --git a/fine_tuning_ALBERT.ipynb b/fine_tuning_ALBERT.ipynb deleted file mode 100644 index 143f5c9..0000000 --- a/fine_tuning_ALBERT.ipynb +++ /dev/null @@ -1,1971 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "fine-tuning ALBERT.ipynb", - "provenance": [], - "authorship_tag": "ABX9TyPQ0neoGAreJPtIYFOz3s34", - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "accelerator": "GPU" - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "9ulHtYtNrQ8r", - "outputId": "4d47a5fc-ce15-49bd-89cc-c86e8d6debe9", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - } - }, - "source": [ - "!pip install keras\n", - "!pip install tensorflow\n", - "!pip install transformers\n", - "!pip3 install albert-tensorflow\n", - "!pip install torch\n", - "!pip install sentencepiece" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Requirement already satisfied: keras in /usr/local/lib/python3.6/dist-packages (2.4.3)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from keras) (3.13)\n", - "Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from keras) (2.10.0)\n", - "Requirement already satisfied: numpy>=1.9.1 in /usr/local/lib/python3.6/dist-packages (from keras) (1.18.5)\n", - "Requirement already satisfied: scipy>=0.14 in /usr/local/lib/python3.6/dist-packages (from keras) (1.4.1)\n", - "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from h5py->keras) (1.15.0)\n", - "Requirement already satisfied: tensorflow in /usr/local/lib/python3.6/dist-packages (2.3.0)\n", - "Requirement already satisfied: protobuf>=3.9.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (3.12.4)\n", - "Requirement already satisfied: h5py<2.11.0,>=2.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (2.10.0)\n", - "Requirement already satisfied: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.10.0)\n", - "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.1.0)\n", - "Requirement already satisfied: gast==0.3.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.3.3)\n", - "Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.32.0)\n", - "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (3.3.0)\n", - "Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.15.0)\n", - "Requirement already satisfied: keras-preprocessing<1.2,>=1.1.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.1.2)\n", - "Requirement already satisfied: scipy==1.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.4.1)\n", - "Requirement already satisfied: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.12.1)\n", - "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.35.1)\n", - "Requirement already satisfied: google-pasta>=0.1.8 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.2.0)\n", - "Requirement already satisfied: numpy<1.19.0,>=1.16.0 in 
/usr/local/lib/python3.6/dist-packages (from tensorflow) (1.18.5)\n", - "Requirement already satisfied: astunparse==1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.6.3)\n", - "Requirement already satisfied: tensorboard<3,>=2.3.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (2.3.0)\n", - "Requirement already satisfied: tensorflow-estimator<2.4.0,>=2.3.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (2.3.0)\n", - "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.9.2->tensorflow) (50.3.0)\n", - "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (3.2.2)\n", - "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (1.7.0)\n", - "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (0.4.1)\n", - "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (2.23.0)\n", - "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (1.0.1)\n", - "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (1.17.2)\n", - "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from markdown>=2.6.8->tensorboard<3,>=2.3.0->tensorflow) (2.0.0)\n", - "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<3,>=2.3.0->tensorflow) (1.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (2020.6.20)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (1.24.3)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (3.0.4)\n", - "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (2.10)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (0.2.8)\n", - "Requirement already satisfied: rsa<5,>=3.1.4; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (4.6)\n", - "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (4.1.1)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->markdown>=2.6.8->tensorboard<3,>=2.3.0->tensorflow) (3.2.0)\n", - "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<3,>=2.3.0->tensorflow) (3.1.0)\n", - "Requirement already 
satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (0.4.8)\n", - "Collecting transformers\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/19/22/aff234f4a841f8999e68a7a94bdd4b60b4cebcfeca5d67d61cd08c9179de/transformers-3.3.1-py3-none-any.whl (1.1MB)\n", - "\u001b[K |████████████████████████████████| 1.1MB 2.8MB/s \n", - "\u001b[?25hCollecting sacremoses\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)\n", - "\u001b[K |████████████████████████████████| 890kB 17.4MB/s \n", - "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from transformers) (20.4)\n", - "Collecting sentencepiece!=0.1.92\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/d4/a4/d0a884c4300004a78cca907a6ff9a5e9fe4f090f5d95ab341c53d28cbc58/sentencepiece-0.1.91-cp36-cp36m-manylinux1_x86_64.whl (1.1MB)\n", - "\u001b[K |████████████████████████████████| 1.1MB 18.1MB/s \n", - "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers) (3.0.12)\n", - "Requirement already satisfied: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers) (0.7)\n", - "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from transformers) (4.41.1)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers) (2019.12.20)\n", - "Collecting tokenizers==0.8.1.rc2\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/80/83/8b9fccb9e48eeb575ee19179e2bdde0ee9a1904f97de5f02d19016b8804f/tokenizers-0.8.1rc2-cp36-cp36m-manylinux1_x86_64.whl (3.0MB)\n", - "\u001b[K |████████████████████████████████| 3.0MB 28.7MB/s \n", - "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from transformers) (1.18.5)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from transformers) (2.23.0)\n", - "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (1.15.0)\n", - "Requirement already satisfied: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (7.1.2)\n", - "Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (0.16.0)\n", - "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->transformers) (2.4.7)\n", - "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2.10)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2020.6.20)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (1.24.3)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (3.0.4)\n", - "Building wheels for collected packages: sacremoses\n", - " Building wheel for sacremoses (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for sacremoses: filename=sacremoses-0.0.43-cp36-none-any.whl size=893257 sha256=7aceb359875e5a113a3b4100f89e9d914f4e2a2ce05ed7eaaf52e10e7dfa0b06\n", - " Stored in directory: /root/.cache/pip/wheels/29/3c/fd/7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45\n", - "Successfully built sacremoses\n", - "Installing collected packages: sacremoses, sentencepiece, tokenizers, transformers\n", - "Successfully installed sacremoses-0.0.43 sentencepiece-0.1.91 tokenizers-0.8.1rc2 transformers-3.3.1\n", - "Collecting albert-tensorflow\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/ba/1e/e776bb23e6f89a1f1d7d33b50d0bd9c2c7b24b39aa548f041827a9c00d73/albert_tensorflow-1.1-py3-none-any.whl (81kB)\n", - "\u001b[K |████████████████████████████████| 81kB 2.3MB/s \n", - "\u001b[?25hRequirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from albert-tensorflow) (1.15.0)\n", - "Installing collected packages: albert-tensorflow\n", - "Successfully installed albert-tensorflow-1.1\n", - "Requirement already satisfied: torch in /usr/local/lib/python3.6/dist-packages (1.6.0+cu101)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from torch) (1.18.5)\n", - "Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from torch) (0.16.0)\n", - "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.6/dist-packages (0.1.91)\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VPur-ModrqiE" - }, - "source": [ - "# Check GPU" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "eEMLd2nzrtAr", - "outputId": "ac45d376-2643-4643-873d-b887fa65f29f", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "import tensorflow as tf\n", - "\n", - "# Get the GPU device name.\n", - "device_name = tf.test.gpu_device_name()\n", - "\n", - "# The device name should look like the following:\n", - "if device_name == '/device:GPU:0':\n", - " print('Found GPU at: {}'.format(device_name))\n", - "else:\n", - " raise SystemError('GPU device not found')" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Found GPU at: /device:GPU:0\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "aJUsW5-trxGv", - "outputId": "5034d3c9-248b-4711-b07c-162d62d9de9b", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 51 - } - }, - "source": [ - "import torch\n", - "\n", - "# If there's a GPU available...\n", - "if torch.cuda.is_available(): \n", - "\n", - " # Tell PyTorch to use the GPU. \n", - " device = torch.device(\"cuda\")\n", - "\n", - " print('There are %d GPU(s) available.' 
% torch.cuda.device_count())\n", - "\n", - " print('We will use the GPU:', torch.cuda.get_device_name(0))\n", - "\n", - "# If not...\n", - "else:\n", - " print('No GPU available, using the CPU instead.')\n", - " device = torch.device(\"cpu\")" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "There are 1 GPU(s) available.\n", - "We will use the GPU: Tesla P100-PCIE-16GB\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sZuNMIWGs5L0" - }, - "source": [ - "# Mounting" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "PGdZlz87rX7Q", - "outputId": "4c0f3693-aaaa-4f16-d8de-3eaa8a2b1b90", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "from google.colab import drive\n", - "drive.mount('/content/gdrive')" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Mounted at /content/gdrive\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "2lMkdNcrrbM0", - "outputId": "52181ba1-f48d-4883-ac18-73d8e6f65b1c", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 136 - } - }, - "source": [ - "!git clone https://github.com/mjag7682/ALBERT" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Cloning into 'ALBERT'...\n", - "remote: Enumerating objects: 9, done.\u001b[K\n", - "remote: Counting objects: 100% (9/9), done.\u001b[K\n", - "remote: Compressing objects: 100% (9/9), done.\u001b[K\n", - "remote: Total 362 (delta 2), reused 0 (delta 0), pack-reused 353\u001b[K\n", - "Receiving objects: 100% (362/362), 244.39 KiB | 470.00 KiB/s, done.\n", - "Resolving deltas: 100% (235/235), done.\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "skH_bDk2rdJ_", - "outputId": "4137e476-8d92-444d-eb2b-3461194b4b68", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - } - }, - "source": [ - "!pip install -r /content/ALBERT/requirements.txt " - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Collecting tensorflow==1.15.2\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/9a/d9/fd234c7bf68638423fb8e7f44af7fcfce3bcaf416b51e6d902391e47ec43/tensorflow-1.15.2-cp36-cp36m-manylinux2010_x86_64.whl (110.5MB)\n", - "\u001b[K |████████████████████████████████| 110.5MB 65kB/s \n", - "\u001b[?25hCollecting tensorflow_hub==0.7\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/00/0e/a91780d07592b1abf9c91344ce459472cc19db3b67fdf3a61dca6ebb2f5c/tensorflow_hub-0.7.0-py2.py3-none-any.whl (89kB)\n", - "\u001b[K |████████████████████████████████| 92kB 9.6MB/s \n", - "\u001b[?25hRequirement already satisfied: sentencepiece in /usr/local/lib/python3.6/dist-packages (from -r /content/ALBERT/requirements.txt (line 5)) (0.1.93)\n", - "Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.15.0)\n", - "Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.32.0)\n", - "Collecting gast==0.2.2\n", - " Downloading https://files.pythonhosted.org/packages/4e/35/11749bf99b2d4e3cceb4d55ca22590b0d7c2c62b9de38ac4a4a7f4687421/gast-0.2.2.tar.gz\n", - "Requirement already satisfied: astor>=0.6.0 in 
/usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (0.8.1)\n", - "Requirement already satisfied: numpy<2.0,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.18.5)\n", - "Requirement already satisfied: protobuf>=3.6.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (3.12.4)\n", - "Collecting tensorboard<1.16.0,>=1.15.0\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/1e/e9/d3d747a97f7188f48aa5eda486907f3b345cd409f0a0850468ba867db246/tensorboard-1.15.0-py3-none-any.whl (3.8MB)\n", - "\u001b[K |████████████████████████████████| 3.8MB 41.1MB/s \n", - "\u001b[?25hRequirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (3.3.0)\n", - "Requirement already satisfied: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (0.10.0)\n", - "Collecting tensorflow-estimator==1.15.1\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/de/62/2ee9cd74c9fa2fa450877847ba560b260f5d0fb70ee0595203082dafcc9d/tensorflow_estimator-1.15.1-py2.py3-none-any.whl (503kB)\n", - "\u001b[K |████████████████████████████████| 512kB 29.1MB/s \n", - "\u001b[?25hRequirement already satisfied: wheel>=0.26; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (0.35.1)\n", - "Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.1.2)\n", - "Requirement already satisfied: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.12.1)\n", - "Collecting keras-applications>=1.0.8\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/71/e3/19762fdfc62877ae9102edf6342d71b28fbfd9dea3d2f96a882ce099b03f/Keras_Applications-1.0.8-py3-none-any.whl (50kB)\n", - "\u001b[K |████████████████████████████████| 51kB 5.6MB/s \n", - "\u001b[?25hRequirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.1.0)\n", - "Requirement already satisfied: google-pasta>=0.1.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (0.2.0)\n", - "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.6.1->tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (50.3.0)\n", - "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.16.0,>=1.15.0->tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (1.0.1)\n", - "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.16.0,>=1.15.0->tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (3.2.2)\n", - "Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from keras-applications>=1.0.8->tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (2.10.0)\n", - "Requirement already satisfied: importlib-metadata; python_version < 
\"3.8\" in /usr/local/lib/python3.6/dist-packages (from markdown>=2.6.8->tensorboard<1.16.0,>=1.15.0->tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (2.0.0)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->markdown>=2.6.8->tensorboard<1.16.0,>=1.15.0->tensorflow==1.15.2->-r /content/ALBERT/requirements.txt (line 2)) (3.2.0)\n", - "Building wheels for collected packages: gast\n", - " Building wheel for gast (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for gast: filename=gast-0.2.2-cp36-none-any.whl size=7542 sha256=da09531e91a678dff87f3e427990e2f4fae4e07e33f032b7f927656d5c7d3e26\n", - " Stored in directory: /root/.cache/pip/wheels/5c/2e/7e/a1d4d4fcebe6c381f378ce7743a3ced3699feb89bcfbdadadd\n", - "Successfully built gast\n", - "\u001b[31mERROR: tensorflow-probability 0.11.0 has requirement gast>=0.3.2, but you'll have gast 0.2.2 which is incompatible.\u001b[0m\n", - "Installing collected packages: gast, tensorboard, tensorflow-estimator, keras-applications, tensorflow, tensorflow-hub\n", - " Found existing installation: gast 0.3.3\n", - " Uninstalling gast-0.3.3:\n", - " Successfully uninstalled gast-0.3.3\n", - " Found existing installation: tensorboard 2.3.0\n", - " Uninstalling tensorboard-2.3.0:\n", - " Successfully uninstalled tensorboard-2.3.0\n", - " Found existing installation: tensorflow-estimator 2.3.0\n", - " Uninstalling tensorflow-estimator-2.3.0:\n", - " Successfully uninstalled tensorflow-estimator-2.3.0\n", - " Found existing installation: tensorflow 2.3.0\n", - " Uninstalling tensorflow-2.3.0:\n", - " Successfully uninstalled tensorflow-2.3.0\n", - " Found existing installation: tensorflow-hub 0.9.0\n", - " Uninstalling tensorflow-hub-0.9.0:\n", - " Successfully uninstalled tensorflow-hub-0.9.0\n", - "Successfully installed gast-0.2.2 keras-applications-1.0.8 tensorboard-1.15.0 tensorflow-1.15.2 tensorflow-estimator-1.15.1 tensorflow-hub-0.7.0\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jwgFYW9dsFO1" - }, - "source": [ - "# Fine Tune" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "VW_nppn5tbuc", - "outputId": "30455f24-785e-42ea-cdd8-5eef33bcee93", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 854 - } - }, - "source": [ - "!pip install --upgrade tensorflow" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Collecting tensorflow\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/ad/ad/769c195c72ac72040635c66cd9ba7b0f4b4fc1ac67e59b99fa6988446c22/tensorflow-2.3.1-cp36-cp36m-manylinux2010_x86_64.whl (320.4MB)\n", - "\u001b[K |████████████████████████████████| 320.4MB 50kB/s \n", - "\u001b[?25hRequirement already satisfied, skipping upgrade: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.1.0)\n", - "Requirement already satisfied, skipping upgrade: astunparse==1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.6.3)\n", - "Requirement already satisfied, skipping upgrade: gast==0.3.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.3.3)\n", - "Requirement already satisfied, skipping upgrade: tensorboard<3,>=2.3.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (2.3.0)\n", - "Requirement already satisfied, skipping upgrade: wheel>=0.26 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.35.1)\n", - "Requirement already 
satisfied, skipping upgrade: keras-preprocessing<1.2,>=1.1.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.1.2)\n", - "Requirement already satisfied, skipping upgrade: numpy<1.19.0,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.18.5)\n", - "Requirement already satisfied, skipping upgrade: tensorflow-estimator<2.4.0,>=2.3.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (2.3.0)\n", - "Requirement already satisfied, skipping upgrade: google-pasta>=0.1.8 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.2.0)\n", - "Requirement already satisfied, skipping upgrade: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (3.3.0)\n", - "Requirement already satisfied, skipping upgrade: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.32.0)\n", - "Requirement already satisfied, skipping upgrade: six>=1.12.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.15.0)\n", - "Requirement already satisfied, skipping upgrade: protobuf>=3.9.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (3.12.4)\n", - "Requirement already satisfied, skipping upgrade: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (1.12.1)\n", - "Requirement already satisfied, skipping upgrade: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (0.10.0)\n", - "Requirement already satisfied, skipping upgrade: h5py<2.11.0,>=2.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow) (2.10.0)\n", - "Requirement already satisfied, skipping upgrade: setuptools>=41.0.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (50.3.0)\n", - "Requirement already satisfied, skipping upgrade: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (3.2.2)\n", - "Requirement already satisfied, skipping upgrade: google-auth<2,>=1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (1.17.2)\n", - "Requirement already satisfied, skipping upgrade: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (1.0.1)\n", - "Requirement already satisfied, skipping upgrade: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (1.7.0)\n", - "Requirement already satisfied, skipping upgrade: requests<3,>=2.21.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (2.23.0)\n", - "Requirement already satisfied, skipping upgrade: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorboard<3,>=2.3.0->tensorflow) (0.4.1)\n", - "Requirement already satisfied, skipping upgrade: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from markdown>=2.6.8->tensorboard<3,>=2.3.0->tensorflow) (2.0.0)\n", - "Requirement already satisfied, skipping upgrade: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (0.2.8)\n", - "Requirement already satisfied, skipping upgrade: rsa<5,>=3.1.4; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (4.6)\n", - "Requirement already satisfied, skipping upgrade: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) 
(4.1.1)\n", - "Requirement already satisfied, skipping upgrade: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (2020.6.20)\n", - "Requirement already satisfied, skipping upgrade: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (1.24.3)\n", - "Requirement already satisfied, skipping upgrade: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (3.0.4)\n", - "Requirement already satisfied, skipping upgrade: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<3,>=2.3.0->tensorflow) (2.10)\n", - "Requirement already satisfied, skipping upgrade: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<3,>=2.3.0->tensorflow) (1.3.0)\n", - "Requirement already satisfied, skipping upgrade: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->markdown>=2.6.8->tensorboard<3,>=2.3.0->tensorflow) (3.2.0)\n", - "Requirement already satisfied, skipping upgrade: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow) (0.4.8)\n", - "Requirement already satisfied, skipping upgrade: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<3,>=2.3.0->tensorflow) (3.1.0)\n", - "Installing collected packages: tensorflow\n", - " Found existing installation: tensorflow 2.3.0\n", - " Uninstalling tensorflow-2.3.0:\n", - " Successfully uninstalled tensorflow-2.3.0\n", - "Successfully installed tensorflow-2.3.1\n" - ], - "name": "stdout" - }, - { - "output_type": "display_data", - "data": { - "application/vnd.colab-display-data+json": { - "pip_warning": { - "packages": [ - "tensorflow" - ] - } - } - }, - "metadata": { - "tags": [] - } - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "oBKiimp4YxOf", - "outputId": "58f244b6-afb1-45b9-c9c8-060ae48116ce", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 51 - } - }, - "source": [ - "# !pip install modeling" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "\u001b[31mERROR: Could not find a version that satisfies the requirement modeling (from versions: none)\u001b[0m\n", - "\u001b[31mERROR: No matching distribution found for modeling\u001b[0m\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "5rYCZ946YvOj" - }, - "source": [ - "from tensorflow.python.compiler.tensorrt import trt_convert as trt\n", - "import tensorflow as tf\n", - "# from albert import modeling\n", - "# import tokenization\n", - "# import optimization\n", - "import pandas as pd\n", - "import numpy as np\n", - "from keras.preprocessing.sequence import pad_sequences" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "dbAgsBcot4n9" - }, - "source": [ - "from transformers import AlbertTokenizer\n", - "from transformers.modeling_albert import AlbertModel, load_tf_weights_in_albert, AlbertPreTrainedModel\n", - "from transformers import AlbertForSequenceClassification,AlbertConfig\n", - "from transformers.tokenization_bert import BertTokenizer\n", - "import torch.nn as nn\n", - "from 
torch.nn import CrossEntropyLoss\n", - "VOCAB_FILE = \"/content/gdrive/My Drive/ALBERTimplementation/data/30k-clean.model\" # This is the vocab file output from Build Vocab step\n", - "CONFIG_FILE = \"/content/gdrive/My Drive/ALBERTimplementation/AG_News/albert_config.json\"\n", - "ALBERT_PRETRAIN_CHECKPOINT = \"/content/gdrive/My Drive/ALBERTimplementation/AG_News/model.ckpt-best.index\" # This is the model checkpoint output from Albert Pretrain step\n", - "tokenizer = AlbertTokenizer(vocab_file=VOCAB_FILE)\n", - "config = AlbertConfig.from_json_file(CONFIG_FILE)\n", - "model = AlbertModel(config)\n", - "model = load_tf_weights_in_albert(model, config,ALBERT_PRETRAIN_CHECKPOINT)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "U6uLqvcVsJRY", - "outputId": "9ba394cf-afea-439f-9ad7-8a3f59861df4", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 283 - } - }, - "source": [ - "# from transformers import AlbertTokenizer\n", - "# from transformers import AlbertForSequenceClassification,AlbertConfig\n", - "# config = modeling.AlbertConfig.from_json_file(\"/content/gdrive/My Drive/ALBERTimplementation/model-fine/config.json\")\n", - "# tokenizer = tokenization.FullTokenizer.from_scratch(vocab_file=\"/content/gdrive/My Drive/ALBERTimplementation/model-fine/vocab.txt\", do_lower_case=True, spm_model_file=None)\n", - "# tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2', do_lower_case=True) \n", - "# tokenizer = AlbertTokenizer.from_pretrained('/content/gdrive/My Drive/ALBERTimplementation/model-fine', do_lower_case=True) \n", - "# tokenizer = AlbertTokenizer.from_pretrained('./content/drive/My Drive/Reuters_Dataset/reut2-021', do_lower_case=True) \n", - "# PRE_TRAINED_MODEL_NAME_OR_PATH = '/content/gdrive/My Drive/ALBERTimplementation/model-fine'\n", - "# model = AlbertForSequenceClassification.from_pretrained(PRE_TRAINED_MODEL_NAME_OR_PATH, num_labels = 2, output_attentions = False, output_hidden_states = False)" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "error", - "ename": "AttributeError", - "evalue": "ignored", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtransformers\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mAlbertForSequenceClassification\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mAlbertConfig\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mAlbertConfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_json_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"/content/gdrive/My Drive/ALBERTimplementation/model-fine/config.json\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mtokenizer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mAlbertTokenizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_scratch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvocab_file\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"/content/gdrive/My Drive/ALBERTimplementation/model-fine/vocab.txt\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdo_lower_case\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mspm_model_file\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;31m# tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2', do_lower_case=True)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;31m# tokenizer = AlbertTokenizer.from_pretrained('/content/gdrive/My Drive/ALBERTimplementation/model-fine', do_lower_case=True)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mAttributeError\u001b[0m: type object 'AlbertTokenizer' has no attribute 'from_scratch'" - ] - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "eZYLfSvlsLBl" - }, - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "import torch\n", - "import tensorflow as tf" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "qZEJy5xu6Llu" - }, - "source": [ - "train_data = pd.read_csv('/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/train.tsv', sep='\\t')\n", - "for item in train_data.iterrows():\n", - " print(item[1][1])\n", - " if item[1][1] != 1 and item[1][1] != 0:\n", - " print(item[1][1])" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "aLEK078bsRpD", - "outputId": "621583cf-ec2c-4302-938d-fd12d55dd2d3", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "train_data = pd.read_csv('/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/train.tsv', sep='\\t')\n", - "print(\"Number of training examples {}\".format(len(train_data)))\n", - "num_examples = 100000\n", - "train = train_data[:num_examples].text.values\n", - "labels = train_data[:num_examples].label.values\n", - "# train = train_data[]" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Number of training examples 131173\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "mNiWyaLD7Otu", - "outputId": "b11835dc-c1f4-43c3-ab9a-4e3919bae840", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 51 - } - }, - "source": [ - "print(len(train))\n", - "print(len(labels))\n", - "# print(labels[:100])\n", - "for i in labels:\n", - " if i!=0 and i!=1:\n", - " print(i)" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "100000\n", - "100000\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "G6MsmCDYsUC2", - "outputId": "33b809fb-f668-483a-dfc4-9c61969fb847", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 88 - } - }, - "source": [ - "# Print the original sentence.\n", - "print(' Original: ', train[10])\n", - "\n", - "# Print the sentence split into tokens.\n", - "print('Tokenized: ', tokenizer.tokenize(train[10]))\n", - "\n", - "# Print the sentence mapped to token ids.\n", - "print('Token IDs: ', tokenizer.convert_tokens_to_ids(tokenizer.tokenize(train[10])))" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - " Original: vix spy new high vix new low spy calls versus lod\n", - "Tokenized: ['▁vi', 'x', '▁spy', '▁new', '▁high', '▁vi', 'x', '▁new', '▁low', '▁spy', '▁call', 's', '▁vers', 'us', '▁lo', 'd']\n", - "Token IDs: [1847, 782, 1181, 30, 141, 1847, 782, 30, 385, 1181, 172, 12, 3770, 595, 2947, 27]\n" - 
], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "_z9qjkZ9sVzV", - "outputId": "58b18382-f1d7-461c-9cd7-4f2f8740aa46", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 51 - } - }, - "source": [ - "print(train_data.text.apply(lambda x: len(x)).quantile([0.9]))\n", - "MAX_LEN = 160" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "0.9 160.0\n", - "Name: text, dtype: float64\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ArJMjNxmMoHU", - "outputId": "483535e7-53bc-4490-fb97-5227dbaf3c2c", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "m_l = 0\n", - "for x in train:\n", - " if len(x)>m_l:\n", - " m_l = len(x)\n", - "print(m_l)" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "1892\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "JBfPIx0HsXpY", - "outputId": "4fd20283-60bf-4fcf-ac7a-236914066094", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 326 - } - }, - "source": [ - "# Tokenize all of the sentences and map the tokens to thier word IDs.\n", - "input_ids = []\n", - "attention_masks = []\n", - "\n", - "# For every sentence...\n", - "for text in train:\n", - " # `encode_plus` will:\n", - " # (1) Tokenize the sentence.\n", - " # (2) Prepend the `[CLS]` token to the start.\n", - " # (3) Append the `[SEP]` token to the end.\n", - " # (4) Map tokens to their IDs.\n", - " # (5) Pad or truncate the sentence to `max_length`\n", - " # (6) Create attention masks for [PAD] tokens.\n", - " encoded_dict = tokenizer.encode_plus(\n", - " text, # Sentence to encode.\n", - " add_special_tokens = True, # Add '[CLS]' and '[SEP]'\n", - " max_length = MAX_LEN, # Pad & truncate all sentences.\n", - " pad_to_max_length = True,\n", - " return_attention_mask = True, # Construct attn. masks.\n", - " return_tensors = 'pt', # Return pytorch tensors.\n", - " truncation = True\n", - " )\n", - " \n", - " # Add the encoded sentence to the list. \n", - " input_ids.append(encoded_dict['input_ids'])\n", - " \n", - " # And its attention mask (simply differentiates padding from non-padding).\n", - " attention_masks.append(encoded_dict['attention_mask'])\n", - "\n", - "# Convert the lists into tensors.\n", - "input_ids = torch.cat(input_ids, dim=0)\n", - "attention_masks = torch.cat(attention_masks, dim=0)\n", - "labels = torch.tensor(labels)\n", - "\n", - "# Print sentence 0, now as a list of IDs.\n", - "print('Original: ', train[10])\n", - "print('Token IDs:', input_ids[10])" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/transformers/tokenization_utils_base.py:1773: FutureWarning: The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 
512 for Bert).\n", - " FutureWarning,\n" - ], - "name": "stderr" - }, - { - "output_type": "stream", - "text": [ - "Original: vix spy new high vix new low spy calls versus lod\n", - "Token IDs: tensor([ 2, 1847, 782, 1181, 30, 141, 1847, 782, 30, 385, 1181, 172,\n", - " 12, 3770, 595, 2947, 27, 3, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0])\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "fZ9Gi4GDsesz", - "outputId": "2d759eba-1357-4351-d7be-5bd1c97f0f17", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 51 - } - }, - "source": [ - "#training & validation split\n", - "from torch.utils.data import TensorDataset, random_split\n", - "\n", - "\n", - "# Combine the training inputs into a TensorDataset.\n", - "dataset = TensorDataset(input_ids, attention_masks, labels)\n", - "\n", - "# Create a 90-10 train-validation split.\n", - "\n", - "# Calculate the number of samples to include in each set.\n", - "train_size = int(0.9 * len(dataset))\n", - "val_size = len(dataset) - train_size\n", - "\n", - "# Divide the dataset by randomly selecting samples.\n", - "train_dataset, val_dataset = random_split(dataset, [train_size, val_size])\n", - "\n", - "print('{} training samples'.format(train_size))\n", - "print('{} validation samples'.format(val_size))" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "90000 training samples\n", - "10000 validation samples\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "etq7ymGTshCN" - }, - "source": [ - "def flat_accuracy(preds, labels):\n", - " pred_flat = np.argmax(preds, axis=1).flatten()\n", - " labels_flat = labels.flatten()\n", - " return np.sum(pred_flat == labels_flat) / len(labels_flat)\n", - "\n", - "import time\n", - "import datetime\n", - "\n", - "def format_time(elapsed):\n", - " '''\n", - " Takes a time in seconds and returns a string hh:mm:ss\n", - " '''\n", - " # Round to the nearest second.\n", - " elapsed_rounded = int(round((elapsed)))\n", - " \n", - " # Format as hh:mm:ss\n", - " return str(datetime.timedelta(seconds=elapsed_rounded))\n", - "\n", - "# Set the seed value all over the place to make this reproducible.\n", - "import random\n", - "def set_random(seed_val):\n", - " random.seed(seed_val)\n", - " np.random.seed(seed_val)\n", - " torch.manual_seed(seed_val)\n", - " torch.cuda.manual_seed_all(seed_val)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "I4MJwmeAsi9g" - }, - "source": [ - "def train_model(train_dataloader, optimizer, epochs):\n", - " \n", - " # We'll store a number of quantities such as training and validation loss, \n", - " # validation accuracy, and timings.\n", - " training_stats = []\n", - "\n", - " # Measure the total training time for the whole run.\n", - " total_t0 = time.time()\n", - "\n", - " # For each epoch...\n", - " for epoch_i in range(0, epochs):\n", - "\n", - " # ========================================\n", - " # Training\n", - " # 
========================================\n", - "\n", - "    # Perform one full pass over the training set.\n", - "\n", - "    print(\"\")\n", - "    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))\n", - "    print('Training...')\n", - "\n", - "    # Measure how long the training epoch takes.\n", - "    t0 = time.time()\n", - "\n", - "    # Reset the total loss for this epoch.\n", - "    total_train_loss = 0\n", - "\n", - "    # Put the model into training mode. Don't be misled--the call to \n", - "    # `train` just changes the *mode*, it doesn't *perform* the training.\n", - "    # `dropout` and `batchnorm` layers behave differently during training\n", - "    # vs. test (source: https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch)\n", - "    model.train()\n", - "\n", - "    # For each batch of training data...\n", - "    for step, batch in enumerate(train_dataloader):\n", - "\n", - "        # Progress update every 40 batches.\n", - "        if step % 40 == 0 and not step == 0:\n", - "            # Calculate elapsed time in minutes.\n", - "            elapsed = format_time(time.time() - t0)\n", - "\n", - "            # Report progress.\n", - "            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))\n", - "\n", - "        # Unpack this training batch from our dataloader. \n", - "        #\n", - "        # As we unpack the batch, we'll also copy each tensor to the GPU using the \n", - "        # `to` method.\n", - "        #\n", - "        # `batch` contains three pytorch tensors:\n", - "        #   [0]: input ids \n", - "        #   [1]: attention masks\n", - "        #   [2]: labels \n", - "        b_input_ids = batch[0].to(device)\n", - "        b_input_mask = batch[1].to(device)\n", - "        b_labels = batch[2].to(device)\n", - "\n", - "        # Always clear any previously calculated gradients before performing a\n", - "        # backward pass. PyTorch doesn't do this automatically because \n", - "        # accumulating the gradients is \"convenient while training RNNs\". \n", - "        # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)\n", - "        model.zero_grad()        \n", - "\n", - "        # Perform a forward pass (evaluate the model on this training batch).\n", - "        # The documentation for this `model` function is here: \n", - "        # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n", - "        # It returns different numbers of parameters depending on what arguments\n", - "        # are given and what flags are set. For our usage here, it returns\n", - "        # the loss (because we provided labels) and the \"logits\"--the model\n", - "        # outputs prior to activation.\n", - "        loss, logits = model(b_input_ids, \n", - "                             attention_mask=b_input_mask, \n", - "                             labels=b_labels)\n", - "\n", - "        # Accumulate the training loss over all of the batches so that we can\n", - "        # calculate the average loss at the end. 
`loss` is a Tensor containing a\n", - "        # single value; the `.item()` function just returns the Python value \n", - "        # from the tensor.\n", - "        total_train_loss += loss.item()\n", - "\n", - "        # Perform a backward pass to calculate the gradients.\n", - "        loss.backward()\n", - "\n", - "        # Clip the norm of the gradients to 1.0.\n", - "        # This is to help prevent the \"exploding gradients\" problem.\n", - "        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n", - "\n", - "        # Update parameters and take a step using the computed gradient.\n", - "        # The optimizer dictates the \"update rule\"--how the parameters are\n", - "        # modified based on their gradients, the learning rate, etc.\n", - "        optimizer.step()\n", - "\n", - "        # Update the learning rate.\n", - "        scheduler.step()\n", - "\n", - "    # Calculate the average loss over all of the batches.\n", - "    avg_train_loss = total_train_loss / len(train_dataloader)            \n", - "\n", - "    # Measure how long this epoch took.\n", - "    training_time = format_time(time.time() - t0)\n", - "\n", - "    print(\"\")\n", - "    print(\"  Average training loss: {0:.2f}\".format(avg_train_loss))\n", - "    print(\"  Training epoch took: {:}\".format(training_time))\n", - "\n", - "    # ========================================\n", - "    #               Validation\n", - "    # ========================================\n", - "    # After the completion of each training epoch, measure our performance on\n", - "    # our validation set.\n", - "\n", - "    print(\"\")\n", - "    print(\"Running Validation...\")\n", - "\n", - "    t0 = time.time()\n", - "\n", - "    # Put the model in evaluation mode--the dropout layers behave differently\n", - "    # during evaluation.\n", - "    model.eval()\n", - "\n", - "    # Tracking variables \n", - "    total_eval_accuracy = 0\n", - "    total_eval_loss = 0\n", - "    nb_eval_steps = 0\n", - "\n", - "    # Evaluate data for one epoch\n", - "    for batch in validation_dataloader:\n", - "\n", - "        # Unpack this validation batch from our dataloader. \n", - "        #\n", - "        # As we unpack the batch, we'll also copy each tensor to the GPU using \n", - "        # the `to` method.\n", - "        #\n", - "        # `batch` contains three pytorch tensors:\n", - "        #   [0]: input ids \n", - "        #   [1]: attention masks\n", - "        #   [2]: labels \n", - "        b_input_ids = batch[0].to(device)\n", - "        b_input_mask = batch[1].to(device)\n", - "        b_labels = batch[2].to(device)\n", - "\n", - "        # Tell pytorch not to bother with constructing the compute graph during\n", - "        # the forward pass, since this is only needed for backprop (training).\n", - "        with torch.no_grad():        \n", - "\n", - "            # Forward pass, calculate logit predictions.\n", - "            # token_type_ids is the same as the \"segment ids\", which \n", - "            # differentiates sentence 1 and 2 in 2-sentence tasks.\n", - "            # The documentation for this `model` function is here: \n", - "            # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n", - "            # Get the \"logits\" output by the model. 
The \"logits\" are the output\n", - " # values prior to applying an activation function like the softmax.\n", - " (loss, logits) = model(b_input_ids, \n", - " attention_mask=b_input_mask,\n", - " labels=b_labels)\n", - "\n", - " # Accumulate the validation loss.\n", - " total_eval_loss += loss.item()\n", - "\n", - " # Move logits and labels to CPU\n", - " logits = logits.detach().cpu().numpy()\n", - " label_ids = b_labels.to('cpu').numpy()\n", - "\n", - " # Calculate the accuracy for this batch of test sentences, and\n", - " # accumulate it over all batches.\n", - " total_eval_accuracy += flat_accuracy(logits, label_ids)\n", - "\n", - "\n", - " # Report the final accuracy for this validation run.\n", - " avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)\n", - " print(\" Accuracy: {0:.2f}\".format(avg_val_accuracy))\n", - "\n", - " # Calculate the average loss over all of the batches.\n", - " avg_val_loss = total_eval_loss / len(validation_dataloader)\n", - "\n", - " # Measure how long the validation run took.\n", - " validation_time = format_time(time.time() - t0)\n", - "\n", - " print(\" Validation Loss: {0:.2f}\".format(avg_val_loss))\n", - " print(\" Validation took: {:}\".format(validation_time))\n", - "\n", - " # Record all statistics from this epoch.\n", - " training_stats.append(\n", - " {\n", - " 'epoch': epoch_i + 1,\n", - " 'Training Loss': avg_train_loss,\n", - " 'Valid. Loss': avg_val_loss,\n", - " 'Valid. Accur.': avg_val_accuracy,\n", - " 'Training Time': training_time,\n", - " 'Validation Time': validation_time\n", - " }\n", - " )\n", - "\n", - " print(\"\")\n", - " print(\"Training complete!\")\n", - "\n", - " print(\"Total training took {:} (h:mm:ss)\".format(format_time(time.time()-total_t0)))\n", - " \n", - " return training_stats\n", - "\n", - "def print_training_stats(training_stats):\n", - " # Display floats with two decimal places.\n", - " pd.set_option('precision', 2)\n", - "\n", - " # Create a DataFrame from our training statistics.\n", - " df_stats = pd.DataFrame(data=training_stats)\n", - "\n", - " # Use the 'epoch' as the row index.\n", - " df_stats = df_stats.set_index('epoch')\n", - "\n", - " # A hack to force the column headers to wrap.\n", - " #df = df.style.set_table_styles([dict(selector=\"th\",props=[('max-width', '70px')])])\n", - "\n", - " # Display the table.\n", - " print(df_stats)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "Tzz2H0Jpsmtk", - "outputId": "d982e8a8-82ec-4a1f-9fc5-afc2e2fcb7d7", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - } - }, - "source": [ - "from torch.utils.data import DataLoader, RandomSampler, SequentialSampler\n", - "from transformers import get_linear_schedule_with_warmup, AdamW\n", - "\n", - "from transformers import AlbertForSequenceClassification,AlbertConfig\n", - "# from transformers import DistilBertForSequenceClassification, AdamW, DistilBertConfig\n", - "# from transformers import BertForSequenceClassification, BertConfig\n", - "# from transformers import ElectraForSequenceClassification\n", - "\n", - "# ADJUST lr_s and batch_sizes\n", - "lr_s = [2e-5]\n", - "batch_sizes = [32]\n", - "from itertools import product\n", - "hyperparameters = list(product(*[lr_s, batch_sizes]))\n", - "print(hyperparameters)\n", - "training_statistics = []\n", - "for lr, batch_size in hyperparameters:\n", - " # config = AlbertConfig.from_json_file(CONFIG_FILE)\n", - " # model = AlbertModel(config)\n", - " # model = 
load_tf_weights_in_albert(model, config,ALBERT_PRETRAIN_CHECKPOINT)\n", - " PRE_TRAINED_MODEL_NAME_OR_PATH = '/content/gdrive/My Drive/ALBERTimplementation/AGnewsmodel'\n", - " model = AlbertForSequenceClassification.from_pretrained(PRE_TRAINED_MODEL_NAME_OR_PATH, num_labels = 2, output_attentions = False, output_hidden_states = False)\n", - " # if MODEL_CHOICE == ModelChoice.BERT:\n", - " # model = BertForSequenceClassification.from_pretrained(\"bert-base-uncased\",num_labels = 2, output_attentions = False, output_hidden_states = False)\n", - " # elif MODEL_CHOICE == ModelChoice.DISTILBERT:\n", - " # model = DistilBertForSequenceClassification.from_pretrained(\"distilbert-base-uncased\",num_labels = 2,output_attentions = False,output_hidden_states = False)\n", - " # elif MODEL_CHOICE == ModelChoice.ALBERT:\n", - " # model = AlbertForSequenceClassification.from_pretrained(\"albert-base-v2\", num_labels = 2, output_attentions = False, output_hidden_states = False)\n", - " # elif MODEL_CHOICE == ModelChoice.ELECTRA:\n", - " # model = ElectraForSequenceClassification.from_pretrained(\"google/electra-base-discriminator\",num_labels = 2, output_attentions = False, output_hidden_states = False)\n", - " # else:\n", - " # print(\"Choose proper model!\")\n", - " \n", - " print('START----',model,'END---')\n", - " \n", - " # Tell pytorch to run this model on the GPU.\n", - " model.cuda()\n", - "\n", - " # The DataLoader needs to know our batch size for training, so we specify it \n", - " # here. For fine-tuning ALBERT on a specific task, the authors recommend a batch \n", - " # size of 16 or 32.\n", - "\n", - " # Create the DataLoaders for our training and validation sets.\n", - " # We'll take training samples in random order. \n", - " train_dataloader = DataLoader(\n", - " train_dataset, # The training samples.\n", - " sampler = RandomSampler(train_dataset), # Select batches randomly\n", - " batch_size = batch_size # Trains with this batch size.\n", - " )\n", - "\n", - " # For validation the order doesn't matter, so we'll just read them sequentially.\n", - " validation_dataloader = DataLoader(\n", - " val_dataset, # The validation samples.\n", - " sampler = SequentialSampler(val_dataset), # Pull out batches sequentially.\n", - " batch_size = batch_size # Evaluate with this batch size.\n", - " )\n", - " \n", - " # Note: AdamW is a class from the huggingface library (as opposed to pytorch) \n", - " # I believe the 'W' stands for 'Weight Decay fix\"\n", - " optimizer = AdamW(model.parameters(),\n", - " lr = lr, # args.learning_rate - default is 5e-5, our notebook had 2e-5\n", - " eps = 1e-8 # args.adam_epsilon - default is 1e-8.\n", - " )\n", - " \n", - "\n", - " # Number of training epochs. The BERT authors recommend between 2 and 4. \n", - " # We chose to run for 4, but we'll see later that this may be over-fitting the\n", - " # training data.\n", - " epochs = 3\n", - "\n", - " # Total number of training steps is [number of batches] x [number of epochs]. 
\n", - " # (Note that this is not the same as the number of training samples).\n", - " total_steps = len(train_dataloader) * epochs\n", - "\n", - " # Create the learning rate scheduler.\n", - " scheduler = get_linear_schedule_with_warmup(optimizer, \n", - " num_warmup_steps = 0, # Default value in run_glue.py\n", - " num_training_steps = total_steps)\n", - "\n", - " seed_val = 42\n", - " set_random(seed_val)\n", - " \n", - " print(\"Training with hyperparameters: batch size={}, lr={}\".format(batch_size, lr))\n", - " training_stats = train_model(train_dataloader, optimizer, epochs)\n", - " training_statistics.append(training_stats)\n", - " print_training_stats(training_stats)" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[(2e-05, 32)]\n" - ], - "name": "stdout" - }, - { - "output_type": "stream", - "text": [ - "Some weights of the model checkpoint at /content/gdrive/My Drive/ALBERTimplementation/AGnewsmodel were not used when initializing AlbertForSequenceClassification: ['predictions.bias', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias', 'predictions.dense.weight', 'predictions.dense.bias', 'predictions.decoder.weight', 'predictions.decoder.bias', 'sop_classifier.classifier.weight', 'sop_classifier.classifier.bias']\n", - "- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).\n", - "- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", - "Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at /content/gdrive/My Drive/ALBERTimplementation/AGnewsmodel and are newly initialized: ['classifier.weight', 'classifier.bias']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" - ], - "name": "stderr" - }, - { - "output_type": "stream", - "text": [ - "START---- AlbertForSequenceClassification(\n", - " (albert): AlbertModel(\n", - " (embeddings): AlbertEmbeddings(\n", - " (word_embeddings): Embedding(20001, 128, padding_idx=0)\n", - " (position_embeddings): Embedding(512, 128)\n", - " (token_type_embeddings): Embedding(2, 128)\n", - " (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0, inplace=False)\n", - " )\n", - " (encoder): AlbertTransformer(\n", - " (embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)\n", - " (albert_layer_groups): ModuleList(\n", - " (0): AlbertLayerGroup(\n", - " (albert_layers): ModuleList(\n", - " (0): AlbertLayer(\n", - " (full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (attention): AlbertAttention(\n", - " (query): Linear(in_features=768, out_features=768, bias=True)\n", - " (key): Linear(in_features=768, out_features=768, bias=True)\n", - " (value): Linear(in_features=768, out_features=768, bias=True)\n", - " (attention_dropout): Dropout(p=0, inplace=False)\n", - " (output_dropout): Dropout(p=0, inplace=False)\n", - " (dense): Linear(in_features=768, out_features=768, bias=True)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " )\n", - " (ffn): Linear(in_features=768, 
out_features=3072, bias=True)\n", - " (ffn_output): Linear(in_features=3072, out_features=768, bias=True)\n", - " (dropout): Dropout(p=0, inplace=False)\n", - " )\n", - " )\n", - " )\n", - " )\n", - " )\n", - " (pooler): Linear(in_features=768, out_features=768, bias=True)\n", - " (pooler_activation): Tanh()\n", - " )\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " (classifier): Linear(in_features=768, out_features=2, bias=True)\n", - ") END---\n", - "Training with hyperparameters: batch size=32, lr=2e-05\n", - "\n", - "======== Epoch 1 / 3 ========\n", - "Training...\n", - " Batch 40 of 2,813. Elapsed: 0:00:19.\n", - " Batch 80 of 2,813. Elapsed: 0:00:38.\n", - " Batch 120 of 2,813. Elapsed: 0:00:56.\n", - " Batch 160 of 2,813. Elapsed: 0:01:15.\n", - " Batch 200 of 2,813. Elapsed: 0:01:33.\n", - " Batch 240 of 2,813. Elapsed: 0:01:52.\n", - " Batch 280 of 2,813. Elapsed: 0:02:11.\n", - " Batch 320 of 2,813. Elapsed: 0:02:29.\n", - " Batch 360 of 2,813. Elapsed: 0:02:48.\n", - " Batch 400 of 2,813. Elapsed: 0:03:06.\n", - " Batch 440 of 2,813. Elapsed: 0:03:25.\n", - " Batch 480 of 2,813. Elapsed: 0:03:44.\n", - " Batch 520 of 2,813. Elapsed: 0:04:02.\n", - " Batch 560 of 2,813. Elapsed: 0:04:21.\n", - " Batch 600 of 2,813. Elapsed: 0:04:39.\n", - " Batch 640 of 2,813. Elapsed: 0:04:58.\n", - " Batch 680 of 2,813. Elapsed: 0:05:17.\n", - " Batch 720 of 2,813. Elapsed: 0:05:35.\n", - " Batch 760 of 2,813. Elapsed: 0:05:54.\n", - " Batch 800 of 2,813. Elapsed: 0:06:13.\n", - " Batch 840 of 2,813. Elapsed: 0:06:31.\n", - " Batch 880 of 2,813. Elapsed: 0:06:50.\n", - " Batch 920 of 2,813. Elapsed: 0:07:08.\n", - " Batch 960 of 2,813. Elapsed: 0:07:27.\n", - " Batch 1,000 of 2,813. Elapsed: 0:07:46.\n", - " Batch 1,040 of 2,813. Elapsed: 0:08:04.\n", - " Batch 1,080 of 2,813. Elapsed: 0:08:23.\n", - " Batch 1,120 of 2,813. Elapsed: 0:08:41.\n", - " Batch 1,160 of 2,813. Elapsed: 0:09:00.\n", - " Batch 1,200 of 2,813. Elapsed: 0:09:19.\n", - " Batch 1,240 of 2,813. Elapsed: 0:09:37.\n", - " Batch 1,280 of 2,813. Elapsed: 0:09:56.\n", - " Batch 1,320 of 2,813. Elapsed: 0:10:14.\n", - " Batch 1,360 of 2,813. Elapsed: 0:10:33.\n", - " Batch 1,400 of 2,813. Elapsed: 0:10:52.\n", - " Batch 1,440 of 2,813. Elapsed: 0:11:10.\n", - " Batch 1,480 of 2,813. Elapsed: 0:11:29.\n", - " Batch 1,520 of 2,813. Elapsed: 0:11:48.\n", - " Batch 1,560 of 2,813. Elapsed: 0:12:06.\n", - " Batch 1,600 of 2,813. Elapsed: 0:12:25.\n", - " Batch 1,640 of 2,813. Elapsed: 0:12:43.\n", - " Batch 1,680 of 2,813. Elapsed: 0:13:02.\n", - " Batch 1,720 of 2,813. Elapsed: 0:13:21.\n", - " Batch 1,760 of 2,813. Elapsed: 0:13:39.\n", - " Batch 1,800 of 2,813. Elapsed: 0:13:58.\n", - " Batch 1,840 of 2,813. Elapsed: 0:14:16.\n", - " Batch 1,880 of 2,813. Elapsed: 0:14:35.\n", - " Batch 1,920 of 2,813. Elapsed: 0:14:54.\n", - " Batch 1,960 of 2,813. Elapsed: 0:15:12.\n", - " Batch 2,000 of 2,813. Elapsed: 0:15:31.\n", - " Batch 2,040 of 2,813. Elapsed: 0:15:49.\n", - " Batch 2,080 of 2,813. Elapsed: 0:16:08.\n", - " Batch 2,120 of 2,813. Elapsed: 0:16:27.\n", - " Batch 2,160 of 2,813. Elapsed: 0:16:45.\n", - " Batch 2,200 of 2,813. Elapsed: 0:17:04.\n", - " Batch 2,240 of 2,813. Elapsed: 0:17:23.\n", - " Batch 2,280 of 2,813. Elapsed: 0:17:41.\n", - " Batch 2,320 of 2,813. Elapsed: 0:18:00.\n", - " Batch 2,360 of 2,813. Elapsed: 0:18:18.\n", - " Batch 2,400 of 2,813. Elapsed: 0:18:37.\n", - " Batch 2,440 of 2,813. Elapsed: 0:18:56.\n", - " Batch 2,480 of 2,813. Elapsed: 0:19:14.\n", - " Batch 2,520 of 2,813. 
Elapsed: 0:19:33.\n", - " Batch 2,560 of 2,813. Elapsed: 0:19:51.\n", - " Batch 2,600 of 2,813. Elapsed: 0:20:10.\n", - " Batch 2,640 of 2,813. Elapsed: 0:20:29.\n", - " Batch 2,680 of 2,813. Elapsed: 0:20:47.\n", - " Batch 2,720 of 2,813. Elapsed: 0:21:06.\n", - " Batch 2,760 of 2,813. Elapsed: 0:21:24.\n", - " Batch 2,800 of 2,813. Elapsed: 0:21:43.\n", - "\n", - " Average training loss: 0.69\n", - " Training epcoh took: 0:21:49\n", - "\n", - "Running Validation...\n", - " Accuracy: 0.56\n", - " Validation Loss: 0.70\n", - " Validation took: 0:00:51\n", - "\n", - "======== Epoch 2 / 3 ========\n", - "Training...\n", - " Batch 40 of 2,813. Elapsed: 0:00:19.\n", - " Batch 80 of 2,813. Elapsed: 0:00:37.\n", - " Batch 120 of 2,813. Elapsed: 0:00:56.\n", - " Batch 160 of 2,813. Elapsed: 0:01:14.\n", - " Batch 200 of 2,813. Elapsed: 0:01:33.\n", - " Batch 240 of 2,813. Elapsed: 0:01:52.\n", - " Batch 280 of 2,813. Elapsed: 0:02:10.\n", - " Batch 320 of 2,813. Elapsed: 0:02:29.\n", - " Batch 360 of 2,813. Elapsed: 0:02:47.\n", - " Batch 400 of 2,813. Elapsed: 0:03:06.\n", - " Batch 440 of 2,813. Elapsed: 0:03:25.\n", - " Batch 480 of 2,813. Elapsed: 0:03:43.\n", - " Batch 520 of 2,813. Elapsed: 0:04:02.\n", - " Batch 560 of 2,813. Elapsed: 0:04:21.\n", - " Batch 600 of 2,813. Elapsed: 0:04:39.\n", - " Batch 640 of 2,813. Elapsed: 0:04:58.\n", - " Batch 680 of 2,813. Elapsed: 0:05:16.\n", - " Batch 720 of 2,813. Elapsed: 0:05:35.\n", - " Batch 760 of 2,813. Elapsed: 0:05:54.\n", - " Batch 800 of 2,813. Elapsed: 0:06:12.\n", - " Batch 840 of 2,813. Elapsed: 0:06:31.\n", - " Batch 880 of 2,813. Elapsed: 0:06:49.\n", - " Batch 920 of 2,813. Elapsed: 0:07:08.\n", - " Batch 960 of 2,813. Elapsed: 0:07:27.\n", - " Batch 1,000 of 2,813. Elapsed: 0:07:45.\n", - " Batch 1,040 of 2,813. Elapsed: 0:08:04.\n", - " Batch 1,080 of 2,813. Elapsed: 0:08:22.\n", - " Batch 1,120 of 2,813. Elapsed: 0:08:41.\n", - " Batch 1,160 of 2,813. Elapsed: 0:09:00.\n", - " Batch 1,200 of 2,813. Elapsed: 0:09:18.\n", - " Batch 1,240 of 2,813. Elapsed: 0:09:37.\n", - " Batch 1,280 of 2,813. Elapsed: 0:09:55.\n", - " Batch 1,320 of 2,813. Elapsed: 0:10:14.\n", - " Batch 1,360 of 2,813. Elapsed: 0:10:33.\n", - " Batch 1,400 of 2,813. Elapsed: 0:10:51.\n", - " Batch 1,440 of 2,813. Elapsed: 0:11:10.\n", - " Batch 1,480 of 2,813. Elapsed: 0:11:28.\n", - " Batch 1,520 of 2,813. Elapsed: 0:11:47.\n", - " Batch 1,560 of 2,813. Elapsed: 0:12:06.\n", - " Batch 1,600 of 2,813. Elapsed: 0:12:24.\n", - " Batch 1,640 of 2,813. Elapsed: 0:12:43.\n", - " Batch 1,680 of 2,813. Elapsed: 0:13:01.\n", - " Batch 1,720 of 2,813. Elapsed: 0:13:20.\n", - " Batch 1,760 of 2,813. Elapsed: 0:13:39.\n", - " Batch 1,800 of 2,813. Elapsed: 0:13:57.\n", - " Batch 1,840 of 2,813. Elapsed: 0:14:16.\n", - " Batch 1,880 of 2,813. Elapsed: 0:14:34.\n", - " Batch 1,920 of 2,813. Elapsed: 0:14:53.\n", - " Batch 1,960 of 2,813. Elapsed: 0:15:12.\n", - " Batch 2,000 of 2,813. Elapsed: 0:15:30.\n", - " Batch 2,040 of 2,813. Elapsed: 0:15:49.\n", - " Batch 2,080 of 2,813. Elapsed: 0:16:07.\n", - " Batch 2,120 of 2,813. Elapsed: 0:16:26.\n", - " Batch 2,160 of 2,813. Elapsed: 0:16:45.\n", - " Batch 2,200 of 2,813. Elapsed: 0:17:03.\n", - " Batch 2,240 of 2,813. Elapsed: 0:17:22.\n", - " Batch 2,280 of 2,813. Elapsed: 0:17:40.\n", - " Batch 2,320 of 2,813. Elapsed: 0:17:59.\n", - " Batch 2,360 of 2,813. Elapsed: 0:18:18.\n", - " Batch 2,400 of 2,813. Elapsed: 0:18:36.\n", - " Batch 2,440 of 2,813. Elapsed: 0:18:55.\n", - " Batch 2,480 of 2,813. 
Elapsed: 0:19:13.\n", - " Batch 2,520 of 2,813. Elapsed: 0:19:32.\n", - " Batch 2,560 of 2,813. Elapsed: 0:19:51.\n", - " Batch 2,600 of 2,813. Elapsed: 0:20:09.\n", - " Batch 2,640 of 2,813. Elapsed: 0:20:28.\n", - " Batch 2,680 of 2,813. Elapsed: 0:20:46.\n", - " Batch 2,720 of 2,813. Elapsed: 0:21:05.\n", - " Batch 2,760 of 2,813. Elapsed: 0:21:24.\n", - " Batch 2,800 of 2,813. Elapsed: 0:21:42.\n", - "\n", - " Average training loss: 0.69\n", - " Training epcoh took: 0:21:48\n", - "\n", - "Running Validation...\n", - " Accuracy: 0.56\n", - " Validation Loss: 0.69\n", - " Validation took: 0:00:51\n", - "\n", - "======== Epoch 3 / 3 ========\n", - "Training...\n", - " Batch 40 of 2,813. Elapsed: 0:00:19.\n", - " Batch 80 of 2,813. Elapsed: 0:00:37.\n", - " Batch 120 of 2,813. Elapsed: 0:00:56.\n", - " Batch 160 of 2,813. Elapsed: 0:01:14.\n", - " Batch 200 of 2,813. Elapsed: 0:01:33.\n", - " Batch 240 of 2,813. Elapsed: 0:01:52.\n", - " Batch 280 of 2,813. Elapsed: 0:02:10.\n", - " Batch 320 of 2,813. Elapsed: 0:02:29.\n", - " Batch 360 of 2,813. Elapsed: 0:02:47.\n", - " Batch 400 of 2,813. Elapsed: 0:03:06.\n", - " Batch 440 of 2,813. Elapsed: 0:03:25.\n", - " Batch 480 of 2,813. Elapsed: 0:03:43.\n", - " Batch 520 of 2,813. Elapsed: 0:04:02.\n", - " Batch 560 of 2,813. Elapsed: 0:04:21.\n", - " Batch 600 of 2,813. Elapsed: 0:04:39.\n", - " Batch 640 of 2,813. Elapsed: 0:04:58.\n", - " Batch 680 of 2,813. Elapsed: 0:05:16.\n", - " Batch 720 of 2,813. Elapsed: 0:05:35.\n", - " Batch 760 of 2,813. Elapsed: 0:05:54.\n", - " Batch 800 of 2,813. Elapsed: 0:06:12.\n", - " Batch 840 of 2,813. Elapsed: 0:06:31.\n", - " Batch 880 of 2,813. Elapsed: 0:06:49.\n", - " Batch 920 of 2,813. Elapsed: 0:07:08.\n", - " Batch 960 of 2,813. Elapsed: 0:07:27.\n", - " Batch 1,000 of 2,813. Elapsed: 0:07:45.\n", - " Batch 1,040 of 2,813. Elapsed: 0:08:04.\n", - " Batch 1,080 of 2,813. Elapsed: 0:08:22.\n", - " Batch 1,120 of 2,813. Elapsed: 0:08:41.\n", - " Batch 1,160 of 2,813. Elapsed: 0:09:00.\n", - " Batch 1,200 of 2,813. Elapsed: 0:09:18.\n", - " Batch 1,240 of 2,813. Elapsed: 0:09:37.\n", - " Batch 1,280 of 2,813. Elapsed: 0:09:55.\n", - " Batch 1,320 of 2,813. Elapsed: 0:10:14.\n", - " Batch 1,360 of 2,813. Elapsed: 0:10:33.\n", - " Batch 1,400 of 2,813. Elapsed: 0:10:51.\n", - " Batch 1,440 of 2,813. Elapsed: 0:11:10.\n", - " Batch 1,480 of 2,813. Elapsed: 0:11:29.\n", - " Batch 1,520 of 2,813. Elapsed: 0:11:47.\n", - " Batch 1,560 of 2,813. Elapsed: 0:12:06.\n", - " Batch 1,600 of 2,813. Elapsed: 0:12:24.\n", - " Batch 1,640 of 2,813. Elapsed: 0:12:43.\n", - " Batch 1,680 of 2,813. Elapsed: 0:13:02.\n", - " Batch 1,720 of 2,813. Elapsed: 0:13:20.\n", - " Batch 1,760 of 2,813. Elapsed: 0:13:39.\n", - " Batch 1,800 of 2,813. Elapsed: 0:13:57.\n", - " Batch 1,840 of 2,813. Elapsed: 0:14:16.\n", - " Batch 1,880 of 2,813. Elapsed: 0:14:35.\n", - " Batch 1,920 of 2,813. Elapsed: 0:14:53.\n", - " Batch 1,960 of 2,813. Elapsed: 0:15:12.\n", - " Batch 2,000 of 2,813. Elapsed: 0:15:30.\n", - " Batch 2,040 of 2,813. Elapsed: 0:15:49.\n", - " Batch 2,080 of 2,813. Elapsed: 0:16:08.\n", - " Batch 2,120 of 2,813. Elapsed: 0:16:26.\n", - " Batch 2,160 of 2,813. Elapsed: 0:16:45.\n", - " Batch 2,200 of 2,813. Elapsed: 0:17:03.\n", - " Batch 2,240 of 2,813. Elapsed: 0:17:22.\n", - " Batch 2,280 of 2,813. Elapsed: 0:17:41.\n", - " Batch 2,320 of 2,813. Elapsed: 0:17:59.\n", - " Batch 2,360 of 2,813. Elapsed: 0:18:18.\n", - " Batch 2,400 of 2,813. Elapsed: 0:18:37.\n", - " Batch 2,440 of 2,813. 
Elapsed: 0:18:55.\n", - " Batch 2,480 of 2,813. Elapsed: 0:19:14.\n", - " Batch 2,520 of 2,813. Elapsed: 0:19:32.\n", - " Batch 2,560 of 2,813. Elapsed: 0:19:51.\n", - " Batch 2,600 of 2,813. Elapsed: 0:20:10.\n", - " Batch 2,640 of 2,813. Elapsed: 0:20:28.\n", - " Batch 2,680 of 2,813. Elapsed: 0:20:47.\n", - " Batch 2,720 of 2,813. Elapsed: 0:21:05.\n", - " Batch 2,760 of 2,813. Elapsed: 0:21:24.\n", - " Batch 2,800 of 2,813. Elapsed: 0:21:43.\n", - "\n", - " Average training loss: 0.69\n", - " Training epcoh took: 0:21:49\n", - "\n", - "Running Validation...\n", - " Accuracy: 0.56\n", - " Validation Loss: 0.69\n", - " Validation took: 0:00:51\n", - "\n", - "Training complete!\n", - "Total training took 1:07:58 (h:mm:ss)\n", - " Training Loss Valid. Loss Valid. Accur. Training Time Validation Time\n", - "epoch \n", - "1 0.69 0.70 0.56 0:21:49 0:00:51\n", - "2 0.69 0.69 0.56 0:21:48 0:00:51\n", - "3 0.69 0.69 0.56 0:21:49 0:00:51\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "_K_vQ7feAS_z" - }, - "source": [ - "model_save_name = 'finetuned_Albert.bin'\n", - "path = F\"/content/gdrive/My Drive/ALBERTimplementation/model-fine-train/\"+model_save_name\n", - "torch.save(model.state_dict(), path)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Lu-gJG4JbD37" - }, - "source": [ - "# Li method" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "vh_aZP7kbH0E" - }, - "source": [ - "# At the time of writing, Hugging face didnt provide the class object for \n", - "# AlbertForTokenClassification, hence write your own defination below\n", - "from transformers.modeling_albert import AlbertModel, load_tf_weights_in_albert, AlbertPreTrainedModel\n", - "from transformers.configuration_albert import AlbertConfig\n", - "from transformers.tokenization_bert import BertTokenizer\n", - "import torch.nn as nn\n", - "from torch.nn import CrossEntropyLoss\n", - "class AlbertForTokenClassification(AlbertPreTrainedModel):\n", - "\n", - " def __init__(self, albert, config):\n", - " super().__init__(config)\n", - " self.num_labels = config.num_labels\n", - "\n", - " self.albert = albert\n", - " self.dropout = nn.Dropout(config.hidden_dropout_prob)\n", - " self.classifier = nn.Linear(config.hidden_size, config.num_labels)\n", - "\n", - " def forward(\n", - " self,\n", - " input_ids=None,\n", - " attention_mask=None,\n", - " token_type_ids=None,\n", - " position_ids=None,\n", - " head_mask=None,\n", - " inputs_embeds=None,\n", - " labels=None,\n", - " ):\n", - "\n", - " outputs = self.albert(\n", - " input_ids,\n", - " attention_mask=attention_mask,\n", - " token_type_ids=token_type_ids,\n", - " position_ids=position_ids,\n", - " head_mask=head_mask,\n", - " inputs_embeds=inputs_embeds,\n", - " )\n", - "\n", - " sequence_output = outputs[0]\n", - "\n", - " sequence_output = self.dropout(sequence_output)\n", - " logits = self.classifier(sequence_output)\n", - "\n", - " return logits" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "k2W7RmZcbSwf" - }, - "source": [ - "VOCAB_FILE = \"/content/gdrive/My Drive/ALBERTimplementation/AG_News/vocab.txt\" # This is the vocab file output from Build Vocab step\n", - "CONFIG_FILE = \"/content/gdrive/My Drive/ALBERTimplementation/AG_News/albert_config.json\"\n", - "ALBERT_PRETRAIN_CHECKPOINT = \"/content/gdrive/My Drive/ALBERTimplementation/AG_News/model.ckpt-best.index\" # This is the model checkpoint output from 
Albert Pretrain step\n", - "tokenizer = BertTokenizer(vocab_file=VOCAB_FILE)\n", - "config = AlbertConfig.from_json_file(CONFIG_FILE)\n", - "model = AlbertModel(config)\n", - "model = load_tf_weights_in_albert(model, config,ALBERT_PRETRAIN_CHECKPOINT)\n", - "# If the variables that could not be initialized are only those for the MLM and sentence order prediction task\n", - "# Then the error could be ignored\n", - "# As that is not required for the AlbertForTokenClassification we are trying to build here" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "WtE_Lb7josh4" - }, - "source": [ - "# df = pd.read_csv('/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/validation.tsv', delimiter='\\t')\n", - "# df.isna().values.any()\n", - "# df['text'] = df['text'].fillna('0')\n", - "# df.to_csv('/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/validation.tsv',sep='\\t' ,header=True, index=False)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "vtZ6AzKxba9r", - "outputId": "9ee3e15e-b857-422d-efaf-aafc86bdf7ce", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 460 - } - }, - "source": [ - "# train_data = pd.read_csv('/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/train.tsv', sep='\\t')\n", - "# print(\"Number of training examples {}\".format(len(train_data)))\n", - "# num_examples = 100000\n", - "# train = train_data[:num_examples].text.values\n", - "# labels = train_data[:num_examples].label.values\n", - "TRAIN_FILE = \"/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/train.tsv\"\n", - "EVAL_FILE = \"/content/gdrive/My Drive/ALBERTimplementation/sentiment_data/validation.tsv\"\n", - "\n", - "import numpy as np\n", - "def label_sent(name_tokens, sent_tokens):\n", - "    label = []\n", - "    i = 0\n", - "    if len(name_tokens)>len(sent_tokens):\n", - "        label = np.zeros(len(sent_tokens))\n", - "    else:\n", - "        while i<len(sent_tokens):\n", - "            if name_tokens[0] == sent_tokens[i]:\n", - "                found_match = True\n", - "                for j in range(len(name_tokens)-1):\n", - "                    if ((i+j+1)>=len(sent_tokens)):\n", - "                        return label\n", - "                    if name_tokens[j+1] != sent_tokens[i+j+1]:\n", - "                        found_match = False\n", - "                if found_match:\n", - "                    label.extend(list(np.ones(len(name_tokens)).astype(int)))\n", - "                    i = i + len(name_tokens)\n", - "                else: \n", - "                    label.extend([0])\n", - "                    i = i+ 1\n", - "            else:\n", - "                label.extend([0])\n", - "                i=i+1\n", - "    return label\n", - "\n", - "import pandas as pd\n", - "df_data_train = pd.read_csv(TRAIN_FILE, sep='\\t')\n", - "df_data_train['review_tokens'] = df_data_train.text.apply(tokenizer.tokenize)\n", - "# df_data_train['dish_name_tokens'] = df_data_train.dish_name_tokens.apply(tokenizer.tokenize)\n", - "# df_data_train['review_labels'] = df_data_train.apply(lambda row: label_sent(row['dish_name_tokens'], row['review_tokens']), axis=1)\n", - "df_data_train['review_labels'] = df_data_train.label.values\n", - "df_data_val = pd.read_csv(EVAL_FILE, sep='\\t')\n", - "df_data_val['review_tokens'] = df_data_val.text.apply(tokenizer.tokenize)\n", - "# df_data_val['dish_name_tokens'] = df_data_val.dish_name_tokens.apply(tokenizer.tokenize)\n", - "# df_data_val['review_labels'] = df_data_val.apply(lambda row: label_sent(row['dish_name_tokens'], row['review_tokens']), axis=1)\n", - "df_data_val['review_labels'] = df_data_val.label.values\n", - "\n", - "MAX_LEN = 50\n", - "BATCH_SIZE = 1\n", - "from keras.preprocessing.sequence import pad_sequences\n", - "import torch\n", - "from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler\n", - "\n", - "tr_inputs = 
pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in df_data_train['review_tokens']],\n", - " maxlen=MAX_LEN, dtype=\"long\", truncating=\"post\", padding=\"post\")\n", - "tr_tags = pad_sequences(df_data_train['review_labels'],\n", - " maxlen=MAX_LEN, padding=\"post\", #changed max len here --Muku\n", - " dtype=\"long\", truncating=\"post\")\n", - "# create the mask to ignore the padded elements in the sequences.\n", - "tr_masks = [[float(i>0) for i in ii] for ii in tr_inputs]\n", - "tr_inputs = torch.tensor(tr_inputs)\n", - "tr_tags = torch.tensor(tr_tags)\n", - "tr_masks = torch.tensor(tr_masks)\n", - "train_data = TensorDataset(tr_inputs, tr_masks, tr_tags)\n", - "train_sampler = RandomSampler(train_data)\n", - "train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=BATCH_SIZE)\n", - "\n", - "\n", - "val_inputs = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in df_data_val['review_tokens']],\n", - " maxlen=MAX_LEN, dtype=\"long\", truncating=\"post\", padding=\"post\")\n", - "val_tags = pad_sequences(df_data_val['review_labels'],\n", - " maxlen=MAX_LEN, padding=\"post\",\n", - " dtype=\"long\", truncating=\"post\")\n", - "# create the mask to ignore the padded elements in the sequences.\n", - "val_masks = [[float(i>0) for i in ii] for ii in val_inputs]\n", - "val_inputs = torch.tensor(val_inputs)\n", - "val_tags = torch.tensor(val_tags)\n", - "val_masks = torch.tensor(val_masks)\n", - "val_data = TensorDataset(val_inputs, val_masks, val_tags)\n", - "val_sampler = RandomSampler(val_data)\n", - "val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=BATCH_SIZE)" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "error", - "ename": "ValueError", - "evalue": "ignored", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras_preprocessing/sequence.py\u001b[0m in \u001b[0;36mpad_sequences\u001b[0;34m(sequences, maxlen, dtype, padding, truncating, value)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 68\u001b[0;31m \u001b[0mlengths\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 69\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mflag\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mTypeError\u001b[0m: object of type 'int' has no len()", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 56\u001b[0m tr_tags = pad_sequences(df_data_train['review_labels'],\n\u001b[1;32m 57\u001b[0m \u001b[0mmaxlen\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mMAX_LEN\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpadding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"post\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m#changed max len here --Muku\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 
58\u001b[0;31m dtype=\"long\", truncating=\"post\")\n\u001b[0m\u001b[1;32m 59\u001b[0m \u001b[0;31m# create the mask to ignore the padded elements in the sequences.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[0mtr_masks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mii\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mii\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtr_inputs\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/preprocessing/sequence.py\u001b[0m in \u001b[0;36mpad_sequences\u001b[0;34m(sequences, maxlen, dtype, padding, truncating, value)\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras_preprocessing/sequence.py\u001b[0m in \u001b[0;36mpad_sequences\u001b[0;34m(sequences, maxlen, dtype, padding, truncating, value)\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 73\u001b[0m raise ValueError('`sequences` must be a list of iterables. '\n\u001b[0;32m---> 74\u001b[0;31m 'Found non-iterable: ' + str(x))\n\u001b[0m\u001b[1;32m 75\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 76\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmaxlen\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: `sequences` must be a list of iterables. 
Found non-iterable: 1" - ] - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "UvGdYSVUbi6H" - }, - "source": [ - "model_tokenclassification = AlbertForTokenClassification(model, config)\n", - "from torch.optim import Adam\n", - "LEARNING_RATE = 0.000001\n", - "FULL_FINETUNING = True\n", - "if FULL_FINETUNING:\n", - " param_optimizer = list(model_tokenclassification.named_parameters())\n", - " no_decay = ['bias', 'gamma', 'beta']\n", - " optimizer_grouped_parameters = [\n", - " {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],\n", - " 'weight_decay_rate': 0.01},\n", - " {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],\n", - " 'weight_decay_rate': 0.0}\n", - " ]\n", - "else:\n", - " param_optimizer = list(model_tokenclassification.classifier.named_parameters()) \n", - " optimizer_grouped_parameters = [{\"params\": [p for n, p in param_optimizer]}]\n", - "optimizer = Adam(optimizer_grouped_parameters, lr=LEARNING_RATE)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "0a4vteOVbpkB" - }, - "source": [ - "\n", - "# from torch.utils.tensorboard import SummaryWriter\n", - "import time\n", - "import os.path\n", - "import torch.nn as nn\n", - "EPOCH = 5\n", - "MAX_GRAD_NORM = 1.0\n", - "ALBERT_FINETUNE_CHECKPOINT = \"outputs/finetune_checkpoint_5epoch_50neg_1e-5lr\"\n", - "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", - "if torch.cuda.device_count() > 1:\n", - " print(\"Let's use\", torch.cuda.device_count(), \"GPUs!\")\n", - " model_tokenclassification = nn.DataParallel(model_tokenclassification)\n", - "model_tokenclassification.to(device)\n", - "if os.path.isfile(ALBERT_FINETUNE_CHECKPOINT):\n", - " print(f\"--- Load from checkpoint ---\")\n", - " checkpoint = torch.load(ALBERT_FINETUNE_CHECKPOINT)\n", - " model_tokenclassification.load_state_dict(checkpoint['model_state_dict'])\n", - " optimizer.load_state_dict(checkpoint['optimizer_state_dict'])\n", - " epoch = checkpoint['epoch']\n", - " loss = checkpoint['loss']\n", - " train_losses = checkpoint['train_losses']\n", - " train_acc = checkpoint['train_acc']\n", - " val_losses = checkpoint['val_losses']\n", - " val_acc = checkpoint['val_acc']\n", - " \n", - "else:\n", - " epoch = -1\n", - " train_losses, train_acc, val_losses, val_acc = [], [], [], []\n", - "print(f\"--- Resume/Start training ---\") \n", - "for i in range(epoch+1, EPOCH): \n", - " print(f\"--- epoch: {i} ---\")\n", - " start_time = time.time()\n", - " \n", - " # TRAIN loop\n", - " model_tokenclassification.train()\n", - " tr_loss, tr_acc, nb_tr_steps = 0, 0, 0\n", - " for step, batch in enumerate(train_dataloader):\n", - " # add batch to gpu\n", - " batch = tuple(t.to(device) for t in batch)\n", - " b_input_ids, b_input_mask, b_labels = batch\n", - " # forward pass\n", - " b_outputs = model_tokenclassification(b_input_ids, token_type_ids=None,\n", - " attention_mask=b_input_mask, labels=b_labels)\n", - " \n", - " loss_fct = CrossEntropyLoss()\n", - " # Only keep active parts of the loss\n", - " b_active_loss = b_input_mask.view(-1) == 1\n", - " b_active_logits = b_outputs.view(-1, config.num_labels)[b_active_loss]\n", - " b_active_labels = b_labels.view(-1)[b_active_loss]\n", - " loss = loss_fct(b_active_logits, b_active_labels)\n", - " acc = torch.mean((torch.max(b_active_logits.detach(),1)[1] == b_active_labels.detach()).float())\n", - " \n", - " train_losses.append(loss.detach().item())\n", - " 
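# NOTE: the custom AlbertForTokenClassification above returns raw logits, so the loss is\n", - "        # assembled manually here, with b_input_mask selecting the non-padded positions.\n", - "        # clip_grad_norm_ / zero_grad a few lines below are called on model (the bare AlbertModel);\n", - "        # if the classifier head should also be clipped and have its gradients cleared,\n", - "        # model_tokenclassification.parameters() / model_tokenclassification.zero_grad() would be needed.\n", - "        # The pad_sequences ValueError shown earlier is raised because 'review_labels' holds a single\n", - "        # integer per row while pad_sequences expects one sequence per row; a minimal, hypothetical fix\n", - "        # (assuming the sentence label is meant to be broadcast over its tokens) would be, before padding:\n", - "        #     df_data_train['review_labels'] = df_data_train.apply(\n", - "        #         lambda row: [row['label']] * len(row['review_tokens']), axis=1)\n", - "        # (and likewise for df_data_val).\n", - "        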
train_acc.append(acc)\n", - " # backward pass\n", - " loss.backward()\n", - " # track train loss\n", - " tr_loss += loss.item()\n", - " tr_acc += acc\n", - " nb_tr_steps += 1\n", - " # gradient clipping\n", - " torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=MAX_GRAD_NORM)\n", - " # update parameters\n", - " optimizer.step()\n", - " model.zero_grad()\n", - "\n", - " # print train loss per epoch\n", - " print(f\"Train loss: {(tr_loss/nb_tr_steps)}\")\n", - " print(f\"Train Accuracy: {(tr_acc/nb_tr_steps)}\")\n", - " print(f\"Train Time: {(time.time()-start_time)/60} mins\")\n", - "\n", - " # VALIDATION on validation set\n", - " start_time = time.time()\n", - " model_tokenclassification.eval()\n", - " eval_loss, eval_acc = 0, 0\n", - " nb_eval_steps = 0\n", - " for batch in val_dataloader:\n", - " batch = tuple(t.to(device) for t in batch)\n", - " b_input_ids, b_input_mask, b_labels = batch\n", - "\n", - " with torch.no_grad():\n", - " \n", - " b_outputs = model_tokenclassification(b_input_ids, token_type_ids=None,\n", - " attention_mask=b_input_mask, labels=b_labels)\n", - "\n", - " loss_fct = CrossEntropyLoss()\n", - " # Only keep active parts of the loss\n", - " b_active_loss = b_input_mask.view(-1) == 1\n", - " b_active_logits = b_outputs.view(-1, config.num_labels)[b_active_loss]\n", - " b_active_labels = b_labels.view(-1)[b_active_loss]\n", - " loss = loss_fct(b_active_logits, b_active_labels)\n", - " acc = np.mean(np.argmax(b_active_logits.detach().cpu().numpy(), axis=1).flatten() == b_active_labels.detach().cpu().numpy().flatten())\n", - "\n", - " eval_loss += loss.mean().item()\n", - " eval_acc += acc\n", - " nb_eval_steps += 1 \n", - " eval_loss = eval_loss/nb_eval_steps\n", - " eval_acc = eval_acc/nb_eval_steps\n", - " val_losses.append(eval_loss)\n", - " val_acc.append(eval_acc)\n", - " print(f\"Validation loss: {eval_loss}\")\n", - " print(f\"Validation Accuracy: {(eval_acc)}\")\n", - " print(f\"Validation Time: {(time.time()-start_time)/60} mins\") \n", - " \n", - " \n", - " print(f\"--- Save to checkpoint ---\") \n", - " torch.save({\n", - " 'epoch': i,\n", - " 'model_state_dict': model_tokenclassification.state_dict(),\n", - " 'optimizer_state_dict': optimizer.state_dict(),\n", - " 'loss': loss,\n", - " 'train_losses': train_losses,\n", - " 'train_acc': train_acc,\n", - " 'val_losses': val_losses,\n", - " 'val_acc': val_acc}\n", - " , ALBERT_FINETUNE_CHECKPOINT)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "skGJUzsJbwYn" - }, - "source": [ - "\n", - "def predict(texts):\n", - " tokenized_texts = [tokenizer.tokenize(txt) for txt in texts]\n", - " input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],\n", - " maxlen=MAX_LEN, dtype=\"long\", truncating=\"post\", padding=\"post\")\n", - " attention_mask = [[float(i>0) for i in ii] for ii in input_ids]\n", - " \n", - " input_ids = torch.tensor(input_ids)\n", - " attention_mask = torch.tensor(attention_mask)\n", - "\n", - " dataset = TensorDataset(input_ids, attention_mask)\n", - " datasampler = SequentialSampler(dataset)\n", - " dataloader = DataLoader(dataset, sampler=datasampler, batch_size=BATCH_SIZE) \n", - " \n", - " predicted_labels = []\n", - " \n", - " for batch in dataloader:\n", - " batch = tuple(t.to(device) for t in batch)\n", - " b_input_ids, b_input_mask = batch\n", - " \n", - " with torch.no_grad():\n", - " logits = model_tokenclassification(b_input_ids, token_type_ids=None,\n", - " 
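# (token-level predictions are multiplied by b_input_mask below, so padded positions stay 0)\n", - "            # NOTE: texts = df_data_val.review.values further below assumes a 'review' column; the TSVs\n", - "            # loaded earlier only define 'text' (tokenized into 'review_tokens'), so df_data_val.text.values\n", - "            # is most likely the intended column.\n", - "                                             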
attention_mask=b_input_mask)\n", - "\n", - " predicted_labels.append(np.multiply(np.argmax(logits.detach().cpu().numpy(),axis=2), b_input_mask.detach().cpu().numpy()))\n", - " # np.concatenate(predicted_labels), to flatten list of arrays of batch_size * max_len into list of arrays of max_len\n", - " return np.concatenate(predicted_labels).astype(int), tokenized_texts\n", - "\n", - "texts = df_data_val.review.values\n", - "predicted_labels, _ = predict(texts)\n", - "df_data_val['predicted_review_label'] = list(predicted_labels)\n", - "\n", - "def get_dish_candidate_names(predicted_label, tokenized_text):\n", - " name_lists = []\n", - " if len(np.where(predicted_label>0)[0])>0:\n", - " name_idx_combined = np.where(predicted_label>0)[0]\n", - " name_idxs = np.split(name_idx_combined, np.where(np.diff(name_idx_combined) != 1)[0]+1)\n", - " name_lists.append([\" \".join(np.take(tokenized_text,name_idx)) for name_idx in name_idxs])\n", - " # If there duplicate names in the name_lists\n", - " name_lists = np.unique(name_lists)\n", - " return name_lists\n", - " else:\n", - " return None\n", - "df_data_val['candidate_name']=df_data_val.apply(lambda row: get_dish_candidate_names(row.predicted_review_label, row.review_tokens)\n", - " , axis=1)" - ], - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file