From c0f8c03462b3560622d883fcdd0b63c1db5b2860 Mon Sep 17 00:00:00 2001 From: Trevor Grant Date: Wed, 30 Aug 2023 16:26:38 -0500 Subject: [PATCH 1/4] Signed-off-by: Trevor Grant --- ...g_falcon_rw_1b_model_with_caikit_nlp.ipynb | 1219 +++++++++++++++++ 1 file changed, 1219 insertions(+) create mode 100644 Finetuning_falcon_rw_1b_model_with_caikit_nlp.ipynb diff --git a/Finetuning_falcon_rw_1b_model_with_caikit_nlp.ipynb b/Finetuning_falcon_rw_1b_model_with_caikit_nlp.ipynb new file mode 100644 index 00000000..cc9e9806 --- /dev/null +++ b/Finetuning_falcon_rw_1b_model_with_caikit_nlp.ipynb @@ -0,0 +1,1219 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nqAU3Yh-rha5" + }, + "source": [ + "# Step 1: Install Caikit\n", + "\n", + "## Installation and Setup\n", + "\n", + "In this example Jupyter notebook, we'll be using various Python libraries and pre-trained models for evaluating and analyzing natural language processing tasks. Before we proceed, we need to install the required dependencies and download some essential resources.\n", + "\n", + "### 1. Installing Libraries\n", + "\n", + "To begin, we'll install the following Python packages using `pip`:\n", + "\n", + "- `evaluate`: A library for evaluating model performance on different NLP tasks.\n", + "- `rouge_score`: A package for calculating ROUGE (Recall-Oriented Understudy for Gisting Evaluation) metrics for text summarization.\n", + "\n", + "Please note that these libraries may have dependencies, so we'll ensure all the necessary requirements are met during the installation process.\n", + "\n", + "```python\n", + "!pip install evaluate\n", + "!pip install rouge_score\n", + "```\n", + "\n", + "### 2. 
Installing `caikit` and `caikit-nlp`\n", + "\n", + "Next, we'll install the caikit and caikit-nlp libraries. We pin caikit to a specific release tag because the project is still in beta and breaking changes can happen; caikit-nlp is installed from the latest commit on its default branch. Note that caikit-nlp declares its own caikit version requirement, so pip may replace the pinned caikit with a newer release.\n", + "\n", + "```python\n", + "!pip install git+https://github.com/caikit/caikit@v0.11.3\n", + "!pip install git+https://github.com/caikit/caikit-nlp\n", + "```\n", + "\n", + "### 3. Downloading Additional Resources\n", + "\n", + "In order to explore the capabilities of pre-trained models, we'll need to download the caikit-nlp repository.\n", + "\n", + "\n", + "```python\n", + "!git clone https://github.com/caikit/caikit-nlp\n", + "```\n", + "\n", + "Now that we have all the necessary libraries and resources installed, we can move on to the next steps in our NLP analysis using these powerful tools!" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZhZcVULDrTRz", + "outputId": "f0092a68-49eb-4855-ea86-119fbdf4ed31" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting evaluate\n", + " Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)\n", + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/81.4 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.4/81.4 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting datasets>=2.0.0 (from evaluate)\n", + " Downloading datasets-2.14.4-py3-none-any.whl (519 kB)\n", + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/519.3 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.3/519.3 kB\u001b[0m \u001b[31m20.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already 
satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from evaluate) (1.23.5)\n", + "Collecting dill (from evaluate)\n", + " Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m16.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from evaluate) (1.5.3)\n", + "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from evaluate) (2.31.0)\n", + "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from evaluate) (4.66.1)\n", + "Collecting xxhash (from evaluate)\n", + " Downloading xxhash-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m26.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting multiprocess (from evaluate)\n", + " Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m18.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: fsspec[http]>=2021.05.0 in /usr/local/lib/python3.10/dist-packages (from evaluate) (2023.6.0)\n", + "Requirement already satisfied: huggingface-hub>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from evaluate) (0.16.4)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from evaluate) (23.1)\n", + "Collecting responses<0.19 (from evaluate)\n", + " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", + "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.0.0->evaluate) (9.0.0)\n", + "Requirement already satisfied: 
aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets>=2.0.0->evaluate) (3.8.5)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.0.0->evaluate) (6.0.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.7.0->evaluate) (3.12.2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.7.0->evaluate) (4.7.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->evaluate) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->evaluate) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->evaluate) (2.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->evaluate) (2023.7.22)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->evaluate) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->evaluate) (2023.3)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (23.1.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (6.0.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (4.0.3)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.9.2)\n", + "Requirement already satisfied: 
frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.4.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.3.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->evaluate) (1.16.0)\n", + "Installing collected packages: xxhash, dill, responses, multiprocess, datasets, evaluate\n", + "Successfully installed datasets-2.14.4 dill-0.3.7 evaluate-0.4.0 multiprocess-0.70.15 responses-0.18.0 xxhash-3.3.0\n", + "Collecting rouge_score\n", + " Downloading rouge_score-0.1.2.tar.gz (17 kB)\n", + " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from rouge_score) (1.4.0)\n", + "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from rouge_score) (3.8.1)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from rouge_score) (1.23.5)\n", + "Requirement already satisfied: six>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from rouge_score) (1.16.0)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->rouge_score) (8.1.7)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->rouge_score) (1.3.2)\n", + "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk->rouge_score) (2023.6.3)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from nltk->rouge_score) (4.66.1)\n", + "Building wheels for collected packages: rouge_score\n", + " Building wheel for rouge_score (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24932 sha256=399dede598bd07e104577a176856f7d1d7a65064e2ef8884fd211c917e9a0cba\n", + " Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n", + "Successfully built rouge_score\n", + "Installing collected packages: rouge_score\n", + "Successfully installed rouge_score-0.1.2\n", + "Collecting git+https://github.com/caikit/caikit@v0.11.3\n", + " Cloning https://github.com/caikit/caikit (to revision v0.11.3) to /tmp/pip-req-build-x9xbt7q1\n", + " Running command git clone --filter=blob:none --quiet https://github.com/caikit/caikit /tmp/pip-req-build-x9xbt7q1\n", + " Running command git checkout -q da1dc8fa7df4f9e9ba5a5b7d926cb38b9e2f1757\n", + " Resolved https://github.com/caikit/caikit to commit da1dc8fa7df4f9e9ba5a5b7d926cb38b9e2f1757\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Collecting alchemy-config<2.0.0,>=1.1.1 (from caikit==0.0.1)\n", + " Downloading alchemy_config-1.1.2-py3-none-any.whl (7.2 kB)\n", + "Collecting alchemy-logging<2.0.0,>=1.0.4 (from caikit==0.0.1)\n", + " Downloading alchemy_logging-1.2.0-py3-none-any.whl (14 kB)\n", + "Collecting anytree<3.0,>=2.7.0 (from caikit==0.0.1)\n", + " Downloading anytree-2.9.0-py3-none-any.whl (38 kB)\n", + "Collecting docstring-parser<0.16.0,>=0.14.1 (from caikit==0.0.1)\n", + " Downloading docstring_parser-0.15-py3-none-any.whl (36 kB)\n", + "Requirement already satisfied: grpcio!=1.55.0,<2.0,>=1.35.0 in /usr/local/lib/python3.10/dist-packages (from caikit==0.0.1) (1.57.0)\n", + "Collecting ijson<3.3.0,>=3.1.4 (from caikit==0.0.1)\n", + " Downloading ijson-3.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (111 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.8/111.8 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting munch<5.0,>=2.5.0 (from caikit==0.0.1)\n", + " Downloading munch-4.0.0-py2.py3-none-any.whl (9.9 kB)\n", + "Requirement already satisfied: numpy<2,>=1.20 in /usr/local/lib/python3.10/dist-packages (from caikit==0.0.1) (1.23.5)\n", + "Requirement already satisfied: protobuf<5,>=3.19.0 in /usr/local/lib/python3.10/dist-packages (from caikit==0.0.1) (3.20.3)\n", + "Collecting py-to-proto!=0.2.1,<0.5.0,>=0.4.0 (from caikit==0.0.1)\n", + " Downloading py_to_proto-0.4.1-py310-none-any.whl (32 kB)\n", + "Requirement already satisfied: PyYAML<7.0,>=6.0 in /usr/local/lib/python3.10/dist-packages (from caikit==0.0.1) (6.0.1)\n", + "Collecting semver<4.0,>=2.13.0 (from caikit==0.0.1)\n", + " Downloading semver-3.0.1-py3-none-any.whl (17 kB)\n", + "Requirement already satisfied: six<2.0.0,>=1.16.0 in /usr/local/lib/python3.10/dist-packages (from caikit==0.0.1) (1.16.0)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.59.0 in 
/usr/local/lib/python3.10/dist-packages (from caikit==0.0.1) (4.66.1)\n", + "Building wheels for collected packages: caikit\n", + " Building wheel for caikit (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for caikit: filename=caikit-0.0.1-py3-none-any.whl size=288707 sha256=389948254c4ab8f2e5812f47634d3f7a158c0fdc01bece32e0457c279b15c635\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-ocq0f8b7/wheels/83/70/e6/fbfc01278ea550744ce890a667227c6f09bb2e8de0a7414191\n", + "Successfully built caikit\n", + "Installing collected packages: ijson, alchemy-logging, semver, py-to-proto, munch, docstring-parser, anytree, alchemy-config, caikit\n", + "Successfully installed alchemy-config-1.1.2 alchemy-logging-1.2.0 anytree-2.9.0 caikit-0.0.1 docstring-parser-0.15 ijson-3.2.3 munch-4.0.0 py-to-proto-0.4.1 semver-3.0.1\n", + "Collecting git+https://github.com/caikit/caikit-nlp\n", + " Cloning https://github.com/caikit/caikit-nlp to /tmp/pip-req-build-zqu32mo5\n", + " Running command git clone --filter=blob:none --quiet https://github.com/caikit/caikit-nlp /tmp/pip-req-build-zqu32mo5\n", + " Resolved https://github.com/caikit/caikit-nlp to commit 77bc04e52d1fed2020a090b0f990b0a21c243601\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Collecting peft@ git+https://github.com/huggingface/peft.git#8c17d556a8fe9522e10d73d7bd3fad46a6ecae14 (from caikit-nlp==0.0.1)\n", + " Cloning https://github.com/huggingface/peft.git to /tmp/pip-install-9s_uw5b5/peft_03c72380d3764f41a32c61147cfc0c27\n", + " Running command git clone --filter=blob:none --quiet https://github.com/huggingface/peft.git /tmp/pip-install-9s_uw5b5/peft_03c72380d3764f41a32c61147cfc0c27\n", + " Resolved https://github.com/huggingface/peft.git to commit 7d99466446d65219a401a2c64f1d84d1b1be31b4\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0 (from caikit-nlp==0.0.1)\n", + " Downloading caikit-0.17.2-py3-none-any.whl (309 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m309.3/309.3 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting caikit-tgis-backend<0.2.0,>=0.1.16 (from caikit-nlp==0.0.1)\n", + " Downloading caikit_tgis_backend-0.1.16-py3-none-any.whl (24 kB)\n", + "Collecting accelerate>=0.21.0 (from caikit-nlp==0.0.1)\n", + " Downloading accelerate-0.22.0-py3-none-any.whl (251 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m251.2/251.2 kB\u001b[0m \u001b[31m28.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: datasets>=2.4.0 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (2.14.4)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (0.16.4)\n", + "Requirement already satisfied: numpy>=1.22.4 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (1.23.5)\n", + "Requirement already satisfied: 
pandas>=1.5.0 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (1.5.3)\n", + "Requirement already satisfied: scikit-learn>=1.1 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (1.2.2)\n", + "Requirement already satisfied: scipy>=1.8.1 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (1.10.1)\n", + "Requirement already satisfied: tokenizers>=0.13.3 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (0.13.3)\n", + "Requirement already satisfied: torch>=1.13.1 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (2.0.1+cu118)\n", + "Requirement already satisfied: tqdm>=4.65.0 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (4.66.1)\n", + "Requirement already satisfied: transformers>=4.31.0 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (4.32.1)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.21.0->caikit-nlp==0.0.1) (23.1)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.21.0->caikit-nlp==0.0.1) (5.9.5)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.21.0->caikit-nlp==0.0.1) (6.0.1)\n", + "Collecting caikit<0.20.0,>=0.16.0 (from caikit-tgis-backend<0.2.0,>=0.1.16->caikit-nlp==0.0.1)\n", + " Downloading caikit-0.18.0-py3-none-any.whl (309 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m309.3/309.3 kB\u001b[0m \u001b[31m35.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: grpcio<2.0,>=1.35.0 in /usr/local/lib/python3.10/dist-packages (from caikit-tgis-backend<0.2.0,>=0.1.16->caikit-nlp==0.0.1) (1.57.0)\n", + "Requirement already satisfied: requests<3,>=2.28.2 in /usr/local/lib/python3.10/dist-packages (from caikit-tgis-backend<0.2.0,>=0.1.16->caikit-nlp==0.0.1) 
(2.31.0)\n", + "Requirement already satisfied: alchemy-config<2.0.0,>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (1.1.2)\n", + "Requirement already satisfied: alchemy-logging<2.0.0,>=1.0.4 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (1.2.0)\n", + "Requirement already satisfied: anytree<3.0,>=2.7.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (2.9.0)\n", + "Requirement already satisfied: docstring-parser<0.16.0,>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (0.15)\n", + "Requirement already satisfied: ijson<3.3.0,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (3.2.3)\n", + "Requirement already satisfied: munch<5.0,>=2.5.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (4.0.0)\n", + "Requirement already satisfied: protobuf<5,>=3.19.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (3.20.3)\n", + "Requirement already satisfied: py-to-proto!=0.2.1,<0.5.0,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (0.4.1)\n", + "Requirement already satisfied: semver<4.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (3.0.1)\n", + "Requirement already satisfied: six<2.0.0,>=1.16.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (1.16.0)\n", + "Collecting fastapi[all]<1,>=0.95 (from 
caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading fastapi-0.103.0-py3-none-any.whl (66 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.2/66.2 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting sse-starlette<2,>=1.6.1 (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading sse_starlette-1.6.5-py3-none-any.whl (9.6 kB)\n", + "Collecting grpcio-health-checking<2.0,>=1.35.0 (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading grpcio_health_checking-1.57.0-py3-none-any.whl (8.6 kB)\n", + "Collecting grpcio-reflection<2.0,>=1.35.0 (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading grpcio_reflection-1.57.0-py3-none-any.whl (11 kB)\n", + "Requirement already satisfied: prometheus_client<1.0,>=0.12.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (0.17.1)\n", + "Collecting py-grpc-prometheus<0.8,>=0.7.0 (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading py_grpc_prometheus-0.7.0-py3-none-any.whl (12 kB)\n", + "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.4.0->caikit-nlp==0.0.1) (9.0.0)\n", + "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.4.0->caikit-nlp==0.0.1) (0.3.7)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets>=2.4.0->caikit-nlp==0.0.1) (3.3.0)\n", + "Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets>=2.4.0->caikit-nlp==0.0.1) (0.70.15)\n", + "Requirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.10/dist-packages (from 
datasets>=2.4.0->caikit-nlp==0.0.1) (2023.6.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets>=2.4.0->caikit-nlp==0.0.1) (3.8.5)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->caikit-nlp==0.0.1) (3.12.2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->caikit-nlp==0.0.1) (4.7.1)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5.0->caikit-nlp==0.0.1) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5.0->caikit-nlp==0.0.1) (2023.3)\n", + "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.1->caikit-nlp==0.0.1) (1.3.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.1->caikit-nlp==0.0.1) (3.2.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->caikit-nlp==0.0.1) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->caikit-nlp==0.0.1) (3.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->caikit-nlp==0.0.1) (3.1.2)\n", + "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->caikit-nlp==0.0.1) (2.0.0)\n", + "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13.1->caikit-nlp==0.0.1) (3.27.2)\n", + "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13.1->caikit-nlp==0.0.1) (16.0.6)\n", + "Requirement already satisfied: regex!=2019.12.17 in 
/usr/local/lib/python3.10/dist-packages (from transformers>=4.31.0->caikit-nlp==0.0.1) (2023.6.3)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.31.0->caikit-nlp==0.0.1) (0.3.3)\n", + "Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (2.2.1)\n", + "Collecting starlette<0.28.0,>=0.27.0 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading starlette-0.27.0-py3-none-any.whl (66 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.0/67.0 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting email-validator>=2.0.0 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading email_validator-2.0.0.post2-py3-none-any.whl (31 kB)\n", + "Collecting httpx>=0.23.0 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading httpx-0.24.1-py3-none-any.whl (75 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.4/75.4 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: itsdangerous>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (2.1.2)\n", + "Collecting orjson>=3.2.1 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading orjson-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (139 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m 
\u001b[32m139.9/139.9 kB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pydantic-extra-types>=2.0.0 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading pydantic_extra_types-2.1.0-py3-none-any.whl (16 kB)\n", + "Collecting pydantic-settings>=2.0.0 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading pydantic_settings-2.0.3-py3-none-any.whl (11 kB)\n", + "Collecting python-multipart>=0.0.5 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading python_multipart-0.0.6-py3-none-any.whl (45 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading ujson-5.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.9/53.9 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting uvicorn[standard]>=0.12.0 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading uvicorn-0.23.2-py3-none-any.whl (59 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.5/59.5 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (23.1.0)\n", + "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in 
/usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (3.2.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (6.0.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (4.0.3)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (1.9.2)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (1.4.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (1.3.1)\n", + "Collecting protobuf<5,>=3.19.0 (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading protobuf-4.24.2-cp37-abi3-manylinux2014_x86_64.whl (311 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m311.4/311.4 kB\u001b[0m \u001b[31m33.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.13.1->caikit-nlp==0.0.1) (2.1.3)\n", + "Requirement already satisfied: setuptools>=39.0.1 in /usr/local/lib/python3.10/dist-packages (from py-grpc-prometheus<0.8,>=0.7.0->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (67.7.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.28.2->caikit-tgis-backend<0.2.0,>=0.1.16->caikit-nlp==0.0.1) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from 
requests<3,>=2.28.2->caikit-tgis-backend<0.2.0,>=0.1.16->caikit-nlp==0.0.1) (2.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.28.2->caikit-tgis-backend<0.2.0,>=0.1.16->caikit-nlp==0.0.1) (2023.7.22)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13.1->caikit-nlp==0.0.1) (1.3.0)\n", + "Collecting dnspython>=2.0.0 (from email-validator>=2.0.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading dnspython-2.4.2-py3-none-any.whl (300 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m300.4/300.4 kB\u001b[0m \u001b[31m36.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting httpcore<0.18.0,>=0.15.0 (from httpx>=0.23.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading httpcore-0.17.3-py3-none-any.whl (74 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m74.5/74.5 kB\u001b[0m \u001b[31m10.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx>=0.23.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (1.3.0)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (0.5.0)\n", + "Requirement already satisfied: pydantic-core==2.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (2.6.1)\n", + "Collecting 
python-dotenv>=0.21.0 (from pydantic-settings>=2.0.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)\n", + "Requirement already satisfied: anyio<5,>=3.4.0 in /usr/local/lib/python3.10/dist-packages (from starlette<0.28.0,>=0.27.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (3.7.1)\n", + "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.12.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (8.1.7)\n", + "Collecting h11>=0.8 (from uvicorn[standard]>=0.12.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting httptools>=0.5.0 (from uvicorn[standard]>=0.12.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading httptools-0.6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (428 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m428.8/428.8 kB\u001b[0m \u001b[31m47.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn[standard]>=0.12.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading uvloop-0.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.1/4.1 MB\u001b[0m \u001b[31m88.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + 
"\u001b[?25hCollecting watchfiles>=0.13 (from uvicorn[standard]>=0.12.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading watchfiles-0.20.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m80.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting websockets>=10.4 (from uvicorn[standard]>=0.12.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m19.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (1.1.3)\n", + "Building wheels for collected packages: caikit-nlp, peft\n", + " Building wheel for caikit-nlp (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for caikit-nlp: filename=caikit_nlp-0.0.1-py3-none-any.whl size=71738 sha256=5176f598fbf41d2eaf242a7aa6d1508afeb1415739492bbdebce9c71924e4ff8\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-4qkd9tik/wheels/29/1c/3c/060d91e84e7a56eab1cb92fe59f09d22e106a75668a0cb62db\n", + " Building wheel for peft (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for peft: filename=peft-0.6.0.dev0-py3-none-any.whl size=106759 sha256=1bf4c6bdd17ccf9f387eea1e69f85089c63663c900ac5f38706b318d3c208e1e\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-4qkd9tik/wheels/d7/c7/de/1368fac8590e1b103ddc2ec2a28ad51d83aded1a3830e8a087\n", + "Successfully built caikit-nlp peft\n", + "Installing collected packages: websockets, uvloop, ujson, python-multipart, python-dotenv, py-grpc-prometheus, protobuf, orjson, httptools, h11, dnspython, watchfiles, uvicorn, starlette, httpcore, grpcio-reflection, grpcio-health-checking, email-validator, sse-starlette, pydantic-settings, pydantic-extra-types, httpx, fastapi, caikit, caikit-tgis-backend, accelerate, peft, caikit-nlp\n", + " Attempting uninstall: protobuf\n", + " Found existing installation: protobuf 3.20.3\n", + " Uninstalling protobuf-3.20.3:\n", + " Successfully uninstalled protobuf-3.20.3\n", + " Attempting uninstall: caikit\n", + " Found existing installation: caikit 0.0.1\n", + " Uninstalling caikit-0.0.1:\n", + " Successfully uninstalled caikit-0.0.1\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 4.24.2 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed accelerate-0.22.0 caikit-0.17.2 caikit-nlp-0.0.1 caikit-tgis-backend-0.1.16 dnspython-2.4.2 email-validator-2.0.0.post2 fastapi-0.103.0 grpcio-health-checking-1.57.0 grpcio-reflection-1.57.0 h11-0.14.0 httpcore-0.17.3 httptools-0.6.0 httpx-0.24.1 orjson-3.9.5 peft-0.6.0.dev0 protobuf-4.24.2 py-grpc-prometheus-0.7.0 pydantic-extra-types-2.1.0 pydantic-settings-2.0.3 python-dotenv-1.0.0 python-multipart-0.0.6 sse-starlette-1.6.5 starlette-0.27.0 ujson-5.8.0 uvicorn-0.23.2 uvloop-0.17.0 watchfiles-0.20.0 websockets-11.0.3\n", + "Cloning into 'caikit-nlp'...\n", + "remote: Enumerating objects: 2536, done.\u001b[K\n", + "remote: Counting objects: 100% (920/920), done.\u001b[K\n", + "remote: Compressing objects: 100% (268/268), done.\u001b[K\n", + "remote: Total 2536 (delta 734), reused 657 (delta 650), pack-reused 1616\u001b[K\n", + "Receiving objects: 100% (2536/2536), 1.89 MiB | 2.56 MiB/s, done.\n", + "Resolving deltas: 100% (1816/1816), done.\n" + ] + } + ], + "source": [ + "!pip install evaluate\n", + "!pip install rouge_score\n", + "\n", + "!pip install git+https://github.com/caikit/caikit@v0.11.3\n", + "!pip install git+https://github.com/caikit/caikit-nlp\n", + "\n", + "!git clone https://github.com/caikit/caikit-nlp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7RIXUii94aLy" + }, + "source": [ + "# Step 2. 
Fine Tuning\n", + "\n", + "```\n", + "!python caikit-nlp/examples/run_fine_tuning.py --dataset \"billsum\" \\\n", + " --model_name tiiuae/falcon-rw-1b \\\n", + " --num_epochs 3 \\\n", + " --output_dir tmp/falcon-rw-1b \\\n", + " --batch_size=8 \\\n", + " --accumulate_steps 32 \\\n", + " --max_source_length 512 \\\n", + " --metric rouge \\\n", + " --torch_dtype bfloat16 \\\n", + " --evaluate\n", + "```\n", + "\n", + "This is a command-line instruction to run a Python script called `run_fine_tuning.py` using the Python interpreter. It is part of the `caikit-nlp` repository and is meant to fine-tune a pre-trained model (in this case `tiiuae/falcon-rw-1b`) on a specific dataset (in this case `billsum`).\n", + "\n", + "Let's explain each argument in the command:\n", + "\n", + "1. `caikit-nlp/examples/run_fine_tuning.py`: This specifies the path to the Python script that will be executed. It is one of the example scripts in the caikit-nlp repository and drives the fine-tuning run.\n", + "1. `--dataset \"billsum\"`: This specifies the dataset to be used for tuning. In this example, the dataset is `billsum`, which contains US Congressional and California State bills together with their summaries [link](https://huggingface.co/datasets/billsum).\n", + "1. `--model_name tiiuae/falcon-rw-1b`: This indicates the base model that will be fine-tuned. In this case, it's `tiiuae/falcon-rw-1b`, which refers to the Falcon-RW 1B model hosted on Hugging Face [link](https://huggingface.co/tiiuae/falcon-rw-1b).\n", + "1. `--num_epochs 3`: This sets the number of epochs (full passes over the training data) for the fine-tuning process. Here, it's set to 3, meaning the model will go through the dataset three times during fine-tuning.\n", + "1. `--output_dir tmp/falcon-rw-1b`: This sets the directory where the fine-tuned model and related outputs will be stored. In this case, it's set to the `tmp/falcon-rw-1b` directory.\n", + "1. `--batch_size=8`: This sets the batch size used during training. 
The data will be divided into batches of 8 samples each.\n", + "1. `--accumulate_steps 32`: This specifies the number of batches over which gradients are accumulated before the weights are updated. It is useful for simulating a larger effective batch size when GPU memory is limited.\n", + "1. `--max_source_length 512`: This is a flag to set the maximum length of the input sequence.\n", + "1. `--metric rouge`: This sets the evaluation metric to ROUGE.\n", + "1. `--torch_dtype bfloat16`: This specifies what dtype to use for the training. `float32` is considered 'full precision', though other options such as `float16` and `bfloat16` (known as half precision) also exist. `bfloat16` has a wider range but less precision than `float16`, and is only available on Ampere-class (or newer) GPUs (such as the A100).\n", + "1. `--evaluate`: This signals the script to evaluate the model at the end of fine-tuning.\n", + "\n", + "\n", + "Overall, this command line script is fine-tuning the tiiuae/falcon-rw-1b model on the `billsum` dataset, with specific settings for maximum source length, batch size, accumulation steps, and so on. For a full list of available args, their descriptions, and default values, run `!python caikit-nlp/examples/run_fine_tuning.py --help`.\n", + " \n", + "The results of fine-tuning will be stored in the `tmp/falcon-rw-1b` directory." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_bIQQI_G0t5z" + }, + "source": [ + "# The `tiiuae/falcon-rw-1b` Model\n", + "\n", + "The model we'll fine-tune is `tiiuae/falcon-rw-1b` ([link](https://huggingface.co/tiiuae/falcon-rw-1b)). This model has 1B parameters. We'll fine-tune it on the `billsum` dataset and then see how it performs on summarizing new bills."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "L4deByI5aRNQ", + "outputId": "feaf7ce3-c836-4af1-996d-f40982206c76" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "env: ALLOW_DOWNLOADS=true\n", + "2023-08-23 14:46:29.987417: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "/usr/local/lib/python3.10/dist-packages/caikit/core/toolkit/errors/__init__.py:29: DeprecationWarning: The caikit.toolkit.errors package has moved to caikit.core.exceptions\n", + " _warnings.warn(\n", + " is still in the BETA phase and subject to change!\n", + "/usr/local/lib/python3.10/dist-packages/caikit/core/toolkit/error_handler.py:29: DeprecationWarning: The caikit.toolkit.error_handler package has moved to caikit.core.exceptions\n", + " _warnings.warn(\n", + "\u001b[94mExperiment Configuration\n", + "- Model Name: [tiiuae/falcon-rw-1b]\n", + " |- Inferred Model Resource Type: []\n", + "- Dataset: [billsum]\n", + "- Number of Epochs: [3]\n", + "- Learning Rate: [2e-05]\n", + "- Batch Size: [8]\n", + "- Output Directory: [tmp/falcon-rw-1b]\n", + "- Maximum source sequence length: [512]\n", + "- Maximum target sequence length: [128]\n", + "- Gradient accumulation steps: [32]\n", + "- Enable evaluation: [True]\n", + "- Evaluation metrics: [['rouge']]\n", + "- Torch dtype to use for training: [bfloat16]\u001b[0m\n", + "\u001b[94m[Loading the dataset...]\u001b[0m\n", + "2023-08-23T14:46:39.081970 [fsspe:DBUG] open file: /root/.cache/huggingface/datasets/billsum/default/3.0.0/75cf1719d38d6553aa0e0714c393c74579b083ae6e164b2543684e3e92e0c4cc/dataset_info.json\n", + "2023-08-23T14:46:39.088425 [fsspe:DBUG] open file: /root/.cache/huggingface/datasets/billsum/default/3.0.0/75cf1719d38d6553aa0e0714c393c74579b083ae6e164b2543684e3e92e0c4cc/dataset_info.json\n", + "\u001b[94m[Loading the base model 
resource...]\u001b[0m\n", + "\u001b[94m[Starting the training...]\u001b[0m\n", + "2023-08-23T14:49:50.656386 [PEFT_:DBUG] Shuffling enabled? True\n", + "2023-08-23T14:49:50.656523 [PEFT_:DBUG] Shuffling buffer size: 124654\n", + " 0% 0/1458 [00:00 Amends the Illinois Vehicle Code. Defines \"immediate hazard\". Provides instances in which an individual operating a bicycle approaching a stop sign may proceed through the intersection without stopping at the stop sign." + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "ol9-BjpBOboR" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "EDtesOUJO7Ak" + }, + "outputs": [], + "source": [ + "from transformers import pipeline, set_seed\n", + "\n", + "generator = pipeline('text-generation', model='tmp/falcon-rw-1b/artifacts')\n", + "set_seed(42)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "X6F0hhEYtTi_" + }, + "outputs": [], + "source": [ + "text = \"\"\"summarize:\n", + " Be it enacted by the People of the State of Illinois,\n", + "represented in the General Assembly:\n", + "\n", + "Section 5. The Illinois Vehicle Code is amended by adding\n", + "Section 11-1511.5 as follows:\n", + "\n", + "(625 ILCS 5/11-1511.5 new)\n", + "Sec. 11-1511.5. 
Operation of bicycle approaching a stop\n", + "sign.\n", + "\t\t(a) As used in this Section, \"immediate hazard\" means a\n", + "vehicle approaching an intersection at a proximity and rate of\n", + "speed sufficient to indicate to a reasonable person that there\n", + "is a danger of collision or accident.\n", + " (b) Except as provided in subsection (c), an individual\n", + "operating a bicycle approaching a stop sign may proceed\n", + "through the intersection without stopping at the stop sign if:\n", + " (1) the individual slows to a reasonable speed; and\n", + " (2) the individual yields the right-of-way to:\n", + " (i) any pedestrian within the intersection or an\n", + " adjacent crosswalk;\n", + " (ii) other traffic within the intersection; and\n", + " (iii) oncoming traffic that poses an immediate\n", + " hazard during the time the individual is traveling\n", + " through the intersection.\n", + "\n", + "\n", + "\n", + "\n", + "HB3923\t- 2 -\tLRB103 26384 MXP 52747 b\n", + " (c) Subsection (b) does not apply to an intersection with\n", + "an active railroad grade crossing.\n", + "\"\"\"\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wb5xooVEajwx", + "outputId": "17920fc3-7856-4920-8327-ae53cc8d051b" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. 
Please pass your input's `attention_mask` to obtain reliable results.\n", + "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "\n", + "HJR3149 Existing law generally prohibits someone from operating A violation for which notice will be issued pursuant tressign unless they are riding their bicycles between stops signs except under certain circumstances specified above bicyclists shall yield rights Of pedestrians walking upon footpaths,. Existiby passing laws governingThe act establishes penaltiesfor various violations involving safety when operatedbetween postedA misdemeanorpersonality disorderor mental illnessas definedin existing federal Law makesit unlawfulto operateany vehicleuponbe enforcedby localauthoritiesand prohibitswithoutIllinoisVehice code providesthat operationoffenders have been convicted orofendorsedictingthe\n" + ] + } + ], + "source": [ + "import caikit_nlp\n", + "model = caikit_nlp.modules.text_generation.TextGeneration.load('tmp/falcon-rw-1b')\n", + "\n", + "output = model.run(text, max_new_tokens= 128)\n", + "print('\\n\\n\\n'+output.generated_text.replace(text, ''))\n" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Conclusion\n", + "\n", + "It doesn't do horrible. Again, some strange characters and formatting, as it goes on it gets worse." 
+ ], + "metadata": { + "id": "w_GXzvUZU_e0" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "Wa9svk3NVRj7" + }, + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "machine_shape": "hm", + "provenance": [], + "authorship_tag": "ABX9TyO8kbTWZRbxYVOQ87HDTIO2", + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From 26fa4da392cb9327a674e5576009fe83a2c9e9ca Mon Sep 17 00:00:00 2001 From: Trevor Grant Date: Thu, 31 Aug 2023 10:53:35 -0500 Subject: [PATCH 2/4] Signed-off-by: Trevor Grant --- ...g_falcon_rw_1b_model_with_caikit_nlp.ipynb | 1192 +++++++++++++++++ 1 file changed, 1192 insertions(+) create mode 100644 examples/Finetuning_falcon_rw_1b_model_with_caikit_nlp.ipynb diff --git a/examples/Finetuning_falcon_rw_1b_model_with_caikit_nlp.ipynb b/examples/Finetuning_falcon_rw_1b_model_with_caikit_nlp.ipynb new file mode 100644 index 00000000..11d2fde6 --- /dev/null +++ b/examples/Finetuning_falcon_rw_1b_model_with_caikit_nlp.ipynb @@ -0,0 +1,1192 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nqAU3Yh-rha5" + }, + "source": [ + "# Step 1: Install Caikit\n", + "\n", + "## Installation and Setup\n", + "\n", + "In this example Jupyter notebook, we'll be using various Python libraries and pre-trained models for evaluating and analyzing natural language processing tasks. Before we proceed, we need to install the required dependencies and download some essential resources.\n", + "\n", + "### 1. 
Installing Libraries\n", + "\n", + "To begin, we'll install the following Python packages using `pip`:\n", + "\n", + "- `evaluate`: A library for evaluating model performance on different NLP tasks.\n", + "- `rouge_score`: A package for calculating ROUGE (Recall-Oriented Understudy for Gisting Evaluation) metrics for text summarization.\n", + "\n", + "Please note that these libraries may have dependencies, so we'll ensure all the necessary requirements are met during the installation process.\n", + "\n", + "```python\n", + "!pip install evaluate\n", + "!pip install rouge_score\n", + "```\n", + "\n", + "### 2. Installing `caikit` and `caikit-nlp`\n", + "\n", + "Next, we'll install the caikit and caikit-nlp libraries from GitHub. The project is still in beta and breaking changes can happen, so caikit is pinned to a release tag. Note that caikit-nlp is installed from its default branch and declares its own caikit version requirement, so pip may replace the pinned caikit build with a version that satisfies it.\n", + "\n", + "```python\n", + "!pip install git+https://github.com/caikit/caikit@v0.11.3\n", + "!pip install git+https://github.com/caikit/caikit-nlp\n", + "```\n", + "\n", + "### 3. Downloading Additional Resources\n", + "\n", + "To get the example scripts (including the fine-tuning script used in the next step), we'll need to clone the caikit-nlp repository.\n", + "\n", + "\n", + "```python\n", + "!git clone https://github.com/caikit/caikit-nlp\n", + "```\n", + "\n", + "Now that we have all the necessary libraries and resources installed, we can move on to the next steps in our NLP analysis using these powerful tools!"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZhZcVULDrTRz", + "outputId": "f0092a68-49eb-4855-ea86-119fbdf4ed31" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting evaluate\n", + " Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)\n", + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/81.4 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.4/81.4 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting datasets>=2.0.0 (from evaluate)\n", + " Downloading datasets-2.14.4-py3-none-any.whl (519 kB)\n", + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/519.3 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.3/519.3 kB\u001b[0m \u001b[31m20.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from evaluate) (1.23.5)\n", + "Collecting dill (from evaluate)\n", + " Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m16.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from evaluate) (1.5.3)\n", + "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from evaluate) (2.31.0)\n", + "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from evaluate) (4.66.1)\n", + "Collecting xxhash (from evaluate)\n", + " Downloading 
xxhash-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m26.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting multiprocess (from evaluate)\n", + " Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m18.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: fsspec[http]>=2021.05.0 in /usr/local/lib/python3.10/dist-packages (from evaluate) (2023.6.0)\n", + "Requirement already satisfied: huggingface-hub>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from evaluate) (0.16.4)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from evaluate) (23.1)\n", + "Collecting responses<0.19 (from evaluate)\n", + " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", + "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.0.0->evaluate) (9.0.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets>=2.0.0->evaluate) (3.8.5)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.0.0->evaluate) (6.0.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.7.0->evaluate) (3.12.2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.7.0->evaluate) (4.7.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->evaluate) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from 
requests>=2.19.0->evaluate) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->evaluate) (2.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->evaluate) (2023.7.22)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->evaluate) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->evaluate) (2023.3)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (23.1.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (6.0.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (4.0.3)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.9.2)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.4.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.3.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->evaluate) (1.16.0)\n", + "Installing collected packages: xxhash, dill, responses, multiprocess, datasets, evaluate\n", + "Successfully installed datasets-2.14.4 dill-0.3.7 evaluate-0.4.0 multiprocess-0.70.15 responses-0.18.0 xxhash-3.3.0\n", + "Collecting rouge_score\n", + " Downloading rouge_score-0.1.2.tar.gz (17 kB)\n", + " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from rouge_score) (1.4.0)\n", + "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from rouge_score) (3.8.1)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from rouge_score) (1.23.5)\n", + "Requirement already satisfied: six>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from rouge_score) (1.16.0)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->rouge_score) (8.1.7)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->rouge_score) (1.3.2)\n", + "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk->rouge_score) (2023.6.3)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from nltk->rouge_score) (4.66.1)\n", + "Building wheels for collected packages: rouge_score\n", + " Building wheel for rouge_score (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24932 sha256=399dede598bd07e104577a176856f7d1d7a65064e2ef8884fd211c917e9a0cba\n", + " Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n", + "Successfully built rouge_score\n", + "Installing collected packages: rouge_score\n", + "Successfully installed rouge_score-0.1.2\n", + "Collecting git+https://github.com/caikit/caikit@v0.11.3\n", + " Cloning https://github.com/caikit/caikit (to revision v0.11.3) to /tmp/pip-req-build-x9xbt7q1\n", + " Running command git clone --filter=blob:none --quiet https://github.com/caikit/caikit /tmp/pip-req-build-x9xbt7q1\n", + " Running command git checkout -q da1dc8fa7df4f9e9ba5a5b7d926cb38b9e2f1757\n", + " Resolved https://github.com/caikit/caikit to commit da1dc8fa7df4f9e9ba5a5b7d926cb38b9e2f1757\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Collecting alchemy-config<2.0.0,>=1.1.1 (from caikit==0.0.1)\n", + " Downloading alchemy_config-1.1.2-py3-none-any.whl (7.2 kB)\n", + "Collecting alchemy-logging<2.0.0,>=1.0.4 (from caikit==0.0.1)\n", + " Downloading alchemy_logging-1.2.0-py3-none-any.whl (14 kB)\n", + "Collecting anytree<3.0,>=2.7.0 (from caikit==0.0.1)\n", + " Downloading anytree-2.9.0-py3-none-any.whl (38 kB)\n", + "Collecting docstring-parser<0.16.0,>=0.14.1 (from caikit==0.0.1)\n", + " Downloading docstring_parser-0.15-py3-none-any.whl (36 kB)\n", + "Requirement already satisfied: grpcio!=1.55.0,<2.0,>=1.35.0 in /usr/local/lib/python3.10/dist-packages (from caikit==0.0.1) (1.57.0)\n", + "Collecting ijson<3.3.0,>=3.1.4 (from caikit==0.0.1)\n", + " Downloading ijson-3.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (111 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.8/111.8 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting munch<5.0,>=2.5.0 (from caikit==0.0.1)\n", + " Downloading munch-4.0.0-py2.py3-none-any.whl (9.9 kB)\n", + "Requirement already satisfied: numpy<2,>=1.20 in /usr/local/lib/python3.10/dist-packages (from caikit==0.0.1) (1.23.5)\n", + "Requirement already satisfied: protobuf<5,>=3.19.0 in /usr/local/lib/python3.10/dist-packages (from caikit==0.0.1) (3.20.3)\n", + "Collecting py-to-proto!=0.2.1,<0.5.0,>=0.4.0 (from caikit==0.0.1)\n", + " Downloading py_to_proto-0.4.1-py310-none-any.whl (32 kB)\n", + "Requirement already satisfied: PyYAML<7.0,>=6.0 in /usr/local/lib/python3.10/dist-packages (from caikit==0.0.1) (6.0.1)\n", + "Collecting semver<4.0,>=2.13.0 (from caikit==0.0.1)\n", + " Downloading semver-3.0.1-py3-none-any.whl (17 kB)\n", + "Requirement already satisfied: six<2.0.0,>=1.16.0 in /usr/local/lib/python3.10/dist-packages (from caikit==0.0.1) (1.16.0)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.59.0 in 
/usr/local/lib/python3.10/dist-packages (from caikit==0.0.1) (4.66.1)\n", + "Building wheels for collected packages: caikit\n", + " Building wheel for caikit (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for caikit: filename=caikit-0.0.1-py3-none-any.whl size=288707 sha256=389948254c4ab8f2e5812f47634d3f7a158c0fdc01bece32e0457c279b15c635\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-ocq0f8b7/wheels/83/70/e6/fbfc01278ea550744ce890a667227c6f09bb2e8de0a7414191\n", + "Successfully built caikit\n", + "Installing collected packages: ijson, alchemy-logging, semver, py-to-proto, munch, docstring-parser, anytree, alchemy-config, caikit\n", + "Successfully installed alchemy-config-1.1.2 alchemy-logging-1.2.0 anytree-2.9.0 caikit-0.0.1 docstring-parser-0.15 ijson-3.2.3 munch-4.0.0 py-to-proto-0.4.1 semver-3.0.1\n", + "Collecting git+https://github.com/caikit/caikit-nlp\n", + " Cloning https://github.com/caikit/caikit-nlp to /tmp/pip-req-build-zqu32mo5\n", + " Running command git clone --filter=blob:none --quiet https://github.com/caikit/caikit-nlp /tmp/pip-req-build-zqu32mo5\n", + " Resolved https://github.com/caikit/caikit-nlp to commit 77bc04e52d1fed2020a090b0f990b0a21c243601\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Collecting peft@ git+https://github.com/huggingface/peft.git#8c17d556a8fe9522e10d73d7bd3fad46a6ecae14 (from caikit-nlp==0.0.1)\n", + " Cloning https://github.com/huggingface/peft.git to /tmp/pip-install-9s_uw5b5/peft_03c72380d3764f41a32c61147cfc0c27\n", + " Running command git clone --filter=blob:none --quiet https://github.com/huggingface/peft.git /tmp/pip-install-9s_uw5b5/peft_03c72380d3764f41a32c61147cfc0c27\n", + " Resolved https://github.com/huggingface/peft.git to commit 7d99466446d65219a401a2c64f1d84d1b1be31b4\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0 (from caikit-nlp==0.0.1)\n", + " Downloading caikit-0.17.2-py3-none-any.whl (309 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m309.3/309.3 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting caikit-tgis-backend<0.2.0,>=0.1.16 (from caikit-nlp==0.0.1)\n", + " Downloading caikit_tgis_backend-0.1.16-py3-none-any.whl (24 kB)\n", + "Collecting accelerate>=0.21.0 (from caikit-nlp==0.0.1)\n", + " Downloading accelerate-0.22.0-py3-none-any.whl (251 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m251.2/251.2 kB\u001b[0m \u001b[31m28.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: datasets>=2.4.0 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (2.14.4)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (0.16.4)\n", + "Requirement already satisfied: numpy>=1.22.4 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (1.23.5)\n", + "Requirement already satisfied: 
pandas>=1.5.0 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (1.5.3)\n", + "Requirement already satisfied: scikit-learn>=1.1 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (1.2.2)\n", + "Requirement already satisfied: scipy>=1.8.1 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (1.10.1)\n", + "Requirement already satisfied: tokenizers>=0.13.3 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (0.13.3)\n", + "Requirement already satisfied: torch>=1.13.1 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (2.0.1+cu118)\n", + "Requirement already satisfied: tqdm>=4.65.0 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (4.66.1)\n", + "Requirement already satisfied: transformers>=4.31.0 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (4.32.1)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.21.0->caikit-nlp==0.0.1) (23.1)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.21.0->caikit-nlp==0.0.1) (5.9.5)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.21.0->caikit-nlp==0.0.1) (6.0.1)\n", + "Collecting caikit<0.20.0,>=0.16.0 (from caikit-tgis-backend<0.2.0,>=0.1.16->caikit-nlp==0.0.1)\n", + " Downloading caikit-0.18.0-py3-none-any.whl (309 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m309.3/309.3 kB\u001b[0m \u001b[31m35.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: grpcio<2.0,>=1.35.0 in /usr/local/lib/python3.10/dist-packages (from caikit-tgis-backend<0.2.0,>=0.1.16->caikit-nlp==0.0.1) (1.57.0)\n", + "Requirement already satisfied: requests<3,>=2.28.2 in /usr/local/lib/python3.10/dist-packages (from caikit-tgis-backend<0.2.0,>=0.1.16->caikit-nlp==0.0.1) 
(2.31.0)\n", + "Requirement already satisfied: alchemy-config<2.0.0,>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (1.1.2)\n", + "Requirement already satisfied: alchemy-logging<2.0.0,>=1.0.4 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (1.2.0)\n", + "Requirement already satisfied: anytree<3.0,>=2.7.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (2.9.0)\n", + "Requirement already satisfied: docstring-parser<0.16.0,>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (0.15)\n", + "Requirement already satisfied: ijson<3.3.0,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (3.2.3)\n", + "Requirement already satisfied: munch<5.0,>=2.5.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (4.0.0)\n", + "Requirement already satisfied: protobuf<5,>=3.19.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (3.20.3)\n", + "Requirement already satisfied: py-to-proto!=0.2.1,<0.5.0,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (0.4.1)\n", + "Requirement already satisfied: semver<4.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (3.0.1)\n", + "Requirement already satisfied: six<2.0.0,>=1.16.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (1.16.0)\n", + "Collecting fastapi[all]<1,>=0.95 (from 
caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading fastapi-0.103.0-py3-none-any.whl (66 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.2/66.2 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting sse-starlette<2,>=1.6.1 (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading sse_starlette-1.6.5-py3-none-any.whl (9.6 kB)\n", + "Collecting grpcio-health-checking<2.0,>=1.35.0 (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading grpcio_health_checking-1.57.0-py3-none-any.whl (8.6 kB)\n", + "Collecting grpcio-reflection<2.0,>=1.35.0 (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading grpcio_reflection-1.57.0-py3-none-any.whl (11 kB)\n", + "Requirement already satisfied: prometheus_client<1.0,>=0.12.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (0.17.1)\n", + "Collecting py-grpc-prometheus<0.8,>=0.7.0 (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading py_grpc_prometheus-0.7.0-py3-none-any.whl (12 kB)\n", + "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.4.0->caikit-nlp==0.0.1) (9.0.0)\n", + "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.4.0->caikit-nlp==0.0.1) (0.3.7)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets>=2.4.0->caikit-nlp==0.0.1) (3.3.0)\n", + "Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets>=2.4.0->caikit-nlp==0.0.1) (0.70.15)\n", + "Requirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.10/dist-packages (from 
datasets>=2.4.0->caikit-nlp==0.0.1) (2023.6.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets>=2.4.0->caikit-nlp==0.0.1) (3.8.5)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->caikit-nlp==0.0.1) (3.12.2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->caikit-nlp==0.0.1) (4.7.1)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5.0->caikit-nlp==0.0.1) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5.0->caikit-nlp==0.0.1) (2023.3)\n", + "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.1->caikit-nlp==0.0.1) (1.3.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.1->caikit-nlp==0.0.1) (3.2.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->caikit-nlp==0.0.1) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->caikit-nlp==0.0.1) (3.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->caikit-nlp==0.0.1) (3.1.2)\n", + "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->caikit-nlp==0.0.1) (2.0.0)\n", + "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13.1->caikit-nlp==0.0.1) (3.27.2)\n", + "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13.1->caikit-nlp==0.0.1) (16.0.6)\n", + "Requirement already satisfied: regex!=2019.12.17 in 
/usr/local/lib/python3.10/dist-packages (from transformers>=4.31.0->caikit-nlp==0.0.1) (2023.6.3)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.31.0->caikit-nlp==0.0.1) (0.3.3)\n", + "Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (2.2.1)\n", + "Collecting starlette<0.28.0,>=0.27.0 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading starlette-0.27.0-py3-none-any.whl (66 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.0/67.0 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting email-validator>=2.0.0 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading email_validator-2.0.0.post2-py3-none-any.whl (31 kB)\n", + "Collecting httpx>=0.23.0 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading httpx-0.24.1-py3-none-any.whl (75 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.4/75.4 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: itsdangerous>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (2.1.2)\n", + "Collecting orjson>=3.2.1 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading orjson-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (139 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m 
\u001b[32m139.9/139.9 kB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pydantic-extra-types>=2.0.0 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading pydantic_extra_types-2.1.0-py3-none-any.whl (16 kB)\n", + "Collecting pydantic-settings>=2.0.0 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading pydantic_settings-2.0.3-py3-none-any.whl (11 kB)\n", + "Collecting python-multipart>=0.0.5 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading python_multipart-0.0.6-py3-none-any.whl (45 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading ujson-5.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.9/53.9 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting uvicorn[standard]>=0.12.0 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading uvicorn-0.23.2-py3-none-any.whl (59 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.5/59.5 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (23.1.0)\n", + "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in 
/usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (3.2.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (6.0.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (4.0.3)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (1.9.2)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (1.4.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (1.3.1)\n", + "Collecting protobuf<5,>=3.19.0 (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading protobuf-4.24.2-cp37-abi3-manylinux2014_x86_64.whl (311 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m311.4/311.4 kB\u001b[0m \u001b[31m33.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.13.1->caikit-nlp==0.0.1) (2.1.3)\n", + "Requirement already satisfied: setuptools>=39.0.1 in /usr/local/lib/python3.10/dist-packages (from py-grpc-prometheus<0.8,>=0.7.0->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (67.7.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.28.2->caikit-tgis-backend<0.2.0,>=0.1.16->caikit-nlp==0.0.1) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from 
requests<3,>=2.28.2->caikit-tgis-backend<0.2.0,>=0.1.16->caikit-nlp==0.0.1) (2.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.28.2->caikit-tgis-backend<0.2.0,>=0.1.16->caikit-nlp==0.0.1) (2023.7.22)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13.1->caikit-nlp==0.0.1) (1.3.0)\n", + "Collecting dnspython>=2.0.0 (from email-validator>=2.0.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading dnspython-2.4.2-py3-none-any.whl (300 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m300.4/300.4 kB\u001b[0m \u001b[31m36.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting httpcore<0.18.0,>=0.15.0 (from httpx>=0.23.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading httpcore-0.17.3-py3-none-any.whl (74 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m74.5/74.5 kB\u001b[0m \u001b[31m10.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx>=0.23.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (1.3.0)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (0.5.0)\n", + "Requirement already satisfied: pydantic-core==2.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (2.6.1)\n", + "Collecting 
python-dotenv>=0.21.0 (from pydantic-settings>=2.0.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)\n", + "Requirement already satisfied: anyio<5,>=3.4.0 in /usr/local/lib/python3.10/dist-packages (from starlette<0.28.0,>=0.27.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (3.7.1)\n", + "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.12.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (8.1.7)\n", + "Collecting h11>=0.8 (from uvicorn[standard]>=0.12.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting httptools>=0.5.0 (from uvicorn[standard]>=0.12.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading httptools-0.6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (428 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m428.8/428.8 kB\u001b[0m \u001b[31m47.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn[standard]>=0.12.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading uvloop-0.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.1/4.1 MB\u001b[0m \u001b[31m88.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + 
"\u001b[?25hCollecting watchfiles>=0.13 (from uvicorn[standard]>=0.12.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading watchfiles-0.20.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m80.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting websockets>=10.4 (from uvicorn[standard]>=0.12.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", + " Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m19.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (1.1.3)\n", + "Building wheels for collected packages: caikit-nlp, peft\n", + " Building wheel for caikit-nlp (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for caikit-nlp: filename=caikit_nlp-0.0.1-py3-none-any.whl size=71738 sha256=5176f598fbf41d2eaf242a7aa6d1508afeb1415739492bbdebce9c71924e4ff8\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-4qkd9tik/wheels/29/1c/3c/060d91e84e7a56eab1cb92fe59f09d22e106a75668a0cb62db\n", + " Building wheel for peft (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for peft: filename=peft-0.6.0.dev0-py3-none-any.whl size=106759 sha256=1bf4c6bdd17ccf9f387eea1e69f85089c63663c900ac5f38706b318d3c208e1e\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-4qkd9tik/wheels/d7/c7/de/1368fac8590e1b103ddc2ec2a28ad51d83aded1a3830e8a087\n", + "Successfully built caikit-nlp peft\n", + "Installing collected packages: websockets, uvloop, ujson, python-multipart, python-dotenv, py-grpc-prometheus, protobuf, orjson, httptools, h11, dnspython, watchfiles, uvicorn, starlette, httpcore, grpcio-reflection, grpcio-health-checking, email-validator, sse-starlette, pydantic-settings, pydantic-extra-types, httpx, fastapi, caikit, caikit-tgis-backend, accelerate, peft, caikit-nlp\n", + " Attempting uninstall: protobuf\n", + " Found existing installation: protobuf 3.20.3\n", + " Uninstalling protobuf-3.20.3:\n", + " Successfully uninstalled protobuf-3.20.3\n", + " Attempting uninstall: caikit\n", + " Found existing installation: caikit 0.0.1\n", + " Uninstalling caikit-0.0.1:\n", + " Successfully uninstalled caikit-0.0.1\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 4.24.2 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed accelerate-0.22.0 caikit-0.17.2 caikit-nlp-0.0.1 caikit-tgis-backend-0.1.16 dnspython-2.4.2 email-validator-2.0.0.post2 fastapi-0.103.0 grpcio-health-checking-1.57.0 grpcio-reflection-1.57.0 h11-0.14.0 httpcore-0.17.3 httptools-0.6.0 httpx-0.24.1 orjson-3.9.5 peft-0.6.0.dev0 protobuf-4.24.2 py-grpc-prometheus-0.7.0 pydantic-extra-types-2.1.0 pydantic-settings-2.0.3 python-dotenv-1.0.0 python-multipart-0.0.6 sse-starlette-1.6.5 starlette-0.27.0 ujson-5.8.0 uvicorn-0.23.2 uvloop-0.17.0 watchfiles-0.20.0 websockets-11.0.3\n", + "Cloning into 'caikit-nlp'...\n", + "remote: Enumerating objects: 2536, done.\u001b[K\n", + "remote: Counting objects: 100% (920/920), done.\u001b[K\n", + "remote: Compressing objects: 100% (268/268), done.\u001b[K\n", + "remote: Total 2536 (delta 734), reused 657 (delta 650), pack-reused 1616\u001b[K\n", + "Receiving objects: 100% (2536/2536), 1.89 MiB | 2.56 MiB/s, done.\n", + "Resolving deltas: 100% (1816/1816), done.\n" + ] + } + ], + "source": [ + "!pip install evaluate\n", + "!pip install rouge_score\n", + "\n", + "!pip install git+https://github.com/caikit/caikit@v0.11.3\n", + "!pip install git+https://github.com/caikit/caikit-nlp\n", + "\n", + "!git clone https://github.com/caikit/caikit-nlp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7RIXUii94aLy" + }, + "source": [ + "# Step 2. 
Fine Tuning\n", + "\n", + "```\n", + "!python caikit-nlp/examples/run_fine_tuning.py --dataset \"billsum\" \\\n", + " --model_name tiiuae/falcon-rw-1b \\\n", + " --num_epochs 3 \\\n", + " --output_dir tmp/falcon-rw-1b \\\n", + " --batch_size=8 \\\n", + " --accumulate_steps 32 \\\n", + " --max_source_length 512 \\\n", + " --metric rouge \\\n", + " --torch_dtype bfloat16 \\\n", + " --evaluate\n", + "```\n", + "\n", + "This is a command-line instruction to run a Python script called `run_fine_tuning.py` using the Python interpreter. It is part of the `caikit-nlp` package and is meant to tune a pre-trained model (in this case `tiiuae/falcon-rw-1b`) on a specific dataset (in this case `billsum`).\n", + "\n", + "Let's explain each argument in the command:\n", + "\n", + "1. `caikit-nlp/examples/run_fine_tuning.py`: This specifies the path to the Python script that will be executed. It is a part of the caikit-nlp library and drives the fine-tuning workflow.\n", + "1. `--dataset \"billsum\"`: This specifies the dataset to be used for tuning. In this example, the dataset is `billsum`, a collection of US Congressional and California state bills paired with their summaries [link](https://huggingface.co/datasets/billsum).\n", + "1. `--model_name tiiuae/falcon-rw-1b`: This indicates the base model that will be fine-tuned. In this case, it's `tiiuae/falcon-rw-1b`, which refers to the Falcon 1B model from Hugging Face [link](https://huggingface.co/tiiuae/falcon-rw-1b).\n", + "1. `--num_epochs 3`: This sets the number of epochs (full passes over the training data) for the fine-tuning process. Here, it's set to 3, meaning the model will go through the dataset three times during fine-tuning.\n", + "1. `--output_dir tmp/falcon-rw-1b`: This sets the directory where the fine-tuned model and related outputs will be stored. In this case, it's set to the `tmp/falcon-rw-1b` directory.\n", + "1. `--batch_size=8`: This sets the batch size used during training. 
The data will be divided into batches of 8 samples each.\n", + "1. `--accumulate_steps 32`: This specifies the number of steps over which gradients are accumulated before the weights are updated. It can be useful for simulating larger batch sizes when GPU memory is limited.\n", + "1. `--max_source_length 512`: This is a flag to set the maximum length of the input sequence.\n", + "1. `--metric rouge`: This sets the evaluation metric to ROUGE.\n", + "1. `--torch_dtype bfloat16`: This specifies what dtype to use for the training. `float32` is considered 'full precision', though other options such as `float16` and `bfloat16` (known as half precision) also exist. `bfloat16` has a wider range but less precision than `float16`, and is only available on Ampere-class GPUs (such as the A100).\n", + "1. `--evaluate`: This signals to the script to evaluate the model at the end of fine-tuning.\n", + "\n", + "\n", + "Overall, this command-line script is fine-tuning the `tiiuae/falcon-rw-1b` model on the `billsum` dataset, with specific settings for maximum source length, batch size, accumulation steps, and so on. For a full list of available args, their descriptions, and default values, run `!python caikit-nlp/examples/run_fine_tuning.py --help`.\n", + " \n", + "The results of fine-tuning will be stored in the `tmp/falcon-rw-1b` directory." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_bIQQI_G0t5z" + }, + "source": [ + "# The `tiiuae/falcon-rw-1b` Model\n", + "\n", + "The model we'll fine-tune is `tiiuae/falcon-rw-1b` ([link](https://huggingface.co/tiiuae/falcon-rw-1b)). This model has 1B parameters. We'll fine-tune it on the `billsum` dataset and then see how it performs on summarizing new bills." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "L4deByI5aRNQ", + "outputId": "feaf7ce3-c836-4af1-996d-f40982206c76" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "env: ALLOW_DOWNLOADS=true\n", + "2023-08-23 14:46:29.987417: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "/usr/local/lib/python3.10/dist-packages/caikit/core/toolkit/errors/__init__.py:29: DeprecationWarning: The caikit.toolkit.errors package has moved to caikit.core.exceptions\n", + " _warnings.warn(\n", + " is still in the BETA phase and subject to change!\n", + "/usr/local/lib/python3.10/dist-packages/caikit/core/toolkit/error_handler.py:29: DeprecationWarning: The caikit.toolkit.error_handler package has moved to caikit.core.exceptions\n", + " _warnings.warn(\n", + "\u001b[94mExperiment Configuration\n", + "- Model Name: [tiiuae/falcon-rw-1b]\n", + " |- Inferred Model Resource Type: []\n", + "- Dataset: [billsum]\n", + "- Number of Epochs: [3]\n", + "- Learning Rate: [2e-05]\n", + "- Batch Size: [8]\n", + "- Output Directory: [tmp/falcon-rw-1b]\n", + "- Maximum source sequence length: [512]\n", + "- Maximum target sequence length: [128]\n", + "- Gradient accumulation steps: [32]\n", + "- Enable evaluation: [True]\n", + "- Evaluation metrics: [['rouge']]\n", + "- Torch dtype to use for training: [bfloat16]\u001b[0m\n", + "\u001b[94m[Loading the dataset...]\u001b[0m\n", + "2023-08-23T14:46:39.081970 [fsspe:DBUG] open file: /root/.cache/huggingface/datasets/billsum/default/3.0.0/75cf1719d38d6553aa0e0714c393c74579b083ae6e164b2543684e3e92e0c4cc/dataset_info.json\n", + "2023-08-23T14:46:39.088425 [fsspe:DBUG] open file: /root/.cache/huggingface/datasets/billsum/default/3.0.0/75cf1719d38d6553aa0e0714c393c74579b083ae6e164b2543684e3e92e0c4cc/dataset_info.json\n", + "\u001b[94m[Loading the base model 
resource...]\u001b[0m\n", + "\u001b[94m[Starting the training...]\u001b[0m\n", + "2023-08-23T14:49:50.656386 [PEFT_:DBUG] Shuffling enabled? True\n", + "2023-08-23T14:49:50.656523 [PEFT_:DBUG] Shuffling buffer size: 124654\n", + " 0% 0/1458 [00:00 Amends the Illinois Vehicle Code. Defines \"immediate hazard\". Provides instances in which an individual operating a bicycle approaching a stop sign may proceed through the intersection without stopping at the stop sign." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "X6F0hhEYtTi_" + }, + "outputs": [], + "source": [ + "text = \"\"\"summarize:\n", + " Be it enacted by the People of the State of Illinois,\n", + "represented in the General Assembly:\n", + "\n", + "Section 5. The Illinois Vehicle Code is amended by adding\n", + "Section 11-1511.5 as follows:\n", + "\n", + "(625 ILCS 5/11-1511.5 new)\n", + "Sec. 11-1511.5. Operation of bicycle approaching a stop\n", + "sign.\n", + "\t\t(a) As used in this Section, \"immediate hazard\" means a\n", + "vehicle approaching an intersection at a proximity and rate of\n", + "speed sufficient to indicate to a reasonable person that there\n", + "is a danger of collision or accident.\n", + " (b) Except as provided in subsection (c), an individual\n", + "operating a bicycle approaching a stop sign may proceed\n", + "through the intersection without stopping at the stop sign if:\n", + " (1) the individual slows to a reasonable speed; and\n", + " (2) the individual yields the right-of-way to:\n", + " (i) any pedestrian within the intersection or an\n", + " adjacent crosswalk;\n", + " (ii) other traffic within the intersection; and\n", + " (iii) oncoming traffic that poses an immediate\n", + " hazard during the time the individual is traveling\n", + " through the intersection.\n", + "\n", + "\n", + "\n", + "\n", + "HB3923\t- 2 -\tLRB103 26384 MXP 52747 b\n", + " (c) Subsection (b) does not apply to an intersection with\n", + "an active railroad 
grade crossing.\n", + "\"\"\"\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wb5xooVEajwx", + "outputId": "17920fc3-7856-4920-8327-ae53cc8d051b" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n", + "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "\n", + "HJR3149 Existing law generally prohibits someone from operating A violation for which notice will be issued pursuant tressign unless they are riding their bicycles between stops signs except under certain circumstances specified above bicyclists shall yield rights Of pedestrians walking upon footpaths,. Existiby passing laws governingThe act establishes penaltiesfor various violations involving safety when operatedbetween postedA misdemeanorpersonality disorderor mental illnessas definedin existing federal Law makesit unlawfulto operateany vehicleuponbe enforcedby localauthoritiesand prohibitswithoutIllinoisVehice code providesthat operationoffenders have been convicted orofendorsedictingthe\n" + ] + } + ], + "source": [ + "import caikit_nlp\n", + "model = caikit_nlp.modules.text_generation.TextGeneration.load('tmp/falcon-rw-1b')\n", + "\n", + "output = model.run(text, max_new_tokens= 128)\n", + "print('\\n\\n\\n'+output.generated_text.replace(text, ''))\n" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Conclusion\n", + "\n", + "It doesn't do horribly. Again, there are some strange characters and formatting issues, and the output gets worse as it goes on." 
+ ], + "metadata": { + "id": "w_GXzvUZU_e0" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "Wa9svk3NVRj7" + }, + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "machine_shape": "hm", + "provenance": [], + "authorship_tag": "ABX9TyMJ4KaHM4FW+jaOcN89CuoG", + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From c47927d84bfaffca48bb93ebc69afc6338c1065e Mon Sep 17 00:00:00 2001 From: Trevor Grant Date: Thu, 31 Aug 2023 11:02:01 -0500 Subject: [PATCH 3/4] Delete Finetuning_falcon_rw_1b_model_with_caikit_nlp.ipynb Signed-off-by: Trevor Grant Signed-off-by: Trevor Grant --- ...g_falcon_rw_1b_model_with_caikit_nlp.ipynb | 1219 ----------------- 1 file changed, 1219 deletions(-) delete mode 100644 Finetuning_falcon_rw_1b_model_with_caikit_nlp.ipynb diff --git a/Finetuning_falcon_rw_1b_model_with_caikit_nlp.ipynb b/Finetuning_falcon_rw_1b_model_with_caikit_nlp.ipynb deleted file mode 100644 index cc9e9806..00000000 --- a/Finetuning_falcon_rw_1b_model_with_caikit_nlp.ipynb +++ /dev/null @@ -1,1219 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nqAU3Yh-rha5" - }, - "source": [ - "# Step 1: Install Caikit\n", - "\n", - "## Installation and Setup\n", - "\n", - "In this example Jupyter notebook, we'll be using various Python libraries and pre-trained models for evaluating and analyzing natural language processing tasks. Before we proceed, we need to install the required dependencies and download some essential resources.\n", - "\n", - "### 1. 
Installing Libraries\n", - "\n", - "To begin, we'll install the following Python packages using `pip`:\n", - "\n", - "- `evaluate`: A library for evaluating model performance on different NLP tasks.\n", - "- `rouge_score`: A package for calculating ROUGE (Recall-Oriented Understudy for Gisting Evaluation) metrics for text summarization.\n", - "\n", - "Please note that these libraries may have dependencies, so we'll ensure all the necessary requirements are met during the installation process.\n", - "\n", - "```python\n", - "!pip install evaluate\n", - "!pip install rouge_score\n", - "```\n", - "\n", - "### 2. Installing `caikit` and `caikit-nlp`\n", - "\n", - "Next, we'll install specific versions of the caikit and caikit-nlp libraries, as the project is still in beta and breaking changes can happen.\n", - "\n", - "```python\n", - "!pip install git+https://github.com/caikit/caikit@v0.11.3\n", - "!pip install git+https://github.com/caikit/caikit-nlp\n", - "```\n", - "\n", - "### 3. Downloading Additional Resources\n", - "\n", - "In order to explore the capabilities of pre-trained models, we'll need to download the caikit-nlp repository.\n", - "\n", - "\n", - "```python\n", - "!git clone https://github.com/caikit/caikit-nlp\n", - "```\n", - "\n", - "Now that we have all the necessary libraries and resources installed, we can move on to the next steps in our NLP analysis using these powerful tools!" 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ZhZcVULDrTRz", - "outputId": "f0092a68-49eb-4855-ea86-119fbdf4ed31" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting evaluate\n", - " Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)\n", - "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/81.4 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.4/81.4 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting datasets>=2.0.0 (from evaluate)\n", - " Downloading datasets-2.14.4-py3-none-any.whl (519 kB)\n", - "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/519.3 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.3/519.3 kB\u001b[0m \u001b[31m20.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from evaluate) (1.23.5)\n", - "Collecting dill (from evaluate)\n", - " Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m16.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from evaluate) (1.5.3)\n", - "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from evaluate) (2.31.0)\n", - "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from evaluate) (4.66.1)\n", - "Collecting xxhash (from evaluate)\n", - " Downloading 
xxhash-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m26.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting multiprocess (from evaluate)\n", - " Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m18.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: fsspec[http]>=2021.05.0 in /usr/local/lib/python3.10/dist-packages (from evaluate) (2023.6.0)\n", - "Requirement already satisfied: huggingface-hub>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from evaluate) (0.16.4)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from evaluate) (23.1)\n", - "Collecting responses<0.19 (from evaluate)\n", - " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", - "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.0.0->evaluate) (9.0.0)\n", - "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets>=2.0.0->evaluate) (3.8.5)\n", - "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.0.0->evaluate) (6.0.1)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.7.0->evaluate) (3.12.2)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.7.0->evaluate) (4.7.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->evaluate) (3.2.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from 
requests>=2.19.0->evaluate) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->evaluate) (2.0.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->evaluate) (2023.7.22)\n", - "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->evaluate) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->evaluate) (2023.3)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (23.1.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (6.0.4)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (4.0.3)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.9.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.4.0)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.3.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->evaluate) (1.16.0)\n", - "Installing collected packages: xxhash, dill, responses, multiprocess, datasets, evaluate\n", - "Successfully installed datasets-2.14.4 dill-0.3.7 evaluate-0.4.0 multiprocess-0.70.15 responses-0.18.0 xxhash-3.3.0\n", - "Collecting rouge_score\n", - " Downloading rouge_score-0.1.2.tar.gz (17 kB)\n", - " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from rouge_score) (1.4.0)\n", - "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from rouge_score) (3.8.1)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from rouge_score) (1.23.5)\n", - "Requirement already satisfied: six>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from rouge_score) (1.16.0)\n", - "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->rouge_score) (8.1.7)\n", - "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->rouge_score) (1.3.2)\n", - "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk->rouge_score) (2023.6.3)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from nltk->rouge_score) (4.66.1)\n", - "Building wheels for collected packages: rouge_score\n", - " Building wheel for rouge_score (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24932 sha256=399dede598bd07e104577a176856f7d1d7a65064e2ef8884fd211c917e9a0cba\n", - " Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n", - "Successfully built rouge_score\n", - "Installing collected packages: rouge_score\n", - "Successfully installed rouge_score-0.1.2\n", - "Collecting git+https://github.com/caikit/caikit@v0.11.3\n", - " Cloning https://github.com/caikit/caikit (to revision v0.11.3) to /tmp/pip-req-build-x9xbt7q1\n", - " Running command git clone --filter=blob:none --quiet https://github.com/caikit/caikit /tmp/pip-req-build-x9xbt7q1\n", - " Running command git checkout -q da1dc8fa7df4f9e9ba5a5b7d926cb38b9e2f1757\n", - " Resolved https://github.com/caikit/caikit to commit da1dc8fa7df4f9e9ba5a5b7d926cb38b9e2f1757\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - "Collecting alchemy-config<2.0.0,>=1.1.1 (from caikit==0.0.1)\n", - " Downloading alchemy_config-1.1.2-py3-none-any.whl (7.2 kB)\n", - "Collecting alchemy-logging<2.0.0,>=1.0.4 (from caikit==0.0.1)\n", - " Downloading alchemy_logging-1.2.0-py3-none-any.whl (14 kB)\n", - "Collecting anytree<3.0,>=2.7.0 (from caikit==0.0.1)\n", - " Downloading anytree-2.9.0-py3-none-any.whl (38 kB)\n", - "Collecting docstring-parser<0.16.0,>=0.14.1 (from caikit==0.0.1)\n", - " Downloading docstring_parser-0.15-py3-none-any.whl (36 kB)\n", - "Requirement already satisfied: grpcio!=1.55.0,<2.0,>=1.35.0 in /usr/local/lib/python3.10/dist-packages (from caikit==0.0.1) (1.57.0)\n", - "Collecting ijson<3.3.0,>=3.1.4 (from caikit==0.0.1)\n", - " Downloading ijson-3.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (111 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.8/111.8 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting munch<5.0,>=2.5.0 (from caikit==0.0.1)\n", - " Downloading munch-4.0.0-py2.py3-none-any.whl (9.9 kB)\n", - "Requirement already satisfied: numpy<2,>=1.20 in /usr/local/lib/python3.10/dist-packages (from caikit==0.0.1) (1.23.5)\n", - "Requirement already satisfied: protobuf<5,>=3.19.0 in /usr/local/lib/python3.10/dist-packages (from caikit==0.0.1) (3.20.3)\n", - "Collecting py-to-proto!=0.2.1,<0.5.0,>=0.4.0 (from caikit==0.0.1)\n", - " Downloading py_to_proto-0.4.1-py310-none-any.whl (32 kB)\n", - "Requirement already satisfied: PyYAML<7.0,>=6.0 in /usr/local/lib/python3.10/dist-packages (from caikit==0.0.1) (6.0.1)\n", - "Collecting semver<4.0,>=2.13.0 (from caikit==0.0.1)\n", - " Downloading semver-3.0.1-py3-none-any.whl (17 kB)\n", - "Requirement already satisfied: six<2.0.0,>=1.16.0 in /usr/local/lib/python3.10/dist-packages (from caikit==0.0.1) (1.16.0)\n", - "Requirement already satisfied: tqdm<5.0.0,>=4.59.0 in 
/usr/local/lib/python3.10/dist-packages (from caikit==0.0.1) (4.66.1)\n", - "Building wheels for collected packages: caikit\n", - " Building wheel for caikit (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for caikit: filename=caikit-0.0.1-py3-none-any.whl size=288707 sha256=389948254c4ab8f2e5812f47634d3f7a158c0fdc01bece32e0457c279b15c635\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-ocq0f8b7/wheels/83/70/e6/fbfc01278ea550744ce890a667227c6f09bb2e8de0a7414191\n", - "Successfully built caikit\n", - "Installing collected packages: ijson, alchemy-logging, semver, py-to-proto, munch, docstring-parser, anytree, alchemy-config, caikit\n", - "Successfully installed alchemy-config-1.1.2 alchemy-logging-1.2.0 anytree-2.9.0 caikit-0.0.1 docstring-parser-0.15 ijson-3.2.3 munch-4.0.0 py-to-proto-0.4.1 semver-3.0.1\n", - "Collecting git+https://github.com/caikit/caikit-nlp\n", - " Cloning https://github.com/caikit/caikit-nlp to /tmp/pip-req-build-zqu32mo5\n", - " Running command git clone --filter=blob:none --quiet https://github.com/caikit/caikit-nlp /tmp/pip-req-build-zqu32mo5\n", - " Resolved https://github.com/caikit/caikit-nlp to commit 77bc04e52d1fed2020a090b0f990b0a21c243601\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - "Collecting peft@ git+https://github.com/huggingface/peft.git#8c17d556a8fe9522e10d73d7bd3fad46a6ecae14 (from caikit-nlp==0.0.1)\n", - " Cloning https://github.com/huggingface/peft.git to /tmp/pip-install-9s_uw5b5/peft_03c72380d3764f41a32c61147cfc0c27\n", - " Running command git clone --filter=blob:none --quiet https://github.com/huggingface/peft.git /tmp/pip-install-9s_uw5b5/peft_03c72380d3764f41a32c61147cfc0c27\n", - " Resolved https://github.com/huggingface/peft.git to commit 7d99466446d65219a401a2c64f1d84d1b1be31b4\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0 (from caikit-nlp==0.0.1)\n", - " Downloading caikit-0.17.2-py3-none-any.whl (309 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m309.3/309.3 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting caikit-tgis-backend<0.2.0,>=0.1.16 (from caikit-nlp==0.0.1)\n", - " Downloading caikit_tgis_backend-0.1.16-py3-none-any.whl (24 kB)\n", - "Collecting accelerate>=0.21.0 (from caikit-nlp==0.0.1)\n", - " Downloading accelerate-0.22.0-py3-none-any.whl (251 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m251.2/251.2 kB\u001b[0m \u001b[31m28.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: datasets>=2.4.0 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (2.14.4)\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (0.16.4)\n", - "Requirement already satisfied: numpy>=1.22.4 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (1.23.5)\n", - "Requirement already satisfied: 
pandas>=1.5.0 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (1.5.3)\n", - "Requirement already satisfied: scikit-learn>=1.1 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (1.2.2)\n", - "Requirement already satisfied: scipy>=1.8.1 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (1.10.1)\n", - "Requirement already satisfied: tokenizers>=0.13.3 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (0.13.3)\n", - "Requirement already satisfied: torch>=1.13.1 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (2.0.1+cu118)\n", - "Requirement already satisfied: tqdm>=4.65.0 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (4.66.1)\n", - "Requirement already satisfied: transformers>=4.31.0 in /usr/local/lib/python3.10/dist-packages (from caikit-nlp==0.0.1) (4.32.1)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.21.0->caikit-nlp==0.0.1) (23.1)\n", - "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.21.0->caikit-nlp==0.0.1) (5.9.5)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.21.0->caikit-nlp==0.0.1) (6.0.1)\n", - "Collecting caikit<0.20.0,>=0.16.0 (from caikit-tgis-backend<0.2.0,>=0.1.16->caikit-nlp==0.0.1)\n", - " Downloading caikit-0.18.0-py3-none-any.whl (309 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m309.3/309.3 kB\u001b[0m \u001b[31m35.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: grpcio<2.0,>=1.35.0 in /usr/local/lib/python3.10/dist-packages (from caikit-tgis-backend<0.2.0,>=0.1.16->caikit-nlp==0.0.1) (1.57.0)\n", - "Requirement already satisfied: requests<3,>=2.28.2 in /usr/local/lib/python3.10/dist-packages (from caikit-tgis-backend<0.2.0,>=0.1.16->caikit-nlp==0.0.1) 
(2.31.0)\n", - "Requirement already satisfied: alchemy-config<2.0.0,>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (1.1.2)\n", - "Requirement already satisfied: alchemy-logging<2.0.0,>=1.0.4 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (1.2.0)\n", - "Requirement already satisfied: anytree<3.0,>=2.7.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (2.9.0)\n", - "Requirement already satisfied: docstring-parser<0.16.0,>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (0.15)\n", - "Requirement already satisfied: ijson<3.3.0,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (3.2.3)\n", - "Requirement already satisfied: munch<5.0,>=2.5.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (4.0.0)\n", - "Requirement already satisfied: protobuf<5,>=3.19.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (3.20.3)\n", - "Requirement already satisfied: py-to-proto!=0.2.1,<0.5.0,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (0.4.1)\n", - "Requirement already satisfied: semver<4.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (3.0.1)\n", - "Requirement already satisfied: six<2.0.0,>=1.16.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (1.16.0)\n", - "Collecting fastapi[all]<1,>=0.95 (from 
caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading fastapi-0.103.0-py3-none-any.whl (66 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.2/66.2 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting sse-starlette<2,>=1.6.1 (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading sse_starlette-1.6.5-py3-none-any.whl (9.6 kB)\n", - "Collecting grpcio-health-checking<2.0,>=1.35.0 (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading grpcio_health_checking-1.57.0-py3-none-any.whl (8.6 kB)\n", - "Collecting grpcio-reflection<2.0,>=1.35.0 (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading grpcio_reflection-1.57.0-py3-none-any.whl (11 kB)\n", - "Requirement already satisfied: prometheus_client<1.0,>=0.12.0 in /usr/local/lib/python3.10/dist-packages (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (0.17.1)\n", - "Collecting py-grpc-prometheus<0.8,>=0.7.0 (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading py_grpc_prometheus-0.7.0-py3-none-any.whl (12 kB)\n", - "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.4.0->caikit-nlp==0.0.1) (9.0.0)\n", - "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.4.0->caikit-nlp==0.0.1) (0.3.7)\n", - "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets>=2.4.0->caikit-nlp==0.0.1) (3.3.0)\n", - "Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets>=2.4.0->caikit-nlp==0.0.1) (0.70.15)\n", - "Requirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.10/dist-packages (from 
datasets>=2.4.0->caikit-nlp==0.0.1) (2023.6.0)\n", - "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets>=2.4.0->caikit-nlp==0.0.1) (3.8.5)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->caikit-nlp==0.0.1) (3.12.2)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->caikit-nlp==0.0.1) (4.7.1)\n", - "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5.0->caikit-nlp==0.0.1) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5.0->caikit-nlp==0.0.1) (2023.3)\n", - "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.1->caikit-nlp==0.0.1) (1.3.2)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.1->caikit-nlp==0.0.1) (3.2.0)\n", - "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->caikit-nlp==0.0.1) (1.12)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->caikit-nlp==0.0.1) (3.1)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->caikit-nlp==0.0.1) (3.1.2)\n", - "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->caikit-nlp==0.0.1) (2.0.0)\n", - "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13.1->caikit-nlp==0.0.1) (3.27.2)\n", - "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13.1->caikit-nlp==0.0.1) (16.0.6)\n", - "Requirement already satisfied: regex!=2019.12.17 in 
/usr/local/lib/python3.10/dist-packages (from transformers>=4.31.0->caikit-nlp==0.0.1) (2023.6.3)\n", - "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.31.0->caikit-nlp==0.0.1) (0.3.3)\n", - "Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (2.2.1)\n", - "Collecting starlette<0.28.0,>=0.27.0 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading starlette-0.27.0-py3-none-any.whl (66 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.0/67.0 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting email-validator>=2.0.0 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading email_validator-2.0.0.post2-py3-none-any.whl (31 kB)\n", - "Collecting httpx>=0.23.0 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading httpx-0.24.1-py3-none-any.whl (75 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.4/75.4 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: itsdangerous>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (2.1.2)\n", - "Collecting orjson>=3.2.1 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading orjson-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (139 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m 
\u001b[32m139.9/139.9 kB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting pydantic-extra-types>=2.0.0 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading pydantic_extra_types-2.1.0-py3-none-any.whl (16 kB)\n", - "Collecting pydantic-settings>=2.0.0 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading pydantic_settings-2.0.3-py3-none-any.whl (11 kB)\n", - "Collecting python-multipart>=0.0.5 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading python_multipart-0.0.6-py3-none-any.whl (45 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading ujson-5.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.9/53.9 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting uvicorn[standard]>=0.12.0 (from fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading uvicorn-0.23.2-py3-none-any.whl (59 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.5/59.5 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (23.1.0)\n", - "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in 
/usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (3.2.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (6.0.4)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (4.0.3)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (1.9.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (1.4.0)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.4.0->caikit-nlp==0.0.1) (1.3.1)\n", - "Collecting protobuf<5,>=3.19.0 (from caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading protobuf-4.24.2-cp37-abi3-manylinux2014_x86_64.whl (311 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m311.4/311.4 kB\u001b[0m \u001b[31m33.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.13.1->caikit-nlp==0.0.1) (2.1.3)\n", - "Requirement already satisfied: setuptools>=39.0.1 in /usr/local/lib/python3.10/dist-packages (from py-grpc-prometheus<0.8,>=0.7.0->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (67.7.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.28.2->caikit-tgis-backend<0.2.0,>=0.1.16->caikit-nlp==0.0.1) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from 
requests<3,>=2.28.2->caikit-tgis-backend<0.2.0,>=0.1.16->caikit-nlp==0.0.1) (2.0.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.28.2->caikit-tgis-backend<0.2.0,>=0.1.16->caikit-nlp==0.0.1) (2023.7.22)\n", - "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13.1->caikit-nlp==0.0.1) (1.3.0)\n", - "Collecting dnspython>=2.0.0 (from email-validator>=2.0.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading dnspython-2.4.2-py3-none-any.whl (300 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m300.4/300.4 kB\u001b[0m \u001b[31m36.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting httpcore<0.18.0,>=0.15.0 (from httpx>=0.23.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading httpcore-0.17.3-py3-none-any.whl (74 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m74.5/74.5 kB\u001b[0m \u001b[31m10.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx>=0.23.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (1.3.0)\n", - "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (0.5.0)\n", - "Requirement already satisfied: pydantic-core==2.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (2.6.1)\n", - "Collecting 
python-dotenv>=0.21.0 (from pydantic-settings>=2.0.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)\n", - "Requirement already satisfied: anyio<5,>=3.4.0 in /usr/local/lib/python3.10/dist-packages (from starlette<0.28.0,>=0.27.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (3.7.1)\n", - "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.12.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (8.1.7)\n", - "Collecting h11>=0.8 (from uvicorn[standard]>=0.12.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting httptools>=0.5.0 (from uvicorn[standard]>=0.12.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading httptools-0.6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (428 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m428.8/428.8 kB\u001b[0m \u001b[31m47.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn[standard]>=0.12.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading uvloop-0.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.1/4.1 MB\u001b[0m \u001b[31m88.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - 
"\u001b[?25hCollecting watchfiles>=0.13 (from uvicorn[standard]>=0.12.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading watchfiles-0.20.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m80.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting websockets>=10.4 (from uvicorn[standard]>=0.12.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1)\n", - " Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m19.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi[all]<1,>=0.95->caikit[runtime-grpc,runtime-http]<0.18.0,>=0.16.0->caikit-nlp==0.0.1) (1.1.3)\n", - "Building wheels for collected packages: caikit-nlp, peft\n", - " Building wheel for caikit-nlp (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for caikit-nlp: filename=caikit_nlp-0.0.1-py3-none-any.whl size=71738 sha256=5176f598fbf41d2eaf242a7aa6d1508afeb1415739492bbdebce9c71924e4ff8\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-4qkd9tik/wheels/29/1c/3c/060d91e84e7a56eab1cb92fe59f09d22e106a75668a0cb62db\n", - " Building wheel for peft (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for peft: filename=peft-0.6.0.dev0-py3-none-any.whl size=106759 sha256=1bf4c6bdd17ccf9f387eea1e69f85089c63663c900ac5f38706b318d3c208e1e\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-4qkd9tik/wheels/d7/c7/de/1368fac8590e1b103ddc2ec2a28ad51d83aded1a3830e8a087\n", - "Successfully built caikit-nlp peft\n", - "Installing collected packages: websockets, uvloop, ujson, python-multipart, python-dotenv, py-grpc-prometheus, protobuf, orjson, httptools, h11, dnspython, watchfiles, uvicorn, starlette, httpcore, grpcio-reflection, grpcio-health-checking, email-validator, sse-starlette, pydantic-settings, pydantic-extra-types, httpx, fastapi, caikit, caikit-tgis-backend, accelerate, peft, caikit-nlp\n", - " Attempting uninstall: protobuf\n", - " Found existing installation: protobuf 3.20.3\n", - " Uninstalling protobuf-3.20.3:\n", - " Successfully uninstalled protobuf-3.20.3\n", - " Attempting uninstall: caikit\n", - " Found existing installation: caikit 0.0.1\n", - " Uninstalling caikit-0.0.1:\n", - " Successfully uninstalled caikit-0.0.1\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 4.24.2 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed accelerate-0.22.0 caikit-0.17.2 caikit-nlp-0.0.1 caikit-tgis-backend-0.1.16 dnspython-2.4.2 email-validator-2.0.0.post2 fastapi-0.103.0 grpcio-health-checking-1.57.0 grpcio-reflection-1.57.0 h11-0.14.0 httpcore-0.17.3 httptools-0.6.0 httpx-0.24.1 orjson-3.9.5 peft-0.6.0.dev0 protobuf-4.24.2 py-grpc-prometheus-0.7.0 pydantic-extra-types-2.1.0 pydantic-settings-2.0.3 python-dotenv-1.0.0 python-multipart-0.0.6 sse-starlette-1.6.5 starlette-0.27.0 ujson-5.8.0 uvicorn-0.23.2 uvloop-0.17.0 watchfiles-0.20.0 websockets-11.0.3\n", - "Cloning into 'caikit-nlp'...\n", - "remote: Enumerating objects: 2536, done.\u001b[K\n", - "remote: Counting objects: 100% (920/920), done.\u001b[K\n", - "remote: Compressing objects: 100% (268/268), done.\u001b[K\n", - "remote: Total 2536 (delta 734), reused 657 (delta 650), pack-reused 1616\u001b[K\n", - "Receiving objects: 100% (2536/2536), 1.89 MiB | 2.56 MiB/s, done.\n", - "Resolving deltas: 100% (1816/1816), done.\n" - ] - } - ], - "source": [ - "!pip install evaluate\n", - "!pip install rouge_score\n", - "\n", - "!pip install git+https://github.com/caikit/caikit@v0.11.3\n", - "!pip install git+https://github.com/caikit/caikit-nlp\n", - "\n", - "!git clone https://github.com/caikit/caikit-nlp" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7RIXUii94aLy" - }, - "source": [ - "# Step 2. 
Fine Tuning\n", - "\n", - "```\n", - "!python caikit-nlp/examples/run_fine_tuning.py --dataset \"billsum\" \\\n", - " --model_name tiiuae/falcon-rw-1b \\\n", - " --num_epochs 3 \\\n", - " --output_dir tmp/falcon-rw-1b \\\n", - " --batch_size=8 \\\n", - " --accumulate_steps 32 \\\n", - " --max_source_length 512 \\\n", - " --metric rouge \\\n", - " --torch_dtype bfloat16 \\\n", - " --evaluate\n", - "```\n", - "\n", - "This is a command-line instruction to run a Python script called `run_fine_tuning.py` using the Python interpreter. It is part of the `caikit-nlp` package and is meant to tune a pre-trained model (in this case `tiiuae/falcon-rw-1b`) on a specific dataset (in this case `billsum`).\n", - "\n", - "Let's explain each argument in the command:\n", - "\n", - "1. `caikit-nlp/examples/run_fine_tuning.py`: This specifies the path to the Python script that will be executed. It is part of the caikit-nlp repository and contains the fine-tuning example used here.\n", - "1. `--dataset \"billsum\"`: This specifies the dataset to be used for tuning. In this example, the dataset is `billsum`, a collection of US Congressional and California state bills paired with their summaries [link](https://huggingface.co/datasets/billsum).\n", - "1. `--model_name tiiuae/falcon-rw-1b`: This indicates the base model that will be fine-tuned. In this case, it's `tiiuae/falcon-rw-1b`, which refers to the Falcon 1B model from Hugging Face [link](https://huggingface.co/tiiuae/falcon-rw-1b).\n", - "1. `--num_epochs 3`: This sets the number of epochs (full passes over the training data) for the fine-tuning process. Here, it's set to 3, meaning the model will go through the dataset three times during fine-tuning.\n", - "1. `--output_dir tmp/falcon-rw-1b`: This sets the directory where the fine-tuned model and related outputs will be stored. In this case, it's set to the `tmp/falcon-rw-1b` directory.\n", - "1. `--batch_size=8`: This sets the batch size used during training.
The data will be divided into batches of 8 samples each.\n", - "1. `--accumulate_steps 32`: This specifies the number of steps over which gradients are accumulated before the weights are updated. It is useful for simulating a larger effective batch size when GPU memory is limited.\n", - "1. `--max_source_length 512`: This is a flag to set the maximum length of the input sequence.\n", - "1. `--metric rouge`: This sets the evaluation metric to ROUGE.\n", - "1. `--torch_dtype bfloat16`: This specifies what dtype to use for the training. `float32` is considered 'full precision', though other options such as `float16` and `bfloat16` (known as half precision) also exist. `bfloat16` has a wider range but less precision than `float16`, and is only available on Ampere-class and newer GPUs (such as the A100).\n", - "1. `--evaluate`: This signals the script to evaluate the model at the end of fine-tuning.\n", - "\n", - "\n", - "Overall, this command line script is fine-tuning the tiiuae/falcon-rw-1b model on the `billsum` dataset, with specific settings for maximum source length, batch size, accumulation steps, and so on. For a full list of available args, their descriptions, and default values, run `!python caikit-nlp/examples/run_fine_tuning.py --help`\n", - " \n", - "The results of fine-tuning will be stored in the `tmp/falcon-rw-1b` directory." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_bIQQI_G0t5z" - }, - "source": [ - "# The `tiiuae/falcon-rw-1b` Model\n", - "\n", - "The next model we'll fine tune is `tiiuae/falcon-rw-1b` ([link](https://huggingface.co/tiiuae/falcon-rw-1b)). This model has 1B parameters. We'll fine tune it on the `billsum` dataset and then see how it performs on summarizing new bills."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "L4deByI5aRNQ", - "outputId": "feaf7ce3-c836-4af1-996d-f40982206c76" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "env: ALLOW_DOWNLOADS=true\n", - "2023-08-23 14:46:29.987417: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", - "/usr/local/lib/python3.10/dist-packages/caikit/core/toolkit/errors/__init__.py:29: DeprecationWarning: The caikit.toolkit.errors package has moved to caikit.core.exceptions\n", - " _warnings.warn(\n", - " is still in the BETA phase and subject to change!\n", - "/usr/local/lib/python3.10/dist-packages/caikit/core/toolkit/error_handler.py:29: DeprecationWarning: The caikit.toolkit.error_handler package has moved to caikit.core.exceptions\n", - " _warnings.warn(\n", - "\u001b[94mExperiment Configuration\n", - "- Model Name: [tiiuae/falcon-rw-1b]\n", - " |- Inferred Model Resource Type: []\n", - "- Dataset: [billsum]\n", - "- Number of Epochs: [3]\n", - "- Learning Rate: [2e-05]\n", - "- Batch Size: [8]\n", - "- Output Directory: [tmp/falcon-rw-1b]\n", - "- Maximum source sequence length: [512]\n", - "- Maximum target sequence length: [128]\n", - "- Gradient accumulation steps: [32]\n", - "- Enable evaluation: [True]\n", - "- Evaluation metrics: [['rouge']]\n", - "- Torch dtype to use for training: [bfloat16]\u001b[0m\n", - "\u001b[94m[Loading the dataset...]\u001b[0m\n", - "2023-08-23T14:46:39.081970 [fsspe:DBUG] open file: /root/.cache/huggingface/datasets/billsum/default/3.0.0/75cf1719d38d6553aa0e0714c393c74579b083ae6e164b2543684e3e92e0c4cc/dataset_info.json\n", - "2023-08-23T14:46:39.088425 [fsspe:DBUG] open file: /root/.cache/huggingface/datasets/billsum/default/3.0.0/75cf1719d38d6553aa0e0714c393c74579b083ae6e164b2543684e3e92e0c4cc/dataset_info.json\n", - "\u001b[94m[Loading the base model 
resource...]\u001b[0m\n", - "\u001b[94m[Starting the training...]\u001b[0m\n", - "2023-08-23T14:49:50.656386 [PEFT_:DBUG] Shuffling enabled? True\n", - "2023-08-23T14:49:50.656523 [PEFT_:DBUG] Shuffling buffer size: 124654\n", - " 0% 0/1458 [00:00 Amends the Illinois Vehicle Code. Defines \"immediate hazard\". Provides instances in which an individual operating a bicycle approaching a stop sign may proceed through the intersection without stopping at the stop sign." - ] - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "ol9-BjpBOboR" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "EDtesOUJO7Ak" - }, - "outputs": [], - "source": [ - "from transformers import pipeline, set_seed\n", - "\n", - "generator = pipeline('text-generation', model='tmp/falcon-rw-1b/artifacts')\n", - "set_seed(42)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "id": "X6F0hhEYtTi_" - }, - "outputs": [], - "source": [ - "text = \"\"\"summarize:\n", - " Be it enacted by the People of the State of Illinois,\n", - "represented in the General Assembly:\n", - "\n", - "Section 5. The Illinois Vehicle Code is amended by adding\n", - "Section 11-1511.5 as follows:\n", - "\n", - "(625 ILCS 5/11-1511.5 new)\n", - "Sec. 11-1511.5. 
Operation of bicycle approaching a stop\n", - "sign.\n", - "\t\t(a) As used in this Section, \"immediate hazard\" means a\n", - "vehicle approaching an intersection at a proximity and rate of\n", - "speed sufficient to indicate to a reasonable person that there\n", - "is a danger of collision or accident.\n", - " (b) Except as provided in subsection (c), an individual\n", - "operating a bicycle approaching a stop sign may proceed\n", - "through the intersection without stopping at the stop sign if:\n", - " (1) the individual slows to a reasonable speed; and\n", - " (2) the individual yields the right-of-way to:\n", - " (i) any pedestrian within the intersection or an\n", - " adjacent crosswalk;\n", - " (ii) other traffic within the intersection; and\n", - " (iii) oncoming traffic that poses an immediate\n", - " hazard during the time the individual is traveling\n", - " through the intersection.\n", - "\n", - "\n", - "\n", - "\n", - "HB3923\t- 2 -\tLRB103 26384 MXP 52747 b\n", - " (c) Subsection (b) does not apply to an intersection with\n", - "an active railroad grade crossing.\n", - "\"\"\"\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "wb5xooVEajwx", - "outputId": "17920fc3-7856-4920-8327-ae53cc8d051b" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. 
Please pass your input's `attention_mask` to obtain reliable results.\n", - "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "\n", - "\n", - "HJR3149 Existing law generally prohibits someone from operating A violation for which notice will be issued pursuant tressign unless they are riding their bicycles between stops signs except under certain circumstances specified above bicyclists shall yield rights Of pedestrians walking upon footpaths,. Existiby passing laws governingThe act establishes penaltiesfor various violations involving safety when operatedbetween postedA misdemeanorpersonality disorderor mental illnessas definedin existing federal Law makesit unlawfulto operateany vehicleuponbe enforcedby localauthoritiesand prohibitswithoutIllinoisVehice code providesthat operationoffenders have been convicted orofendorsedictingthe\n" - ] - } - ], - "source": [ - "import caikit_nlp\n", - "model = caikit_nlp.modules.text_generation.TextGeneration.load('tmp/falcon-rw-1b')\n", - "\n", - "output = model.run(text, max_new_tokens= 128)\n", - "print('\\n\\n\\n'+output.generated_text.replace(text, ''))\n" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Conclusion\n", - "\n", - "It doesn't do horribly. Again, there are some strange characters and formatting issues, and the output gets worse as it goes on."
- ], - "metadata": { - "id": "w_GXzvUZU_e0" - } - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "Wa9svk3NVRj7" - }, - "execution_count": null, - "outputs": [] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "machine_shape": "hm", - "provenance": [], - "authorship_tag": "ABX9TyO8kbTWZRbxYVOQ87HDTIO2", - "include_colab_link": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file From 7332b7ad040bfb8d56883e06b532d152c6eec2dc Mon Sep 17 00:00:00 2001 From: Trevor Grant Date: Thu, 31 Aug 2023 11:02:30 -0500 Subject: [PATCH 4/4] Signed-off-by: Trevor Grant