From 7159b62838de626cf9d1233a4234f5ea9963a848 Mon Sep 17 00:00:00 2001 From: lbourdois <58078086+lbourdois@users.noreply.github.com> Date: Fri, 17 Jan 2025 17:11:11 +0100 Subject: [PATCH] Add chapter 10 --- course/fr/chapter10/section2.ipynb | 65 ++++++++++++++++ course/fr/chapter10/section3.ipynb | 115 +++++++++++++++++++++++++++++ course/fr/chapter10/section5.ipynb | 95 ++++++++++++++++++++++++ 3 files changed, 275 insertions(+) create mode 100644 course/fr/chapter10/section2.ipynb create mode 100644 course/fr/chapter10/section3.ipynb create mode 100644 course/fr/chapter10/section5.ipynb diff --git a/course/fr/chapter10/section2.ipynb b/course/fr/chapter10/section2.ipynb new file mode 100644 index 00000000..f7dd5431 --- /dev/null +++ b/course/fr/chapter10/section2.ipynb @@ -0,0 +1,65 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "kK8kfIfqrgrc" + }, + "source": [ + "# Configurez votre instance Argilla" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cGOYFrYyrgrd" + }, + "outputs": [], + "source": [ + "!pip install argilla" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mWB23rrFrgrf" + }, + "outputs": [], + "source": [ + "import argilla as rg\n", + "\n", + "HF_TOKEN = \"...\" # uniquement pour les spaces privés\n", + "\n", + "client = rg.Argilla(\n", + " api_url=\"...\",\n", + " api_key=\"...\",\n", + " headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}, # uniquement pour les spaces privés\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UVHJPCFKrgrg" + }, + "outputs": [], + "source": [ + "client.me" + ] + } + ], + "metadata": { + "colab": { + "name": "Set up your Argilla instance", + "provenance": [] + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/course/fr/chapter10/section3.ipynb b/course/fr/chapter10/section3.ipynb new file mode 100644 index 00000000..48a2a5d0 --- /dev/null +++ b/course/fr/chapter10/section3.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Chargez votre jeu de données dans Argilla" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install argilla datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import argilla as rg\n", + "\n", + "HF_TOKEN = \"...\" # uniquement pour les spaces privés\n", + "\n", + "client = rg.Argilla(\n", + " api_url=\"...\",\n", + " api_key=\"...\",\n", + " headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}, # uniquement pour les spaces privés\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'text': Value(dtype='string', id=None),\n", + " 'label': Value(dtype='int64', id=None),\n", + " 'label_text': Value(dtype='string', id=None)}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datasets import load_dataset\n", + "\n", + "data = load_dataset(\"SetFit/ag_news\", split=\"train\")\n", + "data.features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings = rg.Settings(\n", + " fields=[rg.TextField(name=\"text\")],\n", + " questions=[\n", + " rg.LabelQuestion(\n", + " name=\"label\", title=\"Classifier le texte :\", labels=data.unique(\"label_text\")\n", + " ),\n", + " rg.SpanQuestion(\n", + " name=\"entities\",\n", + " title=\"Surligner toutes les entités présentes dans le texte :\",\n", + " labels=[\"PERSON\", \"ORG\", \"LOC\", \"EVENT\"],\n", + " field=\"text\",\n", + " ),\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = rg.Dataset(name=\"ag_news\", settings=settings)\n", + "\n", + "dataset.create()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset.records.log(data, mapping={\"label_text\": \"label\"})" + ] + } + ], + "metadata": { + "colab": { + "name": "Load your dataset to Argilla", + "provenance": [] + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/course/fr/chapter10/section5.ipynb b/course/fr/chapter10/section5.ipynb new file mode 100644 index 00000000..b2747a79 --- /dev/null +++ b/course/fr/chapter10/section5.ipynb @@ -0,0 +1,95 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Utilisez votre jeu de données annoté" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install argilla" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import argilla as rg\n", + "\n", + "HF_TOKEN = \"...\" # uniquement pour les spaces privés\n", + "\n", + "client = rg.Argilla(\n", + " api_url=\"...\",\n", + " api_key=\"...\",\n", + " headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}, # uniquement pour les spaces privés\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = client.datasets(name=\"ag_news\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "status_filter = rg.Query(filter=rg.Filter([(\"status\", \"==\", \"completed\")]))\n", + "\n", + "filtered_records = dataset.records(status_filter)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "filtered_records.to_datasets().push_to_hub(\"argilla/ag_news_annotated\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset.to_hub(repo_id=\"argilla/ag_news_annotated\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = rg.Dataset.from_hub(repo_id=\"argilla/ag_news_annotated\")" + ] + } + ], + "metadata": { + "colab": { + "name": "Use your annotated dataset", + "provenance": [] + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}