Commit

add annotation notebook (#15)
shahules786 authored Nov 20, 2024
1 parent 9736412 commit b30245d
Showing 2 changed files with 249 additions and 0 deletions.
134 changes: 134 additions & 0 deletions app.ragas-experiments/evaluation_annotation.ipynb
@@ -0,0 +1,134 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "37727c5a-0b9b-45b8-9494-0d41dc4e16d9",
"metadata": {},
"source": [
"## API_KEY"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "df16f79c-dd92-402f-b5d2-aa72703d6c9a",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.environ[\"OPENAI_API_KEY\"] = \"your-api_key\""
]
},
{
"cell_type": "markdown",
"id": "87af68f7-b973-4338-a168-c541f7f2dbc7",
"metadata": {},
"source": [
"## Imports"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "22d60861-5bb3-48b0-a6a1-c3008c63c14b",
"metadata": {},
"outputs": [],
"source": [
"from datasets import load_dataset\n",
"from ragas import evaluate, EvaluationDataset\n",
"from ragas.metrics import AspectCritic\n",
"from langchain_openai.chat_models import ChatOpenAI\n",
"from ragas.llms import LangchainLLMWrapper\n"
]
},
{
"cell_type": "markdown",
"id": "0dab6de2-680a-4178-823f-8889a144a5d4",
"metadata": {},
"source": [
"## Dataset"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "a2e9130c-b934-4331-a0c1-ce63089dcdf8",
"metadata": {},
"outputs": [],
"source": [
"dataset = load_dataset(\"explodinggradients/aspect_critic_answer_correctness\",split=\"train\")\n",
"eval_dataset = EvaluationDataset.from_hf_dataset(dataset)"
]
},
{
"cell_type": "markdown",
"id": "ffe5b8b9-8b1b-4ce3-95ce-51dab58458d0",
"metadata": {},
"source": [
"## Set Model"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "2c0d7d0c-d7e7-4c50-b2a5-a7336744288e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"EvaluationDataset(features=['user_input', 'response', 'reference'], len=50)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"llm_4o = LangchainLLMWrapper(ChatOpenAI(model=\"gpt-4o\"))"
]
},
{
"cell_type": "markdown",
"id": "cb91f37f-3eb0-425a-8a47-7ca6729e498e",
"metadata": {},
"source": [
"## Evaluate"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2e52e40c-10c6-4cb0-8815-c01614225b2e",
"metadata": {},
"outputs": [],
"source": [
"critic = AspectCritic(name=\"answer_correctness\",definition=\"Given the user_input, reference and response. Is the response correct compared with the reference\")\n",
"results = evaluate(eval_dataset,metrics=[critic],llm=llm_4o)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "ragas",
"language": "python",
"name": "ragas"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.20"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
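A note on the API-key cell above: hardcoding the key in notebook source risks leaking it once the file is committed. A minimal alternative sketch, standard library only (getpass is not used in the original notebook):

import os
from getpass import getpass

# Prompt for the key at runtime instead of storing it in the notebook source.
if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")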
115 changes: 115 additions & 0 deletions evaluation_annotation.ipynb
@@ -0,0 +1,115 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "87af68f7-b973-4338-a168-c541f7f2dbc7",
"metadata": {},
"source": [
"## Imports"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "22d60861-5bb3-48b0-a6a1-c3008c63c14b",
"metadata": {},
"outputs": [],
"source": [
"from datasets import load_dataset\n",
"from ragas import evaluate, EvaluationDataset\n",
"from ragas.metrics import AspectCritic\n",
"from langchain_openai.chat_models import ChatOpenAI\n",
"from ragas.llms import LangchainLLMWrapper\n"
]
},
{
"cell_type": "markdown",
"id": "0dab6de2-680a-4178-823f-8889a144a5d4",
"metadata": {},
"source": [
"## Dataset"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "a2e9130c-b934-4331-a0c1-ce63089dcdf8",
"metadata": {},
"outputs": [],
"source": [
"dataset = load_dataset(\"explodinggradients/aspect_critic_answer_correctness\",split=\"train\")\n",
"eval_dataset = EvaluationDataset.from_hf_dataset(dataset)"
]
},
{
"cell_type": "markdown",
"id": "ffe5b8b9-8b1b-4ce3-95ce-51dab58458d0",
"metadata": {},
"source": [
"## Set Model"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "2c0d7d0c-d7e7-4c50-b2a5-a7336744288e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"EvaluationDataset(features=['user_input', 'response', 'reference'], len=50)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"llm_4o = LangchainLLMWrapper(ChatOpenAI(model=\"gpt-4o\"))"
]
},
{
"cell_type": "markdown",
"id": "cb91f37f-3eb0-425a-8a47-7ca6729e498e",
"metadata": {},
"source": [
"## Evaluate"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2e52e40c-10c6-4cb0-8815-c01614225b2e",
"metadata": {},
"outputs": [],
"source": [
"critic = AspectCritic(name=\"answer_correctness\",definition=\"Given the user_input, reference and response. Is the response correct compared with the reference\")\n",
"results = evaluate(eval_dataset,metrics=[critic],llm=llm_4o)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "ragas",
"language": "python",
"name": "ragas"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.20"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
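For anyone running either notebook: evaluate returns a result object whose per-sample scores can be inspected. A possible follow-up cell, sketched under the assumption that the installed ragas version exposes EvaluationResult.to_pandas (the scores.csv filename is illustrative, not part of the commit):

# Inspect per-sample verdicts from the AspectCritic metric.
df = results.to_pandas()
print(df[["user_input", "response", "answer_correctness"]].head())

# Persist for manual annotation or review (filename is illustrative).
df.to_csv("scores.csv", index=False)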
