Skip to content

Commit

Permalink
feat: ragas integration
Browse files Browse the repository at this point in the history
  • Loading branch information
cabreraalex committed Dec 5, 2023
1 parent 334ec32 commit 57301f2
Show file tree
Hide file tree
Showing 3 changed files with 215 additions and 0 deletions.
7 changes: 7 additions & 0 deletions examples/rag/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Retrieval Augmented Generation (RAG) Evaluation with RAGAS

This example shows how to use the [ragas](https://docs.ragas.io/) library to evaluate retrieval augmented generation (RAG) models
and visualize the results using Zeno.

The [rag-eval.ipynb](rag-eval.ipynb) notebook walks through an example on the
[Financial Opinion Mining and Question Answering (FiQA)](https://sites.google.com/view/fiqa/) dataset.
205 changes: 205 additions & 0 deletions examples/rag/rag-eval.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import dotenv\n",
"from datasets import load_dataset\n",
"import pandas as pd\n",
"from zeno_client import ZenoClient, ZenoMetric\n",
"from ragas.metrics import (\n",
" answer_relevancy,\n",
" faithfulness,\n",
" context_recall,\n",
" context_precision,\n",
")\n",
"from ragas import evaluate\n",
"\n",
"dotenv.load_dotenv(override=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fiqa_eval = load_dataset(\"explodinggradients/fiqa\", \"ragas_eval\")\n",
"fiqa_eval"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result = evaluate(\n",
" fiqa_eval[\"baseline\"],\n",
" metrics=[\n",
" context_precision,\n",
" faithfulness,\n",
" answer_relevancy,\n",
" context_recall,\n",
" ],\n",
")\n",
"\n",
"result"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = result.to_pandas()\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"client = ZenoClient(os.environ[\"ZENO_API_KEY\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"project = client.create_project(\n",
" name=\"ragas FiQA eval\",\n",
" description=\"Evaluation of RAG model using ragas on FiQA dataset\",\n",
" public=True,\n",
" view={\n",
" \"data\": {\n",
" \"type\": \"vstack\",\n",
" \"keys\": {\n",
" \"question\": {\"type\": \"markdown\"},\n",
" \"texts\": {\n",
" \"type\": \"list\",\n",
" \"elements\": {\"type\": \"markdown\"},\n",
" \"border\": True,\n",
" \"pad\": True,\n",
" },\n",
" },\n",
" },\n",
" \"label\": {\n",
" \"type\": \"markdown\",\n",
" },\n",
" \"output\": {\n",
" \"type\": \"vstack\",\n",
" \"keys\": {\n",
" \"answer\": {\"type\": \"markdown\"},\n",
" \"ground_truths\": {\n",
" \"type\": \"list\",\n",
" \"elements\": {\"type\": \"markdown\"},\n",
" \"border\": True,\n",
" \"pad\": True,\n",
" },\n",
" },\n",
" },\n",
" \"size\": \"large\",\n",
" },\n",
" metrics=[\n",
" ZenoMetric(\n",
" name=\"context_precision\", type=\"mean\", columns=[\"context_precision\"]\n",
" ),\n",
" ZenoMetric(name=\"faithfulness\", type=\"mean\", columns=[\"faithfulness\"]),\n",
" ZenoMetric(name=\"answer_relevancy\", type=\"mean\", columns=[\"answer_relevancy\"]),\n",
" ZenoMetric(name=\"context_recall\", type=\"mean\", columns=[\"context_recall\"]),\n",
" ],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data_df = pd.DataFrame(\n",
" {\n",
" \"data\": df.apply(\n",
" lambda x: {\"question\": x[\"question\"], \"texts\": list(x[\"contexts\"])}, axis=1\n",
" ),\n",
" \"label\": df[\"ground_truths\"].apply(lambda x: \"\\n\".join(x)),\n",
" }\n",
")\n",
"data_df[\"id\"] = data_df.index"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"project.upload_dataset(\n",
" data_df, id_column=\"id\", data_column=\"data\", label_column=\"label\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"output_df = df[\n",
" [\n",
" \"context_precision\",\n",
" \"faithfulness\",\n",
" \"answer_relevancy\",\n",
" \"context_recall\",\n",
" ]\n",
"].copy()\n",
"output_df[\"output\"] = df.apply(\n",
" lambda x: {\"answer\": x[\"answer\"], \"ground_truths\": list(x[\"ground_truths\"])}, axis=1\n",
")\n",
"output_df[\"id\"] = output_df.index"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"project.upload_system(\n",
" output_df, name=\"Base System\", id_column=\"id\", output_column=\"output\"\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "zeno-build",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
3 changes: 3 additions & 0 deletions examples/rag/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Direct dependencies imported by rag-eval.ipynb.
ragas
python-dotenv
datasets
# pandas is imported directly by the notebook; listed explicitly rather than
# relying on it being pulled in transitively by `datasets`.
pandas
# Provides the `zeno_client` module (ZenoClient, ZenoMetric) used to upload results.
zeno-client

0 comments on commit 57301f2

Please sign in to comment.