Skip to content

Commit

Permalink
Add RagBench datasets (#1580)
Browse files Browse the repository at this point in the history
  • Loading branch information
elronbandel authored Feb 5, 2025
1 parent 2ef9091 commit 5f72b0d
Show file tree
Hide file tree
Showing 25 changed files with 978 additions and 0 deletions.
78 changes: 78 additions & 0 deletions prepare/cards/ragbench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from unitxt import add_to_catalog
from unitxt.blocks import (
LoadHF,
TaskCard,
)
from unitxt.collections_operators import Dictify, Wrap
from unitxt.operators import Copy, Set
from unitxt.test_utils.card import test_card

# Configuration names passed to LoadHF(path="rungalileo/ragbench", name=...).
# Every entry is registered twice below: once as a RAG response-generation
# card and once as a QA-with-context card.
RAGBENCH_SUBSETS = (
    "covidqa",
    "cuad",
    "delucionqa",
    "emanual",
    "expertqa",
    "finqa",
    "hagrid",
    "hotpotqa",
    "msmarco",
    "pubmedqa",
    "tatqa",
    "techqa",
)

# The only subset that is passed through test_card before being catalogued.
# NOTE(review): presumably a single subset is tested to keep preparation fast
# -- confirm with the card authors.
TESTED_SUBSET = "covidqa"


def _build_rag_card(subset):
    """Return the card for the ``tasks.rag.response_generation`` task.

    The ``documents`` field is copied to both ``contexts`` and
    ``contexts_ids``, and ``response`` is wrapped in a list and stored as
    ``reference_answers``.
    """
    source = LoadHF(path="rungalileo/ragbench", name=subset)
    steps = [
        Copy(field="documents", to_field="contexts"),
        Copy(field="documents", to_field="contexts_ids"),
        Wrap(field="response", inside="list", to_field="reference_answers"),
    ]
    return TaskCard(
        loader=source,
        preprocess_steps=steps,
        task="tasks.rag.response_generation",
        templates={
            "default": "templates.rag.response_generation.please_respond_chat"
        },
    )


def _build_qa_card(subset):
    """Return the card for the ``tasks.qa.with_context`` task.

    Sets ``context_type`` to ``"documents"``, runs ``documents`` through
    Wrap and Dictify (keyed by ``body``) into ``context``, sets
    ``context/*/title`` to ``"Document"``, and wraps ``response`` in a list
    stored as ``answers``.
    """
    source = LoadHF(path="rungalileo/ragbench", name=subset)
    steps = [
        Set({"context_type": "documents"}),
        Wrap(field="documents", inside="list", process_every_value=True),
        Dictify(
            field="documents",
            to_field="context",
            with_keys=["body"],
            process_every_value=True,
        ),
        Set({"context/*/title": "Document"}),
        Wrap(field="response", inside="list", to_field="answers"),
    ]
    return TaskCard(
        loader=source,
        preprocess_steps=steps,
        task="tasks.qa.with_context",
        templates="templates.qa.with_context.all",
    )


for subset in RAGBENCH_SUBSETS:
    run_tests = subset == TESTED_SUBSET

    # RAG response-generation variant: build, optionally test, then register.
    rag_card = _build_rag_card(subset)
    if run_tests:
        test_card(
            rag_card,
            strict=True,
            metrics=["metrics.rouge"],
            demos_taken_from="test",
        )
    add_to_catalog(
        rag_card,
        f"cards.rag.response_generation.ragbench.{subset}",
        overwrite=True,
    )

    # QA-with-context variant: built only after the RAG card is registered,
    # keeping the same order of side effects as before.
    qa_card = _build_qa_card(subset)
    if run_tests:
        test_card(
            qa_card,
            strict=True,
            demos_taken_from="test",
        )
    add_to_catalog(qa_card, f"cards.ragbench.{subset}", overwrite=True)
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"__type__": "task_card",
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "covidqa"
},
"preprocess_steps": [
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts"
},
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts_ids"
},
{
"__type__": "wrap",
"field": "response",
"inside": "list",
"to_field": "reference_answers"
}
],
"task": "tasks.rag.response_generation",
"templates": {
"default": "templates.rag.response_generation.please_respond_chat"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"__type__": "task_card",
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "cuad"
},
"preprocess_steps": [
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts"
},
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts_ids"
},
{
"__type__": "wrap",
"field": "response",
"inside": "list",
"to_field": "reference_answers"
}
],
"task": "tasks.rag.response_generation",
"templates": {
"default": "templates.rag.response_generation.please_respond_chat"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"__type__": "task_card",
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "delucionqa"
},
"preprocess_steps": [
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts"
},
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts_ids"
},
{
"__type__": "wrap",
"field": "response",
"inside": "list",
"to_field": "reference_answers"
}
],
"task": "tasks.rag.response_generation",
"templates": {
"default": "templates.rag.response_generation.please_respond_chat"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"__type__": "task_card",
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "emanual"
},
"preprocess_steps": [
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts"
},
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts_ids"
},
{
"__type__": "wrap",
"field": "response",
"inside": "list",
"to_field": "reference_answers"
}
],
"task": "tasks.rag.response_generation",
"templates": {
"default": "templates.rag.response_generation.please_respond_chat"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"__type__": "task_card",
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "expertqa"
},
"preprocess_steps": [
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts"
},
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts_ids"
},
{
"__type__": "wrap",
"field": "response",
"inside": "list",
"to_field": "reference_answers"
}
],
"task": "tasks.rag.response_generation",
"templates": {
"default": "templates.rag.response_generation.please_respond_chat"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"__type__": "task_card",
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "finqa"
},
"preprocess_steps": [
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts"
},
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts_ids"
},
{
"__type__": "wrap",
"field": "response",
"inside": "list",
"to_field": "reference_answers"
}
],
"task": "tasks.rag.response_generation",
"templates": {
"default": "templates.rag.response_generation.please_respond_chat"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"__type__": "task_card",
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "hagrid"
},
"preprocess_steps": [
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts"
},
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts_ids"
},
{
"__type__": "wrap",
"field": "response",
"inside": "list",
"to_field": "reference_answers"
}
],
"task": "tasks.rag.response_generation",
"templates": {
"default": "templates.rag.response_generation.please_respond_chat"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"__type__": "task_card",
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "hotpotqa"
},
"preprocess_steps": [
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts"
},
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts_ids"
},
{
"__type__": "wrap",
"field": "response",
"inside": "list",
"to_field": "reference_answers"
}
],
"task": "tasks.rag.response_generation",
"templates": {
"default": "templates.rag.response_generation.please_respond_chat"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"__type__": "task_card",
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "msmarco"
},
"preprocess_steps": [
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts"
},
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts_ids"
},
{
"__type__": "wrap",
"field": "response",
"inside": "list",
"to_field": "reference_answers"
}
],
"task": "tasks.rag.response_generation",
"templates": {
"default": "templates.rag.response_generation.please_respond_chat"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"__type__": "task_card",
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "pubmedqa"
},
"preprocess_steps": [
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts"
},
{
"__type__": "copy",
"field": "documents",
"to_field": "contexts_ids"
},
{
"__type__": "wrap",
"field": "response",
"inside": "list",
"to_field": "reference_answers"
}
],
"task": "tasks.rag.response_generation",
"templates": {
"default": "templates.rag.response_generation.please_respond_chat"
}
}
Loading

0 comments on commit 5f72b0d

Please sign in to comment.