Merge branch 'main' into add-granite-docs-format

IBM · Jan 30, 2025 · 0a103da
2 parents a90e1c0 + bc65c5c
commit 0a103da
Show file tree

Hide file tree

Showing 24 changed files with 259 additions and 26 deletions.
diff --git a/prepare/cards/fin_qa.py b/prepare/cards/fin_qa.py
@@ -50,7 +50,8 @@
                 ["table-average", "table header", "number", "the average of one table row"],
                 ["table-max", "table header", "number", "the maximum number of one table row"],
                 ["table-min", "table header", "number", "the minimum number of one table row"]]
-                Answer with only the program, without any additional explanation.
+                \nAnswer with only the program, without any additional explanation or introductory text.
+                \nHere are some input-output examples. Read the examples carefully to figure out the mapping. The output of the last example is not given, and your job is to figure out what it is.
                 """,
             input_format="""Pre-table text: {pre_text}
                 Table: {table}
@@ -59,7 +60,9 @@
                 Program:
                     """,
             output_format="{program_re}",
-            postprocessors=[],
+            postprocessors=[
+                "processors.take_first_non_empty_line",
+            ],
         ),
     ],
     __description__=(

diff --git a/prepare/cards/scigen.py b/prepare/cards/scigen.py
@@ -28,7 +28,7 @@
                 }
             ),
         ],
-        task="tasks.generation.from_pair[metrics=[metrics.llm_as_judge.rating.llama_3_1_70b_instruct_cross_provider_template_table2text_single_turn_with_reference]]",
+        task="tasks.generation.from_pair[metrics=[metrics.rouge,metrics.bert_score.bert_base_uncased,metrics.bleu,metrics.meteor,metrics.llm_as_judge.rating.llama_3_1_70b_instruct_cross_provider_template_table2text_single_turn_with_reference]]",
         templates=[
             "templates.generation.from_pair.default[postprocessors=[processors.lower_case]]"
         ],

diff --git a/prepare/cards/tab_fact.py b/prepare/cards/tab_fact.py
@@ -32,7 +32,9 @@
         task="tasks.classification.multi_class.relation",
         templates=[
             InputOutputTemplate(
-                instruction="Given a {text_a_type} and {text_b_type} classify the {type_of_relation} of the {text_b_type} to one of {classes}. You should only output the result. Do not add any explanation or other information.",
+                instruction="Given a {text_a_type} and {text_b_type} classify the {type_of_relation} of the {text_b_type} to one of {classes}."
+                + "\nOutput only the final answer without any explanations, extra information, or introductory text."
+                + "\nHere are some input-output examples. Read the examples carefully to figure out the mapping. The output of the last example is not given, and your job is to figure out what it is.",
                 input_format="{text_a_type}: {text_a}\n{text_b_type}: {text_b} ",
                 output_format="{label}",
                 postprocessors=[
@@ -52,5 +54,5 @@
         ),
     )
 
-    test_card(card)
+    test_card(card, num_demos=2, demos_pool_size=20)
     add_to_catalog(card, "cards.tab_fact", overwrite=True)
diff --git a/prepare/cards/tablebench_data_analysis.py b/prepare/cards/tablebench_data_analysis.py
@@ -51,11 +51,14 @@
     ),
     templates=[
         InputOutputTemplate(
-            instruction="You are a table analyst. Your task is to answer questions based on the table content. {answer_formatter}",
+            instruction="You are a table analyst. Your task is to answer questions based on the table content. {answer_formatter}"
+            + "\nOutput only the final answer without any explanations, extra information, or introductory text."
+            + "\nHere are some input-output examples. Read the examples carefully to figure out the mapping. The output of the last example is not given, and your job is to figure out what it is.",
             input_format="{context_type}: {context} \nQuestion: {question}",
             target_prefix="Final Answer: ",
             output_format="{answers}",
             postprocessors=[
+                "processors.take_first_non_empty_line",
                 "processors.lower_case",
                 "processors.remove_punctuations",
                 "processors.remove_articles",

diff --git a/prepare/cards/tablebench_fact_checking.py b/prepare/cards/tablebench_fact_checking.py
@@ -56,11 +56,14 @@
     ),
     templates=[
         InputOutputTemplate(
-            instruction="You are a table analyst. Your task is to answer questions based on the table content. {answer_formatter}",
+            instruction="You are a table analyst. Your task is to answer questions based on the table content. {answer_formatter}"
+            + "\nOutput only the final answer without any explanations, extra information, or introductory text."
+            + "\nHere are some input-output examples. Read the examples carefully to figure out the mapping. The output of the last example is not given, and your job is to figure out what it is.",
             input_format="{context_type}: {context} \nQuestion: {question}",
             target_prefix="Final Answer: ",
             output_format="{answers}",
             postprocessors=[
+                "processors.take_first_non_empty_line",
                 "processors.lower_case",
                 "processors.remove_punctuations",
                 "processors.remove_articles",

diff --git a/prepare/cards/tablebench_numerical_reasoning.py b/prepare/cards/tablebench_numerical_reasoning.py
@@ -51,11 +51,14 @@
     ),
     templates=[
         InputOutputTemplate(
-            instruction="You are a table analyst. Your task is to answer questions based on the table content. {answer_formatter}",
+            instruction="You are a table analyst. Your task is to answer questions based on the table content. {answer_formatter}"
+            + "\nOutput only the final answer without any explanations, extra information, or introductory text."
+            + "\nHere are some input-output examples. Read the examples carefully to figure out the mapping. The output of the last example is not given, and your job is to figure out what it is.",
             input_format="{context_type}: {context} \nQuestion: {question}",
             target_prefix="Final Answer: ",
             output_format="{answers}",
             postprocessors=[
+                "processors.take_first_non_empty_line",
                 "processors.lower_case",
                 "processors.remove_punctuations",
                 "processors.remove_articles",

diff --git a/prepare/cards/turl_col_type.py b/prepare/cards/turl_col_type.py
@@ -44,7 +44,9 @@
         InputOutputTemplate(
             instruction="""
                     This is a column type annotation task. The goal of this task is to choose the correct types for one selected column of the given input table from the given candidate types. The Wikipedia page, section and table caption (if any) provide important information for choosing the correct column types.
-                    Candidate Types: {vocab} \nOutput only the correct column types for the mentioned from the candidate types.
+                    Candidate Types: {vocab}
+                    \nOutput only the correct column types from the candidate list for the mentioned columns. Do not include any explanations, extra information, or introductory text—only the final answer.
+                    \nHere are some input-output examples. Read the examples carefully to figure out the mapping. The output of the last example is not given, and your job is to figure out what it is.
                 """.strip(),
             input_format="\nColumn name: {colname}"
             "\nPage Title: {page_title} "

diff --git a/prepare/cards/wikitq.py b/prepare/cards/wikitq.py
@@ -25,10 +25,12 @@
     task="tasks.qa.extractive[metrics=[metrics.f1_strings, metrics.unsorted_list_exact_match]]",
     templates=[
         MultiReferenceTemplate(
-            instruction="Answer the question based on the provided table. You should only output the final answer. Do not add any explanation or other information.",
-            input_format="\nQuestion: {question}\nTable: {context}\nAnswer: ",
+            instruction="Answer the question based on the provided table. Extract and output only the final answer—the exact phrase or data from the table that directly answers the question. Do not include any alterations, explanations, or introductory text"
+            + "\nHere are some input-output examples. Read the examples carefully to figure out the mapping. The output of the last example is not given, and your job is to figure out what it is.",
+            input_format="\nQuestion: {question}" "\nTable: {context}" "\nAnswer: ",
             references_field="answers",
             postprocessors=[
+                "processors.take_first_non_empty_line",
                 "processors.to_list_by_comma_space",
                 "processors.str_to_float_format",
             ],

diff --git a/prepare/tasks/generation.py b/prepare/tasks/generation.py
@@ -29,9 +29,9 @@
         reference_fields={"output": str},
         prediction_type=str,
         metrics=[
-            "metrics.bleu",
             "metrics.rouge",
             "metrics.bert_score.bert_base_uncased",
+            "metrics.bleu",
             "metrics.meteor",
         ],
         augmentable_inputs=["input_a", "input_b"],

diff --git a/prepare/templates/generation/generation.py b/prepare/templates/generation/generation.py
@@ -33,7 +33,8 @@
 
 add_to_catalog(
     InputOutputTemplate(
-        instruction="Given the following {type_of_input_a} and {type_of_input_b}, generate the corresponding {type_of_output}.",
+        instruction="Given the following {type_of_input_a} and {type_of_input_b}, generate the corresponding {type_of_output}."
+        + "\nHere are some input-output examples. Read the examples carefully to figure out the mapping. The output of the last example is not given, and your job is to figure out what it is.",
         input_format="{type_of_input_a}: \n{input_a} \n{type_of_input_b}: \n{input_b} \n{type_of_output}:",
         output_format="{output}",
         postprocessors=[

diff --git a/prepare/templates/qa/with_context.py b/prepare/templates/qa/with_context.py
@@ -92,7 +92,8 @@
 
 add_to_catalog(
     MultiReferenceTemplate(
-        instruction="Using the information from the {context_type} given below, summarize a paragraph-long response to the following user query.",
+        instruction="Using the information from the {context_type} given below, summarize a paragraph-long response to the following user query."
+        + "\nHere are some input-output examples. Read the examples carefully to figure out the mapping. The output of the last example is not given, and your job is to figure out what it is.",
         input_format="{context_type}:\n{context}\nQuery:\n{question}",
         output_format="{answers}",
         target_prefix="Answer:\n",

diff --git a/src/unitxt/catalog/cards/fin_qa.json b/src/unitxt/catalog/cards/fin_qa.json
@@ -59,10 +59,12 @@
     "templates": [
         {
             "__type__": "input_output_template",
-            "instruction": "Presented with a financial report consisting of textual contents and a structured table, given a question, generate the reasoning program in the domain specific language (DSL) that will be executed to get the answer. \nThe DSL consists of mathematical operations and table operations as executable programs. The program consists of a sequence of operations. Each operation takes a list of arguments. \nThere are 6 mathematical operations: add, subtract, multiply, divide, greater, exp, and 4 table aggregation operations table-max, table-min, table-sum, table-average, that apply aggregation operations on table rows. The mathematical operations take arguments of either numbers from the given reports, or a numerical result from a previous step.\nThe table operations take arguments of table row names. We use the special token #n to denote the result from the nth step. \nFor example, in the example \"divide(9413, 20.01), divide(8249, 9.48), subtract(#0, #1)\", the program consists of 3 steps; The first and the second division steps take arguments from the table and the text, respectively, then the third step subtracts the results from the two previous steps.\n                Definitions of all operations:\n                [[\"Name\", \"Arguments\", \"Output\", \"Description\"],\n                [\"add\", \"number1, number2\", \"number\", \"add two numbers: number1 + number2\"],\n                [\"subtract\", \"number1, number2\", \"number\", \"subtract two numbers: number1 - number2\"],\n                [\"multiply\", \"number1, number2\", \"number\", \"multiply two numbers: number1 * number2\"],\n                [\"divide\", \"number1, number2\", \"number\", \"multiply two numbers: number1 / number2\"],\n                [\"exp\", \"number1, number2\", \"number\", \"exponential: number1 ^ number2\"],\n                [\"greater\", \"number1, number2\", \"bool\", \"comparison: number1 > number2\"],\n                [\"table-sum\", \"table 
header\", \"number\", \"the summation of one table row\"],\n                [\"table-average\", \"table header\", \"number\", \"the average of one table row\"],\n                [\"table-max\", \"table header\", \"number\", \"the maximum number of one table row\"],\n                [\"table-min\", \"table header\", \"number\", \"the minimum number of one table row\"]]\n                Answer with only the program, without any additional explanation.\n                ",
+            "instruction": "Presented with a financial report consisting of textual contents and a structured table, given a question, generate the reasoning program in the domain specific language (DSL) that will be executed to get the answer. \nThe DSL consists of mathematical operations and table operations as executable programs. The program consists of a sequence of operations. Each operation takes a list of arguments. \nThere are 6 mathematical operations: add, subtract, multiply, divide, greater, exp, and 4 table aggregation operations table-max, table-min, table-sum, table-average, that apply aggregation operations on table rows. The mathematical operations take arguments of either numbers from the given reports, or a numerical result from a previous step.\nThe table operations take arguments of table row names. We use the special token #n to denote the result from the nth step. \nFor example, in the example \"divide(9413, 20.01), divide(8249, 9.48), subtract(#0, #1)\", the program consists of 3 steps; The first and the second division steps take arguments from the table and the text, respectively, then the third step subtracts the results from the two previous steps.\n                Definitions of all operations:\n                [[\"Name\", \"Arguments\", \"Output\", \"Description\"],\n                [\"add\", \"number1, number2\", \"number\", \"add two numbers: number1 + number2\"],\n                [\"subtract\", \"number1, number2\", \"number\", \"subtract two numbers: number1 - number2\"],\n                [\"multiply\", \"number1, number2\", \"number\", \"multiply two numbers: number1 * number2\"],\n                [\"divide\", \"number1, number2\", \"number\", \"multiply two numbers: number1 / number2\"],\n                [\"exp\", \"number1, number2\", \"number\", \"exponential: number1 ^ number2\"],\n                [\"greater\", \"number1, number2\", \"bool\", \"comparison: number1 > number2\"],\n                [\"table-sum\", \"table 
header\", \"number\", \"the summation of one table row\"],\n                [\"table-average\", \"table header\", \"number\", \"the average of one table row\"],\n                [\"table-max\", \"table header\", \"number\", \"the maximum number of one table row\"],\n                [\"table-min\", \"table header\", \"number\", \"the minimum number of one table row\"]]\n                \nAnswer with only the program, without any additional explanation or introductory text.\n                \nHere are some input-output examples. Read the examples carefully to figure out the mapping. The output of the last example is not given, and your job is to figure out what it is.\n                ",
             "input_format": "Pre-table text: {pre_text}\n                Table: {table}\n                Post-table text: {post_text}\n                Question: {question}\n                Program:\n                    ",
             "output_format": "{program_re}",
-            "postprocessors": []
+            "postprocessors": [
+                "processors.take_first_non_empty_line"
+            ]
         }
     ],
     "__description__": "FINQA is an expert-annotated QA dataset that aims to tackle numerical reasoning over real-world financial data.",

diff --git a/src/unitxt/catalog/cards/scigen.json b/src/unitxt/catalog/cards/scigen.json
@@ -39,7 +39,7 @@
             }
         }
     ],
-    "task": "tasks.generation.from_pair[metrics=[metrics.llm_as_judge.rating.llama_3_1_70b_instruct_cross_provider_template_table2text_single_turn_with_reference]]",
+    "task": "tasks.generation.from_pair[metrics=[metrics.rouge,metrics.bert_score.bert_base_uncased,metrics.bleu,metrics.meteor,metrics.llm_as_judge.rating.llama_3_1_70b_instruct_cross_provider_template_table2text_single_turn_with_reference]]",
     "templates": [
         "templates.generation.from_pair.default[postprocessors=[processors.lower_case]]"
     ],

diff --git a/src/unitxt/catalog/cards/tab_fact.json b/src/unitxt/catalog/cards/tab_fact.json
@@ -42,7 +42,7 @@
     "templates": [
         {
             "__type__": "input_output_template",
-            "instruction": "Given a {text_a_type} and {text_b_type} classify the {type_of_relation} of the {text_b_type} to one of {classes}. You should only output the result. Do not add any explanation or other information.",
+            "instruction": "Given a {text_a_type} and {text_b_type} classify the {type_of_relation} of the {text_b_type} to one of {classes}.\nOutput only the final answer without any explanations, extra information, or introductory text.\nHere are some input-output examples. Read the examples carefully to figure out the mapping. The output of the last example is not given, and your job is to figure out what it is.",
             "input_format": "{text_a_type}: {text_a}\n{text_b_type}: {text_b} ",
             "output_format": "{label}",
             "postprocessors": [

diff --git a/src/unitxt/catalog/cards/tablebench_data_analysis.json b/src/unitxt/catalog/cards/tablebench_data_analysis.json
@@ -84,11 +84,12 @@
     "templates": [
         {
             "__type__": "input_output_template",
-            "instruction": "You are a table analyst. Your task is to answer questions based on the table content. {answer_formatter}",
+            "instruction": "You are a table analyst. Your task is to answer questions based on the table content. {answer_formatter}\nOutput only the final answer without any explanations, extra information, or introductory text.\nHere are some input-output examples. Read the examples carefully to figure out the mapping. The output of the last example is not given, and your job is to figure out what it is.",
             "input_format": "{context_type}: {context} \nQuestion: {question}",
             "target_prefix": "Final Answer: ",
             "output_format": "{answers}",
             "postprocessors": [
+                "processors.take_first_non_empty_line",
                 "processors.lower_case",
                 "processors.remove_punctuations",
                 "processors.remove_articles",

diff --git a/src/unitxt/catalog/cards/tablebench_fact_checking.json b/src/unitxt/catalog/cards/tablebench_fact_checking.json
@@ -84,11 +84,12 @@
     "templates": [
         {
             "__type__": "input_output_template",
-            "instruction": "You are a table analyst. Your task is to answer questions based on the table content. {answer_formatter}",
+            "instruction": "You are a table analyst. Your task is to answer questions based on the table content. {answer_formatter}\nOutput only the final answer without any explanations, extra information, or introductory text.\nHere are some input-output examples. Read the examples carefully to figure out the mapping. The output of the last example is not given, and your job is to figure out what it is.",
             "input_format": "{context_type}: {context} \nQuestion: {question}",
             "target_prefix": "Final Answer: ",
             "output_format": "{answers}",
             "postprocessors": [
+                "processors.take_first_non_empty_line",
                 "processors.lower_case",
                 "processors.remove_punctuations",
                 "processors.remove_articles",