From 46894553931df2051449f70713542a42731cdba2 Mon Sep 17 00:00:00 2001 From: eren23 Date: Sat, 25 Feb 2023 22:52:48 +0100 Subject: [PATCH] generic baseline --- examples/classification.ipynb | 6 +- examples/generic.ipynb | 416 ++++++++++++++++++ promptify/prompts/nlp/templates/generic.jinja | 33 ++ 3 files changed, 452 insertions(+), 3 deletions(-) create mode 100644 examples/generic.ipynb create mode 100644 promptify/prompts/nlp/templates/generic.jinja diff --git a/examples/classification.ipynb b/examples/classification.ipynb index ae486b8..30eb2a8 100644 --- a/examples/classification.ipynb +++ b/examples/classification.ipynb @@ -380,7 +380,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "prompt-env", "language": "python", "name": "python3" }, @@ -394,11 +394,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.9.16" }, "vscode": { "interpreter": { - "hash": "67ec57dda2d4cf896ecf03d091187d70ff0a6cd6e0feb9599db137f2d1cf3d9f" + "hash": "003cbd9d5de60837b71ec248298ceb3401870867de42df8ae6f444c9dc068945" } } }, diff --git a/examples/generic.ipynb b/examples/generic.ipynb new file mode 100644 index 0000000..8c55965 --- /dev/null +++ b/examples/generic.ipynb @@ -0,0 +1,416 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "from promptify.models.nlp.openai_model import OpenAI\n", + "from promptify.prompts.nlp.prompter import Prompter" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "model = OpenAI(api_key=\"\")\n", + "nlp_prompter = Prompter(model)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "data = json.load(open(\"data/binary.json\",'r'))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": 
[ + { + "data": { + "text/plain": [ + "9" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'text': 'Eight years the republicans denied obama’s picks. Breitbarters outrage is as phony as their fake president.', 'labels': 'negative', 'score': '', 'complexity': ''} \n", + "\n", + "{'text': 'Except he’s the most successful president in our lifetimes. He’s undone most of the damage Obummer did and set America on the right path again.', 'labels': 'positive', 'score': '', 'complexity': ''} \n", + "\n", + "{'text': 'So disappointed in wwe summerslam! I want to see john cena wins his 16th title', 'labels': 'negative', 'score': '', 'complexity': ''} \n", + "\n" + ] + } + ], + "source": [ + "examples = []\n", + "for sample in data[:3]:\n", + " print(sample,\"\\n\")\n", + " examples.append((sample['text'],sample['labels']))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "key_map = {'text': 'input', 'labels': 'label'}\n", + "data = [{key_map.get(k, k): v for k, v in d.items()} for d in data]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "delete_keys = ['score', 'complexity']\n", + "data = [{k: v for k, v in d.items() if k not in delete_keys} for d in data]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'input': 'Eight years the republicans denied obama’s picks. Breitbarters outrage is as phony as their fake president.', 'label': 'negative'} \n", + "\n", + "{'input': 'Except he’s the most successful president in our lifetimes. 
He’s undone most of the damage Obummer did and set America on the right path again.', 'label': 'positive'} \n", + "\n", + "{'input': 'So disappointed in wwe summerslam! I want to see john cena wins his 16th title', 'label': 'negative'} \n", + "\n", + "{'input': 'Looking forward to going to Carrow Rd tonight. Last time we were there\\\\u002c Bale scored 2 and we were 3rd. Do not want extra time though', 'label': 'positive'} \n", + "\n", + "{'input': \"It's a good day at work when you get to shake Jim Lehrer's hand. Thanks, @user Still kicking myself for being to shy to hug\", 'label': 'positive'} \n", + "\n" + ] + } + ], + "source": [ + "examples = []\n", + "for sample in data[:5]:\n", + " print(sample,\"\\n\")\n", + " examples.append((sample['input'],sample['label']))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "duty = \" You take Passage as input and classify that as either positive or negative Category. Your output format is only form, no other form.\"" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Binary Classificaion System\n", + "You are a highly intelligen binary classification system.\n", + " You take Passage as input and classify that as either positive or negative Category. Your output format is only form, no other form. \n", + "\n", + "Example objects with keys input as model input and label as model output:\n", + "\n", + "('Eight years the republicans denied obama’s picks. Breitbarters outrage is as phony as their fake president.', 'negative')\n", + "\n", + "('Except he’s the most successful president in our lifetimes. He’s undone most of the damage Obummer did and set America on the right path again.', 'positive')\n", + "\n", + "('So disappointed in wwe summerslam! 
I want to see john cena wins his 16th title', 'negative')\n", + "\n", + "('Looking forward to going to Carrow Rd tonight. Last time we were there\\\\u002c Bale scored 2 and we were 3rd. Do not want extra time though', 'positive')\n", + "\n", + "(\"It's a good day at work when you get to shake Jim Lehrer's hand. Thanks, @user Still kicking myself for being to shy to hug\", 'positive')\n", + "Your output has to be in the form\n", + "\n", + "{'output': ['Label']}\n", + "\n", + "Make sure that it works when wrapped with python eval() function.\n", + "\n", + "\n", + "Input: i have been with petronas for years i feel that petronas has performed well and made a huge profit\n", + "Output:\n" + ] + } + ], + "source": [ + "prompt = nlp_prompter.generate_prompt('generic.jinja',\n", + " free_duty=duty,\n", + " Output= \"Label\",\n", + " examples=examples,\n", + " task=\"binary classification\",\n", + " example_definition=\"Example objects with keys input as model input and label as model output:\",\n", + " text_input=\"i have been with petronas for years i feel that petronas has performed well and made a huge profit\",\n", + " description=\"Binary Classificaion System\")\n", + "print(prompt)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "output = nlp_prompter.fit('generic.jinja',\n", + " free_duty=duty,\n", + " Output= \"Label\",\n", + " examples=examples,\n", + " task=\"binary classification\",\n", + " example_definition =\"Example objects with keys input as model input and label as model output:\",\n", + " text_input=\"i have been with petronas for years i feel that petronas has performed well and made a huge profit\",\n", + " description=\"Binary Classification System\",\n", + " model_name=\"text-davinci-003\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'prompt_tokens': 305,\n", + " 'completion_tokens': 8,\n", + " 
'total_tokens': 313,\n", + " 'text': \" {'output': ['positive']}\"}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "data = json.load(open(\"data/multiclass.json\",'r'))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "key_map = {'text': 'input', 'category': 'label'}\n", + "data = [{key_map.get(k, k): v for k, v in d.items()} for d in data]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "delete_keys = ['confidence_score', 'complexity']\n", + "data = [{k: v for k, v in d.items() if k not in delete_keys} for d in data]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'input': \"I ate Something I don't know what it is... Why do I keep Telling things about food\", 'label': 'worry'} \n", + "\n", + "{'input': \"Here's to the start of a great adventure. Niners today, Alaska tomorrow.\", 'label': 'joy'} \n", + "\n", + "{'input': 'It is so annoying when she starts typing on her computer in the middle of the night!', 'label': 'hate'} \n", + "\n", + "{'input': 'Chocolate milk is so much better through a straw. I lack said straw', 'label': 'neutral'} \n", + "\n", + "{'input': 'I want to buy this great album but unfortunately i dont hav enuff funds its "long time noisy"', 'label': 'sadness'} \n", + "\n" + ] + } + ], + "source": [ + "examples = []\n", + "for sample in data[:5]:\n", + " print(sample,\"\\n\")\n", + " examples.append((sample['input'],sample['label']))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# Oh no one minute too late! 
Oh well\n", + "duty = \"You take Passage as input and classify that as one of the following appropriate Categories: {'hate', 'joy', 'neutral', 'sadness', 'surprise', 'worry'}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiclass classification System\n", + "You are a highly intelligen Multiclass classification system.\n", + "You take Passage as input and classify that as one of the following appropriate Categories: {'hate', 'joy', 'neutral', 'sadness', 'surprise', 'worry'} \n", + "\n", + "Example objects with keys input as model input and label as model output:\n", + "\n", + "(\"I ate Something I don't know what it is... Why do I keep Telling things about food\", 'worry')\n", + "\n", + "(\"Here's to the start of a great adventure. Niners today, Alaska tomorrow.\", 'joy')\n", + "\n", + "('It is so annoying when she starts typing on her computer in the middle of the night!', 'hate')\n", + "\n", + "('Chocolate milk is so much better through a straw. I lack said straw', 'neutral')\n", + "\n", + "('I want to buy this great album but unfortunately i dont hav enuff funds its "long time noisy"', 'sadness')\n", + "Your output has to be in the form\n", + "\n", + "{'output': ['Label']}\n", + "\n", + "Make sure that it works when wrapped with python eval() function.\n", + "\n", + "\n", + "Input: Oh no one minute too late! Oh well\n", + "Output:\n" + ] + } + ], + "source": [ + "prompt = nlp_prompter.generate_prompt('generic.jinja',\n", + " free_duty=duty,\n", + " Output= \"Label\",\n", + " examples=examples,\n", + " task=\"Multiclass classification\",\n", + " text_input=\"Oh no one minute too late! 
Oh well\",\n", + " example_definition=\"Example objects with keys input as model input and label as model output:\",\n", + " description=\"Multiclass classification System\")\n", + "print(prompt)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "output = nlp_prompter.fit('generic.jinja', \n", + " free_duty=duty,\n", + " Output= \"Label\",\n", + " examples=examples,\n", + " task=\"Multiclass classification\",\n", + " text_input=\"Oh no one minute too late! Oh well\",\n", + " example_definition=\"Example objects with keys input as model input and label as model output:\",\n", + " description=\"Multiclass classification System\",\n", + " model_name=\"text-davinci-003\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'prompt_tokens': 262,\n", + " 'completion_tokens': 9,\n", + " 'total_tokens': 271,\n", + " 'text': \" {'output': ['surprise']}\"}" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "prompt-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "003cbd9d5de60837b71ec248298ceb3401870867de42df8ae6f444c9dc068945" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/promptify/prompts/nlp/templates/generic.jinja b/promptify/prompts/nlp/templates/generic.jinja new file mode 100644 index 0000000..fb15503 --- /dev/null +++ 
b/promptify/prompts/nlp/templates/generic.jinja @@ -0,0 +1,33 @@ +{%- if description is not none -%} +{{ description }} +{% endif -%} + +{%- if task is not none -%} +You are a highly intelligent {{task}} system. +{% endif -%} + +{%- if free_duty is not none -%} +{{free_duty}} +{# Free duty explanation comes here. #} +{% endif -%} + +{% if examples is defined and examples|length > 0 -%} + +{%- if example_definition is not none -%} +{{example_definition}} +{% endif -%} + + +{% for example in examples %} +{{example}} +{% endfor %} +{%- endif -%} + +Your output has to be in the form + +{{output_format|default({"output" : [ Output ] })}} + +Make sure that it works when wrapped with python eval() function. + +Input: {{text_input}} +Output: \ No newline at end of file