From 46894553931df2051449f70713542a42731cdba2 Mon Sep 17 00:00:00 2001
From: eren23 <erenak1996@gmail.com>
Date: Sat, 25 Feb 2023 22:52:48 +0100
Subject: [PATCH]  generic baseline

---
 examples/classification.ipynb                 |   6 +-
 examples/generic.ipynb                        | 416 ++++++++++++++++++
 promptify/prompts/nlp/templates/generic.jinja |  33 ++
 3 files changed, 452 insertions(+), 3 deletions(-)
 create mode 100644 examples/generic.ipynb
 create mode 100644 promptify/prompts/nlp/templates/generic.jinja

diff --git a/examples/classification.ipynb b/examples/classification.ipynb
index ae486b8..30eb2a8 100644
--- a/examples/classification.ipynb
+++ b/examples/classification.ipynb
@@ -380,7 +380,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "prompt-env",
    "language": "python",
    "name": "python3"
   },
@@ -394,11 +394,11 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.6"
+   "version": "3.9.16"
   },
   "vscode": {
    "interpreter": {
-    "hash": "67ec57dda2d4cf896ecf03d091187d70ff0a6cd6e0feb9599db137f2d1cf3d9f"
+    "hash": "003cbd9d5de60837b71ec248298ceb3401870867de42df8ae6f444c9dc068945"
    }
   }
  },
diff --git a/examples/generic.ipynb b/examples/generic.ipynb
new file mode 100644
index 0000000..8c55965
--- /dev/null
+++ b/examples/generic.ipynb
@@ -0,0 +1,416 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "\n",
+    "from promptify.models.nlp.openai_model import OpenAI\n",
+    "from promptify.prompts.nlp.prompter import Prompter"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = OpenAI(api_key=\"\")\n",
+    "nlp_prompter = Prompter(model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = json.load(open(\"data/binary.json\",'r'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "9"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'text': 'Eight years the republicans denied obama’s picks. Breitbarters outrage is as phony as their fake president.', 'labels': 'negative', 'score': '', 'complexity': ''} \n",
+      "\n",
+      "{'text': 'Except he’s the most successful president in our lifetimes. He’s undone most of the damage Obummer did and set America on the right path again.', 'labels': 'positive', 'score': '', 'complexity': ''} \n",
+      "\n",
+      "{'text': 'So disappointed in wwe summerslam! I want to see john cena wins his 16th title', 'labels': 'negative', 'score': '', 'complexity': ''} \n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "examples = []\n",
+    "for sample in data[:3]:\n",
+    "    print(sample,\"\\n\")\n",
+    "    examples.append((sample['text'],sample['labels']))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "key_map = {'text': 'input', 'labels': 'label'}\n",
+    "data = [{key_map.get(k, k): v for k, v in d.items()} for d in data]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "delete_keys = ['score', 'complexity']\n",
+    "data = [{k: v for k, v in d.items() if k not in delete_keys} for d in data]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'input': 'Eight years the republicans denied obama’s picks. Breitbarters outrage is as phony as their fake president.', 'label': 'negative'} \n",
+      "\n",
+      "{'input': 'Except he’s the most successful president in our lifetimes. He’s undone most of the damage Obummer did and set America on the right path again.', 'label': 'positive'} \n",
+      "\n",
+      "{'input': 'So disappointed in wwe summerslam! I want to see john cena wins his 16th title', 'label': 'negative'} \n",
+      "\n",
+      "{'input': 'Looking forward to going to Carrow Rd tonight. Last time we were there\\\\u002c Bale scored 2 and we were 3rd. Do not want extra time though', 'label': 'positive'} \n",
+      "\n",
+      "{'input': \"It's a good day at work when you get to shake Jim Lehrer's hand. Thanks, @user Still kicking myself for being to shy to hug\", 'label': 'positive'} \n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "examples = []\n",
+    "for sample in data[:5]:\n",
+    "    print(sample,\"\\n\")\n",
+    "    examples.append((sample['input'],sample['label']))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "duty = \" You take Passage as input and classify that as either positive or negative Category. Your output format is only  form, no other form.\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Binary Classificaion System\n",
+      "You are a highly intelligen binary classification system.\n",
+      " You take Passage as input and classify that as either positive or negative Category. Your output format is only  form, no other form. \n",
+      "\n",
+      "Example objects with keys input as model input and label as model output:\n",
+      "\n",
+      "('Eight years the republicans denied obama’s picks. Breitbarters outrage is as phony as their fake president.', 'negative')\n",
+      "\n",
+      "('Except he’s the most successful president in our lifetimes. He’s undone most of the damage Obummer did and set America on the right path again.', 'positive')\n",
+      "\n",
+      "('So disappointed in wwe summerslam! I want to see john cena wins his 16th title', 'negative')\n",
+      "\n",
+      "('Looking forward to going to Carrow Rd tonight. Last time we were there\\\\u002c Bale scored 2 and we were 3rd. Do not want extra time though', 'positive')\n",
+      "\n",
+      "(\"It's a good day at work when you get to shake Jim Lehrer's hand. Thanks, @user Still kicking myself for being to shy to hug\", 'positive')\n",
+      "Your output has to be in the form\n",
+      "\n",
+      "{'output': ['Label']}\n",
+      "\n",
+      "Make sure that it works when wrapped with python eval() function.\n",
+      "\n",
+      "\n",
+      "Input: i have been with petronas for years i feel that petronas has performed well and made a huge profit\n",
+      "Output:\n"
+     ]
+    }
+   ],
+   "source": [
+    "prompt = nlp_prompter.generate_prompt('generic.jinja',\n",
+    "                                      free_duty=duty,\n",
+    "                                      Output= \"Label\",\n",
+    "                                      examples=examples,\n",
+    "                                      task=\"binary classification\",\n",
+    "                                      example_definition=\"Example objects with keys input as model input and label as model output:\",\n",
+    "                                      text_input=\"i have been with petronas for years i feel that petronas has performed well and made a huge profit\",\n",
+    "                                     description=\"Binary Classification System\")\n",
+    "print(prompt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "output = nlp_prompter.fit('generic.jinja',\n",
+    "                                      free_duty=duty,\n",
+    "                                      Output= \"Label\",\n",
+    "                                      examples=examples,\n",
+    "                                      task=\"binary classification\",\n",
+    "                                      example_definition =\"Example objects with keys input as model input and label as model output:\",\n",
+    "                                      text_input=\"i have been with petronas for years i feel that petronas has performed well and made a huge profit\",\n",
+    "                                      description=\"Binary Classification System\",\n",
+    "                 model_name=\"text-davinci-003\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'prompt_tokens': 305,\n",
+       " 'completion_tokens': 8,\n",
+       " 'total_tokens': 313,\n",
+       " 'text': \" {'output': ['positive']}\"}"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "output"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = json.load(open(\"data/multiclass.json\",'r'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "key_map = {'text': 'input', 'category': 'label'}\n",
+    "data = [{key_map.get(k, k): v for k, v in d.items()} for d in data]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "delete_keys = ['confidence_score', 'complexity']\n",
+    "data = [{k: v for k, v in d.items() if k not in delete_keys} for d in data]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'input': \"I ate Something I don't know what it is... Why do I keep Telling things about food\", 'label': 'worry'} \n",
+      "\n",
+      "{'input': \"Here's to the start of a great adventure. Niners today, Alaska tomorrow.\", 'label': 'joy'} \n",
+      "\n",
+      "{'input': 'It is so annoying when she starts typing on her computer in the middle of the night!', 'label': 'hate'} \n",
+      "\n",
+      "{'input': 'Chocolate milk is so much better through a straw. I lack said straw', 'label': 'neutral'} \n",
+      "\n",
+      "{'input': 'I want to buy this great album but unfortunately i dont hav enuff funds  its &quot;long time noisy&quot;', 'label': 'sadness'} \n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "examples = []\n",
+    "for sample in data[:5]:\n",
+    "    print(sample,\"\\n\")\n",
+    "    examples.append((sample['input'],sample['label']))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Oh no one minute too late! Oh well\n",
+    "duty = \"You take Passage as input and classify that as one of the following appropriate Categories: {'hate', 'joy', 'neutral', 'sadness', 'surprise', 'worry'}\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Multiclass classification System\n",
+      "You are a highly intelligen Multiclass classification system.\n",
+      "You take Passage as input and classify that as one of the following appropriate Categories: {'hate', 'joy', 'neutral', 'sadness', 'surprise', 'worry'} \n",
+      "\n",
+      "Example objects with keys input as model input and label as model output:\n",
+      "\n",
+      "(\"I ate Something I don't know what it is... Why do I keep Telling things about food\", 'worry')\n",
+      "\n",
+      "(\"Here's to the start of a great adventure. Niners today, Alaska tomorrow.\", 'joy')\n",
+      "\n",
+      "('It is so annoying when she starts typing on her computer in the middle of the night!', 'hate')\n",
+      "\n",
+      "('Chocolate milk is so much better through a straw. I lack said straw', 'neutral')\n",
+      "\n",
+      "('I want to buy this great album but unfortunately i dont hav enuff funds  its &quot;long time noisy&quot;', 'sadness')\n",
+      "Your output has to be in the form\n",
+      "\n",
+      "{'output': ['Label']}\n",
+      "\n",
+      "Make sure that it works when wrapped with python eval() function.\n",
+      "\n",
+      "\n",
+      "Input: Oh no one minute too late! Oh well\n",
+      "Output:\n"
+     ]
+    }
+   ],
+   "source": [
+    "prompt = nlp_prompter.generate_prompt('generic.jinja',\n",
+    "                                      free_duty=duty,\n",
+    "                                      Output= \"Label\",\n",
+    "                                      examples=examples,\n",
+    "                                      task=\"Multiclass classification\",\n",
+    "                                      text_input=\"Oh no one minute too late! Oh well\",\n",
+    "                                      example_definition=\"Example objects with keys input as model input and label as model output:\",\n",
+    "                                     description=\"Multiclass classification System\")\n",
+    "print(prompt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "output = nlp_prompter.fit('generic.jinja',                                      \n",
+    "                          free_duty=duty,\n",
+    "                            Output= \"Label\",\n",
+    "                          examples=examples,\n",
+    "                          task=\"Multiclass classification\",\n",
+    "                          text_input=\"Oh no one minute too late! Oh well\",\n",
+    "                          example_definition=\"Example objects with keys input as model input and label as model output:\",\n",
+    "                         description=\"Multiclass classification System\",\n",
+    "                         model_name=\"text-davinci-003\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'prompt_tokens': 262,\n",
+       " 'completion_tokens': 9,\n",
+       " 'total_tokens': 271,\n",
+       " 'text': \" {'output': ['surprise']}\"}"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "output"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "prompt-env",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.16"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "003cbd9d5de60837b71ec248298ceb3401870867de42df8ae6f444c9dc068945"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/promptify/prompts/nlp/templates/generic.jinja b/promptify/prompts/nlp/templates/generic.jinja
new file mode 100644
index 0000000..fb15503
--- /dev/null
+++ b/promptify/prompts/nlp/templates/generic.jinja
@@ -0,0 +1,33 @@
+{# Generic prompt: optional description / task / free_duty header, optional few-shot examples, then the required output form and the input. Optional variables that are omitted or None are skipped. NOTE(review): the prompt asks for eval()-compatible output; callers should parse model output with ast.literal_eval rather than eval(). #}{%- if description is defined and description is not none -%}
+{{ description }}
+{% endif -%}
+{# Check `is defined` too: an undefined variable also passes `is not none`, which would emit the sentence with an empty task. #}
+{%- if task is defined and task is not none -%}
+You are a highly intelligent {{task}} system.
+{% endif -%}
+{# free_duty: free-form instructions supplied by the caller, rendered verbatim. #}
+{%- if free_duty is defined and free_duty is not none -%}
+{{free_duty}} 
+{# Comment-only line: it renders as the blank line that follows the free_duty text. #}
+{% endif -%}
+
+{% if examples is defined and examples|length > 0 -%}
+{# example_definition: caller-supplied line introducing the examples listed below. #}
+{%- if example_definition is defined and example_definition is not none -%}
+{{example_definition}}
+{% endif -%}
+
+
+{% for example in examples %}
+{{example}}
+{% endfor %}
+{%- endif -%}
+
+Your output has to be in the form
+
+{{output_format|default({"output" : [ Output ] })}}
+
+Make sure that it works when wrapped with python eval() function.
+
+Input: {{text_input}}
+Output:
\ No newline at end of file