Skip to content

Commit

Permalink
feat: added logger
Browse files: browse the repository at this point in the history
  • Loading branch information
lynchee-owo committed May 16, 2024
1 parent a9af593 commit c9c773b
Show file tree
Hide file tree
Showing 14 changed files with 145 additions and 7 deletions.
23 changes: 16 additions & 7 deletions benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,21 @@
from selenium.common.exceptions import WebDriverException
import logging

# Setup logging
logging.basicConfig(filename='webwand_test_log.txt', level=logging.INFO, format='%(asctime)s:%(levelname)s:%(message)s')

load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

# Place to store task execution results
results_dir = 'results'
os.makedirs(results_dir, exist_ok=True)

dataset = 'tasks_test.jsonl'
example_dataset = 'example_test.jsonl'

# Setup logging
logs_path = 'webwand_test_log.txt'
sample_logs_path = 'webwand_test_log_example.txt'
logging.basicConfig(filename=sample_logs_path, level=logging.INFO, format='%(asctime)s:%(levelname)s:%(message)s')

def setup_driver():
chrome_options = Options()
# Load the unpacked webwand chrome extension
Expand All @@ -33,6 +38,7 @@ def setup_driver():
return driver

def dispatch_event(driver, event_name, event):
logging.info(f'Dispatched event {event_name}')
script = f"""
var event = new CustomEvent('{event_name}', {{ detail: {json.dumps(event)} }});
document.dispatchEvent(event);
Expand Down Expand Up @@ -77,10 +83,11 @@ def add_task_listener(driver, task_id, max_retries=3):
def handle_event(event_data):
nonlocal attempts
if not event_data:
print("no event data")
logging.info("No event data received")
return
if event_data['type'] == 'history':
# Record history when task stops
logging.info(f'Task {task_id} status: {event_data["status"]}')
write_history(task_id, event_data['data'])
attempts = 0
return
Expand All @@ -90,6 +97,7 @@ def handle_event(event_data):
# Task is still running. Continue to listen for events
handle_event(driver.execute_async_script(script))
else:
logging.error(f"Unhandled event data type: {event_data['type']}")
raise ValueError(f"Unhandled event data type: {event_data['type']}")

while attempts < max_retries:
Expand All @@ -100,7 +108,6 @@ def handle_event(event_data):
if "javascript error: document unloaded while waiting for result" in str(e):
attempts += 1
logging.warning(f'Document unloaded error during task {task_id} attempt {attempts}: {str(e)}')
print(f"Attempt {attempts}: Document unloaded error. Retrying...")
logging.info("Retrying...")
if attempts == max_retries:
logging.error(f'Maximum retry attempts reached for task {task_id}.')
Expand All @@ -118,6 +125,7 @@ def write_history(task_id, task_history):

with open(file_path, 'w') as file:
json.dump(task_history, file, indent=4)
logging.info(f'History saved for task {task_id}')

def write_screenshots(task_id, image_data):
image_bytes = base64.b64decode(image_data)
Expand All @@ -137,7 +145,7 @@ def run_webwand_task(driver, task_id, task_description):
dispatch_event(driver, 'RunTask', {})
add_task_listener(driver, task_id)
end = time.time()
logging.info(f'The task {task_id} took {end - start} seconds to complete.')
logging.info(f'Task {task_id} took {end - start} seconds to complete.')

def click_extensions_icon(driver):
# Simulate click to open side panel
Expand All @@ -154,8 +162,9 @@ def main():
driver = setup_driver()
initial_load = True

with open('tasks_test.jsonl', 'r') as file:
with open(example_dataset, 'r') as file:
for line in file:
logging.info(f'-------------------------------------')
task = json.loads(line)
task_id = task["id"]
driver.get(task['web'])
Expand Down
2 changes: 2 additions & 0 deletions example_test.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"web_name": "Google", "id": "Google--0", "ques": "Search for bananas", "web": "https://www.google.com/"}
{"web_name": "Google", "id": "Google--1", "ques": "Search for apples", "web": "https://www.google.com/"}
36 changes: 36 additions & 0 deletions results/testGoogle--0/interact_messages.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
[
{
"action": {
"operation": {
"args": {
"label": "8",
"value": "bananas"
},
"name": "setValueAndEnter"
},
"thought": "I will enter 'bananas' into the search bar and press Enter to search."
},
"prompt": "The user requests the following task:\n\n Search for bananas\n\n\nCurrent time: 5/16/2024, 2:57:41 PM\nCurrent URL: https://www.google.com/\nCurrent page scrolling position: 100.0%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = About\ntagName = A\n===\nlabel = 2\nname = Store\ntagName = A\n===\nlabel = 3\nname = Gmail\ntagName = A\n===\nlabel = 4\nname = Search for Images\ntagName = A\n===\nlabel = 5\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 6\nname = Sign in\ntagName = A\n===\nlabel = 7\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 8\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n===\nlabel = 9\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 10\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 11\nname = Google Search\ntagName = INPUT\ncurrentValue = Google Search\nplaceholder = \nrole = button\n===\nlabel = 12\nname = I'm Feeling Lucky\ntagName = INPUT\ncurrentValue = I'm Feeling Lucky\nplaceholder = \nrole = button\n===\nlabel = 13\nname = Advertising\ntagName = A\n===\nlabel = 14\nname = Business\ntagName = A\n===\nlabel = 15\nname = How Search works\ntagName = A\n===\nlabel = 16\nname = Our third decade of climate action: join us\ntagName = A\n===\nlabel = 17\nname = Privacy\ntagName = A\n===\nlabel = 18\nname = Terms\ntagName = A\n===\nlabel = 19\nname = Settings\ntagName = DIV\nrole = button\n\nThis textarea currently has focus:\nlabel = 8\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n",
"response": "{\n \"thought\": \"I will enter 'bananas' into the search bar and press Enter to search.\",\n \"action\": {\n \"name\": \"setValueAndEnter\",\n \"args\": {\n \"label\": \"8\",\n \"value\": \"bananas\"\n }\n }\n}",
"usage": {
"completion_tokens": 63,
"prompt_tokens": 2599,
"total_tokens": 2662
}
},
{
"action": {
"operation": {
"name": "finish"
},
"thought": "The search results for 'bananas' are displayed. The first result is a Wikipedia link about bananas, followed by related questions and nutritional information."
},
"prompt": "The user requests the following task:\n\n Search for bananas\nYou have already taken the following actions: \nThought: I will enter 'bananas' into the search bar and press Enter to search.\nAction:{\"name\":\"setValueAndEnter\",\"args\":{\"label\":\"8\",\"value\":\"bananas\"}}\n\n\n\nCurrent time: 5/16/2024, 2:57:49 PM\nCurrent URL: https://www.google.com/search\nCurrent page scrolling position: 22.1%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 2\nname = Search\ntagName = TEXTAREA\ncurrentValue = bananas\nplaceholder = \nrole = combobox\n===\nlabel = 3\nname = Clear\ntagName = DIV\nrole = button\n===\nlabel = 4\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 5\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 6\nname = Search\ntagName = BUTTON\n===\nlabel = 7\nname = Settings\ntagName = DIV\nrole = button\n===\nlabel = 8\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 9\nname = Sign in\ntagName = A\n===\nlabel = 10\nname = Images\ntagName = A\nrole = link\n===\nlabel = 11\nname = Shopping\ntagName = A\nrole = link\n===\nlabel = 12\nname = Videos\ntagName = A\nrole = link\n===\nlabel = 13\nname = Forums\ntagName = A\nrole = link\n===\nlabel = 14\nname = More\ntagName = DIV\nrole = button\n===\nlabel = 15\nname = Tools\ntagName = DIV\nrole = button\n===\nlabel = 16\nname = Add Nutrition facts\ntagName = A\nrole = link\n===\nlabel = 17\nname = Add Benefits\ntagName = A\nrole = link\n===\nlabel = 18\nname = Add Recipes\ntagName = A\nrole = link\n===\nlabel = 19\nname = Add In stock\ntagName = A\nrole = link\n===\nlabel = 20\nname = Add Types\ntagName = A\nrole = link\n===\nlabel = 21\nname = Add Open now\ntagName = A\nrole = link\n===\nlabel = 22\nname = Add Plant\ntagName = A\nrole = link\n===\nlabel = 23\nname = Add Tree\ntagName = A\nrole = link\n===\nlabel 
= 24\nname = Add Who should avoid\ntagName = A\nrole = link\n===\nlabel = 25\nname = SafeSearch\ntagName = DIV\nrole = button\n===\nlabel = 26\nname = Banana Wikipedia https://en.wikipedia.org \u203a wiki \u203a Banana\ntagName = A\n===\nlabel = 27\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 28\nname = List of banana cultivars\ntagName = A\n===\nlabel = 29\nname = Cavendish banana\ntagName = A\n===\nlabel = 30\nname = Gros Michel\ntagName = A\n===\nlabel = 31\nname = Cooking banana\ntagName = A\n===\nlabel = 32\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 33\nname = What are the benefits of eating bananas?\ntagName = DIV\nrole = button\n===\nlabel = 34\nname = Is it good to eat a banana every day?\ntagName = DIV\nrole = button\n===\nlabel = 35\nname = What are bananas high in?\ntagName = DIV\nrole = button\n===\nlabel = 36\nname = What is the best time to eat bananas?\ntagName = DIV\nrole = button\n===\nlabel = 37\nname = Feedback\ntagName = DIV\nrole = button\n===\nlabel = 38\nname = Food Nutrition\u2026Banana nutrition\ntagName = DIV\nrole = button\n===\nlabel = 39\nname = Protein Amount\u2026protein in banana\ntagName = DIV\nrole = button\n===\nlabel = 40\nname = Equivalent\u2026what fruit is equivalent to a banana\ntagName = DIV\nrole = button\n===\nlabel = 41\nname = Sugar Amount\u2026how much sugar in a banana\ntagName = DIV\nrole = button\n===\nlabel = 42\nname = 6 more\ntagName = DIV\nrole = button\n===\nlabel = 43\nname = Feedback\ntagName = DIV\nrole = button\n",
"response": "{\n \"thought\": \"The search results for 'bananas' are displayed. The first result is a Wikipedia link about bananas, followed by related questions and nutritional information.\",\n \"action\": {\n \"name\": \"finish\"\n }\n}",
"usage": {
"completion_tokens": 50,
"prompt_tokens": 3147,
"total_tokens": 3197
}
}
]
Binary file added results/testGoogle--0/screenshot_1715885075.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added results/testGoogle--0/screenshot_1715885084.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added results/testGoogle--0/screenshot_1715885861.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added results/testGoogle--0/screenshot_1715885870.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
37 changes: 37 additions & 0 deletions results/testGoogle--1/interact_messages.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
[
{
"action": {
"operation": {
"args": {
"label": "9",
"value": "apples"
},
"name": "setValueAndEnter"
},
"thought": "I will enter 'apples' in the search bar and press Enter to search for apples."
},
"prompt": "The user requests the following task:\n\n Search for apples\n\n\nCurrent time: 5/16/2024, 2:57:58 PM\nCurrent URL: https://www.google.com/\nCurrent page scrolling position: 100.0%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = About\ntagName = A\n===\nlabel = 2\nname = Store\ntagName = A\n===\nlabel = 3\nname = Gmail\ntagName = A\n===\nlabel = 4\nname = Search for Images\ntagName = A\n===\nlabel = 5\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 6\nname = Sign in\ntagName = A\n===\nlabel = 7\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 8\nname = Share\ntagName = BUTTON\n===\nlabel = 9\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n===\nlabel = 10\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 11\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 12\nname = Google Search\ntagName = INPUT\ncurrentValue = Google Search\nplaceholder = \nrole = button\n===\nlabel = 13\nname = I'm Feeling Lucky\ntagName = INPUT\ncurrentValue = I'm Feeling Lucky\nplaceholder = \nrole = button\n===\nlabel = 14\nname = Advertising\ntagName = A\n===\nlabel = 15\nname = Business\ntagName = A\n===\nlabel = 16\nname = How Search works\ntagName = A\n===\nlabel = 17\nname = Our third decade of climate action: join us\ntagName = A\n===\nlabel = 18\nname = Privacy\ntagName = A\n===\nlabel = 19\nname = Terms\ntagName = A\n===\nlabel = 20\nname = Settings\ntagName = DIV\nrole = button\n\nThis textarea currently has focus:\nlabel = 9\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n",
"response": "{\n \"thought\": \"I will enter 'apples' in the search bar and press Enter to search for apples.\",\n \"action\": {\n \"name\": \"setValueAndEnter\",\n \"args\": {\n \"label\": \"9\",\n \"value\": \"apples\"\n }\n }\n}",
"usage": {
"completion_tokens": 65,
"prompt_tokens": 2614,
"total_tokens": 2679
}
},
{
"action": {
"operation": {
"args": {},
"name": "finish"
},
"thought": "The search results for 'apples' are displayed. The results include various types of apples and related products from different vendors."
},
"prompt": "The user requests the following task:\n\n Search for apples\nYou have already taken the following actions: \nThought: I will enter 'apples' in the search bar and press Enter to search for apples.\nAction:{\"name\":\"setValueAndEnter\",\"args\":{\"label\":\"9\",\"value\":\"apples\"}}\n\n\n\nCurrent time: 5/16/2024, 2:58:07 PM\nCurrent URL: https://www.google.com/search\nCurrent page scrolling position: 18.1%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 2\nname = Search\ntagName = TEXTAREA\ncurrentValue = apples\nplaceholder = \nrole = combobox\n===\nlabel = 3\nname = Clear\ntagName = DIV\nrole = button\n===\nlabel = 4\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 5\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 6\nname = Search\ntagName = BUTTON\n===\nlabel = 7\nname = Settings\ntagName = DIV\nrole = button\n===\nlabel = 8\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 9\nname = Sign in\ntagName = A\n===\nlabel = 10\nname = Images\ntagName = A\nrole = link\n===\nlabel = 11\nname = Shopping\ntagName = A\nrole = link\n===\nlabel = 12\nname = Videos\ntagName = A\nrole = link\n===\nlabel = 13\nname = News\ntagName = A\nrole = link\n===\nlabel = 14\nname = More\ntagName = DIV\nrole = button\n===\nlabel = 15\nname = Tools\ntagName = DIV\nrole = button\n===\nlabel = 16\nname = Add Benefits\ntagName = A\nrole = link\n===\nlabel = 17\nname = Add Nutrition\ntagName = A\nrole = link\n===\nlabel = 18\nname = Add Types\ntagName = A\nrole = link\n===\nlabel = 19\nname = Add Fruit\ntagName = A\nrole = link\n===\nlabel = 20\nname = Add Open now\ntagName = A\nrole = link\n===\nlabel = 21\nname = Add Drawing\ntagName = A\nrole = link\n===\nlabel = 22\nname = Add Clipart\ntagName = A\nrole = link\n===\nlabel = 23\nname = Add Top rated\ntagName = A\nrole = 
link\n===\nlabel = 24\nname = Add Green\ntagName = A\nrole = link\n===\nlabel = 25\nname = SafeSearch\ntagName = DIV\nrole = button\n===\nlabel = 26\nname = Stay signed out\ntagName = G-RAISED-BUTTON\nrole = button\n===\nlabel = 27\nname = Sign in\ntagName = G-RAISED-BUTTON\nrole = button\n===\nlabel = 28\nname = Why this ad?\ntagName = DIV\nrole = button\n===\nlabel = 29\nname = Fresh - Giant Ultra-Premium Honeycrisp Apple for $1.99 from FreshDirect\ntagName = A\n===\nlabel = 30\nname = Image of Fresh - Giant Ultra-Premium Honeycrisp Apple\ntagName = DIV\nrole = link\n===\nlabel = 31\nname = Fresh - Giant Ultra-Premium Honeycrisp Apple\ntagName = DIV\nrole = link\n===\nlabel = 32\nname = Gala Apples Fresh Produce Fruit, 3 LB Bag for $14.95 from Amazon.com\ntagName = A\n===\nlabel = 33\nname = Image of Gala Apples Fresh Produce Fruit, 3 LB Bag\ntagName = DIV\nrole = link\n===\nlabel = 34\nname = Gala Apples Fresh Produce Fruit, 3 LB Bag\ntagName = DIV\nrole = link\n===\nlabel = 35\nname = Hudson Valley McIntosh Apples for $4.99 from FreshDirect\ntagName = A\n===\nlabel = 36\nname = Image of Hudson Valley McIntosh Apples\ntagName = DIV\nrole = link\n===\nlabel = 37\nname = Hudson Valley McIntosh Apples\ntagName = DIV\nrole = link\n===\nlabel = 38\nname = Amazing Andrea Apple Tree Seeds - 20 Seeds -Grow Fresh Apples for $8.08 from Walmart\ntagName = A\n===\nlabel = 39\nname = Image of Amazing Andrea Apple Tree Seeds - 20 Seeds -Grow Fresh Apples\ntagName = DIV\nrole = link\n===\nlabel = 40\nname = Amazing Andrea Apple Tree Seeds - 20 Seeds -Grow Fresh Apples\ntagName = DIV\nrole = link\n===\nlabel = 41\nname = ZEAYEA 20 Pcs Artificial Apples, Decorative Fake Apples for Decoration Lifelike, Realistic Fake Fruit Apples for Home Kitchen Festival Party for $21.99 from Amazon.com\ntagName = A\n===\nlabel = 42\nname = Image of ZEAYEA 20 Pcs Artificial Apples, Decorative Fake Apples for Decoration Lifelike, Realistic Fake Fruit Apples for Home Kitchen Festival 
Party\ntagName = DIV\nrole = link\n===\nlabel = 43\nname = ZEAYEA 20 Pcs Artificial Apples, Decorative Fake Apples for Decoration Lifelike, Realistic Fake Fruit Apples for Home Kitchen Festival Party\ntagName = DIV\nrole = link\n===\nlabel = 44\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 45\nname = Health benefits\u2026Why are apples the healthiest fruit?\ntagName = DIV\nrole = button\n===\nlabel = 46\nname = Type of fruit\u2026what type of fruit is a apple\ntagName = DIV\nrole = button\n===\nlabel = 47\nname = Inventors\u2026Who invented the apple fruit?\ntagName = DIV\nrole = button\n===\nlabel = 48\nname = Fruit or Vegetable\u2026Is an apple a fruit or vegetable?\ntagName = DIV\nrole = button\n===\nlabel = 49\nname = 6 more\ntagName = DIV\nrole = button\n===\nlabel = 50\nname = Feedback\ntagName = DIV\nrole = button\n===\nlabel = 51\nname = Apple Fruit\ntagName = A\n",
"response": "{\n \"thought\": \"The search results for 'apples' are displayed. The results include various types of apples and related products from different vendors.\",\n \"action\": {\n \"name\": \"finish\",\n \"args\": {}\n }\n}",
"usage": {
"completion_tokens": 51,
"prompt_tokens": 3514,
"total_tokens": 3565
}
}
]
Binary file added results/testGoogle--1/screenshot_1715885093.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added results/testGoogle--1/screenshot_1715885101.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added results/testGoogle--1/screenshot_1715885878.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added results/testGoogle--1/screenshot_1715885887.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit c9c773b

Please sign in to comment.