forked from explosion/prodigy-recipes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmark.py
57 lines (50 loc) · 2.21 KB
/
mark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import prodigy
from prodigy.components.loaders import JSONL
from prodigy.util import split_string
from collections import Counter
from typing import List, Optional
# Recipe decorator with argument annotations: (description, argument type,
# shortcut, type / converter function called on value before it's passed to
# the function). Descriptions are also shown when typing --help.
@prodigy.recipe(
    "mark",
    dataset=("The dataset to use", "positional", None, str),
    source=("The source data as a JSONL file", "positional", None, str),
    view_id=("ID of annotation interface", "option", "o", str),
    exclude=("Names of datasets to exclude", "option", "e", split_string),
)
def mark(dataset: str, source: str, view_id: str, exclude: Optional[List[str]] = None):
    """
    Click through pre-prepared examples, with no model in the loop.

    Running totals of the "answer" values are kept in a Counter: it is
    seeded from annotations already stored in the dataset when the recipe
    loads, incremented for every batch of answers received, and printed
    when the server exits.

    dataset: Name of the dataset to save annotations to.
    source: The source data as a JSONL file.
    view_id: ID of the annotation interface to use.
    exclude: Names of datasets to exclude; handed to Prodigy unchanged as
        the "exclude" component (None when the option is not given).
    RETURNS: Dictionary of recipe components.
    """
    # Shared by the three callbacks below via closure.
    counts = Counter()
    # Load the stream from a JSONL file and return a generator that yields a
    # dictionary for each example in the data.
    stream = JSONL(source)

    def on_load(controller):
        # Check if current dataset is available in database. The on_load
        # callback receives the controller as an argument, which exposes the
        # database via controller.db
        if dataset in controller.db:
            examples = controller.db.get_dataset(dataset)
            for eg in examples:
                # Update counts with existing answers
                counts[eg["answer"]] += 1

    def receive_answers(answers):
        for eg in answers:
            # Update counts with new answers
            counts[eg["answer"]] += 1

    def on_exit(controller):
        # Output the total annotation counts. Counter returns 0 for answers
        # that were never seen, so missing keys are safe to print.
        print("Accept:", counts["accept"])
        print("Reject:", counts["reject"])
        print("Ignore:", counts["ignore"])
        print("Total: ", sum(counts.values()))

    return {
        "view_id": view_id,  # Annotation interface to use
        "dataset": dataset,  # Name of dataset to save annotations
        "stream": stream,  # Incoming stream of examples
        "update": receive_answers,  # Update callback, called with answers
        "on_load": on_load,  # Called on first load
        "on_exit": on_exit,  # Called when Prodigy server is stopped
        # Previously the --exclude option was parsed but silently dropped;
        # pass it through so it actually takes effect.
        "exclude": exclude,  # List of dataset names to exclude
    }