test_hallucination.py
import os

import pytest

from deepeval import assert_test
from deepeval.test_case import LLMTestCase
from deepeval.metrics import HallucinationMetric

from vertex_ai.google_vertex_ai import GoogleVertexAI

# The hallucination metric determines whether your LLM generates factually
# correct information by comparing the actual_output to the provided context.
# https://docs.confident-ai.com/docs/metrics-hallucination
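#
# Note: unlike most deepeval metrics, HallucinationMetric scores the amount of
# hallucination, so lower is better; a test case passes when the score is at
# or below the threshold (0.5 here).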


def get_project_id():
    project_id = os.environ.get("GOOGLE_PROJECT_ID")
    if not project_id:
        raise ValueError("GOOGLE_PROJECT_ID environment variable not set")
    return project_id
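
# Assumes GOOGLE_PROJECT_ID points at a GCP project with the Vertex AI API
# enabled and credentials configured, e.g.:
#   export GOOGLE_PROJECT_ID=my-gcp-project   # hypothetical project ID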


def test_hallucination():
    model = GoogleVertexAI(
        model_name="gemini-1.5-flash-001",
        project=get_project_id(),
        location="us-central1",
    )
    # The context agrees with the factually correct answer ("Paris"), so the
    # model's output should not be flagged as a hallucination.
    context = ["Paris is the capital of France."]
    input = "What's the capital of France?"
    test_case = LLMTestCase(
        input=input,
        actual_output=model.generate(input),
        context=context,
    )
    metric = HallucinationMetric(model=model, threshold=0.5)
    metric.measure(test_case)
    print(f"Metric score: {metric.score}")
    print(f"Metric reason: {metric.reason}")
    assert_test(test_case, [metric])


def test_hallucination_fails():
    model = GoogleVertexAI(
        model_name="gemini-1.5-flash-001",
        project=get_project_id(),
        location="us-central1",
    )
    # The context deliberately contradicts the factually correct answer, so
    # the model's output ("Paris") counts as a hallucination against it and
    # the metric is expected to fail.
    context = ["London is the capital of France."]
    input = "What's the capital of France?"
    test_case = LLMTestCase(
        input=input,
        actual_output=model.generate(input),
        context=context,
    )
    metric = HallucinationMetric(model=model, threshold=0.5)
    metric.measure(test_case)
    print(f"Metric score: {metric.score}")
    print(f"Metric reason: {metric.reason}")
    # assert_test raises AssertionError when a metric fails, so assert the
    # expected failure explicitly to keep this test green.
    with pytest.raises(AssertionError):
        assert_test(test_case, [metric])
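

# A minimal way to run these tests (assuming deepeval and pytest are
# installed, and Vertex AI credentials are set up, e.g. via
# `gcloud auth application-default login`):
#
#   deepeval test run test_hallucination.py
#
# Plain `pytest test_hallucination.py -s` should also work, since assert_test
# integrates with pytest; `-s` shows the printed score and reason.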