Commit: Add helper method for genai metrics (#2519)
* Added info about required packages

* Update responsibleaidashboard-question-answering-model-debugging.ipynb

* show example prediction

* Update responsibleaidashboard-question-answering-model-debugging.ipynb

* add helper method for genai metrics

Signed-off-by: Kartik Choudhary <[email protected]>

* Fix import order in metrics.py

Signed-off-by: Kartik Choudhary <[email protected]>

---------

Signed-off-by: Kartik Choudhary <[email protected]>
kartikc727 authored Jan 31, 2024
1 parent 84428aa commit 5b4bd20
Showing 2 changed files with 52 additions and 57 deletions.
15 changes: 15 additions & 0 deletions responsibleai_text/utils/genai_metrics/metrics.py

```diff
@@ -6,6 +6,8 @@
 import logging
 from pathlib import Path
 
+import numpy as np
+
 module_logger = logging.getLogger(__name__)
 module_logger.setLevel(logging.INFO)
 
@@ -30,3 +32,16 @@ def get_genai_metric(metric_name, **metric_kwargs):
     metric = evaluate.load(
         str(curr_file_dir.joinpath(f'scripts/{metric_name}.py')))
     return metric.compute(**metric_kwargs)
+
+
+def get_genai_metric_mean(metric_name, **metric_kwargs):
+    """Get the mean of the metric from the genai library.
+
+    :param metric_name: The name of the metric.
+    :type metric_name: str
+    :param metric_kwargs: The keyword arguments to pass to the metric.
+    :type metric_kwargs: dict
+    :return: The mean of the metric.
+    :rtype: float
+    """
+    return np.mean(get_genai_metric(metric_name, **metric_kwargs)['scores'])
```
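For orientation, here is a minimal usage sketch of the two helpers. The `ConstantModelWrapper` below is a hypothetical stand-in (the real tests use a `DummyModelWrapper`), and the assumption that `predict` returns one score per input row is inferred from the test expectations, not from the metric scripts themselves.

```python
# Hypothetical usage sketch of the helpers touched by this commit.
from responsibleai_text.utils.genai_metrics.metrics import (
    get_genai_metric, get_genai_metric_mean)


class ConstantModelWrapper:
    """Stand-in wrapper model; assumed to rate every example as 1."""

    def predict(self, inp):
        # Assumption: one score per input row, matching the tests'
        # expected scores of [1].
        return [1] * len(inp)


predictions = ['This is a prediction'] * 5
references = ['This is a reference'] * 5

# Per-example scores, as returned by get_genai_metric.
result = get_genai_metric('coherence',
                          predictions=predictions,
                          references=references,
                          wrapper_model=ConstantModelWrapper())
print(result['scores'])  # expected: [1, 1, 1, 1, 1]

# The new helper reduces those scores to a single float via np.mean.
mean_score = get_genai_metric_mean('coherence',
                                   predictions=predictions,
                                   references=references,
                                   wrapper_model=ConstantModelWrapper())
print(mean_score)  # expected: 1.0
```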
94 changes: 37 additions & 57 deletions responsibleai_text/tests/test_genai_metrics.py
```diff
@@ -1,7 +1,8 @@
 # Copyright (c) Microsoft Corporation
 # Licensed under the MIT License.
 
-from responsibleai_text.utils.genai_metrics.metrics import get_genai_metric
+from responsibleai_text.utils.genai_metrics.metrics import (
+    get_genai_metric, get_genai_metric_mean)
 
 PREDICTIONS = ['This is a prediction']
 REFERENCES = ['This is a reference']
@@ -15,69 +16,48 @@ def predict(self, inp):
 
 class TestGenAIMetrics:
 
-    def test_coherence(self):
-        metric = get_genai_metric('coherence',
-                                  predictions=PREDICTIONS,
-                                  references=REFERENCES,
+    def assert_metrics(self, metric_name,
+                       expected, input_len,
+                       **metric_kwargs):
+        metric = get_genai_metric(metric_name, **metric_kwargs,
                                   wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1]
+        assert metric['scores'] == [expected]
 
-        metric = get_genai_metric('coherence',
-                                  predictions=PREDICTIONS * 5,
-                                  references=REFERENCES * 5,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1] * 5
+        metric_mean = get_genai_metric_mean(metric_name, **metric_kwargs,
+                                            wrapper_model=DummyModelWrapper())
+        assert metric_mean == expected
 
-    def test_equivalence(self):
-        metric = get_genai_metric('equivalence',
-                                  predictions=PREDICTIONS,
-                                  references=REFERENCES,
-                                  answers=ANSWERS,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1]
+        kwargs_multi = {k: v * input_len for k, v in metric_kwargs.items()}
+        metric_multi = get_genai_metric(metric_name, **kwargs_multi,
+                                        wrapper_model=DummyModelWrapper())
+        assert metric_multi['scores'] == [expected] * input_len
 
-        metric = get_genai_metric('equivalence',
-                                  predictions=PREDICTIONS * 5,
-                                  references=REFERENCES * 5,
-                                  answers=ANSWERS * 5,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1] * 5
+        metric_mean_multi = get_genai_metric_mean(
+            metric_name, **kwargs_multi, wrapper_model=DummyModelWrapper())
+        assert metric_mean_multi == expected
 
-    def test_fluency(self):
-        metric = get_genai_metric('fluency',
-                                  predictions=PREDICTIONS,
-                                  references=REFERENCES,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1]
+    def test_coherence(self):
+        self.assert_metrics('coherence', 1, 5,
+                            predictions=PREDICTIONS,
+                            references=REFERENCES)
 
-        metric = get_genai_metric('fluency',
-                                  predictions=PREDICTIONS * 5,
-                                  references=REFERENCES * 5,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1] * 5
+    def test_equivalence(self):
+        self.assert_metrics('equivalence', 1, 5,
+                            predictions=PREDICTIONS,
+                            references=REFERENCES,
+                            answers=ANSWERS)
 
-    def test_groundedness(self):
-        metric = get_genai_metric('groundedness',
-                                  predictions=PREDICTIONS,
-                                  references=REFERENCES,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1]
+    def test_fluency(self):
+        self.assert_metrics('fluency', 1, 5,
+                            predictions=PREDICTIONS,
+                            references=REFERENCES)
 
-        metric = get_genai_metric('groundedness',
-                                  predictions=PREDICTIONS * 5,
-                                  references=REFERENCES * 5,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1] * 5
+    def test_groundedness(self):
+        self.assert_metrics('groundedness', 1, 5,
+                            predictions=PREDICTIONS,
+                            references=REFERENCES)
 
     def test_relevance(self):
-        metric = get_genai_metric('relevance',
-                                  predictions=PREDICTIONS,
-                                  references=REFERENCES,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1]
-
-        metric = get_genai_metric('relevance',
-                                  predictions=PREDICTIONS * 5,
-                                  references=REFERENCES * 5,
-                                  wrapper_model=DummyModelWrapper())
-        assert metric['scores'] == [1] * 5
+        self.assert_metrics('relevance', 1, 5,
+                            predictions=PREDICTIONS,
+                            references=REFERENCES)
```
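The one non-obvious move in the refactored `assert_metrics` helper is the `kwargs_multi` dict comprehension: since every metric input is a list, multiplying each value by `input_len` scales a single-example call to a batch without duplicating test bodies. A self-contained illustration, reusing the fixtures from the diff:

```python
# The kwargs_multi pattern from assert_metrics: list * n replicates
# the single-example inputs n times, keeping all kwargs aligned.
metric_kwargs = {'predictions': ['This is a prediction'],
                 'references': ['This is a reference']}
input_len = 5

kwargs_multi = {k: v * input_len for k, v in metric_kwargs.items()}

print(len(kwargs_multi['predictions']))  # 5
print(kwargs_multi['references'])
# ['This is a reference', 'This is a reference', 'This is a reference',
#  'This is a reference', 'This is a reference']
```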
