Commit
feat: implement actual score
kieranklaassen committed Aug 21, 2024
1 parent 5f38414 commit 48413f0
Showing 6 changed files with 12 additions and 9 deletions.
README.md (4 changes: 2 additions & 2 deletions)
@@ -82,15 +82,15 @@ rails generate leva:eval sentiment_accuracy
 class SentimentAccuracyEval < Leva::BaseEval
   def evaluate(prediction, record)
     score = prediction == record.expected_label ? 1.0 : 0.0
-    score
+    [score, record.expected_label]
   end
 end
 
 class SentimentF1Eval < Leva::BaseEval
   def evaluate(prediction, record)
     # Calculate F1 score
     # ...
-    f1_score
+    [f1_score, record.f1_score]
   end
 end
 ```
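
The change above turns each eval's return value from a bare Float into a [score, actual_result] pair. A minimal sketch of calling the updated SentimentAccuracyEval directly; the TextContent class and the sample values are assumptions for illustration, not part of the diff:

```ruby
# Assumed setup: a record class exposing expected_label, as in the README.
record = TextContent.new(expected_label: "positive")

score, actual = SentimentAccuracyEval.new.evaluate("positive", record)
score  # => 1.0
actual # => "positive" (later persisted as the runner result's actual_result)
```
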
app/models/leva/runner_result.rb (1 change: 0 additions & 1 deletion)
@@ -32,7 +32,6 @@ class RunnerResult < ApplicationRecord
     has_many :evaluation_results, dependent: :destroy
 
     validates :prediction, presence: true
-    validates :actual_result, presence: true
     validates :prompt, presence: true
   end
 end
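
Dropping the presence validation is consistent with the new flow: actual_result is no longer copied from the dataset record at creation time (see the lib/leva.rb hunks below) but back-filled by evaluate_and_store once an eval returns it, so the column is legitimately blank between run and evaluation.
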
lib/generators/leva/templates/eval.rb.erb (4 changes: 2 additions & 2 deletions)
@@ -3,10 +3,10 @@
 class <%= class_name %>Eval < Leva::BaseEval
   # @param prediction [String] The prediction to evaluate
   # @param record [YourRecordClass] The record to evaluate
-  # @return [Float] The score of the evaluation
+  # @return [Array<(Float, Object)>] An array containing the evaluation score and the actual result.
   def evaluate(prediction, record)
     # Implement your evaluation logic here
 
-    1.0
+    [1.0, record.actual_result]
   end
 end
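
A sketch of what a generated eval might look like once the placeholder is filled in; ExactMatchEval and record.expected_output are hypothetical names, not part of the gem:

```ruby
# Hypothetical eval built from the updated template.
class ExactMatchEval < Leva::BaseEval
  # @param prediction [String] The prediction to evaluate
  # @param record [YourRecordClass] The record to evaluate
  # @return [Array<(Float, Object)>] The evaluation score and the actual result
  def evaluate(prediction, record)
    expected = record.expected_output # hypothetical attribute
    [prediction == expected ? 1.0 : 0.0, expected]
  end
end
```
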
lib/leva.rb (9 changes: 6 additions & 3 deletions)
@@ -57,7 +57,6 @@ def execute_and_store(experiment, dataset_record, prompt)
         dataset_record: dataset_record,
         prompt: prompt,
         prediction: result,
-        actual_result: dataset_record.actual_result
       )
     end
   end
@@ -71,7 +70,7 @@ class BaseEval
     #
     # @param prediction [Object] The model's prediction.
     # @param record [Object] The expected result.
-    # @return [Float] The evaluation score.
+    # @return [Array<(Float, Object)>] An array containing the evaluation score and the actual result.
     # @raise [NotImplementedError] if the method is not implemented in a subclass.
     def evaluate(prediction, record)
       raise NotImplementedError, "#{self.class} must implement #evaluate"
@@ -83,7 +82,11 @@ def evaluate(prediction, record)
     # @param runner_result [Leva::RunnerResult] The runner result to evaluate.
     # @return [Leva::EvaluationResult] The stored evaluation result.
     def evaluate_and_store(experiment, runner_result)
-      score = evaluate(runner_result.prediction, runner_result.dataset_record.recordable)
+      score, actual_result = evaluate(runner_result.prediction, runner_result.dataset_record.recordable)
+
+      if runner_result.actual_result.blank?
+        runner_result.update!(actual_result: actual_result)
+      end
 
       EvaluationResult.create!(
         experiment: experiment,
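
One consequence of the blank? guard is worth spelling out: when several evals run against the same RunnerResult, only the first one persists its actual result. A minimal sketch, assuming an experiment and runner_result already set up as in the code above:

```ruby
# Two evals against one runner result (setup assumed, names from the README).
[SentimentAccuracyEval.new, SentimentF1Eval.new].each do |evaluator|
  evaluator.evaluate_and_store(experiment, runner_result)
end

# runner_result.actual_result now holds the first eval's actual result;
# the second eval's value is ignored because the column is no longer blank.
```
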
test/dummy/app/evals/sentiment_accuracy_eval.rb (1 change: 1 addition & 0 deletions)
@@ -6,5 +6,6 @@ class SentimentAccuracyEval < Leva::BaseEval
   # @return [Float] The score of the evaluation
   def evaluate(prediction, text_content)
     prediction == text_content.expected_label ? 1.0 : 0.0
+    [1.0, text_content.expected_label]
   end
 end
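
As committed, the ternary's result on the previous line is discarded, so this dummy always returns a score of 1.0; the README version of the same eval suggests the intended shape:

```ruby
# Presumed intent, mirroring the README's SentimentAccuracyEval.
def evaluate(prediction, text_content)
  score = prediction == text_content.expected_label ? 1.0 : 0.0
  [score, text_content.expected_label]
end
```
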
test/dummy/app/evals/sentiment_f1_eval.rb (2 changes: 1 addition & 1 deletion)
@@ -6,6 +6,6 @@ class SentimentF1Eval < Leva::BaseEval
   # @return [Float] The score of the evaluation
   def evaluate(prediction, text_content)
     sleep 3
-    rand
+    [rand(0.0..1.0), 2]
   end
 end
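
The dummy F1 eval stubs its score with rand and a hardcoded actual result of 2. For readers filling in the README's "# Calculate F1 score" placeholder, a self-contained sketch of binary F1 over label arrays; purely illustrative, not part of the gem:

```ruby
# Illustrative binary F1 between predicted and expected label arrays.
def f1_score(predicted, expected)
  true_positives = (predicted & expected).size.to_f
  return 0.0 if true_positives.zero?

  precision = true_positives / predicted.size
  recall    = true_positives / expected.size
  2 * precision * recall / (precision + recall)
end

f1_score(%w[positive], %w[positive]) # => 1.0
f1_score(%w[positive], %w[negative]) # => 0.0
```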
