Skip to content

Commit

Permalink
test
Browse files Browse the repository at this point in the history
  • Loading branch information
kieranklaassen committed Aug 14, 2024
1 parent d94254a commit 33e5c14
Show file tree
Hide file tree
Showing 12 changed files with 55 additions and 154 deletions.
6 changes: 6 additions & 0 deletions app/evals/test_sentiment_accuracy_eval.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Exact-match accuracy eval used by the test suite.
class TestSentimentAccuracyEval < Leva::BaseEval
  # Compares a predicted label against the expected one.
  #
  # @param prediction [Object] the label produced by a run
  # @param expected [Object] the label the record is expected to have
  # @return [Leva::Result] labelled 'sentiment_accuracy', with score 1.0 when
  #   the two values are equal (`==`) and 0.0 otherwise
  def evaluate(prediction, expected)
    matched = prediction == expected
    Leva::Result.new(label: 'sentiment_accuracy', score: matched ? 1.0 : 0.0)
  end
end
5 changes: 4 additions & 1 deletion app/models/leva/dataset.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
module Leva
  # A named collection of records that experiments are run against.
  class Dataset < ApplicationRecord
    # Join rows linking this dataset to its records; removed with the dataset.
    has_many :dataset_records, dependent: :destroy

    # The records themselves, reached through the join rows' `recordable`.
    # NOTE(review): if `recordable` is a polymorphic belongs_to on the join
    # model, Rails needs a `source_type:` option here — confirm.
    has_many :records, through: :dataset_records, source: :recordable

    # Experiments run on this dataset; removed with the dataset.
    has_many :experiments, dependent: :destroy

    # Links a record to this dataset by creating a join row for it.
    #
    # @param record [Object] the object to store as the join row's `recordable`
    # @return [Object] whatever `dataset_records.create` returns; the non-bang
    #   `create` is used, so a validation failure does not raise here
    def add_record(record)
      dataset_records.create(recordable: record)
    end
  end
end
2 changes: 1 addition & 1 deletion app/models/leva/experiment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# created_at :datetime not null
# updated_at :datetime not null
# dataset_id :integer not null
# prompt_id :integer not null
# prompt_id :integer
#
# Indexes
#
Expand Down
13 changes: 13 additions & 0 deletions app/runners/test_sentiment_run.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Keyword-based sentiment runner used by the test suite.
class TestSentimentRun < Leva::BaseRun
  # Classifies a record by looking for sentiment keywords in its content.
  # Positive keywords are checked first, so they win when both kinds appear.
  #
  # @param record [#content] object whose `content` responds to `downcase`
  # @return [String] "Positive", "Negative" or "Neutral"
  def execute(record)
    # Simple sentiment analysis logic for testing
    text = record.content.downcase
    return "Positive" if text.match?(/love|great|excellent/)
    return "Negative" if text.match?(/terrible|bad|awful/)

    "Neutral"
  end
end
2 changes: 1 addition & 1 deletion db/migrate/20240813173222_create_leva_experiments.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ def change
create_table :leva_experiments do |t|
t.string :name
t.references :dataset, null: false, foreign_key: true
t.references :prompt, null: false, foreign_key: true
t.references :prompt, null: true, foreign_key: true
t.integer :status
t.text :metadata

Expand Down
2 changes: 1 addition & 1 deletion test/dummy/db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
create_table "leva_experiments", force: :cascade do |t|
t.string "name"
t.integer "dataset_id", null: false
t.integer "prompt_id", null: false
t.integer "prompt_id"
t.integer "status"
t.text "metadata"
t.datetime "created_at", null: false
Expand Down
30 changes: 0 additions & 30 deletions test/fixtures/leva/dataset_records.yml

This file was deleted.

15 changes: 0 additions & 15 deletions test/fixtures/leva/datasets.yml

This file was deleted.

37 changes: 0 additions & 37 deletions test/fixtures/leva/evaluation_results.yml

This file was deleted.

37 changes: 0 additions & 37 deletions test/fixtures/leva/experiments.yml

This file was deleted.

27 changes: 0 additions & 27 deletions test/fixtures/leva/prompts.yml

This file was deleted.

33 changes: 29 additions & 4 deletions test/models/leva/experiment_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# created_at :datetime not null
# updated_at :datetime not null
# dataset_id :integer not null
# prompt_id :integer not null
# prompt_id :integer
#
# Indexes
#
Expand All @@ -25,8 +25,33 @@

module Leva
  # Exercises Leva.run_evaluation end to end with one runner and two evals
  # over a three-record sentiment dataset.
  class ExperimentTest < ActiveSupport::TestCase
    # Builds the dataset, the experiment and the runner/evals under test.
    #
    # Every fixture is created with the bang variant so that a failed insert
    # aborts setup immediately instead of surfacing later as a confusing
    # result-count mismatch.
    def setup
      dataset = Leva::Dataset.create!(name: "Sentiment Analysis Dataset")
      dataset.add_record TextContent.create!(text: "I love this product!", expected_label: "Positive")
      dataset.add_record TextContent.create!(text: "Terrible experience", expected_label: "Negative")
      dataset.add_record TextContent.create!(text: "I's ok", expected_label: "Neutral")
      @experiment = Leva::Experiment.create!(name: "Sentiment Analysis", dataset: dataset)

      @run = TestSentimentRun.new
      # NOTE(review): TestSentimentF1Eval is not defined in the visible part of
      # this change — confirm it exists elsewhere.
      @evals = [TestSentimentAccuracyEval.new, TestSentimentF1Eval.new]
    end

    test "run evaluation with two evals and one runner" do
      Leva.run_evaluation(experiment: @experiment, run: @run, evals: @evals)

      assert_equal 6, @experiment.evaluation_results.count, "Should have 6 evaluation results (1 run * 3 records * 2 evals)"

      accuracy_results = @experiment.evaluation_results.where(label: 'sentiment_accuracy')
      f1_results = @experiment.evaluation_results.where(label: 'sentiment_f1')

      assert_equal 3, accuracy_results.count, "Should have 3 accuracy results"
      assert_equal 3, f1_results.count, "Should have 3 F1 results"

      average_accuracy = accuracy_results.average(:score)
      average_f1 = f1_results.average(:score)

      # NOTE(review): all three fixture texts appear to match TestSentimentRun's
      # keyword rules, which would give 1.0 rather than 0.67 — confirm the
      # expected values against the runner and evals.
      assert_in_delta 0.67, average_accuracy, 0.01, "Average accuracy should be about 0.67 (2 out of 3 correct)"
      assert_in_delta 0.67, average_f1, 0.01, "Average F1 score should be about 0.67 (2 out of 3 correct)"
    end
  end
end

0 comments on commit 33e5c14

Please sign in to comment.