Skip to content

Commit

Permalink
Release health definition and events (#581)
Browse files Browse the repository at this point in the history
- Create rules defining release health
- Create release events for the production release if the health metrics fetched violate any rules or change the health status from a previous event
  • Loading branch information
nid90 authored Nov 27, 2023
1 parent ce23a6e commit 3996185
Show file tree
Hide file tree
Showing 13 changed files with 328 additions and 2 deletions.
1 change: 1 addition & 0 deletions app/jobs/releases/fetch_health_metrics_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ def perform(deployment_run_id)
return if run.release.finished? && run.release.completed_at < RELEASE_MONITORING_PERIOD_IN_DAYS.days.ago

run.fetch_health_data!
ensure
Releases::FetchHealthMetricsJob.set(wait: 5.minutes).perform_later(deployment_run_id)
end
end
7 changes: 7 additions & 0 deletions app/models/concerns/health_awareness.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Mixin that gives the including model a string-backed `health_status` enum
# with the two values "healthy" and "unhealthy".
module HealthAwareness
  extend ActiveSupport::Concern

  included do
    enum health_status: {
      healthy: "healthy",
      unhealthy: "unhealthy"
    }
  end
end
1 change: 1 addition & 0 deletions app/models/deployment_run.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class DeploymentRun < ApplicationRecord
has_one :staged_rollout, dependent: :destroy
has_one :external_release, dependent: :destroy
has_many :release_health_metrics, dependent: :destroy, inverse_of: :deployment_run
has_many :release_health_events, dependent: :destroy, inverse_of: :deployment_run

validates :deployment_id, uniqueness: {scope: :step_run_id}

Expand Down
23 changes: 23 additions & 0 deletions app/models/release_health_event.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# == Schema Information
#
# Table name: release_health_events
#
# id :uuid not null, primary key
# action_triggered :boolean default(FALSE)
# event_timestamp :datetime not null, indexed
# health_status :string not null
# notification_triggered :boolean default(FALSE)
# created_at :datetime not null
# updated_at :datetime not null
# deployment_run_id :uuid not null, indexed => [release_health_rule_id, release_health_metric_id], indexed
# release_health_metric_id :uuid not null, indexed => [deployment_run_id, release_health_rule_id], indexed
# release_health_rule_id :uuid not null, indexed => [deployment_run_id, release_health_metric_id], indexed
#
# Records the health status that a release health rule produced for a
# deployment run when evaluated against a particular health metric reading.
class ReleaseHealthEvent < ApplicationRecord
  include HealthAwareness

  belongs_to :deployment_run
  belongs_to :release_health_rule
  belongs_to :release_health_metric

  # Unordered finders such as `first`/`last` sort by when the event happened
  # rather than by the (random) UUID primary key.
  self.implicit_order_column = :event_timestamp
end
30 changes: 30 additions & 0 deletions app/models/release_health_metric.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,18 @@
#
class ReleaseHealthMetric < ApplicationRecord
belongs_to :deployment_run
has_one :release_health_event, dependent: :nullify

delegate :train, to: :deployment_run

after_create_commit :check_release_health

METRIC_VALUES = {
session_stability: :session_stability,
user_stability: :user_stability,
errors: :errors_count,
new_errors: :new_errors_count
}.with_indifferent_access

def user_stability
return if daily_users.blank? || daily_users.zero?
Expand All @@ -33,4 +45,22 @@ def adoption_rate
return 0 if total_sessions_in_last_day.blank? || total_sessions_in_last_day.zero?
((sessions_in_last_day.to_f / total_sessions_in_last_day.to_f) * 100).ceil(2)
end

# Callback body (registered with after_create_commit): checks this metric
# reading against every release health rule configured on the train.
#
# For each rule, the matching reading is looked up through METRIC_VALUES and
# passed to #create_health_event. Rules whose reading is nil/false are skipped.
def check_release_health
  rules = train.release_health_rules
  return if rules.blank?

  rules.each do |health_rule|
    reading = send(METRIC_VALUES[health_rule.metric])
    create_health_event(health_rule, reading) if reading
  end
end

# Records a release health event for +rule+ when its status changes.
#
# The rule is evaluated against +value+ and compared with the most recent
# event stored for the same rule on this deployment run. Nothing is recorded
# when the status is unchanged; with no earlier event, the rule is treated as
# having been healthy, so only an unhealthy first reading produces an event.
#
# @param rule [ReleaseHealthRule] the rule to evaluate
# @param value [Numeric] the metric reading the rule applies to
# @return [ReleaseHealthEvent, nil] the created event, or nil when the status
#   did not change
def create_health_event(rule, value)
  events = deployment_run.release_health_events
  last_event = events.where(release_health_rule: rule).last
  current_status = rule.evaluate(value)

  previous_status = last_event ? last_event.health_status : ReleaseHealthRule.health_statuses[:healthy]
  return if previous_status == current_status

  # Create through the deployment run's collection instead of the metric's
  # has_one builder: one metric reading can trip several rules, and the has_one
  # builder would try to detach (nullify) the event already created for another
  # rule, which fails because an event must always reference its metric.
  events.create(
    release_health_metric: self,
    release_health_rule: rule,
    health_status: current_status,
    event_timestamp: fetched_at
  )
end
end
50 changes: 50 additions & 0 deletions app/models/release_health_rule.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# == Schema Information
#
# Table name: release_health_rules
#
# id :uuid not null, primary key
# comparator :string not null
# is_halting :boolean default(FALSE), not null
# metric :string not null, indexed, indexed => [train_id]
# threshold_value :float not null
# created_at :datetime not null
# updated_at :datetime not null
# train_id :uuid not null, indexed, indexed => [metric]
#
# A per-train rule that classifies one release health metric as healthy or
# unhealthy by comparing its value with a threshold. Each train may hold at
# most one rule per metric.
class ReleaseHealthRule < ApplicationRecord
  include HealthAwareness

  # Predicate for each comparator name, called as (metric value, threshold).
  COMPARATORS = {
    lt: ->(actual, limit) { actual < limit },
    lte: ->(actual, limit) { actual <= limit },
    gt: ->(actual, limit) { actual > limit },
    gte: ->(actual, limit) { actual >= limit },
    eq: ->(actual, limit) { actual == limit }
  }

  belongs_to :train

  validates :metric, uniqueness: {scope: :train_id}

  enum metric: {
    session_stability: "session_stability",
    user_stability: "user_stability",
    errors: "errors",
    new_errors: "new_errors"
  }

  enum comparator: {
    lt: "lt",
    lte: "lte",
    gt: "gt",
    gte: "gte",
    eq: "eq"
  }

  # Classifies +value+ against this rule's threshold.
  #
  # @param value [Numeric] the metric reading to check
  # @return [String] the "healthy" status when the comparator holds for
  #   (value, threshold_value), otherwise the "unhealthy" status
  # @raise [ArgumentError] when no predicate exists for the stored comparator
  def evaluate(value)
    predicate = COMPARATORS[comparator.to_sym]
    raise ArgumentError, "Invalid comparator" unless predicate

    status_key = predicate.call(value, threshold_value) ? :healthy : :unhealthy
    ReleaseHealthRule.health_statuses[status_key]
  end
end
3 changes: 2 additions & 1 deletion app/models/train.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,14 @@ class Train < ApplicationRecord
has_many :deployments, through: :steps
has_many :scheduled_releases, dependent: :destroy
has_many :notification_settings, inverse_of: :train, dependent: :destroy
has_many :release_health_rules, dependent: :destroy

scope :sequential, -> { order("trains.created_at ASC") }
scope :running, -> { includes(:releases).where(releases: {status: Release.statuses[:on_track]}) }
scope :only_with_runs, -> { joins(:releases).where.not(releases: {status: "stopped"}).distinct }

delegate :ready?, :config, to: :app
delegate :vcs_provider, :ci_cd_provider, :notification_provider, to: :integrations
delegate :vcs_provider, :ci_cd_provider, :notification_provider, :monitoring_provider, to: :integrations

enum status: {
draft: "draft",
Expand Down
16 changes: 16 additions & 0 deletions db/migrate/20231123144813_add_release_health_rules.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Creates the release_health_rules table: one row per (train, metric) pair
# holding the comparator, the threshold and whether the rule is halting.
class AddReleaseHealthRules < ActiveRecord::Migration[7.0]
  def change
    create_table :release_health_rules, id: :uuid do |t|
      t.references :train, type: :uuid, null: false, index: true, foreign_key: true

      t.string :metric, null: false, index: true
      t.string :comparator, null: false
      t.float :threshold_value, null: false
      t.boolean :is_halting, null: false, default: false

      t.timestamps

      # A train can define each metric's rule only once.
      t.index [:train_id, :metric], unique: true
    end
  end
end
21 changes: 21 additions & 0 deletions db/migrate/20231124134628_add_release_health_events.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Creates the release_health_events table, which links a deployment run, a
# release health rule and a release health metric to a health status recorded
# at a given time.
class AddReleaseHealthEvents < ActiveRecord::Migration[7.0]
  def change
    create_table :release_health_events, id: :uuid do |t|
      t.references :deployment_run, type: :uuid, null: false, foreign_key: true
      t.references :release_health_rule, type: :uuid, null: false, foreign_key: true
      t.references :release_health_metric, type: :uuid, null: false, foreign_key: true

      t.string :health_status, null: false
      t.datetime :event_timestamp, null: false, index: true
      t.boolean :notification_triggered, default: false
      t.boolean :action_triggered, default: false

      t.timestamps

      # At most one event per (deployment run, rule, metric) combination.
      # Named explicitly to keep the index name short.
      t.index [:deployment_run_id, :release_health_rule_id, :release_health_metric_id],
        unique: true,
        name: "idx_events_on_deployment_and_rule_and_metric"
    end
  end
end
36 changes: 35 additions & 1 deletion db/schema.rb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions spec/factories/release_health_metrics.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Factory for ReleaseHealthMetric: a reading fetched "now" for a deployment
# run, with small fixed user, session and error counts.
FactoryBot.define do
  factory :release_health_metric do
    association :deployment_run

    fetched_at { Time.current }

    # Users
    daily_users { 100 }
    daily_users_with_errors { 10 }

    # Sessions
    sessions { 1000 }
    sessions_with_errors { 10 }
    sessions_in_last_day { 100 }
    total_sessions_in_last_day { 5000 }

    # Errors
    errors_count { 10 }
    new_errors_count { 1 }
  end
end
28 changes: 28 additions & 0 deletions spec/factories/release_health_rules.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Factory for ReleaseHealthRule. The base factory sets no metric; pick one
# with a trait. The defaults (gte 90.0) apply to the stability traits, while
# the error-count traits override them with "lt" thresholds.
FactoryBot.define do
  factory :release_health_rule do
    association :train

    threshold_value { 90.0 }
    comparator { "gte" }

    trait(:session_stability) { metric { "session_stability" } }

    trait(:user_stability) { metric { "user_stability" } }

    trait :errors do
      metric { "errors" }
      comparator { "lt" }
      threshold_value { 90 }
    end

    trait :new_errors do
      metric { "new_errors" }
      comparator { "lt" }
      threshold_value { 10 }
    end
  end
end
Loading

0 comments on commit 3996185

Please sign in to comment.