From eef83e84e2020298c59bf58b05c2b38708954020 Mon Sep 17 00:00:00 2001 From: Tim Cowlishaw Date: Mon, 14 Oct 2024 08:10:29 +0200 Subject: [PATCH 1/2] log errors for devices and send warning --- app/lib/mqtt_messages_handler.rb | 71 ++++++++++++++----- app/mailers/user_mailer.rb | 6 ++ app/models/device.rb | 1 + app/models/ingest_error.rb | 3 + .../user_mailer/device_ingest_errors.html.erb | 66 +++++++++++++++++ ...41014052837_create_device_ingest_errors.rb | 13 ++++ db/schema.rb | 16 ++++- 7 files changed, 157 insertions(+), 19 deletions(-) create mode 100644 app/models/ingest_error.rb create mode 100644 app/views/user_mailer/device_ingest_errors.html.erb create mode 100644 db/migrate/20241014052837_create_device_ingest_errors.rb diff --git a/app/lib/mqtt_messages_handler.rb b/app/lib/mqtt_messages_handler.rb index 86fadfae..62282114 100644 --- a/app/lib/mqtt_messages_handler.rb +++ b/app/lib/mqtt_messages_handler.rb @@ -7,46 +7,46 @@ def handle_topic(topic, message, retry_on_nil_device=true) handshake_device(topic) if topic.to_s.include?('inventory') - handle_inventory(topic, message) - elsif topic.to_s.include?('raw') - handle_readings(topic, parse_raw_readings(message), retry_on_nil_device) - elsif topic.to_s.include?('readings') - handle_readings(topic, message, retry_on_nil_device) - elsif topic.to_s.include?('info') - handle_info(topic, message, retry_on_nil_device) + handle_inventory(message) + return true else - true + device = find_device_for_topic(topic, message, retry_on_nil_device) + return nil if device.nil? + with_device_error_handling(device, topic, message) do + if topic.to_s.include?('raw') + handle_readings(device, parse_raw_readings(message)) + elsif topic.to_s.include?('readings') + handle_readings(device, message) + elsif topic.to_s.include?('info') + handle_info(device, message) + else + true + end + end end end private - def handle_inventory(topic, message) + def handle_inventory(message) DeviceInventory.create({ report: (message rescue nil) }) return true end - def handle_readings(topic, message, retry_on_nil_device) - device = find_device_for_topic(topic, message, retry_on_nil_device) - return nil if device.nil? - + def handle_readings(device, message) parsed = JSON.parse(message) if message data = parsed["data"] if parsed return nil if data.nil? or data&.empty? - data.each do |reading| storer.store(device, reading) end - return true rescue Exception => e Sentry.capture_exception(e) raise e if Rails.env.test? end - def handle_info(topic, message, retry_on_nil_device) - device = find_device_for_topic(topic, message, retry_on_nil_device) - return nil if device.nil? + def handle_info(device, message) json_message = JSON.parse(message) device.update_column(:hardware_info, json_message) return true @@ -88,6 +88,41 @@ def handle_nil_device(topic, message, retry_on_nil_device) end end + def with_device_error_handling(device, topic, message, reraise=true, &block) + begin + block.call + rescue Exception => e + hardware_info = device.hardware_info + Sentry.set_tags({ + "device-id": device.id, + "device-hardware-version": hardware_info&.[]("hw_ver"), + "device-esp-version": hardware_info&.[]("esp_ver"), + "device-sam-version": hardware_info&.[]("sam_ver"), + }) + Sentry.capture_exception(e) + last_error = device.ingest_errors.order(created_at: :desc).first + ingest_error = device.ingest_errors.create({ + topic: topic, + message: message, + error_class: e.class.name, + error_message: e.message, + error_trace: e.full_message + }) + if send_device_error_warnings && (!last_error || last_error.created_at < device_error_warning_threshold) + UserMailer.device_ingest_errors(device.id).deliver_later + end + raise e if reraise + end + end + + def send_device_error_warnings + ENV.fetch("SEND_DEVICE_ERROR_WARNINGS", false) + end + + def device_error_warning_threshold + ENV.fetch("DEVICE_ERROR_WARNING_THRESHOLD_HOURS", "6").to_i.hours.ago + end + def device_token(topic) device_token = topic[/device\/sck\/(.*?)\//m, 1].to_s end diff --git a/app/mailers/user_mailer.rb b/app/mailers/user_mailer.rb index df72aa8a..7c8a71f3 100644 --- a/app/mailers/user_mailer.rb +++ b/app/mailers/user_mailer.rb @@ -32,4 +32,10 @@ def device_stopped_publishing device_id mail to: @user.to_email_s, subject: 'Device stopped publishing', from: "SmartCitizen Notifications - Device " end + def device_ingest_errors(device_id) + @device = Device.find(device_id) + @user = @device.owner + mail to: @user.to_email_s, subject: "Device has errors", from: "SmartCitizen Notifications - Device " + end + end diff --git a/app/models/device.rb b/app/models/device.rb index 52c37a8e..2ce4d811 100644 --- a/app/models/device.rb +++ b/app/models/device.rb @@ -25,6 +25,7 @@ class Device < ActiveRecord::Base has_many :components, dependent: :destroy has_many :sensors, through: :components has_one :postprocessing, dependent: :destroy + has_many :ingest_errors, dependent: :destroy has_and_belongs_to_many :experiments diff --git a/app/models/ingest_error.rb b/app/models/ingest_error.rb new file mode 100644 index 00000000..ddadd02a --- /dev/null +++ b/app/models/ingest_error.rb @@ -0,0 +1,3 @@ +class IngestError < ActiveRecord::Base + belongs_to :device +end diff --git a/app/views/user_mailer/device_ingest_errors.html.erb b/app/views/user_mailer/device_ingest_errors.html.erb new file mode 100644 index 00000000..8bdc9af7 --- /dev/null +++ b/app/views/user_mailer/device_ingest_errors.html.erb @@ -0,0 +1,66 @@ + + + + + + Device has errrors + <%= render "email_css" %> + + + + + + + + + + +
+
+ + + + + + + +
+

+ SCK logo +

+
+ + + + + + + + + + + + + +
+

<%= @user.username %>,

+
+

+ We have encountered errors on processing data from the + device '<%= @device %>'. +

+

+ If these errors persist, you may want to update the device firmware, or get in touch with our support team. +

+
+ Manage your notifications +
+

The Smart Citizen Team

+
+
+ +
+
+ + + diff --git a/db/migrate/20241014052837_create_device_ingest_errors.rb b/db/migrate/20241014052837_create_device_ingest_errors.rb new file mode 100644 index 00000000..6a692e57 --- /dev/null +++ b/db/migrate/20241014052837_create_device_ingest_errors.rb @@ -0,0 +1,13 @@ +class CreateDeviceIngestErrors < ActiveRecord::Migration[6.1] + def change + create_table :ingest_errors do |t| + t.references :device, null: false, foreign_key: true + t.text :topic + t.text :message + t.text :error_class + t.text :error_message + t.text :error_trace + t.timestamps + end + end +end diff --git a/db/schema.rb b/db/schema.rb index cf2d78d4..cc624da1 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,8 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2024_10_09_174732) do +ActiveRecord::Schema.define(version: 2024_10_14_052837) do + # These are extensions that must be enabled in order to support this database enable_extension "adminpack" enable_extension "hstore" @@ -161,6 +162,18 @@ t.index ["sluggable_type"], name: "index_friendly_id_slugs_on_sluggable_type" end + create_table "ingest_errors", force: :cascade do |t| + t.bigint "device_id", null: false + t.text "topic" + t.text "message" + t.text "error_class" + t.text "error_message" + t.text "error_trace" + t.datetime "created_at", precision: 6, null: false + t.datetime "updated_at", precision: 6, null: false + t.index ["device_id"], name: "index_ingest_errors_on_device_id" + end + create_table "measurements", id: :serial, force: :cascade do |t| t.string "name" t.text "description" @@ -338,6 +351,7 @@ add_foreign_key "devices_tags", "devices" add_foreign_key "devices_tags", "tags" add_foreign_key "experiments", "users", column: "owner_id" + add_foreign_key "ingest_errors", "devices" add_foreign_key "postprocessings", "devices" add_foreign_key "sensors", "measurements" add_foreign_key "uploads", "users" From a63b9fcf804b91057588c9a97253694fefbb0d8e Mon Sep 17 00:00:00 2001 From: Tim Cowlishaw Date: Mon, 14 Oct 2024 08:23:47 +0200 Subject: [PATCH 2/2] raise error to be captured on parser failure --- app/lib/raw_mqtt_message_parser.rb | 4 +++- spec/lib/raw_mqtt_message_parser_spec.rb | 5 ++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/app/lib/raw_mqtt_message_parser.rb b/app/lib/raw_mqtt_message_parser.rb index ba2c8f48..dfa5e61d 100644 --- a/app/lib/raw_mqtt_message_parser.rb +++ b/app/lib/raw_mqtt_message_parser.rb @@ -6,7 +6,9 @@ def initialize end def parse(message) - parser.parse(self.convert_to_ascii(message.strip))&.to_hash + message = parser.parse(self.convert_to_ascii(message.strip))&.to_hash + raise "Message not parsed: #{message}" unless message + return message end private diff --git a/spec/lib/raw_mqtt_message_parser_spec.rb b/spec/lib/raw_mqtt_message_parser_spec.rb index 7e3d798a..6987187a 100644 --- a/spec/lib/raw_mqtt_message_parser_spec.rb +++ b/spec/lib/raw_mqtt_message_parser_spec.rb @@ -59,10 +59,9 @@ expect(parsed).to eq({ data: [ { recorded_at: "2024-09-25T13:19:38Z", sensors: [{id: "100", value: "-2000.12345"}] }]}) end - it "returns nil if no valid message parsed" do + it "raises an error if no valid message parsed" do message = "ceci n'est pas un message" - parsed = parser.parse(message) - expect(parsed).to eq(nil) + expect {parser.parse(message)}.to raise_error(RuntimeError) end it "parses timestamps at any position in the packet" do