Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(notifier): add selfstate monitor #1118

Open
wants to merge 11 commits into
base: feat/add-selfstate-heartbeats
Choose a base branch
from
244 changes: 196 additions & 48 deletions cmd/notifier/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import (
"github.com/moira-alert/moira/cmd"
"github.com/moira-alert/moira/notifier"
"github.com/moira-alert/moira/notifier/selfstate"
"github.com/moira-alert/moira/notifier/selfstate/heartbeat"
"github.com/moira-alert/moira/notifier/selfstate/monitor"
)

type config struct {
Expand Down Expand Up @@ -42,8 +44,8 @@ type notifierConfig struct {
ReschedulingDelay string `yaml:"rescheduling_delay"`
// Senders configuration section. See https://moira.readthedocs.io/en/latest/installation/configuration.html for more explanation
Senders []map[string]interface{} `yaml:"senders"`
// Self state monitor configuration section. Note: No inner subscriptions is required. It's own notification mechanism will be used.
SelfState selfStateConfig `yaml:"moira_selfstate"`
// Selfstate monitor configuration section. Note: no inner subscriptions are required; its own notification mechanism will be used.
Selfstate selfstateConfig `yaml:"moira_selfstate"`
// Web-UI uri prefix for trigger links in notifications. For example: with 'http://localhost' every notification will contain link like 'http://localhost/trigger/triggerId'
FrontURI string `yaml:"front_uri"`
// Timezone to use to convert ticks. Default is UTC. See https://golang.org/pkg/time/#LoadLocation for more details.
Expand All @@ -58,25 +60,153 @@ type notifierConfig struct {
SetLogLevel setLogLevelConfig `yaml:"set_log_level"`
}

type selfStateConfig struct {
// If true, Self state monitor will be enabled
Enabled bool `yaml:"enabled"`
// If true, Self state monitor will check remote checker status
RemoteTriggersEnabled bool `yaml:"remote_triggers_enabled"`
// Max Redis disconnect delay to send alert when reached
RedisDisconnectDelay string `yaml:"redis_disconect_delay"`
// Max Filter metrics receive delay to send alert when reached
LastMetricReceivedDelay string `yaml:"last_metric_received_delay"`
// Max Checker checks perform delay to send alert when reached
LastCheckDelay string `yaml:"last_check_delay"`
// Max Remote triggers Checker checks perform delay to send alert when reached
LastRemoteCheckDelay string `yaml:"last_remote_check_delay"`
// Contact list for Self state monitor alerts
Contacts []map[string]string `yaml:"contacts"`
// Self state monitor alerting interval
NoticeInterval string `yaml:"notice_interval"`
// Self state monitor check interval
CheckInterval string `yaml:"check_interval"`
// heartbeaterAlertConfig sets the configuration for the alert of a particular heartbeater.
// It is read from the `alert` key of a heartbeater section and copied field by
// field into heartbeat.AlertConfig.
type heartbeaterAlertConfig struct {
	// Name is read from the `name` key.
	// NOTE(review): presumably the title of the alert — confirm in the heartbeat package.
	Name string `yaml:"name"`
	// Desc is read from the `desc` key.
	// NOTE(review): presumably the alert description text — confirm in the heartbeat package.
	Desc string `yaml:"desc"`
}

// heartbeaterBaseConfig sets the base configuration of heartbeater.
// The concrete heartbeater configs embed it with an inline YAML tag, so its
// keys sit directly in each heartbeater section.
type heartbeaterBaseConfig struct {
	// Enabled is read from the `enabled` key.
	Enabled bool `yaml:"enabled"`
	// NeedTurnOffNotifier is read from the `need_turn_off_notifier` key.
	// NOTE(review): presumably tells the monitor to switch the notifier off
	// when this heartbeater fails — confirm in the heartbeat package.
	NeedTurnOffNotifier bool `yaml:"need_turn_off_notifier"`

	// AlertCfg is the alert section (`alert` key) of this heartbeater.
	AlertCfg heartbeaterAlertConfig `yaml:"alert"`
}

// getSettings converts the YAML-level base heartbeater configuration into the
// heartbeat package representation, copying every field one to one.
func (cfg heartbeaterBaseConfig) getSettings() heartbeat.HeartbeaterBaseConfig {
	alert := heartbeat.AlertConfig{
		Name: cfg.AlertCfg.Name,
		Desc: cfg.AlertCfg.Desc,
	}

	var settings heartbeat.HeartbeaterBaseConfig
	settings.Enabled = cfg.Enabled
	settings.NeedTurnOffNotifier = cfg.NeedTurnOffNotifier
	settings.AlertCfg = alert

	return settings
}

// databaseHeartbeaterConfig defines the database heartbeater configuration.
type databaseHeartbeaterConfig struct {
	// Shared heartbeater settings; the inline tag places them in this section.
	heartbeaterBaseConfig `yaml:",inline"`

	// RedisDisconnectDelay is a duration string (the defaults use "30s") that
	// is converted with to.Duration when the settings are built.
	// NOTE(review): the previous selfstate config spelled this key
	// `redis_disconect_delay`; configs still using that spelling presumably
	// no longer populate this field — confirm and mention in the changelog.
	RedisDisconnectDelay string `yaml:"redis_disconnect_delay"`
}

// filterHeartbeaterConfig defines the filter heartbeater configuration.
type filterHeartbeaterConfig struct {
	// Shared heartbeater settings; the inline tag places them in this section.
	heartbeaterBaseConfig `yaml:",inline"`

	// MetricReceivedDelay is a duration string read from the
	// `last_metric_received_delay` key and converted with to.Duration.
	// NOTE(review): presumably the maximum time without received metrics
	// before an alert is sent — confirm in the heartbeat package.
	MetricReceivedDelay string `yaml:"last_metric_received_delay"`
}

// localCheckerHeartbeaterConfig defines the local checker heartbeater configuration.
type localCheckerHeartbeaterConfig struct {
	// Shared heartbeater settings; the inline tag places them in this section.
	heartbeaterBaseConfig `yaml:",inline"`

	// LocalCheckDelay is a duration string read from the `last_check_delay`
	// key and converted with to.Duration.
	// NOTE(review): presumably the maximum time without local checks before
	// an alert is sent — confirm in the heartbeat package.
	LocalCheckDelay string `yaml:"last_check_delay"`
}

// remoteCheckerHeartbeaterConfig defines the remote checker heartbeater configuration.
type remoteCheckerHeartbeaterConfig struct {
	// Shared heartbeater settings; the inline tag places them in this section.
	heartbeaterBaseConfig `yaml:",inline"`

	// RemoteCheckDelay is a duration string read from the
	// `last_remote_check_delay` key and converted with to.Duration.
	// NOTE(review): presumably the maximum time without remote checks before
	// an alert is sent — confirm in the heartbeat package.
	RemoteCheckDelay string `yaml:"last_remote_check_delay"`
}

// notifierHeartbeaterConfig defines the notifier heartbeater configuration.
// It carries only the shared base settings and has no delay of its own.
type notifierHeartbeaterConfig struct {
	// Shared heartbeater settings; the inline tag places them in this section.
	heartbeaterBaseConfig `yaml:",inline"`
}

// heartbeatsConfig defines the configuration of heartbeaters.
// Each field maps one YAML subsection to the config of one heartbeater.
type heartbeatsConfig struct {
	// DatabaseCfg is read from the `database` key.
	DatabaseCfg databaseHeartbeaterConfig `yaml:"database"`
	// FilterCfg is read from the `filter` key.
	FilterCfg filterHeartbeaterConfig `yaml:"filter"`
	// LocalCheckerCfg is read from the `local_checker` key.
	LocalCheckerCfg localCheckerHeartbeaterConfig `yaml:"local_checker"`
	// RemoteCheckerCfg is read from the `remote_checker` key.
	RemoteCheckerCfg remoteCheckerHeartbeaterConfig `yaml:"remote_checker"`
	// NotifierCfg is read from the `notifier` key.
	NotifierCfg notifierHeartbeaterConfig `yaml:"notifier"`
}

// getSettings converts the YAML-level heartbeaters configuration into
// heartbeat.HeartbeatersConfig. Each section contributes its shared base
// settings plus, where present, its delay parsed with to.Duration.
func (cfg heartbeatsConfig) getSettings() heartbeat.HeartbeatersConfig {
	var settings heartbeat.HeartbeatersConfig

	database := cfg.DatabaseCfg
	settings.DatabaseCfg = heartbeat.DatabaseHeartbeaterConfig{
		HeartbeaterBaseConfig: database.heartbeaterBaseConfig.getSettings(),
		RedisDisconnectDelay:  to.Duration(database.RedisDisconnectDelay),
	}

	filter := cfg.FilterCfg
	settings.FilterCfg = heartbeat.FilterHeartbeaterConfig{
		HeartbeaterBaseConfig: filter.heartbeaterBaseConfig.getSettings(),
		MetricReceivedDelay:   to.Duration(filter.MetricReceivedDelay),
	}

	localChecker := cfg.LocalCheckerCfg
	settings.LocalCheckerCfg = heartbeat.LocalCheckerHeartbeaterConfig{
		HeartbeaterBaseConfig: localChecker.heartbeaterBaseConfig.getSettings(),
		LocalCheckDelay:       to.Duration(localChecker.LocalCheckDelay),
	}

	remoteChecker := cfg.RemoteCheckerCfg
	settings.RemoteCheckerCfg = heartbeat.RemoteCheckerHeartbeaterConfig{
		HeartbeaterBaseConfig: remoteChecker.heartbeaterBaseConfig.getSettings(),
		RemoteCheckDelay:      to.Duration(remoteChecker.RemoteCheckDelay),
	}

	settings.NotifierCfg = heartbeat.NotifierHeartbeaterConfig{
		HeartbeaterBaseConfig: cfg.NotifierCfg.heartbeaterBaseConfig.getSettings(),
	}

	return settings
}

// monitorBaseConfig defines the basic configuration of the monitor.
// Both the admin and the user monitor configs embed it inline.
type monitorBaseConfig struct {
	// Enabled is read from the `enabled` key.
	Enabled bool `yaml:"enabled"`
	// HearbeatersCfg holds the per-heartbeater sections under `heartbeaters`.
	// NOTE(review): the field name is missing a "t" ("Hearbeaters"); renaming
	// it requires touching every reference in this file at the same time.
	HearbeatersCfg heartbeatsConfig `yaml:"heartbeaters"`
	// NoticeInterval is a duration string converted with to.Duration.
	// NOTE(review): presumably the interval between repeated alerts — confirm
	// in the monitor package.
	NoticeInterval string `yaml:"notice_interval"`
	// CheckInterval is a duration string converted with to.Duration.
	// NOTE(review): presumably how often the heartbeaters are checked —
	// confirm in the monitor package.
	CheckInterval string `yaml:"check_interval"`
}

// adminMonitorConfig defines the configuration for the admin monitor.
type adminMonitorConfig struct {
	// Shared monitor settings; the inline tag places them in this section.
	monitorBaseConfig `yaml:",inline"`

	// AdminContacts is read from the `contacts` key and passed through
	// unchanged to monitor.AdminMonitorConfig.
	// NOTE(review): the key names expected inside each map are not visible
	// here — confirm against the monitor package.
	AdminContacts []map[string]string `yaml:"contacts"`
}

// userMonitorConfig defines the configuration for the user monitor.
// It carries only the shared monitor settings.
type userMonitorConfig struct {
	// Shared monitor settings; the inline tag places them in this section.
	monitorBaseConfig `yaml:",inline"`
}

// monitorConfig defines the configuration for all monitors.
type monitorConfig struct {
	// AdminCfg is read from the `admin` key.
	AdminCfg adminMonitorConfig `yaml:"admin"`
	// UserCfg is read from the `user` key.
	UserCfg userMonitorConfig `yaml:"user"`
}

// selfstateConfig defines the configuration of the selfstate worker.
// It is the `moira_selfstate` section of the notifier configuration.
type selfstateConfig struct {
	// Enabled is read from the `enabled` key; main only creates and starts
	// the selfstate worker when it is true.
	Enabled bool `yaml:"enabled"`
	// MonitorCfg holds the admin and user monitor sections under `monitor`.
	MonitorCfg monitorConfig `yaml:"monitor"`
}

func (cfg *selfstateConfig) getSettings() selfstate.Config {
return selfstate.Config{
Enabled: cfg.Enabled,
MonitorCfg: selfstate.MonitorConfig{
AdminCfg: monitor.AdminMonitorConfig{
MonitorBaseConfig: monitor.MonitorBaseConfig{
Enabled: cfg.MonitorCfg.AdminCfg.Enabled,
HeartbeatersCfg: cfg.MonitorCfg.AdminCfg.HearbeatersCfg.getSettings(),
NoticeInterval: to.Duration(cfg.MonitorCfg.AdminCfg.NoticeInterval),
CheckInterval: to.Duration(cfg.MonitorCfg.AdminCfg.CheckInterval),
},
AdminContacts: cfg.MonitorCfg.AdminCfg.AdminContacts,
},
UserCfg: monitor.UserMonitorConfig{
MonitorBaseConfig: monitor.MonitorBaseConfig{
Enabled: cfg.MonitorCfg.UserCfg.Enabled,
HeartbeatersCfg: cfg.MonitorCfg.UserCfg.HearbeatersCfg.getSettings(),
NoticeInterval: to.Duration(cfg.MonitorCfg.UserCfg.NoticeInterval),
CheckInterval: to.Duration(cfg.MonitorCfg.UserCfg.CheckInterval),
},
},
},
}
}

func getDefault() config {
Expand Down Expand Up @@ -105,12 +235,50 @@ func getDefault() config {
SenderTimeout: "10s",
ResendingTimeout: "1:00",
ReschedulingDelay: "60s",
SelfState: selfStateConfig{
Enabled: false,
RedisDisconnectDelay: "30s",
LastMetricReceivedDelay: "60s",
LastCheckDelay: "60s",
NoticeInterval: "300s",
Selfstate: selfstateConfig{
Enabled: false,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

А нужно ли заполнять остальной конфиг, если он по умолчанию выключен? Или типа на случай, если включал и чего-то не заполнят?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Просто оставил старые значения, могу убрать

MonitorCfg: monitorConfig{
AdminCfg: adminMonitorConfig{
monitorBaseConfig: monitorBaseConfig{
Enabled: false,
HearbeatersCfg: heartbeatsConfig{
DatabaseCfg: databaseHeartbeaterConfig{
RedisDisconnectDelay: "30s",
},
FilterCfg: filterHeartbeaterConfig{
MetricReceivedDelay: "60s",
},
LocalCheckerCfg: localCheckerHeartbeaterConfig{
LocalCheckDelay: "60s",
},
RemoteCheckerCfg: remoteCheckerHeartbeaterConfig{
RemoteCheckDelay: "300s",
},
NotifierCfg: notifierHeartbeaterConfig{},
},
},
},
UserCfg: userMonitorConfig{
monitorBaseConfig: monitorBaseConfig{
Enabled: false,
HearbeatersCfg: heartbeatsConfig{
DatabaseCfg: databaseHeartbeaterConfig{
RedisDisconnectDelay: "30s",
},
FilterCfg: filterHeartbeaterConfig{
MetricReceivedDelay: "60s",
},
LocalCheckerCfg: localCheckerHeartbeaterConfig{
LocalCheckDelay: "60s",
},
RemoteCheckerCfg: remoteCheckerHeartbeaterConfig{
RemoteCheckDelay: "300s",
},
NotifierCfg: notifierHeartbeaterConfig{},
},
},
},
},
},
FrontURI: "http://localhost",
Timezone: "UTC",
Expand Down Expand Up @@ -189,8 +357,7 @@ func (config *notifierConfig) getSettings(logger moira.Logger) notifier.Config {
Msg("Found dynamic log rules in config for some contacts and subscriptions")

return notifier.Config{
SelfStateEnabled: config.SelfState.Enabled,
SelfStateContacts: config.SelfState.Contacts,
SelfstateEnabled: config.Selfstate.Enabled,
SendingTimeout: to.Duration(config.SenderTimeout),
ResendingTimeout: to.Duration(config.ResendingTimeout),
ReschedulingDelay: to.Duration(config.ReschedulingDelay),
Expand All @@ -213,22 +380,3 @@ func checkDateTimeFormat(format string) error {
}
return nil
}

// getSettings converts the YAML-level self state configuration into
// selfstate.Config. Delay and notice values are parsed with to.Duration and
// truncated to whole seconds; the check interval falls back to 10 seconds
// when the key is empty.
func (cfg *selfStateConfig) getSettings() selfstate.Config {
	var interval time.Duration
	switch cfg.CheckInterval {
	case "":
		// 10 sec is default check value
		interval = 10 * time.Second
	default:
		interval = to.Duration(cfg.CheckInterval)
	}

	// wholeSeconds parses a duration string and drops the sub-second part.
	wholeSeconds := func(raw string) int64 {
		return int64(to.Duration(raw).Seconds())
	}

	return selfstate.Config{
		Enabled:                        cfg.Enabled,
		RedisDisconnectDelaySeconds:    wholeSeconds(cfg.RedisDisconnectDelay),
		LastMetricReceivedDelaySeconds: wholeSeconds(cfg.LastMetricReceivedDelay),
		LastCheckDelaySeconds:          wholeSeconds(cfg.LastCheckDelay),
		LastRemoteCheckDelaySeconds:    wholeSeconds(cfg.LastRemoteCheckDelay),
		CheckInterval:                  interval,
		Contacts:                       cfg.Contacts,
		NoticeIntervalSeconds:          wholeSeconds(cfg.NoticeInterval),
	}
}
25 changes: 15 additions & 10 deletions cmd/notifier/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,17 +117,22 @@ func main() {
Msg("Can not configure senders")
}

// Start moira self state checker
if config.Notifier.SelfState.getSettings().Enabled {
selfState := selfstate.NewSelfCheckWorker(logger, database, sender, config.Notifier.SelfState.getSettings(), metrics.ConfigureHeartBeatMetrics(telemetry.Metrics))
if err := selfState.Start(); err != nil {
selfstateCfg := config.Notifier.Selfstate.getSettings()

// Start moira selfstate checker
if selfstateCfg.Enabled {
logger.Info().Msg("Selfstate enabled")
selfstateWorker, err := selfstate.NewSelfstateWorker(selfstateCfg, logger, database, sender, systemClock)
if err != nil {
logger.Fatal().
Error(err).
Msg("SelfState failed")
Msg("Failed to create new selfstate worker")
}
defer stopSelfStateChecker(selfState)

selfstateWorker.Start()
defer stopSelfstateWorker(selfstateWorker)
} else {
logger.Debug().Msg("Moira Self State Monitoring disabled")
logger.Debug().Msg("Moira Selfstate Monitoring disabled")
}

// Start moira notification fetcher
Expand Down Expand Up @@ -181,10 +186,10 @@ func stopNotificationsFetcher(worker *notifications.FetchNotificationsWorker) {
}
}

func stopSelfStateChecker(checker *selfstate.SelfCheckWorker) {
if err := checker.Stop(); err != nil {
func stopSelfstateWorker(selfstateWorker selfstate.SelfstateWorker) {
if err := selfstateWorker.Stop(); err != nil {
logger.Error().
Error(err).
Msg("Failed to stop self check worker")
Msg("Failed to stop selfstate worker")
}
}
1 change: 1 addition & 0 deletions generate_mocks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ mockgen -destination=mock/moira-alert/searcher.go -package=mock_moira_alert gith
mockgen -destination=mock/metric_source/source.go -package=mock_metric_source github.com/moira-alert/moira/metric_source MetricSource
mockgen -destination=mock/metric_source/fetch_result.go -package=mock_metric_source github.com/moira-alert/moira/metric_source FetchResult
mockgen -destination=mock/heartbeat/heartbeat.go -package=mock_heartbeat github.com/moira-alert/moira/notifier/selfstate/heartbeat Heartbeater
mockgen -destination=mock/monitor/monitor.go -package=mock_monitor github.com/moira-alert/moira/notifier/selfstate/monitor Monitor
mockgen -destination=mock/clock/clock.go -package=mock_clock github.com/moira-alert/moira Clock
mockgen -destination=mock/notifier/mattermost/client.go -package=mock_mattermost github.com/moira-alert/moira/senders/mattermost Client

Expand Down
8 changes: 5 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
module github.com/moira-alert/moira

go 1.22
go 1.22.0

toolchain go1.22.2

require (
github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible
Expand Down Expand Up @@ -159,7 +161,7 @@ require (
golang.org/x/text v0.16.0 // indirect
gonum.org/v1/gonum v0.15.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/protobuf v1.33.0 // indirect
google.golang.org/protobuf v1.34.2 // indirect
gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
Expand Down Expand Up @@ -192,7 +194,7 @@ require (
github.com/mitchellh/go-testing-interface v1.14.1 // indirect
github.com/mitchellh/reflectwalk v1.0.0 // indirect
github.com/oklog/run v1.1.0 // indirect
github.com/rogpeppe/go-internal v1.10.0 // indirect
github.com/rogpeppe/go-internal v1.12.0 // indirect
github.com/sagikazarmark/locafero v0.4.0 // indirect
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
github.com/shopspring/decimal v1.2.0 // indirect
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -710,8 +710,8 @@ github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:bCqn
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
github.com/rs/cors v1.11.0 h1:0B9GE/r9Bc2UxRMMtymBkHTenPkHDv0CW4Y98GBY+po=
github.com/rs/cors v1.11.0/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU=
github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
Expand Down Expand Up @@ -1416,8 +1416,8 @@ google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp0
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc h1:2gGKlE2+asNV9m7xrywl36YYNnBG5ZQ0r/BOOxqPpmk=
gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc/go.mod h1:m7x9LTH6d71AHyAX77c9yqWCCa3UKHcVEj9y7hAtKDk=
Expand Down
Loading
Loading