diff --git a/lib/metrics.go b/lib/metrics.go index 36b0addc..6c2480cb 100644 --- a/lib/metrics.go +++ b/lib/metrics.go @@ -15,18 +15,18 @@ var ( []string{"host"}, ) - routeReloadCountMetric = prometheus.NewCounter( - prometheus.CounterOpts{ - Name: "router_route_reload_total", - Help: "Total number of attempts to reload the routing table", - }, - ) - - routeReloadErrorCountMetric = prometheus.NewCounter( - prometheus.CounterOpts{ - Name: "router_route_reload_error_total", - Help: "Number of failed attempts to reload the routing table", + routeReloadDurationMetric = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Name: "router_route_reload_duration_seconds", + Help: "Histogram of route reload durations in seconds", + Objectives: map[float64]float64{ + 0.5: 0.01, + 0.9: 0.01, + 0.95: 0.01, + 0.99: 0.005, + }, }, + []string{"success"}, ) routesCountMetric = prometheus.NewGauge( @@ -40,8 +40,7 @@ var ( func registerMetrics(r prometheus.Registerer) { r.MustRegister( internalServerErrorCountMetric, - routeReloadCountMetric, - routeReloadErrorCountMetric, + routeReloadDurationMetric, routesCountMetric, ) handlers.RegisterMetrics(r) diff --git a/lib/router.go b/lib/router.go index 03e56545..7029440d 100644 --- a/lib/router.go +++ b/lib/router.go @@ -5,6 +5,7 @@ import ( "net/http" "net/url" "os" + "strconv" "sync" "time" @@ -213,21 +214,24 @@ type mongoDatabase interface { // create a new proxy mux, load applications (backends) and routes into it, and // then flip the "mux" pointer in the Router. func (rt *Router) reloadRoutes(db *mgo.Database, currentOptime bson.MongoTimestamp) { + var success bool + timer := prometheus.NewTimer(prometheus.ObserverFunc(func(v float64) { + labels := prometheus.Labels{"success": strconv.FormatBool(success)} + routeReloadDurationMetric.With(labels).Observe(v) + })) defer func() { - // increment this metric regardless of whether the route reload succeeded - routeReloadCountMetric.Inc() - + success = true if r := recover(); r != nil { + success = false logWarn("router: recovered from panic in reloadRoutes:", r) logInfo("router: original routes have not been modified") errorMessage := fmt.Sprintf("panic: %v", r) err := logger.RecoveredError{ErrorMessage: errorMessage} logger.NotifySentry(logger.ReportableError{Error: err}) - - routeReloadErrorCountMetric.Inc() } else { rt.mongoReadToOptime = currentOptime } + timer.ObserveDuration() }() logInfo("router: reloading routes")