From d0ebc78a65e0c41aae0d89105055e347341db63c Mon Sep 17 00:00:00 2001 From: Anna Kapuscinska Date: Sun, 23 Jun 2024 22:12:46 +0100 Subject: [PATCH] metricsconfig: Define health metrics group The idea is that in the future this group will have constrained cardinality and will be enabled by default (in contrast to another group with potentially high cardinality "debug" metrics). This commit only refactors the existing metrics initialization code to use the new framework. The health metrics group contains all metrics that were documented in the "health metrics" section, but in the future some of them will likely be moved to another group. Signed-off-by: Anna Kapuscinska --- cmd/tetragon-metrics-docs/main.go | 2 +- pkg/exporter/metrics.go | 9 +- pkg/grpc/tracing/stats.go | 15 ++- .../cgroupratemetrics/cgroupratemetrics.go | 5 +- pkg/metrics/errormetrics/errormetrics.go | 21 ++-- .../eventcachemetrics/eventcachemetrics.go | 27 +++-- pkg/metrics/eventmetrics/eventmetrics.go | 19 ++-- pkg/metrics/kprobemetrics/kprobemetrics.go | 21 ++-- pkg/metrics/opcodemetrics/opcodemetrics.go | 18 +-- .../policyfiltermetrics.go | 16 ++- .../ratelimitmetrics/ratelimitmetrics.go | 5 +- pkg/metrics/ringbufmetrics/ringbufmetrics.go | 9 +- .../ringbufqueuemetrics.go | 7 +- pkg/metrics/watchermetrics/watchermetrics.go | 17 ++- pkg/metricsconfig/healthmetrics.go | 105 ++++++++++++++++++ pkg/metricsconfig/initmetrics.go | 67 +---------- pkg/observer/data_stats.go | 18 +-- pkg/process/metrics.go | 7 +- pkg/version/metrics.go | 6 +- 19 files changed, 234 insertions(+), 160 deletions(-) create mode 100644 pkg/metricsconfig/healthmetrics.go diff --git a/cmd/tetragon-metrics-docs/main.go b/cmd/tetragon-metrics-docs/main.go index a5c2c446421..d96d254ed8f 100644 --- a/cmd/tetragon-metrics-docs/main.go +++ b/cmd/tetragon-metrics-docs/main.go @@ -27,7 +27,7 @@ func main() { func initMetrics(target string, reg *prometheus.Registry, _ *slog.Logger) error { switch target { case "health": - 
metricsconfig.InitHealthMetricsForDocs(reg) + metricsconfig.EnableHealthMetrics(reg).InitForDocs() case "resources": metricsconfig.InitResourcesMetricsForDocs(reg) case "events": diff --git a/pkg/exporter/metrics.go b/pkg/exporter/metrics.go index 75188d315b1..a95db1f0392 100644 --- a/pkg/exporter/metrics.go +++ b/pkg/exporter/metrics.go @@ -6,6 +6,7 @@ package exporter import ( "io" + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -30,10 +31,10 @@ var ( }) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(eventsExportedTotal) - registry.MustRegister(eventsExportedBytesTotal) - registry.MustRegister(eventsExportTimestamp) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(eventsExportedTotal) + group.MustRegister(eventsExportedBytesTotal) + group.MustRegister(eventsExportTimestamp) } func newExportedBytesCounterWriter(w io.Writer, c prometheus.Counter) io.Writer { diff --git a/pkg/grpc/tracing/stats.go b/pkg/grpc/tracing/stats.go index ada802ea0aa..9bb6da45770 100644 --- a/pkg/grpc/tracing/stats.go +++ b/pkg/grpc/tracing/stats.go @@ -4,6 +4,7 @@ package tracing import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -17,16 +18,20 @@ var ( }, []string{"count"}) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(LoaderStats) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(LoaderStats) +} +// TODO: +// 1. Define metrics using functions from pkg/metrics +// 2. Move initialization code to metrics definitions if needed or remove it +// if not needed +// 3. 
Use label values defined as metrics.ConstrainedLabel +func InitMetrics() { // Initialize metrics with labels for _, ty := range LoaderTypeStrings { LoaderStats.WithLabelValues(ty).Add(0) } - - // NOTES: - // * Rename process_loader_stats metric (to e.g. process_loader_events_total) and count label (to e.g. event)? } type LoaderType int diff --git a/pkg/metrics/cgroupratemetrics/cgroupratemetrics.go b/pkg/metrics/cgroupratemetrics/cgroupratemetrics.go index 1d502281cd8..5bacd61a649 100644 --- a/pkg/metrics/cgroupratemetrics/cgroupratemetrics.go +++ b/pkg/metrics/cgroupratemetrics/cgroupratemetrics.go @@ -4,6 +4,7 @@ package cgroupratemetrics import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -41,8 +42,8 @@ var ( }, []string{"type"}) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(CgroupRateTotal) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(CgroupRateTotal) } // Get a new handle on an ErrorTotal metric for an ErrorType diff --git a/pkg/metrics/errormetrics/errormetrics.go b/pkg/metrics/errormetrics/errormetrics.go index a60f6d0bbe7..0874b0dcea8 100644 --- a/pkg/metrics/errormetrics/errormetrics.go +++ b/pkg/metrics/errormetrics/errormetrics.go @@ -7,6 +7,7 @@ import ( "fmt" "github.com/cilium/tetragon/pkg/api/ops" + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -84,10 +85,17 @@ var ( }, []string{"opcode", "error_type"}) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(ErrorTotal) - registry.MustRegister(HandlerErrors) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(ErrorTotal) + group.MustRegister(HandlerErrors) +} +// TODO: +// 1. Define metrics using functions from pkg/metrics +// 2. 
Move initialization code to metrics definitions if needed or remove it +// if not needed +// 3. Use label values defined as metrics.ConstrainedLabel +func InitMetrics() { // Initialize metrics with labels for er := range errorTypeLabelValues { GetErrorTotal(er).Add(0) @@ -100,13 +108,6 @@ func InitMetrics(registry *prometheus.Registry) { // NB: We initialize only ops.MsgOpUndef here, but unknown_opcode can occur for any opcode // that is not explicitly handled. GetHandlerErrors(ops.MsgOpUndef, HandlePerfUnknownOp).Add(0) - - // NOTES: - // * op, msg_op, opcode - standardize on a label (+ add human-readable label) - // * error, error_type, type - standardize on a label - // * Delete errors_total{type="handler_error"} - it duplicates handler_errors_total - // * Consider further splitting errors_total - // * Rename handler_errors_total to event_handler_errors_total? } // Get a new handle on an ErrorTotal metric for an ErrorType diff --git a/pkg/metrics/eventcachemetrics/eventcachemetrics.go b/pkg/metrics/eventcachemetrics/eventcachemetrics.go index 93b816bcb22..a2c33f7b9ca 100644 --- a/pkg/metrics/eventcachemetrics/eventcachemetrics.go +++ b/pkg/metrics/eventcachemetrics/eventcachemetrics.go @@ -5,6 +5,7 @@ package eventcachemetrics import ( "github.com/cilium/tetragon/api/v1/tetragon" + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -79,14 +80,21 @@ var ( }, []string{"event_type"}) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(processInfoErrors) - registry.MustRegister(podInfoErrors) - registry.MustRegister(EventCacheCount) - registry.MustRegister(eventCacheErrorsTotal) - registry.MustRegister(eventCacheRetriesTotal) - registry.MustRegister(parentInfoErrors) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(processInfoErrors) + group.MustRegister(podInfoErrors) + group.MustRegister(EventCacheCount) + 
group.MustRegister(eventCacheErrorsTotal) + group.MustRegister(eventCacheRetriesTotal) + group.MustRegister(parentInfoErrors) +} +// TODO: +// 1. Define metrics using functions from pkg/metrics +// 2. Move initialization code to metrics definitions if needed or remove it +// if not needed +// 3. Use label values defined as metrics.ConstrainedLabel +func InitMetrics() { // Initialize metrics with labels for en := range cacheEntryTypeLabelValues { EventCacheRetries(en).Add(0) @@ -101,11 +109,6 @@ func InitMetrics(registry *prometheus.Registry) { } } } - - // NOTES: - // * error, error_type, type - standardize on a label - // * event, event_type, type - standardize on a label - // * Consider merging event cache errors metrics into one with error, event, entry labels } // Get a new handle on a processInfoErrors metric for an eventType diff --git a/pkg/metrics/eventmetrics/eventmetrics.go b/pkg/metrics/eventmetrics/eventmetrics.go index e3deb2c108b..97a68b17108 100644 --- a/pkg/metrics/eventmetrics/eventmetrics.go +++ b/pkg/metrics/eventmetrics/eventmetrics.go @@ -53,19 +53,22 @@ var ( }, []string{"policy", "hook"}) ) -func InitHealthMetrics(registry *prometheus.Registry) { - registry.MustRegister(FlagCount) - registry.MustRegister(NotifyOverflowedEvents) - // custom collectors are registered independently +func RegisterHealthMetrics(group metrics.Group) { + group.MustRegister(FlagCount) + group.MustRegister(NotifyOverflowedEvents) + group.MustRegisterWithInit(NewBPFCollector()) +} +// TODO: +// 1. Define metrics using functions from pkg/metrics +// 2. Move initialization code to metrics definitions if needed or remove it +// if not needed +// 3. 
Use label values defined as metrics.ConstrainedLabel +func InitHealthMetrics() { // Initialize metrics with labels for _, v := range exec.FlagStrings { FlagCount.WithLabelValues(v).Add(0) } - - // NOTES: - // * op, msg_op, opcode - standardize on a label (+ add human-readable label) - // * event, event_type, type - standardize on a label } func InitEventsMetrics(registry *prometheus.Registry) { diff --git a/pkg/metrics/kprobemetrics/kprobemetrics.go b/pkg/metrics/kprobemetrics/kprobemetrics.go index 2353dab83e5..6d5073570e0 100644 --- a/pkg/metrics/kprobemetrics/kprobemetrics.go +++ b/pkg/metrics/kprobemetrics/kprobemetrics.go @@ -4,6 +4,7 @@ package kprobemetrics import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -45,18 +46,18 @@ var ( }) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(MergeErrors) - registry.MustRegister(MergeOkTotal) - registry.MustRegister(MergePushed) - - // NOTES: - // * Consider merging ok and errors into one with status label +func RegisterMetrics(group metrics.Group) { + group.MustRegister(MergeErrors) + group.MustRegister(MergeOkTotal) + group.MustRegister(MergePushed) } -func InitMetricsForDocs(registry *prometheus.Registry) { - InitMetrics(registry) - +// TODO: +// 1. Define metrics using functions from pkg/metrics +// 2. Move initialization code to metrics definitions if needed or remove it +// if not needed +// 3. 
Use label values defined as metrics.ConstrainedLabel +func InitMetricsForDocs() { // Initialize metrics with example labels for _, curr := range mergeErrorTypeLabelValues { for _, prev := range mergeErrorTypeLabelValues { diff --git a/pkg/metrics/opcodemetrics/opcodemetrics.go b/pkg/metrics/opcodemetrics/opcodemetrics.go index 72cb089f4c4..dc69f94f237 100644 --- a/pkg/metrics/opcodemetrics/opcodemetrics.go +++ b/pkg/metrics/opcodemetrics/opcodemetrics.go @@ -7,6 +7,7 @@ import ( "fmt" "github.com/cilium/tetragon/pkg/api/ops" + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -28,10 +29,17 @@ var ( }, []string{"op"}) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(MsgOpsCount) - registry.MustRegister(LatencyStats) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(MsgOpsCount) + group.MustRegister(LatencyStats) +} +// TODO: +// 1. Define metrics using functions from pkg/metrics +// 2. Move initialization code to metrics definitions if needed or remove it +// if not needed +// 3. Use label values defined as metrics.ConstrainedLabel +func InitMetrics() { // Initialize all metrics for opcode := range ops.OpCodeStrings { if opcode != ops.MsgOpUndef && opcode != ops.MsgOpTest { @@ -39,10 +47,6 @@ func InitMetrics(registry *prometheus.Registry) { LatencyStats.WithLabelValues(fmt.Sprint(int32(opcode))) } } - - // NOTES: - // * op, msg_op, opcode - standardize on a label (+ add human-readable label) - // * Rename handling_latency to handler_latency_microseconds? 
} // Get a new handle on a msgOpsCount metric for an OpCode diff --git a/pkg/metrics/policyfiltermetrics/policyfiltermetrics.go b/pkg/metrics/policyfiltermetrics/policyfiltermetrics.go index 0c60cf844e9..d4f18110a7a 100644 --- a/pkg/metrics/policyfiltermetrics/policyfiltermetrics.go +++ b/pkg/metrics/policyfiltermetrics/policyfiltermetrics.go @@ -4,6 +4,7 @@ package policyfiltermetrics import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -80,9 +81,16 @@ var ( }) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(PolicyFilterOpMetrics, PolicyFilterHookContainerNameMissingMetrics) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(PolicyFilterOpMetrics, PolicyFilterHookContainerNameMissingMetrics) +} +// TODO: +// 1. Define metrics using functions from pkg/metrics +// 2. Move initialization code to metrics definitions if needed or remove it +// if not needed +// 3. Use label values defined as metrics.ConstrainedLabel +func InitMetrics() { // Initialize metrics with labels for _, subsys := range subsysLabelValues { for _, op := range operationLabelValues { @@ -93,10 +101,6 @@ func InitMetrics(registry *prometheus.Registry) { } } } - - // NOTES: - // * Don't confuse op in policyfilter_metrics_total with ops.OpCode - // * Rename policyfilter_metrics_total to get rid of _metrics? 
} func OpInc(subsys Subsys, op Operation, err string) { diff --git a/pkg/metrics/ratelimitmetrics/ratelimitmetrics.go b/pkg/metrics/ratelimitmetrics/ratelimitmetrics.go index caa64296294..86daba4952c 100644 --- a/pkg/metrics/ratelimitmetrics/ratelimitmetrics.go +++ b/pkg/metrics/ratelimitmetrics/ratelimitmetrics.go @@ -4,6 +4,7 @@ package ratelimitmetrics import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -17,6 +18,6 @@ var ( }) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(RateLimitDropped) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(RateLimitDropped) } diff --git a/pkg/metrics/ringbufmetrics/ringbufmetrics.go b/pkg/metrics/ringbufmetrics/ringbufmetrics.go index 92ebc57b542..da3360cc621 100644 --- a/pkg/metrics/ringbufmetrics/ringbufmetrics.go +++ b/pkg/metrics/ringbufmetrics/ringbufmetrics.go @@ -4,6 +4,7 @@ package ringbufmetrics import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -29,8 +30,8 @@ var ( }) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(PerfEventReceived) - registry.MustRegister(PerfEventLost) - registry.MustRegister(PerfEventErrors) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(PerfEventReceived) + group.MustRegister(PerfEventLost) + group.MustRegister(PerfEventErrors) } diff --git a/pkg/metrics/ringbufqueuemetrics/ringbufqueuemetrics.go b/pkg/metrics/ringbufqueuemetrics/ringbufqueuemetrics.go index a5ce095ec36..de7a84f5bd4 100644 --- a/pkg/metrics/ringbufqueuemetrics/ringbufqueuemetrics.go +++ b/pkg/metrics/ringbufqueuemetrics/ringbufqueuemetrics.go @@ -4,6 +4,7 @@ package ringbufqueuemetrics import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ 
-23,7 +24,7 @@ var ( }) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(Received) - registry.MustRegister(Lost) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(Received) + group.MustRegister(Lost) } diff --git a/pkg/metrics/watchermetrics/watchermetrics.go b/pkg/metrics/watchermetrics/watchermetrics.go index 2cdd1abb2a0..476576432fe 100644 --- a/pkg/metrics/watchermetrics/watchermetrics.go +++ b/pkg/metrics/watchermetrics/watchermetrics.go @@ -4,6 +4,7 @@ package watchermetrics import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -45,16 +46,20 @@ var ( }, []string{"watcher"}) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(WatcherErrors) - registry.MustRegister(WatcherEvents) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(WatcherErrors) + group.MustRegister(WatcherEvents) +} +// TODO: +// 1. Define metrics using functions from pkg/metrics +// 2. Move initialization code to metrics definitions if needed or remove it +// if not needed +// 3. 
Use label values defined as metrics.ConstrainedLabel +func InitMetrics() { // Initialize metrics with labels GetWatcherEvents(K8sWatcher).Add(0) GetWatcherErrors(K8sWatcher, FailedToGetPodError).Add(0) - - // NOTES: - // * error, error_type, type - standardize on a label } // Get a new handle on an WatcherEvents metric for a watcher type diff --git a/pkg/metricsconfig/healthmetrics.go b/pkg/metricsconfig/healthmetrics.go new file mode 100644 index 00000000000..4b7483d84cb --- /dev/null +++ b/pkg/metricsconfig/healthmetrics.go @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of Tetragon + +package metricsconfig + +import ( + "sync" + + "github.com/cilium/tetragon/pkg/eventcache" + "github.com/cilium/tetragon/pkg/exporter" + "github.com/cilium/tetragon/pkg/grpc/tracing" + "github.com/cilium/tetragon/pkg/metrics" + "github.com/cilium/tetragon/pkg/metrics/cgroupratemetrics" + "github.com/cilium/tetragon/pkg/metrics/errormetrics" + "github.com/cilium/tetragon/pkg/metrics/eventcachemetrics" + "github.com/cilium/tetragon/pkg/metrics/eventmetrics" + "github.com/cilium/tetragon/pkg/metrics/kprobemetrics" + "github.com/cilium/tetragon/pkg/metrics/opcodemetrics" + "github.com/cilium/tetragon/pkg/metrics/policyfiltermetrics" + "github.com/cilium/tetragon/pkg/metrics/policystatemetrics" + "github.com/cilium/tetragon/pkg/metrics/ratelimitmetrics" + "github.com/cilium/tetragon/pkg/metrics/ringbufmetrics" + "github.com/cilium/tetragon/pkg/metrics/ringbufqueuemetrics" + "github.com/cilium/tetragon/pkg/metrics/watchermetrics" + "github.com/cilium/tetragon/pkg/observer" + "github.com/cilium/tetragon/pkg/process" + "github.com/cilium/tetragon/pkg/version" + grpcmetrics "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus" + "github.com/prometheus/client_golang/prometheus" +) + +var ( + healthMetrics metrics.Group + healthMetricsOnce sync.Once +) + +func GetHealthGroup() metrics.Group { + healthMetricsOnce.Do(func() { + healthMetrics = 
metrics.NewMetricsGroup(true) + }) + return healthMetrics +} + +func EnableHealthMetrics(registry *prometheus.Registry) metrics.Group { + healthMetrics := GetHealthGroup() + registerHealthMetrics(healthMetrics) + registry.MustRegister(healthMetrics) + return healthMetrics +} + +// NOTE: Health metrics group is marked as constrained. However, the +// constraints are only enforced for metrics registered with RegisterWithInit, +// and custom collectors are responsible for enforcing it on their own. So the +// group's cardinality isn't really constrained until all metrics are migrated +// to the new interface. +func registerHealthMetrics(group metrics.Group) { + // build info metrics + group.MustRegister(version.NewBuildInfoCollector()) + // error metrics + errormetrics.RegisterMetrics(group) + group.ExtendInit(errormetrics.InitMetrics) + // event cache metrics + eventcachemetrics.RegisterMetrics(group) + group.MustRegister(eventcache.NewCacheCollector()) + group.ExtendInit(eventcachemetrics.InitMetrics) + // event metrics + eventmetrics.RegisterHealthMetrics(group) + group.ExtendInit(eventmetrics.InitHealthMetrics) + // map metrics + group.MustRegisterWithInit(observer.NewBPFCollector()) + // opcode metrics + opcodemetrics.RegisterMetrics(group) + group.ExtendInit(opcodemetrics.InitMetrics) + // policy filter metrics + policyfiltermetrics.RegisterMetrics(group) + group.ExtendInit(policyfiltermetrics.InitMetrics) + // process metrics + process.RegisterMetrics(group) + // ringbuf metrics + ringbufmetrics.RegisterMetrics(group) + // ringbuf queue metrics + ringbufqueuemetrics.RegisterMetrics(group) + // watcher metrics + watchermetrics.RegisterMetrics(group) + group.ExtendInit(watchermetrics.InitMetrics) + // observer metrics + observer.RegisterMetrics(group) + group.ExtendInit(observer.InitMetrics) + // tracing metrics + tracing.RegisterMetrics(group) + group.ExtendInit(tracing.InitMetrics) + // rate limit metrics + ratelimitmetrics.RegisterMetrics(group) + // exporter 
metrics + exporter.RegisterMetrics(group) + // cgroup rate metrics + cgroupratemetrics.RegisterMetrics(group) + // kprobe metrics + kprobemetrics.RegisterMetrics(group) + group.ExtendInitForDocs(kprobemetrics.InitMetricsForDocs) + // policy state metrics + group.MustRegisterWithInit(policystatemetrics.NewPolicyStateCollector()) + // gRPC metrics + group.MustRegister(grpcmetrics.NewServerMetrics()) +} diff --git a/pkg/metricsconfig/initmetrics.go b/pkg/metricsconfig/initmetrics.go index 9157b8b8b45..25f987df08d 100644 --- a/pkg/metricsconfig/initmetrics.go +++ b/pkg/metricsconfig/initmetrics.go @@ -4,76 +4,12 @@ package metricsconfig import ( - "github.com/cilium/tetragon/pkg/eventcache" - "github.com/cilium/tetragon/pkg/exporter" - "github.com/cilium/tetragon/pkg/grpc/tracing" - "github.com/cilium/tetragon/pkg/metrics/cgroupratemetrics" - "github.com/cilium/tetragon/pkg/metrics/errormetrics" - "github.com/cilium/tetragon/pkg/metrics/eventcachemetrics" "github.com/cilium/tetragon/pkg/metrics/eventmetrics" - "github.com/cilium/tetragon/pkg/metrics/kprobemetrics" - "github.com/cilium/tetragon/pkg/metrics/mapmetrics" - "github.com/cilium/tetragon/pkg/metrics/opcodemetrics" - "github.com/cilium/tetragon/pkg/metrics/policyfiltermetrics" - "github.com/cilium/tetragon/pkg/metrics/policystatemetrics" - "github.com/cilium/tetragon/pkg/metrics/ratelimitmetrics" - "github.com/cilium/tetragon/pkg/metrics/ringbufmetrics" - "github.com/cilium/tetragon/pkg/metrics/ringbufqueuemetrics" "github.com/cilium/tetragon/pkg/metrics/syscallmetrics" - "github.com/cilium/tetragon/pkg/metrics/watchermetrics" - "github.com/cilium/tetragon/pkg/observer" - "github.com/cilium/tetragon/pkg/process" - "github.com/cilium/tetragon/pkg/version" - grpcmetrics "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/collectors" ) -func initHealthMetrics(registry *prometheus.Registry) { - 
version.InitMetrics(registry) - errormetrics.InitMetrics(registry) - eventcachemetrics.InitMetrics(registry) - registry.MustRegister(eventcache.NewCacheCollector()) - eventmetrics.InitHealthMetrics(registry) - mapmetrics.InitMetrics(registry) - opcodemetrics.InitMetrics(registry) - policyfiltermetrics.InitMetrics(registry) - process.InitMetrics(registry) - ringbufmetrics.InitMetrics(registry) - ringbufqueuemetrics.InitMetrics(registry) - watchermetrics.InitMetrics(registry) - observer.InitMetrics(registry) - tracing.InitMetrics(registry) - ratelimitmetrics.InitMetrics(registry) - exporter.InitMetrics(registry) - cgroupratemetrics.InitMetrics(registry) - - // register common third-party collectors - registry.MustRegister(grpcmetrics.NewServerMetrics()) -} - -func initAllHealthMetrics(registry *prometheus.Registry) { - initHealthMetrics(registry) - - kprobemetrics.InitMetrics(registry) - policystatemetrics.InitMetrics(registry) - - // register custom collectors - registry.MustRegister(observer.NewBPFCollector()) - registry.MustRegister(eventmetrics.NewBPFCollector()) -} - -func InitHealthMetricsForDocs(registry *prometheus.Registry) { - initHealthMetrics(registry) - - kprobemetrics.InitMetricsForDocs(registry) - policystatemetrics.InitMetricsForDocs(registry) - - // register custom zero collectors - registry.MustRegister(observer.NewBPFZeroCollector()) - registry.MustRegister(eventmetrics.NewBPFZeroCollector()) -} - func initResourcesMetrics(registry *prometheus.Registry) { // register common third-party collectors registry.MustRegister(collectors.NewGoCollector()) @@ -99,7 +35,8 @@ func InitEventsMetricsForDocs(registry *prometheus.Registry) { } func InitAllMetrics(registry *prometheus.Registry) { - initAllHealthMetrics(registry) + healthMetrics := EnableHealthMetrics(registry) + healthMetrics.Init() initAllResourcesMetrics(registry) initAllEventsMetrics(registry) } diff --git a/pkg/observer/data_stats.go b/pkg/observer/data_stats.go index 2d9832687d3..5323dab1fb2 
100644 --- a/pkg/observer/data_stats.go +++ b/pkg/observer/data_stats.go @@ -4,6 +4,7 @@ package observer import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -42,20 +43,23 @@ var ( }, []string{"op"}) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(DataEventStats) - registry.MustRegister(DataEventSizeHist) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(DataEventStats) + group.MustRegister(DataEventSizeHist) +} +// TODO: +// 1. Define metrics using functions from pkg/metrics +// 2. Move initialization code to metrics definitions if needed or remove it +// if not needed +// 3. Use label values defined as metrics.ConstrainedLabel +func InitMetrics() { // Initialize metrics with labels for _, ev := range DataEventTypeStrings { DataEventStats.WithLabelValues(ev).Add(0) } DataEventSizeHist.WithLabelValues(DataEventOpOk.String()) DataEventSizeHist.WithLabelValues(DataEventOpBad.String()) - - // NOTES: - // * Don't confuse op in data_event_size with ops.OpCode - // * Don't confuse event in data_events_total with tetragon.EventType } type DataEventType int diff --git a/pkg/process/metrics.go b/pkg/process/metrics.go index 61425a8c4f8..0998d191b7d 100644 --- a/pkg/process/metrics.go +++ b/pkg/process/metrics.go @@ -4,6 +4,7 @@ package process import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -45,7 +46,7 @@ func NewCacheCollector() prometheus.Collector { } } -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(ProcessCacheTotal) - registry.MustRegister(NewCacheCollector()) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(ProcessCacheTotal) + group.MustRegister(NewCacheCollector()) } diff --git a/pkg/version/metrics.go b/pkg/version/metrics.go index e7568d775e4..79d9b739475 100644 --- 
a/pkg/version/metrics.go +++ b/pkg/version/metrics.go @@ -31,7 +31,7 @@ func (b *buildInfoCollector) Collect(ch chan<- prometheus.Metric) { ch <- b.self } -func newBuildInfoCollector() prometheus.Collector { +func NewBuildInfoCollector() prometheus.Collector { buildInfo := ReadBuildInfo() c := &buildInfoCollector{ prometheus.MustNewConstMetric( @@ -52,7 +52,3 @@ func newBuildInfoCollector() prometheus.Collector { c.init(c.self) return c } - -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(newBuildInfoCollector()) -}