diff --git a/cmd/tetragon-metrics-docs/main.go b/cmd/tetragon-metrics-docs/main.go index a5c2c446421..d96d254ed8f 100644 --- a/cmd/tetragon-metrics-docs/main.go +++ b/cmd/tetragon-metrics-docs/main.go @@ -27,7 +27,7 @@ func main() { func initMetrics(target string, reg *prometheus.Registry, _ *slog.Logger) error { switch target { case "health": - metricsconfig.InitHealthMetricsForDocs(reg) + metricsconfig.EnableHealthMetrics(reg).InitForDocs() case "resources": metricsconfig.InitResourcesMetricsForDocs(reg) case "events": diff --git a/pkg/exporter/metrics.go b/pkg/exporter/metrics.go index 75188d315b1..a95db1f0392 100644 --- a/pkg/exporter/metrics.go +++ b/pkg/exporter/metrics.go @@ -6,6 +6,7 @@ package exporter import ( "io" + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -30,10 +31,10 @@ var ( }) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(eventsExportedTotal) - registry.MustRegister(eventsExportedBytesTotal) - registry.MustRegister(eventsExportTimestamp) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(eventsExportedTotal) + group.MustRegister(eventsExportedBytesTotal) + group.MustRegister(eventsExportTimestamp) } func newExportedBytesCounterWriter(w io.Writer, c prometheus.Counter) io.Writer { diff --git a/pkg/grpc/tracing/stats.go b/pkg/grpc/tracing/stats.go index ada802ea0aa..9bb6da45770 100644 --- a/pkg/grpc/tracing/stats.go +++ b/pkg/grpc/tracing/stats.go @@ -4,6 +4,7 @@ package tracing import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -17,16 +18,20 @@ var ( }, []string{"count"}) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(LoaderStats) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(LoaderStats) +} +// TODO: +// 1. 
Define metrics using functions from pkg/metrics +// 2. Move initialization code to metrics definitions if needed or remove it +// if not needed +// 3. Use label values defined as metrics.ConstrainedLabel +func InitMetrics() { // Initialize metrics with labels for _, ty := range LoaderTypeStrings { LoaderStats.WithLabelValues(ty).Add(0) } - - // NOTES: - // * Rename process_loader_stats metric (to e.g. process_loader_events_total) and count label (to e.g. event)? } type LoaderType int diff --git a/pkg/metrics/cgroupratemetrics/cgroupratemetrics.go b/pkg/metrics/cgroupratemetrics/cgroupratemetrics.go index 1d502281cd8..5bacd61a649 100644 --- a/pkg/metrics/cgroupratemetrics/cgroupratemetrics.go +++ b/pkg/metrics/cgroupratemetrics/cgroupratemetrics.go @@ -4,6 +4,7 @@ package cgroupratemetrics import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -41,8 +42,8 @@ var ( }, []string{"type"}) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(CgroupRateTotal) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(CgroupRateTotal) } // Get a new handle on an ErrorTotal metric for an ErrorType diff --git a/pkg/metrics/errormetrics/errormetrics.go b/pkg/metrics/errormetrics/errormetrics.go index a60f6d0bbe7..0874b0dcea8 100644 --- a/pkg/metrics/errormetrics/errormetrics.go +++ b/pkg/metrics/errormetrics/errormetrics.go @@ -7,6 +7,7 @@ import ( "fmt" "github.com/cilium/tetragon/pkg/api/ops" + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -84,10 +85,17 @@ var ( }, []string{"opcode", "error_type"}) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(ErrorTotal) - registry.MustRegister(HandlerErrors) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(ErrorTotal) + group.MustRegister(HandlerErrors) +} +// TODO: 
+// 1. Define metrics using functions from pkg/metrics +// 2. Move initialization code to metrics definitions if needed or remove it +// if not needed +// 3. Use label values defined as metrics.ConstrainedLabel +func InitMetrics() { // Initialize metrics with labels for er := range errorTypeLabelValues { GetErrorTotal(er).Add(0) @@ -100,13 +108,6 @@ func InitMetrics(registry *prometheus.Registry) { // NB: We initialize only ops.MsgOpUndef here, but unknown_opcode can occur for any opcode // that is not explicitly handled. GetHandlerErrors(ops.MsgOpUndef, HandlePerfUnknownOp).Add(0) - - // NOTES: - // * op, msg_op, opcode - standardize on a label (+ add human-readable label) - // * error, error_type, type - standardize on a label - // * Delete errors_total{type="handler_error"} - it duplicates handler_errors_total - // * Consider further splitting errors_total - // * Rename handler_errors_total to event_handler_errors_total? } // Get a new handle on an ErrorTotal metric for an ErrorType diff --git a/pkg/metrics/eventcachemetrics/eventcachemetrics.go b/pkg/metrics/eventcachemetrics/eventcachemetrics.go index 93b816bcb22..a2c33f7b9ca 100644 --- a/pkg/metrics/eventcachemetrics/eventcachemetrics.go +++ b/pkg/metrics/eventcachemetrics/eventcachemetrics.go @@ -5,6 +5,7 @@ package eventcachemetrics import ( "github.com/cilium/tetragon/api/v1/tetragon" + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -79,14 +80,21 @@ var ( }, []string{"event_type"}) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(processInfoErrors) - registry.MustRegister(podInfoErrors) - registry.MustRegister(EventCacheCount) - registry.MustRegister(eventCacheErrorsTotal) - registry.MustRegister(eventCacheRetriesTotal) - registry.MustRegister(parentInfoErrors) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(processInfoErrors) + group.MustRegister(podInfoErrors) + 
group.MustRegister(EventCacheCount) + group.MustRegister(eventCacheErrorsTotal) + group.MustRegister(eventCacheRetriesTotal) + group.MustRegister(parentInfoErrors) +} +// TODO: +// 1. Define metrics using functions from pkg/metrics +// 2. Move initialization code to metrics definitions if needed or remove it +// if not needed +// 3. Use label values defined as metrics.ConstrainedLabel +func InitMetrics() { // Initialize metrics with labels for en := range cacheEntryTypeLabelValues { EventCacheRetries(en).Add(0) @@ -101,11 +109,6 @@ func InitMetrics(registry *prometheus.Registry) { } } } - - // NOTES: - // * error, error_type, type - standardize on a label - // * event, event_type, type - standardize on a label - // * Consider merging event cache errors metrics into one with error, event, entry labels } // Get a new handle on a processInfoErrors metric for an eventType diff --git a/pkg/metrics/eventmetrics/eventmetrics.go b/pkg/metrics/eventmetrics/eventmetrics.go index e3deb2c108b..97a68b17108 100644 --- a/pkg/metrics/eventmetrics/eventmetrics.go +++ b/pkg/metrics/eventmetrics/eventmetrics.go @@ -53,19 +53,22 @@ var ( }, []string{"policy", "hook"}) ) -func InitHealthMetrics(registry *prometheus.Registry) { - registry.MustRegister(FlagCount) - registry.MustRegister(NotifyOverflowedEvents) - // custom collectors are registered independently +func RegisterHealthMetrics(group metrics.Group) { + group.MustRegister(FlagCount) + group.MustRegister(NotifyOverflowedEvents) + group.MustRegisterWithInit(NewBPFCollector()) +} +// TODO: +// 1. Define metrics using functions from pkg/metrics +// 2. Move initialization code to metrics definitions if needed or remove it +// if not needed +// 3. 
Use label values defined as metrics.ConstrainedLabel +func InitHealthMetrics() { // Initialize metrics with labels for _, v := range exec.FlagStrings { FlagCount.WithLabelValues(v).Add(0) } - - // NOTES: - // * op, msg_op, opcode - standardize on a label (+ add human-readable label) - // * event, event_type, type - standardize on a label } func InitEventsMetrics(registry *prometheus.Registry) { diff --git a/pkg/metrics/kprobemetrics/kprobemetrics.go b/pkg/metrics/kprobemetrics/kprobemetrics.go index 2353dab83e5..6d5073570e0 100644 --- a/pkg/metrics/kprobemetrics/kprobemetrics.go +++ b/pkg/metrics/kprobemetrics/kprobemetrics.go @@ -4,6 +4,7 @@ package kprobemetrics import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -45,18 +46,18 @@ var ( }) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(MergeErrors) - registry.MustRegister(MergeOkTotal) - registry.MustRegister(MergePushed) - - // NOTES: - // * Consider merging ok and errors into one with status label +func RegisterMetrics(group metrics.Group) { + group.MustRegister(MergeErrors) + group.MustRegister(MergeOkTotal) + group.MustRegister(MergePushed) } -func InitMetricsForDocs(registry *prometheus.Registry) { - InitMetrics(registry) - +// TODO: +// 1. Define metrics using functions from pkg/metrics +// 2. Move initialization code to metrics definitions if needed or remove it +// if not needed +// 3. 
Use label values defined as metrics.ConstrainedLabel +func InitMetricsForDocs() { // Initialize metrics with example labels for _, curr := range mergeErrorTypeLabelValues { for _, prev := range mergeErrorTypeLabelValues { diff --git a/pkg/metrics/opcodemetrics/opcodemetrics.go b/pkg/metrics/opcodemetrics/opcodemetrics.go index 72cb089f4c4..dc69f94f237 100644 --- a/pkg/metrics/opcodemetrics/opcodemetrics.go +++ b/pkg/metrics/opcodemetrics/opcodemetrics.go @@ -7,6 +7,7 @@ import ( "fmt" "github.com/cilium/tetragon/pkg/api/ops" + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -28,10 +29,17 @@ var ( }, []string{"op"}) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(MsgOpsCount) - registry.MustRegister(LatencyStats) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(MsgOpsCount) + group.MustRegister(LatencyStats) +} +// TODO: +// 1. Define metrics using functions from pkg/metrics +// 2. Move initialization code to metrics definitions if needed or remove it +// if not needed +// 3. Use label values defined as metrics.ConstrainedLabel +func InitMetrics() { // Initialize all metrics for opcode := range ops.OpCodeStrings { if opcode != ops.MsgOpUndef && opcode != ops.MsgOpTest { @@ -39,10 +47,6 @@ func InitMetrics(registry *prometheus.Registry) { LatencyStats.WithLabelValues(fmt.Sprint(int32(opcode))) } } - - // NOTES: - // * op, msg_op, opcode - standardize on a label (+ add human-readable label) - // * Rename handling_latency to handler_latency_microseconds? 
} // Get a new handle on a msgOpsCount metric for an OpCode diff --git a/pkg/metrics/policyfiltermetrics/policyfiltermetrics.go b/pkg/metrics/policyfiltermetrics/policyfiltermetrics.go index 0c60cf844e9..d4f18110a7a 100644 --- a/pkg/metrics/policyfiltermetrics/policyfiltermetrics.go +++ b/pkg/metrics/policyfiltermetrics/policyfiltermetrics.go @@ -4,6 +4,7 @@ package policyfiltermetrics import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -80,9 +81,16 @@ var ( }) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(PolicyFilterOpMetrics, PolicyFilterHookContainerNameMissingMetrics) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(PolicyFilterOpMetrics, PolicyFilterHookContainerNameMissingMetrics) +} +// TODO: +// 1. Define metrics using functions from pkg/metrics +// 2. Move initialization code to metrics definitions if needed or remove it +// if not needed +// 3. Use label values defined as metrics.ConstrainedLabel +func InitMetrics() { // Initialize metrics with labels for _, subsys := range subsysLabelValues { for _, op := range operationLabelValues { @@ -93,10 +101,6 @@ func InitMetrics(registry *prometheus.Registry) { } } } - - // NOTES: - // * Don't confuse op in policyfilter_metrics_total with ops.OpCode - // * Rename policyfilter_metrics_total to get rid of _metrics? 
} func OpInc(subsys Subsys, op Operation, err string) { diff --git a/pkg/metrics/ratelimitmetrics/ratelimitmetrics.go b/pkg/metrics/ratelimitmetrics/ratelimitmetrics.go index caa64296294..86daba4952c 100644 --- a/pkg/metrics/ratelimitmetrics/ratelimitmetrics.go +++ b/pkg/metrics/ratelimitmetrics/ratelimitmetrics.go @@ -4,6 +4,7 @@ package ratelimitmetrics import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -17,6 +18,6 @@ var ( }) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(RateLimitDropped) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(RateLimitDropped) } diff --git a/pkg/metrics/ringbufmetrics/ringbufmetrics.go b/pkg/metrics/ringbufmetrics/ringbufmetrics.go index 92ebc57b542..da3360cc621 100644 --- a/pkg/metrics/ringbufmetrics/ringbufmetrics.go +++ b/pkg/metrics/ringbufmetrics/ringbufmetrics.go @@ -4,6 +4,7 @@ package ringbufmetrics import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -29,8 +30,8 @@ var ( }) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(PerfEventReceived) - registry.MustRegister(PerfEventLost) - registry.MustRegister(PerfEventErrors) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(PerfEventReceived) + group.MustRegister(PerfEventLost) + group.MustRegister(PerfEventErrors) } diff --git a/pkg/metrics/ringbufqueuemetrics/ringbufqueuemetrics.go b/pkg/metrics/ringbufqueuemetrics/ringbufqueuemetrics.go index a5ce095ec36..de7a84f5bd4 100644 --- a/pkg/metrics/ringbufqueuemetrics/ringbufqueuemetrics.go +++ b/pkg/metrics/ringbufqueuemetrics/ringbufqueuemetrics.go @@ -4,6 +4,7 @@ package ringbufqueuemetrics import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ 
-23,7 +24,7 @@ var ( }) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(Received) - registry.MustRegister(Lost) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(Received) + group.MustRegister(Lost) } diff --git a/pkg/metrics/watchermetrics/watchermetrics.go b/pkg/metrics/watchermetrics/watchermetrics.go index 2cdd1abb2a0..476576432fe 100644 --- a/pkg/metrics/watchermetrics/watchermetrics.go +++ b/pkg/metrics/watchermetrics/watchermetrics.go @@ -4,6 +4,7 @@ package watchermetrics import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -45,16 +46,20 @@ var ( }, []string{"watcher"}) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(WatcherErrors) - registry.MustRegister(WatcherEvents) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(WatcherErrors) + group.MustRegister(WatcherEvents) +} +// TODO: +// 1. Define metrics using functions from pkg/metrics +// 2. Move initialization code to metrics definitions if needed or remove it +// if not needed +// 3. 
Use label values defined as metrics.ConstrainedLabel +func InitMetrics() { // Initialize metrics with labels GetWatcherEvents(K8sWatcher).Add(0) GetWatcherErrors(K8sWatcher, FailedToGetPodError).Add(0) - - // NOTES: - // * error, error_type, type - standardize on a label } // Get a new handle on an WatcherEvents metric for a watcher type diff --git a/pkg/metricsconfig/healthmetrics.go b/pkg/metricsconfig/healthmetrics.go new file mode 100644 index 00000000000..4b7483d84cb --- /dev/null +++ b/pkg/metricsconfig/healthmetrics.go @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of Tetragon + +package metricsconfig + +import ( + "sync" + + "github.com/cilium/tetragon/pkg/eventcache" + "github.com/cilium/tetragon/pkg/exporter" + "github.com/cilium/tetragon/pkg/grpc/tracing" + "github.com/cilium/tetragon/pkg/metrics" + "github.com/cilium/tetragon/pkg/metrics/cgroupratemetrics" + "github.com/cilium/tetragon/pkg/metrics/errormetrics" + "github.com/cilium/tetragon/pkg/metrics/eventcachemetrics" + "github.com/cilium/tetragon/pkg/metrics/eventmetrics" + "github.com/cilium/tetragon/pkg/metrics/kprobemetrics" + "github.com/cilium/tetragon/pkg/metrics/opcodemetrics" + "github.com/cilium/tetragon/pkg/metrics/policyfiltermetrics" + "github.com/cilium/tetragon/pkg/metrics/policystatemetrics" + "github.com/cilium/tetragon/pkg/metrics/ratelimitmetrics" + "github.com/cilium/tetragon/pkg/metrics/ringbufmetrics" + "github.com/cilium/tetragon/pkg/metrics/ringbufqueuemetrics" + "github.com/cilium/tetragon/pkg/metrics/watchermetrics" + "github.com/cilium/tetragon/pkg/observer" + "github.com/cilium/tetragon/pkg/process" + "github.com/cilium/tetragon/pkg/version" + grpcmetrics "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus" + "github.com/prometheus/client_golang/prometheus" +) + +var ( + healthMetrics metrics.Group + healthMetricsOnce sync.Once +) + +func GetHealthGroup() metrics.Group { + healthMetricsOnce.Do(func() { + healthMetrics = 
metrics.NewMetricsGroup(true) + }) + return healthMetrics +} + +func EnableHealthMetrics(registry *prometheus.Registry) metrics.Group { + healthMetrics := GetHealthGroup() + registerHealthMetrics(healthMetrics) + registry.MustRegister(healthMetrics) + return healthMetrics +} + +// NOTE: Health metrics group is marked as constrained. However, the +// constraints are only enforced for metrics registered with RegisterWithInit, +// and custom collectors are responsible for enforcing it on their own. So the +// group's cardinality isn't really constrained until all metrics are migrated +// to the new interface. +func registerHealthMetrics(group metrics.Group) { + // build info metrics + group.MustRegister(version.NewBuildInfoCollector()) + // error metrics + errormetrics.RegisterMetrics(group) + group.ExtendInit(errormetrics.InitMetrics) + // event cache metrics + eventcachemetrics.RegisterMetrics(group) + group.MustRegister(eventcache.NewCacheCollector()) + group.ExtendInit(eventcachemetrics.InitMetrics) + // event metrics + eventmetrics.RegisterHealthMetrics(group) + group.ExtendInit(eventmetrics.InitHealthMetrics) + // map metrics + group.MustRegisterWithInit(observer.NewBPFCollector()) + // opcode metrics + opcodemetrics.RegisterMetrics(group) + group.ExtendInit(opcodemetrics.InitMetrics) + // policy filter metrics + policyfiltermetrics.RegisterMetrics(group) + group.ExtendInit(policyfiltermetrics.InitMetrics) + // process metrics + process.RegisterMetrics(group) + // ringbuf metrics + ringbufmetrics.RegisterMetrics(group) + // ringbuf queue metrics + ringbufqueuemetrics.RegisterMetrics(group) + // watcher metrics + watchermetrics.RegisterMetrics(group) + group.ExtendInit(watchermetrics.InitMetrics) + // observer metrics + observer.RegisterMetrics(group) + group.ExtendInit(observer.InitMetrics) + // tracing metrics + tracing.RegisterMetrics(group) + group.ExtendInit(tracing.InitMetrics) + // rate limit metrics + ratelimitmetrics.RegisterMetrics(group) + // exporter 
metrics + exporter.RegisterMetrics(group) + // cgroup rate metrics + cgroupratemetrics.RegisterMetrics(group) + // kprobe metrics + kprobemetrics.RegisterMetrics(group) + group.ExtendInitForDocs(kprobemetrics.InitMetricsForDocs) + // policy state metrics + group.MustRegisterWithInit(policystatemetrics.NewPolicyStateCollector()) + // gRPC metrics + group.MustRegister(grpcmetrics.NewServerMetrics()) +} diff --git a/pkg/metricsconfig/initmetrics.go b/pkg/metricsconfig/initmetrics.go index 9157b8b8b45..25f987df08d 100644 --- a/pkg/metricsconfig/initmetrics.go +++ b/pkg/metricsconfig/initmetrics.go @@ -4,76 +4,12 @@ package metricsconfig import ( - "github.com/cilium/tetragon/pkg/eventcache" - "github.com/cilium/tetragon/pkg/exporter" - "github.com/cilium/tetragon/pkg/grpc/tracing" - "github.com/cilium/tetragon/pkg/metrics/cgroupratemetrics" - "github.com/cilium/tetragon/pkg/metrics/errormetrics" - "github.com/cilium/tetragon/pkg/metrics/eventcachemetrics" "github.com/cilium/tetragon/pkg/metrics/eventmetrics" - "github.com/cilium/tetragon/pkg/metrics/kprobemetrics" - "github.com/cilium/tetragon/pkg/metrics/mapmetrics" - "github.com/cilium/tetragon/pkg/metrics/opcodemetrics" - "github.com/cilium/tetragon/pkg/metrics/policyfiltermetrics" - "github.com/cilium/tetragon/pkg/metrics/policystatemetrics" - "github.com/cilium/tetragon/pkg/metrics/ratelimitmetrics" - "github.com/cilium/tetragon/pkg/metrics/ringbufmetrics" - "github.com/cilium/tetragon/pkg/metrics/ringbufqueuemetrics" "github.com/cilium/tetragon/pkg/metrics/syscallmetrics" - "github.com/cilium/tetragon/pkg/metrics/watchermetrics" - "github.com/cilium/tetragon/pkg/observer" - "github.com/cilium/tetragon/pkg/process" - "github.com/cilium/tetragon/pkg/version" - grpcmetrics "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/collectors" ) -func initHealthMetrics(registry *prometheus.Registry) { - 
version.InitMetrics(registry) - errormetrics.InitMetrics(registry) - eventcachemetrics.InitMetrics(registry) - registry.MustRegister(eventcache.NewCacheCollector()) - eventmetrics.InitHealthMetrics(registry) - mapmetrics.InitMetrics(registry) - opcodemetrics.InitMetrics(registry) - policyfiltermetrics.InitMetrics(registry) - process.InitMetrics(registry) - ringbufmetrics.InitMetrics(registry) - ringbufqueuemetrics.InitMetrics(registry) - watchermetrics.InitMetrics(registry) - observer.InitMetrics(registry) - tracing.InitMetrics(registry) - ratelimitmetrics.InitMetrics(registry) - exporter.InitMetrics(registry) - cgroupratemetrics.InitMetrics(registry) - - // register common third-party collectors - registry.MustRegister(grpcmetrics.NewServerMetrics()) -} - -func initAllHealthMetrics(registry *prometheus.Registry) { - initHealthMetrics(registry) - - kprobemetrics.InitMetrics(registry) - policystatemetrics.InitMetrics(registry) - - // register custom collectors - registry.MustRegister(observer.NewBPFCollector()) - registry.MustRegister(eventmetrics.NewBPFCollector()) -} - -func InitHealthMetricsForDocs(registry *prometheus.Registry) { - initHealthMetrics(registry) - - kprobemetrics.InitMetricsForDocs(registry) - policystatemetrics.InitMetricsForDocs(registry) - - // register custom zero collectors - registry.MustRegister(observer.NewBPFZeroCollector()) - registry.MustRegister(eventmetrics.NewBPFZeroCollector()) -} - func initResourcesMetrics(registry *prometheus.Registry) { // register common third-party collectors registry.MustRegister(collectors.NewGoCollector()) @@ -99,7 +35,8 @@ func InitEventsMetricsForDocs(registry *prometheus.Registry) { } func InitAllMetrics(registry *prometheus.Registry) { - initAllHealthMetrics(registry) + healthMetrics := EnableHealthMetrics(registry) + healthMetrics.Init() initAllResourcesMetrics(registry) initAllEventsMetrics(registry) } diff --git a/pkg/observer/data_stats.go b/pkg/observer/data_stats.go index 2d9832687d3..5323dab1fb2 
100644 --- a/pkg/observer/data_stats.go +++ b/pkg/observer/data_stats.go @@ -4,6 +4,7 @@ package observer import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -42,20 +43,23 @@ var ( }, []string{"op"}) ) -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(DataEventStats) - registry.MustRegister(DataEventSizeHist) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(DataEventStats) + group.MustRegister(DataEventSizeHist) +} +// TODO: +// 1. Define metrics using functions from pkg/metrics +// 2. Move initialization code to metrics definitions if needed or remove it +// if not needed +// 3. Use label values defined as metrics.ConstrainedLabel +func InitMetrics() { // Initialize metrics with labels for _, ev := range DataEventTypeStrings { DataEventStats.WithLabelValues(ev).Add(0) } DataEventSizeHist.WithLabelValues(DataEventOpOk.String()) DataEventSizeHist.WithLabelValues(DataEventOpBad.String()) - - // NOTES: - // * Don't confuse op in data_event_size with ops.OpCode - // * Don't confuse event in data_events_total with tetragon.EventType } type DataEventType int diff --git a/pkg/process/metrics.go b/pkg/process/metrics.go index 61425a8c4f8..0998d191b7d 100644 --- a/pkg/process/metrics.go +++ b/pkg/process/metrics.go @@ -4,6 +4,7 @@ package process import ( + "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/consts" "github.com/prometheus/client_golang/prometheus" ) @@ -45,7 +46,7 @@ func NewCacheCollector() prometheus.Collector { } } -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(ProcessCacheTotal) - registry.MustRegister(NewCacheCollector()) +func RegisterMetrics(group metrics.Group) { + group.MustRegister(ProcessCacheTotal) + group.MustRegister(NewCacheCollector()) } diff --git a/pkg/version/metrics.go b/pkg/version/metrics.go index e7568d775e4..79d9b739475 100644 --- 
a/pkg/version/metrics.go +++ b/pkg/version/metrics.go @@ -31,7 +31,7 @@ func (b *buildInfoCollector) Collect(ch chan<- prometheus.Metric) { ch <- b.self } -func newBuildInfoCollector() prometheus.Collector { +func NewBuildInfoCollector() prometheus.Collector { buildInfo := ReadBuildInfo() c := &buildInfoCollector{ prometheus.MustNewConstMetric( @@ -52,7 +52,3 @@ func newBuildInfoCollector() prometheus.Collector { c.init(c.self) return c } - -func InitMetrics(registry *prometheus.Registry) { - registry.MustRegister(newBuildInfoCollector()) -}