From 5162c2ebe568cf4e06769cf4e702c84e2dc68834 Mon Sep 17 00:00:00 2001 From: Igor Velichkovich Date: Sat, 4 Jan 2025 11:36:13 -0800 Subject: [PATCH 1/3] add configurable pagination to nfd-gc --- cmd/nfd-gc/main.go | 2 ++ pkg/nfd-gc/nfd-gc.go | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/cmd/nfd-gc/main.go b/cmd/nfd-gc/main.go index 799185514f..ee78dff5d9 100644 --- a/cmd/nfd-gc/main.go +++ b/cmd/nfd-gc/main.go @@ -85,6 +85,8 @@ func initFlags(flagset *flag.FlagSet) *nfdgarbagecollector.Args { "Kubeconfig to use") flagset.IntVar(&args.MetricsPort, "metrics", 8081, "Port on which to expose metrics.") + flagset.Int64Var(&args.ListSize, "list-size", 200, + "the pagination size used when listing node features") klog.InitFlags(flagset) diff --git a/pkg/nfd-gc/nfd-gc.go b/pkg/nfd-gc/nfd-gc.go index c3dc8dae37..de8ab61ad1 100644 --- a/pkg/nfd-gc/nfd-gc.go +++ b/pkg/nfd-gc/nfd-gc.go @@ -49,6 +49,7 @@ type Args struct { GCPeriod time.Duration Kubeconfig string MetricsPort int + ListSize int64 } type NfdGarbageCollector interface { @@ -155,7 +156,7 @@ func (n *nfdGarbageCollector) garbageCollect() { listAndHandle := func(gvr schema.GroupVersionResource, handler func(metav1.PartialObjectMetadata)) { opts := metav1.ListOptions{ - Limit: 200, + Limit: n.args.ListSize, } for { rsp, err := n.client.Resource(gvr).List(context.TODO(), opts) From 3f941a760c5b5306b32dc6f111835a890bd3cbfd Mon Sep 17 00:00:00 2001 From: Igor Velichkovich Date: Sat, 4 Jan 2025 17:48:13 -0800 Subject: [PATCH 2/3] add documentation --- docs/reference/gc-commandline-reference.md | 5 +++++ docs/usage/nfd-gc.md | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/docs/reference/gc-commandline-reference.md b/docs/reference/gc-commandline-reference.md index 101adaf280..6b17db164e 100644 --- a/docs/reference/gc-commandline-reference.md +++ b/docs/reference/gc-commandline-reference.md @@ -31,6 +31,11 @@ Print usage and exit. Print version and exit. 
+### -list-size + +The pagination size to use when calling api-server to list nodefeatures. +Pagination is useful for controlling the load on api-server/etcd as the nodefeature resources can be large. + ### -gc-interval The `-gc-interval` specifies the interval between periodic garbage collector runs. diff --git a/docs/usage/nfd-gc.md b/docs/usage/nfd-gc.md index 5481598e2a..c21b5f6d80 100644 --- a/docs/usage/nfd-gc.md +++ b/docs/usage/nfd-gc.md @@ -27,3 +27,10 @@ default garbage collector interval is set to 1h which is the value when no In Helm deployments see [garbage collector parameters](../deployment/helm.md#garbage-collector-parameters) for altering the nfd-gc configuration. + +## List Pagination & Scalability + +When NFD GC starts up it lists nodefeatures from api-server. +These resources can be large and in a large cluster this initial list call to sync the informer cache can be +expensive and heavy on api-server/etcd. You can use the `-list-size` argument to set the pagination size +and limit the load caused by this list call. From a7634030de78622db8863bc011f70cb6e696c40b Mon Sep 17 00:00:00 2001 From: Igor Velichkovich Date: Sat, 4 Jan 2025 17:54:18 -0800 Subject: [PATCH 3/3] improve docs --- docs/reference/gc-commandline-reference.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/reference/gc-commandline-reference.md b/docs/reference/gc-commandline-reference.md index 6b17db164e..e18ce03165 100644 --- a/docs/reference/gc-commandline-reference.md +++ b/docs/reference/gc-commandline-reference.md @@ -35,6 +35,15 @@ Print version and exit. The pagination size to use when calling api-server to list nodefeatures. Pagination is useful for controlling the load on api-server/etcd as the nodefeature resources can be large. +A value of 0 will disable pagination. + +Default: 200 + +Example: + +```bash +nfd-gc -list-size=100 +``` ### -gc-interval