From 5f8e6732a2a860ce6903faedabd8c018bae42f2d Mon Sep 17 00:00:00 2001 From: Yuvaraj Kakaraparthi Date: Tue, 7 Jan 2025 14:26:01 -0800 Subject: [PATCH] sync machine labels --- controllers/alias.go | 4 ++ .../src/reference/api/metadata-propagation.md | 6 +- ...27-label-sync-between-machine-and-nodes.md | 6 +- .../controllers/machine/machine_controller.go | 3 + .../machine/machine_controller_noderef.go | 14 +++- .../machine_controller_noderef_test.go | 70 ++++++++++++++++--- internal/controllers/machine/suite_test.go | 1 + main.go | 22 ++++++ 8 files changed, 110 insertions(+), 16 deletions(-) diff --git a/controllers/alias.go b/controllers/alias.go index 09d429f3a1aa..aa78557e8758 100644 --- a/controllers/alias.go +++ b/controllers/alias.go @@ -18,6 +18,7 @@ package controllers import ( "context" + "regexp" "time" ctrl "sigs.k8s.io/controller-runtime" @@ -72,6 +73,8 @@ type MachineReconciler struct { WatchFilterValue string RemoteConditionsGracePeriod time.Duration + + AdditionalSyncMachineLabels []*regexp.Regexp } func (r *MachineReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { @@ -81,6 +84,7 @@ func (r *MachineReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manag ClusterCache: r.ClusterCache, WatchFilterValue: r.WatchFilterValue, RemoteConditionsGracePeriod: r.RemoteConditionsGracePeriod, + AdditionalSyncMachineLabels: r.AdditionalSyncMachineLabels, }).SetupWithManager(ctx, mgr, options) } diff --git a/docs/book/src/reference/api/metadata-propagation.md b/docs/book/src/reference/api/metadata-propagation.md index ca20c25dcee2..7a6fe39c52b4 100644 --- a/docs/book/src/reference/api/metadata-propagation.md +++ b/docs/book/src/reference/api/metadata-propagation.md @@ -63,7 +63,9 @@ Top-level labels that meet a specific cretria are propagated to the Node labels - `.labels.[label-meets-criteria]` => `Node.labels` - `.annotations` => Not propagated. 
-Label should meet one of the following criteria to propagate to Node: +Labels that meet at least one of the following criteria are always propagated to the Node: - Has `node-role.kubernetes.io` as prefix. - Belongs to `node-restriction.kubernetes.io` domain. -- Belongs to `node.cluster.x-k8s.io` domain. +- Belongs to `node.cluster.x-k8s.io` domain. + +In addition, any labels that match at least one of the regexes provided by the `--additional-sync-machine-labels` flag on the manager will be synced from the Machine to the Node. diff --git a/docs/proposals/20220927-label-sync-between-machine-and-nodes.md b/docs/proposals/20220927-label-sync-between-machine-and-nodes.md index cbdbe08e5329..01e27aafca3c 100644 --- a/docs/proposals/20220927-label-sync-between-machine-and-nodes.md +++ b/docs/proposals/20220927-label-sync-between-machine-and-nodes.md @@ -68,6 +68,7 @@ With the "divide and conquer" principle in mind this proposal aims to address th - Support label sync from Machine to the linked Kubernetes node, limited to `node-role.kubernetes.io/` prefix and the `node-restriction.kubernetes.io` domain. - Support syncing labels from Machine to the linked Kubernetes node for the Cluster API owned `node.cluster.x-k8s.io` domain. +- Support a flag to sync additional user configured labels from the Machine to the Node. ### Non-Goals @@ -98,7 +99,9 @@ While designing a solution for syncing labels between Machine and underlying Kub ### Label domains & prefixes -The idea of scoping synchronization to a well defined set of labels is a first answer to security/concurrency concerns; labels to be managed by Cluster API have been selected based on following criteria: +A default list of labels would always be synced from the Machines to the Nodes. An additional list of labels can be synced from the Machine to the Node by providing a list of regexes as a flag to the manager. 
+ +The following is the default list of label domains that would always be synced from Machines to Nodes: - The `node-role.kubernetes.io` label has been used widely in the past to identify the role of a Kubernetes Node (e.g. `node-role.kubernetes.io/worker=''`). For example, `kubectl get node` looks for this specific label when displaying the role to the user. @@ -163,3 +166,4 @@ Users could also implement their own label synchronizer in their tooling, but th - [ ] 09/27/2022: First Draft of this document - [ ] 09/28/2022: First Draft of this document presented in the Cluster API office hours meeting +- [ ] 01/09/2025: Update to support configurable label syncing. Ref: [11657](https://github.com/kubernetes-sigs/cluster-api/issues/11657) diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go index fbd79f7f280b..739400ef8005 100644 --- a/internal/controllers/machine/machine_controller.go +++ b/internal/controllers/machine/machine_controller.go @@ -19,6 +19,7 @@ package machine import ( "context" "fmt" + "regexp" "slices" "strings" "time" @@ -99,6 +100,8 @@ type Reconciler struct { RemoteConditionsGracePeriod time.Duration + AdditionalSyncMachineLabels []*regexp.Regexp + controller controller.Controller recorder record.EventRecorder externalTracker external.ObjectTracker diff --git a/internal/controllers/machine/machine_controller_noderef.go b/internal/controllers/machine/machine_controller_noderef.go index 683468242433..4e9bfab85e92 100644 --- a/internal/controllers/machine/machine_controller_noderef.go +++ b/internal/controllers/machine/machine_controller_noderef.go @@ -127,7 +127,7 @@ func (r *Reconciler) reconcileNode(ctx context.Context, s *scope) (ctrl.Result, // Compute labels to be propagated from Machines to nodes. // NOTE: CAPI should manage only a subset of node labels, everything else should be preserved.
// NOTE: Once we reconcile node labels for the first time, the NodeUninitializedTaint is removed from the node. - nodeLabels := getManagedLabels(machine.Labels) + nodeLabels := r.getManagedLabels(machine.Labels) // Get interruptible instance status from the infrastructure provider and set the interruptible label on the node. interruptible := false @@ -178,9 +178,10 @@ func (r *Reconciler) reconcileNode(ctx context.Context, s *scope) (ctrl.Result, // getManagedLabels gets a map[string]string and returns another map[string]string // filtering out labels not managed by CAPI. -func getManagedLabels(labels map[string]string) map[string]string { +func (r *Reconciler) getManagedLabels(labels map[string]string) map[string]string { managedLabels := make(map[string]string) for key, value := range labels { + // Always sync the default set of labels. dnsSubdomainOrName := strings.Split(key, "/")[0] if dnsSubdomainOrName == clusterv1.NodeRoleLabelPrefix { managedLabels[key] = value @@ -191,8 +192,15 @@ func getManagedLabels(labels map[string]string) map[string]string { if dnsSubdomainOrName == clusterv1.ManagedNodeLabelDomain || strings.HasSuffix(dnsSubdomainOrName, "."+clusterv1.ManagedNodeLabelDomain) { managedLabels[key] = value } - } + // Sync if the label matches at least one user-provided regex.
+ for _, regex := range r.AdditionalSyncMachineLabels { + if regex.MatchString(key) { + managedLabels[key] = value + break + } + } + } return managedLabels } diff --git a/internal/controllers/machine/machine_controller_noderef_test.go b/internal/controllers/machine/machine_controller_noderef_test.go index ef56f947ebc6..4b58ead1e75a 100644 --- a/internal/controllers/machine/machine_controller_noderef_test.go +++ b/internal/controllers/machine/machine_controller_noderef_test.go @@ -19,6 +19,7 @@ package machine import ( "context" "fmt" + "regexp" "testing" "time" @@ -700,8 +701,7 @@ func TestSummarizeNodeConditions(t *testing.T) { } func TestGetManagedLabels(t *testing.T) { - // Create managedLabels map from known managed prefixes. - managedLabels := map[string]string{ + defaultLabels := map[string]string{ clusterv1.NodeRoleLabelPrefix + "/anyRole": "", clusterv1.ManagedNodeLabelDomain: "", @@ -715,22 +715,72 @@ func TestGetManagedLabels(t *testing.T) { "custom-prefix." + clusterv1.NodeRestrictionLabelDomain + "/anything": "", } - // Append arbitrary labels. 
- allLabels := map[string]string{ - "foo": "", - "bar": "", + additionalLabels := map[string]string{ + "foo": "bar", + "bar": "baz", "company.xyz/node.cluster.x-k8s.io": "not-managed", "gpu-node.cluster.x-k8s.io": "not-managed", "company.xyz/node-restriction.kubernetes.io": "not-managed", "gpu-node-restriction.kubernetes.io": "not-managed", + "wrong.test.foo.com": "", } - for k, v := range managedLabels { + + exampleRegex := regexp.MustCompile(`foo`) + defaultAndRegexLabels := map[string]string{} + for k, v := range defaultLabels { + defaultAndRegexLabels[k] = v + } + defaultAndRegexLabels["foo"] = "bar" + defaultAndRegexLabels["wrong.test.foo.com"] = "" + + allLabels := map[string]string{} + for k, v := range defaultLabels { + allLabels[k] = v + } + for k, v := range additionalLabels { allLabels[k] = v } - g := NewWithT(t) - got := getManagedLabels(allLabels) - g.Expect(got).To(BeEquivalentTo(managedLabels)) + tests := []struct { + name string + additionalSyncMachineLabels []*regexp.Regexp + allLabels map[string]string + managedLabels map[string]string + }{ + { + name: "always sync default labels", + additionalSyncMachineLabels: nil, + allLabels: allLabels, + managedLabels: defaultLabels, + }, + { + name: "sync additional defined labels", + additionalSyncMachineLabels: []*regexp.Regexp{ + exampleRegex, + }, + allLabels: allLabels, + managedLabels: defaultAndRegexLabels, + }, + { + name: "sync all labels", + additionalSyncMachineLabels: []*regexp.Regexp{ + regexp.MustCompile(`.*`), + }, + allLabels: allLabels, + managedLabels: allLabels, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + r := &Reconciler{ + AdditionalSyncMachineLabels: tt.additionalSyncMachineLabels, + } + got := r.getManagedLabels(tt.allLabels) + g.Expect(got).To(BeEquivalentTo(tt.managedLabels)) + }) + } } func TestPatchNode(t *testing.T) { diff --git a/internal/controllers/machine/suite_test.go b/internal/controllers/machine/suite_test.go index 
b6e00091e0d6..2d66ba0641b0 100644 --- a/internal/controllers/machine/suite_test.go +++ b/internal/controllers/machine/suite_test.go @@ -94,6 +94,7 @@ func TestMain(m *testing.M) { APIReader: mgr.GetAPIReader(), ClusterCache: clusterCache, RemoteConditionsGracePeriod: 5 * time.Minute, + AdditionalSyncMachineLabels: nil, }).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: 1}); err != nil { panic(fmt.Sprintf("Failed to start MachineReconciler: %v", err)) } diff --git a/main.go b/main.go index e3cf887e6128..919e0313f64f 100644 --- a/main.go +++ b/main.go @@ -22,6 +22,7 @@ import ( "flag" "fmt" "os" + "regexp" goruntime "runtime" "time" @@ -35,6 +36,7 @@ import ( "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/selection" + kerrors "k8s.io/apimachinery/pkg/util/errors" clientgoscheme "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/leaderelection/resourcelock" cliflag "k8s.io/component-base/cli/flag" @@ -125,6 +127,7 @@ var ( machinePoolConcurrency int clusterResourceSetConcurrency int machineHealthCheckConcurrency int + additionalSyncMachineLabels []string ) func init() { @@ -251,6 +254,9 @@ func InitFlags(fs *pflag.FlagSet) { fs.StringVar(&healthAddr, "health-addr", ":9440", "The address the health endpoint binds to.") + fs.StringArrayVar(&additionalSyncMachineLabels, "additional-sync-machine-labels", []string{}, + "List of regexes to select the additional set of labels to sync from the Machine to the Node. 
A label will be synced as long as it matches at least one of the regexes.") + flags.AddManagerOptions(fs, &managerOptions) feature.MutableGates.AddFlag(fs) @@ -559,12 +565,28 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager, watchNamespaces map setupLog.Error(err, "Unable to create controller", "controller", "Cluster") os.Exit(1) } + + var errs []error + var additionalSyncMachineLabelRegexes []*regexp.Regexp + for _, re := range additionalSyncMachineLabels { + reg, err := regexp.Compile(re) + if err != nil { + errs = append(errs, err) + } else { + additionalSyncMachineLabelRegexes = append(additionalSyncMachineLabelRegexes, reg) + } + } + if len(errs) > 0 { + setupLog.Error(fmt.Errorf("at least one of --additional-sync-machine-labels regexes is invalid: %w", kerrors.NewAggregate(errs)), "Unable to start manager") + os.Exit(1) + } if err := (&controllers.MachineReconciler{ Client: mgr.GetClient(), APIReader: mgr.GetAPIReader(), ClusterCache: clusterCache, WatchFilterValue: watchFilterValue, RemoteConditionsGracePeriod: remoteConditionsGracePeriod, + AdditionalSyncMachineLabels: additionalSyncMachineLabelRegexes, }).SetupWithManager(ctx, mgr, concurrency(machineConcurrency)); err != nil { setupLog.Error(err, "Unable to create controller", "controller", "Machine") os.Exit(1)