From d229ecb24bc2a1b2a782dbd982ac93ccc192c06d Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Wed, 8 Jan 2025 10:50:39 -0800 Subject: [PATCH] feat: working poc of ARG Queries and nic garbage collection, need to fix tests --- pkg/cloudprovider/cloudprovider.go | 10 +++++----- pkg/controllers/controllers.go | 2 +- .../nodeclaim/garbagecollection/controller.go | 20 +++++++++++-------- pkg/providers/instance/arglist.go | 6 +----- pkg/providers/instance/instance.go | 7 +++++++ 5 files changed, 26 insertions(+), 19 deletions(-) diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go index b9c9764b3..652667627 100644 --- a/pkg/cloudprovider/cloudprovider.go +++ b/pkg/cloudprovider/cloudprovider.go @@ -138,6 +138,7 @@ func (c *CloudProvider) List(ctx context.Context) ([]*karpv1.NodeClaim, error) { if err != nil { return nil, fmt.Errorf("listing instances, %w", err) } + var nodeClaims []*karpv1.NodeClaim for _, instance := range instances { instanceType, err := c.resolveInstanceTypeFromInstance(ctx, instance) @@ -339,9 +340,8 @@ func (c *CloudProvider) instanceToNodeClaim(ctx context.Context, vm *armcompute. labels[karpv1.CapacityTypeLabelKey] = instance.GetCapacityType(vm) - // TODO: v1beta1 new kes/labels if tag, ok := vm.Tags[instance.NodePoolTagKey]; ok { - labels[karpv1.NodePoolLabelKey] = *tag + labels[karpv1.NodePoolLabelKey] = lo.FromPtr(tag) } inPlaceUpdateHash, err := inplaceupdate.HashFromVM(vm) @@ -350,15 +350,15 @@ func (c *CloudProvider) instanceToNodeClaim(ctx context.Context, vm *armcompute. } annotations[v1alpha2.AnnotationInPlaceUpdateHash] = inPlaceUpdateHash - nodeClaim.Name = GenerateNodeClaimName(*vm.Name) + nodeClaim.Name = GenerateNodeClaimName(lo.FromPtr(vm.Name)) nodeClaim.Labels = labels nodeClaim.Annotations = annotations - nodeClaim.CreationTimestamp = metav1.Time{Time: *vm.Properties.TimeCreated} + nodeClaim.CreationTimestamp = metav1.Time{Time: lo.FromPtr(vm.Properties.TimeCreated)} // Set the deletionTimestamp to be the current time if the instance is currently terminating if utils.IsVMDeleting(*vm) { nodeClaim.DeletionTimestamp = &metav1.Time{Time: time.Now()} } - nodeClaim.Status.ProviderID = utils.ResourceIDToProviderID(ctx, *vm.ID) + nodeClaim.Status.ProviderID = utils.ResourceIDToProviderID(ctx, lo.FromPtr(vm.ID)) if vm.Properties != nil && vm.Properties.StorageProfile != nil && vm.Properties.StorageProfile.ImageReference != nil { nodeClaim.Status.ImageID = utils.ImageReferenceToString(vm.Properties.StorageProfile.ImageReference) } diff --git a/pkg/controllers/controllers.go b/pkg/controllers/controllers.go index e96be8104..99b30c2b7 100644 --- a/pkg/controllers/controllers.go +++ b/pkg/controllers/controllers.go @@ -44,7 +44,7 @@ func NewControllers(ctx context.Context, mgr manager.Manager, kubeClient client. nodeclasshash.NewController(kubeClient), nodeclassstatus.NewController(kubeClient), nodeclasstermination.NewController(kubeClient, recorder), - nodeclaimgarbagecollection.NewController(kubeClient, cloudProvider), + nodeclaimgarbagecollection.NewController(kubeClient, cloudProvider, instanceProvider), // TODO: nodeclaim tagging inplaceupdate.NewController(kubeClient, instanceProvider), status.NewController[*v1alpha2.AKSNodeClass](kubeClient, mgr.GetEventRecorderFor("karpenter")), diff --git a/pkg/controllers/nodeclaim/garbagecollection/controller.go b/pkg/controllers/nodeclaim/garbagecollection/controller.go index 207e0426e..6465984e7 100644 --- a/pkg/controllers/nodeclaim/garbagecollection/controller.go +++ b/pkg/controllers/nodeclaim/garbagecollection/controller.go @@ -50,31 +50,34 @@ type Controller struct { successfulCount uint64 // keeps track of successful reconciles for more aggressive requeueing near the start of the controller } -func NewController(kubeClient client.Client, cloudProvider corecloudprovider.CloudProvider) *Controller { +func NewController(kubeClient client.Client, cloudProvider corecloudprovider.CloudProvider, instanceProvider instance.Provider) *Controller { return &Controller{ - kubeClient: kubeClient, - cloudProvider: cloudProvider, - successfulCount: 0, + kubeClient: kubeClient, + cloudProvider: cloudProvider, + instanceProvider: instanceProvider, + successfulCount: 0, } } func (c *Controller) Reconcile(ctx context.Context) (reconcile.Result, error) { ctx = injection.WithControllerName(ctx, "instance.garbagecollection") + var aggregatedError error // Perform VM garbage collection if err := c.gcVMs(ctx); err != nil { - return reconcile.Result{}, fmt.Errorf("VM garbage collection failed: %w", err) + aggregatedError = multierr.Append(aggregatedError, fmt.Errorf("VM garbage collection failed: %w", err)) } // Perform NIC garbage collection if err := c.gcNics(ctx); err != nil { - return reconcile.Result{}, fmt.Errorf("NIC garbage collection failed: %w", err) + aggregatedError = multierr.Append(aggregatedError, fmt.Errorf("NIC garbage collection failed: %w", err)) } c.successfulCount++ + return reconcile.Result{ - RequeueAfter: lo.Ternary(c.successfulCount <= 20, time.Second*10, time.Minute*2), - }, nil + RequeueAfter: lo.Ternary(c.successfulCount <= 20, 10*time.Second, 2*time.Minute), + }, aggregatedError } // gcVMs handles the garbage collection of virtual machines. @@ -156,6 +159,7 @@ func (c *Controller) gcNics(ctx context.Context) error { gcErrors = append(gcErrors, fmt.Errorf("deleting NIC %s: %w", nicName, err)) mu.Unlock() } + logging.FromContext(ctx).With("nic", nicName).Infof("garbage collected NIC") } }) diff --git a/pkg/providers/instance/arglist.go b/pkg/providers/instance/arglist.go index 5d335e706..c0d2366fe 100644 --- a/pkg/providers/instance/arglist.go +++ b/pkg/providers/instance/arglist.go @@ -16,11 +16,6 @@ const ( nicResourceType = "microsoft.network/networkinterfaces" ) -var ( - vmListQuery string - nicListQuery string -) - // getResourceListQueryBuilder returns a KQL query builder for listing resources with nodepool tags func getResourceListQueryBuilder(rg string, resourceType string) *kql.Builder { return kql.New(`Resources`). @@ -73,6 +68,7 @@ func createNICFromQueryResponseData(data map[string]interface{}) (*armnetwork.In if err != nil { return nil, err } + nic := armnetwork.Interface{} err = json.Unmarshal(jsonString, &nic) if err != nil { diff --git a/pkg/providers/instance/instance.go b/pkg/providers/instance/instance.go index 0adfb3a2d..3e1e09907 100644 --- a/pkg/providers/instance/instance.go +++ b/pkg/providers/instance/instance.go @@ -52,6 +52,11 @@ import ( "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork" ) +var ( + vmListQuery string + nicListQuery string +) + var ( NodePoolTagKey = strings.ReplaceAll(karpv1.NodePoolLabelKey, "/", "_") @@ -112,6 +117,8 @@ func NewDefaultProvider( subscriptionID string, provisionMode string, ) *DefaultProvider { + vmListQuery = GetVMListQueryBuilder(resourceGroup).String() + nicListQuery = GetNICListQueryBuilder(resourceGroup).String() return &DefaultProvider{ azClient: azClient, instanceTypeProvider: instanceTypeProvider,