diff --git a/hack/update-generated-conversions.sh b/hack/update-generated-conversions.sh index ebf51280bd..49f9110e92 100755 --- a/hack/update-generated-conversions.sh +++ b/hack/update-generated-conversions.sh @@ -5,5 +5,5 @@ go build -o "${OS_OUTPUT_BINPATH}/conversion-gen" "k8s.io/code-generator/cmd/con ${OS_OUTPUT_BINPATH}/conversion-gen \ --go-header-file "hack/boilerplate/boilerplate.go.txt" \ - --input-dirs "${PRJ_PREFIX}/pkg/apis/componentconfig/v1alpha1,${PRJ_PREFIX}/pkg/api/v1alpha1" \ + --input-dirs "${PRJ_PREFIX}/pkg/apis/componentconfig/v1alpha1,${PRJ_PREFIX}/pkg/api/v1alpha1,${PRJ_PREFIX}/pkg/api/v1alpha2" \ --output-file-base zz_generated.conversion diff --git a/hack/update-generated-deep-copies.sh b/hack/update-generated-deep-copies.sh index 5a71d15545..e12ffdb804 100755 --- a/hack/update-generated-deep-copies.sh +++ b/hack/update-generated-deep-copies.sh @@ -5,6 +5,5 @@ go build -o "${OS_OUTPUT_BINPATH}/deepcopy-gen" "k8s.io/code-generator/cmd/deepc ${OS_OUTPUT_BINPATH}/deepcopy-gen \ --go-header-file "hack/boilerplate/boilerplate.go.txt" \ - --input-dirs "${PRJ_PREFIX}/pkg/apis/componentconfig,${PRJ_PREFIX}/pkg/apis/componentconfig/v1alpha1,${PRJ_PREFIX}/pkg/api,${PRJ_PREFIX}/pkg/api/v1alpha1" \ + --input-dirs "${PRJ_PREFIX}/pkg/apis/componentconfig,${PRJ_PREFIX}/pkg/apis/componentconfig/v1alpha1,${PRJ_PREFIX}/pkg/api,${PRJ_PREFIX}/pkg/api/v1alpha1,${PRJ_PREFIX}/pkg/api/v1alpha2" \ --output-file-base zz_generated.deepcopy - diff --git a/hack/update-generated-defaulters.sh b/hack/update-generated-defaulters.sh index 1c88e1d111..464f4f2238 100755 --- a/hack/update-generated-defaulters.sh +++ b/hack/update-generated-defaulters.sh @@ -5,6 +5,6 @@ go build -o "${OS_OUTPUT_BINPATH}/defaulter-gen" "k8s.io/code-generator/cmd/defa ${OS_OUTPUT_BINPATH}/defaulter-gen \ --go-header-file "hack/boilerplate/boilerplate.go.txt" \ - --input-dirs "${PRJ_PREFIX}/pkg/apis/componentconfig/v1alpha1,${PRJ_PREFIX}/pkg/api/v1alpha1" \ - --extra-peer-dirs "${PRJ_PREFIX}/pkg/apis/componentconfig/v1alpha1,${PRJ_PREFIX}/pkg/api/v1alpha1" \ + --input-dirs "${PRJ_PREFIX}/pkg/apis/componentconfig/v1alpha1,${PRJ_PREFIX}/pkg/api/v1alpha1,${PRJ_PREFIX}/pkg/api/v1alpha2" \ + --extra-peer-dirs "${PRJ_PREFIX}/pkg/apis/componentconfig/v1alpha1,${PRJ_PREFIX}/pkg/api/v1alpha1,${PRJ_PREFIX}/pkg/api/v1alpha2" \ --output-file-base zz_generated.defaults diff --git a/pkg/api/types.go b/pkg/api/types.go index ef4c651232..26f514d6e7 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -19,6 +19,7 @@ package api import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" ) // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object @@ -72,6 +73,11 @@ type Namespaces struct { Exclude []string } +type PriorityThreshold struct { + Value *int32 + Name string +} + // Besides Namespaces only one of its members may be specified // TODO(jchaloup): move Namespaces ThresholdPriority and ThresholdPriorityClassName to individual strategies // once the policy version is bumped to v1alpha2 @@ -122,3 +128,155 @@ type FailedPods struct { Reasons []string IncludingInitContainers bool } + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +type DeschedulerConfiguration struct { + metav1.TypeMeta + + // Profiles + Profiles []Profile + + // NodeSelector for a set of nodes to operate over + NodeSelector *string + + // MaxNoOfPodsToEvictPerNode restricts maximum of pods to be evicted per node. 
+ MaxNoOfPodsToEvictPerNode *int + + // MaxNoOfPodsToEvictPerNamespace restricts maximum of pods to be evicted per namespace. + MaxNoOfPodsToEvictPerNamespace *int +} + +type Profile struct { + Name string + PluginConfig []PluginConfig + Plugins Plugins +} + +type Plugins struct { + PreSort Plugin + Deschedule Plugin + Balance Plugin + Sort Plugin + Evict Plugin +} + +type PluginConfig struct { + Name string + Args runtime.Object +} + +type Plugin struct { + Enabled []string + Disabled []string +} + +// RemoveDuplicatePodsArgs holds arguments used to configure the RemoveDuplicatePods plugin. +type RemoveDuplicatePodsArgs struct { + metav1.TypeMeta + + Namespaces *Namespaces + ExcludeOwnerKinds []string +} + +// RemoveFailedPodsArgs holds arguments used to configure the RemoveFailedPods plugin. +type RemoveFailedPodsArgs struct { + metav1.TypeMeta + + Namespaces *Namespaces + LabelSelector *metav1.LabelSelector + MinPodLifetimeSeconds *uint + Reasons []string + IncludingInitContainers bool + ExcludeOwnerKinds []string +} + +// RemovePodsViolatingNodeAffinityArgs holds arguments used to configure the RemovePodsViolatingNodeAffinity plugin. +type RemovePodsViolatingNodeAffinityArgs struct { + metav1.TypeMeta + + Namespaces *Namespaces + LabelSelector *metav1.LabelSelector + NodeAffinityType []string +} + +// RemovePodsViolatingNodeTaintsArgs holds arguments used to configure the RemovePodsViolatingNodeTaints plugin. +type RemovePodsViolatingNodeTaintsArgs struct { + metav1.TypeMeta + + Namespaces *Namespaces + LabelSelector *metav1.LabelSelector + IncludePreferNoSchedule bool + ExcludedTaints []string +} + +// RemovePodsViolatingInterPodAntiAffinityArgs holds arguments used to configure the RemovePodsViolatingInterPodAntiAffinity plugin. +type RemovePodsViolatingInterPodAntiAffinityArgs struct { + metav1.TypeMeta + + Namespaces *Namespaces + LabelSelector *metav1.LabelSelector +} + +// PodLifeTimeArgs holds arguments used to configure the PodLifeTime plugin. +type PodLifeTimeArgs struct { + metav1.TypeMeta + + Namespaces *Namespaces + LabelSelector *metav1.LabelSelector + MaxPodLifeTimeSeconds *uint + PodStatusPhases []string +} + +// RemovePodsHavingTooManyRestartsArgs holds arguments used to configure the RemovePodsHavingTooManyRestarts plugin. +type RemovePodsHavingTooManyRestartsArgs struct { + metav1.TypeMeta + + Namespaces *Namespaces + LabelSelector *metav1.LabelSelector + PodRestartThreshold int32 + IncludingInitContainers bool +} + +// RemovePodsViolatingTopologySpreadConstraintArgs holds arguments used to configure the RemovePodsViolatingTopologySpreadConstraint plugin. +type RemovePodsViolatingTopologySpreadConstraintArgs struct { + metav1.TypeMeta + + Namespaces *Namespaces + LabelSelector *metav1.LabelSelector + IncludeSoftConstraints bool +} + +// LowNodeUtilizationArgs holds arguments used to configure the LowNodeUtilization plugin. +type LowNodeUtilizationArgs struct { + metav1.TypeMeta + + UseDeviationThresholds bool + Thresholds ResourceThresholds + TargetThresholds ResourceThresholds + NumberOfNodes int +} + +// HighNodeUtilizationArgs holds arguments used to configure the HighNodeUtilization plugin. +type HighNodeUtilizationArgs struct { + metav1.TypeMeta + + Thresholds ResourceThresholds + TargetThresholds ResourceThresholds + NumberOfNodes int +} + +// DefaultEvictorArgs holds arguments used to configure the DefaultEvictor plugin. 
+type DefaultEvictorArgs struct { + metav1.TypeMeta + + EvictFailedBarePods bool + EvictLocalStoragePods bool + EvictSystemCriticalPods bool + IgnorePvcPods bool + PriorityThreshold *PriorityThreshold + NodeFit bool + LabelSelector *metav1.LabelSelector + // TODO(jchaloup): turn it into *metav1.LabelSelector + NodeSelector string +} diff --git a/pkg/api/v1alpha2/defaults.go b/pkg/api/v1alpha2/defaults.go new file mode 100644 index 0000000000..71fc09ca73 --- /dev/null +++ b/pkg/api/v1alpha2/defaults.go @@ -0,0 +1,23 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha2 + +import "k8s.io/apimachinery/pkg/runtime" + +func addDefaultingFuncs(scheme *runtime.Scheme) error { + return RegisterDefaults(scheme) +} diff --git a/pkg/api/v1alpha2/doc.go b/pkg/api/v1alpha2/doc.go new file mode 100644 index 0000000000..6493a76086 --- /dev/null +++ b/pkg/api/v1alpha2/doc.go @@ -0,0 +1,24 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// +k8s:deepcopy-gen=package,register +// +k8s:conversion-gen=sigs.k8s.io/descheduler/pkg/api +// +k8s:defaulter-gen=TypeMeta + +// Package v1alpha2 is the v1alpha2 version of the descheduler API +// +groupName=descheduler + +package v1alpha2 // import "sigs.k8s.io/descheduler/pkg/api/v1alpha2" diff --git a/pkg/api/v1alpha2/plugin_args.go b/pkg/api/v1alpha2/plugin_args.go new file mode 100644 index 0000000000..69bc9f1157 --- /dev/null +++ b/pkg/api/v1alpha2/plugin_args.go @@ -0,0 +1,126 @@ +package v1alpha2 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "sigs.k8s.io/descheduler/pkg/api" +) + +// RemoveDuplicatePodsArgs holds arguments used to configure the RemoveDuplicatePods plugin. +type RemoveDuplicatePodsArgs struct { + metav1.TypeMeta + + Namespaces *api.Namespaces `json:"namespaces"` + ExcludeOwnerKinds []string `json:"excludeOwnerKinds"` +} + +// RemoveFailedPodsArgs holds arguments used to configure the RemoveFailedPods plugin. +type RemoveFailedPodsArgs struct { + metav1.TypeMeta + + Namespaces *api.Namespaces `json:"namespaces"` + LabelSelector *metav1.LabelSelector `json:"labelSelector"` + MinPodLifetimeSeconds *uint `json:"minPodLifetimeSeconds"` + Reasons []string `json:"reasons"` + IncludingInitContainers bool `json:"includingInitContainers"` + ExcludeOwnerKinds []string `json:"excludeOwnerKinds"` +} + +// RemovePodsViolatingNodeAffinityArgs holds arguments used to configure the RemovePodsViolatingNodeAffinity plugin. 
+type RemovePodsViolatingNodeAffinityArgs struct { + metav1.TypeMeta + + Namespaces *api.Namespaces `json:"namespaces"` + LabelSelector *metav1.LabelSelector `json:"labelSelector"` + NodeAffinityType []string `json:"nodeAffinityType"` +} + +// RemovePodsViolatingNodeTaintsArgs holds arguments used to configure the RemovePodsViolatingNodeTaints plugin. +type RemovePodsViolatingNodeTaintsArgs struct { + metav1.TypeMeta + + Namespaces *api.Namespaces `json:"namespaces"` + LabelSelector *metav1.LabelSelector `json:"labelSelector"` + IncludePreferNoSchedule bool `json:"includePreferNoSchedule"` + ExcludedTaints []string `json:"excludedTaints"` +} + +// RemovePodsViolatingInterPodAntiAffinityArgs holds arguments used to configure the RemovePodsViolatingInterPodAntiAffinity plugin. +type RemovePodsViolatingInterPodAntiAffinityArgs struct { + metav1.TypeMeta + + Namespaces *api.Namespaces `json:"namespaces"` + LabelSelector *metav1.LabelSelector `json:"labelSelector"` +} + +// PodLifeTimeArgs holds arguments used to configure the PodLifeTime plugin. +type PodLifeTimeArgs struct { + metav1.TypeMeta + + Namespaces *api.Namespaces `json:"namespaces"` + LabelSelector *metav1.LabelSelector `json:"labelSelector"` + MaxPodLifeTimeSeconds *uint `json:"maxPodLifeTimeSeconds"` + PodStatusPhases []string `json:"podStatusPhases"` +} + +// RemovePodsHavingTooManyRestartsArgs holds arguments used to configure the RemovePodsHavingTooManyRestarts plugin. +type RemovePodsHavingTooManyRestartsArgs struct { + metav1.TypeMeta + + Namespaces *api.Namespaces `json:"namespaces"` + LabelSelector *metav1.LabelSelector `json:"labelSelector"` + PodRestartThreshold int32 `json:"podRestartThreshold"` + IncludingInitContainers bool `json:"includingInitContainers"` +} + +// RemovePodsViolatingTopologySpreadConstraintArgs holds arguments used to configure the RemovePodsViolatingTopologySpreadConstraint plugin. +type RemovePodsViolatingTopologySpreadConstraintArgs struct { + metav1.TypeMeta + + Namespaces *api.Namespaces `json:"namespaces"` + LabelSelector *metav1.LabelSelector `json:"labelSelector"` + IncludeSoftConstraints bool `json:"includeSoftConstraints"` +} + +// LowNodeUtilizationArgs holds arguments used to configure the LowNodeUtilization plugin. +type LowNodeUtilizationArgs struct { + metav1.TypeMeta + + UseDeviationThresholds bool `json:"useDeviationThresholds"` + Thresholds api.ResourceThresholds `json:"thresholds"` + TargetThresholds api.ResourceThresholds `json:"targetThresholds"` + NumberOfNodes int `json:"numberOfNodes"` +} + +// HighNodeUtilizationArgs holds arguments used to configure the HighNodeUtilization plugin. +type HighNodeUtilizationArgs struct { + metav1.TypeMeta + + Thresholds api.ResourceThresholds `json:"thresholds"` + // TODO(jchaloup): remove TargetThresholds + TargetThresholds api.ResourceThresholds `json:"targetThresholds"` + NumberOfNodes int `json:"numberOfNodes"` +} + +// DefaultEvictorArgs holds arguments used to configure the DefaultEvictor plugin. +type DefaultEvictorArgs struct { + metav1.TypeMeta + + // EvictFailedBarePods allows pods without ownerReferences and in failed phase to be evicted. + EvictFailedBarePods bool `json:"evictFailedBarePods"` + + // EvictLocalStoragePods allows pods using local storage to be evicted. 
+ EvictLocalStoragePods bool `json:"evictLocalStoragePods"` + + // EvictSystemCriticalPods allows eviction of pods of any priority (including Kubernetes system pods) + EvictSystemCriticalPods bool `json:"evictSystemCriticalPods"` + + // IgnorePVCPods prevents pods with PVCs from being evicted. + IgnorePvcPods bool `json:"ignorePvcPods"` + + PriorityThreshold *api.PriorityThreshold `json:"priorityThreshold"` + NodeFit bool `json:"nodeFit"` + LabelSelector *metav1.LabelSelector `json:"labelSelector"` + // TODO(jchaloup): turn it into *metav1.LabelSelector `json:"labelSelector"` + NodeSelector string `json:"nodeSelector"` +} diff --git a/pkg/api/v1alpha2/register.go b/pkg/api/v1alpha2/register.go new file mode 100644 index 0000000000..18d5ff5bb9 --- /dev/null +++ b/pkg/api/v1alpha2/register.go @@ -0,0 +1,61 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha2 + +import ( + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +var ( + SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) + localSchemeBuilder = &SchemeBuilder + AddToScheme = SchemeBuilder.AddToScheme +) + +// GroupName is the group name used in this package +const GroupName = "descheduler" +const GroupVersion = "v1alpha2" + +// SchemeGroupVersion is group version used to register these objects +var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: GroupVersion} + +// Kind takes an unqualified kind and returns a Group qualified GroupKind +func Kind(kind string) schema.GroupKind { + return SchemeGroupVersion.WithKind(kind).GroupKind() +} + +// Resource takes an unqualified resource and returns a Group qualified GroupResource +func Resource(resource string) schema.GroupResource { + return SchemeGroupVersion.WithResource(resource).GroupResource() +} + +func init() { + // We only register manually written functions here. The registration of the + // generated functions takes place in the generated files. The separation + // makes the code compile even when the generated files are missing. + localSchemeBuilder.Register(addKnownTypes, addDefaultingFuncs) +} + +func addKnownTypes(scheme *runtime.Scheme) error { + // TODO this will get cleaned up with the scheme types are fixed + scheme.AddKnownTypes(SchemeGroupVersion, + &DeschedulerConfiguration{}, + ) + + return nil +} diff --git a/pkg/api/v1alpha2/types.go b/pkg/api/v1alpha2/types.go new file mode 100644 index 0000000000..a97115155f --- /dev/null +++ b/pkg/api/v1alpha2/types.go @@ -0,0 +1,71 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha2 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" +) + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +type DeschedulerConfiguration struct { + metav1.TypeMeta `json:",inline"` + + // Profiles + Profiles []Profile `json:"profiles,omitempty"` + + // NodeSelector for a set of nodes to operate over + NodeSelector *string `json:"nodeSelector,omitempty"` + + // MaxNoOfPodsToEvictPerNode restricts maximum of pods to be evicted per node. + MaxNoOfPodsToEvictPerNode *int `json:"maxNoOfPodsToEvictPerNode,omitempty"` + + // MaxNoOfPodsToEvictPerNamespace restricts maximum of pods to be evicted per namespace. + MaxNoOfPodsToEvictPerNamespace *int `json:"maxNoOfPodsToEvictPerNamespace,omitempty"` +} + +type Profile struct { + Name string `json:"name"` + PluginConfig []PluginConfig `json:"pluginConfig"` + Plugins Plugins `json:"plugins"` +} + +type Plugins struct { + PreSort Plugin `json:"presort"` + Sort Plugin `json:"sort"` + Deschedule Plugin `json:"deschedule"` + Balance Plugin `json:"balance"` + Evict Plugin `json:"evict"` +} + +type PluginConfig struct { + Name string `json:"name"` + Args runtime.Object `json:"args"` +} + +type Plugin struct { + Enabled []string `json:"enabled"` + Disabled []string `json:"disabled"` +} + +// Namespaces carries a list of included/excluded namespaces +// for which a given strategy is applicable. +type Namespaces struct { + Include []string `json:"include"` + Exclude []string `json:"exclude"` +} diff --git a/pkg/api/v1alpha2/zz_generated.conversion.go b/pkg/api/v1alpha2/zz_generated.conversion.go new file mode 100644 index 0000000000..5b3e936cb8 --- /dev/null +++ b/pkg/api/v1alpha2/zz_generated.conversion.go @@ -0,0 +1,72 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by conversion-gen. DO NOT EDIT. + +package v1alpha2 + +import ( + unsafe "unsafe" + + conversion "k8s.io/apimachinery/pkg/conversion" + runtime "k8s.io/apimachinery/pkg/runtime" + api "sigs.k8s.io/descheduler/pkg/api" +) + +func init() { + localSchemeBuilder.Register(RegisterConversions) +} + +// RegisterConversions adds conversion functions to the given scheme. +// Public to allow building arbitrary schemes. 
+func RegisterConversions(s *runtime.Scheme) error { + if err := s.AddGeneratedConversionFunc((*Namespaces)(nil), (*api.Namespaces)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha2_Namespaces_To_api_Namespaces(a.(*Namespaces), b.(*api.Namespaces), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*api.Namespaces)(nil), (*Namespaces)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_api_Namespaces_To_v1alpha2_Namespaces(a.(*api.Namespaces), b.(*Namespaces), scope) + }); err != nil { + return err + } + return nil +} + +func autoConvert_v1alpha2_Namespaces_To_api_Namespaces(in *Namespaces, out *api.Namespaces, s conversion.Scope) error { + out.Include = *(*[]string)(unsafe.Pointer(&in.Include)) + out.Exclude = *(*[]string)(unsafe.Pointer(&in.Exclude)) + return nil +} + +// Convert_v1alpha2_Namespaces_To_api_Namespaces is an autogenerated conversion function. +func Convert_v1alpha2_Namespaces_To_api_Namespaces(in *Namespaces, out *api.Namespaces, s conversion.Scope) error { + return autoConvert_v1alpha2_Namespaces_To_api_Namespaces(in, out, s) +} + +func autoConvert_api_Namespaces_To_v1alpha2_Namespaces(in *api.Namespaces, out *Namespaces, s conversion.Scope) error { + out.Include = *(*[]string)(unsafe.Pointer(&in.Include)) + out.Exclude = *(*[]string)(unsafe.Pointer(&in.Exclude)) + return nil +} + +// Convert_api_Namespaces_To_v1alpha2_Namespaces is an autogenerated conversion function. +func Convert_api_Namespaces_To_v1alpha2_Namespaces(in *api.Namespaces, out *Namespaces, s conversion.Scope) error { + return autoConvert_api_Namespaces_To_v1alpha2_Namespaces(in, out, s) +} diff --git a/pkg/api/v1alpha2/zz_generated.deepcopy.go b/pkg/api/v1alpha2/zz_generated.deepcopy.go new file mode 100644 index 0000000000..8864720b84 --- /dev/null +++ b/pkg/api/v1alpha2/zz_generated.deepcopy.go @@ -0,0 +1,531 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by deepcopy-gen. DO NOT EDIT. + +package v1alpha2 + +import ( + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + api "sigs.k8s.io/descheduler/pkg/api" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DefaultEvictorArgs) DeepCopyInto(out *DefaultEvictorArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.PriorityThreshold != nil { + in, out := &in.PriorityThreshold, &out.PriorityThreshold + *out = new(api.PriorityThreshold) + (*in).DeepCopyInto(*out) + } + if in.LabelSelector != nil { + in, out := &in.LabelSelector, &out.LabelSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DefaultEvictorArgs. 
+func (in *DefaultEvictorArgs) DeepCopy() *DefaultEvictorArgs { + if in == nil { + return nil + } + out := new(DefaultEvictorArgs) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DeschedulerConfiguration) DeepCopyInto(out *DeschedulerConfiguration) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Profiles != nil { + in, out := &in.Profiles, &out.Profiles + *out = make([]Profile, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.NodeSelector != nil { + in, out := &in.NodeSelector, &out.NodeSelector + *out = new(string) + **out = **in + } + if in.MaxNoOfPodsToEvictPerNode != nil { + in, out := &in.MaxNoOfPodsToEvictPerNode, &out.MaxNoOfPodsToEvictPerNode + *out = new(int) + **out = **in + } + if in.MaxNoOfPodsToEvictPerNamespace != nil { + in, out := &in.MaxNoOfPodsToEvictPerNamespace, &out.MaxNoOfPodsToEvictPerNamespace + *out = new(int) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeschedulerConfiguration. +func (in *DeschedulerConfiguration) DeepCopy() *DeschedulerConfiguration { + if in == nil { + return nil + } + out := new(DeschedulerConfiguration) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *DeschedulerConfiguration) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HighNodeUtilizationArgs) DeepCopyInto(out *HighNodeUtilizationArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Thresholds != nil { + in, out := &in.Thresholds, &out.Thresholds + *out = make(api.ResourceThresholds, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.TargetThresholds != nil { + in, out := &in.TargetThresholds, &out.TargetThresholds + *out = make(api.ResourceThresholds, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HighNodeUtilizationArgs. +func (in *HighNodeUtilizationArgs) DeepCopy() *HighNodeUtilizationArgs { + if in == nil { + return nil + } + out := new(HighNodeUtilizationArgs) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LowNodeUtilizationArgs) DeepCopyInto(out *LowNodeUtilizationArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Thresholds != nil { + in, out := &in.Thresholds, &out.Thresholds + *out = make(api.ResourceThresholds, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.TargetThresholds != nil { + in, out := &in.TargetThresholds, &out.TargetThresholds + *out = make(api.ResourceThresholds, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LowNodeUtilizationArgs. +func (in *LowNodeUtilizationArgs) DeepCopy() *LowNodeUtilizationArgs { + if in == nil { + return nil + } + out := new(LowNodeUtilizationArgs) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. 
in must be non-nil. +func (in *Namespaces) DeepCopyInto(out *Namespaces) { + *out = *in + if in.Include != nil { + in, out := &in.Include, &out.Include + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Exclude != nil { + in, out := &in.Exclude, &out.Exclude + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Namespaces. +func (in *Namespaces) DeepCopy() *Namespaces { + if in == nil { + return nil + } + out := new(Namespaces) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Plugin) DeepCopyInto(out *Plugin) { + *out = *in + if in.Enabled != nil { + in, out := &in.Enabled, &out.Enabled + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Disabled != nil { + in, out := &in.Disabled, &out.Disabled + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Plugin. +func (in *Plugin) DeepCopy() *Plugin { + if in == nil { + return nil + } + out := new(Plugin) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PluginConfig) DeepCopyInto(out *PluginConfig) { + *out = *in + if in.Args != nil { + out.Args = in.Args.DeepCopyObject() + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PluginConfig. +func (in *PluginConfig) DeepCopy() *PluginConfig { + if in == nil { + return nil + } + out := new(PluginConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Plugins) DeepCopyInto(out *Plugins) { + *out = *in + in.PreSort.DeepCopyInto(&out.PreSort) + in.Sort.DeepCopyInto(&out.Sort) + in.Deschedule.DeepCopyInto(&out.Deschedule) + in.Balance.DeepCopyInto(&out.Balance) + in.Evict.DeepCopyInto(&out.Evict) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Plugins. +func (in *Plugins) DeepCopy() *Plugins { + if in == nil { + return nil + } + out := new(Plugins) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PodLifeTimeArgs) DeepCopyInto(out *PodLifeTimeArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Namespaces != nil { + in, out := &in.Namespaces, &out.Namespaces + *out = new(api.Namespaces) + (*in).DeepCopyInto(*out) + } + if in.LabelSelector != nil { + in, out := &in.LabelSelector, &out.LabelSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } + if in.MaxPodLifeTimeSeconds != nil { + in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds + *out = new(uint) + **out = **in + } + if in.PodStatusPhases != nil { + in, out := &in.PodStatusPhases, &out.PodStatusPhases + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodLifeTimeArgs. 
+func (in *PodLifeTimeArgs) DeepCopy() *PodLifeTimeArgs { + if in == nil { + return nil + } + out := new(PodLifeTimeArgs) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Profile) DeepCopyInto(out *Profile) { + *out = *in + if in.PluginConfig != nil { + in, out := &in.PluginConfig, &out.PluginConfig + *out = make([]PluginConfig, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + in.Plugins.DeepCopyInto(&out.Plugins) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Profile. +func (in *Profile) DeepCopy() *Profile { + if in == nil { + return nil + } + out := new(Profile) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RemoveDuplicatePodsArgs) DeepCopyInto(out *RemoveDuplicatePodsArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Namespaces != nil { + in, out := &in.Namespaces, &out.Namespaces + *out = new(api.Namespaces) + (*in).DeepCopyInto(*out) + } + if in.ExcludeOwnerKinds != nil { + in, out := &in.ExcludeOwnerKinds, &out.ExcludeOwnerKinds + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemoveDuplicatePodsArgs. +func (in *RemoveDuplicatePodsArgs) DeepCopy() *RemoveDuplicatePodsArgs { + if in == nil { + return nil + } + out := new(RemoveDuplicatePodsArgs) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RemoveFailedPodsArgs) DeepCopyInto(out *RemoveFailedPodsArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Namespaces != nil { + in, out := &in.Namespaces, &out.Namespaces + *out = new(api.Namespaces) + (*in).DeepCopyInto(*out) + } + if in.LabelSelector != nil { + in, out := &in.LabelSelector, &out.LabelSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } + if in.MinPodLifetimeSeconds != nil { + in, out := &in.MinPodLifetimeSeconds, &out.MinPodLifetimeSeconds + *out = new(uint) + **out = **in + } + if in.Reasons != nil { + in, out := &in.Reasons, &out.Reasons + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.ExcludeOwnerKinds != nil { + in, out := &in.ExcludeOwnerKinds, &out.ExcludeOwnerKinds + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemoveFailedPodsArgs. +func (in *RemoveFailedPodsArgs) DeepCopy() *RemoveFailedPodsArgs { + if in == nil { + return nil + } + out := new(RemoveFailedPodsArgs) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *RemovePodsHavingTooManyRestartsArgs) DeepCopyInto(out *RemovePodsHavingTooManyRestartsArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Namespaces != nil { + in, out := &in.Namespaces, &out.Namespaces + *out = new(api.Namespaces) + (*in).DeepCopyInto(*out) + } + if in.LabelSelector != nil { + in, out := &in.LabelSelector, &out.LabelSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemovePodsHavingTooManyRestartsArgs. +func (in *RemovePodsHavingTooManyRestartsArgs) DeepCopy() *RemovePodsHavingTooManyRestartsArgs { + if in == nil { + return nil + } + out := new(RemovePodsHavingTooManyRestartsArgs) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RemovePodsViolatingInterPodAntiAffinityArgs) DeepCopyInto(out *RemovePodsViolatingInterPodAntiAffinityArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Namespaces != nil { + in, out := &in.Namespaces, &out.Namespaces + *out = new(api.Namespaces) + (*in).DeepCopyInto(*out) + } + if in.LabelSelector != nil { + in, out := &in.LabelSelector, &out.LabelSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemovePodsViolatingInterPodAntiAffinityArgs. +func (in *RemovePodsViolatingInterPodAntiAffinityArgs) DeepCopy() *RemovePodsViolatingInterPodAntiAffinityArgs { + if in == nil { + return nil + } + out := new(RemovePodsViolatingInterPodAntiAffinityArgs) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RemovePodsViolatingNodeAffinityArgs) DeepCopyInto(out *RemovePodsViolatingNodeAffinityArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Namespaces != nil { + in, out := &in.Namespaces, &out.Namespaces + *out = new(api.Namespaces) + (*in).DeepCopyInto(*out) + } + if in.LabelSelector != nil { + in, out := &in.LabelSelector, &out.LabelSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } + if in.NodeAffinityType != nil { + in, out := &in.NodeAffinityType, &out.NodeAffinityType + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemovePodsViolatingNodeAffinityArgs. +func (in *RemovePodsViolatingNodeAffinityArgs) DeepCopy() *RemovePodsViolatingNodeAffinityArgs { + if in == nil { + return nil + } + out := new(RemovePodsViolatingNodeAffinityArgs) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *RemovePodsViolatingNodeTaintsArgs) DeepCopyInto(out *RemovePodsViolatingNodeTaintsArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Namespaces != nil { + in, out := &in.Namespaces, &out.Namespaces + *out = new(api.Namespaces) + (*in).DeepCopyInto(*out) + } + if in.LabelSelector != nil { + in, out := &in.LabelSelector, &out.LabelSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } + if in.ExcludedTaints != nil { + in, out := &in.ExcludedTaints, &out.ExcludedTaints + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemovePodsViolatingNodeTaintsArgs. +func (in *RemovePodsViolatingNodeTaintsArgs) DeepCopy() *RemovePodsViolatingNodeTaintsArgs { + if in == nil { + return nil + } + out := new(RemovePodsViolatingNodeTaintsArgs) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RemovePodsViolatingTopologySpreadConstraintArgs) DeepCopyInto(out *RemovePodsViolatingTopologySpreadConstraintArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Namespaces != nil { + in, out := &in.Namespaces, &out.Namespaces + *out = new(api.Namespaces) + (*in).DeepCopyInto(*out) + } + if in.LabelSelector != nil { + in, out := &in.LabelSelector, &out.LabelSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemovePodsViolatingTopologySpreadConstraintArgs. +func (in *RemovePodsViolatingTopologySpreadConstraintArgs) DeepCopy() *RemovePodsViolatingTopologySpreadConstraintArgs { + if in == nil { + return nil + } + out := new(RemovePodsViolatingTopologySpreadConstraintArgs) + in.DeepCopyInto(out) + return out +} diff --git a/pkg/api/v1alpha2/zz_generated.defaults.go b/pkg/api/v1alpha2/zz_generated.defaults.go new file mode 100644 index 0000000000..c65a60181f --- /dev/null +++ b/pkg/api/v1alpha2/zz_generated.defaults.go @@ -0,0 +1,33 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by defaulter-gen. DO NOT EDIT. + +package v1alpha2 + +import ( + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// RegisterDefaults adds defaulters functions to the given scheme. +// Public to allow building arbitrary schemes. +// All generated defaulters are covering - they call all nested defaulters. +func RegisterDefaults(scheme *runtime.Scheme) error { + return nil +} diff --git a/pkg/api/zz_generated.deepcopy.go b/pkg/api/zz_generated.deepcopy.go index 07238f415b..0897f6e516 100644 --- a/pkg/api/zz_generated.deepcopy.go +++ b/pkg/api/zz_generated.deepcopy.go @@ -26,6 +26,80 @@ import ( runtime "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *DefaultEvictorArgs) DeepCopyInto(out *DefaultEvictorArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.PriorityThreshold != nil { + in, out := &in.PriorityThreshold, &out.PriorityThreshold + *out = new(PriorityThreshold) + (*in).DeepCopyInto(*out) + } + if in.LabelSelector != nil { + in, out := &in.LabelSelector, &out.LabelSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DefaultEvictorArgs. +func (in *DefaultEvictorArgs) DeepCopy() *DefaultEvictorArgs { + if in == nil { + return nil + } + out := new(DefaultEvictorArgs) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DeschedulerConfiguration) DeepCopyInto(out *DeschedulerConfiguration) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Profiles != nil { + in, out := &in.Profiles, &out.Profiles + *out = make([]Profile, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.NodeSelector != nil { + in, out := &in.NodeSelector, &out.NodeSelector + *out = new(string) + **out = **in + } + if in.MaxNoOfPodsToEvictPerNode != nil { + in, out := &in.MaxNoOfPodsToEvictPerNode, &out.MaxNoOfPodsToEvictPerNode + *out = new(int) + **out = **in + } + if in.MaxNoOfPodsToEvictPerNamespace != nil { + in, out := &in.MaxNoOfPodsToEvictPerNamespace, &out.MaxNoOfPodsToEvictPerNamespace + *out = new(int) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeschedulerConfiguration. +func (in *DeschedulerConfiguration) DeepCopy() *DeschedulerConfiguration { + if in == nil { + return nil + } + out := new(DeschedulerConfiguration) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *DeschedulerConfiguration) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *DeschedulerPolicy) DeepCopyInto(out *DeschedulerPolicy) { *out = *in @@ -145,6 +219,68 @@ func (in *FailedPods) DeepCopy() *FailedPods { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HighNodeUtilizationArgs) DeepCopyInto(out *HighNodeUtilizationArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Thresholds != nil { + in, out := &in.Thresholds, &out.Thresholds + *out = make(ResourceThresholds, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.TargetThresholds != nil { + in, out := &in.TargetThresholds, &out.TargetThresholds + *out = make(ResourceThresholds, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HighNodeUtilizationArgs. +func (in *HighNodeUtilizationArgs) DeepCopy() *HighNodeUtilizationArgs { + if in == nil { + return nil + } + out := new(HighNodeUtilizationArgs) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *LowNodeUtilizationArgs) DeepCopyInto(out *LowNodeUtilizationArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Thresholds != nil { + in, out := &in.Thresholds, &out.Thresholds + *out = make(ResourceThresholds, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.TargetThresholds != nil { + in, out := &in.TargetThresholds, &out.TargetThresholds + *out = make(ResourceThresholds, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LowNodeUtilizationArgs. +func (in *LowNodeUtilizationArgs) DeepCopy() *LowNodeUtilizationArgs { + if in == nil { + return nil + } + out := new(LowNodeUtilizationArgs) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Namespaces) DeepCopyInto(out *Namespaces) { *out = *in @@ -201,6 +337,72 @@ func (in *NodeResourceUtilizationThresholds) DeepCopy() *NodeResourceUtilization return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Plugin) DeepCopyInto(out *Plugin) { + *out = *in + if in.Enabled != nil { + in, out := &in.Enabled, &out.Enabled + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Disabled != nil { + in, out := &in.Disabled, &out.Disabled + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Plugin. +func (in *Plugin) DeepCopy() *Plugin { + if in == nil { + return nil + } + out := new(Plugin) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PluginConfig) DeepCopyInto(out *PluginConfig) { + *out = *in + if in.Args != nil { + out.Args = in.Args.DeepCopyObject() + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PluginConfig. +func (in *PluginConfig) DeepCopy() *PluginConfig { + if in == nil { + return nil + } + out := new(PluginConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Plugins) DeepCopyInto(out *Plugins) { + *out = *in + in.PreSort.DeepCopyInto(&out.PreSort) + in.Sort.DeepCopyInto(&out.Sort) + in.Deschedule.DeepCopyInto(&out.Deschedule) + in.Balance.DeepCopyInto(&out.Balance) + in.Evict.DeepCopyInto(&out.Evict) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Plugins. +func (in *Plugins) DeepCopy() *Plugins { + if in == nil { + return nil + } + out := new(Plugins) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PodLifeTime) DeepCopyInto(out *PodLifeTime) { *out = *in @@ -227,6 +429,43 @@ func (in *PodLifeTime) DeepCopy() *PodLifeTime { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *PodLifeTimeArgs) DeepCopyInto(out *PodLifeTimeArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Namespaces != nil { + in, out := &in.Namespaces, &out.Namespaces + *out = new(Namespaces) + (*in).DeepCopyInto(*out) + } + if in.LabelSelector != nil { + in, out := &in.LabelSelector, &out.LabelSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } + if in.MaxPodLifeTimeSeconds != nil { + in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds + *out = new(uint) + **out = **in + } + if in.PodStatusPhases != nil { + in, out := &in.PodStatusPhases, &out.PodStatusPhases + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodLifeTimeArgs. +func (in *PodLifeTimeArgs) DeepCopy() *PodLifeTimeArgs { + if in == nil { + return nil + } + out := new(PodLifeTimeArgs) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PodsHavingTooManyRestarts) DeepCopyInto(out *PodsHavingTooManyRestarts) { *out = *in @@ -243,6 +482,78 @@ func (in *PodsHavingTooManyRestarts) DeepCopy() *PodsHavingTooManyRestarts { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PriorityThreshold) DeepCopyInto(out *PriorityThreshold) { + *out = *in + if in.Value != nil { + in, out := &in.Value, &out.Value + *out = new(int32) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PriorityThreshold. +func (in *PriorityThreshold) DeepCopy() *PriorityThreshold { + if in == nil { + return nil + } + out := new(PriorityThreshold) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Profile) DeepCopyInto(out *Profile) { + *out = *in + if in.PluginConfig != nil { + in, out := &in.PluginConfig, &out.PluginConfig + *out = make([]PluginConfig, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + in.Plugins.DeepCopyInto(&out.Plugins) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Profile. +func (in *Profile) DeepCopy() *Profile { + if in == nil { + return nil + } + out := new(Profile) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RemoveDuplicatePodsArgs) DeepCopyInto(out *RemoveDuplicatePodsArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Namespaces != nil { + in, out := &in.Namespaces, &out.Namespaces + *out = new(Namespaces) + (*in).DeepCopyInto(*out) + } + if in.ExcludeOwnerKinds != nil { + in, out := &in.ExcludeOwnerKinds, &out.ExcludeOwnerKinds + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemoveDuplicatePodsArgs. +func (in *RemoveDuplicatePodsArgs) DeepCopy() *RemoveDuplicatePodsArgs { + if in == nil { + return nil + } + out := new(RemoveDuplicatePodsArgs) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *RemoveDuplicates) DeepCopyInto(out *RemoveDuplicates) { *out = *in @@ -264,6 +575,193 @@ func (in *RemoveDuplicates) DeepCopy() *RemoveDuplicates { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RemoveFailedPodsArgs) DeepCopyInto(out *RemoveFailedPodsArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Namespaces != nil { + in, out := &in.Namespaces, &out.Namespaces + *out = new(Namespaces) + (*in).DeepCopyInto(*out) + } + if in.LabelSelector != nil { + in, out := &in.LabelSelector, &out.LabelSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } + if in.MinPodLifetimeSeconds != nil { + in, out := &in.MinPodLifetimeSeconds, &out.MinPodLifetimeSeconds + *out = new(uint) + **out = **in + } + if in.Reasons != nil { + in, out := &in.Reasons, &out.Reasons + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.ExcludeOwnerKinds != nil { + in, out := &in.ExcludeOwnerKinds, &out.ExcludeOwnerKinds + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemoveFailedPodsArgs. +func (in *RemoveFailedPodsArgs) DeepCopy() *RemoveFailedPodsArgs { + if in == nil { + return nil + } + out := new(RemoveFailedPodsArgs) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RemovePodsHavingTooManyRestartsArgs) DeepCopyInto(out *RemovePodsHavingTooManyRestartsArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Namespaces != nil { + in, out := &in.Namespaces, &out.Namespaces + *out = new(Namespaces) + (*in).DeepCopyInto(*out) + } + if in.LabelSelector != nil { + in, out := &in.LabelSelector, &out.LabelSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemovePodsHavingTooManyRestartsArgs. +func (in *RemovePodsHavingTooManyRestartsArgs) DeepCopy() *RemovePodsHavingTooManyRestartsArgs { + if in == nil { + return nil + } + out := new(RemovePodsHavingTooManyRestartsArgs) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RemovePodsViolatingInterPodAntiAffinityArgs) DeepCopyInto(out *RemovePodsViolatingInterPodAntiAffinityArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Namespaces != nil { + in, out := &in.Namespaces, &out.Namespaces + *out = new(Namespaces) + (*in).DeepCopyInto(*out) + } + if in.LabelSelector != nil { + in, out := &in.LabelSelector, &out.LabelSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemovePodsViolatingInterPodAntiAffinityArgs. +func (in *RemovePodsViolatingInterPodAntiAffinityArgs) DeepCopy() *RemovePodsViolatingInterPodAntiAffinityArgs { + if in == nil { + return nil + } + out := new(RemovePodsViolatingInterPodAntiAffinityArgs) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *RemovePodsViolatingNodeAffinityArgs) DeepCopyInto(out *RemovePodsViolatingNodeAffinityArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Namespaces != nil { + in, out := &in.Namespaces, &out.Namespaces + *out = new(Namespaces) + (*in).DeepCopyInto(*out) + } + if in.LabelSelector != nil { + in, out := &in.LabelSelector, &out.LabelSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } + if in.NodeAffinityType != nil { + in, out := &in.NodeAffinityType, &out.NodeAffinityType + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemovePodsViolatingNodeAffinityArgs. +func (in *RemovePodsViolatingNodeAffinityArgs) DeepCopy() *RemovePodsViolatingNodeAffinityArgs { + if in == nil { + return nil + } + out := new(RemovePodsViolatingNodeAffinityArgs) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RemovePodsViolatingNodeTaintsArgs) DeepCopyInto(out *RemovePodsViolatingNodeTaintsArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Namespaces != nil { + in, out := &in.Namespaces, &out.Namespaces + *out = new(Namespaces) + (*in).DeepCopyInto(*out) + } + if in.LabelSelector != nil { + in, out := &in.LabelSelector, &out.LabelSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } + if in.ExcludedTaints != nil { + in, out := &in.ExcludedTaints, &out.ExcludedTaints + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemovePodsViolatingNodeTaintsArgs. +func (in *RemovePodsViolatingNodeTaintsArgs) DeepCopy() *RemovePodsViolatingNodeTaintsArgs { + if in == nil { + return nil + } + out := new(RemovePodsViolatingNodeTaintsArgs) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RemovePodsViolatingTopologySpreadConstraintArgs) DeepCopyInto(out *RemovePodsViolatingTopologySpreadConstraintArgs) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.Namespaces != nil { + in, out := &in.Namespaces, &out.Namespaces + *out = new(Namespaces) + (*in).DeepCopyInto(*out) + } + if in.LabelSelector != nil { + in, out := &in.LabelSelector, &out.LabelSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemovePodsViolatingTopologySpreadConstraintArgs. +func (in *RemovePodsViolatingTopologySpreadConstraintArgs) DeepCopy() *RemovePodsViolatingTopologySpreadConstraintArgs { + if in == nil { + return nil + } + out := new(RemovePodsViolatingTopologySpreadConstraintArgs) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
 func (in ResourceThresholds) DeepCopyInto(out *ResourceThresholds) {
 	{
diff --git a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go
index 33def95341..99b159cddf 100644
--- a/pkg/descheduler/descheduler.go
+++ b/pkg/descheduler/descheduler.go
@@ -21,30 +21,32 @@ import (
 	"fmt"
 	v1 "k8s.io/api/core/v1"
+	policyv1 "k8s.io/api/policy/v1"
 	policy "k8s.io/api/policy/v1beta1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/informers"
+	corev1informers "k8s.io/client-go/informers/core/v1"
+	schedulingv1informers "k8s.io/client-go/informers/scheduling/v1"
 	clientset "k8s.io/client-go/kubernetes"
 	fakeclientset "k8s.io/client-go/kubernetes/fake"
 	core "k8s.io/client-go/testing"
 	"k8s.io/klog/v2"
-	corev1informers "k8s.io/client-go/informers/core/v1"
-	schedulingv1informers "k8s.io/client-go/informers/scheduling/v1"
-
 	"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
 	"sigs.k8s.io/descheduler/metrics"
 	"sigs.k8s.io/descheduler/pkg/api"
+	"sigs.k8s.io/descheduler/pkg/api/v1alpha2"
 	"sigs.k8s.io/descheduler/pkg/descheduler/client"
 	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
 	eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils"
 	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
 	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
-	"sigs.k8s.io/descheduler/pkg/descheduler/strategies"
-	"sigs.k8s.io/descheduler/pkg/descheduler/strategies/nodeutilization"
+	"sigs.k8s.io/descheduler/pkg/framework"
+	"sigs.k8s.io/descheduler/pkg/framework/registry"
+	frameworkruntime "sigs.k8s.io/descheduler/pkg/framework/runtime"
 )
 
 func Run(ctx context.Context, rs *options.DeschedulerServer) error {
@@ -165,6 +167,104 @@ func cachedClient(
 	return fakeClient, nil
 }
+type Descheduler struct {
+	podEvictor   *framework.PodEvictor
+	framework    *frameworkruntime.Framework
+	nodeSelector string
+	clientSet    clientset.Interface
+	nodeInformer corev1informers.NodeInformer
+
+	nodepodCount      map[string]uint
+	namespacePodCount map[string]uint
+	evicted           uint
+
+	maxPodsToEvictPerNode      *uint
+	maxPodsToEvictPerNamespace *uint
+}
+
+// Evict evicts the given pod unless a configured per-node or per-namespace eviction
+// limit would be exceeded; it returns true only when the pod was actually evicted.
+func (d *Descheduler) Evict(ctx context.Context, pod *v1.Pod) bool {
+	if d.maxPodsToEvictPerNode != nil && d.nodepodCount[pod.Spec.NodeName]+1 > *d.maxPodsToEvictPerNode {
+		klog.ErrorS(fmt.Errorf("maximum number of evicted pods per node reached"), "Pod eviction limit reached", "limit", *d.maxPodsToEvictPerNode, "node", pod.Spec.NodeName)
+		return false
+	}
+	if d.maxPodsToEvictPerNamespace != nil && d.namespacePodCount[pod.Namespace]+1 > *d.maxPodsToEvictPerNamespace {
+		klog.ErrorS(fmt.Errorf("maximum number of evicted pods per namespace reached"), "Pod eviction limit reached", "limit", *d.maxPodsToEvictPerNamespace, "namespace", pod.Namespace)
+		return false
+	}
+	if d.podEvictor.Evict(ctx, pod) {
+		d.nodepodCount[pod.Spec.NodeName]++
+		d.namespacePodCount[pod.Namespace]++
+		d.evicted++
+		return true
+	}
+	return false
+}
+
+// deschedulerOnce runs a single descheduling cycle over the currently ready nodes.
+func (d *Descheduler) deschedulerOnce(ctx context.Context) error {
+	d.nodepodCount = make(map[string]uint)
+	d.namespacePodCount = make(map[string]uint)
+
+	nodes, err := nodeutil.ReadyNodes(ctx, d.clientSet, d.nodeInformer, d.nodeSelector)
+	if err != nil {
+		return fmt.Errorf("unable to get ready nodes: %v", err)
+	}
+
+	if len(nodes) <= 1 {
+		return fmt.Errorf("the cluster size is 0 or 1 meaning eviction causes service disruption or degradation")
+	}
+
+	if status := d.framework.RunDeschedulePlugins(ctx, nodes); status != nil && status.Err != nil {
+		return status.Err
+	}
+
+	if status := d.framework.RunBalancePlugins(ctx, nodes); status != nil && status.Err != nil {
+		return status.Err
+	}
+
+	return nil
+}
+
+func resetFramework(
+	desch *Descheduler,
+	config v1alpha2.DeschedulerConfiguration,
+	pluginReg registry.Registry,
+	realClient clientset.Interface,
+	podInformer corev1informers.PodInformer,
+	nodeInformer corev1informers.NodeInformer,
+	namespaceInformer corev1informers.NamespaceInformer,
+	priorityClassInformer schedulingv1informers.PriorityClassInformer,
+) (
+	context.CancelFunc,
+	error,
+) {
+	// When dry mode is enabled, collect all the relevant objects (mostly pods) under a fake client
+	// so that evicting pods while running multiple strategies in a row has the same cumulative effect
+	// as when evicting pods for real.
+	klog.V(3).Infof("Building a cached client from the cluster for the dry run")
+	// Create a new cache so we start from scratch without any leftovers
+	fakeClient, err := cachedClient(realClient, podInformer, nodeInformer, namespaceInformer, priorityClassInformer)
+	if err != nil {
+		return nil, err
+	}
+
+	fakeSharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
+	fakeCtx, cncl := context.WithCancel(context.TODO())
+	fakeSharedInformerFactory.Start(fakeCtx.Done())
+	fakeSharedInformerFactory.WaitForCacheSync(fakeCtx.Done())
+
+	frmwrk, err := frameworkruntime.NewFramework(config,
+		frameworkruntime.WithClientSet(fakeClient),
+		frameworkruntime.WithSharedInformerFactory(fakeSharedInformerFactory),
+		frameworkruntime.WithPodEvictor(desch),
+		frameworkruntime.WithRegistry(pluginReg),
+	)
+	if err != nil {
+		cncl()
+		return cncl, err
+	}
+
+	desch.framework = frmwrk
+	return cncl, nil
+}
+
 func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy, evictionPolicyGroupVersion string) error {
 	sharedInformerFactory := informers.NewSharedInformerFactory(rs.Client, 0)
 	nodeInformer := sharedInformerFactory.Core().V1().Nodes()
@@ -172,139 +272,79 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
 	namespaceInformer := sharedInformerFactory.Core().V1().Namespaces()
 	priorityClassInformer := sharedInformerFactory.Scheduling().V1().PriorityClasses()
-	ctx, cancel := context.WithCancel(ctx)
-	defer cancel()
-
-	// create the informers
+	// create the informers before starting the informer factory
 	namespaceInformer.Informer()
 	priorityClassInformer.Informer()
-	getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
-	if err != nil {
-		return fmt.Errorf("build get pods assigned to node function error: %v", err)
-	}
+	ctx, cancel := context.WithCancel(ctx)
+	defer cancel()
 	sharedInformerFactory.Start(ctx.Done())
 	sharedInformerFactory.WaitForCacheSync(ctx.Done())
-	strategyFuncs := map[api.StrategyName]strategyFunction{
-		"RemoveDuplicates":                            strategies.RemoveDuplicatePods,
-		"LowNodeUtilization":                          nodeutilization.LowNodeUtilization,
-		"HighNodeUtilization":                         nodeutilization.HighNodeUtilization,
-		"RemovePodsViolatingInterPodAntiAffinity":     strategies.RemovePodsViolatingInterPodAntiAffinity,
-		"RemovePodsViolatingNodeAffinity":             strategies.RemovePodsViolatingNodeAffinity,
-		"RemovePodsViolatingNodeTaints":               strategies.RemovePodsViolatingNodeTaints,
-		"RemovePodsHavingTooManyRestarts":             strategies.RemovePodsHavingTooManyRestarts,
-		"PodLifeTime":                                 strategies.PodLifeTime,
-		"RemovePodsViolatingTopologySpreadConstraint": strategies.RemovePodsViolatingTopologySpreadConstraint,
-		"RemoveFailedPods":
strategies.RemoveFailedPods, - } - var nodeSelector string if deschedulerPolicy.NodeSelector != nil { nodeSelector = *deschedulerPolicy.NodeSelector } - var evictLocalStoragePods bool - if deschedulerPolicy.EvictLocalStoragePods != nil { - evictLocalStoragePods = *deschedulerPolicy.EvictLocalStoragePods + klog.V(3).Infof("Building a pod evictor") + podEvictor := framework.NewPodEvictor( + rs.Client, + policyv1.SchemeGroupVersion.String(), + rs.DryRun, + nil, + nil, + !rs.DisableMetrics, + ) + + desch := &Descheduler{ + podEvictor: podEvictor, + clientSet: rs.Client, + nodeInformer: nodeInformer, + nodeSelector: nodeSelector, + nodepodCount: make(map[string]uint), + namespacePodCount: make(map[string]uint), + maxPodsToEvictPerNode: deschedulerPolicy.MaxNoOfPodsToEvictPerNode, + maxPodsToEvictPerNamespace: deschedulerPolicy.MaxNoOfPodsToEvictPerNamespace, } - evictBarePods := false - if deschedulerPolicy.EvictFailedBarePods != nil { - evictBarePods = *deschedulerPolicy.EvictFailedBarePods - if evictBarePods { - klog.V(1).InfoS("Warning: EvictFailedBarePods is set to True. This could cause eviction of pods without ownerReferences.") - } - } + pluginReg := registry.NewRegistry() - evictSystemCriticalPods := false - if deschedulerPolicy.EvictSystemCriticalPods != nil { - evictSystemCriticalPods = *deschedulerPolicy.EvictSystemCriticalPods - if evictSystemCriticalPods { - klog.V(1).InfoS("Warning: EvictSystemCriticalPods is set to True. This could cause eviction of Kubernetes system pods.") - } - } + config := v1alpha2.DeschedulerConfiguration{} - ignorePvcPods := false - if deschedulerPolicy.IgnorePVCPods != nil { - ignorePvcPods = *deschedulerPolicy.IgnorePVCPods - } - - wait.NonSlidingUntil(func() { - nodes, err := nodeutil.ReadyNodes(ctx, rs.Client, nodeInformer, nodeSelector) + if !rs.DryRun { + frmwrk, err := frameworkruntime.NewFramework(config, + frameworkruntime.WithClientSet(rs.Client), + frameworkruntime.WithSharedInformerFactory(sharedInformerFactory), + frameworkruntime.WithPodEvictor(desch), + frameworkruntime.WithRegistry(pluginReg), + ) if err != nil { - klog.V(1).InfoS("Unable to get ready nodes", "err", err) - cancel() - return - } - - if len(nodes) <= 1 { - klog.V(1).InfoS("The cluster size is 0 or 1 meaning eviction causes service disruption or degradation. So aborting..") - cancel() - return + return fmt.Errorf("Unable to initialize framework: %v", err) } + desch.framework = frmwrk + } - var podEvictorClient clientset.Interface - // When the dry mode is enable, collect all the relevant objects (mostly pods) under a fake client. - // So when evicting pods while running multiple strategies in a row have the cummulative effect - // as is when evicting pods for real. 
+ wait.NonSlidingUntil(func() { if rs.DryRun { - klog.V(3).Infof("Building a cached client from the cluster for the dry run") - // Create a new cache so we start from scratch without any leftovers - fakeClient, err := cachedClient(rs.Client, podInformer, nodeInformer, namespaceInformer, priorityClassInformer) + cncl, err := resetFramework(desch, config, pluginReg, rs.Client, podInformer, nodeInformer, namespaceInformer, priorityClassInformer) if err != nil { klog.Error(err) + cancel() return } - - fakeSharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0) - getPodsAssignedToNode, err = podutil.BuildGetPodsAssignedToNodeFunc(fakeSharedInformerFactory.Core().V1().Pods()) - if err != nil { - klog.Errorf("build get pods assigned to node function error: %v", err) - return - } - - fakeCtx, cncl := context.WithCancel(context.TODO()) defer cncl() - fakeSharedInformerFactory.Start(fakeCtx.Done()) - fakeSharedInformerFactory.WaitForCacheSync(fakeCtx.Done()) - - podEvictorClient = fakeClient - } else { - podEvictorClient = rs.Client } - - klog.V(3).Infof("Building a pod evictor") - podEvictor := evictions.NewPodEvictor( - podEvictorClient, - evictionPolicyGroupVersion, - rs.DryRun, - deschedulerPolicy.MaxNoOfPodsToEvictPerNode, - deschedulerPolicy.MaxNoOfPodsToEvictPerNamespace, - nodes, - evictLocalStoragePods, - evictSystemCriticalPods, - ignorePvcPods, - evictBarePods, - !rs.DisableMetrics, - ) - - for name, strategy := range deschedulerPolicy.Strategies { - if f, ok := strategyFuncs[name]; ok { - if strategy.Enabled { - f(ctx, rs.Client, strategy, nodes, podEvictor, getPodsAssignedToNode) - } - } else { - klog.ErrorS(fmt.Errorf("unknown strategy name"), "skipping strategy", "strategy", name) - } + if err := desch.deschedulerOnce(ctx); err != nil { + klog.Errorf("Error descheduling pods: %v", err) } - klog.V(1).InfoS("Number of evicted pods", "totalEvicted", podEvictor.TotalEvicted()) + klog.V(1).InfoS("Number of evicted pods", "totalEvicted", desch.evicted) // If there was no interval specified, send a signal to the stopChannel to end the wait.Until loop after 1 iteration if rs.DeschedulingInterval.Seconds() == 0 { cancel() + return } }, rs.DeschedulingInterval, ctx.Done()) diff --git a/pkg/descheduler/scheme/scheme.go b/pkg/descheduler/scheme/scheme.go index 2949193c1e..cbd4c16c13 100644 --- a/pkg/descheduler/scheme/scheme.go +++ b/pkg/descheduler/scheme/scheme.go @@ -22,6 +22,7 @@ import ( utilruntime "k8s.io/apimachinery/pkg/util/runtime" "sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/api/v1alpha1" + "sigs.k8s.io/descheduler/pkg/api/v1alpha2" "sigs.k8s.io/descheduler/pkg/apis/componentconfig" componentconfigv1alpha1 "sigs.k8s.io/descheduler/pkg/apis/componentconfig/v1alpha1" ) @@ -34,6 +35,7 @@ var ( func init() { utilruntime.Must(api.AddToScheme(Scheme)) utilruntime.Must(v1alpha1.AddToScheme(Scheme)) + utilruntime.Must(v1alpha2.AddToScheme(Scheme)) utilruntime.Must(componentconfig.AddToScheme(Scheme)) utilruntime.Must(componentconfigv1alpha1.AddToScheme(Scheme)) diff --git a/pkg/descheduler/strategies/failedpods.go b/pkg/descheduler/strategies/failedpods.go deleted file mode 100644 index b2942509b3..0000000000 --- a/pkg/descheduler/strategies/failedpods.go +++ /dev/null @@ -1,179 +0,0 @@ -package strategies - -import ( - "context" - "fmt" - - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - utilerrors "k8s.io/apimachinery/pkg/util/errors" - "k8s.io/apimachinery/pkg/util/sets" - clientset "k8s.io/client-go/kubernetes" - 
"k8s.io/klog/v2" - - "sigs.k8s.io/descheduler/pkg/api" - "sigs.k8s.io/descheduler/pkg/descheduler/evictions" - podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" - "sigs.k8s.io/descheduler/pkg/descheduler/strategies/validation" -) - -// validatedFailedPodsStrategyParams contains validated strategy parameters -type validatedFailedPodsStrategyParams struct { - validation.ValidatedStrategyParams - includingInitContainers bool - reasons sets.String - excludeOwnerKinds sets.String - minPodLifetimeSeconds *uint -} - -// RemoveFailedPods removes Pods that are in failed status phase. -func RemoveFailedPods( - ctx context.Context, - client clientset.Interface, - strategy api.DeschedulerStrategy, - nodes []*v1.Node, - podEvictor *evictions.PodEvictor, - getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc, -) { - strategyParams, err := validateAndParseRemoveFailedPodsParams(ctx, client, strategy.Params) - if err != nil { - klog.ErrorS(err, "Invalid RemoveFailedPods parameters") - return - } - - evictable := podEvictor.Evictable( - evictions.WithPriorityThreshold(strategyParams.ThresholdPriority), - evictions.WithNodeFit(strategyParams.NodeFit), - evictions.WithLabelSelector(strategyParams.LabelSelector), - ) - - var labelSelector *metav1.LabelSelector - if strategy.Params != nil { - labelSelector = strategy.Params.LabelSelector - } - - podFilter, err := podutil.NewOptions(). - WithFilter(evictable.IsEvictable). - WithNamespaces(strategyParams.IncludedNamespaces). - WithoutNamespaces(strategyParams.ExcludedNamespaces). - WithLabelSelector(labelSelector). - BuildFilterFunc() - if err != nil { - klog.ErrorS(err, "Error initializing pod filter function") - return - } - // Only list failed pods - phaseFilter := func(pod *v1.Pod) bool { return pod.Status.Phase == v1.PodFailed } - podFilter = podutil.WrapFilterFuncs(phaseFilter, podFilter) - - for _, node := range nodes { - klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) - pods, err := podutil.ListAllPodsOnANode(node.Name, getPodsAssignedToNode, podFilter) - if err != nil { - klog.ErrorS(err, "Error listing a nodes failed pods", "node", klog.KObj(node)) - continue - } - - for i, pod := range pods { - if err = validateFailedPodShouldEvict(pod, *strategyParams); err != nil { - klog.V(4).InfoS(fmt.Sprintf("ignoring pod for eviction due to: %s", err.Error()), "pod", klog.KObj(pod)) - continue - } - - if _, err = podEvictor.EvictPod(ctx, pods[i], node, "FailedPod"); err != nil { - klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) - break - } - } - } -} - -func validateAndParseRemoveFailedPodsParams( - ctx context.Context, - client clientset.Interface, - params *api.StrategyParameters, -) (*validatedFailedPodsStrategyParams, error) { - if params == nil { - return &validatedFailedPodsStrategyParams{ - ValidatedStrategyParams: validation.DefaultValidatedStrategyParams(), - }, nil - } - - strategyParams, err := validation.ValidateAndParseStrategyParams(ctx, client, params) - if err != nil { - return nil, err - } - - var reasons, excludeOwnerKinds sets.String - var includingInitContainers bool - var minPodLifetimeSeconds *uint - if params.FailedPods != nil { - reasons = sets.NewString(params.FailedPods.Reasons...) - includingInitContainers = params.FailedPods.IncludingInitContainers - excludeOwnerKinds = sets.NewString(params.FailedPods.ExcludeOwnerKinds...) 
- minPodLifetimeSeconds = params.FailedPods.MinPodLifetimeSeconds - } - - return &validatedFailedPodsStrategyParams{ - ValidatedStrategyParams: *strategyParams, - includingInitContainers: includingInitContainers, - reasons: reasons, - excludeOwnerKinds: excludeOwnerKinds, - minPodLifetimeSeconds: minPodLifetimeSeconds, - }, nil -} - -// validateFailedPodShouldEvict looks at strategy params settings to see if the Pod -// should be evicted given the params in the PodFailed policy. -func validateFailedPodShouldEvict(pod *v1.Pod, strategyParams validatedFailedPodsStrategyParams) error { - var errs []error - - if strategyParams.minPodLifetimeSeconds != nil { - podAgeSeconds := uint(metav1.Now().Sub(pod.GetCreationTimestamp().Local()).Seconds()) - if podAgeSeconds < *strategyParams.minPodLifetimeSeconds { - errs = append(errs, fmt.Errorf("pod does not exceed the min age seconds of %d", *strategyParams.minPodLifetimeSeconds)) - } - } - - if len(strategyParams.excludeOwnerKinds) > 0 { - ownerRefList := podutil.OwnerRef(pod) - for _, owner := range ownerRefList { - if strategyParams.excludeOwnerKinds.Has(owner.Kind) { - errs = append(errs, fmt.Errorf("pod's owner kind of %s is excluded", owner.Kind)) - } - } - } - - if len(strategyParams.reasons) > 0 { - reasons := getFailedContainerStatusReasons(pod.Status.ContainerStatuses) - - if pod.Status.Phase == v1.PodFailed && pod.Status.Reason != "" { - reasons = append(reasons, pod.Status.Reason) - } - - if strategyParams.includingInitContainers { - reasons = append(reasons, getFailedContainerStatusReasons(pod.Status.InitContainerStatuses)...) - } - - if !strategyParams.reasons.HasAny(reasons...) { - errs = append(errs, fmt.Errorf("pod does not match any of the reasons")) - } - } - - return utilerrors.NewAggregate(errs) -} - -func getFailedContainerStatusReasons(containerStatuses []v1.ContainerStatus) []string { - reasons := make([]string, 0) - - for _, containerStatus := range containerStatuses { - if containerStatus.State.Waiting != nil && containerStatus.State.Waiting.Reason != "" { - reasons = append(reasons, containerStatus.State.Waiting.Reason) - } - if containerStatus.State.Terminated != nil && containerStatus.State.Terminated.Reason != "" { - reasons = append(reasons, containerStatus.State.Terminated.Reason) - } - } - - return reasons -} diff --git a/pkg/descheduler/strategies/node_affinity.go b/pkg/descheduler/strategies/node_affinity.go deleted file mode 100644 index c768e6b78c..0000000000 --- a/pkg/descheduler/strategies/node_affinity.go +++ /dev/null @@ -1,120 +0,0 @@ -/* -Copyright 2017 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package strategies - -import ( - "context" - "fmt" - - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/util/sets" - clientset "k8s.io/client-go/kubernetes" - "k8s.io/klog/v2" - - "sigs.k8s.io/descheduler/pkg/api" - "sigs.k8s.io/descheduler/pkg/descheduler/evictions" - nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node" - podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" - "sigs.k8s.io/descheduler/pkg/utils" -) - -func validatePodsViolatingNodeAffinityParams(params *api.StrategyParameters) error { - if params == nil || len(params.NodeAffinityType) == 0 { - return fmt.Errorf("NodeAffinityType is empty") - } - // At most one of include/exclude can be set - if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 { - return fmt.Errorf("only one of Include/Exclude namespaces can be set") - } - if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" { - return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set") - } - - return nil -} - -// RemovePodsViolatingNodeAffinity evicts pods on nodes which violate node affinity -func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc) { - if err := validatePodsViolatingNodeAffinityParams(strategy.Params); err != nil { - klog.ErrorS(err, "Invalid RemovePodsViolatingNodeAffinity parameters") - return - } - thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params) - if err != nil { - klog.ErrorS(err, "Failed to get threshold priority from strategy's params") - return - } - - var includedNamespaces, excludedNamespaces sets.String - if strategy.Params.Namespaces != nil { - includedNamespaces = sets.NewString(strategy.Params.Namespaces.Include...) - excludedNamespaces = sets.NewString(strategy.Params.Namespaces.Exclude...) - } - - nodeFit := false - if strategy.Params != nil { - nodeFit = strategy.Params.NodeFit - } - - evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit)) - - podFilter, err := podutil.NewOptions(). - WithNamespaces(includedNamespaces). - WithoutNamespaces(excludedNamespaces). - WithLabelSelector(strategy.Params.LabelSelector). 
- BuildFilterFunc() - if err != nil { - klog.ErrorS(err, "Error initializing pod filter function") - return - } - - for _, nodeAffinity := range strategy.Params.NodeAffinityType { - klog.V(2).InfoS("Executing for nodeAffinityType", "nodeAffinity", nodeAffinity) - - switch nodeAffinity { - case "requiredDuringSchedulingIgnoredDuringExecution": - for _, node := range nodes { - klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) - - pods, err := podutil.ListPodsOnANode( - node.Name, - getPodsAssignedToNode, - podutil.WrapFilterFuncs(podFilter, func(pod *v1.Pod) bool { - return evictable.IsEvictable(pod) && - !nodeutil.PodFitsCurrentNode(pod, node) && - nodeutil.PodFitsAnyNode(pod, nodes) - }), - ) - if err != nil { - klog.ErrorS(err, "Failed to get pods", "node", klog.KObj(node)) - } - - for _, pod := range pods { - if pod.Spec.Affinity != nil && pod.Spec.Affinity.NodeAffinity != nil && pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil { - klog.V(1).InfoS("Evicting pod", "pod", klog.KObj(pod)) - if _, err := podEvictor.EvictPod(ctx, pod, node, "NodeAffinity"); err != nil { - klog.ErrorS(err, "Error evicting pod") - break - } - } - } - } - default: - klog.ErrorS(nil, "Invalid nodeAffinityType", "nodeAffinity", nodeAffinity) - } - } -} diff --git a/pkg/descheduler/strategies/node_taint.go b/pkg/descheduler/strategies/node_taint.go deleted file mode 100644 index 64cf8d48c8..0000000000 --- a/pkg/descheduler/strategies/node_taint.go +++ /dev/null @@ -1,129 +0,0 @@ -/* -Copyright 2017 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package strategies - -import ( - "context" - "fmt" - - "k8s.io/apimachinery/pkg/util/sets" - "sigs.k8s.io/descheduler/pkg/api" - "sigs.k8s.io/descheduler/pkg/descheduler/evictions" - podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" - "sigs.k8s.io/descheduler/pkg/utils" - - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - clientset "k8s.io/client-go/kubernetes" - "k8s.io/klog/v2" -) - -func validateRemovePodsViolatingNodeTaintsParams(params *api.StrategyParameters) error { - if params == nil { - return nil - } - - // At most one of include/exclude can be set - if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 { - return fmt.Errorf("only one of Include/Exclude namespaces can be set") - } - if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" { - return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set") - } - - return nil -} - -// RemovePodsViolatingNodeTaints evicts pods on the node which violate NoSchedule Taints on nodes -func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc) { - if err := validateRemovePodsViolatingNodeTaintsParams(strategy.Params); err != nil { - klog.ErrorS(err, "Invalid RemovePodsViolatingNodeTaints parameters") - return - } - - var includedNamespaces, excludedNamespaces, excludedTaints sets.String - var labelSelector *metav1.LabelSelector - if strategy.Params != nil { - if strategy.Params.Namespaces != nil { - includedNamespaces = sets.NewString(strategy.Params.Namespaces.Include...) - excludedNamespaces = sets.NewString(strategy.Params.Namespaces.Exclude...) - } - if strategy.Params.ExcludedTaints != nil { - excludedTaints = sets.NewString(strategy.Params.ExcludedTaints...) - } - labelSelector = strategy.Params.LabelSelector - } - - thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params) - if err != nil { - klog.ErrorS(err, "Failed to get threshold priority from strategy's params") - return - } - - nodeFit := false - if strategy.Params != nil { - nodeFit = strategy.Params.NodeFit - } - - evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit)) - - podFilter, err := podutil.NewOptions(). - WithFilter(evictable.IsEvictable). - WithNamespaces(includedNamespaces). - WithoutNamespaces(excludedNamespaces). - WithLabelSelector(labelSelector). 
- BuildFilterFunc() - if err != nil { - klog.ErrorS(err, "Error initializing pod filter function") - return - } - - excludeTaint := func(taint *v1.Taint) bool { - // Exclude taints by key *or* key=value - return excludedTaints.Has(taint.Key) || (taint.Value != "" && excludedTaints.Has(fmt.Sprintf("%s=%s", taint.Key, taint.Value))) - } - - taintFilterFnc := func(taint *v1.Taint) bool { return (taint.Effect == v1.TaintEffectNoSchedule) && !excludeTaint(taint) } - if strategy.Params != nil && strategy.Params.IncludePreferNoSchedule { - taintFilterFnc = func(taint *v1.Taint) bool { - return (taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectPreferNoSchedule) && !excludeTaint(taint) - } - } - - for _, node := range nodes { - klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) - pods, err := podutil.ListAllPodsOnANode(node.Name, getPodsAssignedToNode, podFilter) - if err != nil { - //no pods evicted as error encountered retrieving evictable Pods - return - } - totalPods := len(pods) - for i := 0; i < totalPods; i++ { - if !utils.TolerationsTolerateTaintsWithFilter( - pods[i].Spec.Tolerations, - node.Spec.Taints, - taintFilterFnc, - ) { - klog.V(2).InfoS("Not all taints with NoSchedule effect are tolerated after update for pod on node", "pod", klog.KObj(pods[i]), "node", klog.KObj(node)) - if _, err := podEvictor.EvictPod(ctx, pods[i], node, "NodeTaint"); err != nil { - klog.ErrorS(err, "Error evicting pod") - break - } - } - } - } -} diff --git a/pkg/descheduler/strategies/nodeutilization/lownodeutilization.go b/pkg/descheduler/strategies/nodeutilization/lownodeutilization.go deleted file mode 100644 index 3ae79722db..0000000000 --- a/pkg/descheduler/strategies/nodeutilization/lownodeutilization.go +++ /dev/null @@ -1,209 +0,0 @@ -/* -Copyright 2017 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package nodeutilization - -import ( - "context" - "fmt" - - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" - clientset "k8s.io/client-go/kubernetes" - "k8s.io/klog/v2" - - "sigs.k8s.io/descheduler/pkg/api" - "sigs.k8s.io/descheduler/pkg/descheduler/evictions" - nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node" - podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" - "sigs.k8s.io/descheduler/pkg/utils" -) - -// LowNodeUtilization evicts pods from overutilized nodes to underutilized nodes. Note that CPU/Memory requests are used -// to calculate nodes' utilization and not the actual resource usage. -func LowNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc) { - // TODO: May be create a struct for the strategy as well, so that we don't have to pass along the all the params? 
- if err := validateNodeUtilizationParams(strategy.Params); err != nil { - klog.ErrorS(err, "Invalid LowNodeUtilization parameters") - return - } - thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params) - if err != nil { - klog.ErrorS(err, "Failed to get threshold priority from strategy's params") - return - } - - nodeFit := false - if strategy.Params != nil { - nodeFit = strategy.Params.NodeFit - } - useDeviationThresholds := strategy.Params.NodeResourceUtilizationThresholds.UseDeviationThresholds - thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds - targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds - if err := validateLowUtilizationStrategyConfig(thresholds, targetThresholds, useDeviationThresholds); err != nil { - klog.ErrorS(err, "LowNodeUtilization config is not valid") - return - } - - // check if Pods/CPU/Mem are set, if not, set them to 100 - if _, ok := thresholds[v1.ResourcePods]; !ok { - if useDeviationThresholds { - thresholds[v1.ResourcePods] = MinResourcePercentage - targetThresholds[v1.ResourcePods] = MinResourcePercentage - } else { - thresholds[v1.ResourcePods] = MaxResourcePercentage - targetThresholds[v1.ResourcePods] = MaxResourcePercentage - } - } - if _, ok := thresholds[v1.ResourceCPU]; !ok { - if useDeviationThresholds { - thresholds[v1.ResourceCPU] = MinResourcePercentage - targetThresholds[v1.ResourceCPU] = MinResourcePercentage - } else { - thresholds[v1.ResourceCPU] = MaxResourcePercentage - targetThresholds[v1.ResourceCPU] = MaxResourcePercentage - } - } - if _, ok := thresholds[v1.ResourceMemory]; !ok { - if useDeviationThresholds { - thresholds[v1.ResourceMemory] = MinResourcePercentage - targetThresholds[v1.ResourceMemory] = MinResourcePercentage - } else { - thresholds[v1.ResourceMemory] = MaxResourcePercentage - targetThresholds[v1.ResourceMemory] = MaxResourcePercentage - } - } - resourceNames := getResourceNames(thresholds) - - lowNodes, sourceNodes := classifyNodes( - getNodeUsage(nodes, resourceNames, getPodsAssignedToNode), - getNodeThresholds(nodes, thresholds, targetThresholds, resourceNames, getPodsAssignedToNode, useDeviationThresholds), - // The node has to be schedulable (to be able to move workload there) - func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool { - if nodeutil.IsNodeUnschedulable(node) { - klog.V(2).InfoS("Node is unschedulable, thus not considered as underutilized", "node", klog.KObj(node)) - return false - } - return isNodeWithLowUtilization(usage, threshold.lowResourceThreshold) - }, - func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool { - return isNodeAboveTargetUtilization(usage, threshold.highResourceThreshold) - }, - ) - - // log message in one line - keysAndValues := []interface{}{ - "CPU", thresholds[v1.ResourceCPU], - "Mem", thresholds[v1.ResourceMemory], - "Pods", thresholds[v1.ResourcePods], - } - for name := range thresholds { - if !isBasicResource(name) { - keysAndValues = append(keysAndValues, string(name), int64(thresholds[name])) - } - } - klog.V(1).InfoS("Criteria for a node under utilization", keysAndValues...) 
- klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes)) - - // log message in one line - keysAndValues = []interface{}{ - "CPU", targetThresholds[v1.ResourceCPU], - "Mem", targetThresholds[v1.ResourceMemory], - "Pods", targetThresholds[v1.ResourcePods], - } - for name := range targetThresholds { - if !isBasicResource(name) { - keysAndValues = append(keysAndValues, string(name), int64(targetThresholds[name])) - } - } - klog.V(1).InfoS("Criteria for a node above target utilization", keysAndValues...) - klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(sourceNodes)) - - if len(lowNodes) == 0 { - klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further") - return - } - - if len(lowNodes) <= strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes { - klog.V(1).InfoS("Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here", "underutilizedNodes", len(lowNodes), "numberOfNodes", strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes) - return - } - - if len(lowNodes) == len(nodes) { - klog.V(1).InfoS("All nodes are underutilized, nothing to do here") - return - } - - if len(sourceNodes) == 0 { - klog.V(1).InfoS("All nodes are under target utilization, nothing to do here") - return - } - - evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit)) - - // stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved - continueEvictionCond := func(nodeInfo NodeInfo, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool { - if !isNodeAboveTargetUtilization(nodeInfo.NodeUsage, nodeInfo.thresholds.highResourceThreshold) { - return false - } - for name := range totalAvailableUsage { - if totalAvailableUsage[name].CmpInt64(0) < 1 { - return false - } - } - - return true - } - - // Sort the nodes by the usage in descending order - sortNodesByUsage(sourceNodes, false) - - evictPodsFromSourceNodes( - ctx, - sourceNodes, - lowNodes, - podEvictor, - evictable.IsEvictable, - resourceNames, - "LowNodeUtilization", - continueEvictionCond) - - klog.V(1).InfoS("Total number of pods evicted", "evictedPods", podEvictor.TotalEvicted()) -} - -// validateLowUtilizationStrategyConfig checks if the strategy's config is valid -func validateLowUtilizationStrategyConfig(thresholds, targetThresholds api.ResourceThresholds, useDeviationThresholds bool) error { - // validate thresholds and targetThresholds config - if err := validateThresholds(thresholds); err != nil { - return fmt.Errorf("thresholds config is not valid: %v", err) - } - if err := validateThresholds(targetThresholds); err != nil { - return fmt.Errorf("targetThresholds config is not valid: %v", err) - } - - // validate if thresholds and targetThresholds have same resources configured - if len(thresholds) != len(targetThresholds) { - return fmt.Errorf("thresholds and targetThresholds configured different resources") - } - for resourceName, value := range thresholds { - if targetValue, ok := targetThresholds[resourceName]; !ok { - return fmt.Errorf("thresholds and targetThresholds configured different resources") - } else if value > targetValue && !useDeviationThresholds { - return fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", resourceName) - } - } - return nil -} diff --git a/pkg/descheduler/strategies/pod_lifetime.go b/pkg/descheduler/strategies/pod_lifetime.go deleted file mode 100644 
index bf31892da1..0000000000 --- a/pkg/descheduler/strategies/pod_lifetime.go +++ /dev/null @@ -1,142 +0,0 @@ -/* -Copyright 2020 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package strategies - -import ( - "context" - "fmt" - - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/sets" - clientset "k8s.io/client-go/kubernetes" - "k8s.io/klog/v2" - - "sigs.k8s.io/descheduler/pkg/api" - "sigs.k8s.io/descheduler/pkg/descheduler/evictions" - podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" - "sigs.k8s.io/descheduler/pkg/utils" -) - -func validatePodLifeTimeParams(params *api.StrategyParameters) error { - if params == nil || params.PodLifeTime == nil || params.PodLifeTime.MaxPodLifeTimeSeconds == nil { - return fmt.Errorf("MaxPodLifeTimeSeconds not set") - } - - if params.PodLifeTime.PodStatusPhases != nil { - for _, phase := range params.PodLifeTime.PodStatusPhases { - if phase != string(v1.PodPending) && phase != string(v1.PodRunning) { - return fmt.Errorf("only Pending and Running phases are supported in PodLifeTime") - } - } - } - - // At most one of include/exclude can be set - if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 { - return fmt.Errorf("only one of Include/Exclude namespaces can be set") - } - if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" { - return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set") - } - - return nil -} - -// PodLifeTime evicts pods on nodes that were created more than strategy.Params.MaxPodLifeTimeSeconds seconds ago. -func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc) { - if err := validatePodLifeTimeParams(strategy.Params); err != nil { - klog.ErrorS(err, "Invalid PodLifeTime parameters") - return - } - - thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params) - if err != nil { - klog.ErrorS(err, "Failed to get threshold priority from strategy's params") - return - } - - var includedNamespaces, excludedNamespaces sets.String - if strategy.Params.Namespaces != nil { - includedNamespaces = sets.NewString(strategy.Params.Namespaces.Include...) - excludedNamespaces = sets.NewString(strategy.Params.Namespaces.Exclude...) - } - - evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority)) - - filter := evictable.IsEvictable - if strategy.Params.PodLifeTime.PodStatusPhases != nil { - filter = func(pod *v1.Pod) bool { - for _, phase := range strategy.Params.PodLifeTime.PodStatusPhases { - if string(pod.Status.Phase) == phase { - return evictable.IsEvictable(pod) - } - } - return false - } - } - - podFilter, err := podutil.NewOptions(). - WithFilter(filter). - WithNamespaces(includedNamespaces). - WithoutNamespaces(excludedNamespaces). 
- WithLabelSelector(strategy.Params.LabelSelector). - BuildFilterFunc() - if err != nil { - klog.ErrorS(err, "Error initializing pod filter function") - return - } - - for _, node := range nodes { - klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) - - pods := listOldPodsOnNode(node.Name, getPodsAssignedToNode, podFilter, *strategy.Params.PodLifeTime.MaxPodLifeTimeSeconds) - for _, pod := range pods { - success, err := podEvictor.EvictPod(ctx, pod, node, "PodLifeTime") - if success { - klog.V(1).InfoS("Evicted pod because it exceeded its lifetime", "pod", klog.KObj(pod), "maxPodLifeTime", *strategy.Params.PodLifeTime.MaxPodLifeTimeSeconds) - } - - if err != nil { - klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) - break - } - } - - } -} - -func listOldPodsOnNode( - nodeName string, - getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc, - filter podutil.FilterFunc, - maxPodLifeTimeSeconds uint, -) []*v1.Pod { - pods, err := podutil.ListPodsOnANode(nodeName, getPodsAssignedToNode, filter) - if err != nil { - return nil - } - - var oldPods []*v1.Pod - for _, pod := range pods { - podAgeSeconds := uint(metav1.Now().Sub(pod.GetCreationTimestamp().Local()).Seconds()) - if podAgeSeconds > maxPodLifeTimeSeconds { - oldPods = append(oldPods, pod) - } - } - - return oldPods -} diff --git a/pkg/descheduler/strategies/toomanyrestarts.go b/pkg/descheduler/strategies/toomanyrestarts.go deleted file mode 100644 index 629122f076..0000000000 --- a/pkg/descheduler/strategies/toomanyrestarts.go +++ /dev/null @@ -1,127 +0,0 @@ -/* -Copyright 2018 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package strategies - -import ( - "context" - "fmt" - - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/util/sets" - clientset "k8s.io/client-go/kubernetes" - "k8s.io/klog/v2" - - "sigs.k8s.io/descheduler/pkg/api" - "sigs.k8s.io/descheduler/pkg/descheduler/evictions" - podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" - "sigs.k8s.io/descheduler/pkg/utils" -) - -func validateRemovePodsHavingTooManyRestartsParams(params *api.StrategyParameters) error { - if params == nil || params.PodsHavingTooManyRestarts == nil || params.PodsHavingTooManyRestarts.PodRestartThreshold < 1 { - return fmt.Errorf("PodsHavingTooManyRestarts threshold not set") - } - - // At most one of include/exclude can be set - if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 { - return fmt.Errorf("only one of Include/Exclude namespaces can be set") - } - if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" { - return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set") - } - - return nil -} - -// RemovePodsHavingTooManyRestarts removes the pods that have too many restarts on node. -// There are too many cases leading this issue: Volume mount failed, app error due to nodes' different settings. -// As of now, this strategy won't evict daemonsets, mirror pods, critical pods and pods with local storages. 
-func RemovePodsHavingTooManyRestarts(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc) { - if err := validateRemovePodsHavingTooManyRestartsParams(strategy.Params); err != nil { - klog.ErrorS(err, "Invalid RemovePodsHavingTooManyRestarts parameters") - return - } - - thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params) - if err != nil { - klog.ErrorS(err, "Failed to get threshold priority from strategy's params") - return - } - - var includedNamespaces, excludedNamespaces sets.String - if strategy.Params.Namespaces != nil { - includedNamespaces = sets.NewString(strategy.Params.Namespaces.Include...) - excludedNamespaces = sets.NewString(strategy.Params.Namespaces.Exclude...) - } - - nodeFit := false - if strategy.Params != nil { - nodeFit = strategy.Params.NodeFit - } - - evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit)) - - podFilter, err := podutil.NewOptions(). - WithFilter(evictable.IsEvictable). - WithNamespaces(includedNamespaces). - WithoutNamespaces(excludedNamespaces). - WithLabelSelector(strategy.Params.LabelSelector). - BuildFilterFunc() - if err != nil { - klog.ErrorS(err, "Error initializing pod filter function") - return - } - - for _, node := range nodes { - klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) - pods, err := podutil.ListPodsOnANode(node.Name, getPodsAssignedToNode, podFilter) - if err != nil { - klog.ErrorS(err, "Error listing a nodes pods", "node", klog.KObj(node)) - continue - } - - for i, pod := range pods { - restarts, initRestarts := calcContainerRestarts(pod) - if strategy.Params.PodsHavingTooManyRestarts.IncludingInitContainers { - if restarts+initRestarts < strategy.Params.PodsHavingTooManyRestarts.PodRestartThreshold { - continue - } - } else if restarts < strategy.Params.PodsHavingTooManyRestarts.PodRestartThreshold { - continue - } - if _, err := podEvictor.EvictPod(ctx, pods[i], node, "TooManyRestarts"); err != nil { - klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) - break - } - } - } -} - -// calcContainerRestarts get container restarts and init container restarts. 
-func calcContainerRestarts(pod *v1.Pod) (int32, int32) { - var restarts, initRestarts int32 - - for _, cs := range pod.Status.ContainerStatuses { - restarts += cs.RestartCount - } - - for _, cs := range pod.Status.InitContainerStatuses { - initRestarts += cs.RestartCount - } - - return restarts, initRestarts -} diff --git a/pkg/descheduler/strategies/validation/strategyparams.go b/pkg/descheduler/strategies/validation/strategyparams.go deleted file mode 100644 index 33446e6fa1..0000000000 --- a/pkg/descheduler/strategies/validation/strategyparams.go +++ /dev/null @@ -1,71 +0,0 @@ -package validation - -import ( - "context" - "fmt" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/util/sets" - clientset "k8s.io/client-go/kubernetes" - - "sigs.k8s.io/descheduler/pkg/api" - "sigs.k8s.io/descheduler/pkg/utils" -) - -// ValidatedStrategyParams contains validated common strategy parameters -type ValidatedStrategyParams struct { - ThresholdPriority int32 - IncludedNamespaces sets.String - ExcludedNamespaces sets.String - LabelSelector labels.Selector - NodeFit bool -} - -func DefaultValidatedStrategyParams() ValidatedStrategyParams { - return ValidatedStrategyParams{ThresholdPriority: utils.SystemCriticalPriority} -} - -func ValidateAndParseStrategyParams( - ctx context.Context, - client clientset.Interface, - params *api.StrategyParameters, -) (*ValidatedStrategyParams, error) { - if params == nil { - defaultValidatedStrategyParams := DefaultValidatedStrategyParams() - return &defaultValidatedStrategyParams, nil - } - - // At most one of include/exclude can be set - var includedNamespaces, excludedNamespaces sets.String - if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 { - return nil, fmt.Errorf("only one of Include/Exclude namespaces can be set") - } - if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" { - return nil, fmt.Errorf("only one of ThresholdPriority and thresholdPriorityClassName can be set") - } - - thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, params) - if err != nil { - return nil, fmt.Errorf("failed to get threshold priority from strategy's params: %+v", err) - } - if params.Namespaces != nil { - includedNamespaces = sets.NewString(params.Namespaces.Include...) - excludedNamespaces = sets.NewString(params.Namespaces.Exclude...) 
- } - var selector labels.Selector - if params.LabelSelector != nil { - selector, err = metav1.LabelSelectorAsSelector(params.LabelSelector) - if err != nil { - return nil, fmt.Errorf("failed to get label selectors from strategy's params: %+v", err) - } - } - - return &ValidatedStrategyParams{ - ThresholdPriority: thresholdPriority, - IncludedNamespaces: includedNamespaces, - ExcludedNamespaces: excludedNamespaces, - LabelSelector: selector, - NodeFit: params.NodeFit, - }, nil -} diff --git a/pkg/descheduler/strategies/validation/strategyparams_test.go b/pkg/descheduler/strategies/validation/strategyparams_test.go deleted file mode 100644 index 54fd0e2aca..0000000000 --- a/pkg/descheduler/strategies/validation/strategyparams_test.go +++ /dev/null @@ -1,79 +0,0 @@ -package validation - -import ( - "context" - "testing" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes/fake" - - "sigs.k8s.io/descheduler/pkg/api" -) - -var ( - thresholdPriority int32 = 1000 -) - -func TestValidStrategyParams(t *testing.T) { - ctx := context.Background() - fakeClient := &fake.Clientset{} - testCases := []struct { - name string - params *api.StrategyParameters - }{ - {name: "validate nil params", params: nil}, - {name: "validate empty params", params: &api.StrategyParameters{}}, - {name: "validate params with NodeFit", params: &api.StrategyParameters{NodeFit: true}}, - {name: "validate params with ThresholdPriority", params: &api.StrategyParameters{ThresholdPriority: &thresholdPriority}}, - {name: "validate params with priorityClassName", params: &api.StrategyParameters{ThresholdPriorityClassName: "high-priority"}}, - {name: "validate params with excluded namespace", params: &api.StrategyParameters{Namespaces: &api.Namespaces{Exclude: []string{"excluded-ns"}}}}, - {name: "validate params with included namespace", params: &api.StrategyParameters{Namespaces: &api.Namespaces{Include: []string{"include-ns"}}}}, - {name: "validate params with empty label selector", params: &api.StrategyParameters{LabelSelector: &metav1.LabelSelector{}}}, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - params, err := ValidateAndParseStrategyParams(ctx, fakeClient, tc.params) - if err != nil { - t.Errorf("strategy params should be valid but got err: %v", err.Error()) - } - if params == nil { - t.Errorf("strategy params should return a strategyParams but got nil") - } - }) - } -} - -func TestInvalidStrategyParams(t *testing.T) { - ctx := context.Background() - fakeClient := &fake.Clientset{} - testCases := []struct { - name string - params *api.StrategyParameters - }{ - { - name: "invalid params with both included and excluded namespaces nil params", - params: &api.StrategyParameters{Namespaces: &api.Namespaces{Include: []string{"include-ns"}, Exclude: []string{"exclude-ns"}}}, - }, - { - name: "invalid params with both threshold priority and priority class name", - params: &api.StrategyParameters{ThresholdPriorityClassName: "high-priority", ThresholdPriority: &thresholdPriority}, - }, - { - name: "invalid params with bad label selector", - params: &api.StrategyParameters{LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"": "missing-label"}}}, - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - params, err := ValidateAndParseStrategyParams(ctx, fakeClient, tc.params) - if err == nil { - t.Errorf("strategy params should be invalid but did not get err") - } - if params != nil { - t.Errorf("strategy 
params should return a nil strategyParams but got %v", params) - } - }) - } -} diff --git a/pkg/framework/plugins/defaultevictor/evictor.go b/pkg/framework/plugins/defaultevictor/evictor.go new file mode 100644 index 0000000000..20ba2ab607 --- /dev/null +++ b/pkg/framework/plugins/defaultevictor/evictor.go @@ -0,0 +1,224 @@ +package defaultevictor + +import ( + "context" + "fmt" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/errors" + + nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node" + podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" + "sigs.k8s.io/descheduler/pkg/utils" + + "k8s.io/klog/v2" +) + +const ( + PluginName = "DefaultEvictor" + evictPodAnnotationKey = "descheduler.alpha.kubernetes.io/evict" +) + +type constraint func(pod *v1.Pod) error + +// PodLifeTime evicts pods on nodes that were created more than strategy.Params.MaxPodLifeTimeSeconds seconds ago. +type DefaultEvictor struct { + handle framework.Handle + constraints []constraint +} + +var _ framework.Plugin = &DefaultEvictor{} + +var _ framework.EvictPlugin = &DefaultEvictor{} +var _ framework.SortPlugin = &DefaultEvictor{} + +func New(args runtime.Object, handle framework.Handle) (framework.Plugin, error) { + evictorArgs, ok := args.(*framework.DefaultEvictorArgs) + if !ok { + return nil, fmt.Errorf("want args to be of type DefaultEvictorArgs, got %T", args) + } + + constraints := []constraint{} + + if evictorArgs.EvictFailedBarePods { + klog.V(1).InfoS("Warning: EvictFailedBarePods is set to True. This could cause eviction of pods without ownerReferences.") + constraints = append(constraints, func(pod *v1.Pod) error { + ownerRefList := podutil.OwnerRef(pod) + // Enable evictFailedBarePods to evict bare pods in failed phase + if len(ownerRefList) == 0 && pod.Status.Phase != v1.PodFailed { + return fmt.Errorf("pod does not have any ownerRefs and is not in failed phase") + } + return nil + }) + } else { + constraints = append(constraints, func(pod *v1.Pod) error { + ownerRefList := podutil.OwnerRef(pod) + // Moved from IsEvictable function for backward compatibility + if len(ownerRefList) == 0 { + return fmt.Errorf("pod does not have any ownerRefs") + } + return nil + }) + } + + if !evictorArgs.EvictSystemCriticalPods { + klog.V(1).InfoS("Warning: EvictSystemCriticalPods is set to True. 
This could cause eviction of Kubernetes system pods.") + constraints = append(constraints, func(pod *v1.Pod) error { + // Moved from IsEvictable function to allow for disabling + if utils.IsCriticalPriorityPod(pod) { + return fmt.Errorf("pod has system critical priority") + } + return nil + }) + + if evictorArgs.PriorityThreshold != nil { + thresholdPriority, err := utils.GetPriorityValueFromPriorityThreshold(context.TODO(), handle.ClientSet(), evictorArgs.PriorityThreshold) + if err != nil { + return nil, fmt.Errorf("failed to get priority threshold: %v", err) + } + constraints = append(constraints, func(pod *v1.Pod) error { + if isPodEvictableBasedOnPriority(pod, thresholdPriority) { + return nil + } + return fmt.Errorf("pod has higher priority than specified priority class threshold") + }) + } + } + + if !evictorArgs.EvictLocalStoragePods { + constraints = append(constraints, func(pod *v1.Pod) error { + if utils.IsPodWithLocalStorage(pod) { + return fmt.Errorf("pod has local storage and descheduler is not configured with evictLocalStoragePods") + } + return nil + }) + } + if evictorArgs.IgnorePvcPods { + constraints = append(constraints, func(pod *v1.Pod) error { + if utils.IsPodWithPVC(pod) { + return fmt.Errorf("pod has a PVC and descheduler is configured to ignore PVC pods") + } + return nil + }) + } + if evictorArgs.NodeFit { + constraints = append(constraints, func(pod *v1.Pod) error { + // TODO(jchaloup): should the list of ready nodes be captured? Or, do we want the latest greatest about the nodes? + nodes, err := nodeutil.ReadyNodes(context.TODO(), handle.ClientSet(), handle.SharedInformerFactory().Core().V1().Nodes(), evictorArgs.NodeSelector) + if err != nil { + return fmt.Errorf("unable to list ready nodes: %v", err) + } + if !nodeutil.PodFitsAnyOtherNode(pod, nodes) { + return fmt.Errorf("pod does not fit on any other node because of nodeSelector(s), Taint(s), or nodes marked as unschedulable") + } + return nil + }) + } + + if evictorArgs.LabelSelector != nil { + selector, err := metav1.LabelSelectorAsSelector(evictorArgs.LabelSelector) + if err != nil { + return nil, fmt.Errorf("failed to get label selectors: %v", err) + } + if !selector.Empty() { + constraints = append(constraints, func(pod *v1.Pod) error { + if !selector.Matches(labels.Set(pod.Labels)) { + return fmt.Errorf("pod labels do not match the labelSelector filter in the policy parameter") + } + return nil + }) + } + } + + return &DefaultEvictor{ + handle: handle, + constraints: constraints, + }, nil +} + +func (d *DefaultEvictor) Name() string { + return PluginName +} + +// sort based on priority +func (de *DefaultEvictor) Less(podi *v1.Pod, podj *v1.Pod) bool { + if podi.Spec.Priority == nil && podj.Spec.Priority != nil { + return true + } + if podj.Spec.Priority == nil && podi.Spec.Priority != nil { + return false + } + if (podj.Spec.Priority == nil && podi.Spec.Priority == nil) || (*podi.Spec.Priority == *podj.Spec.Priority) { + if isBestEffortPod(podi) { + return true + } + if isBurstablePod(podi) && isGuaranteedPod(podj) { + return true + } + return false + } + return *podi.Spec.Priority < *podj.Spec.Priority +} + +func isBestEffortPod(pod *v1.Pod) bool { + return utils.GetPodQOS(pod) == v1.PodQOSBestEffort +} + +func isBurstablePod(pod *v1.Pod) bool { + return utils.GetPodQOS(pod) == v1.PodQOSBurstable +} + +func isGuaranteedPod(pod *v1.Pod) bool { + return utils.GetPodQOS(pod) == v1.PodQOSGuaranteed +} + +func (de *DefaultEvictor) Filter(pod *v1.Pod) bool { + checkErrs := []error{} + + ownerRefList := 
podutil.OwnerRef(pod) + if utils.IsDaemonsetPod(ownerRefList) { + checkErrs = append(checkErrs, fmt.Errorf("pod is a DaemonSet pod")) + } + + if utils.IsMirrorPod(pod) { + checkErrs = append(checkErrs, fmt.Errorf("pod is a mirror pod")) + } + + if utils.IsStaticPod(pod) { + checkErrs = append(checkErrs, fmt.Errorf("pod is a static pod")) + } + + if utils.IsPodTerminating(pod) { + checkErrs = append(checkErrs, fmt.Errorf("pod is terminating")) + } + + if len(checkErrs) == 0 { + for _, c := range de.constraints { + if err := c(pod); err != nil { + checkErrs = append(checkErrs, err) + } + } + } + + if len(checkErrs) > 0 && !haveEvictAnnotation(pod) { + klog.V(4).InfoS("Pod lacks an eviction annotation and fails the following checks", "pod", klog.KObj(pod), "checks", errors.NewAggregate(checkErrs).Error()) + return false + } + + return true +} + +// isPodEvictableBasedOnPriority checks if the given pod is evictable based on priority resolved from pod Spec. +func isPodEvictableBasedOnPriority(pod *v1.Pod, priority int32) bool { + return pod.Spec.Priority == nil || *pod.Spec.Priority < priority +} + +// HaveEvictAnnotation checks if the pod have evict annotation +func haveEvictAnnotation(pod *v1.Pod) bool { + _, found := pod.ObjectMeta.Annotations[evictPodAnnotationKey] + return found +} diff --git a/pkg/descheduler/strategies/nodeutilization/highnodeutilization.go b/pkg/framework/plugins/nodeutilization/highnodeutilization.go similarity index 55% rename from pkg/descheduler/strategies/nodeutilization/highnodeutilization.go rename to pkg/framework/plugins/nodeutilization/highnodeutilization.go index bcdcb4d232..3c8b0f8fbb 100644 --- a/pkg/descheduler/strategies/nodeutilization/highnodeutilization.go +++ b/pkg/framework/plugins/nodeutilization/highnodeutilization.go @@ -22,49 +22,73 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - clientset "k8s.io/client-go/kubernetes" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/klog/v2" "sigs.k8s.io/descheduler/pkg/api" - "sigs.k8s.io/descheduler/pkg/descheduler/evictions" nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node" - podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" - "sigs.k8s.io/descheduler/pkg/utils" + "sigs.k8s.io/descheduler/pkg/framework" ) +const HighNodeUtilizationPluginName = "HighNodeUtilization" + // HighNodeUtilization evicts pods from under utilized nodes so that scheduler can schedule according to its strategy. // Note that CPU/Memory requests are used to calculate nodes' utilization and not the actual resource usage. 
-func HighNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc) { - if err := validateNodeUtilizationParams(strategy.Params); err != nil { - klog.ErrorS(err, "Invalid HighNodeUtilization parameters") - return - } +type HighNodeUtilization struct { + handle framework.Handle + args *framework.HighNodeUtilizationArgs + resourceNames []v1.ResourceName + isEvictable func(pod *v1.Pod) bool + continueEvictionCond func(nodeInfo NodeInfo, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool +} - nodeFit := false - if strategy.Params != nil { - nodeFit = strategy.Params.NodeFit +var _ framework.Plugin = &HighNodeUtilization{} +var _ framework.BalancePlugin = &HighNodeUtilization{} + +func NewHighNodeUtilization(args runtime.Object, handle framework.Handle) (framework.Plugin, error) { + utilizationArgs, ok := args.(*framework.HighNodeUtilizationArgs) + if !ok { + return nil, fmt.Errorf("want args to be of type HighNodeUtilizationArgs, got %T", args) } - thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params) - if err != nil { - klog.ErrorS(err, "Failed to get threshold priority from strategy's params") - return + if utilizationArgs.PriorityThreshold != nil && utilizationArgs.PriorityThreshold.Value != nil && utilizationArgs.PriorityThreshold.Name != "" { + return nil, fmt.Errorf("only one of priorityThreshold fields can be set") } - thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds - targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds - if err := validateHighUtilizationStrategyConfig(thresholds, targetThresholds); err != nil { - klog.ErrorS(err, "HighNodeUtilization config is not valid") - return + if err := validateHighUtilizationStrategyConfig(utilizationArgs.Thresholds, utilizationArgs.TargetThresholds); err != nil { + return nil, fmt.Errorf("highNodeUtilization config is not valid: %v", err) } - targetThresholds = make(api.ResourceThresholds) - setDefaultForThresholds(thresholds, targetThresholds) - resourceNames := getResourceNames(targetThresholds) + // TODO(jchaloup): set defaults before initializing the plugin? 
+ utilizationArgs.TargetThresholds = make(api.ResourceThresholds) + setDefaultForThresholds(utilizationArgs.Thresholds, utilizationArgs.TargetThresholds) + + return &HighNodeUtilization{ + handle: handle, + args: utilizationArgs, + isEvictable: handle.Evictor().Filter, + resourceNames: getResourceNames(utilizationArgs.TargetThresholds), + // stop if the total available usage has dropped to zero - no more pods can be scheduled + continueEvictionCond: func(nodeInfo NodeInfo, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool { + for name := range totalAvailableUsage { + if totalAvailableUsage[name].CmpInt64(0) < 1 { + return false + } + } + + return true + }, + }, nil +} + +func (d *HighNodeUtilization) Name() string { + return HighNodeUtilizationPluginName +} +func (d *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *framework.Status { sourceNodes, highNodes := classifyNodes( - getNodeUsage(nodes, resourceNames, getPodsAssignedToNode), - getNodeThresholds(nodes, thresholds, targetThresholds, resourceNames, getPodsAssignedToNode, false), + getNodeUsage(nodes, d.resourceNames, d.handle.GetPodsAssignedToNodeFunc()), + getNodeThresholds(nodes, d.args.Thresholds, d.args.TargetThresholds, d.resourceNames, d.handle.GetPodsAssignedToNodeFunc(), false), func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool { return isNodeWithLowUtilization(usage, threshold.lowResourceThreshold) }, @@ -78,13 +102,13 @@ func HighNodeUtilization(ctx context.Context, client clientset.Interface, strate // log message in one line keysAndValues := []interface{}{ - "CPU", thresholds[v1.ResourceCPU], - "Mem", thresholds[v1.ResourceMemory], - "Pods", thresholds[v1.ResourcePods], + "CPU", d.args.Thresholds[v1.ResourceCPU], + "Mem", d.args.Thresholds[v1.ResourceMemory], + "Pods", d.args.Thresholds[v1.ResourcePods], } - for name := range thresholds { + for name := range d.args.Thresholds { if !isBasicResource(name) { - keysAndValues = append(keysAndValues, string(name), int64(thresholds[name])) + keysAndValues = append(keysAndValues, string(name), int64(d.args.Thresholds[name])) } } @@ -93,32 +117,19 @@ func HighNodeUtilization(ctx context.Context, client clientset.Interface, strate if len(sourceNodes) == 0 { klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further") - return + return nil } - if len(sourceNodes) <= strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes { - klog.V(1).InfoS("Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here", "underutilizedNodes", len(sourceNodes), "numberOfNodes", strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes) - return + if len(sourceNodes) <= d.args.NumberOfNodes { + klog.V(1).InfoS("Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here", "underutilizedNodes", len(sourceNodes), "numberOfNodes", d.args.NumberOfNodes) + return nil } if len(sourceNodes) == len(nodes) { klog.V(1).InfoS("All nodes are underutilized, nothing to do here") - return + return nil } if len(highNodes) == 0 { klog.V(1).InfoS("No node is available to schedule the pods, nothing to do here") - return - } - - evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit)) - - // stop if the total available usage has dropped to zero - no more pods can be scheduled - continueEvictionCond := func(nodeInfo NodeInfo, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool { - for name := 
range totalAvailableUsage { - if totalAvailableUsage[name].CmpInt64(0) < 1 { - return false - } - } - - return true + return nil } // Sort the nodes by the usage in ascending order @@ -128,12 +139,13 @@ func HighNodeUtilization(ctx context.Context, client clientset.Interface, strate ctx, sourceNodes, highNodes, - podEvictor, - evictable.IsEvictable, - resourceNames, + d.handle.Evictor(), + d.isEvictable, + d.resourceNames, "HighNodeUtilization", - continueEvictionCond) + d.continueEvictionCond) + return nil } func validateHighUtilizationStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error { diff --git a/pkg/descheduler/strategies/nodeutilization/highnodeutilization_test.go b/pkg/framework/plugins/nodeutilization/highnodeutilization_test.go similarity index 91% rename from pkg/descheduler/strategies/nodeutilization/highnodeutilization_test.go rename to pkg/framework/plugins/nodeutilization/highnodeutilization_test.go index 7d4c8bd60d..1b4c47e334 100644 --- a/pkg/descheduler/strategies/nodeutilization/highnodeutilization_test.go +++ b/pkg/framework/plugins/nodeutilization/highnodeutilization_test.go @@ -22,6 +22,7 @@ import ( "testing" v1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" "k8s.io/api/policy/v1beta1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/runtime" @@ -30,8 +31,10 @@ import ( core "k8s.io/client-go/testing" "sigs.k8s.io/descheduler/pkg/api" - "sigs.k8s.io/descheduler/pkg/descheduler/evictions" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" + "sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor" + fakehandler "sigs.k8s.io/descheduler/pkg/framework/profile/fake" "sigs.k8s.io/descheduler/pkg/utils" "sigs.k8s.io/descheduler/test" ) @@ -456,31 +459,42 @@ func TestHighNodeUtilization(t *testing.T) { // return true, nil, fmt.Errorf("Wrong node: %v", getAction.GetName()) //}) - podEvictor := evictions.NewPodEvictor( + podEvictor := framework.NewPodEvictor( fakeClient, - "v1", + policyv1.SchemeGroupVersion.String(), false, nil, nil, - testCase.nodes, - false, - false, - false, - false, false, ) - strategy := api.DeschedulerStrategy{ - Enabled: true, - Params: &api.StrategyParameters{ - NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{ - Thresholds: testCase.thresholds, - }, - NodeFit: true, - }, + handle := &fakehandler.FrameworkHandle{ + ClientsetImpl: fakeClient, + EvictorImpl: podEvictor, + GetPodsAssignedToNodeFuncImpl: getPodsAssignedToNode, + SharedInformerFactoryImpl: sharedInformerFactory, } - HighNodeUtilization(ctx, fakeClient, strategy, testCase.nodes, podEvictor, getPodsAssignedToNode) + defaultEvictor, err := defaultevictor.New(&framework.DefaultEvictorArgs{ + NodeFit: true, + }, handle) + if err != nil { + t.Fatalf("Unable to initialize the default evictor: %v", err) + } + + handle.EvictPlugin = defaultEvictor.(framework.EvictPlugin) + handle.SortPlugin = defaultEvictor.(framework.SortPlugin) + + plugin, err := NewHighNodeUtilization(&framework.HighNodeUtilizationArgs{ + Thresholds: testCase.thresholds, + }, + handle, + ) + if err != nil { + t.Fatalf("Unable to initialize the plugin: %v", err) + } + + plugin.(framework.BalancePlugin).Balance(ctx, testCase.nodes) podsEvicted := podEvictor.TotalEvicted() if testCase.expectedPodsEvicted != podsEvicted { t.Errorf("Expected %v pods to be evicted but %v got evicted", testCase.expectedPodsEvicted, podsEvicted) @@ -560,17 +574,6 @@ func TestValidateHighNodeUtilizationStrategyConfig(t *testing.T) { } 
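Each converted test above repeats the same wiring: build a framework.PodEvictor, wrap it in a fake handle, register the DefaultEvictor as both the evict and sort plugin, then drive the plugin under test through its Balance extension point. The following is a sketch of a _test.go helper (hypothetical, not part of this diff) that could factor out that sequence for the HighNodeUtilization tests; the *framework.PodEvictor return type is assumed.

package nodeutilization

import (
	"testing"

	policyv1 "k8s.io/api/policy/v1"
	"k8s.io/client-go/informers"
	clientset "k8s.io/client-go/kubernetes"

	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
	"sigs.k8s.io/descheduler/pkg/framework"
	"sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor"
	fakehandler "sigs.k8s.io/descheduler/pkg/framework/profile/fake"
)

// newBalanceTestSetup wires a fake handle, the DefaultEvictor and the
// HighNodeUtilization plugin the same way the converted tests do.
func newBalanceTestSetup(
	t *testing.T,
	client clientset.Interface,
	sharedInformerFactory informers.SharedInformerFactory,
	getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
	args *framework.HighNodeUtilizationArgs,
) (framework.BalancePlugin, *framework.PodEvictor) {
	podEvictor := framework.NewPodEvictor(client, policyv1.SchemeGroupVersion.String(), false, nil, nil, false)
	handle := &fakehandler.FrameworkHandle{
		ClientsetImpl:                 client,
		EvictorImpl:                   podEvictor,
		GetPodsAssignedToNodeFuncImpl: getPodsAssignedToNode,
		SharedInformerFactoryImpl:     sharedInformerFactory,
	}
	defaultEvictor, err := defaultevictor.New(&framework.DefaultEvictorArgs{NodeFit: true}, handle)
	if err != nil {
		t.Fatalf("Unable to initialize the default evictor: %v", err)
	}
	handle.EvictPlugin = defaultEvictor.(framework.EvictPlugin)
	handle.SortPlugin = defaultEvictor.(framework.SortPlugin)
	plugin, err := NewHighNodeUtilization(args, handle)
	if err != nil {
		t.Fatalf("Unable to initialize the plugin: %v", err)
	}
	return plugin.(framework.BalancePlugin), podEvictor
}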
func TestHighNodeUtilizationWithTaints(t *testing.T) { - strategy := api.DeschedulerStrategy{ - Enabled: true, - Params: &api.StrategyParameters{ - NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{ - Thresholds: api.ResourceThresholds{ - v1.ResourceCPU: 40, - }, - }, - }, - } - n1 := test.BuildTestNode("n1", 1000, 3000, 10, nil) n2 := test.BuildTestNode("n2", 1000, 3000, 10, nil) n3 := test.BuildTestNode("n3", 1000, 3000, 10, nil) @@ -661,22 +664,44 @@ func TestHighNodeUtilizationWithTaints(t *testing.T) { sharedInformerFactory.Start(ctx.Done()) sharedInformerFactory.WaitForCacheSync(ctx.Done()) - podEvictor := evictions.NewPodEvictor( + podEvictor := framework.NewPodEvictor( fakeClient, - "policy/v1", + policyv1.SchemeGroupVersion.String(), false, &item.evictionsExpected, nil, - item.nodes, - false, - false, - false, - false, false, ) - HighNodeUtilization(ctx, fakeClient, strategy, item.nodes, podEvictor, getPodsAssignedToNode) + handle := &fakehandler.FrameworkHandle{ + ClientsetImpl: fakeClient, + EvictorImpl: podEvictor, + GetPodsAssignedToNodeFuncImpl: getPodsAssignedToNode, + SharedInformerFactoryImpl: sharedInformerFactory, + } + + defaultEvictor, err := defaultevictor.New(&framework.DefaultEvictorArgs{ + NodeFit: true, + }, handle) + if err != nil { + t.Fatalf("Unable to initialize the default evictor: %v", err) + } + + handle.EvictPlugin = defaultEvictor.(framework.EvictPlugin) + handle.SortPlugin = defaultEvictor.(framework.SortPlugin) + + plugin, err := NewHighNodeUtilization(&framework.HighNodeUtilizationArgs{ + Thresholds: api.ResourceThresholds{ + v1.ResourceCPU: 40, + }, + }, + handle, + ) + if err != nil { + t.Fatalf("Unable to initialize the plugin: %v", err) + } + plugin.(framework.BalancePlugin).Balance(ctx, item.nodes) if item.evictionsExpected != podEvictor.TotalEvicted() { t.Errorf("Expected %v evictions, got %v", item.evictionsExpected, podEvictor.TotalEvicted()) } diff --git a/pkg/framework/plugins/nodeutilization/lownodeutilization.go b/pkg/framework/plugins/nodeutilization/lownodeutilization.go new file mode 100644 index 0000000000..3014b9008e --- /dev/null +++ b/pkg/framework/plugins/nodeutilization/lownodeutilization.go @@ -0,0 +1,217 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodeutilization + +import ( + "context" + "fmt" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/klog/v2" + + "sigs.k8s.io/descheduler/pkg/api" + nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node" + "sigs.k8s.io/descheduler/pkg/framework" +) + +const LowNodeUtilizationPluginName = "LowNodeUtilization" + +// LowNodeUtilization evicts pods from overutilized nodes to underutilized nodes. Note that CPU/Memory requests are used +// to calculate nodes' utilization and not the actual resource usage. 
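Thresholds and TargetThresholds are percentages of node allocatable. For LowNodeUtilization, nodes with every tracked resource below Thresholds are the underutilized destinations, while nodes with any resource above TargetThresholds are the overutilized sources that pods are evicted from; HighNodeUtilization above inverts this and drains the underutilized nodes instead. A rough standalone illustration of the two classification predicates, expressed over percentages rather than the plugin's resource.Quantity-based NodeUsage/NodeThresholds:

package utilizationsketch

import api "sigs.k8s.io/descheduler/pkg/api"

// belowAll reports whether every tracked resource sits below its threshold,
// roughly what isNodeWithLowUtilization checks (simplified sketch).
func belowAll(usage, thresholds api.ResourceThresholds) bool {
	for name, t := range thresholds {
		if usage[name] > t {
			return false
		}
	}
	return true
}

// aboveAny reports whether any tracked resource exceeds its threshold,
// roughly what isNodeAboveTargetUtilization checks (simplified sketch).
func aboveAny(usage, thresholds api.ResourceThresholds) bool {
	for name, t := range thresholds {
		if usage[name] > t {
			return true
		}
	}
	return false
}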
+type LowNodeUtilization struct { + handle framework.Handle + args *framework.LowNodeUtilizationArgs + resourceNames []v1.ResourceName + continueEvictionCond func(nodeInfo NodeInfo, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool +} + +var _ framework.Plugin = &LowNodeUtilization{} +var _ framework.BalancePlugin = &LowNodeUtilization{} + +func NewLowNodeUtilization(args runtime.Object, handle framework.Handle) (framework.Plugin, error) { + utilizationArgs, ok := args.(*framework.LowNodeUtilizationArgs) + if !ok { + return nil, fmt.Errorf("want args to be of type LowNodeUtilizationArgs, got %T", args) + } + + if utilizationArgs.PriorityThreshold != nil && utilizationArgs.PriorityThreshold.Value != nil && utilizationArgs.PriorityThreshold.Name != "" { + return nil, fmt.Errorf("only one of priorityThreshold fields can be set") + } + + if err := validateLowUtilizationStrategyConfig(utilizationArgs.Thresholds, utilizationArgs.TargetThresholds, utilizationArgs.UseDeviationThresholds); err != nil { + return nil, fmt.Errorf("lowNodeUtilization config is not valid: %v", err) + } + + // check if Pods/CPU/Mem are set, if not, set them to 100 + if _, ok := utilizationArgs.Thresholds[v1.ResourcePods]; !ok { + if utilizationArgs.UseDeviationThresholds { + utilizationArgs.Thresholds[v1.ResourcePods] = MinResourcePercentage + utilizationArgs.TargetThresholds[v1.ResourcePods] = MinResourcePercentage + } else { + utilizationArgs.Thresholds[v1.ResourcePods] = MaxResourcePercentage + utilizationArgs.TargetThresholds[v1.ResourcePods] = MaxResourcePercentage + } + } + if _, ok := utilizationArgs.Thresholds[v1.ResourceCPU]; !ok { + if utilizationArgs.UseDeviationThresholds { + utilizationArgs.Thresholds[v1.ResourceCPU] = MinResourcePercentage + utilizationArgs.TargetThresholds[v1.ResourceCPU] = MinResourcePercentage + } else { + utilizationArgs.Thresholds[v1.ResourceCPU] = MaxResourcePercentage + utilizationArgs.TargetThresholds[v1.ResourceCPU] = MaxResourcePercentage + } + } + if _, ok := utilizationArgs.Thresholds[v1.ResourceMemory]; !ok { + if utilizationArgs.UseDeviationThresholds { + utilizationArgs.Thresholds[v1.ResourceMemory] = MinResourcePercentage + utilizationArgs.TargetThresholds[v1.ResourceMemory] = MinResourcePercentage + } else { + utilizationArgs.Thresholds[v1.ResourceMemory] = MaxResourcePercentage + utilizationArgs.TargetThresholds[v1.ResourceMemory] = MaxResourcePercentage + } + } + + return &LowNodeUtilization{ + handle: handle, + args: utilizationArgs, + resourceNames: getResourceNames(utilizationArgs.Thresholds), + // stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved + continueEvictionCond: func(nodeInfo NodeInfo, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool { + if !isNodeAboveTargetUtilization(nodeInfo.NodeUsage, nodeInfo.thresholds.highResourceThreshold) { + return false + } + for name := range totalAvailableUsage { + if totalAvailableUsage[name].CmpInt64(0) < 1 { + return false + } + } + + return true + }, + }, nil +} + +func (d *LowNodeUtilization) Name() string { + return LowNodeUtilizationPluginName +} + +func (d *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *framework.Status { + lowNodes, sourceNodes := classifyNodes( + getNodeUsage(nodes, d.resourceNames, d.handle.GetPodsAssignedToNodeFunc()), + getNodeThresholds(nodes, d.args.Thresholds, d.args.TargetThresholds, d.resourceNames, d.handle.GetPodsAssignedToNodeFunc(), d.args.UseDeviationThresholds), + // The node 
has to be schedulable (to be able to move workload there) + func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool { + if nodeutil.IsNodeUnschedulable(node) { + klog.V(2).InfoS("Node is unschedulable, thus not considered as underutilized", "node", klog.KObj(node)) + return false + } + return isNodeWithLowUtilization(usage, threshold.lowResourceThreshold) + }, + func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool { + return isNodeAboveTargetUtilization(usage, threshold.highResourceThreshold) + }, + ) + + // log message in one line + keysAndValues := []interface{}{ + "CPU", d.args.Thresholds[v1.ResourceCPU], + "Mem", d.args.Thresholds[v1.ResourceMemory], + "Pods", d.args.Thresholds[v1.ResourcePods], + } + for name := range d.args.Thresholds { + if !isBasicResource(name) { + keysAndValues = append(keysAndValues, string(name), int64(d.args.Thresholds[name])) + } + } + klog.V(1).InfoS("Criteria for a node under utilization", keysAndValues...) + klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes)) + + // log message in one line + keysAndValues = []interface{}{ + "CPU", d.args.TargetThresholds[v1.ResourceCPU], + "Mem", d.args.TargetThresholds[v1.ResourceMemory], + "Pods", d.args.TargetThresholds[v1.ResourcePods], + } + for name := range d.args.TargetThresholds { + if !isBasicResource(name) { + keysAndValues = append(keysAndValues, string(name), int64(d.args.TargetThresholds[name])) + } + } + klog.V(1).InfoS("Criteria for a node above target utilization", keysAndValues...) + klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(sourceNodes)) + + if len(lowNodes) == 0 { + klog.V(1).InfoS("no node is underutilized, nothing to do here, you might tune your thresholds further") + return nil + } + + if len(lowNodes) <= d.args.NumberOfNodes { + klog.V(1).InfoS("Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here", "underutilizedNodes", len(lowNodes), "numberOfNodes", d.args.NumberOfNodes) + return nil + } + + if len(lowNodes) == len(nodes) { + klog.V(1).InfoS("All nodes are underutilized, nothing to do here") + return nil + } + + if len(sourceNodes) == 0 { + klog.V(1).InfoS("All nodes are under target utilization, nothing to do here") + return nil + } + + // Sort the nodes by the usage in descending order + sortNodesByUsage(sourceNodes, false) + + evictPodsFromSourceNodes( + ctx, + sourceNodes, + lowNodes, + d.handle.Evictor(), + d.handle.Evictor().Filter, + d.resourceNames, + "LowNodeUtilization", + d.continueEvictionCond) + + return nil +} + +// validateLowUtilizationStrategyConfig checks if the strategy's config is valid +func validateLowUtilizationStrategyConfig(thresholds, targetThresholds api.ResourceThresholds, useDeviationThresholds bool) error { + // validate thresholds and targetThresholds config + if err := validateThresholds(thresholds); err != nil { + return fmt.Errorf("thresholds config is not valid: %v", err) + } + if err := validateThresholds(targetThresholds); err != nil { + return fmt.Errorf("targetThresholds config is not valid: %v", err) + } + + // validate if thresholds and targetThresholds have same resources configured + if len(thresholds) != len(targetThresholds) { + return fmt.Errorf("thresholds and targetThresholds configured different resources") + } + for resourceName, value := range thresholds { + if targetValue, ok := targetThresholds[resourceName]; !ok { + return fmt.Errorf("thresholds and targetThresholds configured different resources") + } else if value > targetValue && 
!useDeviationThresholds { + return fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", resourceName) + } + } + return nil +} diff --git a/pkg/descheduler/strategies/nodeutilization/lownodeutilization_test.go b/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go similarity index 93% rename from pkg/descheduler/strategies/nodeutilization/lownodeutilization_test.go rename to pkg/framework/plugins/nodeutilization/lownodeutilization_test.go index 6b2c9fa1fa..079a1b92a1 100644 --- a/pkg/descheduler/strategies/nodeutilization/lownodeutilization_test.go +++ b/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go @@ -27,16 +27,36 @@ import ( "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/informers" + clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/fake" core "k8s.io/client-go/testing" "sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/descheduler/evictions" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" + "sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor" + fakehandler "sigs.k8s.io/descheduler/pkg/framework/profile/fake" "sigs.k8s.io/descheduler/pkg/utils" "sigs.k8s.io/descheduler/test" ) +type frameworkHandle struct { + clientset clientset.Interface + podEvictor *evictions.PodEvictor + getPodsAssignedToNodeFunc podutil.GetPodsAssignedToNodeFunc +} + +func (f frameworkHandle) ClientSet() clientset.Interface { + return f.clientset +} +func (f frameworkHandle) PodEvictor() *evictions.PodEvictor { + return f.podEvictor +} +func (f frameworkHandle) GetPodsAssignedToNodeFunc() podutil.GetPodsAssignedToNodeFunc { + return f.getPodsAssignedToNodeFunc +} + func TestLowNodeUtilization(t *testing.T) { n1NodeName := "n1" n2NodeName := "n2" @@ -765,33 +785,44 @@ func TestLowNodeUtilization(t *testing.T) { sharedInformerFactory.Start(ctx.Done()) sharedInformerFactory.WaitForCacheSync(ctx.Done()) - podEvictor := evictions.NewPodEvictor( + podEvictor := framework.NewPodEvictor( fakeClient, policyv1.SchemeGroupVersion.String(), false, nil, nil, - test.nodes, - false, - false, - false, - false, false, ) - strategy := api.DeschedulerStrategy{ - Enabled: true, - Params: &api.StrategyParameters{ - NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{ - Thresholds: test.thresholds, - TargetThresholds: test.targetThresholds, - UseDeviationThresholds: test.useDeviationThresholds, - }, - NodeFit: true, - }, + handle := &fakehandler.FrameworkHandle{ + ClientsetImpl: fakeClient, + EvictorImpl: podEvictor, + GetPodsAssignedToNodeFuncImpl: getPodsAssignedToNode, + SharedInformerFactoryImpl: sharedInformerFactory, } - LowNodeUtilization(ctx, fakeClient, strategy, test.nodes, podEvictor, getPodsAssignedToNode) + defaultEvictor, err := defaultevictor.New(&framework.DefaultEvictorArgs{ + NodeFit: true, + }, handle) + if err != nil { + t.Fatalf("Unable to initialize the default evictor: %v", err) + } + + handle.EvictPlugin = defaultEvictor.(framework.EvictPlugin) + handle.SortPlugin = defaultEvictor.(framework.SortPlugin) + + plugin, err := NewLowNodeUtilization(&framework.LowNodeUtilizationArgs{ + Thresholds: test.thresholds, + TargetThresholds: test.targetThresholds, + UseDeviationThresholds: test.useDeviationThresholds, + }, + handle, + ) + if err != nil { + t.Fatalf("Unable to initialize the plugin: %v", err) + } + + plugin.(framework.BalancePlugin).Balance(ctx, test.nodes) podsEvicted := podEvictor.TotalEvicted() if 
test.expectedPodsEvicted != podsEvicted { t.Errorf("Expected %v pods to be evicted but %v got evicted", test.expectedPodsEvicted, podsEvicted) @@ -959,20 +990,6 @@ func TestValidateLowNodeUtilizationStrategyConfig(t *testing.T) { func TestLowNodeUtilizationWithTaints(t *testing.T) { ctx := context.Background() - strategy := api.DeschedulerStrategy{ - Enabled: true, - Params: &api.StrategyParameters{ - NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{ - Thresholds: api.ResourceThresholds{ - v1.ResourcePods: 20, - }, - TargetThresholds: api.ResourceThresholds{ - v1.ResourcePods: 70, - }, - }, - NodeFit: true, - }, - } n1 := test.BuildTestNode("n1", 2000, 3000, 10, nil) n2 := test.BuildTestNode("n2", 1000, 3000, 10, nil) @@ -1079,22 +1096,47 @@ func TestLowNodeUtilizationWithTaints(t *testing.T) { sharedInformerFactory.Start(ctx.Done()) sharedInformerFactory.WaitForCacheSync(ctx.Done()) - podEvictor := evictions.NewPodEvictor( + podEvictor := framework.NewPodEvictor( fakeClient, policyv1.SchemeGroupVersion.String(), false, &item.evictionsExpected, nil, - item.nodes, - false, - false, - false, - false, false, ) - LowNodeUtilization(ctx, fakeClient, strategy, item.nodes, podEvictor, getPodsAssignedToNode) + handle := &fakehandler.FrameworkHandle{ + ClientsetImpl: fakeClient, + EvictorImpl: podEvictor, + GetPodsAssignedToNodeFuncImpl: getPodsAssignedToNode, + SharedInformerFactoryImpl: sharedInformerFactory, + } + + defaultEvictor, err := defaultevictor.New(&framework.DefaultEvictorArgs{ + NodeFit: true, + }, handle) + if err != nil { + t.Fatalf("Unable to initialize the default evictor: %v", err) + } + + handle.EvictPlugin = defaultEvictor.(framework.EvictPlugin) + handle.SortPlugin = defaultEvictor.(framework.SortPlugin) + + plugin, err := NewLowNodeUtilization(&framework.LowNodeUtilizationArgs{ + Thresholds: api.ResourceThresholds{ + v1.ResourcePods: 20, + }, + TargetThresholds: api.ResourceThresholds{ + v1.ResourcePods: 70, + }, + }, + handle, + ) + if err != nil { + t.Fatalf("Unable to initialize the plugin: %v", err) + } + plugin.(framework.BalancePlugin).Balance(ctx, item.nodes) if item.evictionsExpected != podEvictor.TotalEvicted() { t.Errorf("Expected %v evictions, got %v", item.evictionsExpected, podEvictor.TotalEvicted()) } diff --git a/pkg/descheduler/strategies/nodeutilization/nodeutilization.go b/pkg/framework/plugins/nodeutilization/nodeutilization.go similarity index 96% rename from pkg/descheduler/strategies/nodeutilization/nodeutilization.go rename to pkg/framework/plugins/nodeutilization/nodeutilization.go index 8d29eeeafe..86f3a13059 100644 --- a/pkg/descheduler/strategies/nodeutilization/nodeutilization.go +++ b/pkg/framework/plugins/nodeutilization/nodeutilization.go @@ -26,8 +26,8 @@ import ( "k8s.io/klog/v2" "sigs.k8s.io/descheduler/pkg/api" - "sigs.k8s.io/descheduler/pkg/descheduler/evictions" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" "sigs.k8s.io/descheduler/pkg/utils" ) @@ -235,7 +235,7 @@ func classifyNodes( func evictPodsFromSourceNodes( ctx context.Context, sourceNodes, destinationNodes []NodeInfo, - podEvictor *evictions.PodEvictor, + podEvictor framework.Evictor, podFilter func(pod *v1.Pod) bool, resourceNames []v1.ResourceName, strategy string, @@ -288,8 +288,8 @@ func evictPodsFromSourceNodes( klog.V(1).InfoS("Evicting pods based on priority, if they have same priority, they'll be evicted based on QoS tiers") // sort the evictable Pods based on priority. 
This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers. podutil.SortPodsBasedOnPriorityLowToHigh(removablePods) - evictPods(ctx, removablePods, node, totalAvailableUsage, taintsOfDestinationNodes, podEvictor, strategy, continueEviction) - klog.V(1).InfoS("Evicted pods from node", "node", klog.KObj(node.node), "evictedPods", podEvictor.NodeEvicted(node.node), "usage", node.usage) + evicted := evictPods(ctx, removablePods, node, totalAvailableUsage, taintsOfDestinationNodes, podEvictor, strategy, continueEviction) + klog.V(1).InfoS("Evicted pods from node", "node", klog.KObj(node.node), "evictedPods", evicted, "usage", node.usage) } } @@ -299,11 +299,12 @@ func evictPods( nodeInfo NodeInfo, totalAvailableUsage map[v1.ResourceName]*resource.Quantity, taintsOfLowNodes map[string][]v1.Taint, - podEvictor *evictions.PodEvictor, + podEvictor framework.Evictor, strategy string, continueEviction continueEvictionCond, -) { +) uint { + evicted := uint(0) if continueEviction(nodeInfo, totalAvailableUsage) { for _, pod := range inputPods { if !utils.PodToleratesTaints(pod, taintsOfLowNodes) { @@ -311,14 +312,9 @@ func evictPods( continue } - success, err := podEvictor.EvictPod(ctx, pod, nodeInfo.node, strategy) - if err != nil { - klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) - break - } - - if success { - klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod), "err", err) + if podEvictor.Evict(ctx, pod) { + evicted++ + klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod)) for name := range totalAvailableUsage { if name == v1.ResourcePods { @@ -351,6 +347,7 @@ func evictPods( } } } + return evicted } // sortNodesByUsage sorts nodes based on usage according to the given strategy. diff --git a/pkg/descheduler/strategies/nodeutilization/nodeutilization_test.go b/pkg/framework/plugins/nodeutilization/nodeutilization_test.go similarity index 100% rename from pkg/descheduler/strategies/nodeutilization/nodeutilization_test.go rename to pkg/framework/plugins/nodeutilization/nodeutilization_test.go diff --git a/pkg/framework/plugins/podlifetime/pod_lifetime.go b/pkg/framework/plugins/podlifetime/pod_lifetime.go new file mode 100644 index 0000000000..54af521fbe --- /dev/null +++ b/pkg/framework/plugins/podlifetime/pod_lifetime.go @@ -0,0 +1,140 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package podlifetime + +import ( + "context" + "fmt" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + + podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" +) + +const PluginName = "PodLifeTime" + +// PodLifeTime evicts pods on nodes that were created more than strategy.Params.MaxPodLifeTimeSeconds seconds ago. 
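PodLifeTime's core decision is a simple age comparison against MaxPodLifeTimeSeconds, applied after the evictor filter and the optional namespace, label and phase filters. A standalone sketch of that age test, assuming the same comparison listOldPodsOnNode performs below (in practice metav1.Now() is passed as now):

package lifetimesketch

import (
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// exceededLifetime reports whether the pod has existed for more than maxSeconds,
// mirroring the age comparison in listOldPodsOnNode (sketch only).
func exceededLifetime(pod *v1.Pod, maxSeconds uint, now metav1.Time) bool {
	age := now.Time.Sub(pod.GetCreationTimestamp().Time)
	return uint(age/time.Second) > maxSeconds
}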
+type PodLifeTime struct { + handle framework.Handle + args *framework.PodLifeTimeArgs + podFilter podutil.FilterFunc +} + +var _ framework.Plugin = &PodLifeTime{} +var _ framework.DeschedulePlugin = &PodLifeTime{} + +func New(args runtime.Object, handle framework.Handle) (framework.Plugin, error) { + lifetimeArgs, ok := args.(*framework.PodLifeTimeArgs) + if !ok { + return nil, fmt.Errorf("want args to be of type PodLifeTimeArgs, got %T", args) + } + + if err := framework.ValidateCommonArgs(lifetimeArgs.CommonArgs); err != nil { + return nil, err + } + + if lifetimeArgs.MaxPodLifeTimeSeconds == nil { + return nil, fmt.Errorf("maxPodLifeTimeSeconds not set") + } + + if lifetimeArgs.PodStatusPhases != nil { + for _, phase := range lifetimeArgs.PodStatusPhases { + if phase != string(v1.PodPending) && phase != string(v1.PodRunning) { + return nil, fmt.Errorf("only Pending and Running phases are supported in PodLifeTime") + } + } + } + + filter := handle.Evictor().Filter + if lifetimeArgs.PodStatusPhases != nil { + filter = func(pod *v1.Pod) bool { + for _, phase := range lifetimeArgs.PodStatusPhases { + if string(pod.Status.Phase) == phase { + return handle.Evictor().Filter(pod) + } + } + return false + } + } + + var includedNamespaces, excludedNamespaces sets.String + if lifetimeArgs.Namespaces != nil { + includedNamespaces = sets.NewString(lifetimeArgs.Namespaces.Include...) + excludedNamespaces = sets.NewString(lifetimeArgs.Namespaces.Exclude...) + } + + podFilter, err := podutil.NewOptions(). + WithFilter(filter). + WithNamespaces(includedNamespaces). + WithoutNamespaces(excludedNamespaces). + WithLabelSelector(lifetimeArgs.LabelSelector). + BuildFilterFunc() + if err != nil { + return nil, fmt.Errorf("error initializing pod filter function: %v", err) + } + + return &PodLifeTime{ + handle: handle, + args: lifetimeArgs, + podFilter: podFilter, + }, nil +} + +func (d *PodLifeTime) Name() string { + return PluginName +} + +func (d *PodLifeTime) Deschedule(ctx context.Context, nodes []*v1.Node) *framework.Status { + for _, node := range nodes { + klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) + + pods := listOldPodsOnNode(node.Name, d.handle.GetPodsAssignedToNodeFunc(), d.podFilter, *d.args.MaxPodLifeTimeSeconds) + for _, pod := range pods { + // PodEvictor().EvictPod(ctx, pod, node, "PodLifeTime") + if d.handle.Evictor().Evict(ctx, pod) { + klog.V(1).InfoS("Evicted pod because it exceeded its lifetime", "pod", klog.KObj(pod), "maxPodLifeTime", *d.args.MaxPodLifeTimeSeconds) + } + } + } + return nil +} + +func listOldPodsOnNode( + nodeName string, + getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc, + filter podutil.FilterFunc, + maxPodLifeTimeSeconds uint, +) []*v1.Pod { + pods, err := podutil.ListPodsOnANode(nodeName, getPodsAssignedToNode, filter) + if err != nil { + return nil + } + var oldPods []*v1.Pod + for _, pod := range pods { + podAgeSeconds := uint(metav1.Now().Sub(pod.GetCreationTimestamp().Local()).Seconds()) + if podAgeSeconds > maxPodLifeTimeSeconds { + oldPods = append(oldPods, pod) + } + } + + return oldPods +} diff --git a/pkg/descheduler/strategies/pod_lifetime_test.go b/pkg/framework/plugins/podlifetime/pod_lifetime_test.go similarity index 69% rename from pkg/descheduler/strategies/pod_lifetime_test.go rename to pkg/framework/plugins/podlifetime/pod_lifetime_test.go index 697250cbaf..eae16a47d1 100644 --- a/pkg/descheduler/strategies/pod_lifetime_test.go +++ b/pkg/framework/plugins/podlifetime/pod_lifetime_test.go @@ -14,7 +14,7 @@ See the License for 
the specific language governing permissions and limitations under the License. */ -package strategies +package podlifetime import ( "context" @@ -26,14 +26,34 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/informers" + clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/fake" "sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/descheduler/evictions" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" + "sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor" + fakehandler "sigs.k8s.io/descheduler/pkg/framework/profile/fake" "sigs.k8s.io/descheduler/test" ) +type frameworkHandle struct { + clientset clientset.Interface + podEvictor *evictions.PodEvictor + getPodsAssignedToNodeFunc podutil.GetPodsAssignedToNodeFunc +} + +func (f frameworkHandle) ClientSet() clientset.Interface { + return f.clientset +} +func (f frameworkHandle) PodEvictor() *evictions.PodEvictor { + return f.podEvictor +} +func (f frameworkHandle) GetPodsAssignedToNodeFunc() podutil.GetPodsAssignedToNodeFunc { + return f.getPodsAssignedToNodeFunc +} + func TestPodLifeTime(t *testing.T) { node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil) olderPodCreationTime := metav1.NewTime(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)) @@ -145,121 +165,74 @@ func TestPodLifeTime(t *testing.T) { nodes []*v1.Node expectedEvictedPodCount uint ignorePvcPods bool + + maxPodLifeTimeSeconds *uint + podStatusPhases []string + labelSelector *metav1.LabelSelector }{ { - description: "Two pods in the `dev` Namespace, 1 is new and 1 very is old. 1 should be evicted.", - strategy: api.DeschedulerStrategy{ - Enabled: true, - Params: &api.StrategyParameters{ - PodLifeTime: &api.PodLifeTime{MaxPodLifeTimeSeconds: &maxLifeTime}, - }, - }, + description: "Two pods in the `dev` Namespace, 1 is new and 1 very is old. 1 should be evicted.", + maxPodLifeTimeSeconds: &maxLifeTime, pods: []*v1.Pod{p1, p2}, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 1, }, { - description: "Two pods in the `dev` Namespace, 2 are new and 0 are old. 0 should be evicted.", - strategy: api.DeschedulerStrategy{ - Enabled: true, - Params: &api.StrategyParameters{ - PodLifeTime: &api.PodLifeTime{MaxPodLifeTimeSeconds: &maxLifeTime}, - }, - }, + description: "Two pods in the `dev` Namespace, 2 are new and 0 are old. 0 should be evicted.", + maxPodLifeTimeSeconds: &maxLifeTime, pods: []*v1.Pod{p3, p4}, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 0, }, { - description: "Two pods in the `dev` Namespace, 1 created 605 seconds ago. 1 should be evicted.", - strategy: api.DeschedulerStrategy{ - Enabled: true, - Params: &api.StrategyParameters{ - PodLifeTime: &api.PodLifeTime{MaxPodLifeTimeSeconds: &maxLifeTime}, - }, - }, + description: "Two pods in the `dev` Namespace, 1 created 605 seconds ago. 1 should be evicted.", + maxPodLifeTimeSeconds: &maxLifeTime, pods: []*v1.Pod{p5, p6}, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 1, }, { - description: "Two pods in the `dev` Namespace, 1 created 595 seconds ago. 0 should be evicted.", - strategy: api.DeschedulerStrategy{ - Enabled: true, - Params: &api.StrategyParameters{ - PodLifeTime: &api.PodLifeTime{MaxPodLifeTimeSeconds: &maxLifeTime}, - }, - }, + description: "Two pods in the `dev` Namespace, 1 created 595 seconds ago. 
0 should be evicted.", + maxPodLifeTimeSeconds: &maxLifeTime, pods: []*v1.Pod{p7, p8}, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 0, }, { - description: "Two old pods with different status phases. 1 should be evicted.", - strategy: api.DeschedulerStrategy{ - Enabled: true, - Params: &api.StrategyParameters{ - PodLifeTime: &api.PodLifeTime{ - MaxPodLifeTimeSeconds: &maxLifeTime, - PodStatusPhases: []string{"Pending"}, - }, - }, - }, + description: "Two old pods with different status phases. 1 should be evicted.", + maxPodLifeTimeSeconds: &maxLifeTime, + podStatusPhases: []string{"Pending"}, pods: []*v1.Pod{p9, p10}, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 1, }, { - description: "does not evict pvc pods with ignorePvcPods set to true", - strategy: api.DeschedulerStrategy{ - Enabled: true, - Params: &api.StrategyParameters{ - PodLifeTime: &api.PodLifeTime{MaxPodLifeTimeSeconds: &maxLifeTime}, - }, - }, + description: "does not evict pvc pods with ignorePvcPods set to true", + maxPodLifeTimeSeconds: &maxLifeTime, pods: []*v1.Pod{p11}, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 0, ignorePvcPods: true, }, { - description: "evicts pvc pods with ignorePvcPods set to false (or unset)", - strategy: api.DeschedulerStrategy{ - Enabled: true, - Params: &api.StrategyParameters{ - PodLifeTime: &api.PodLifeTime{MaxPodLifeTimeSeconds: &maxLifeTime}, - }, - }, + description: "evicts pvc pods with ignorePvcPods set to false (or unset)", + maxPodLifeTimeSeconds: &maxLifeTime, pods: []*v1.Pod{p11}, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 1, }, { - description: "No pod to evicted since all pod terminating", - strategy: api.DeschedulerStrategy{ - Enabled: true, - Params: &api.StrategyParameters{ - PodLifeTime: &api.PodLifeTime{MaxPodLifeTimeSeconds: &maxLifeTime}, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"foo": "bar"}, - }, - }, - }, + description: "No pod to evicted since all pod terminating", + maxPodLifeTimeSeconds: &maxLifeTime, + labelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, pods: []*v1.Pod{p12, p13}, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 1, }, { - description: "No pod should be evicted since pod terminating", - strategy: api.DeschedulerStrategy{ - Enabled: true, - Params: &api.StrategyParameters{ - PodLifeTime: &api.PodLifeTime{MaxPodLifeTimeSeconds: &maxLifeTime}, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"foo": "bar"}, - }, - }, - }, + description: "No pod should be evicted since pod terminating", + maxPodLifeTimeSeconds: &maxLifeTime, + labelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, pods: []*v1.Pod{p14, p15}, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 0, @@ -291,21 +264,44 @@ func TestPodLifeTime(t *testing.T) { sharedInformerFactory.Start(ctx.Done()) sharedInformerFactory.WaitForCacheSync(ctx.Done()) - podEvictor := evictions.NewPodEvictor( + podEvictor := framework.NewPodEvictor( fakeClient, policyv1.SchemeGroupVersion.String(), false, nil, nil, - tc.nodes, - false, - false, - tc.ignorePvcPods, - false, false, ) - PodLifeTime(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor, getPodsAssignedToNode) + handle := &fakehandler.FrameworkHandle{ + ClientsetImpl: fakeClient, + EvictorImpl: podEvictor, + GetPodsAssignedToNodeFuncImpl: getPodsAssignedToNode, + SharedInformerFactoryImpl: sharedInformerFactory, + } + + defaultEvictor, err := defaultevictor.New(&framework.DefaultEvictorArgs{ + IgnorePvcPods: 
tc.ignorePvcPods, + }, handle) + if err != nil { + t.Fatalf("Unable to initialize the default evictor: %v", err) + } + + handle.EvictPlugin = defaultEvictor.(framework.EvictPlugin) + handle.SortPlugin = defaultEvictor.(framework.SortPlugin) + + plugin, err := New(&framework.PodLifeTimeArgs{ + LabelSelector: tc.labelSelector, + MaxPodLifeTimeSeconds: tc.maxPodLifeTimeSeconds, + PodStatusPhases: tc.podStatusPhases, + }, + handle, + ) + if err != nil { + t.Fatalf("Unable to initialize the plugin: %v", err) + } + + plugin.(framework.DeschedulePlugin).Deschedule(ctx, tc.nodes) podsEvicted := podEvictor.TotalEvicted() if podsEvicted != tc.expectedEvictedPodCount { t.Errorf("Test error for description: %s. Expected evicted pods count %v, got %v", tc.description, tc.expectedEvictedPodCount, podsEvicted) diff --git a/pkg/descheduler/strategies/duplicates.go b/pkg/framework/plugins/removeduplicatepods/removeduplicatepods.go similarity index 79% rename from pkg/descheduler/strategies/duplicates.go rename to pkg/framework/plugins/removeduplicatepods/removeduplicatepods.go index 462fb36f6e..7e863e786a 100644 --- a/pkg/descheduler/strategies/duplicates.go +++ b/pkg/framework/plugins/removeduplicatepods/removeduplicatepods.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package strategies +package removeduplicatepods import ( "context" @@ -26,89 +26,81 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/sets" - clientset "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" - "sigs.k8s.io/descheduler/pkg/api" - "sigs.k8s.io/descheduler/pkg/descheduler/evictions" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" "sigs.k8s.io/descheduler/pkg/utils" ) -func validateRemoveDuplicatePodsParams(params *api.StrategyParameters) error { - if params == nil { - return nil - } - // At most one of include/exclude can be set - if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 { - return fmt.Errorf("only one of Include/Exclude namespaces can be set") - } - if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" { - return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set") - } - - return nil -} - -type podOwner struct { - namespace, kind, name string - imagesHash string -} +const PluginName = "RemoveDuplicatePods" // RemoveDuplicatePods removes the duplicate pods on node. This strategy evicts all duplicate pods on node. // A pod is said to be a duplicate of other if both of them are from same creator, kind and are within the same // namespace, and have at least one container with the same image. // As of now, this strategy won't evict daemonsets, mirror pods, critical pods and pods with local storages. 
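RemoveDuplicatePods groups pods by owner (namespace, kind, name) plus their container images via the podOwner key shown below; two pods collide when they share an owner and at least one image. A rough sketch of how such per-container keys can be derived (illustrative only; the plugin additionally hashes the sorted key list and balances evictions across the target nodes):

package duplicatesketch

import (
	"sort"
	"strings"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// containerKeys derives one key per (owner, image) pair for a pod, the kind of
// grouping the duplicate detection above relies on (illustrative sketch).
func containerKeys(pod *v1.Pod, owners []metav1.OwnerReference) []string {
	keys := make([]string, 0, len(owners)*len(pod.Spec.Containers))
	for _, owner := range owners {
		for _, container := range pod.Spec.Containers {
			keys = append(keys, strings.Join([]string{pod.Namespace, owner.Kind, owner.Name, container.Image}, "/"))
		}
	}
	sort.Strings(keys)
	return keys
}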
-func RemoveDuplicatePods( - ctx context.Context, - client clientset.Interface, - strategy api.DeschedulerStrategy, - nodes []*v1.Node, - podEvictor *evictions.PodEvictor, - getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc, -) { - if err := validateRemoveDuplicatePodsParams(strategy.Params); err != nil { - klog.ErrorS(err, "Invalid RemoveDuplicatePods parameters") - return +type RemoveDuplicatePods struct { + handle framework.Handle + args *framework.RemoveDuplicatePodsArgs + excludeOwnerKinds []string + podFilter podutil.FilterFunc +} + +var _ framework.Plugin = &RemoveDuplicatePods{} +var _ framework.BalancePlugin = &RemoveDuplicatePods{} + +func New(args runtime.Object, handle framework.Handle) (framework.Plugin, error) { + duplicatesArgs, ok := args.(*framework.RemoveDuplicatePodsArgs) + if !ok { + return nil, fmt.Errorf("want args to be of type RemoveDuplicatePodsArgs, got %T", args) } - thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params) - if err != nil { - klog.ErrorS(err, "Failed to get threshold priority from strategy's params") - return + + if err := framework.ValidateCommonArgs(duplicatesArgs.CommonArgs); err != nil { + return nil, err } var includedNamespaces, excludedNamespaces sets.String - if strategy.Params != nil && strategy.Params.Namespaces != nil { - includedNamespaces = sets.NewString(strategy.Params.Namespaces.Include...) - excludedNamespaces = sets.NewString(strategy.Params.Namespaces.Exclude...) + if duplicatesArgs.Namespaces != nil { + includedNamespaces = sets.NewString(duplicatesArgs.Namespaces.Include...) + excludedNamespaces = sets.NewString(duplicatesArgs.Namespaces.Exclude...) } - nodeFit := false - if strategy.Params != nil { - nodeFit = strategy.Params.NodeFit + podFilter, err := podutil.NewOptions(). + WithFilter(handle.Evictor().Filter). + WithNamespaces(includedNamespaces). + WithoutNamespaces(excludedNamespaces). + BuildFilterFunc() + if err != nil { + return nil, fmt.Errorf("error initializing pod filter function: %v", err) } - evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit)) + return &RemoveDuplicatePods{ + handle: handle, + excludeOwnerKinds: duplicatesArgs.ExcludeOwnerKinds, + podFilter: podFilter, + }, nil +} + +func (d *RemoveDuplicatePods) Name() string { + return PluginName +} +type podOwner struct { + namespace, kind, name string + imagesHash string +} + +func (d *RemoveDuplicatePods) Balance(ctx context.Context, nodes []*v1.Node) *framework.Status { duplicatePods := make(map[podOwner]map[string][]*v1.Pod) ownerKeyOccurence := make(map[podOwner]int32) nodeCount := 0 nodeMap := make(map[string]*v1.Node) - podFilter, err := podutil.NewOptions(). - WithFilter(evictable.IsEvictable). - WithNamespaces(includedNamespaces). - WithoutNamespaces(excludedNamespaces). 
- BuildFilterFunc() - if err != nil { - klog.ErrorS(err, "Error initializing pod filter function") - return - } - for _, node := range nodes { klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) - pods, err := podutil.ListPodsOnANode(node.Name, getPodsAssignedToNode, podFilter) + pods, err := podutil.ListPodsOnANode(node.Name, d.handle.GetPodsAssignedToNodeFunc(), d.podFilter) if err != nil { klog.ErrorS(err, "Error listing evictable pods on node", "node", klog.KObj(node)) continue @@ -131,7 +123,7 @@ func RemoveDuplicatePods( duplicateKeysMap := map[string][][]string{} for _, pod := range pods { ownerRefList := podutil.OwnerRef(pod) - if hasExcludedOwnerRefKind(ownerRefList, strategy) || len(ownerRefList) == 0 { + if len(ownerRefList) == 0 || hasExcludedOwnerRefKind(ownerRefList, d.excludeOwnerKinds) { continue } podContainerKeys := make([]string, 0, len(ownerRefList)*len(pod.Spec.Containers)) @@ -210,14 +202,13 @@ func RemoveDuplicatePods( // It's assumed all duplicated pods are in the same priority class // TODO(jchaloup): check if the pod has a different node to lend to for _, pod := range pods[upperAvg-1:] { - if _, err := podEvictor.EvictPod(ctx, pod, nodeMap[nodeName], "RemoveDuplicatePods"); err != nil { - klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) - break - } + d.handle.Evictor().Evict(ctx, pod) } } } } + + return nil } func getNodeAffinityNodeSelector(pod *v1.Pod) *v1.NodeSelector { @@ -287,11 +278,12 @@ func getTargetNodes(podNodes map[string][]*v1.Pod, nodes []*v1.Node) []*v1.Node return targetNodes } -func hasExcludedOwnerRefKind(ownerRefs []metav1.OwnerReference, strategy api.DeschedulerStrategy) bool { - if strategy.Params == nil || strategy.Params.RemoveDuplicates == nil { +func hasExcludedOwnerRefKind(ownerRefs []metav1.OwnerReference, ExcludeOwnerKinds []string) bool { + if len(ExcludeOwnerKinds) == 0 { return false } - exclude := sets.NewString(strategy.Params.RemoveDuplicates.ExcludeOwnerKinds...) + + exclude := sets.NewString(ExcludeOwnerKinds...) for _, owner := range ownerRefs { if exclude.Has(owner.Kind) { return true diff --git a/pkg/descheduler/strategies/duplicates_test.go b/pkg/framework/plugins/removeduplicatepods/removeduplicatepods_test.go similarity index 90% rename from pkg/descheduler/strategies/duplicates_test.go rename to pkg/framework/plugins/removeduplicatepods/removeduplicatepods_test.go index f2b6467d30..b4b18d7a70 100644 --- a/pkg/descheduler/strategies/duplicates_test.go +++ b/pkg/framework/plugins/removeduplicatepods/removeduplicatepods_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package strategies +package removeduplicatepods import ( "context" @@ -26,11 +26,14 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/informers" + clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/fake" - "sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/descheduler/evictions" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" + "sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor" + fakehandler "sigs.k8s.io/descheduler/pkg/framework/profile/fake" "sigs.k8s.io/descheduler/pkg/utils" "sigs.k8s.io/descheduler/test" ) @@ -44,6 +47,22 @@ func buildTestPodWithImage(podName, node, image string) *v1.Pod { return pod } +type frameworkHandle struct { + clientset clientset.Interface + podEvictor *evictions.PodEvictor + getPodsAssignedToNodeFunc podutil.GetPodsAssignedToNodeFunc +} + +func (f frameworkHandle) ClientSet() clientset.Interface { + return f.clientset +} +func (f frameworkHandle) PodEvictor() *evictions.PodEvictor { + return f.podEvictor +} +func (f frameworkHandle) GetPodsAssignedToNodeFunc() podutil.GetPodsAssignedToNodeFunc { + return f.getPodsAssignedToNodeFunc +} + func TestFindDuplicatePods(t *testing.T) { // first setup pods node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil) @@ -177,91 +196,85 @@ func TestFindDuplicatePods(t *testing.T) { pods []*v1.Pod nodes []*v1.Node expectedEvictedPodCount uint - strategy api.DeschedulerStrategy + + nodeFit bool + excludeOwnerKinds []string }{ { description: "Three pods in the `dev` Namespace, bound to same ReplicaSet. 1 should be evicted.", pods: []*v1.Pod{p1, p2, p3}, nodes: []*v1.Node{node1, node2}, expectedEvictedPodCount: 1, - strategy: api.DeschedulerStrategy{}, }, { description: "Three pods in the `dev` Namespace, bound to same ReplicaSet, but ReplicaSet kind is excluded. 0 should be evicted.", pods: []*v1.Pod{p1, p2, p3}, nodes: []*v1.Node{node1, node2}, expectedEvictedPodCount: 0, - strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{RemoveDuplicates: &api.RemoveDuplicates{ExcludeOwnerKinds: []string{"ReplicaSet"}}}}, + excludeOwnerKinds: []string{"ReplicaSet"}, }, { description: "Three Pods in the `test` Namespace, bound to same ReplicaSet. 1 should be evicted.", pods: []*v1.Pod{p8, p9, p10}, nodes: []*v1.Node{node1, node2}, expectedEvictedPodCount: 1, - strategy: api.DeschedulerStrategy{}, }, { description: "Three Pods in the `dev` Namespace, three Pods in the `test` Namespace. Bound to ReplicaSet with same name. 
4 should be evicted.", pods: []*v1.Pod{p1, p2, p3, p8, p9, p10}, nodes: []*v1.Node{node1, node2}, expectedEvictedPodCount: 2, - strategy: api.DeschedulerStrategy{}, }, { description: "Pods are: part of DaemonSet, with local storage, mirror pod annotation, critical pod annotation - none should be evicted.", pods: []*v1.Pod{p4, p5, p6, p7}, nodes: []*v1.Node{node1, node2}, expectedEvictedPodCount: 0, - strategy: api.DeschedulerStrategy{}, }, { description: "Test all Pods: 4 should be evicted.", pods: []*v1.Pod{p1, p2, p3, p4, p5, p6, p7, p8, p9, p10}, nodes: []*v1.Node{node1, node2}, expectedEvictedPodCount: 2, - strategy: api.DeschedulerStrategy{}, }, { description: "Pods with the same owner but different images should not be evicted", pods: []*v1.Pod{p11, p12}, nodes: []*v1.Node{node1, node2}, expectedEvictedPodCount: 0, - strategy: api.DeschedulerStrategy{}, }, { description: "Pods with multiple containers should not match themselves", pods: []*v1.Pod{p13}, nodes: []*v1.Node{node1, node2}, expectedEvictedPodCount: 0, - strategy: api.DeschedulerStrategy{}, }, { description: "Pods with matching ownerrefs and at not all matching image should not trigger an eviction", pods: []*v1.Pod{p11, p13}, nodes: []*v1.Node{node1, node2}, expectedEvictedPodCount: 0, - strategy: api.DeschedulerStrategy{}, }, { description: "Three pods in the `dev` Namespace, bound to same ReplicaSet. Only node available has a taint, and nodeFit set to true. 0 should be evicted.", pods: []*v1.Pod{p1, p2, p3}, nodes: []*v1.Node{node1, node3}, expectedEvictedPodCount: 0, - strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}}, + nodeFit: true, }, { description: "Three pods in the `node-fit` Namespace, bound to same ReplicaSet, all with a nodeSelector. Only node available has an incorrect node label, and nodeFit set to true. 0 should be evicted.", pods: []*v1.Pod{p15, p16, p17}, nodes: []*v1.Node{node1, node4}, expectedEvictedPodCount: 0, - strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}}, + nodeFit: true, }, { description: "Three pods in the `node-fit` Namespace, bound to same ReplicaSet. Only node available is not schedulable, and nodeFit set to true. 
0 should be evicted.", pods: []*v1.Pod{p1, p2, p3}, nodes: []*v1.Node{node1, node5}, expectedEvictedPodCount: 0, - strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}}, + nodeFit: true, }, } @@ -290,21 +303,42 @@ func TestFindDuplicatePods(t *testing.T) { sharedInformerFactory.Start(ctx.Done()) sharedInformerFactory.WaitForCacheSync(ctx.Done()) - podEvictor := evictions.NewPodEvictor( + podEvictor := framework.NewPodEvictor( fakeClient, - "v1", + policyv1.SchemeGroupVersion.String(), false, nil, nil, - testCase.nodes, - false, - false, - false, - false, false, ) - RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, testCase.nodes, podEvictor, getPodsAssignedToNode) + handle := &fakehandler.FrameworkHandle{ + ClientsetImpl: fakeClient, + EvictorImpl: podEvictor, + GetPodsAssignedToNodeFuncImpl: getPodsAssignedToNode, + SharedInformerFactoryImpl: sharedInformerFactory, + } + + defaultEvictor, err := defaultevictor.New(&framework.DefaultEvictorArgs{ + NodeFit: testCase.nodeFit, + }, handle) + if err != nil { + t.Fatalf("Unable to initialize the default evictor: %v", err) + } + + handle.EvictPlugin = defaultEvictor.(framework.EvictPlugin) + handle.SortPlugin = defaultEvictor.(framework.SortPlugin) + + plugin, err := New(&framework.RemoveDuplicatePodsArgs{ + ExcludeOwnerKinds: testCase.excludeOwnerKinds, + }, + handle, + ) + if err != nil { + t.Fatalf("Unable to initialize the plugin: %v", err) + } + + plugin.(framework.BalancePlugin).Balance(ctx, testCase.nodes) podsEvicted := podEvictor.TotalEvicted() if podsEvicted != testCase.expectedEvictedPodCount { t.Errorf("Test error for description: %s. Expected evicted pods count %v, got %v", testCase.description, testCase.expectedEvictedPodCount, podsEvicted) @@ -443,7 +477,6 @@ func TestRemoveDuplicatesUniformly(t *testing.T) { pods []*v1.Pod nodes []*v1.Node expectedEvictedPodCount uint - strategy api.DeschedulerStrategy }{ { description: "Evict pods uniformly", @@ -465,7 +498,6 @@ func TestRemoveDuplicatesUniformly(t *testing.T) { test.BuildTestNode("n2", 2000, 3000, 10, nil), test.BuildTestNode("n3", 2000, 3000, 10, nil), }, - strategy: api.DeschedulerStrategy{}, }, { description: "Evict pods uniformly with one node left out", @@ -486,7 +518,6 @@ func TestRemoveDuplicatesUniformly(t *testing.T) { test.BuildTestNode("n1", 2000, 3000, 10, nil), test.BuildTestNode("n2", 2000, 3000, 10, nil), }, - strategy: api.DeschedulerStrategy{}, }, { description: "Evict pods uniformly with two replica sets", @@ -508,7 +539,6 @@ func TestRemoveDuplicatesUniformly(t *testing.T) { test.BuildTestNode("n2", 2000, 3000, 10, nil), test.BuildTestNode("n3", 2000, 3000, 10, nil), }, - strategy: api.DeschedulerStrategy{}, }, { description: "Evict pods uniformly with two owner references", @@ -540,7 +570,6 @@ func TestRemoveDuplicatesUniformly(t *testing.T) { test.BuildTestNode("n2", 2000, 3000, 10, nil), test.BuildTestNode("n3", 2000, 3000, 10, nil), }, - strategy: api.DeschedulerStrategy{}, }, { description: "Evict pods with number of pods less than nodes", @@ -555,7 +584,6 @@ func TestRemoveDuplicatesUniformly(t *testing.T) { test.BuildTestNode("n2", 2000, 3000, 10, nil), test.BuildTestNode("n3", 2000, 3000, 10, nil), }, - strategy: api.DeschedulerStrategy{}, }, { description: "Evict pods with number of pods less than nodes, but ignore different pods with the same ownerref", @@ -574,7 +602,6 @@ func TestRemoveDuplicatesUniformly(t *testing.T) { test.BuildTestNode("n2", 2000, 3000, 10, nil), test.BuildTestNode("n3", 2000, 3000, 10, nil), 
}, - strategy: api.DeschedulerStrategy{}, }, { description: "Evict pods with a single pod with three nodes", @@ -588,7 +615,6 @@ func TestRemoveDuplicatesUniformly(t *testing.T) { test.BuildTestNode("n2", 2000, 3000, 10, nil), test.BuildTestNode("n3", 2000, 3000, 10, nil), }, - strategy: api.DeschedulerStrategy{}, }, { description: "Evict pods uniformly respecting taints", @@ -613,7 +639,6 @@ func TestRemoveDuplicatesUniformly(t *testing.T) { test.BuildTestNode("master2", 2000, 3000, 10, setMasterNoScheduleTaint), test.BuildTestNode("master3", 2000, 3000, 10, setMasterNoScheduleTaint), }, - strategy: api.DeschedulerStrategy{}, }, { description: "Evict pods uniformly respecting RequiredDuringSchedulingIgnoredDuringExecution node affinity", @@ -638,7 +663,6 @@ func TestRemoveDuplicatesUniformly(t *testing.T) { test.BuildTestNode("master2", 2000, 3000, 10, setMasterNoScheduleLabel), test.BuildTestNode("master3", 2000, 3000, 10, setMasterNoScheduleLabel), }, - strategy: api.DeschedulerStrategy{}, }, { description: "Evict pods uniformly respecting node selector", @@ -663,7 +687,6 @@ func TestRemoveDuplicatesUniformly(t *testing.T) { test.BuildTestNode("master2", 2000, 3000, 10, nil), test.BuildTestNode("master3", 2000, 3000, 10, nil), }, - strategy: api.DeschedulerStrategy{}, }, { description: "Evict pods uniformly respecting node selector with zero target nodes", @@ -688,7 +711,6 @@ func TestRemoveDuplicatesUniformly(t *testing.T) { test.BuildTestNode("master2", 2000, 3000, 10, nil), test.BuildTestNode("master3", 2000, 3000, 10, nil), }, - strategy: api.DeschedulerStrategy{}, }, } @@ -717,21 +739,36 @@ func TestRemoveDuplicatesUniformly(t *testing.T) { sharedInformerFactory.Start(ctx.Done()) sharedInformerFactory.WaitForCacheSync(ctx.Done()) - podEvictor := evictions.NewPodEvictor( + podEvictor := framework.NewPodEvictor( fakeClient, policyv1.SchemeGroupVersion.String(), false, nil, nil, - testCase.nodes, - false, - false, - false, - false, false, ) - RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, testCase.nodes, podEvictor, getPodsAssignedToNode) + handle := &fakehandler.FrameworkHandle{ + ClientsetImpl: fakeClient, + EvictorImpl: podEvictor, + GetPodsAssignedToNodeFuncImpl: getPodsAssignedToNode, + SharedInformerFactoryImpl: sharedInformerFactory, + } + + defaultEvictor, err := defaultevictor.New(&framework.DefaultEvictorArgs{}, handle) + if err != nil { + t.Fatalf("Unable to initialize the default evictor: %v", err) + } + + handle.EvictPlugin = defaultEvictor.(framework.EvictPlugin) + handle.SortPlugin = defaultEvictor.(framework.SortPlugin) + + plugin, err := New(&framework.RemoveDuplicatePodsArgs{}, handle) + if err != nil { + t.Fatalf("Unable to initialize the plugin: %v", err) + } + + plugin.(framework.BalancePlugin).Balance(ctx, testCase.nodes) podsEvicted := podEvictor.TotalEvicted() if podsEvicted != testCase.expectedEvictedPodCount { t.Errorf("Test error for description: %s. 
Expected evicted pods count %v, got %v", testCase.description, testCase.expectedEvictedPodCount, podsEvicted) diff --git a/pkg/framework/plugins/removefailedpods/failedpods.go b/pkg/framework/plugins/removefailedpods/failedpods.go new file mode 100644 index 0000000000..d9e96bb537 --- /dev/null +++ b/pkg/framework/plugins/removefailedpods/failedpods.go @@ -0,0 +1,149 @@ +package removefailedpods + +import ( + "context" + "fmt" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + utilerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + + podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" +) + +const PluginName = "RemoveFailedPods" + +// RemoveFailedPods removes Pods that are in failed status phase. +type RemoveFailedPods struct { + handle framework.Handle + args *framework.RemoveFailedPodsArgs + reasons sets.String + excludeOwnerKinds sets.String + podFilter podutil.FilterFunc +} + +var _ framework.Plugin = &RemoveFailedPods{} +var _ framework.DeschedulePlugin = &RemoveFailedPods{} + +func New(args runtime.Object, handle framework.Handle) (framework.Plugin, error) { + failedPodsArgs, ok := args.(*framework.RemoveFailedPodsArgs) + if !ok { + return nil, fmt.Errorf("want args to be of type RemoveFailedPodsArgs, got %T", args) + } + + if err := framework.ValidateCommonArgs(failedPodsArgs.CommonArgs); err != nil { + return nil, err + } + + var includedNamespaces, excludedNamespaces sets.String + if failedPodsArgs.Namespaces != nil { + includedNamespaces = sets.NewString(failedPodsArgs.Namespaces.Include...) + excludedNamespaces = sets.NewString(failedPodsArgs.Namespaces.Exclude...) + } + + podFilter, err := podutil.NewOptions(). + WithFilter(handle.Evictor().Filter). + WithNamespaces(includedNamespaces). + WithoutNamespaces(excludedNamespaces). + WithLabelSelector(failedPodsArgs.LabelSelector). + BuildFilterFunc() + if err != nil { + return nil, fmt.Errorf("error initializing pod filter function: %v", err) + } + + // Only list failed pods + phaseFilter := func(pod *v1.Pod) bool { return pod.Status.Phase == v1.PodFailed } + podFilter = podutil.WrapFilterFuncs(phaseFilter, podFilter) + + return &RemoveFailedPods{ + handle: handle, + args: failedPodsArgs, + excludeOwnerKinds: sets.NewString(failedPodsArgs.ExcludeOwnerKinds...), + reasons: sets.NewString(failedPodsArgs.Reasons...), + podFilter: podFilter, + }, nil +} + +func (d *RemoveFailedPods) Name() string { + return PluginName +} + +func (d *RemoveFailedPods) Deschedule(ctx context.Context, nodes []*v1.Node) *framework.Status { + for _, node := range nodes { + klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) + pods, err := podutil.ListAllPodsOnANode(node.Name, d.handle.GetPodsAssignedToNodeFunc(), d.podFilter) + if err != nil { + klog.ErrorS(err, "Error listing a nodes failed pods", "node", klog.KObj(node)) + continue + } + + for i, pod := range pods { + if err = d.validateFailedPodShouldEvict(pod); err != nil { + klog.V(4).InfoS(fmt.Sprintf("ignoring pod for eviction due to: %s", err.Error()), "pod", klog.KObj(pod)) + continue + } + + d.handle.Evictor().Evict(ctx, pods[i]) + } + } + return nil +} + +// validateFailedPodShouldEvict looks at strategy params settings to see if the Pod +// should be evicted given the params in the PodFailed policy. 
+func (d *RemoveFailedPods) validateFailedPodShouldEvict(pod *v1.Pod) error { + var errs []error + + if d.args.MinPodLifetimeSeconds != nil { + podAgeSeconds := uint(metav1.Now().Sub(pod.GetCreationTimestamp().Local()).Seconds()) + if podAgeSeconds < *d.args.MinPodLifetimeSeconds { + errs = append(errs, fmt.Errorf("pod does not exceed the min age seconds of %d", *d.args.MinPodLifetimeSeconds)) + } + } + + if len(d.excludeOwnerKinds) > 0 { + ownerRefList := podutil.OwnerRef(pod) + for _, owner := range ownerRefList { + if d.excludeOwnerKinds.Has(owner.Kind) { + errs = append(errs, fmt.Errorf("pod's owner kind of %s is excluded", owner.Kind)) + } + } + } + + if len(d.args.Reasons) > 0 { + reasons := getFailedContainerStatusReasons(pod.Status.ContainerStatuses) + + if pod.Status.Phase == v1.PodFailed && pod.Status.Reason != "" { + reasons = append(reasons, pod.Status.Reason) + } + + if d.args.IncludingInitContainers { + reasons = append(reasons, getFailedContainerStatusReasons(pod.Status.InitContainerStatuses)...) + } + + if !d.reasons.HasAny(reasons...) { + errs = append(errs, fmt.Errorf("pod does not match any of the reasons")) + } + } + + return utilerrors.NewAggregate(errs) +} + +func getFailedContainerStatusReasons(containerStatuses []v1.ContainerStatus) []string { + reasons := make([]string, 0) + + for _, containerStatus := range containerStatuses { + if containerStatus.State.Waiting != nil && containerStatus.State.Waiting.Reason != "" { + reasons = append(reasons, containerStatus.State.Waiting.Reason) + } + if containerStatus.State.Terminated != nil && containerStatus.State.Terminated.Reason != "" { + reasons = append(reasons, containerStatus.State.Terminated.Reason) + } + } + + return reasons +} diff --git a/pkg/descheduler/strategies/failedpods_test.go b/pkg/framework/plugins/removefailedpods/failedpods_test.go similarity index 70% rename from pkg/descheduler/strategies/failedpods_test.go rename to pkg/framework/plugins/removefailedpods/failedpods_test.go index 367418499b..24ef76112e 100644 --- a/pkg/descheduler/strategies/failedpods_test.go +++ b/pkg/framework/plugins/removefailedpods/failedpods_test.go @@ -1,4 +1,4 @@ -package strategies +package removefailedpods import ( "context" @@ -12,8 +12,10 @@ import ( "k8s.io/client-go/kubernetes/fake" "sigs.k8s.io/descheduler/pkg/api" - "sigs.k8s.io/descheduler/pkg/descheduler/evictions" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" + "sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor" + fakehandler "sigs.k8s.io/descheduler/pkg/framework/profile/fake" "sigs.k8s.io/descheduler/test" ) @@ -22,45 +24,27 @@ var ( ) func TestRemoveFailedPods(t *testing.T) { - createStrategy := func(enabled, includingInitContainers bool, reasons, excludeKinds []string, minAgeSeconds *uint, nodeFit bool) api.DeschedulerStrategy { - return api.DeschedulerStrategy{ - Enabled: enabled, - Params: &api.StrategyParameters{ - FailedPods: &api.FailedPods{ - Reasons: reasons, - IncludingInitContainers: includingInitContainers, - ExcludeOwnerKinds: excludeKinds, - MinPodLifetimeSeconds: minAgeSeconds, - }, - NodeFit: nodeFit, - }, - } - } - tests := []struct { description string nodes []*v1.Node strategy api.DeschedulerStrategy expectedEvictedPodCount uint pods []*v1.Pod + + includingInitContainers bool + reasons []string + excludeOwnerKinds []string + minPodLifetimeSeconds *uint + nodeFit bool }{ - { - description: "default empty strategy, 0 failures, 0 evictions", - strategy: api.DeschedulerStrategy{}, - nodes: 
[]*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, - expectedEvictedPodCount: 0, - pods: []*v1.Pod{}, // no pods come back with field selector phase=Failed - }, { description: "0 failures, 0 evictions", - strategy: createStrategy(true, false, nil, nil, nil, false), nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, expectedEvictedPodCount: 0, pods: []*v1.Pod{}, // no pods come back with field selector phase=Failed }, { description: "1 container terminated with reason NodeAffinity, 1 eviction", - strategy: createStrategy(true, false, nil, nil, nil, false), nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, expectedEvictedPodCount: 1, pods: []*v1.Pod{ @@ -71,7 +55,7 @@ func TestRemoveFailedPods(t *testing.T) { }, { description: "1 init container terminated with reason NodeAffinity, 1 eviction", - strategy: createStrategy(true, true, nil, nil, nil, false), + includingInitContainers: true, nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, expectedEvictedPodCount: 1, pods: []*v1.Pod{ @@ -82,7 +66,7 @@ func TestRemoveFailedPods(t *testing.T) { }, { description: "1 init container waiting with reason CreateContainerConfigError, 1 eviction", - strategy: createStrategy(true, true, nil, nil, nil, false), + includingInitContainers: true, nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, expectedEvictedPodCount: 1, pods: []*v1.Pod{ @@ -92,8 +76,8 @@ func TestRemoveFailedPods(t *testing.T) { }, }, { - description: "2 init container waiting with reason CreateContainerConfigError, 2 nodes, 2 evictions", - strategy: createStrategy(true, true, nil, nil, nil, false), + description: "2 init container waiting with reason CreateContainerConfigError, 2 nodes, 2 evictions", + includingInitContainers: true, nodes: []*v1.Node{ test.BuildTestNode("node1", 2000, 3000, 10, nil), test.BuildTestNode("node2", 2000, 3000, 10, nil), @@ -110,7 +94,7 @@ func TestRemoveFailedPods(t *testing.T) { }, { description: "include reason=CreateContainerConfigError, 1 container terminated with reason CreateContainerConfigError, 1 eviction", - strategy: createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, false), + reasons: []string{"CreateContainerConfigError"}, nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, expectedEvictedPodCount: 1, pods: []*v1.Pod{ @@ -121,7 +105,7 @@ func TestRemoveFailedPods(t *testing.T) { }, { description: "include reason=CreateContainerConfigError+NodeAffinity, 1 container terminated with reason CreateContainerConfigError, 1 eviction", - strategy: createStrategy(true, false, []string{"CreateContainerConfigError", "NodeAffinity"}, nil, nil, false), + reasons: []string{"CreateContainerConfigError", "NodeAffinity"}, nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, expectedEvictedPodCount: 1, pods: []*v1.Pod{ @@ -132,7 +116,7 @@ func TestRemoveFailedPods(t *testing.T) { }, { description: "include reason=CreateContainerConfigError, 1 container terminated with reason NodeAffinity, 0 eviction", - strategy: createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, false), + reasons: []string{"CreateContainerConfigError"}, nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, expectedEvictedPodCount: 0, pods: []*v1.Pod{ @@ -143,7 +127,7 @@ func TestRemoveFailedPods(t *testing.T) { }, { description: "include init container=false, 1 init container waiting with reason CreateContainerConfigError, 0 eviction", - strategy: 
createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, false), + reasons: []string{"CreateContainerConfigError"}, nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, expectedEvictedPodCount: 0, pods: []*v1.Pod{ @@ -154,7 +138,7 @@ func TestRemoveFailedPods(t *testing.T) { }, { description: "lifetime 1 hour, 1 container terminated with reason NodeAffinity, 0 eviction", - strategy: createStrategy(true, false, nil, nil, &OneHourInSeconds, false), + minPodLifetimeSeconds: &OneHourInSeconds, nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, expectedEvictedPodCount: 0, pods: []*v1.Pod{ @@ -165,7 +149,7 @@ func TestRemoveFailedPods(t *testing.T) { }, { description: "nodeFit=true, 1 unschedulable node, 1 container terminated with reason NodeAffinity, 0 eviction", - strategy: createStrategy(true, false, nil, nil, nil, true), + nodeFit: true, nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, func(node *v1.Node) { node.Spec.Unschedulable = true })}, @@ -178,7 +162,8 @@ func TestRemoveFailedPods(t *testing.T) { }, { description: "excluded owner kind=ReplicaSet, 1 init container terminated with owner kind=ReplicaSet, 0 eviction", - strategy: createStrategy(true, true, nil, []string{"ReplicaSet"}, nil, false), + includingInitContainers: true, + excludeOwnerKinds: []string{"ReplicaSet"}, nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, expectedEvictedPodCount: 0, pods: []*v1.Pod{ @@ -189,7 +174,8 @@ func TestRemoveFailedPods(t *testing.T) { }, { description: "excluded owner kind=DaemonSet, 1 init container terminated with owner kind=ReplicaSet, 1 eviction", - strategy: createStrategy(true, true, nil, []string{"DaemonSet"}, nil, false), + includingInitContainers: true, + excludeOwnerKinds: []string{"DaemonSet"}, nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, expectedEvictedPodCount: 1, pods: []*v1.Pod{ @@ -200,7 +186,8 @@ func TestRemoveFailedPods(t *testing.T) { }, { description: "excluded owner kind=DaemonSet, 1 init container terminated with owner kind=ReplicaSet, 1 pod in termination; nothing should be moved", - strategy: createStrategy(true, true, nil, []string{"DaemonSet"}, nil, false), + includingInitContainers: true, + excludeOwnerKinds: []string{"DaemonSet"}, nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, expectedEvictedPodCount: 0, pods: []*v1.Pod{ @@ -211,7 +198,7 @@ func TestRemoveFailedPods(t *testing.T) { }, { description: "1 container terminated with reason ShutDown, 0 evictions", - strategy: createStrategy(true, false, nil, nil, nil, true), + nodeFit: true, nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, expectedEvictedPodCount: 0, pods: []*v1.Pod{ @@ -220,7 +207,7 @@ func TestRemoveFailedPods(t *testing.T) { }, { description: "include reason=Shutdown, 2 containers terminated with reason ShutDown, 2 evictions", - strategy: createStrategy(true, false, []string{"Shutdown"}, nil, nil, false), + reasons: []string{"Shutdown"}, nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, expectedEvictedPodCount: 2, pods: []*v1.Pod{ @@ -254,60 +241,48 @@ func TestRemoveFailedPods(t *testing.T) { sharedInformerFactory.Start(ctx.Done()) sharedInformerFactory.WaitForCacheSync(ctx.Done()) - podEvictor := evictions.NewPodEvictor( + podEvictor := framework.NewPodEvictor( fakeClient, policyv1.SchemeGroupVersion.String(), false, nil, nil, - tc.nodes, - false, - false, - false, - false, false, ) - RemoveFailedPods(ctx, fakeClient, tc.strategy, 
tc.nodes, podEvictor, getPodsAssignedToNode) - actualEvictedPodCount := podEvictor.TotalEvicted() - if actualEvictedPodCount != tc.expectedEvictedPodCount { - t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount) + handle := &fakehandler.FrameworkHandle{ + ClientsetImpl: fakeClient, + EvictorImpl: podEvictor, + GetPodsAssignedToNodeFuncImpl: getPodsAssignedToNode, + SharedInformerFactoryImpl: sharedInformerFactory, } - }) - } -} -func TestValidRemoveFailedPodsParams(t *testing.T) { - ctx := context.Background() - fakeClient := &fake.Clientset{} - testCases := []struct { - name string - params *api.StrategyParameters - }{ - {name: "validate nil params", params: nil}, - {name: "validate empty params", params: &api.StrategyParameters{}}, - {name: "validate reasons params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{ - Reasons: []string{"CreateContainerConfigError"}, - }}}, - {name: "validate includingInitContainers params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{ - IncludingInitContainers: true, - }}}, - {name: "validate excludeOwnerKinds params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{ - ExcludeOwnerKinds: []string{"Job"}, - }}}, - {name: "validate excludeOwnerKinds params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{ - MinPodLifetimeSeconds: &OneHourInSeconds, - }}}, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - params, err := validateAndParseRemoveFailedPodsParams(ctx, fakeClient, tc.params) + defaultEvictor, err := defaultevictor.New(&framework.DefaultEvictorArgs{ + NodeFit: tc.nodeFit, + }, handle) if err != nil { - t.Errorf("strategy params should be valid but got err: %v", err.Error()) + t.Fatalf("Unable to initialize the default evictor: %v", err) } - if params == nil { - t.Errorf("strategy params should return a ValidatedFailedPodsStrategyParams but got nil") + + handle.EvictPlugin = defaultEvictor.(framework.EvictPlugin) + handle.SortPlugin = defaultEvictor.(framework.SortPlugin) + + plugin, err := New(&framework.RemoveFailedPodsArgs{ + ExcludeOwnerKinds: tc.excludeOwnerKinds, + MinPodLifetimeSeconds: tc.minPodLifetimeSeconds, + Reasons: tc.reasons, + IncludingInitContainers: tc.includingInitContainers, + }, + handle, + ) + if err != nil { + t.Fatalf("Unable to initialize the plugin: %v", err) + } + + plugin.(interface{}).(framework.DeschedulePlugin).Deschedule(ctx, tc.nodes) + actualEvictedPodCount := podEvictor.TotalEvicted() + if actualEvictedPodCount != tc.expectedEvictedPodCount { + t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount) } }) } diff --git a/pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts.go b/pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts.go new file mode 100644 index 0000000000..0bb2a34507 --- /dev/null +++ b/pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts.go @@ -0,0 +1,126 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package removepodshavingtoomanyrestarts + +import ( + "context" + "fmt" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + + podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" +) + +const PluginName = "RemovePodsHavingTooManyRestarts" + +// RemovePodsHavingTooManyRestarts removes the pods that have too many restarts on node. +// There are too many cases leading this issue: Volume mount failed, app error due to nodes' different settings. +// As of now, this strategy won't evict daemonsets, mirror pods, critical pods and pods with local storages. +type RemovePodsHavingTooManyRestarts struct { + handle framework.Handle + args *framework.RemovePodsHavingTooManyRestartsArgs + reasons sets.String + excludeOwnerKinds sets.String + podFilter podutil.FilterFunc +} + +var _ framework.Plugin = &RemovePodsHavingTooManyRestarts{} +var _ framework.DeschedulePlugin = &RemovePodsHavingTooManyRestarts{} + +func New(args runtime.Object, handle framework.Handle) (framework.Plugin, error) { + restartsArgs, ok := args.(*framework.RemovePodsHavingTooManyRestartsArgs) + if !ok { + return nil, fmt.Errorf("want args to be of type RemovePodsHavingTooManyRestartsArgs, got %T", args) + } + + if err := framework.ValidateCommonArgs(restartsArgs.CommonArgs); err != nil { + return nil, err + } + + if restartsArgs.PodRestartThreshold < 1 { + return nil, fmt.Errorf("podsHavingTooManyRestarts threshold not set") + } + + var includedNamespaces, excludedNamespaces sets.String + if restartsArgs.Namespaces != nil { + includedNamespaces = sets.NewString(restartsArgs.Namespaces.Include...) + excludedNamespaces = sets.NewString(restartsArgs.Namespaces.Exclude...) + } + + podFilter, err := podutil.NewOptions(). + WithFilter(handle.Evictor().Filter). + WithNamespaces(includedNamespaces). + WithoutNamespaces(excludedNamespaces). + WithLabelSelector(restartsArgs.LabelSelector). + BuildFilterFunc() + if err != nil { + return nil, fmt.Errorf("error initializing pod filter function: %v", err) + } + + return &RemovePodsHavingTooManyRestarts{ + handle: handle, + args: restartsArgs, + podFilter: podFilter, + }, nil +} + +func (d *RemovePodsHavingTooManyRestarts) Name() string { + return PluginName +} + +func (d *RemovePodsHavingTooManyRestarts) Deschedule(ctx context.Context, nodes []*v1.Node) *framework.Status { + for _, node := range nodes { + klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) + pods, err := podutil.ListPodsOnANode(node.Name, d.handle.GetPodsAssignedToNodeFunc(), d.podFilter) + if err != nil { + klog.ErrorS(err, "Error listing a nodes pods", "node", klog.KObj(node)) + continue + } + + for i, pod := range pods { + restarts, initRestarts := calcContainerRestarts(pod) + if d.args.IncludingInitContainers { + if restarts+initRestarts < d.args.PodRestartThreshold { + continue + } + } else if restarts < d.args.PodRestartThreshold { + continue + } + d.handle.Evictor().Evict(ctx, pods[i]) + } + } + return nil +} + +// calcContainerRestarts get container restarts and init container restarts. 
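+// For example, with IncludingInitContainers set, a pod whose app containers restarted
+// 20 times in total and whose init containers restarted another 5 times is compared as
+// 25 against PodRestartThreshold in Deschedule above; without it, only the 20 app
+// container restarts count. (The numbers here are illustrative.)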
+func calcContainerRestarts(pod *v1.Pod) (int32, int32) { + var restarts, initRestarts int32 + + for _, cs := range pod.Status.ContainerStatuses { + restarts += cs.RestartCount + } + + for _, cs := range pod.Status.InitContainerStatuses { + initRestarts += cs.RestartCount + } + + return restarts, initRestarts +} diff --git a/pkg/descheduler/strategies/toomanyrestarts_test.go b/pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts_test.go similarity index 72% rename from pkg/descheduler/strategies/toomanyrestarts_test.go rename to pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts_test.go index aa7bec2e78..6cdcf5f413 100644 --- a/pkg/descheduler/strategies/toomanyrestarts_test.go +++ b/pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package strategies +package removepodshavingtoomanyrestarts import ( "context" @@ -26,14 +26,34 @@ import ( "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/informers" + clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/fake" "sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/descheduler/evictions" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" + "sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor" + fakehandler "sigs.k8s.io/descheduler/pkg/framework/profile/fake" "sigs.k8s.io/descheduler/test" ) +type frameworkHandle struct { + clientset clientset.Interface + podEvictor *evictions.PodEvictor + getPodsAssignedToNodeFunc podutil.GetPodsAssignedToNodeFunc +} + +func (f frameworkHandle) ClientSet() clientset.Interface { + return f.clientset +} +func (f frameworkHandle) PodEvictor() *evictions.PodEvictor { + return f.podEvictor +} +func (f frameworkHandle) GetPodsAssignedToNodeFunc() podutil.GetPodsAssignedToNodeFunc { + return f.getPodsAssignedToNodeFunc +} + func initPods(node *v1.Node) []*v1.Pod { pods := make([]*v1.Pod, 0) @@ -100,19 +120,6 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) { pods := initPods(node1) - createStrategy := func(enabled, includingInitContainers bool, restartThresholds int32, nodeFit bool) api.DeschedulerStrategy { - return api.DeschedulerStrategy{ - Enabled: enabled, - Params: &api.StrategyParameters{ - PodsHavingTooManyRestarts: &api.PodsHavingTooManyRestarts{ - PodRestartThreshold: restartThresholds, - IncludingInitContainers: includingInitContainers, - }, - NodeFit: nodeFit, - }, - } - } - var uint3 uint = 3 tests := []struct { @@ -122,79 +129,94 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) { expectedEvictedPodCount uint maxPodsToEvictPerNode *uint maxNoOfPodsToEvictPerNamespace *uint + + includingInitContainers bool + podRestartThreshold int32 + nodeFit bool }{ { description: "All pods have total restarts under threshold, no pod evictions", - strategy: createStrategy(true, true, 10000, false), + includingInitContainers: true, + podRestartThreshold: 10000, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 0, }, { description: "Some pods have total restarts bigger than threshold", - strategy: createStrategy(true, true, 1, false), + includingInitContainers: true, + podRestartThreshold: 1, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 6, }, { description: "Nine pods have total restarts equals threshold(includingInitContainers=true), 6 pod evictions", - strategy: createStrategy(true, true, 1*25, 
false), + includingInitContainers: true, + podRestartThreshold: 1 * 25, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 6, }, { description: "Nine pods have total restarts equals threshold(includingInitContainers=false), 5 pod evictions", - strategy: createStrategy(true, false, 1*25, false), + podRestartThreshold: 1 * 25, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 5, }, { description: "All pods have total restarts equals threshold(includingInitContainers=true), 6 pod evictions", - strategy: createStrategy(true, true, 1*20, false), + includingInitContainers: true, + podRestartThreshold: 1 * 20, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 6, }, { description: "Nine pods have total restarts equals threshold(includingInitContainers=false), 6 pod evictions", - strategy: createStrategy(true, false, 1*20, false), + podRestartThreshold: 1 * 20, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 6, }, { description: "Five pods have total restarts bigger than threshold(includingInitContainers=true), but only 1 pod eviction", - strategy: createStrategy(true, true, 5*25+1, false), + includingInitContainers: true, + podRestartThreshold: 5*25 + 1, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 1, }, { description: "Five pods have total restarts bigger than threshold(includingInitContainers=false), but only 1 pod eviction", - strategy: createStrategy(true, false, 5*20+1, false), + podRestartThreshold: 5*20 + 1, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 1, }, { description: "All pods have total restarts equals threshold(maxPodsToEvictPerNode=3), 3 pod evictions", - strategy: createStrategy(true, true, 1, false), + includingInitContainers: true, + podRestartThreshold: 1, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 3, maxPodsToEvictPerNode: &uint3, }, { description: "All pods have total restarts equals threshold(maxNoOfPodsToEvictPerNamespace=3), 3 pod evictions", - strategy: createStrategy(true, true, 1, false), + includingInitContainers: true, + podRestartThreshold: 1, nodes: []*v1.Node{node1}, expectedEvictedPodCount: 3, maxNoOfPodsToEvictPerNamespace: &uint3, }, { description: "All pods have total restarts equals threshold(maxPodsToEvictPerNode=3) but the only other node is tained, 0 pod evictions", - strategy: createStrategy(true, true, 1, true), + includingInitContainers: true, + podRestartThreshold: 1, + nodeFit: true, nodes: []*v1.Node{node1, node2}, expectedEvictedPodCount: 0, maxPodsToEvictPerNode: &uint3, }, { description: "All pods have total restarts equals threshold(maxPodsToEvictPerNode=3) but the only other node is not schedulable, 0 pod evictions", - strategy: createStrategy(true, true, 1, true), + includingInitContainers: true, + podRestartThreshold: 1, + nodeFit: true, nodes: []*v1.Node{node1, node3}, expectedEvictedPodCount: 0, maxPodsToEvictPerNode: &uint3, @@ -227,21 +249,43 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) { sharedInformerFactory.Start(ctx.Done()) sharedInformerFactory.WaitForCacheSync(ctx.Done()) - podEvictor := evictions.NewPodEvictor( + podEvictor := framework.NewPodEvictor( fakeClient, policyv1.SchemeGroupVersion.String(), false, tc.maxPodsToEvictPerNode, tc.maxNoOfPodsToEvictPerNamespace, - tc.nodes, - false, - false, - false, - false, false, ) - RemovePodsHavingTooManyRestarts(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor, getPodsAssignedToNode) + handle := &fakehandler.FrameworkHandle{ + ClientsetImpl: fakeClient, + EvictorImpl: podEvictor, + GetPodsAssignedToNodeFuncImpl: getPodsAssignedToNode, + 
SharedInformerFactoryImpl: sharedInformerFactory, + } + + defaultEvictor, err := defaultevictor.New(&framework.DefaultEvictorArgs{ + NodeFit: tc.nodeFit, + }, handle) + if err != nil { + t.Fatalf("Unable to initialize the default evictor: %v", err) + } + + handle.EvictPlugin = defaultEvictor.(framework.EvictPlugin) + handle.SortPlugin = defaultEvictor.(framework.SortPlugin) + + plugin, err := New(&framework.RemovePodsHavingTooManyRestartsArgs{ + PodRestartThreshold: tc.podRestartThreshold, + IncludingInitContainers: tc.includingInitContainers, + }, + handle, + ) + if err != nil { + t.Fatalf("Unable to initialize the plugin: %v", err) + } + + plugin.(interface{}).(framework.DeschedulePlugin).Deschedule(ctx, tc.nodes) actualEvictedPodCount := podEvictor.TotalEvicted() if actualEvictedPodCount != tc.expectedEvictedPodCount { t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount) diff --git a/pkg/descheduler/strategies/pod_antiaffinity.go b/pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity.go similarity index 56% rename from pkg/descheduler/strategies/pod_antiaffinity.go rename to pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity.go index a5c71cd74c..bfbd14a0dd 100644 --- a/pkg/descheduler/strategies/pod_antiaffinity.go +++ b/pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity.go @@ -14,98 +14,87 @@ See the License for the specific language governing permissions and limitations under the License. */ -package strategies +package removepodsviolatinginterpodantiaffinity import ( "context" "fmt" - "k8s.io/apimachinery/pkg/util/sets" - "sigs.k8s.io/descheduler/pkg/api" - "sigs.k8s.io/descheduler/pkg/descheduler/evictions" - podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" - "sigs.k8s.io/descheduler/pkg/utils" - v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - clientset "k8s.io/client-go/kubernetes" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" -) -func validateRemovePodsViolatingInterPodAntiAffinityParams(params *api.StrategyParameters) error { - if params == nil { - return nil - } + podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" + "sigs.k8s.io/descheduler/pkg/utils" +) - // At most one of include/exclude can be set - if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 { - return fmt.Errorf("only one of Include/Exclude namespaces can be set") - } - if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" { - return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set") - } +const PluginName = "RemovePodsViolatingInterPodAntiAffinity" - return nil +// RemovePodsViolatingInterPodAntiAffinity evicts pods on the node which are having a pod affinity rules. +type RemovePodsViolatingInterPodAntiAffinity struct { + handle framework.Handle + args *framework.RemovePodsViolatingInterPodAntiAffinityArgs + podFilter podutil.FilterFunc } -// RemovePodsViolatingInterPodAntiAffinity evicts pods on the node which are having a pod affinity rules. 
-func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc) { - if err := validateRemovePodsViolatingInterPodAntiAffinityParams(strategy.Params); err != nil { - klog.ErrorS(err, "Invalid RemovePodsViolatingInterPodAntiAffinity parameters") - return - } +var _ framework.Plugin = &RemovePodsViolatingInterPodAntiAffinity{} +var _ framework.DeschedulePlugin = &RemovePodsViolatingInterPodAntiAffinity{} - var includedNamespaces, excludedNamespaces sets.String - var labelSelector *metav1.LabelSelector - if strategy.Params != nil { - if strategy.Params.Namespaces != nil { - includedNamespaces = sets.NewString(strategy.Params.Namespaces.Include...) - excludedNamespaces = sets.NewString(strategy.Params.Namespaces.Exclude...) - } - labelSelector = strategy.Params.LabelSelector +func New(args runtime.Object, handle framework.Handle) (framework.Plugin, error) { + podAffinityArgs, ok := args.(*framework.RemovePodsViolatingInterPodAntiAffinityArgs) + if !ok { + return nil, fmt.Errorf("want args to be of type RemovePodsViolatingInterPodAntiAffinityArgs, got %T", args) } - thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params) - if err != nil { - klog.ErrorS(err, "Failed to get threshold priority from strategy's params") - return + if err := framework.ValidateCommonArgs(podAffinityArgs.CommonArgs); err != nil { + return nil, err } - nodeFit := false - if strategy.Params != nil { - nodeFit = strategy.Params.NodeFit + var includedNamespaces, excludedNamespaces sets.String + if podAffinityArgs.Namespaces != nil { + includedNamespaces = sets.NewString(podAffinityArgs.Namespaces.Include...) + excludedNamespaces = sets.NewString(podAffinityArgs.Namespaces.Exclude...) } - evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit)) - podFilter, err := podutil.NewOptions(). WithNamespaces(includedNamespaces). WithoutNamespaces(excludedNamespaces). - WithLabelSelector(labelSelector). + WithLabelSelector(podAffinityArgs.LabelSelector). BuildFilterFunc() if err != nil { - klog.ErrorS(err, "Error initializing pod filter function") - return + return nil, fmt.Errorf("error initializing pod filter function: %v", err) } + return &RemovePodsViolatingInterPodAntiAffinity{ + handle: handle, + args: podAffinityArgs, + podFilter: podFilter, + }, nil +} + +func (d *RemovePodsViolatingInterPodAntiAffinity) Name() string { + return PluginName +} + +func (d *RemovePodsViolatingInterPodAntiAffinity) Deschedule(ctx context.Context, nodes []*v1.Node) *framework.Status { for _, node := range nodes { klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) - pods, err := podutil.ListPodsOnANode(node.Name, getPodsAssignedToNode, podFilter) + pods, err := podutil.ListPodsOnANode(node.Name, d.handle.GetPodsAssignedToNodeFunc(), d.podFilter) if err != nil { - return + // no pods evicted as error encountered retrieving evictable Pods + return &framework.Status{ + Err: fmt.Errorf("error listing pods on a node: %v", err), + } } // sort the evictable Pods based on priority, if there are multiple pods with same priority, they are sorted based on QoS tiers. 
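+// Processing pods from lowest to highest priority means that, when two pods violate
+// each other's anti-affinity, the lower-priority pod is considered for eviction first;
+// once it is evicted the pods list is updated (see the existing comment inside the
+// loop below), so its peers need not be evicted as well.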
podutil.SortPodsBasedOnPriorityLowToHigh(pods) totalPods := len(pods) for i := 0; i < totalPods; i++ { - if checkPodsWithAntiAffinityExist(pods[i], pods) && evictable.IsEvictable(pods[i]) { - success, err := podEvictor.EvictPod(ctx, pods[i], node, "InterPodAntiAffinity") - if err != nil { - klog.ErrorS(err, "Error evicting pod") - break - } - - if success { + if checkPodsWithAntiAffinityExist(pods[i], pods) && d.handle.Evictor().Filter(pods[i]) { + if d.handle.Evictor().Evict(ctx, pods[i]) { // Since the current pod is evicted all other pods which have anti-affinity with this // pod need not be evicted. // Update pods. @@ -116,6 +105,7 @@ func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clients } } } + return nil } // checkPodsWithAntiAffinityExist checks if there are other pods on the node that the current pod cannot tolerate. diff --git a/pkg/descheduler/strategies/pod_antiaffinity_test.go b/pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity_test.go similarity index 82% rename from pkg/descheduler/strategies/pod_antiaffinity_test.go rename to pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity_test.go index df5f330570..35df2e0fc7 100644 --- a/pkg/descheduler/strategies/pod_antiaffinity_test.go +++ b/pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package strategies +package removepodsviolatinginterpodantiaffinity import ( "context" @@ -25,15 +25,34 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/informers" + clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/fake" - "sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/descheduler/evictions" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" + "sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor" + fakehandler "sigs.k8s.io/descheduler/pkg/framework/profile/fake" "sigs.k8s.io/descheduler/pkg/utils" "sigs.k8s.io/descheduler/test" ) +type frameworkHandle struct { + clientset clientset.Interface + podEvictor *evictions.PodEvictor + getPodsAssignedToNodeFunc podutil.GetPodsAssignedToNodeFunc +} + +func (f frameworkHandle) ClientSet() clientset.Interface { + return f.clientset +} +func (f frameworkHandle) PodEvictor() *evictions.PodEvictor { + return f.podEvictor +} +func (f frameworkHandle) GetPodsAssignedToNodeFunc() podutil.GetPodsAssignedToNodeFunc { + return f.getPodsAssignedToNodeFunc +} + func TestPodAntiAffinity(t *testing.T) { node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil) node2 := test.BuildTestNode("n2", 2000, 3000, 10, func(node *v1.Node) { @@ -202,26 +221,42 @@ func TestPodAntiAffinity(t *testing.T) { sharedInformerFactory.Start(ctx.Done()) sharedInformerFactory.WaitForCacheSync(ctx.Done()) - podEvictor := evictions.NewPodEvictor( + podEvictor := framework.NewPodEvictor( fakeClient, policyv1.SchemeGroupVersion.String(), false, test.maxPodsToEvictPerNode, test.maxNoOfPodsToEvictPerNamespace, - test.nodes, - false, - false, - false, - false, false, ) - strategy := api.DeschedulerStrategy{ - Params: &api.StrategyParameters{ + + handle := &fakehandler.FrameworkHandle{ + ClientsetImpl: fakeClient, + EvictorImpl: podEvictor, + GetPodsAssignedToNodeFuncImpl: getPodsAssignedToNode, + SharedInformerFactoryImpl: sharedInformerFactory, + } + + defaultEvictor, 
err := defaultevictor.New(&framework.DefaultEvictorArgs{}, handle) + if err != nil { + t.Fatalf("Unable to initialize the default evictor: %v", err) + } + + handle.EvictPlugin = defaultEvictor.(framework.EvictPlugin) + handle.SortPlugin = defaultEvictor.(framework.SortPlugin) + + plugin, err := New(&framework.RemovePodsViolatingInterPodAntiAffinityArgs{ + CommonArgs: framework.CommonArgs{ NodeFit: test.nodeFit, }, + }, + handle, + ) + if err != nil { + t.Fatalf("Unable to initialize the plugin: %v", err) } - RemovePodsViolatingInterPodAntiAffinity(ctx, fakeClient, strategy, test.nodes, podEvictor, getPodsAssignedToNode) + plugin.(interface{}).(framework.DeschedulePlugin).Deschedule(ctx, test.nodes) podsEvicted := podEvictor.TotalEvicted() if podsEvicted != test.expectedEvictedPodCount { t.Errorf("Unexpected no of pods evicted: pods evicted: %d, expected: %d", podsEvicted, test.expectedEvictedPodCount) diff --git a/pkg/framework/plugins/removepodsviolatingnodeaffinity/node_affinity.go b/pkg/framework/plugins/removepodsviolatingnodeaffinity/node_affinity.go new file mode 100644 index 0000000000..07745a55f1 --- /dev/null +++ b/pkg/framework/plugins/removepodsviolatingnodeaffinity/node_affinity.go @@ -0,0 +1,119 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package removepodsviolatingnodeaffinity + +import ( + "context" + "fmt" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + + nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node" + podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" +) + +const PluginName = "RemovePodsViolatingNodeAffinity" + +// RemovePodsViolatingNodeAffinity evicts pods on nodes which violate node affinity +type RemovePodsViolatingNodeAffinity struct { + handle framework.Handle + args *framework.RemovePodsViolatingNodeAffinityArgs + podFilter podutil.FilterFunc +} + +var _ framework.Plugin = &RemovePodsViolatingNodeAffinity{} +var _ framework.DeschedulePlugin = &RemovePodsViolatingNodeAffinity{} + +func New(args runtime.Object, handle framework.Handle) (framework.Plugin, error) { + nodeAffinityArgs, ok := args.(*framework.RemovePodsViolatingNodeAffinityArgs) + if !ok { + return nil, fmt.Errorf("want args to be of type RemovePodsViolatingNodeAffinityArgs, got %T", args) + } + + if err := framework.ValidateCommonArgs(nodeAffinityArgs.CommonArgs); err != nil { + return nil, err + } + + if len(nodeAffinityArgs.NodeAffinityType) == 0 { + return nil, fmt.Errorf("NodeAffinityType is empty") + } + + var includedNamespaces, excludedNamespaces sets.String + if nodeAffinityArgs.Namespaces != nil { + includedNamespaces = sets.NewString(nodeAffinityArgs.Namespaces.Include...) + excludedNamespaces = sets.NewString(nodeAffinityArgs.Namespaces.Exclude...) + } + + podFilter, err := podutil.NewOptions(). + WithFilter(handle.Evictor().Filter). + WithNamespaces(includedNamespaces). + WithoutNamespaces(excludedNamespaces). 
+ WithLabelSelector(nodeAffinityArgs.LabelSelector). + BuildFilterFunc() + if err != nil { + return nil, fmt.Errorf("error initializing pod filter function: %v", err) + } + + return &RemovePodsViolatingNodeAffinity{ + handle: handle, + podFilter: podFilter, + args: nodeAffinityArgs, + }, nil +} + +func (d *RemovePodsViolatingNodeAffinity) Name() string { + return PluginName +} + +func (d *RemovePodsViolatingNodeAffinity) Deschedule(ctx context.Context, nodes []*v1.Node) *framework.Status { + for _, nodeAffinity := range d.args.NodeAffinityType { + klog.V(2).InfoS("Executing for nodeAffinityType", "nodeAffinity", nodeAffinity) + + switch nodeAffinity { + case "requiredDuringSchedulingIgnoredDuringExecution": + for _, node := range nodes { + klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) + + pods, err := podutil.ListPodsOnANode( + node.Name, + d.handle.GetPodsAssignedToNodeFunc(), + podutil.WrapFilterFuncs(d.podFilter, func(pod *v1.Pod) bool { + return !nodeutil.PodFitsCurrentNode(pod, node) && + nodeutil.PodFitsAnyNode(pod, nodes) + }), + ) + if err != nil { + klog.ErrorS(err, "Failed to get pods", "node", klog.KObj(node)) + } + + for _, pod := range pods { + if pod.Spec.Affinity != nil && pod.Spec.Affinity.NodeAffinity != nil && pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil { + klog.V(1).InfoS("Evicting pod", "pod", klog.KObj(pod)) + d.handle.Evictor().Evict(ctx, pod) + } + } + } + default: + klog.ErrorS(nil, "Invalid nodeAffinityType", "nodeAffinity", nodeAffinity) + } + } + return nil +} diff --git a/pkg/descheduler/strategies/node_affinity_test.go b/pkg/framework/plugins/removepodsviolatingnodeaffinity/node_affinity_test.go similarity index 73% rename from pkg/descheduler/strategies/node_affinity_test.go rename to pkg/framework/plugins/removepodsviolatingnodeaffinity/node_affinity_test.go index df2b636e1a..a08fc3a6fc 100644 --- a/pkg/descheduler/strategies/node_affinity_test.go +++ b/pkg/framework/plugins/removepodsviolatingnodeaffinity/node_affinity_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package strategies +package removepodsviolatingnodeaffinity import ( "context" @@ -25,34 +25,34 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/informers" + clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/fake" - "sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/descheduler/evictions" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" + "sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor" + fakehandler "sigs.k8s.io/descheduler/pkg/framework/profile/fake" "sigs.k8s.io/descheduler/test" ) -func TestRemovePodsViolatingNodeAffinity(t *testing.T) { - requiredDuringSchedulingIgnoredDuringExecutionStrategy := api.DeschedulerStrategy{ - Enabled: true, - Params: &api.StrategyParameters{ - NodeAffinityType: []string{ - "requiredDuringSchedulingIgnoredDuringExecution", - }, - }, - } +type frameworkHandle struct { + clientset clientset.Interface + podEvictor *evictions.PodEvictor + getPodsAssignedToNodeFunc podutil.GetPodsAssignedToNodeFunc +} - requiredDuringSchedulingIgnoredDuringExecutionWithNodeFitStrategy := api.DeschedulerStrategy{ - Enabled: true, - Params: &api.StrategyParameters{ - NodeAffinityType: []string{ - "requiredDuringSchedulingIgnoredDuringExecution", - }, - NodeFit: true, - }, - } +func (f frameworkHandle) ClientSet() clientset.Interface { + return f.clientset +} +func (f frameworkHandle) PodEvictor() *evictions.PodEvictor { + return f.podEvictor +} +func (f frameworkHandle) GetPodsAssignedToNodeFunc() podutil.GetPodsAssignedToNodeFunc { + return f.getPodsAssignedToNodeFunc +} +func TestRemovePodsViolatingNodeAffinity(t *testing.T) { nodeLabelKey := "kubernetes.io/desiredNode" nodeLabelValue := "yes" nodeWithLabels := test.BuildTestNode("nodeWithLabels", 2000, 3000, 10, nil) @@ -108,28 +108,23 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) { description string nodes []*v1.Node pods []*v1.Pod - strategy api.DeschedulerStrategy expectedEvictedPodCount uint maxPodsToEvictPerNode *uint maxNoOfPodsToEvictPerNamespace *uint + + nodeAffinityType []string + nodeFit bool }{ { - description: "Invalid strategy type, should not evict any pods", - strategy: api.DeschedulerStrategy{ - Enabled: true, - Params: &api.StrategyParameters{ - NodeAffinityType: []string{ - "requiredDuringSchedulingRequiredDuringExecution", - }, - }, - }, + description: "Invalid strategy type, should not evict any pods", + nodeAffinityType: []string{"requiredDuringSchedulingRequiredDuringExecution"}, expectedEvictedPodCount: 0, pods: addPodsToNode(nodeWithoutLabels, nil), nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels}, }, { description: "Pod is correctly scheduled on node, no eviction expected", - strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy, + nodeAffinityType: []string{"requiredDuringSchedulingIgnoredDuringExecution"}, expectedEvictedPodCount: 0, pods: addPodsToNode(nodeWithLabels, nil), nodes: []*v1.Node{nodeWithLabels}, @@ -137,14 +132,14 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) { { description: "Pod is scheduled on node without matching labels, another schedulable node available, should be evicted", expectedEvictedPodCount: 1, - strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy, + nodeAffinityType: []string{"requiredDuringSchedulingIgnoredDuringExecution"}, pods: addPodsToNode(nodeWithoutLabels, nil), nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels}, }, { description: "Pod is scheduled on node 
without matching labels, another schedulable node available, maxPodsToEvictPerNode set to 1, should not be evicted", expectedEvictedPodCount: 1, - strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy, + nodeAffinityType: []string{"requiredDuringSchedulingIgnoredDuringExecution"}, pods: addPodsToNode(nodeWithoutLabels, nil), nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels}, maxPodsToEvictPerNode: &uint1, @@ -152,7 +147,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) { { description: "Pod is scheduled on node without matching labels, another schedulable node available, maxPodsToEvictPerNode set to 1, no pod evicted since pod terminting", expectedEvictedPodCount: 1, - strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy, + nodeAffinityType: []string{"requiredDuringSchedulingIgnoredDuringExecution"}, pods: addPodsToNode(nodeWithoutLabels, &metav1.Time{}), nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels}, maxPodsToEvictPerNode: &uint1, @@ -160,7 +155,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) { { description: "Pod is scheduled on node without matching labels, another schedulable node available, maxNoOfPodsToEvictPerNamespace set to 1, should not be evicted", expectedEvictedPodCount: 1, - strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy, + nodeAffinityType: []string{"requiredDuringSchedulingIgnoredDuringExecution"}, pods: addPodsToNode(nodeWithoutLabels, nil), nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels}, maxNoOfPodsToEvictPerNamespace: &uint1, @@ -168,7 +163,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) { { description: "Pod is scheduled on node without matching labels, another schedulable node available, maxNoOfPodsToEvictPerNamespace set to 1, no pod evicted since pod terminting", expectedEvictedPodCount: 1, - strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy, + nodeAffinityType: []string{"requiredDuringSchedulingIgnoredDuringExecution"}, pods: addPodsToNode(nodeWithoutLabels, &metav1.Time{}), nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels}, maxNoOfPodsToEvictPerNamespace: &uint1, @@ -176,14 +171,16 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) { { description: "Pod is scheduled on node without matching labels, but no node where pod fits is available, should not evict", expectedEvictedPodCount: 0, - strategy: requiredDuringSchedulingIgnoredDuringExecutionWithNodeFitStrategy, + nodeAffinityType: []string{"requiredDuringSchedulingIgnoredDuringExecution"}, + nodeFit: true, pods: addPodsToNode(nodeWithoutLabels, nil), nodes: []*v1.Node{nodeWithoutLabels, unschedulableNodeWithLabels}, }, { description: "Pod is scheduled on node without matching labels, and node where pod fits is available, should evict", expectedEvictedPodCount: 0, - strategy: requiredDuringSchedulingIgnoredDuringExecutionWithNodeFitStrategy, + nodeAffinityType: []string{"requiredDuringSchedulingIgnoredDuringExecution"}, + nodeFit: true, pods: addPodsToNode(nodeWithoutLabels, nil), nodes: []*v1.Node{nodeWithLabels, unschedulableNodeWithLabels}, maxPodsToEvictPerNode: &uint1, @@ -215,21 +212,42 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) { sharedInformerFactory.Start(ctx.Done()) sharedInformerFactory.WaitForCacheSync(ctx.Done()) - podEvictor := evictions.NewPodEvictor( + podEvictor := framework.NewPodEvictor( fakeClient, policyv1.SchemeGroupVersion.String(), false, tc.maxPodsToEvictPerNode, tc.maxNoOfPodsToEvictPerNamespace, - tc.nodes, - false, - false, - false, - false, false, ) - 
RemovePodsViolatingNodeAffinity(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor, getPodsAssignedToNode) + handle := &fakehandler.FrameworkHandle{ + ClientsetImpl: fakeClient, + EvictorImpl: podEvictor, + GetPodsAssignedToNodeFuncImpl: getPodsAssignedToNode, + SharedInformerFactoryImpl: sharedInformerFactory, + } + + defaultEvictor, err := defaultevictor.New(&framework.DefaultEvictorArgs{ + NodeFit: tc.nodeFit, + }, handle) + if err != nil { + t.Fatalf("Unable to initialize the default evictor: %v", err) + } + + handle.EvictPlugin = defaultEvictor.(framework.EvictPlugin) + handle.SortPlugin = defaultEvictor.(framework.SortPlugin) + + plugin, err := New(&framework.RemovePodsViolatingNodeAffinityArgs{ + NodeAffinityType: tc.nodeAffinityType, + }, + handle, + ) + if err != nil { + t.Fatalf("Unable to initialize the plugin: %v", err) + } + + plugin.(interface{}).(framework.DeschedulePlugin).Deschedule(ctx, tc.nodes) actualEvictedPodCount := podEvictor.TotalEvicted() if actualEvictedPodCount != tc.expectedEvictedPodCount { t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount) diff --git a/pkg/framework/plugins/removepodsviolatingnodetaints/node_taint.go b/pkg/framework/plugins/removepodsviolatingnodetaints/node_taint.go new file mode 100644 index 0000000000..f798ff567d --- /dev/null +++ b/pkg/framework/plugins/removepodsviolatingnodetaints/node_taint.go @@ -0,0 +1,120 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package removepodsviolatingnodetaints + +import ( + "context" + "fmt" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + + podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" + "sigs.k8s.io/descheduler/pkg/utils" +) + +const PluginName = "RemovePodsViolatingNodeTaints" + +// RemovePodsViolatingNodeTaints evicts pods on the node which violate NoSchedule Taints on nodes +type RemovePodsViolatingNodeTaints struct { + handle framework.Handle + args *framework.RemovePodsViolatingNodeTaintsArgs + taintFilterFnc func(taint *v1.Taint) bool + podFilter podutil.FilterFunc +} + +var _ framework.Plugin = &RemovePodsViolatingNodeTaints{} +var _ framework.DeschedulePlugin = &RemovePodsViolatingNodeTaints{} + +func New(args runtime.Object, handle framework.Handle) (framework.Plugin, error) { + nodeTaintsArgs, ok := args.(*framework.RemovePodsViolatingNodeTaintsArgs) + if !ok { + return nil, fmt.Errorf("want args to be of type RemovePodsViolatingNodeTaintsArgs, got %T", args) + } + + if err := framework.ValidateCommonArgs(nodeTaintsArgs.CommonArgs); err != nil { + return nil, err + } + + var includedNamespaces, excludedNamespaces sets.String + if nodeTaintsArgs.Namespaces != nil { + includedNamespaces = sets.NewString(nodeTaintsArgs.Namespaces.Include...) + excludedNamespaces = sets.NewString(nodeTaintsArgs.Namespaces.Exclude...) 
+ } + + podFilter, err := podutil.NewOptions(). + WithFilter(handle.Evictor().Filter). + WithNamespaces(includedNamespaces). + WithoutNamespaces(excludedNamespaces). + WithLabelSelector(nodeTaintsArgs.LabelSelector). + BuildFilterFunc() + if err != nil { + return nil, fmt.Errorf("error initializing pod filter function: %v", err) + } + + excludedTaints := sets.NewString(nodeTaintsArgs.ExcludedTaints...) + excludeTaint := func(taint *v1.Taint) bool { + // Exclude taints by key *or* key=value + return excludedTaints.Has(taint.Key) || (taint.Value != "" && excludedTaints.Has(fmt.Sprintf("%s=%s", taint.Key, taint.Value))) + } + + taintFilterFnc := func(taint *v1.Taint) bool { return (taint.Effect == v1.TaintEffectNoSchedule) && !excludeTaint(taint) } + if nodeTaintsArgs.IncludePreferNoSchedule { + taintFilterFnc = func(taint *v1.Taint) bool { + return (taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectPreferNoSchedule) && !excludeTaint(taint) + } + } + + return &RemovePodsViolatingNodeTaints{ + handle: handle, + podFilter: podFilter, + args: nodeTaintsArgs, + taintFilterFnc: taintFilterFnc, + }, nil +} + +func (d *RemovePodsViolatingNodeTaints) Name() string { + return PluginName +} + +func (d *RemovePodsViolatingNodeTaints) Deschedule(ctx context.Context, nodes []*v1.Node) *framework.Status { + for _, node := range nodes { + klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) + pods, err := podutil.ListAllPodsOnANode(node.Name, d.handle.GetPodsAssignedToNodeFunc(), d.podFilter) + if err != nil { + // no pods evicted as error encountered retrieving evictable Pods + return &framework.Status{ + Err: fmt.Errorf("error listing pods on a node: %v", err), + } + } + totalPods := len(pods) + for i := 0; i < totalPods; i++ { + if !utils.TolerationsTolerateTaintsWithFilter( + pods[i].Spec.Tolerations, + node.Spec.Taints, + d.taintFilterFnc, + ) { + klog.V(2).InfoS("Not all taints with NoSchedule effect are tolerated after update for pod on node", "pod", klog.KObj(pods[i]), "node", klog.KObj(node)) + d.handle.Evictor().Evict(ctx, pods[i]) + } + } + } + return nil +} diff --git a/pkg/descheduler/strategies/node_taint_test.go b/pkg/framework/plugins/removepodsviolatingnodetaints/node_taint_test.go similarity index 89% rename from pkg/descheduler/strategies/node_taint_test.go rename to pkg/framework/plugins/removepodsviolatingnodetaints/node_taint_test.go index 45a7cea58e..b0aa4cc43a 100644 --- a/pkg/descheduler/strategies/node_taint_test.go +++ b/pkg/framework/plugins/removepodsviolatingnodetaints/node_taint_test.go @@ -1,4 +1,4 @@ -package strategies +package removepodsviolatingnodetaints import ( "context" @@ -10,15 +10,34 @@ import ( "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/informers" + clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/fake" - "sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/descheduler/evictions" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" + "sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor" + fakehandler "sigs.k8s.io/descheduler/pkg/framework/profile/fake" "sigs.k8s.io/descheduler/pkg/utils" "sigs.k8s.io/descheduler/test" ) +type frameworkHandle struct { + clientset clientset.Interface + podEvictor *evictions.PodEvictor + getPodsAssignedToNodeFunc podutil.GetPodsAssignedToNodeFunc +} + +func (f frameworkHandle) ClientSet() clientset.Interface { + return f.clientset +} +func (f frameworkHandle) PodEvictor() 
*evictions.PodEvictor { + return f.podEvictor +} +func (f frameworkHandle) GetPodsAssignedToNodeFunc() podutil.GetPodsAssignedToNodeFunc { + return f.getPodsAssignedToNodeFunc +} + func createNoScheduleTaint(key, value string, index int) v1.Taint { return v1.Taint{ Key: "testTaint" + fmt.Sprintf("%v", index), @@ -317,29 +336,45 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) { sharedInformerFactory.Start(ctx.Done()) sharedInformerFactory.WaitForCacheSync(ctx.Done()) - podEvictor := evictions.NewPodEvictor( + podEvictor := framework.NewPodEvictor( fakeClient, policyv1.SchemeGroupVersion.String(), false, tc.maxPodsToEvictPerNode, tc.maxNoOfPodsToEvictPerNamespace, - tc.nodes, - tc.evictLocalStoragePods, - tc.evictSystemCriticalPods, - false, - false, false, ) - strategy := api.DeschedulerStrategy{ - Params: &api.StrategyParameters{ - NodeFit: tc.nodeFit, - IncludePreferNoSchedule: tc.includePreferNoSchedule, - ExcludedTaints: tc.excludedTaints, - }, + handle := &fakehandler.FrameworkHandle{ + ClientsetImpl: fakeClient, + EvictorImpl: podEvictor, + GetPodsAssignedToNodeFuncImpl: getPodsAssignedToNode, + SharedInformerFactoryImpl: sharedInformerFactory, + } + + defaultEvictor, err := defaultevictor.New(&framework.DefaultEvictorArgs{ + EvictLocalStoragePods: tc.evictLocalStoragePods, + EvictSystemCriticalPods: tc.evictSystemCriticalPods, + NodeFit: tc.nodeFit, + }, handle) + if err != nil { + t.Fatalf("Unable to initialize the default evictor: %v", err) + } + + handle.EvictPlugin = defaultEvictor.(framework.EvictPlugin) + handle.SortPlugin = defaultEvictor.(framework.SortPlugin) + + plugin, err := New(&framework.RemovePodsViolatingNodeTaintsArgs{ + IncludePreferNoSchedule: tc.includePreferNoSchedule, + ExcludedTaints: tc.excludedTaints, + }, + handle, + ) + if err != nil { + t.Fatalf("Unable to initialize the plugin: %v", err) } - RemovePodsViolatingNodeTaints(ctx, fakeClient, strategy, tc.nodes, podEvictor, getPodsAssignedToNode) + plugin.(interface{}).(framework.DeschedulePlugin).Deschedule(ctx, tc.nodes) actualEvictedPodCount := podEvictor.TotalEvicted() if actualEvictedPodCount != tc.expectedEvictedPodCount { t.Errorf("Test %#v failed, Unexpected no of pods evicted: pods evicted: %d, expected: %d", tc.description, actualEvictedPodCount, tc.expectedEvictedPodCount) diff --git a/pkg/descheduler/strategies/topologyspreadconstraint.go b/pkg/framework/plugins/removepodsviolatingtopologyspreadconstraint/topologyspreadconstraint.go similarity index 85% rename from pkg/descheduler/strategies/topologyspreadconstraint.go rename to pkg/framework/plugins/removepodsviolatingtopologyspreadconstraint/topologyspreadconstraint.go index 2f0fda87ac..d1bca72ea7 100644 --- a/pkg/descheduler/strategies/topologyspreadconstraint.go +++ b/pkg/framework/plugins/removepodsviolatingtopologyspreadconstraint/topologyspreadconstraint.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package strategies +package removepodsviolatingtopologyspreadconstraint import ( "context" @@ -25,18 +25,58 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" utilerrors "k8s.io/apimachinery/pkg/util/errors" - clientset "k8s.io/client-go/kubernetes" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" - "sigs.k8s.io/descheduler/pkg/api" - "sigs.k8s.io/descheduler/pkg/descheduler/evictions" nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node" - podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" - "sigs.k8s.io/descheduler/pkg/descheduler/strategies/validation" + "sigs.k8s.io/descheduler/pkg/framework" "sigs.k8s.io/descheduler/pkg/utils" ) +const PluginName = "RemovePodsViolatingTopologySpreadConstraint" + +// RemovePodsViolatingTopologySpreadConstraint evicts pods that violate their topology spread constraints. +type RemovePodsViolatingTopologySpreadConstraint struct { + handle framework.Handle + args *framework.RemovePodsViolatingTopologySpreadConstraintArgs + isEvictable func(pod *v1.Pod) bool + includedNamespaces sets.String + excludedNamespaces sets.String +} + +var _ framework.Plugin = &RemovePodsViolatingTopologySpreadConstraint{} +var _ framework.BalancePlugin = &RemovePodsViolatingTopologySpreadConstraint{} + +func New(args runtime.Object, handle framework.Handle) (framework.Plugin, error) { + topologyArgs, ok := args.(*framework.RemovePodsViolatingTopologySpreadConstraintArgs) + if !ok { + return nil, fmt.Errorf("want args to be of type RemovePodsViolatingTopologySpreadConstraintArgs, got %T", args) + } + + if err := framework.ValidateCommonArgs(topologyArgs.CommonArgs); err != nil { + return nil, err + } + + var includedNamespaces, excludedNamespaces sets.String + if topologyArgs.Namespaces != nil { + includedNamespaces = sets.NewString(topologyArgs.Namespaces.Include...) + excludedNamespaces = sets.NewString(topologyArgs.Namespaces.Exclude...)
+ } + + return &RemovePodsViolatingTopologySpreadConstraint{ + handle: handle, + args: topologyArgs, + includedNamespaces: includedNamespaces, + excludedNamespaces: excludedNamespaces, + }, nil +} + +func (d *RemovePodsViolatingTopologySpreadConstraint) Name() string { + return PluginName +} + // AntiAffinityTerm's topology key value used in predicate metadata type topologyPair struct { key string @@ -48,26 +88,7 @@ type topology struct { pods []*v1.Pod } -func RemovePodsViolatingTopologySpreadConstraint( - ctx context.Context, - client clientset.Interface, - strategy api.DeschedulerStrategy, - nodes []*v1.Node, - podEvictor *evictions.PodEvictor, - getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc, -) { - strategyParams, err := validation.ValidateAndParseStrategyParams(ctx, client, strategy.Params) - if err != nil { - klog.ErrorS(err, "Invalid RemovePodsViolatingTopologySpreadConstraint parameters") - return - } - - evictable := podEvictor.Evictable( - evictions.WithPriorityThreshold(strategyParams.ThresholdPriority), - evictions.WithNodeFit(strategyParams.NodeFit), - evictions.WithLabelSelector(strategyParams.LabelSelector), - ) - +func (d *RemovePodsViolatingTopologySpreadConstraint) Balance(ctx context.Context, nodes []*v1.Node) *framework.Status { nodeMap := make(map[string]*v1.Node, len(nodes)) for _, node := range nodes { nodeMap[node.Name] = node @@ -86,20 +107,22 @@ func RemovePodsViolatingTopologySpreadConstraint( // if diff > maxSkew, add this pod in the current bucket for eviction // First record all of the constraints by namespace - namespaces, err := client.CoreV1().Namespaces().List(ctx, metav1.ListOptions{}) + namespaces, err := d.handle.ClientSet().CoreV1().Namespaces().List(ctx, metav1.ListOptions{}) if err != nil { klog.ErrorS(err, "Couldn't list namespaces") - return + return &framework.Status{ + Err: fmt.Errorf("couldn't list namespaces: %v", err), + } } klog.V(1).InfoS("Processing namespaces for topology spread constraints") podsForEviction := make(map[*v1.Pod]struct{}) // 1. for each namespace... 
for _, namespace := range namespaces.Items { - if (len(strategyParams.IncludedNamespaces) > 0 && !strategyParams.IncludedNamespaces.Has(namespace.Name)) || - (len(strategyParams.ExcludedNamespaces) > 0 && strategyParams.ExcludedNamespaces.Has(namespace.Name)) { + if (len(d.includedNamespaces) > 0 && !d.includedNamespaces.Has(namespace.Name)) || + (len(d.excludedNamespaces) > 0 && d.excludedNamespaces.Has(namespace.Name)) { continue } - namespacePods, err := client.CoreV1().Pods(namespace.Name).List(ctx, metav1.ListOptions{}) + namespacePods, err := d.handle.ClientSet().CoreV1().Pods(namespace.Name).List(ctx, metav1.ListOptions{}) if err != nil { klog.ErrorS(err, "Couldn't list pods in namespace", "namespace", namespace) continue @@ -110,7 +133,7 @@ func RemovePodsViolatingTopologySpreadConstraint( for _, pod := range namespacePods.Items { for _, constraint := range pod.Spec.TopologySpreadConstraints { // Ignore soft topology constraints if they are not included - if constraint.WhenUnsatisfiable == v1.ScheduleAnyway && (strategy.Params == nil || !strategy.Params.IncludeSoftConstraints) { + if constraint.WhenUnsatisfiable == v1.ScheduleAnyway && (!d.args.IncludeSoftConstraints) { continue } namespaceTopologySpreadConstraints[constraint] = struct{}{} @@ -170,19 +193,18 @@ func RemovePodsViolatingTopologySpreadConstraint( klog.V(2).InfoS("Skipping topology constraint because it is already balanced", "constraint", constraint) continue } - balanceDomains(podsForEviction, constraint, constraintTopologies, sumPods, evictable.IsEvictable, nodeMap) + balanceDomains(podsForEviction, constraint, constraintTopologies, sumPods, d.handle.Evictor().Filter, nodeMap) } } for pod := range podsForEviction { - if !evictable.IsEvictable(pod) { + if !d.handle.Evictor().Filter(pod) { continue } - if _, err := podEvictor.EvictPod(ctx, pod, nodeMap[pod.Spec.NodeName], "PodTopologySpread"); err != nil { - klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) - break - } + d.handle.Evictor().Evict(ctx, pod) } + + return nil } // topologyIsBalanced checks if any domains in the topology differ by more than the MaxSkew diff --git a/pkg/descheduler/strategies/topologyspreadconstraint_test.go b/pkg/framework/plugins/removepodsviolatingtopologyspreadconstraint/topologyspreadconstraint_test.go similarity index 87% rename from pkg/descheduler/strategies/topologyspreadconstraint_test.go rename to pkg/framework/plugins/removepodsviolatingtopologyspreadconstraint/topologyspreadconstraint_test.go index 22a8636514..dead803be0 100644 --- a/pkg/descheduler/strategies/topologyspreadconstraint_test.go +++ b/pkg/framework/plugins/removepodsviolatingtopologyspreadconstraint/topologyspreadconstraint_test.go @@ -1,4 +1,4 @@ -package strategies +package removepodsviolatingtopologyspreadconstraint import ( "context" @@ -9,22 +9,46 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/informers" + clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/fake" + policyv1 "k8s.io/api/policy/v1" "sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/descheduler/evictions" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" + "sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor" + fakehandler "sigs.k8s.io/descheduler/pkg/framework/profile/fake" "sigs.k8s.io/descheduler/test" ) +type frameworkHandle struct { + clientset clientset.Interface + podEvictor *evictions.PodEvictor + getPodsAssignedToNodeFunc 
podutil.GetPodsAssignedToNodeFunc +} + +func (f frameworkHandle) ClientSet() clientset.Interface { + return f.clientset +} +func (f frameworkHandle) PodEvictor() *evictions.PodEvictor { + return f.podEvictor +} +func (f frameworkHandle) GetPodsAssignedToNodeFunc() podutil.GetPodsAssignedToNodeFunc { + return f.getPodsAssignedToNodeFunc +} + func TestTopologySpreadConstraint(t *testing.T) { testCases := []struct { name string pods []*v1.Pod expectedEvictedCount uint nodes []*v1.Node - strategy api.DeschedulerStrategy - namespaces []string + + namespaces *api.Namespaces + nodeFit bool + includeSoftConstraints bool + labelSelector *metav1.LabelSelector }{ { name: "2 domains, sizes [2,1], maxSkew=1, move 0 pods", @@ -58,12 +82,6 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 0, - strategy: api.DeschedulerStrategy{ - Params: &api.StrategyParameters{ - NodeFit: false, - }, - }, - namespaces: []string{"ns1"}, }, { name: "2 domains, sizes [3,1], maxSkew=1, move 1 pod to achieve [2,2]", @@ -90,12 +108,6 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 1, - strategy: api.DeschedulerStrategy{ - Params: &api.StrategyParameters{ - NodeFit: false, - }, - }, - namespaces: []string{"ns1"}, }, { name: "2 domains, sizes [3,1], maxSkew=1, move 1 pod to achieve [2,2] (soft constraints)", @@ -128,9 +140,8 @@ func TestTopologySpreadConstraint(t *testing.T) { labels: map[string]string{"foo": "bar"}, }, }), - expectedEvictedCount: 1, - strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{IncludeSoftConstraints: true}}, - namespaces: []string{"ns1"}, + expectedEvictedCount: 1, + includeSoftConstraints: true, }, { name: "2 domains, sizes [3,1], maxSkew=1, no pods eligible, move 0 pods", @@ -160,12 +171,6 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 0, - strategy: api.DeschedulerStrategy{ - Params: &api.StrategyParameters{ - NodeFit: false, - }, - }, - namespaces: []string{"ns1"}, }, { name: "2 domains, sizes [3,1], maxSkew=1, move 1 pod to achieve [2,2], exclude kube-system namespace", @@ -192,8 +197,8 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 1, - strategy: api.DeschedulerStrategy{Enabled: true, Params: &api.StrategyParameters{NodeFit: true, Namespaces: &api.Namespaces{Exclude: []string{"kube-system"}}}}, - namespaces: []string{"ns1"}, + nodeFit: true, + namespaces: &api.Namespaces{Exclude: []string{"kube-system"}}, }, { name: "2 domains, sizes [5,2], maxSkew=1, move 1 pod to achieve [4,3]", @@ -220,12 +225,6 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 1, - strategy: api.DeschedulerStrategy{ - Params: &api.StrategyParameters{ - NodeFit: false, - }, - }, - namespaces: []string{"ns1"}, }, { name: "2 domains, sizes [4,0], maxSkew=1, move 2 pods to achieve [2,2]", @@ -247,12 +246,6 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 2, - strategy: api.DeschedulerStrategy{ - Params: &api.StrategyParameters{ - NodeFit: false, - }, - }, - namespaces: []string{"ns1"}, }, { name: "2 domains, sizes [4,0], maxSkew=1, only move 1 pod since pods with nodeSelector and nodeAffinity aren't evicted", @@ -291,12 +284,7 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 1, - strategy: api.DeschedulerStrategy{ - Params: &api.StrategyParameters{ - NodeFit: true, - }, - }, - namespaces: []string{"ns1"}, + nodeFit: true, }, { name: "2 domains, sizes [4,0], maxSkew=1, move 2 pods since 
selector matches multiple nodes", @@ -338,12 +326,6 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 2, - strategy: api.DeschedulerStrategy{ - Params: &api.StrategyParameters{ - NodeFit: false, - }, - }, - namespaces: []string{"ns1"}, }, { name: "3 domains, sizes [0, 1, 100], maxSkew=1, move 66 pods to get [34, 33, 34]", @@ -366,8 +348,6 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 66, - strategy: api.DeschedulerStrategy{}, - namespaces: []string{"ns1"}, }, { name: "4 domains, sizes [0, 1, 3, 5], should move 3 to get [2, 2, 3, 2]", @@ -396,12 +376,6 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 3, - strategy: api.DeschedulerStrategy{ - Params: &api.StrategyParameters{ - NodeFit: false, - }, - }, - namespaces: []string{"ns1"}, }, { name: "2 domains size [2 6], maxSkew=2, should move 1 to get [3 5]", @@ -428,12 +402,6 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 1, - strategy: api.DeschedulerStrategy{ - Params: &api.StrategyParameters{ - NodeFit: false, - }, - }, - namespaces: []string{"ns1"}, }, { name: "2 domains size [2 6], maxSkew=2, can't move any because of node taints", @@ -476,12 +444,7 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 0, - strategy: api.DeschedulerStrategy{ - Params: &api.StrategyParameters{ - NodeFit: true, - }, - }, - namespaces: []string{"ns1"}, + nodeFit: true, }, { // see https://github.com/kubernetes-sigs/descheduler/issues/564 @@ -584,9 +547,8 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }, }), - expectedEvictedCount: 1, - strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{IncludeSoftConstraints: true}}, - namespaces: []string{"ns1"}, + expectedEvictedCount: 1, + includeSoftConstraints: true, }, { name: "3 domains size [8 7 0], maxSkew=1, should move 5 to get [5 5 5]", @@ -610,8 +572,6 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 5, - strategy: api.DeschedulerStrategy{}, - namespaces: []string{"ns1"}, }, { name: "3 domains size [5 5 5], maxSkew=1, should move 0 to retain [5 5 5]", @@ -641,8 +601,6 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 0, - strategy: api.DeschedulerStrategy{}, - namespaces: []string{"ns1"}, }, { name: "2 domains, sizes [2,0], maxSkew=1, move 1 pod since pod tolerates the node with taint", @@ -682,8 +640,6 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 1, - strategy: api.DeschedulerStrategy{}, - namespaces: []string{"ns1"}, }, { name: "2 domains, sizes [2,0], maxSkew=1, move 0 pods since pod does not tolerate the tainted node", @@ -715,8 +671,6 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 0, - strategy: api.DeschedulerStrategy{}, - namespaces: []string{"ns1"}, }, { name: "2 domains, sizes [2,0], maxSkew=1, move 1 pod for node with PreferNoSchedule Taint", @@ -748,8 +702,6 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 1, - strategy: api.DeschedulerStrategy{}, - namespaces: []string{"ns1"}, }, { name: "2 domains, sizes [2,0], maxSkew=1, move 0 pod for node with unmatched label filtering", @@ -771,12 +723,7 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 0, - strategy: api.DeschedulerStrategy{ - Params: &api.StrategyParameters{ - LabelSelector: getLabelSelector("foo", []string{"baz"}, metav1.LabelSelectorOpIn), - }, - }, - 
namespaces: []string{"ns1"}, + labelSelector: getLabelSelector("foo", []string{"baz"}, metav1.LabelSelectorOpIn), }, { name: "2 domains, sizes [2,0], maxSkew=1, move 1 pod for node with matched label filtering", @@ -798,12 +745,7 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 1, - strategy: api.DeschedulerStrategy{ - Params: &api.StrategyParameters{ - LabelSelector: getLabelSelector("foo", []string{"bar"}, metav1.LabelSelectorOpIn), - }, - }, - namespaces: []string{"ns1"}, + labelSelector: getLabelSelector("foo", []string{"bar"}, metav1.LabelSelectorOpIn), }, { name: "2 domains, sizes [2,0], maxSkew=1, move 1 pod for node with matched label filtering (NotIn op)", @@ -825,12 +767,7 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 1, - strategy: api.DeschedulerStrategy{ - Params: &api.StrategyParameters{ - LabelSelector: getLabelSelector("foo", []string{"baz"}, metav1.LabelSelectorOpNotIn), - }, - }, - namespaces: []string{"ns1"}, + labelSelector: getLabelSelector("foo", []string{"baz"}, metav1.LabelSelectorOpNotIn), }, { name: "2 domains, sizes [4,2], maxSkew=1, 2 pods in termination; nothing should be moved", @@ -860,12 +797,7 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 0, - strategy: api.DeschedulerStrategy{ - Params: &api.StrategyParameters{ - LabelSelector: getLabelSelector("foo", []string{"bar"}, metav1.LabelSelectorOpIn), - }, - }, - namespaces: []string{"ns1"}, + labelSelector: getLabelSelector("foo", []string{"bar"}, metav1.LabelSelectorOpIn), }, } @@ -895,20 +827,46 @@ func TestTopologySpreadConstraint(t *testing.T) { sharedInformerFactory.Start(ctx.Done()) sharedInformerFactory.WaitForCacheSync(ctx.Done()) - podEvictor := evictions.NewPodEvictor( + podEvictor := framework.NewPodEvictor( fakeClient, - "v1", + policyv1.SchemeGroupVersion.String(), false, nil, nil, - tc.nodes, - false, - false, - false, - false, false, ) - RemovePodsViolatingTopologySpreadConstraint(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor, getPodsAssignedToNode) + + handle := &fakehandler.FrameworkHandle{ + ClientsetImpl: fakeClient, + EvictorImpl: podEvictor, + GetPodsAssignedToNodeFuncImpl: getPodsAssignedToNode, + SharedInformerFactoryImpl: sharedInformerFactory, + } + + defaultEvictor, err := defaultevictor.New(&framework.DefaultEvictorArgs{ + NodeFit: tc.nodeFit, + LabelSelector: tc.labelSelector, + }, handle) + if err != nil { + t.Fatalf("Unable to initialize the default evictor: %v", err) + } + + handle.EvictPlugin = defaultEvictor.(framework.EvictPlugin) + handle.SortPlugin = defaultEvictor.(framework.SortPlugin) + + plugin, err := New(&framework.RemovePodsViolatingTopologySpreadConstraintArgs{ + CommonArgs: framework.CommonArgs{ + Namespaces: tc.namespaces, + }, + IncludeSoftConstraints: tc.includeSoftConstraints, + }, + handle, + ) + if err != nil { + t.Fatalf("Unable to initialize the plugin: %v", err) + } + + plugin.(framework.BalancePlugin).Balance(ctx, tc.nodes) podsEvicted := podEvictor.TotalEvicted() if podsEvicted != tc.expectedEvictedCount { t.Errorf("Test error for description: %s. 
Expected evicted pods count %v, got %v", tc.name, tc.expectedEvictedCount, podsEvicted) diff --git a/pkg/framework/podevictor.go b/pkg/framework/podevictor.go new file mode 100644 index 0000000000..6c4cad1864 --- /dev/null +++ b/pkg/framework/podevictor.go @@ -0,0 +1,152 @@ +package framework + +import ( + "context" + "fmt" + + v1 "k8s.io/api/core/v1" + policy "k8s.io/api/policy/v1beta1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/scheme" + clientcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" + "k8s.io/client-go/tools/record" + "k8s.io/klog/v2" + "sigs.k8s.io/descheduler/metrics" + + eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils" +) + +// nodePodEvictedCount keeps count of pods evicted on node +type nodePodEvictedCount map[string]uint +type namespacePodEvictCount map[string]uint + +type PodEvictor struct { + client clientset.Interface + policyGroupVersion string + dryRun bool + maxPodsToEvictPerNode *uint + maxPodsToEvictPerNamespace *uint + nodepodCount nodePodEvictedCount + namespacePodCount namespacePodEvictCount + metricsEnabled bool +} + +func NewPodEvictor( + client clientset.Interface, + policyGroupVersion string, + dryRun bool, + maxPodsToEvictPerNode *uint, + maxPodsToEvictPerNamespace *uint, + metricsEnabled bool, +) *PodEvictor { + return &PodEvictor{ + client: client, + policyGroupVersion: policyGroupVersion, + dryRun: dryRun, + maxPodsToEvictPerNode: maxPodsToEvictPerNode, + maxPodsToEvictPerNamespace: maxPodsToEvictPerNamespace, + nodepodCount: make(nodePodEvictedCount), + namespacePodCount: make(namespacePodEvictCount), + metricsEnabled: metricsEnabled, + } +} + +// NodeEvicted gives a number of pods evicted for node +func (pe *PodEvictor) NodeEvicted(node *v1.Node) uint { + return pe.nodepodCount[node.Name] +} + +// TotalEvicted gives a number of pods evicted through all nodes +func (pe *PodEvictor) TotalEvicted() uint { + var total uint + for _, count := range pe.nodepodCount { + total += count + } + return total +} + +// true when the pod is evicted on the server side. +// eviction reason can be set through the ctx's evictionReason:STRING pair +func (pe *PodEvictor) Evict(ctx context.Context, pod *v1.Pod) bool { + // TODO(jchaloup): change the strategy metric label key to plugin? 
+ strategy := "" + if ctx.Value("pluginName") != nil { + strategy = ctx.Value("pluginName").(string) + } + reason := "" + if ctx.Value("evictionReason") != nil { + strategy = ctx.Value("evictionReason").(string) + } + nodeName := pod.Spec.NodeName + if pe.maxPodsToEvictPerNode != nil && pe.nodepodCount[nodeName]+1 > *pe.maxPodsToEvictPerNode { + if pe.metricsEnabled { + metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per node reached", "strategy": strategy, "namespace": pod.Namespace, "node": nodeName}).Inc() + } + klog.ErrorS(fmt.Errorf("Maximum number of evicted pods per node reached"), "limit", *pe.maxPodsToEvictPerNode, "node", nodeName) + return false + } + + if pe.maxPodsToEvictPerNamespace != nil && pe.namespacePodCount[pod.Namespace]+1 > *pe.maxPodsToEvictPerNamespace { + if pe.metricsEnabled { + metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per namespace reached", "strategy": strategy, "namespace": pod.Namespace, "node": nodeName}).Inc() + } + klog.ErrorS(fmt.Errorf("Maximum number of evicted pods per namespace reached"), "limit", *pe.maxPodsToEvictPerNamespace, "namespace", pod.Namespace) + return false + } + + err := evictPod(ctx, pe.client, pod, pe.policyGroupVersion) + if err != nil { + // err is used only for logging purposes + klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod), "reason", reason) + if pe.metricsEnabled { + metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": strategy, "namespace": pod.Namespace, "node": nodeName}).Inc() + } + return false + } + + pe.nodepodCount[nodeName]++ + pe.namespacePodCount[pod.Namespace]++ + + if pe.metricsEnabled { + metrics.PodsEvicted.With(map[string]string{"result": "success", "strategy": strategy, "namespace": pod.Namespace, "node": nodeName}).Inc() + } + + if pe.dryRun { + klog.V(1).InfoS("Evicted pod in dry run mode", "pod", klog.KObj(pod), "reason", reason, "strategy", strategy, "node", nodeName) + } else { + klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", reason, "strategy", strategy, "node", nodeName) + eventBroadcaster := record.NewBroadcaster() + eventBroadcaster.StartStructuredLogging(3) + eventBroadcaster.StartRecordingToSink(&clientcorev1.EventSinkImpl{Interface: pe.client.CoreV1().Events(pod.Namespace)}) + r := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "sigs.k8s.io.descheduler"}) + r.Event(pod, v1.EventTypeNormal, "Descheduled", fmt.Sprintf("pod evicted by sigs.k8s.io/descheduler%s", reason)) + } + return true +} + +func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string) error { + deleteOptions := &metav1.DeleteOptions{} + // GracePeriodSeconds ? 
+ eviction := &policy.Eviction{ + TypeMeta: metav1.TypeMeta{ + APIVersion: policyGroupVersion, + Kind: eutils.EvictionKind, + }, + ObjectMeta: metav1.ObjectMeta{ + Name: pod.Name, + Namespace: pod.Namespace, + }, + DeleteOptions: deleteOptions, + } + err := client.PolicyV1beta1().Evictions(eviction.Namespace).Evict(ctx, eviction) + + if apierrors.IsTooManyRequests(err) { + return fmt.Errorf("error when evicting pod (ignoring) %q: %v", pod.Name, err) + } + if apierrors.IsNotFound(err) { + return fmt.Errorf("pod not found when evicting %q: %v", pod.Name, err) + } + return err +} diff --git a/pkg/framework/profile/fake/fake.go b/pkg/framework/profile/fake/fake.go new file mode 100644 index 0000000000..a87897f9c6 --- /dev/null +++ b/pkg/framework/profile/fake/fake.go @@ -0,0 +1,58 @@ +package fake + +import ( + "context" + "sort" + + v1 "k8s.io/api/core/v1" + "k8s.io/client-go/informers" + clientset "k8s.io/client-go/kubernetes" + + podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" +) + +type FrameworkHandle struct { + ClientsetImpl clientset.Interface + EvictorImpl *framework.PodEvictor + GetPodsAssignedToNodeFuncImpl podutil.GetPodsAssignedToNodeFunc + SharedInformerFactoryImpl informers.SharedInformerFactory + EvictPlugin framework.EvictPlugin + SortPlugin framework.SortPlugin +} + +var _ framework.Handle = &FrameworkHandle{} +var _ framework.Evictor = &FrameworkHandle{} + +func (f *FrameworkHandle) ClientSet() clientset.Interface { + return f.ClientsetImpl +} + +func (f *FrameworkHandle) Evictor() framework.Evictor { + return f +} + +func (f *FrameworkHandle) GetPodsAssignedToNodeFunc() podutil.GetPodsAssignedToNodeFunc { + return f.GetPodsAssignedToNodeFuncImpl +} + +func (f *FrameworkHandle) SharedInformerFactory() informers.SharedInformerFactory { + return f.SharedInformerFactoryImpl +} + +// Sort pods from the most to the least suitable for eviction +func (f *FrameworkHandle) Sort(pods []*v1.Pod) { + sort.Slice(pods, func(i int, j int) bool { + return f.SortPlugin.Less(pods[i], pods[j]) + }) +} + +// Filter checks if a pod can be evicted +func (f *FrameworkHandle) Filter(pod *v1.Pod) bool { + return f.EvictPlugin.Filter(pod) +} + +// Evict evicts a pod (no pre-check performed) +func (f *FrameworkHandle) Evict(ctx context.Context, pod *v1.Pod) bool { + return f.EvictorImpl.Evict(ctx, pod) +} diff --git a/pkg/framework/profile/profile.go b/pkg/framework/profile/profile.go new file mode 100644 index 0000000000..d2c34ae502 --- /dev/null +++ b/pkg/framework/profile/profile.go @@ -0,0 +1,264 @@ +package profile + +import ( + "context" + "fmt" + "sort" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/informers" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/klog/v2" + + "sigs.k8s.io/descheduler/pkg/api/v1alpha2" + podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework" + "sigs.k8s.io/descheduler/pkg/framework/registry" +) + +// Option for the handleImpl. +type Option func(*handleImplOpts) + +type handleImplOpts struct { + clientSet clientset.Interface + sharedInformerFactory informers.SharedInformerFactory + podEvictor framework.Evictable +} + +// WithClientSet sets clientSet for the scheduling frameworkImpl. 
+func WithClientSet(clientSet clientset.Interface) Option { + return func(o *handleImplOpts) { + o.clientSet = clientSet + } +} + +func WithSharedInformerFactory(sharedInformerFactory informers.SharedInformerFactory) Option { + return func(o *handleImplOpts) { + o.sharedInformerFactory = sharedInformerFactory + } +} + +func WithPodEvictor(podEvictor framework.Evictable) Option { + return func(o *handleImplOpts) { + o.podEvictor = podEvictor + } +} + +type handleImpl struct { + clientSet clientset.Interface + getPodsAssignedToNodeFunc podutil.GetPodsAssignedToNodeFunc + sharedInformerFactory informers.SharedInformerFactory + + evictorImpl *evictorImpl +} + +type Profile struct { + handleImpl *handleImpl + deschedulePlugins []framework.DeschedulePlugin + balancePlugins []framework.BalancePlugin +} + +type evictorImpl struct { + podEvictor framework.Evictable + evictPlugin framework.EvictPlugin + sortPlugin framework.SortPlugin + + evictedCounter uint +} + +// Sort pods from the most to the least suitable for eviction +func (ei *evictorImpl) Sort(pods []*v1.Pod) { + sort.Slice(pods, func(i int, j int) bool { + return ei.sortPlugin.Less(pods[i], pods[j]) + }) +} + +// Filter checks if a pod can be evicted +func (ei *evictorImpl) Filter(pod *v1.Pod) bool { + return ei.evictPlugin.Filter(pod) +} + +// Evict evicts a pod (no pre-check performed) +func (ei *evictorImpl) Evict(ctx context.Context, pod *v1.Pod) bool { + if ei.podEvictor.Evict(ctx, pod) { + ei.evictedCounter++ + return true + } + return false +} + +func (d *handleImpl) ClientSet() clientset.Interface { + return d.clientSet +} + +func (d *handleImpl) Evictor() framework.Evictor { + return d.evictorImpl +} + +func (d *handleImpl) GetPodsAssignedToNodeFunc() podutil.GetPodsAssignedToNodeFunc { + return d.getPodsAssignedToNodeFunc +} + +func (d *handleImpl) SharedInformerFactory() informers.SharedInformerFactory { + return d.sharedInformerFactory +} + +func (d Profile) RunDeschedulePlugins(ctx context.Context, nodes []*v1.Node) *framework.Status { + errs := []error{} + for _, pl := range d.deschedulePlugins { + d.handleImpl.evictorImpl.evictedCounter = 0 + status := pl.Deschedule(context.WithValue(ctx, "pluginName", pl.Name()), nodes) + if status != nil && status.Err != nil { + errs = append(errs, status.Err) + } + klog.V(1).InfoS("Total number of pods evicted", "evictedPods", d.handleImpl.evictorImpl.evictedCounter) + } + + aggrErr := errors.NewAggregate(errs) + if aggrErr == nil { + return &framework.Status{} + } + + return &framework.Status{ + Err: fmt.Errorf("%v", aggrErr.Error()), + } +} + +func (d Profile) RunBalancePlugins(ctx context.Context, nodes []*v1.Node) *framework.Status { + errs := []error{} + for _, pl := range d.balancePlugins { + d.handleImpl.evictorImpl.evictedCounter = 0 + status := pl.Balance(context.WithValue(ctx, "pluginName", pl.Name()), nodes) + if status != nil && status.Err != nil { + errs = append(errs, status.Err) + } + klog.V(1).InfoS("Total number of pods evicted", "evictedPods", d.handleImpl.evictorImpl.evictedCounter) + } + + aggrErr := errors.NewAggregate(errs) + if aggrErr == nil { + return &framework.Status{} + } + + return &framework.Status{ + Err: fmt.Errorf("%v", aggrErr.Error()), + } +} + +func NewProfile(config v1alpha2.Profile, reg registry.Registry, opts ...Option) (*Profile, error) { + hOpts := &handleImplOpts{} + for _, optFnc := range opts { + optFnc(hOpts) + } + + if hOpts.sharedInformerFactory == nil { + return nil, fmt.Errorf("SharedInformerFactory not set") + } + + if hOpts.podEvictor == 
nil { + return nil, fmt.Errorf("PodEvictor not set") + } + + if hOpts.clientSet == nil { + return nil, fmt.Errorf("ClientSet not set") + } + + pluginArgs := map[string]runtime.Object{} + for _, plConfig := range config.PluginConfig { + pluginArgs[plConfig.Name] = plConfig.Args + } + + // Assumption: Enabled and Disabled sets are mutually exclusive + + enabled := sets.NewString() + // disabled := sets.NewString() + + // for _, plName := range config.Plugins.Deschedule.Disabled { + // disabled.Insert(plName) + // } + + for _, plName := range config.Plugins.PreSort.Enabled { + enabled.Insert(plName) + } + + for _, plName := range config.Plugins.Deschedule.Enabled { + enabled.Insert(plName) + } + + for _, plName := range config.Plugins.Balance.Enabled { + enabled.Insert(plName) + } + + for _, plName := range config.Plugins.Sort.Enabled { + enabled.Insert(plName) + } + + for _, plName := range config.Plugins.Evict.Enabled { + enabled.Insert(plName) + } + + podInformer := hOpts.sharedInformerFactory.Core().V1().Pods() + getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer) + if err != nil { + return nil, fmt.Errorf("unable to create BuildGetPodsAssignedToNodeFunc: %v", err) + } + + handle := &handleImpl{ + clientSet: hOpts.clientSet, + getPodsAssignedToNodeFunc: getPodsAssignedToNode, + sharedInformerFactory: hOpts.sharedInformerFactory, + evictorImpl: &evictorImpl{ + podEvictor: hOpts.podEvictor, + }, + } + + pluginsMap := map[string]framework.Plugin{} + for plName := range enabled { + klog.V(3).InfoS("Initializing plugin", "pluginName", plName) + var pl framework.Plugin + var err error + if args, ok := pluginArgs[plName]; ok { + pl, err = reg[plName](args, handle) + } else { + pl, err = reg[plName](nil, handle) + } + if err != nil { + return nil, fmt.Errorf("unable to initialize %q plugin: %v", plName, err) + } + pluginsMap[plName] = pl + } + + deschedulePlugins := []framework.DeschedulePlugin{} + balancePlugins := []framework.BalancePlugin{} + + for _, plName := range config.Plugins.Deschedule.Enabled { + deschedulePlugins = append(deschedulePlugins, pluginsMap[plName].(framework.DeschedulePlugin)) + } + + for _, plName := range config.Plugins.Balance.Enabled { + balancePlugins = append(balancePlugins, pluginsMap[plName].(framework.BalancePlugin)) + } + + if len(config.Plugins.Sort.Enabled) != 1 { + return nil, fmt.Errorf("expected only a single sort plugin, have %v", len(config.Plugins.Sort.Enabled)) + } + + if len(config.Plugins.Evict.Enabled) != 1 { + return nil, fmt.Errorf("expected only a single evict plugin, have %v", len(config.Plugins.Evict.Enabled)) + } + + evictPluginName := config.Plugins.Evict.Enabled[0] + handle.evictorImpl.evictPlugin = pluginsMap[evictPluginName].(framework.EvictPlugin) + + sortPluginName := config.Plugins.Sort.Enabled[0] + handle.evictorImpl.sortPlugin = pluginsMap[sortPluginName].(framework.SortPlugin) + + return &Profile{ + handleImpl: handle, + deschedulePlugins: deschedulePlugins, + balancePlugins: balancePlugins, + }, nil +} diff --git a/pkg/framework/profile/profile_test.go b/pkg/framework/profile/profile_test.go new file mode 100644 index 0000000000..bfd4ad4b65 --- /dev/null +++ b/pkg/framework/profile/profile_test.go @@ -0,0 +1,91 @@ +package profile + +import ( + "context" + "testing" + "time" + + v1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes/fake" + "sigs.k8s.io/descheduler/pkg/api/v1alpha2" +
"sigs.k8s.io/descheduler/pkg/framework" + "sigs.k8s.io/descheduler/pkg/framework/registry" + "sigs.k8s.io/descheduler/test" +) + +func TestNewProfile(t *testing.T) { + var seconds uint = 10 + cfg := v1alpha2.Profile{ + Name: "test-profile", + PluginConfig: []v1alpha2.PluginConfig{ + v1alpha2.PluginConfig{ + Name: "PodLifeTime", + Args: &framework.PodLifeTimeArgs{ + MaxPodLifeTimeSeconds: &seconds, + }, + }, + v1alpha2.PluginConfig{ + Name: "RemoveFailedPods", + Args: &framework.RemoveFailedPodsArgs{}, + }, + v1alpha2.PluginConfig{ + Name: "DefaultEvictor", + Args: &framework.DefaultEvictorArgs{}, + }, + }, + Plugins: v1alpha2.Plugins{ + Deschedule: v1alpha2.Plugin{ + Enabled: []string{ + "PodLifeTime", + // "RemoveFailedPods", + }, + }, + Sort: v1alpha2.Plugin{ + Enabled: []string{ + "DefaultEvictor", + }, + }, + Evict: v1alpha2.Plugin{ + Enabled: []string{ + "DefaultEvictor", + }, + }, + }, + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // TODO(jchaloup): implement plugin args defaulting + reg := registry.NewRegistry() + node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil) + olderPodCreationTime := metav1.NewTime(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)) + p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil) + p1.ObjectMeta.CreationTimestamp = olderPodCreationTime + fakeClient := fake.NewSimpleClientset(node1, p1) + + sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0) + + podEvictor := framework.NewPodEvictor( + fakeClient, + policyv1.SchemeGroupVersion.String(), + false, + nil, + nil, + false, + ) + + h, err := NewProfile(cfg, reg, WithClientSet(fakeClient), WithSharedInformerFactory(sharedInformerFactory), WithPodEvictor(podEvictor)) + if err != nil { + t.Fatal(err) + } + + sharedInformerFactory.Start(ctx.Done()) + sharedInformerFactory.WaitForCacheSync(ctx.Done()) + + h.RunDeschedulePlugins(ctx, []*v1.Node{node1}) + h.RunBalancePlugins(ctx, []*v1.Node{node1}) +} diff --git a/pkg/framework/registry/registry.go b/pkg/framework/registry/registry.go new file mode 100644 index 0000000000..8818c37b83 --- /dev/null +++ b/pkg/framework/registry/registry.go @@ -0,0 +1,37 @@ +package registry + +import ( + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/descheduler/pkg/framework" + + "sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor" + "sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization" + "sigs.k8s.io/descheduler/pkg/framework/plugins/podlifetime" + "sigs.k8s.io/descheduler/pkg/framework/plugins/removeduplicatepods" + "sigs.k8s.io/descheduler/pkg/framework/plugins/removefailedpods" + "sigs.k8s.io/descheduler/pkg/framework/plugins/removepodshavingtoomanyrestarts" + "sigs.k8s.io/descheduler/pkg/framework/plugins/removepodsviolatinginterpodantiaffinity" + "sigs.k8s.io/descheduler/pkg/framework/plugins/removepodsviolatingnodeaffinity" + "sigs.k8s.io/descheduler/pkg/framework/plugins/removepodsviolatingnodetaints" + "sigs.k8s.io/descheduler/pkg/framework/plugins/removepodsviolatingtopologyspreadconstraint" +) + +type PluginBuilder = func(args runtime.Object, handle framework.Handle) (framework.Plugin, error) + +type Registry = map[string]PluginBuilder + +func NewRegistry() Registry { + return Registry{ + nodeutilization.HighNodeUtilizationPluginName: nodeutilization.NewHighNodeUtilization, + nodeutilization.LowNodeUtilizationPluginName: nodeutilization.NewLowNodeUtilization, + podlifetime.PluginName: podlifetime.New, + removeduplicatepods.PluginName: removeduplicatepods.New, + 
removefailedpods.PluginName: removefailedpods.New, + removepodshavingtoomanyrestarts.PluginName: removepodshavingtoomanyrestarts.New, + removepodsviolatinginterpodantiaffinity.PluginName: removepodsviolatinginterpodantiaffinity.New, + removepodsviolatingnodeaffinity.PluginName: removepodsviolatingnodeaffinity.New, + removepodsviolatingnodetaints.PluginName: removepodsviolatingnodetaints.New, + removepodsviolatingtopologyspreadconstraint.PluginName: removepodsviolatingtopologyspreadconstraint.New, + defaultevictor.PluginName: defaultevictor.New, + } +} diff --git a/pkg/framework/runtime/framework.go b/pkg/framework/runtime/framework.go new file mode 100644 index 0000000000..63e00aeca5 --- /dev/null +++ b/pkg/framework/runtime/framework.go @@ -0,0 +1,130 @@ +package runtime + +import ( + "context" + "fmt" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/client-go/informers" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/klog/v2" + + "sigs.k8s.io/descheduler/pkg/api/v1alpha2" + "sigs.k8s.io/descheduler/pkg/framework" + "sigs.k8s.io/descheduler/pkg/framework/profile" + "sigs.k8s.io/descheduler/pkg/framework/registry" +) + +// Option for the handleImpl. +type Option func(*frameworkImplOpts) + +type frameworkImplOpts struct { + clientSet clientset.Interface + sharedInformerFactory informers.SharedInformerFactory + podEvictor framework.Evictable + registry registry.Registry +} + +// WithClientSet sets clientSet for the scheduling Framework. +func WithClientSet(clientSet clientset.Interface) Option { + return func(o *frameworkImplOpts) { + o.clientSet = clientSet + } +} + +func WithSharedInformerFactory(sharedInformerFactory informers.SharedInformerFactory) Option { + return func(o *frameworkImplOpts) { + o.sharedInformerFactory = sharedInformerFactory + } +} + +func WithPodEvictor(podEvictor framework.Evictable) Option { + return func(o *frameworkImplOpts) { + o.podEvictor = podEvictor + } +} + +func WithRegistry(registry registry.Registry) Option { + return func(o *frameworkImplOpts) { + o.registry = registry + } +} + +type Framework struct { + profiles []*profile.Profile + + podEvictor framework.Evictable + evicted uint +} + +func (f *Framework) Evict(ctx context.Context, pod *v1.Pod) bool { + return f.podEvictor.Evict(ctx, pod) +} + +func NewFramework(config v1alpha2.DeschedulerConfiguration, opts ...Option) (*Framework, error) { + fOpts := &frameworkImplOpts{} + for _, optFnc := range opts { + optFnc(fOpts) + } + + frmwrk := &Framework{ + podEvictor: fOpts.podEvictor, + } + + for _, profileCfg := range config.Profiles { + profImpl, err := profile.NewProfile( + profileCfg, + fOpts.registry, + profile.WithClientSet(fOpts.clientSet), + profile.WithPodEvictor(frmwrk), + profile.WithSharedInformerFactory(fOpts.sharedInformerFactory), + ) + if err != nil { + return nil, fmt.Errorf("unable to create profile for %v: %v", profileCfg.Name, err) + } + frmwrk.profiles = append(frmwrk.profiles, profImpl) + } + + return frmwrk, nil +} + +func (f *Framework) RunDeschedulePlugins(ctx context.Context, nodes []*v1.Node) *framework.Status { + errs := []error{} + f.evicted = 0 + for _, profile := range f.profiles { + status := profile.RunDeschedulePlugins(ctx, nodes) + if status != nil && status.Err != nil { + errs = append(errs, fmt.Errorf("profile=%v, %v", profile, status.Err)) + } + } + klog.V(1).InfoS("Total number of pods evicted by Deschedule extension point", "evictedPods", f.evicted) + aggrErr := errors.NewAggregate(errs) + if aggrErr == nil { + return 
&framework.Status{} + } + + return &framework.Status{ + Err: fmt.Errorf("%v", aggrErr.Error()), + } +} + +func (f *Framework) RunBalancePlugins(ctx context.Context, nodes []*v1.Node) *framework.Status { + errs := []error{} + f.evicted = 0 + for _, profile := range f.profiles { + status := profile.RunBalancePlugins(ctx, nodes) + if status != nil && status.Err != nil { + errs = append(errs, fmt.Errorf("profile=%v, %v", profile, status.Err)) + } + } + klog.V(1).InfoS("Total number of pods evicted by Balance extension point", "evictedPods", f.evicted) + aggrErr := errors.NewAggregate(errs) + if aggrErr == nil { + return &framework.Status{} + } + + return &framework.Status{ + Err: fmt.Errorf("%v", aggrErr.Error()), + } +} diff --git a/pkg/framework/runtime/framework_test.go b/pkg/framework/runtime/framework_test.go new file mode 100644 index 0000000000..4ff7383e12 --- /dev/null +++ b/pkg/framework/runtime/framework_test.go @@ -0,0 +1,105 @@ +package runtime + +import ( + "context" + "testing" + "time" + + v1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes/fake" + "sigs.k8s.io/descheduler/pkg/api/v1alpha2" + "sigs.k8s.io/descheduler/pkg/framework" + "sigs.k8s.io/descheduler/pkg/framework/registry" + "sigs.k8s.io/descheduler/test" +) + +func TestNewFramework(t *testing.T) { + var seconds uint = 10 + cfg := v1alpha2.DeschedulerConfiguration{ + Profiles: []v1alpha2.Profile{ + { + Name: "test-profile", + PluginConfig: []v1alpha2.PluginConfig{ + v1alpha2.PluginConfig{ + Name: "PodLifeTime", + Args: &framework.PodLifeTimeArgs{ + MaxPodLifeTimeSeconds: &seconds, + }, + }, + v1alpha2.PluginConfig{ + Name: "RemoveFailedPods", + Args: &framework.RemoveFailedPodsArgs{}, + }, + v1alpha2.PluginConfig{ + Name: "DefaultEvictor", + Args: &framework.DefaultEvictorArgs{}, + }, + }, + Plugins: v1alpha2.Plugins{ + Deschedule: v1alpha2.Plugin{ + Enabled: []string{ + "PodLifeTime", + // "RemoveFailedPods", + }, + }, + Sort: v1alpha2.Plugin{ + Enabled: []string{ + "DefaultEvictor", + }, + }, + Evict: v1alpha2.Plugin{ + Enabled: []string{ + "DefaultEvictor", + }, + }, + }, + }, + }, + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // TODO(jchaloup): implement plugin args defaulting + reg := registry.NewRegistry() + node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil) + olderPodCreationTime := metav1.NewTime(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)) + p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil) + p1.ObjectMeta.CreationTimestamp = olderPodCreationTime + fakeClient := fake.NewSimpleClientset(node1, p1) + + sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0) + + podEvictor := framework.NewPodEvictor( + fakeClient, + policyv1.SchemeGroupVersion.String(), + false, + nil, + nil, + false, + ) + + frmwrk, err := NewFramework(cfg, + WithClientSet(fakeClient), + WithSharedInformerFactory(sharedInformerFactory), + WithPodEvictor(podEvictor), + WithRegistry(reg), + ) + if err != nil { + t.Fatal(err) + } + + sharedInformerFactory.Start(ctx.Done()) + sharedInformerFactory.WaitForCacheSync(ctx.Done()) + + if status := frmwrk.RunDeschedulePlugins(ctx, []*v1.Node{node1}); status != nil && status.Err != nil { + t.Fatalf("Running deschedule plugins returned non-empty status: %v", status.Err) + } + + if status := frmwrk.RunBalancePlugins(ctx, []*v1.Node{node1}); status != nil && status.Err != nil { + t.Fatalf("Running balance plugins 
returned non-empty status: %v", status.Err) + } +} diff --git a/pkg/framework/types.go b/pkg/framework/types.go new file mode 100644 index 0000000000..bd7eaa7ba8 --- /dev/null +++ b/pkg/framework/types.go @@ -0,0 +1,258 @@ +package framework + +import ( + "context" + "fmt" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/informers" + clientset "k8s.io/client-go/kubernetes" + + "sigs.k8s.io/descheduler/pkg/api" + podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" +) + +type Handle interface { + // ClientSet returns a kubernetes clientSet. + ClientSet() clientset.Interface + Evictor() Evictor + GetPodsAssignedToNodeFunc() podutil.GetPodsAssignedToNodeFunc + SharedInformerFactory() informers.SharedInformerFactory +} + +type Evictor interface { + // Sort pods from the most to the least suitable for eviction + Sort([]*v1.Pod) + // Filter checks if a pod can be evicted + Filter(*v1.Pod) bool + // Evict evicts a pod (no pre-check performed) + Evict(context.Context, *v1.Pod) bool +} + +type Evictable interface { + Evict(context.Context, *v1.Pod) bool +} + +type Status struct { + Err error +} + +// Plugin is the parent type for all the descheduling framework plugins. +type Plugin interface { + Name() string +} + +type DeschedulePlugin interface { + Plugin + Deschedule(ctx context.Context, nodes []*v1.Node) *Status +} + +type BalancePlugin interface { + Plugin + Balance(ctx context.Context, nodes []*v1.Node) *Status +} + +// Sort plugin sorts pods +type PreSortPlugin interface { + Plugin + PreLess(*v1.Pod, *v1.Pod) bool +} + +type SortPlugin interface { + Plugin + Less(*v1.Pod, *v1.Pod) bool +} + +type EvictPlugin interface { + Plugin + Filter(*v1.Pod) bool +} + +type CommonArgs struct { + Namespaces *api.Namespaces + PriorityThreshold *api.PriorityThreshold + NodeFit bool +} + +// RemoveDuplicatePodsArgs holds arguments used to configure the RemoveDuplicatePods plugin. +type RemoveDuplicatePodsArgs struct { + metav1.TypeMeta + + CommonArgs + ExcludeOwnerKinds []string +} + +// TODO(jchaloup): have this go generated +func (in *RemoveDuplicatePodsArgs) DeepCopyObject() runtime.Object { + return nil +} + +// RemoveFailedPodsArgs holds arguments used to configure the RemoveFailedPods plugin. +type RemoveFailedPodsArgs struct { + metav1.TypeMeta + + CommonArgs + LabelSelector *metav1.LabelSelector + MinPodLifetimeSeconds *uint + Reasons []string + IncludingInitContainers bool + ExcludeOwnerKinds []string +} + +// TODO(jchaloup): have this go generated +func (in *RemoveFailedPodsArgs) DeepCopyObject() runtime.Object { + return nil +} + +// RemovePodsViolatingNodeAffinityArgs holds arguments used to configure the RemovePodsViolatingNodeAffinity plugin. +type RemovePodsViolatingNodeAffinityArgs struct { + metav1.TypeMeta + + CommonArgs + LabelSelector *metav1.LabelSelector + NodeAffinityType []string +} + +// TODO(jchaloup): have this go generated +func (in *RemovePodsViolatingNodeAffinityArgs) DeepCopyObject() runtime.Object { + return nil +} + +// RemovePodsViolatingNodeTaintsArgs holds arguments used to configure the RemovePodsViolatingNodeTaints plugin. 
+type RemovePodsViolatingNodeTaintsArgs struct { + metav1.TypeMeta + + CommonArgs + LabelSelector *metav1.LabelSelector + IncludePreferNoSchedule bool + ExcludedTaints []string +} + +// TODO(jchaloup): have this go generated +func (in *RemovePodsViolatingNodeTaintsArgs) DeepCopyObject() runtime.Object { + return nil +} + +// RemovePodsViolatingInterPodAntiAffinityArgs holds arguments used to configure the RemovePodsViolatingInterPodAntiAffinity plugin. +type RemovePodsViolatingInterPodAntiAffinityArgs struct { + metav1.TypeMeta + + CommonArgs + LabelSelector *metav1.LabelSelector +} + +// TODO(jchaloup): have this go generated +func (in *RemovePodsViolatingInterPodAntiAffinityArgs) DeepCopyObject() runtime.Object { + return nil +} + +// PodLifeTimeArgs holds arguments used to configure the PodLifeTime plugin. +type PodLifeTimeArgs struct { + metav1.TypeMeta + + CommonArgs + LabelSelector *metav1.LabelSelector + MaxPodLifeTimeSeconds *uint + PodStatusPhases []string +} + +// TODO(jchaloup): have this go generated +func (in *PodLifeTimeArgs) DeepCopyObject() runtime.Object { + return nil +} + +// RemovePodsHavingTooManyRestartsArgs holds arguments used to configure the RemovePodsHavingTooManyRestarts plugin. +type RemovePodsHavingTooManyRestartsArgs struct { + metav1.TypeMeta + + CommonArgs + LabelSelector *metav1.LabelSelector + PodRestartThreshold int32 + IncludingInitContainers bool +} + +// TODO(jchaloup): have this go generated +func (in *RemovePodsHavingTooManyRestartsArgs) DeepCopyObject() runtime.Object { + return nil +} + +// RemovePodsViolatingTopologySpreadConstraintArgs holds arguments used to configure the RemovePodsViolatingTopologySpreadConstraint plugin. +type RemovePodsViolatingTopologySpreadConstraintArgs struct { + metav1.TypeMeta + + CommonArgs + LabelSelector *metav1.LabelSelector + IncludeSoftConstraints bool +} + +// TODO(jchaloup): have this go generated +func (in *RemovePodsViolatingTopologySpreadConstraintArgs) DeepCopyObject() runtime.Object { + return nil +} + +// LowNodeUtilizationArgs holds arguments used to configure the LowNodeUtilization plugin. +type LowNodeUtilizationArgs struct { + metav1.TypeMeta + + PriorityThreshold *api.PriorityThreshold + NodeFit bool + UseDeviationThresholds bool + Thresholds api.ResourceThresholds + TargetThresholds api.ResourceThresholds + NumberOfNodes int +} + +// TODO(jchaloup): have this go generated +func (in *LowNodeUtilizationArgs) DeepCopyObject() runtime.Object { + return nil +} + +// HighNodeUtilizationArgs holds arguments used to configure the HighNodeUtilization plugin. +type HighNodeUtilizationArgs struct { + metav1.TypeMeta + + PriorityThreshold *api.PriorityThreshold + NodeFit bool + Thresholds api.ResourceThresholds + TargetThresholds api.ResourceThresholds + NumberOfNodes int +} + +// TODO(jchaloup): have this go generated +func (in *HighNodeUtilizationArgs) DeepCopyObject() runtime.Object { + return nil +} + +func ValidateCommonArgs(args CommonArgs) error { + // At most one of include/exclude can be set + if args.Namespaces != nil && len(args.Namespaces.Include) > 0 && len(args.Namespaces.Exclude) > 0 { + return fmt.Errorf("only one of Include/Exclude namespaces can be set") + } + if args.PriorityThreshold != nil && args.PriorityThreshold.Value != nil && args.PriorityThreshold.Name != "" { + return fmt.Errorf("only one of priorityThreshold fields can be set") + } + + return nil +} + +// DefaultEvictorArgs holds arguments used to configure the DefaultEvictor plugin. 
+type DefaultEvictorArgs struct { + metav1.TypeMeta + + EvictFailedBarePods bool + EvictLocalStoragePods bool + EvictSystemCriticalPods bool + IgnorePvcPods bool + PriorityThreshold *api.PriorityThreshold + NodeFit bool + LabelSelector *metav1.LabelSelector + // TODO(jchaloup): turn it into *metav1.LabelSelector + NodeSelector string +} + +// TODO(jchaloup): have this go generated +func (in *DefaultEvictorArgs) DeepCopyObject() runtime.Object { + return nil +} diff --git a/pkg/utils/priority.go b/pkg/utils/priority.go index 709273dfcf..058de5ea87 100644 --- a/pkg/utils/priority.go +++ b/pkg/utils/priority.go @@ -72,3 +72,23 @@ func GetPriorityFromStrategyParams(ctx context.Context, client clientset.Interfa } return } + +// GetPriorityValueFromPriorityThreshold gets priority from the given PriorityThreshold. +// It will return SystemCriticalPriority by default. +func GetPriorityValueFromPriorityThreshold(ctx context.Context, client clientset.Interface, priorityThreshold *api.PriorityThreshold) (priority int32, err error) { + if priorityThreshold == nil { + return SystemCriticalPriority, nil + } + if priorityThreshold.Value != nil { + priority = *priorityThreshold.Value + } else { + priority, err = GetPriorityFromPriorityClass(ctx, client, priorityThreshold.Name) + if err != nil { + return 0, fmt.Errorf("unable to get priority value from the priority class: %v", err) + } + } + if priority > SystemCriticalPriority { + return 0, fmt.Errorf("priority threshold can't be greater than %d", SystemCriticalPriority) + } + return +}
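Note (illustrative, not part of the patch): the types in pkg/framework/types.go and pkg/framework/registry/registry.go above fully define what a plugin has to provide, a constructor matching registry.PluginBuilder, the framework.Plugin Name() method, and one extension-point interface such as framework.DeschedulePlugin. The following minimal sketch shows what an out-of-tree Deschedule plugin written against those interfaces could look like; the package and plugin names are invented, and it assumes this PoC's package layout (sigs.k8s.io/descheduler/pkg/framework, sigs.k8s.io/descheduler/pkg/descheduler/pod).

package noopexample

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"

	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
	"sigs.k8s.io/descheduler/pkg/framework"
)

// PluginName is hypothetical; only the interfaces and the builder signature
// come from the patch above.
const PluginName = "NoOpExample"

// NoOpExample asks the profile's evictor to evict every pod that passes the
// enabled evict plugin's Filter. It exists purely to show the plugin wiring.
type NoOpExample struct {
	handle framework.Handle
}

var _ framework.Plugin = &NoOpExample{}
var _ framework.DeschedulePlugin = &NoOpExample{}

// New follows the registry.PluginBuilder signature; args would carry this
// plugin's PluginConfig entry (unused here).
func New(args runtime.Object, handle framework.Handle) (framework.Plugin, error) {
	return &NoOpExample{handle: handle}, nil
}

func (d *NoOpExample) Name() string {
	return PluginName
}

func (d *NoOpExample) Deschedule(ctx context.Context, nodes []*v1.Node) *framework.Status {
	for _, node := range nodes {
		// List only the pods the evict plugin (e.g. DefaultEvictor) considers evictable.
		pods, err := podutil.ListAllPodsOnANode(node.Name, d.handle.GetPodsAssignedToNodeFunc(), d.handle.Evictor().Filter)
		if err != nil {
			return &framework.Status{Err: fmt.Errorf("error listing pods on a node: %v", err)}
		}
		for _, pod := range pods {
			d.handle.Evictor().Evict(ctx, pod)
		}
	}
	return nil
}

// Enabling it would take one extra registry entry, e.g. reg[PluginName] = New in
// registry.NewRegistry(), plus listing "NoOpExample" under a profile's Deschedule
// extension point.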
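A second sketch, condensed from the pattern already exercised by pkg/framework/runtime/framework_test.go above, shows how a caller could wire a v1alpha2 profile, the new framework.PodEvictor, and the plugin registry together outside a test. The function name, profile name, and PodLifeTime threshold are invented for illustration; note that a profile must enable exactly one Sort and one Evict plugin, and the DefaultEvictor fills both roles here.

package example

import (
	"context"

	v1 "k8s.io/api/core/v1"
	policyv1 "k8s.io/api/policy/v1"
	"k8s.io/client-go/informers"
	clientset "k8s.io/client-go/kubernetes"

	"sigs.k8s.io/descheduler/pkg/api/v1alpha2"
	"sigs.k8s.io/descheduler/pkg/framework"
	"sigs.k8s.io/descheduler/pkg/framework/registry"
	frameworkruntime "sigs.k8s.io/descheduler/pkg/framework/runtime"
)

// runDeschedulerOnce wires one v1alpha2 profile through the new framework and
// runs the Deschedule and Balance extension points a single time over nodes.
func runDeschedulerOnce(ctx context.Context, client clientset.Interface, nodes []*v1.Node) error {
	var maxLifetime uint = 600 // illustrative threshold in seconds

	cfg := v1alpha2.DeschedulerConfiguration{
		Profiles: []v1alpha2.Profile{{
			Name: "example-profile",
			PluginConfig: []v1alpha2.PluginConfig{
				{Name: "PodLifeTime", Args: &framework.PodLifeTimeArgs{MaxPodLifeTimeSeconds: &maxLifetime}},
				{Name: "DefaultEvictor", Args: &framework.DefaultEvictorArgs{NodeFit: true}},
			},
			Plugins: v1alpha2.Plugins{
				Deschedule: v1alpha2.Plugin{Enabled: []string{"PodLifeTime"}},
				// DefaultEvictor doubles as the single sort and evict plugin.
				Sort:  v1alpha2.Plugin{Enabled: []string{"DefaultEvictor"}},
				Evict: v1alpha2.Plugin{Enabled: []string{"DefaultEvictor"}},
			},
		}},
	}

	sharedInformerFactory := informers.NewSharedInformerFactory(client, 0)
	podEvictor := framework.NewPodEvictor(
		client,
		policyv1.SchemeGroupVersion.String(),
		false, // dryRun
		nil,   // maxPodsToEvictPerNode
		nil,   // maxPodsToEvictPerNamespace
		false, // metricsEnabled
	)

	frmwrk, err := frameworkruntime.NewFramework(cfg,
		frameworkruntime.WithClientSet(client),
		frameworkruntime.WithSharedInformerFactory(sharedInformerFactory),
		frameworkruntime.WithPodEvictor(podEvictor),
		frameworkruntime.WithRegistry(registry.NewRegistry()),
	)
	if err != nil {
		return err
	}

	sharedInformerFactory.Start(ctx.Done())
	sharedInformerFactory.WaitForCacheSync(ctx.Done())

	if status := frmwrk.RunDeschedulePlugins(ctx, nodes); status != nil && status.Err != nil {
		return status.Err
	}
	if status := frmwrk.RunBalancePlugins(ctx, nodes); status != nil && status.Err != nil {
		return status.Err
	}
	return nil
}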