From 17f266b2a782b0fca87955db1ac0357ece3b2d8d Mon Sep 17 00:00:00 2001 From: lili Date: Wed, 8 Nov 2023 20:26:47 +0800 Subject: [PATCH] support preemption when the number of pods of a node reaches the upper limit Signed-off-by: lili --- pkg/scheduler/api/pod_info.go | 1 + pkg/scheduler/api/resource_info.go | 1 + .../plugins/predicates/predicates.go | 20 +++++++++++-------- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/pkg/scheduler/api/pod_info.go b/pkg/scheduler/api/pod_info.go index d9cdf1943a6..5530f9a7eab 100644 --- a/pkg/scheduler/api/pod_info.go +++ b/pkg/scheduler/api/pod_info.go @@ -65,6 +65,7 @@ func GetPodResourceRequest(pod *v1.Pod) *Resource { for _, container := range pod.Spec.InitContainers { result.SetMaxResource(NewResource(container.Resources.Requests)) } + result.AddScalar(v1.ResourcePods, 1) return result } diff --git a/pkg/scheduler/api/resource_info.go b/pkg/scheduler/api/resource_info.go index f776d3d1ee5..d732fafcaaf 100644 --- a/pkg/scheduler/api/resource_info.go +++ b/pkg/scheduler/api/resource_info.go @@ -77,6 +77,7 @@ func NewResource(rl v1.ResourceList) *Resource { r.Memory += float64(rQuant.Value()) case v1.ResourcePods: r.MaxTaskNum += int(rQuant.Value()) + r.AddScalar(rName, float64(rQuant.Value())) case v1.ResourceEphemeralStorage: r.AddScalar(rName, float64(rQuant.MilliValue())) default: diff --git a/pkg/scheduler/plugins/predicates/predicates.go b/pkg/scheduler/plugins/predicates/predicates.go index 198133df46e..5de77c59c18 100644 --- a/pkg/scheduler/plugins/predicates/predicates.go +++ b/pkg/scheduler/plugins/predicates/predicates.go @@ -415,18 +415,22 @@ func (pp *predicatesPlugin) OnSessionOpen(ssn *framework.Session) { } if node.Allocatable.MaxTaskNum <= len(nodeInfo.Pods) { - klog.V(4).Infof("NodePodNumber predicates Task <%s/%s> on Node <%s> failed", - task.Namespace, task.Name, node.Name) + klog.V(4).Infof("NodePodNumber predicates Task <%s/%s> on Node <%s> failed, %d, %d", + task.Namespace, task.Name, node.Name, node.Allocatable.MaxTaskNum, len(nodeInfo.Pods)) podsNumStatus := &api.Status{ - // TODO(wangyang0616): When the number of pods of a node reaches the upper limit, preemption is not supported for now. - // Record details in #3079 (volcano.sh/volcano) - // In the preempt stage, the pipeline of the pod number is not considered, - // the preemption of the pod number is released directly, which will cause the pods in the node to be cyclically evicted. - Code: api.UnschedulableAndUnresolvable, + Code: api.Unschedulable, Reason: api.NodePodNumberExceeded, } predicateStatus = append(predicateStatus, podsNumStatus) - return predicateStatus, fmt.Errorf("%s", api.NodePodNumberExceeded) + } + return predicateStatus, nil + }) + + ssn.AddPredicateFn(pp.Name(), func(task *api.TaskInfo, node *api.NodeInfo) ([]*api.Status, error) { + predicateStatus := make([]*api.Status, 0) + nodeInfo, found := nodeMap[node.Name] + if !found { + return predicateStatus, fmt.Errorf("failed to predicates, node info for %s not found", node.Name) } predicateByStablefilter := func(pod *v1.Pod, nodeInfo *k8sframework.NodeInfo) ([]*api.Status, bool, error) {