add gang-scheduling benchmark for coscheduling plugin with jobset and kueue (#98)

Signed-off-by: Dmitry Shmulevich <[email protected]>
NVIDIA · Aug 20, 2024 · bbf5a6e · bbf5a6e
1 parent a18514c
commit bbf5a6e
Show file tree

Hide file tree

Showing 26 changed files with 549 additions and 87 deletions.
diff --git a/charts/virtual-nodes/Chart.yaml b/charts/virtual-nodes/Chart.yaml
@@ -15,7 +15,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.0
+version: 0.2.0
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to

diff --git a/charts/virtual-nodes/templates/nodes.yaml b/charts/virtual-nodes/templates/nodes.yaml
@@ -63,8 +63,8 @@
 {{- end }}
 
 {{- $count := ($node.count | int) }}
-{{- $suffix := ( randAlphaNum 6 | lower ) }}
 {{- range until $count }}
+{{- $suffix := ( randAlphaNum 6 | lower ) }}
 ---
 apiVersion: v1
 kind: Node

diff --git a/pkg/engine/configure_task.go b/pkg/engine/configure_task.go
@@ -46,11 +46,11 @@ type ConfigureTask struct {
 }
 
 type configureTaskParams struct {
-	Nodes              []virtualNode       `yaml:"nodes"`
-	Namespaces         []namespace         `yaml:"namespaces"`
-	ConfigMaps         []configmap         `yaml:"configmaps"`
-	PriorityClasses    []priorityClass     `yaml:"priorityClasses"`
-	DeploymentRestarts []deploymentRestart `yaml:"deploymentRestarts"`
+	Nodes              []virtualNode        `yaml:"nodes"`
+	Namespaces         []namespace          `yaml:"namespaces"`
+	ConfigMaps         []configmap          `yaml:"configmaps"`
+	PriorityClasses    []priorityClass      `yaml:"priorityClasses"`
+	DeploymentRestarts []*deploymentRestart `yaml:"deploymentRestarts"`
 
 	Timeout time.Duration `yaml:"timeout"`
 }
@@ -217,7 +217,13 @@ func (task *ConfigureTask) Exec(ctx context.Context) error {
 		return err
 	}
 
-	return task.restartDeployments(ctx)
+	for _, dr := range task.DeploymentRestarts {
+		if err = task.restartDeployment(ctx, dr); err != nil {
+			return err
+		}
+	}
+
+	return nil
 }
 
 func (task *ConfigureTask) updateNamespaces(ctx context.Context) error {
@@ -337,69 +343,65 @@ func (task *ConfigureTask) updateConfigmaps(ctx context.Context) error {
 	return nil
 }
 
-func (task *ConfigureTask) restartDeployments(ctx context.Context) error {
-	for _, dr := range task.DeploymentRestarts {
-		dClient := task.client.AppsV1().Deployments(dr.Namespace)
-
-		dName := dr.Name
-		if len(dName) == 0 {
-			labels := make([]string, 0, len(dr.Labels))
-			for key, val := range dr.Labels {
-				labels = append(labels, key+"="+val)
-			}
-			lbl := strings.Join(labels, ",")
+func (task *ConfigureTask) restartDeployment(ctx context.Context, dr *deploymentRestart) error {
+	dClient := task.client.AppsV1().Deployments(dr.Namespace)
 
-			list, err := dClient.List(ctx, metav1.ListOptions{LabelSelector: lbl})
-			if err != nil {
-				log.InfoS("Warning: skipping restart of deployment", "labels", lbl, "error", err.Error())
-				return nil
-			}
+	dName := dr.Name
+	if len(dName) == 0 {
+		labels := make([]string, 0, len(dr.Labels))
+		for key, val := range dr.Labels {
+			labels = append(labels, key+"="+val)
+		}
+		lbl := strings.Join(labels, ",")
 
-			if len(list.Items) == 0 {
-				log.InfoS("Warning: no deployment to restart", "labels", lbl)
-				return nil
-			}
+		list, err := dClient.List(ctx, metav1.ListOptions{LabelSelector: lbl})
+		if err != nil {
+			log.InfoS("Warning: skipping restart of deployment", "labels", lbl, "error", err.Error())
+			return nil
+		}
 
-			if len(list.Items) != 1 {
-				return fmt.Errorf("expected 1 deployment with labels %s, not %d", lbl, len(list.Items))
-			}
+		if len(list.Items) == 0 {
+			log.InfoS("Warning: no deployment to restart", "labels", lbl)
+			return nil
+		}
 
-			dName = list.Items[0].Name
+		if len(list.Items) != 1 {
+			return fmt.Errorf("expected 1 deployment with labels %s, not %d", lbl, len(list.Items))
 		}
-		log.Infof("Restarting deployment %s", dName)
 
-		update := fmt.Sprintf(`{"spec": {"template": {"metadata": {"annotations": {"kubectl.kubernetes.io/restartedAt": "%s"}}}}}`,
-			time.Now().Format("2006-01-02T15:04:05-07:00"))
+		dName = list.Items[0].Name
+	}
+	log.Infof("Restarting deployment %s", dName)
 
-		_, err := dClient.Patch(ctx, dName, k8stypes.StrategicMergePatchType, []byte(update), metav1.PatchOptions{})
-		if err != nil {
-			return fmt.Errorf("failed to update deployment %s: %s", dName, err.Error())
-		}
+	update := fmt.Sprintf(`{"spec": {"template": {"metadata": {"annotations": {"kubectl.kubernetes.io/restartedAt": "%s"}}}}}`,
+		time.Now().Format("2006-01-02T15:04:05-07:00"))
 
-		delay := 5 * time.Second
-		timer := time.NewTimer(delay)
-		defer timer.Stop()
-		for {
-			select {
-			case <-timer.C:
-				d, err := dClient.Get(ctx, dName, metav1.GetOptions{})
-				if err != nil {
-					log.Errorf("failed to get status for deployment %s : %v", dName, err)
-				} else if d.Status.UnavailableReplicas != 0 {
-					log.V(4).Infof("Restarting deployment %s: %d unavailable replicas", dName, d.Status.UnavailableReplicas)
-				} else {
-					log.Infof("Restarted deployment %s", dName)
-					return nil
-				}
-				timer.Reset(delay)
+	_, err := dClient.Patch(ctx, dName, k8stypes.StrategicMergePatchType, []byte(update), metav1.PatchOptions{})
+	if err != nil {
+		return fmt.Errorf("failed to update deployment %s: %s", dName, err.Error())
+	}
 
-			case <-ctx.Done():
-				return ctx.Err()
+	delay := 5 * time.Second
+	timer := time.NewTimer(delay)
+	defer timer.Stop()
+	for {
+		select {
+		case <-timer.C:
+			d, err := dClient.Get(ctx, dName, metav1.GetOptions{})
+			if err != nil {
+				log.Errorf("failed to get status for deployment %s : %v", dName, err)
+			} else if d.Status.UnavailableReplicas != 0 {
+				log.V(4).Infof("Restarting deployment %s: %d unavailable replicas", dName, d.Status.UnavailableReplicas)
+			} else {
+				log.Infof("Restarted deployment %s", dName)
+				return nil
 			}
+			timer.Reset(delay)
+
+		case <-ctx.Done():
+			return ctx.Err()
 		}
 	}
-
-	return nil
 }
 
 func (task *ConfigureTask) updateVirtualNodes(ctx context.Context) error {

diff --git a/pkg/engine/configure_task_test.go b/pkg/engine/configure_task_test.go
@@ -240,7 +240,7 @@ func TestNewConfigureTask(t *testing.T) {
 							Op:   OpDelete,
 						},
 					},
-					DeploymentRestarts: []deploymentRestart{
+					DeploymentRestarts: []*deploymentRestart{
 						{
 							Namespace: "ns1",
 							Name:      "deploy1",

diff --git a/pkg/engine/submit_object_task.go b/pkg/engine/submit_object_task.go
@@ -144,7 +144,8 @@ func (task *SubmitObjTask) Exec(ctx context.Context) error {
 			}
 
 			if _, err := task.client.Resource(regObjParams.gvr[i]).Namespace(obj.Metadata.Namespace).Create(ctx, crd, metav1.CreateOptions{}); err != nil {
-				return err
+				return fmt.Errorf("%s: failed to create resource %s %s: %v",
+					task.ID(), regObjParams.gvr[i].String(), crd.GetName(), err)
 			}
 		}
 	}

diff --git a/resources/benchmarks/README.md b/resources/benchmarks/README.md
@@ -25,13 +25,13 @@ The gang-scheduling benchmark workflow operates on 32 virtual GPU nodes, submitt
 To run the benchmark test for Kueue:
 
 ```bash
-./bin/knavigator -workflow 'resources/benchmarks/gang-scheduling/workflows/{config-kueue.yaml,run-test.yaml}'
+./bin/knavigator -workflow 'resources/benchmarks/gang-scheduling/workflows/{config-nodes.yaml,config-kueue.yaml,run-test.yaml}'
 ```
 
 #### Run:ai
 
 ```bash
-./bin/knavigator -workflow resources/benchmarks/gang-scheduling/workflows/runai-test.yaml
+./bin/knavigator -workflow 'resources/benchmarks/gang-scheduling/workflows/{config-nodes.yaml,runai-test.yaml}'
 ```
 
 ## Scaling Benchmark Test

diff --git a/resources/benchmarks/gang-scheduling/workflows/config-combo-coscheduling.yaml b/resources/benchmarks/gang-scheduling/workflows/config-combo-coscheduling.yaml
@@ -0,0 +1,100 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: config-kueue
+tasks:
+- id: register-cluster-queue
+  type: RegisterObj
+  params:
+    template: "resources/templates/kueue/cluster-queue.yaml"
+- id: register-local-queue
+  type: RegisterObj
+  params:
+    template: "resources/templates/kueue/local-queue.yaml"
+- id: register-resource-flavor
+  type: RegisterObj
+  params:
+    template: "resources/templates/kueue/resource-flavor.yaml"
+- id: register
+  type: RegisterObj
+  params:
+    template: "resources/benchmarks/templates/jobset/jobset-coscheduling.yaml"
+    nameFormat: "jobset{{._ENUM_}}"
+    podNameFormat: "{{._NAME_}}-workers-[0-9]+-[0-9]+-.+"
+    podCount: "{{.replicas}}"
+- id: create-resource-flavor
+  type: SubmitObj
+  params:
+    refTaskId: register-resource-flavor
+    canExist: true
+    params:
+      name: "gpu-node"
+      nodeLabels:
+        nvidia.com/gpu.count: "8"
+- id: create-cluster-queue
+  type: SubmitObj
+  params:
+    refTaskId: register-cluster-queue
+    canExist: true
+    params:
+      name: team
+      flavor: gpu-node
+      cpu: 8
+      memory: 36Gi
+      pods: 32
+      gpu: 256
+- id: create-local-queue
+  type: SubmitObj
+  params:
+    refTaskId: register-local-queue
+    canExist: true
+    params:
+      name: team-queue
+      namespace: default
+      clusterQueue: team
+- id: configure
+  type: Configure
+  params:
+    configmaps:
+    - name: scheduler-config
+      namespace: scheduler-plugins
+      op: create
+      data:
+        scheduler-config.yaml: |
+          apiVersion: kubescheduler.config.k8s.io/v1
+          kind: KubeSchedulerConfiguration
+          leaderElection:
+            leaderElect: false
+          profiles:
+          # Compose all plugins in one profile
+          - schedulerName: scheduler-plugins-scheduler
+            plugins:
+              multiPoint:
+                enabled:
+                - name: Coscheduling
+                - name: CapacityScheduling
+                - name: NodeResourcesAllocatable
+                disabled:
+                - name: NodeResourceTopologyMatch
+                - name: PrioritySort
+            pluginConfig:
+            - args:
+                permitWaitingTimeSeconds: 10
+              name: Coscheduling
+    deploymentRestarts:
+    - namespace: scheduler-plugins
+      name: scheduler-plugins-controller
+    - namespace: scheduler-plugins
+      name: scheduler-plugins-scheduler
+    timeout: 2m
diff --git a/resources/benchmarks/gang-scheduling/workflows/config-kueue.yaml b/resources/benchmarks/gang-scheduling/workflows/config-kueue.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 name: config-kueue
 description: register, deploy and configure kueue custom resources
 tasks:

diff --git a/resources/benchmarks/gang-scheduling/workflows/config-nodes.yaml b/resources/benchmarks/gang-scheduling/workflows/config-nodes.yaml
@@ -0,0 +1,25 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: config-nodes
+tasks:
+- id: configure
+  type: Configure
+  params:
+    nodes:
+    - type: dgxa100.80g
+      count: 32
+      labels:
+        nvidia.com/gpu.count: "8"
+    timeout: 1m
diff --git a/resources/benchmarks/gang-scheduling/workflows/config-volcano.yaml b/resources/benchmarks/gang-scheduling/workflows/config-volcano.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 name: config-volcano
 description: register, deploy and configure volcano custom resources
 tasks:

diff --git a/resources/benchmarks/gang-scheduling/workflows/config-yunikorn.yaml b/resources/benchmarks/gang-scheduling/workflows/config-yunikorn.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 name: config-yunikorn
 description: register, deploy and configure yunikorn custom resources
 tasks: