Skip to content

Commit

Permalink
add gang-scheduling benchmark for coscheduling plugin with jobset and kueue
Browse files Browse the repository at this point in the history

Signed-off-by: Dmitry Shmulevich <[email protected]>
  • Loading branch information
dmitsh committed Aug 19, 2024
1 parent 7836225 commit b25e6f0
Show file tree
Hide file tree
Showing 26 changed files with 549 additions and 87 deletions.
2 changes: 1 addition & 1 deletion charts/virtual-nodes/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
version: 0.2.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
Expand Down
2 changes: 1 addition & 1 deletion charts/virtual-nodes/templates/nodes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@
{{- end }}

{{- $count := ($node.count | int) }}
{{- $suffix := ( randAlphaNum 6 | lower ) }}
{{- range until $count }}
{{- $suffix := ( randAlphaNum 6 | lower ) }}
---
apiVersion: v1
kind: Node
Expand Down
116 changes: 59 additions & 57 deletions pkg/engine/configure_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ type ConfigureTask struct {
}

type configureTaskParams struct {
Nodes []virtualNode `yaml:"nodes"`
Namespaces []namespace `yaml:"namespaces"`
ConfigMaps []configmap `yaml:"configmaps"`
PriorityClasses []priorityClass `yaml:"priorityClasses"`
DeploymentRestarts []deploymentRestart `yaml:"deploymentRestarts"`
Nodes []virtualNode `yaml:"nodes"`
Namespaces []namespace `yaml:"namespaces"`
ConfigMaps []configmap `yaml:"configmaps"`
PriorityClasses []priorityClass `yaml:"priorityClasses"`
DeploymentRestarts []*deploymentRestart `yaml:"deploymentRestarts"`

Timeout time.Duration `yaml:"timeout"`
}
Expand Down Expand Up @@ -217,7 +217,13 @@ func (task *ConfigureTask) Exec(ctx context.Context) error {
return err
}

return task.restartDeployments(ctx)
for _, dr := range task.DeploymentRestarts {
if err = task.restartDeployment(ctx, dr); err != nil {
return err
}
}

return nil
}

func (task *ConfigureTask) updateNamespaces(ctx context.Context) error {
Expand Down Expand Up @@ -337,69 +343,65 @@ func (task *ConfigureTask) updateConfigmaps(ctx context.Context) error {
return nil
}

func (task *ConfigureTask) restartDeployments(ctx context.Context) error {
for _, dr := range task.DeploymentRestarts {
dClient := task.client.AppsV1().Deployments(dr.Namespace)

dName := dr.Name
if len(dName) == 0 {
labels := make([]string, 0, len(dr.Labels))
for key, val := range dr.Labels {
labels = append(labels, key+"="+val)
}
lbl := strings.Join(labels, ",")
func (task *ConfigureTask) restartDeployment(ctx context.Context, dr *deploymentRestart) error {
dClient := task.client.AppsV1().Deployments(dr.Namespace)

list, err := dClient.List(ctx, metav1.ListOptions{LabelSelector: lbl})
if err != nil {
log.InfoS("Warning: skipping restart of deployment", "labels", lbl, "error", err.Error())
return nil
}
dName := dr.Name
if len(dName) == 0 {
labels := make([]string, 0, len(dr.Labels))
for key, val := range dr.Labels {
labels = append(labels, key+"="+val)
}
lbl := strings.Join(labels, ",")

if len(list.Items) == 0 {
log.InfoS("Warning: no deployment to restart", "labels", lbl)
return nil
}
list, err := dClient.List(ctx, metav1.ListOptions{LabelSelector: lbl})
if err != nil {
log.InfoS("Warning: skipping restart of deployment", "labels", lbl, "error", err.Error())
return nil
}

if len(list.Items) != 1 {
return fmt.Errorf("expected 1 deployment with labels %s, not %d", lbl, len(list.Items))
}
if len(list.Items) == 0 {
log.InfoS("Warning: no deployment to restart", "labels", lbl)
return nil
}

dName = list.Items[0].Name
if len(list.Items) != 1 {
return fmt.Errorf("expected 1 deployment with labels %s, not %d", lbl, len(list.Items))
}
log.Infof("Restarting deployment %s", dName)

update := fmt.Sprintf(`{"spec": {"template": {"metadata": {"annotations": {"kubectl.kubernetes.io/restartedAt": "%s"}}}}}`,
time.Now().Format("2006-01-02T15:04:05-07:00"))
dName = list.Items[0].Name
}
log.Infof("Restarting deployment %s", dName)

_, err := dClient.Patch(ctx, dName, k8stypes.StrategicMergePatchType, []byte(update), metav1.PatchOptions{})
if err != nil {
return fmt.Errorf("failed to update deployment %s: %s", dName, err.Error())
}
update := fmt.Sprintf(`{"spec": {"template": {"metadata": {"annotations": {"kubectl.kubernetes.io/restartedAt": "%s"}}}}}`,
time.Now().Format("2006-01-02T15:04:05-07:00"))

delay := 5 * time.Second
timer := time.NewTimer(delay)
defer timer.Stop()
for {
select {
case <-timer.C:
d, err := dClient.Get(ctx, dName, metav1.GetOptions{})
if err != nil {
log.Errorf("failed to get status for deployment %s : %v", dName, err)
} else if d.Status.UnavailableReplicas != 0 {
log.V(4).Infof("Restarting deployment %s: %d unavailable replicas", dName, d.Status.UnavailableReplicas)
} else {
log.Infof("Restarted deployment %s", dName)
return nil
}
timer.Reset(delay)
_, err := dClient.Patch(ctx, dName, k8stypes.StrategicMergePatchType, []byte(update), metav1.PatchOptions{})
if err != nil {
return fmt.Errorf("failed to update deployment %s: %s", dName, err.Error())
}

case <-ctx.Done():
return ctx.Err()
delay := 5 * time.Second
timer := time.NewTimer(delay)
defer timer.Stop()
for {
select {
case <-timer.C:
d, err := dClient.Get(ctx, dName, metav1.GetOptions{})
if err != nil {
log.Errorf("failed to get status for deployment %s : %v", dName, err)
} else if d.Status.UnavailableReplicas != 0 {
log.V(4).Infof("Restarting deployment %s: %d unavailable replicas", dName, d.Status.UnavailableReplicas)
} else {
log.Infof("Restarted deployment %s", dName)
return nil
}
timer.Reset(delay)

case <-ctx.Done():
return ctx.Err()
}
}

return nil
}

func (task *ConfigureTask) updateVirtualNodes(ctx context.Context) error {
Expand Down
2 changes: 1 addition & 1 deletion pkg/engine/configure_task_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ func TestNewConfigureTask(t *testing.T) {
Op: OpDelete,
},
},
DeploymentRestarts: []deploymentRestart{
DeploymentRestarts: []*deploymentRestart{
{
Namespace: "ns1",
Name: "deploy1",
Expand Down
3 changes: 2 additions & 1 deletion pkg/engine/submit_object_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ func (task *SubmitObjTask) Exec(ctx context.Context) error {
}

if _, err := task.client.Resource(regObjParams.gvr[i]).Namespace(obj.Metadata.Namespace).Create(ctx, crd, metav1.CreateOptions{}); err != nil {
return err
return fmt.Errorf("%s: failed to create resource %s %s: %v",
task.ID(), regObjParams.gvr[i].String(), crd.GetName(), err)
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions resources/benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ The gang-scheduling benchmark workflow operates on 32 virtual GPU nodes, submitt
To run the benchmark test for Kueue:

```bash
./bin/knavigator -workflow 'resources/benchmarks/gang-scheduling/workflows/{config-kueue.yaml,run-test.yaml}'
./bin/knavigator -workflow 'resources/benchmarks/gang-scheduling/workflows/{config-nodes.yaml,config-kueue.yaml,run-test.yaml}'
```

#### Run:ai

```bash
./bin/knavigator -workflow resources/benchmarks/gang-scheduling/workflows/runai-test.yaml
./bin/knavigator -workflow 'resources/benchmarks/gang-scheduling/workflows/{config-nodes.yaml,runai-test.yaml}'
```

## Scaling Benchmark Test
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: config-kueue
tasks:
- id: register-cluster-queue
type: RegisterObj
params:
template: "resources/templates/kueue/cluster-queue.yaml"
- id: register-local-queue
type: RegisterObj
params:
template: "resources/templates/kueue/local-queue.yaml"
- id: register-resource-flavor
type: RegisterObj
params:
template: "resources/templates/kueue/resource-flavor.yaml"
- id: register
type: RegisterObj
params:
template: "resources/benchmarks/templates/jobset/jobset-coscheduling.yaml"
nameFormat: "jobset{{._ENUM_}}"
podNameFormat: "{{._NAME_}}-workers-[0-9]+-[0-9]+-.+"
podCount: "{{.replicas}}"
- id: create-resource-flavor
type: SubmitObj
params:
refTaskId: register-resource-flavor
canExist: true
params:
name: "gpu-node"
nodeLabels:
nvidia.com/gpu.count: "8"
- id: create-cluster-queue
type: SubmitObj
params:
refTaskId: register-cluster-queue
canExist: true
params:
name: team
flavor: gpu-node
cpu: 8
memory: 36Gi
pods: 32
gpu: 256
- id: create-local-queue
type: SubmitObj
params:
refTaskId: register-local-queue
canExist: true
params:
name: team-queue
namespace: default
clusterQueue: team
- id: configure
type: Configure
params:
configmaps:
- name: scheduler-config
namespace: scheduler-plugins
op: create
data:
scheduler-config.yaml: |
apiVersion: kubescheduler.config.k8s.io/v1
kind: KubeSchedulerConfiguration
leaderElection:
leaderElect: false
profiles:
# Compose all plugins in one profile
- schedulerName: scheduler-plugins-scheduler
plugins:
multiPoint:
enabled:
- name: Coscheduling
- name: CapacityScheduling
- name: NodeResourcesAllocatable
disabled:
- name: NodeResourceTopologyMatch
- name: PrioritySort
pluginConfig:
- args:
permitWaitingTimeSeconds: 10
name: Coscheduling
deploymentRestarts:
- namespace: scheduler-plugins
name: scheduler-plugins-controller
- namespace: scheduler-plugins
name: scheduler-plugins-scheduler
timeout: 2m
14 changes: 14 additions & 0 deletions resources/benchmarks/gang-scheduling/workflows/config-kueue.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: config-kueue
description: register, deploy and configure kueue custom resources
tasks:
Expand Down
25 changes: 25 additions & 0 deletions resources/benchmarks/gang-scheduling/workflows/config-nodes.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: config-nodes
tasks:
- id: configure
type: Configure
params:
nodes:
- type: dgxa100.80g
count: 32
labels:
nvidia.com/gpu.count: "8"
timeout: 1m
14 changes: 14 additions & 0 deletions resources/benchmarks/gang-scheduling/workflows/config-volcano.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: config-volcano
description: register, deploy and configure volcano custom resources
tasks:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: config-yunikorn
description: register, deploy and configure yunikorn custom resources
tasks:
Expand Down
Loading

0 comments on commit b25e6f0

Please sign in to comment.