Skip to content

Commit

Permalink
add scaling benchmark
Browse files Browse the repository at this point in the history
Signed-off-by: Dmitry Shmulevich <[email protected]>
  • Loading branch information
dmitsh committed Aug 13, 2024
1 parent 28095af commit 0458083
Show file tree
Hide file tree
Showing 19 changed files with 413 additions and 155 deletions.
53 changes: 53 additions & 0 deletions resources/benchmarks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Benchmark Tests

This directory contains benchmark tests for the following workload managers and schedulers:

- Kueue
- Volcano
- Yunikorn
- Run:ai

The benchmark tests involve submitting workloads intended to evaluate the scheduler's performance under specific scenarios.

These workloads are designed to fully utilize the cluster under optimal scheduling conditions.

One approach to benchmarking is to run these workloads on clusters with different schedulers and then compare the average GPU occupancy of the nodes.

For all workload managers except Run:ai, the benchmark test involves two sequential workflows. The first workflow registers the CRDs, and the second workflow runs the common part of the test.
Run:ai requires additional customization and thus has a separate workflow.

## Gang Scheduling Benchmark Test

The gang-scheduling benchmark workflow operates on 32 virtual GPU nodes, submitting a burst of 53 jobs with replica numbers ranging from 1 to 32 in a [predetermined order](gang-scheduling/workflows/run-test-common.yml).

### Example

To run the benchmark test for Kueue:

```bash
./bin/knavigator -workflow 'resources/benchmarks/gang-scheduling/workflows/{config-kueue.yml,run-test-common.yml}'
```

### Run:ai

```bash
./bin/knavigator -workflow resources/benchmarks/gang-scheduling/workflows/run-test-runai.yml
```

## Scaling Benchmark Test

The scaling benchmark workflow operates on 500 virtual GPU nodes, submitting [two workloads](scaling/workflows/run-test-common.yml) one after another. The first workload is a job with 500 replicas; the second workload is 500 single-node jobs started simultaneously.

### Example

To run the benchmark test for Volcano:

```bash
./bin/knavigator -workflow 'resources/benchmarks/scaling/workflows/{config-volcano.yml,run-test-common.yml}'
```

### Run:ai

```bash
./bin/knavigator -workflow resources/benchmarks/scaling/workflows/run-test-runai.yml
```
35 changes: 0 additions & 35 deletions resources/benchmarks/gang-scheduling/README.md

This file was deleted.

This file was deleted.

78 changes: 78 additions & 0 deletions resources/benchmarks/gang-scheduling/workflows/config-kueue.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ tasks:
flavor: gpu-node
cpu: 8
memory: 36Gi
pods: 32
gpu: 256
- id: create-local-queue
type: SubmitObj
Expand All @@ -48,3 +49,80 @@ tasks:
name: team-queue
namespace: default
clusterQueue: team
# Writes the Kueue controller-manager configuration into the
# kueue-manager-config ConfigMap (op: create) in the kueue-system namespace.
# The embedded document enables waitForPodsReady with blockAdmission and a
# 5m timeout; evicted workloads are requeued without a retry limit
# (backoffLimitCount: null).
# NOTE(review): deploymentRestarts presumably restarts kueue-controller-manager
# so that it picks up the new ConfigMap - confirm against the Configure task.
- id: configure
  type: Configure
  params:
    configmaps:
    - name: kueue-manager-config
      namespace: kueue-system
      op: create
      data:
        controller_manager_config.yaml: |
          apiVersion: config.kueue.x-k8s.io/v1beta1
          kind: Configuration
          health:
            healthProbeBindAddress: :8081
          metrics:
            bindAddress: :8080
          # enableClusterQueueResources: true
          webhook:
            port: 9443
          leaderElection:
            leaderElect: true
            resourceName: c1f6bfd2.kueue.x-k8s.io
          controller:
            groupKindConcurrency:
              Job.batch: 5
              Pod: 5
              Workload.kueue.x-k8s.io: 5
              LocalQueue.kueue.x-k8s.io: 1
              ClusterQueue.kueue.x-k8s.io: 1
              ResourceFlavor.kueue.x-k8s.io: 1
          clientConnection:
            qps: 50
            burst: 100
          #pprofBindAddress: :8083
          waitForPodsReady:
            enable: true
            timeout: 5m
            blockAdmission: true
            requeuingStrategy:
              timestamp: Eviction
              backoffLimitCount: null # null indicates infinite requeuing
              backoffBaseSeconds: 60
              backoffMaxSeconds: 3600
          #manageJobsWithoutQueueName: true
          #internalCertManagement:
          #  enable: false
          #  webhookServiceName: ""
          #  webhookSecretName: ""
          integrations:
            frameworks:
            - "batch/job"
            - "kubeflow.org/mpijob"
            - "ray.io/rayjob"
            - "ray.io/raycluster"
            - "jobset.x-k8s.io/jobset"
            - "kubeflow.org/mxjob"
            - "kubeflow.org/paddlejob"
            - "kubeflow.org/pytorchjob"
            - "kubeflow.org/tfjob"
            - "kubeflow.org/xgboostjob"
          #  - "pod"
          #  externalFrameworks:
          #  - "Foo.v1.example.com"
          #  podOptions:
          #    namespaceSelector:
          #      matchExpressions:
          #        - key: kubernetes.io/metadata.name
          #          operator: NotIn
          #          values: [ kube-system, kueue-system ]
          #fairSharing:
          #  enable: true
          #  preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare]
          #resources:
          #  excludeResourcePrefixes: []
    deploymentRestarts:
    - namespace: kueue-system
      name: kueue-controller-manager
    timeout: 1m
128 changes: 128 additions & 0 deletions resources/benchmarks/scaling/workflows/config-kueue.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# Kueue setup workflow for the scaling benchmark. It is meant to be run
# ahead of the scheduler-independent run-test-common.yml workflow.
name: config-kueue
tasks:
# Register the Kueue object templates; later SubmitObj tasks refer to these
# registrations through refTaskId.
- id: register-cluster-queue
  type: RegisterObj
  params:
    template: "resources/templates/kueue/cluster-queue.yml"
- id: register-local-queue
  type: RegisterObj
  params:
    template: "resources/templates/kueue/local-queue.yml"
- id: register-resource-flavor
  type: RegisterObj
  params:
    template: "resources/templates/kueue/resource-flavor.yml"
# Register the benchmark job template together with the naming patterns
# used for the jobs and their pods.
- id: register
  type: RegisterObj
  params:
    template: "resources/benchmarks/templates/kueue/job.yml"
    nameFormat: "job{{._ENUM_}}"
    # The numeric segment must accept more than one digit: this benchmark
    # runs jobs with up to 500 replicas, and a bare [0-9] matches only 0-9.
    # NOTE(review): assumes the segment is the pod's index within the job -
    # confirm against the job template.
    podNameFormat: "{{._NAME_}}-[0-9]+-.*"
    podCount: "{{.replicas}}"
# Instantiate the registered Kueue templates for the scaling run.
# NOTE(review): canExist: true presumably lets a task succeed when the
# object is already present - confirm against the SubmitObj task.

# Resource flavor selecting nodes labelled with 8 GPUs each.
- id: create-resource-flavor
  type: SubmitObj
  params:
    refTaskId: register-resource-flavor
    canExist: true
    params:
      name: gpu-node
      nodeLabels:
        nvidia.com/gpu.count: "8"
# Cluster queue quota for the "team" queue on the gpu-node flavor.
# gpu: 4000 corresponds to 500 nodes x 8 GPUs per node.
- id: create-cluster-queue
  type: SubmitObj
  params:
    refTaskId: register-cluster-queue
    canExist: true
    params:
      name: team
      flavor: gpu-node
      cpu: 50
      memory: 360Gi
      pods: 500
      gpu: 4000
# Local queue in the default namespace bound to the "team" cluster queue.
- id: create-local-queue
  type: SubmitObj
  params:
    refTaskId: register-local-queue
    canExist: true
    params:
      name: team-queue
      namespace: default
      clusterQueue: team
# Writes the Kueue controller-manager configuration into the
# kueue-manager-config ConfigMap (op: create) in the kueue-system namespace.
# The embedded document enables waitForPodsReady with blockAdmission and a
# 5m timeout; evicted workloads are requeued without a retry limit
# (backoffLimitCount: null).
# NOTE(review): deploymentRestarts presumably restarts kueue-controller-manager
# so that it picks up the new ConfigMap - confirm against the Configure task.
- id: configure
  type: Configure
  params:
    configmaps:
    - name: kueue-manager-config
      namespace: kueue-system
      op: create
      data:
        controller_manager_config.yaml: |
          apiVersion: config.kueue.x-k8s.io/v1beta1
          kind: Configuration
          health:
            healthProbeBindAddress: :8081
          metrics:
            bindAddress: :8080
          # enableClusterQueueResources: true
          webhook:
            port: 9443
          leaderElection:
            leaderElect: true
            resourceName: c1f6bfd2.kueue.x-k8s.io
          controller:
            groupKindConcurrency:
              Job.batch: 5
              Pod: 5
              Workload.kueue.x-k8s.io: 5
              LocalQueue.kueue.x-k8s.io: 1
              ClusterQueue.kueue.x-k8s.io: 1
              ResourceFlavor.kueue.x-k8s.io: 1
          clientConnection:
            qps: 50
            burst: 100
          #pprofBindAddress: :8083
          waitForPodsReady:
            enable: true
            timeout: 5m
            blockAdmission: true
            requeuingStrategy:
              timestamp: Eviction
              backoffLimitCount: null # null indicates infinite requeuing
              backoffBaseSeconds: 60
              backoffMaxSeconds: 3600
          #manageJobsWithoutQueueName: true
          #internalCertManagement:
          #  enable: false
          #  webhookServiceName: ""
          #  webhookSecretName: ""
          integrations:
            frameworks:
            - "batch/job"
            - "kubeflow.org/mpijob"
            - "ray.io/rayjob"
            - "ray.io/raycluster"
            - "jobset.x-k8s.io/jobset"
            - "kubeflow.org/mxjob"
            - "kubeflow.org/paddlejob"
            - "kubeflow.org/pytorchjob"
            - "kubeflow.org/tfjob"
            - "kubeflow.org/xgboostjob"
          #  - "pod"
          #  externalFrameworks:
          #  - "Foo.v1.example.com"
          #  podOptions:
          #    namespaceSelector:
          #      matchExpressions:
          #        - key: kubernetes.io/metadata.name
          #          operator: NotIn
          #          values: [ kube-system, kueue-system ]
          #fairSharing:
          #  enable: true
          #  preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare]
          #resources:
          #  excludeResourcePrefixes: []
    deploymentRestarts:
    - namespace: kueue-system
      name: kueue-controller-manager
    timeout: 1m
31 changes: 31 additions & 0 deletions resources/benchmarks/scaling/workflows/config-volcano.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Volcano setup workflow for the scaling benchmark. It is meant to be run
# ahead of the scheduler-independent run-test-common.yml workflow.
name: config-volcano
tasks:
# Register the Volcano job template together with the naming patterns used
# for the jobs and their pods.
- id: register
  type: RegisterObj
  params:
    template: "resources/benchmarks/templates/volcano/job.yml"
    nameFormat: "j{{._ENUM_}}"
    podNameFormat: "{{._NAME_}}-test-[0-9]+"
    podCount: "{{.replicas}}"
# Write the scheduler configuration (enqueue, allocate and backfill actions,
# two plugin tiers) into the volcano-scheduler-configmap ConfigMap in the
# volcano-system namespace.
- id: configure
  type: Configure
  params:
    configmaps:
    - name: volcano-scheduler-configmap
      namespace: volcano-system
      op: create
      data:
        volcano-scheduler.conf: |
          actions: "enqueue, allocate, backfill"
          tiers:
          - plugins:
            - name: priority
            - name: gang
            - name: conformance
          - plugins:
            - name: drf
            - name: predicates
            - name: proportion
            - name: nodeorder
            - name: binpack
    timeout: 1m
29 changes: 29 additions & 0 deletions resources/benchmarks/scaling/workflows/config-yunikorn.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# YuniKorn setup workflow for the scaling benchmark. It is meant to be run
# ahead of the scheduler-independent run-test-common.yml workflow.
name: config-yunikorn
tasks:
# Register the YuniKorn job template together with the naming patterns used
# for the jobs and their pods.
- id: register
  type: RegisterObj
  params:
    template: "resources/benchmarks/templates/yunikorn/job.yml"
    nameFormat: "job{{._ENUM_}}"
    podNameFormat: "{{._NAME_}}-.*"
    podCount: "{{.replicas}}"
# Write the queue hierarchy (root -> sandbox) into the yunikorn-configs
# ConfigMap in the yunikorn namespace. The sandbox queue is capped at
# 360Gi memory, 50000m vcore and 4000 GPUs.
- id: configure
  type: Configure
  params:
    configmaps:
    - name: yunikorn-configs
      namespace: yunikorn
      op: create
      data:
        queues.yaml: |
          partitions:
            - name: default
              queues:
                - name: root
                  queues:
                  - name: sandbox
                    submitacl: '*'
                    resources:
                      max:
                        {memory: 360Gi, vcore: 50000m, nvidia.com/gpu: 4000}
    timeout: 1m
Loading

0 comments on commit 0458083

Please sign in to comment.