Skip to content

Commit

Permalink
Merge branch 'cherry_picks_0.5' into 'release-0.5'
Browse files Browse the repository at this point in the history
Cherry-picks for v0.5.4 release

See merge request nvidia/cloud-native/mig-parted!137
  • Loading branch information
shivamerla committed Sep 7, 2023
2 parents 9ab5c66 + 5022617 commit 32fdad3
Show file tree
Hide file tree
Showing 7 changed files with 78 additions and 9 deletions.
2 changes: 1 addition & 1 deletion cmd/nvidia-mig-parted/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func main() {
c.UseShortOptionHandling = true
c.EnableBashCompletion = true
c.Usage = "Manage MIG partitions across the full set of NVIDIA GPUs on a node"
c.Version = "0.5.2"
c.Version = "0.5.4"

// Setup the flags for this command
c.Flags = []cli.Flag{
Expand Down
53 changes: 51 additions & 2 deletions deployments/gpu-operator/nvidia-mig-manager-example-hopper.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ spec:
serviceAccountName: nvidia-mig-manager-service-account
containers:
- name: nvidia-mig-manager
image: nvcr.io/nvidia/cloud-native/k8s-mig-manager:v0.5.2-ubi8
image: nvcr.io/nvidia/cloud-native/k8s-mig-manager:v0.5.4-ubi8
imagePullPolicy: IfNotPresent
env:
- name: NODE_NAME
Expand Down Expand Up @@ -144,10 +144,59 @@ data:
mig-devices:
"7g.80gb": 1
all-balanced:
# H100 NVL, H800 NVL
all-1g.12gb:
- devices: all
mig-enabled: true
mig-devices:
"1g.12gb": 7
all-1g.12gb.me:
- devices: all
mig-enabled: true
mig-devices:
"1g.12gb+me": 1
all-1g.24gb:
- devices: all
mig-enabled: true
mig-devices:
"1g.24gb": 4
all-2g.24gb:
- devices: all
mig-enabled: true
mig-devices:
"2g.24gb": 3
all-3g.47gb:
- devices: all
mig-enabled: true
mig-devices:
"3g.47gb": 2
all-7g.94gb:
- devices: all
mig-enabled: true
mig-devices:
"7g.94gb": 1
# H100-80GB, H100-NVL, H800-80GB, H800-NVL
all-balanced:
# H100-80GB, H800-80GB
- device-filter: ["0x233110DE", "0x233010DE", "0x232210DE"]
devices: all
mig-enabled: true
mig-devices:
"1g.10gb": 2
"2g.20gb": 1
"3g.40gb": 1
# H100 NVL, H800 NVL
- device-filter: ["0x232110DE", "0x233A10DE"]
devices: all
mig-enabled: true
mig-devices:
"1g.12gb": 1
"2g.24gb": 1
"3g.47gb": 1
2 changes: 1 addition & 1 deletion deployments/gpu-operator/nvidia-mig-manager-example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ spec:
serviceAccountName: nvidia-mig-manager-service-account
containers:
- name: nvidia-mig-manager
image: nvcr.io/nvidia/cloud-native/k8s-mig-manager:v0.5.2-ubi8
image: nvcr.io/nvidia/cloud-native/k8s-mig-manager:v0.5.4-ubi8
imagePullPolicy: IfNotPresent
env:
- name: NODE_NAME
Expand Down
4 changes: 2 additions & 2 deletions deployments/systemd/config-default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ mig-configs:
# H100-80GB, H800-80GB, A100-80GB, A800-80GB, A100-40GB, A800-40GB
all-1g.10gb:
# H100-80GB, H800-80GB, A100-80GB, A800-80GB
- device-filter: ["0x233110DE", "0x232210DE", "0x20B210DE", "0x20B510DE", "0x20F310DE", "0x20F510DE"]
- device-filter: ["0x233110DE", "0x233010DE", "0x232210DE", "0x20B210DE", "0x20B510DE", "0x20F310DE", "0x20F510DE"]
devices: all
mig-enabled: true
mig-devices:
Expand Down Expand Up @@ -163,7 +163,7 @@ mig-configs:
"3g.20gb": 1

# H100-80GB, H800-80GB, A100-80GB, A800-80GB
- device-filter: ["0x233110DE", "0x232210DE", "0x20B210DE", "0x20B510DE", "0x20F310DE", "0x20F510DE"]
- device-filter: ["0x233110DE", "0x233010DE", "0x232210DE", "0x20B210DE", "0x20B510DE", "0x20F310DE", "0x20F510DE"]
devices: all
mig-enabled: true
mig-devices:
Expand Down
13 changes: 13 additions & 0 deletions deployments/systemd/packages/debian/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
nvidia-mig-manager (0.5.4-1) UNRELEASED; urgency=medium

* Update MIG config for Hopper with device ID of H100 80GB HBM3 SKU

-- NVIDIA CORPORATION <cudatools@nvidia.com> Thu, 7 Sep 2023 21:27:44 +0200

nvidia-mig-manager (0.5.3-1) UNRELEASED; urgency=medium

* Update to latest CUDA image 12.2.0
* Update example config for Hopper with H100 NVL and H800 NVL

-- NVIDIA CORPORATION <cudatools@nvidia.com> Wed, 12 Jul 2023 11:06:03 +0200

nvidia-mig-manager (0.5.2-1) UNRELEASED; urgency=medium

* Update to latest CUDA image 12.1.0
Expand Down
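7 changes: 7 additions & 0 deletions deployments/systemd/packages/rpm/SPECS/nvidia-mig-manager.spec
Original file line number Diff line number Diff line change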
Expand Up @@ -133,6 +133,13 @@ maybe_remove_hooks_symlink
maybe_remove_config_symlink

%changelog
* Thu Sep 7 2023 NVIDIA CORPORATION <cudatools@nvidia.com> 0.5.4-1
- Update MIG config for Hopper with device ID of H100 80GB HBM3 SKU

* Wed Jul 12 2023 NVIDIA CORPORATION <cudatools@nvidia.com> 0.5.3-1
- Update to latest CUDA image 12.2.0
- Update example config for Hopper with H100 NVL and H800 NVL

* Tue Mar 28 2023 NVIDIA CORPORATION <cudatools@nvidia.com> 0.5.2-1
- Update to latest CUDA image 12.1.0
- Update k8s-mig-manager to support CDI
Expand Down
6 changes: 3 additions & 3 deletions versions.mk
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

VERSION ?= 0.5.2
VERSION ?= 0.5.4

vVERSION := v$(VERSION:v%=%)

CUDA_VERSION := 12.1.0
CUDA_VERSION := 12.2.0
GOLANG_VERSION := 1.20.1

NVIDIA_CTK_VERSION := f6983969ad5d67b84adfda8eee5b43083790ff22
NVIDIA_CTK_VERSION := v1.13.4

0 comments on commit 32fdad3

Please sign in to comment.