Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates for DCGM 3.3.0 #45

Merged
merged 3 commits into from
Nov 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pkg/dcgm/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"os"
"sync"
"time"
)

var (
Expand Down Expand Up @@ -89,7 +90,7 @@ func GetDeviceTopology(gpuId uint) ([]P2PLink, error) {
// WatchPidFields lets DCGM start recording stats for GPU processes.
// It needs to be called before calling GetProcessInfo.
func WatchPidFields() (GroupHandle, error) {
return watchPidFields(defaultUpdateFreq, defaultMaxKeepAge, defaultMaxKeepSamples)
return watchPidFields(time.Microsecond*time.Duration(defaultUpdateFreq), time.Second*time.Duration(defaultMaxKeepAge), defaultMaxKeepSamples)
}

// GetProcessInfo provides detailed per GPU stats for this process
Expand Down
215 changes: 56 additions & 159 deletions pkg/dcgm/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ const (
DCGM_FI_DEV_FB_USED = 252
DCGM_FI_DEV_FB_RESERVED = 253
DCGM_FI_DEV_FB_USED_PERCENT = 254
DCGM_FI_DEV_C2C_LINK_COUNT = 285
DCGM_FI_DEV_C2C_LINK_STATUS = 286
DCGM_FI_DEV_C2C_MAX_BANDWIDTH = 287
DCGM_FI_DEV_ECC_CURRENT = 300
DCGM_FI_DEV_ECC_PENDING = 301
DCGM_FI_DEV_ECC_SBE_VOL_TOTAL = 310
Expand Down Expand Up @@ -311,82 +314,15 @@ const (
DCGM_FI_DEV_VGPU_INSTANCE_LICENSE_STATE = 532
DCGM_FI_DEV_VGPU_PCI_ID = 533
DCGM_FI_DEV_VGPU_VM_GPU_INSTANCE_ID = 534
DCGM_FI_FIRST_VGPU_FIELD_ID = 520
DCGM_FI_LAST_VGPU_FIELD_ID = 570
DCGM_FI_INTERNAL_FIELDS_0_START = 600
DCGM_FI_INTERNAL_FIELDS_0_END = 699
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P00 = 700
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P00 = 701
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P00 = 702
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P00 = 703
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P01 = 704
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P01 = 705
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P01 = 706
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P01 = 707
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P02 = 708
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P02 = 709
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P02 = 710
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P02 = 711
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P03 = 712
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P03 = 713
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P03 = 714
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P03 = 715
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P04 = 716
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P04 = 717
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P04 = 718
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P04 = 719
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P05 = 720
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P05 = 721
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P05 = 722
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P05 = 723
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P06 = 724
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P06 = 725
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P06 = 726
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P06 = 727
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P07 = 728
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P07 = 729
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P07 = 730
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P07 = 731
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P08 = 732
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P08 = 733
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P08 = 734
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P08 = 735
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P09 = 736
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P09 = 737
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P09 = 738
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P09 = 739
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P10 = 740
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P10 = 741
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P10 = 742
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P10 = 743
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P11 = 744
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P11 = 745
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P11 = 746
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P11 = 747
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P12 = 748
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P12 = 749
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P12 = 750
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P12 = 751
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P13 = 752
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P13 = 753
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P13 = 754
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P13 = 755
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P14 = 756
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P14 = 757
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P14 = 758
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P14 = 759
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P15 = 760
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P15 = 761
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P15 = 762
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P15 = 763
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P16 = 764
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P16 = 765
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P16 = 766
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P16 = 767
DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P17 = 768
DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P17 = 769
DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P17 = 770
DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P17 = 771
DCGM_FI_DEV_NVSWITCH_VOLTAGE_MVOLT = 701
DCGM_FI_DEV_NVSWITCH_CURRENT_IDDQ = 702
DCGM_FI_DEV_NVSWITCH_CURRENT_IDDQ_REV = 703
DCGM_FI_DEV_NVSWITCH_CURRENT_IDDQ_DVDD = 704
DCGM_FI_DEV_NVSWITCH_POWER_VDD = 705
DCGM_FI_DEV_NVSWITCH_POWER_DVDD = 706
DCGM_FI_DEV_NVSWITCH_POWER_HVDD = 707
DCGM_FI_DEV_NVSWITCH_LINK_THROUGHPUT_TX = 780
DCGM_FI_DEV_NVSWITCH_LINK_THROUGHPUT_RX = 781
DCGM_FI_DEV_NVSWITCH_LINK_FATAL_ERRORS = 782
Expand Down Expand Up @@ -447,8 +383,6 @@ const (
DCGM_FI_DEV_NVSWITCH_LINK_DEVICE_LINK_ID = 876
DCGM_FI_DEV_NVSWITCH_LINK_DEVICE_LINK_SID = 877
DCGM_FI_DEV_NVSWITCH_LINK_DEVICE_UUID = 878
DCGM_FI_FIRST_NVSWITCH_FIELD_ID = 700
DCGM_FI_LAST_NVSWITCH_FIELD_ID = 899
DCGM_FI_PROF_GR_ENGINE_ACTIVE = 1001
DCGM_FI_PROF_SM_ACTIVE = 1002
DCGM_FI_PROF_SM_OCCUPANCY = 1003
Expand Down Expand Up @@ -518,7 +452,20 @@ const (
DCGM_FI_PROF_NVLINK_L16_RX_BYTES = 1073
DCGM_FI_PROF_NVLINK_L17_TX_BYTES = 1074
DCGM_FI_PROF_NVLINK_L17_RX_BYTES = 1075
DCGM_FI_MAX_FIELDS = 1076
DCGM_FI_DEV_CPU_UTIL_TOTAL = 1100
DCGM_FI_DEV_CPU_UTIL_USER = 1101
DCGM_FI_DEV_CPU_UTIL_NICE = 1102
DCGM_FI_DEV_CPU_UTIL_SYS = 1103
DCGM_FI_DEV_CPU_UTIL_IRQ = 1104
DCGM_FI_DEV_CPU_TEMP_CURRENT = 1110
DCGM_FI_DEV_CPU_TEMP_WARNING = 1111
DCGM_FI_DEV_CPU_TEMP_CRITICAL = 1112
DCGM_FI_DEV_CPU_CLOCK_CURRENT = 1120
DCGM_FI_DEV_CPU_POWER_UTIL_CURRENT = 1130
DCGM_FI_DEV_CPU_POWER_LIMIT = 1131
DCGM_FI_DEV_CPU_VENDOR = 1140
DCGM_FI_DEV_CPU_MODEL = 1141
DCGM_FI_MAX_FIELDS = 1142

DCGM_ST_OK = 0
DCGM_ST_BADPARAM = -1
Expand Down Expand Up @@ -573,15 +520,18 @@ const (
DCGM_ST_NVVS_ISOLATE_ERROR = -51
DCGM_ST_NVVS_BINARY_NOT_FOUND = -52
DCGM_ST_NVVS_KILLED = -53
DCGM_ST_PAUSED = -54
DCGM_ST_ALREADY_INITIALIZED = -55
)

var (
DCGM_FI = map[string]Short{
"DCGM_FT_BINARY": Short('b'),
"DCGM_FT_DOUBLE": Short('d'),
"DCGM_FT_INT64": Short('i'),
"DCGM_FT_STRING": Short('s'),
"DCGM_FT_TIMESTAMP": Short('t'),
"DCGM_FT_BINARY": Short('b'),
"DCGM_FT_DOUBLE": Short('d'),
"DCGM_FT_INT64": Short('i'),
"DCGM_FT_STRING": Short('s'),
"DCGM_FT_TIMESTAMP": Short('t'),

"DCGM_FI_UNKNOWN": 0,
"DCGM_FI_DRIVER_VERSION": 1,
"DCGM_FI_NVML_VERSION": 2,
Expand Down Expand Up @@ -682,6 +632,9 @@ var (
"DCGM_FI_DEV_FB_USED": 252,
"DCGM_FI_DEV_FB_RESERVED": 253,
"DCGM_FI_DEV_FB_USED_PERCENT": 254,
"DCGM_FI_DEV_C2C_LINK_COUNT": 285,
"DCGM_FI_DEV_C2C_LINK_STATUS": 286,
"DCGM_FI_DEV_C2C_MAX_BANDWIDTH": 287,
"DCGM_FI_DEV_ECC_CURRENT": 300,
"DCGM_FI_DEV_ECC_PENDING": 301,
"DCGM_FI_DEV_ECC_SBE_VOL_TOTAL": 310,
Expand Down Expand Up @@ -845,82 +798,15 @@ var (
"DCGM_FI_DEV_VGPU_INSTANCE_LICENSE_STATE": 532,
"DCGM_FI_DEV_VGPU_PCI_ID": 533,
"DCGM_FI_DEV_VGPU_VM_GPU_INSTANCE_ID": 534,
"DCGM_FI_FIRST_VGPU_FIELD_ID": 520,
"DCGM_FI_LAST_VGPU_FIELD_ID": 570,
"DCGM_FI_INTERNAL_FIELDS_0_START": 600,
"DCGM_FI_INTERNAL_FIELDS_0_END": 699,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P00": 700,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P00": 701,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P00": 702,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P00": 703,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P01": 704,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P01": 705,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P01": 706,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P01": 707,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P02": 708,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P02": 709,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P02": 710,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P02": 711,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P03": 712,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P03": 713,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P03": 714,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P03": 715,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P04": 716,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P04": 717,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P04": 718,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P04": 719,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P05": 720,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P05": 721,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P05": 722,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P05": 723,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P06": 724,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P06": 725,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P06": 726,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P06": 727,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P07": 728,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P07": 729,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P07": 730,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P07": 731,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P08": 732,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P08": 733,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P08": 734,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P08": 735,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P09": 736,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P09": 737,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P09": 738,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P09": 739,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P10": 740,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P10": 741,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P10": 742,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P10": 743,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P11": 744,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P11": 745,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P11": 746,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P11": 747,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P12": 748,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P12": 749,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P12": 750,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P12": 751,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P13": 752,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P13": 753,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P13": 754,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P13": 755,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P14": 756,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P14": 757,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P14": 758,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P14": 759,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P15": 760,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P15": 761,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P15": 762,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P15": 763,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P16": 764,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P16": 765,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P16": 766,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P16": 767,
"DCGM_FI_DEV_NVSWITCH_LATENCY_LOW_P17": 768,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MED_P17": 769,
"DCGM_FI_DEV_NVSWITCH_LATENCY_HIGH_P17": 770,
"DCGM_FI_DEV_NVSWITCH_LATENCY_MAX_P17": 771,
"DCGM_FI_DEV_NVSWITCH_VOLTAGE_MVOLT": 701,
"DCGM_FI_DEV_NVSWITCH_CURRENT_IDDQ": 702,
"DCGM_FI_DEV_NVSWITCH_CURRENT_IDDQ_REV": 703,
"DCGM_FI_DEV_NVSWITCH_CURRENT_IDDQ_DVDD": 704,
"DCGM_FI_DEV_NVSWITCH_POWER_VDD": 705,
"DCGM_FI_DEV_NVSWITCH_POWER_DVDD": 706,
"DCGM_FI_DEV_NVSWITCH_POWER_HVDD": 707,
"DCGM_FI_DEV_NVSWITCH_LINK_THROUGHPUT_TX": 780,
"DCGM_FI_DEV_NVSWITCH_LINK_THROUGHPUT_RX": 781,
"DCGM_FI_DEV_NVSWITCH_LINK_FATAL_ERRORS": 782,
Expand Down Expand Up @@ -981,8 +867,6 @@ var (
"DCGM_FI_DEV_NVSWITCH_LINK_DEVICE_LINK_ID": 876,
"DCGM_FI_DEV_NVSWITCH_LINK_DEVICE_LINK_SID": 877,
"DCGM_FI_DEV_NVSWITCH_LINK_DEVICE_UUID": 878,
"DCGM_FI_FIRST_NVSWITCH_FIELD_ID": 700,
"DCGM_FI_LAST_NVSWITCH_FIELD_ID": 899,
"DCGM_FI_PROF_GR_ENGINE_ACTIVE": 1001,
"DCGM_FI_PROF_SM_ACTIVE": 1002,
"DCGM_FI_PROF_SM_OCCUPANCY": 1003,
Expand Down Expand Up @@ -1052,7 +936,20 @@ var (
"DCGM_FI_PROF_NVLINK_L16_RX_BYTES": 1073,
"DCGM_FI_PROF_NVLINK_L17_TX_BYTES": 1074,
"DCGM_FI_PROF_NVLINK_L17_RX_BYTES": 1075,
"DCGM_FI_MAX_FIELDS": 1076,
"DCGM_FI_DEV_CPU_UTIL_TOTAL": 1100,
"DCGM_FI_DEV_CPU_UTIL_USER": 1101,
"DCGM_FI_DEV_CPU_UTIL_NICE": 1102,
"DCGM_FI_DEV_CPU_UTIL_SYS": 1103,
"DCGM_FI_DEV_CPU_UTIL_IRQ": 1104,
"DCGM_FI_DEV_CPU_TEMP_CURRENT": 1110,
"DCGM_FI_DEV_CPU_TEMP_WARNING": 1111,
"DCGM_FI_DEV_CPU_TEMP_CRITICAL": 1112,
"DCGM_FI_DEV_CPU_CLOCK_CURRENT": 1120,
"DCGM_FI_DEV_CPU_POWER_UTIL_CURRENT": 1130,
"DCGM_FI_DEV_CPU_POWER_LIMIT": 1131,
"DCGM_FI_DEV_CPU_VENDOR": 1140,
"DCGM_FI_DEV_CPU_MODEL": 1141,
"DCGM_FI_MAX_FIELDS": 1142,
}
)

Expand Down
25 changes: 21 additions & 4 deletions pkg/dcgm/dcgm_agent.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,13 +17,13 @@
#ifndef DCGM_AGENT_H
#define DCGM_AGENT_H

#define DCGM_PUBLIC_API
#include "dcgm_structs.h"

#ifdef __cplusplus
extern "C" {
#endif

#define DCGM_PUBLIC_API

/***************************************************************************************************/
/** @defgroup DCGMAPI_Admin Administrative
Expand Down Expand Up @@ -274,8 +274,8 @@ DCGM_PUBLIC_API dcgmReturn_t dcgmModuleIdToName(dcgmModuleId_t id, char const **
/***************************************************************************************************/
/** @defgroup DCGMAPI_SYS System
* @{
* This chapter describes the APIs used to identify set of GPUs on the node, grouping functions to
* provide mechanism to operate on a group of GPUs, and status management APIs in
* This chapter describes the APIs used to identify entities on the node, grouping functions that
* provide a mechanism to operate on a group of entities, and status management APIs used
* to get individual statuses for each operation. The APIs in the System module can be
* broken down into the following categories:
*/
Expand Down Expand Up @@ -405,6 +405,23 @@ dcgmReturn_t DCGM_PUBLIC_API dcgmGetGpuInstanceHierarchy(dcgmHandle_t dcgmHandle
*/
dcgmReturn_t DCGM_PUBLIC_API dcgmGetNvLinkLinkStatus(dcgmHandle_t dcgmHandle, dcgmNvLinkStatus_v3 *linkStatus);


/**
* List supported CPUs and their cores present on the system
*
* This and other CPU APIs only support datacenter NVIDIA CPUs
*
* @param dcgmHandle IN: DCGM Handle
* @param cpuHierarchy OUT: Structure where the CPUs and their associated cores will be enumerated
*
* @return
* - \ref DCGM_ST_OK if the call was successful.
* - \ref DCGM_ST_NOT_SUPPORTED if the device is unsupported
* - \ref DCGM_ST_MODULE_NOT_LOADED if the sysmon module could not be loaded
* - \ref DCGM_ST_BADPARAM if any parameter is invalid
*/
dcgmReturn_t DCGM_PUBLIC_API dcgmGetCpuHierarchy(dcgmHandle_t dcgmHandle, dcgmCpuHierarchy_v1 *cpuHierarchy);

/** @} */

/***************************************************************************************************/
Expand Down
Loading