diff --git a/llm_analysis/gpu_configs/a10-pcie-40gb.json b/llm_analysis/gpu_configs/a10-pcie-28gb.json similarity index 84% rename from llm_analysis/gpu_configs/a10-pcie-40gb.json rename to llm_analysis/gpu_configs/a10-pcie-28gb.json index 8603465..0a93f8a 100644 --- a/llm_analysis/gpu_configs/a10-pcie-40gb.json +++ b/llm_analysis/gpu_configs/a10-pcie-28gb.json @@ -2,10 +2,10 @@ "name": "a10-pcie-28gb", "mem_per_GPU_in_GB": 28, "hbm_bandwidth_in_GB_per_sec": 600, - "intra_node_bandwidth_in_GB_per_sec": 300, + "intra_node_bandwidth_in_GB_per_sec": 32, "intra_node_min_message_latency": 8e-06, "peak_fp16_TFLOPS": 125, "peak_i8_TFLOPS": 250, "peak_i4_TFLOPS": 500, "inter_node_bandwidth_in_GB_per_sec": 200 -} +} \ No newline at end of file diff --git a/llm_analysis/gpu_configs/a10g-pcie-24gb.json b/llm_analysis/gpu_configs/a10g-pcie-24gb.json new file mode 100644 index 0000000..e107d7f --- /dev/null +++ b/llm_analysis/gpu_configs/a10g-pcie-24gb.json @@ -0,0 +1,11 @@ +{ + "name": "a10g-pcie-24gb", + "mem_per_GPU_in_GB": 24, + "hbm_bandwidth_in_GB_per_sec": 600, + "intra_node_bandwidth_in_GB_per_sec": 32, + "intra_node_min_message_latency": 8e-06, + "peak_fp16_TFLOPS": 70, + "peak_i8_TFLOPS": 140, + "peak_i4_TFLOPS": 280, + "inter_node_bandwidth_in_GB_per_sec": 200 +} \ No newline at end of file diff --git a/llm_analysis/gpu_configs/v100-pcie-16gb.json b/llm_analysis/gpu_configs/v100-pcie-16gb.json index 8ad5077..a9e133c 100644 --- a/llm_analysis/gpu_configs/v100-pcie-16gb.json +++ b/llm_analysis/gpu_configs/v100-pcie-16gb.json @@ -2,10 +2,10 @@ "name": "v100-pcie-16gb", "mem_per_GPU_in_GB": 16, "hbm_bandwidth_in_GB_per_sec": 900, - "intra_node_bandwidth_in_GB_per_sec": 150, + "intra_node_bandwidth_in_GB_per_sec": 16, "intra_node_min_message_latency": 8e-06, "peak_fp16_TFLOPS": 112, - "peak_i8_TFLOPS": 224, - "peak_i4_TFLOPS": 448, + "peak_i8_TFLOPS": 0, + "peak_i4_TFLOPS": 0, "inter_node_bandwidth_in_GB_per_sec": 200 -} +} \ No newline at end of file diff --git a/llm_analysis/gpu_configs/v100-pcie-32gb.json b/llm_analysis/gpu_configs/v100-pcie-32gb.json index 7eff5d5..80a8a3e 100644 --- a/llm_analysis/gpu_configs/v100-pcie-32gb.json +++ b/llm_analysis/gpu_configs/v100-pcie-32gb.json @@ -2,10 +2,10 @@ "name": "v100-pcie-32gb", "mem_per_GPU_in_GB": 32, "hbm_bandwidth_in_GB_per_sec": 900, - "intra_node_bandwidth_in_GB_per_sec": 150, + "intra_node_bandwidth_in_GB_per_sec": 16, "intra_node_min_message_latency": 8e-06, "peak_fp16_TFLOPS": 112, - "peak_i8_TFLOPS": 224, - "peak_i4_TFLOPS": 448, + "peak_i8_TFLOPS": 0, + "peak_i4_TFLOPS": 0, "inter_node_bandwidth_in_GB_per_sec": 200 -} +} \ No newline at end of file diff --git a/llm_analysis/gpu_configs/v100-sxm-16gb.json b/llm_analysis/gpu_configs/v100-sxm-16gb.json index d550759..5245e80 100644 --- a/llm_analysis/gpu_configs/v100-sxm-16gb.json +++ b/llm_analysis/gpu_configs/v100-sxm-16gb.json @@ -5,7 +5,7 @@ "intra_node_bandwidth_in_GB_per_sec": 150, "intra_node_min_message_latency": 8e-06, "peak_fp16_TFLOPS": 125, - "peak_i8_TFLOPS": 250, - "peak_i4_TFLOPS": 500, + "peak_i8_TFLOPS": 0, + "peak_i4_TFLOPS": 0, "inter_node_bandwidth_in_GB_per_sec": 200 -} +} \ No newline at end of file diff --git a/llm_analysis/gpu_configs/v100-sxm-32gb.json b/llm_analysis/gpu_configs/v100-sxm-32gb.json index e9f029b..47b5df7 100644 --- a/llm_analysis/gpu_configs/v100-sxm-32gb.json +++ b/llm_analysis/gpu_configs/v100-sxm-32gb.json @@ -5,7 +5,7 @@ "intra_node_bandwidth_in_GB_per_sec": 150, "intra_node_min_message_latency": 8e-06, "peak_fp16_TFLOPS": 125, - "peak_i8_TFLOPS": 250, - "peak_i4_TFLOPS": 500, + "peak_i8_TFLOPS": 0, + "peak_i4_TFLOPS": 0, "inter_node_bandwidth_in_GB_per_sec": 200 -} +} \ No newline at end of file