Squashed commit of the following:

commit c5b2fc0
Author: Allison Piper <[email protected]>
Date:   Sat Apr 6 21:48:20 2024 +0000

    Add supported compilers and tools in README.md.

commit 92fe366
Author: Allison Piper <[email protected]>
Date:   Sat Apr 6 20:45:30 2024 +0000

    Fix issues discovered by header tests.

commit f7f6c92
Author: Allison Piper <[email protected]>
Date:   Sat Apr 6 20:45:06 2024 +0000

    Setup header tests, add C++20 header tests + examples.

    The core library will always be built with C++17, but we test our
    headers / examples under 17 and 20.

commit 4b24f26
Author: Allison Piper <[email protected]>
Date:   Sat Apr 6 16:21:42 2024 +0000

    Pass CUDA FLAGS to install tests.

commit 4fb672a
Author: Allison Piper <[email protected]>
Date:   Sat Apr 6 15:43:41 2024 +0000

    Add newer GCC (13) and Clang (17, 18).
NVIDIA · Apr 6, 2024 · a0f2fab · a0f2fab
1 parent a2f88ff
commit a0f2fab
Show file tree

Hide file tree

Showing 20 changed files with 405 additions and 127 deletions.
diff --git a/.devcontainer/cuda12.4-llvm17/devcontainer.json b/.devcontainer/cuda12.4-llvm17/devcontainer.json
@@ -0,0 +1,46 @@
+{
+  "shutdownAction": "stopContainer",
+  "image": "rapidsai/devcontainers:24.06-cpp-llvm17-cuda12.4-ubuntu22.04",
+  "hostRequirements": {
+    "gpu": "optional"
+  },
+  "initializeCommand": [
+    "/bin/bash",
+    "-c",
+    "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+  ],
+  "containerEnv": {
+    "SCCACHE_REGION": "us-east-2",
+    "SCCACHE_BUCKET": "rapids-sccache-devs",
+    "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+    "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+    "DEVCONTAINER_NAME": "cuda12.4-llvm17",
+    "CCCL_CUDA_VERSION": "12.4",
+    "CCCL_HOST_COMPILER": "llvm",
+    "CCCL_HOST_COMPILER_VERSION": "17",
+    "CCCL_BUILD_INFIX": "cuda12.4-llvm17"
+  },
+  "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+  "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+  "mounts": [
+    "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+  ],
+  "customizations": {
+    "vscode": {
+      "extensions": [
+        "llvm-vs-code-extensions.vscode-clangd",
+        "xaver.clang-format"
+      ],
+      "settings": {
+        "editor.defaultFormatter": "xaver.clang-format",
+        "clang-format.executable": "/usr/local/bin/clang-format",
+        "clangd.arguments": [
+          "--compile-commands-dir=${workspaceFolder}"
+        ]
+      }
+    }
+  },
+  "name": "cuda12.4-llvm17"
+}
diff --git a/.devcontainer/cuda12.4-llvm18/devcontainer.json b/.devcontainer/cuda12.4-llvm18/devcontainer.json
@@ -0,0 +1,46 @@
+{
+  "shutdownAction": "stopContainer",
+  "image": "rapidsai/devcontainers:24.06-cpp-llvm18-cuda12.4-ubuntu22.04",
+  "hostRequirements": {
+    "gpu": "optional"
+  },
+  "initializeCommand": [
+    "/bin/bash",
+    "-c",
+    "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+  ],
+  "containerEnv": {
+    "SCCACHE_REGION": "us-east-2",
+    "SCCACHE_BUCKET": "rapids-sccache-devs",
+    "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+    "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+    "DEVCONTAINER_NAME": "cuda12.4-llvm18",
+    "CCCL_CUDA_VERSION": "12.4",
+    "CCCL_HOST_COMPILER": "llvm",
+    "CCCL_HOST_COMPILER_VERSION": "18",
+    "CCCL_BUILD_INFIX": "cuda12.4-llvm18"
+  },
+  "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+  "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+  "mounts": [
+    "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+  ],
+  "customizations": {
+    "vscode": {
+      "extensions": [
+        "llvm-vs-code-extensions.vscode-clangd",
+        "xaver.clang-format"
+      ],
+      "settings": {
+        "editor.defaultFormatter": "xaver.clang-format",
+        "clang-format.executable": "/usr/local/bin/clang-format",
+        "clangd.arguments": [
+          "--compile-commands-dir=${workspaceFolder}"
+        ]
+      }
+    }
+  },
+  "name": "cuda12.4-llvm18"
+}
diff --git a/.github/actions/compute-matrix/compute-matrix.sh b/.github/actions/compute-matrix/compute-matrix.sh
@@ -8,21 +8,13 @@ write_output() {
   echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}"
 }
 
-explode_std_versions() {
-  jq -cr 'map(. as $o | {std: $o.std[]} + del($o.std))'
-}
-
-explode_libs() {
-  jq -cr 'map(. as $o | {lib: $o.lib[]} + del($o.lib))'
-}
-
 extract_matrix() {
   local file="$1"
   local type="$2"
   local matrix=$(yq -o=json "$file" | jq -cr ".$type")
   write_output "DEVCONTAINER_VERSION" "$(yq -o json "$file" | jq -cr '.devcontainer_version')"
 
-  local nvcc_full_matrix="$(echo "$matrix" | jq -cr '.nvcc' | explode_std_versions )"
+  local nvcc_full_matrix="$(echo "$matrix" | jq -cr '.nvcc')"
   local per_cuda_compiler_matrix="$(echo "$nvcc_full_matrix" | jq -cr ' group_by(.cuda + .compiler.name) | map({(.[0].cuda + "-" + .[0].compiler.name): .}) | add')"
   write_output "PER_CUDA_COMPILER_MATRIX"  "$per_cuda_compiler_matrix"
   write_output "PER_CUDA_COMPILER_KEYS" "$(echo "$per_cuda_compiler_matrix" | jq -r 'keys | @json')"

diff --git a/.github/workflows/dispatch-build-and-test.yml b/.github/workflows/dispatch-build-and-test.yml
@@ -28,9 +28,9 @@ jobs:
         include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }}
     with:
       cpu: ${{ matrix.cpu }}
-      test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}} ${{matrix.extra_build_args}}
-      build_script: "./ci/build_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}} ${{matrix.extra_build_args}}"
-      test_script:  "./ci/test_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}} ${{matrix.extra_build_args}}"
+      test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}} ${{matrix.extra_build_args}}
+      build_script: "./ci/build_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} ${{matrix.extra_build_args}}"
+      test_script:  "./ci/test_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} ${{matrix.extra_build_args}}"
       container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}}
 
   build_and_test_windows:
@@ -45,7 +45,7 @@ jobs:
       matrix:
         include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }}
     with:
-      test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}}
-      build_script: "./ci/windows/build_${{ inputs.project_name }}.ps1 -std ${{matrix.std}} ${{matrix.extra_build_args}}"
-      test_script:  "./ci/windows/test_${{ inputs.project_name }}.ps1 -std ${{matrix.std}} ${{matrix.extra_build_args}}"
+      test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}}
+      build_script: "./ci/windows/build_${{ inputs.project_name }}.ps1 ${{matrix.extra_build_args}}"
+      test_script:  "./ci/windows/test_${{ inputs.project_name }}.ps1 ${{matrix.extra_build_args}}"
       container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cuda${{matrix.cuda}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-${{matrix.os}}
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -21,6 +21,11 @@ project(NVBench
 
 nvbench_init_rapids_cmake()
 
+# Define NVBench_DETECTED_${LANG}_STANDARDS
+include(cmake/DetectSupportedStandards.cmake)
+detect_supported_standards(NVBench CXX 17 20)
+detect_supported_standards(NVBench CUDA 17 20)
+
 # See NVIDIA/NVBench#52
 find_package(CUDAToolkit REQUIRED)
 set(cupti_default ON)
@@ -34,6 +39,7 @@ option(NVBench_ENABLE_NVML "Build with NVML support from the Cuda Toolkit." ON)
 option(NVBench_ENABLE_CUPTI "Build NVBench with CUPTI." ${cupti_default})
 
 option(NVBench_ENABLE_TESTING "Build NVBench testing suite." OFF)
+option(NVBench_ENABLE_HEADER_TESTING "Build NVBench header tests." OFF)
 option(NVBench_ENABLE_DEVICE_TESTING
   "Include tests that require a GPU (with locked clocks)."
   OFF
@@ -55,7 +61,10 @@ message(STATUS "NVBench CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
 
 add_subdirectory(nvbench)
 
-if (NVBench_ENABLE_EXAMPLES OR NVBench_ENABLE_TESTING)
+if (NVBench_ENABLE_EXAMPLES OR
+    NVBench_ENABLE_TESTING OR
+    NVBench_ENABLE_HEADER_TESTING)
+  include(CTest)
   enable_testing()
 endif()
 
@@ -69,4 +78,8 @@ if (NVBench_ENABLE_TESTING)
   add_subdirectory(testing)
 endif()
 
+if (NVBench_ENABLE_HEADER_TESTING)
+  include(cmake/NVBenchHeaderTesting.cmake)
+endif()
+
 nvbench_generate_exports()
diff --git a/CMakePresets.json b/CMakePresets.json
@@ -17,37 +17,35 @@
         "NVBench_ENABLE_CUPTI": true,
         "NVBench_ENABLE_DEVICE_TESTING": false,
         "NVBench_ENABLE_EXAMPLES": true,
+        "NVBench_ENABLE_HEADER_TESTING": true,
         "NVBench_ENABLE_INSTALL_RULES": true,
         "NVBench_ENABLE_NVML": true,
         "NVBench_ENABLE_TESTING": true,
         "NVBench_ENABLE_WERROR": true
       }
     },
     {
-      "name": "all-dev",
+      "name": "nvbench-dev",
+      "displayName": "Developer Build",
       "inherits": "base",
       "cacheVariables": {
         "NVBench_ENABLE_DEVICE_TESTING": true
       }
     },
     {
-      "name": "nvbench-cpp17",
-      "displayName": "nvbench_c++17",
-      "inherits": "base",
-      "cacheVariables": {
-        "CMAKE_CXX_STANDARD": "17",
-        "CMAKE_CUDA_STANDARD": "17"
-      }
+      "name": "nvbench-ci",
+      "displayName": "NVBench CI",
+      "inherits": "base"
     }
   ],
   "buildPresets": [
     {
-      "name": "all-dev",
-      "configurePreset": "all-dev"
+      "name": "nvbench-dev",
+      "configurePreset": "nvbench-dev"
     },
     {
-      "name": "nvbench-cpp17",
-      "configurePreset": "nvbench-cpp17"
+      "name": "nvbench-ci",
+      "configurePreset": "nvbench-ci"
     }
   ],
   "testPresets": [
@@ -63,13 +61,13 @@
       }
     },
     {
-      "name": "all-dev",
-      "configurePreset": "all-dev",
+      "name": "nvbench-dev",
+      "configurePreset": "nvbench-dev",
       "inherits": "base"
     },
     {
-      "name": "nvbench-cpp17",
-      "configurePreset": "nvbench-cpp17",
+      "name": "nvbench-ci",
+      "configurePreset": "nvbench-ci",
       "inherits": "base"
     }
   ]

diff --git a/README.md b/README.md
@@ -26,6 +26,15 @@ features:
     * Executes the benchmark multiple times back-to-back and records total time.
     * Reports the average execution time (total time / number of executions).
 
+# Supported Compilers and Tools
+
+- CMake >= 3.23.1
+- CUDA Toolkit + nvcc: 11.1 -> 12.4
+- g++: 7 -> 13
+- clang++: 9 -> 18
+- cl.exe: 2019 -> 2022 (19.29, 19.39)
+- Headers are tested with C++17 -> C++20.
+
 # Getting Started
 
 ## Minimal Benchmark
@@ -34,7 +43,7 @@ A basic kernel benchmark can be created with just a few lines of CUDA C++:
 
 ```cpp
 void my_benchmark(nvbench::state& state) {
-  state.exec([](nvbench::launch& launch) { 
+  state.exec([](nvbench::launch& launch) {
     my_kernel<<<num_blocks, 256, 0, launch.get_stream()>>>();
   });
 }
@@ -72,7 +81,7 @@ mkdir -p build
 cd build
 cmake -DNVBench_ENABLE_EXAMPLES=ON -DCMAKE_CUDA_ARCHITECTURES=70 .. && make
 ```
-Be sure to set `CMAKE_CUDA_ARCHITECTURE` based on the GPU you are running on. 
+Be sure to set `CMAKE_CUDA_ARCHITECTURES` based on the GPU you are running on.
 
 Examples are built by default into `build/bin` and are prefixed with `nvbench.example`.
 
@@ -119,7 +128,7 @@ Pass: Batch: 0.261963ms GPU, 7.18s total GPU, 27394x
 ## Demo Project
 
 To get started using NVBench with your own kernels, consider trying out
-the [NVBench Demo Project](https://github.com/allisonvacanti/nvbench_demo). 
+the [NVBench Demo Project](https://github.com/allisonvacanti/nvbench_demo).
 
 `nvbench_demo` provides a simple CMake project that uses NVBench to build an
 example benchmark. It's a great way to experiment with the library without a lot
@@ -129,7 +138,7 @@ of investment.
 
 Contributions are welcome!
 
-For current issues, see the [issue board](https://github.com/NVIDIA/nvbench/issues). Issues labeled with [![](https://img.shields.io/github/labels/NVIDIA/nvbench/good%20first%20issue)](https://github.com/NVIDIA/nvbench/labels/good%20first%20issue) are good for first time contributors. 
+For current issues, see the [issue board](https://github.com/NVIDIA/nvbench/issues). Issues labeled with [![](https://img.shields.io/github/labels/NVIDIA/nvbench/good%20first%20issue)](https://github.com/NVIDIA/nvbench/labels/good%20first%20issue) are good for first time contributors.
 
 ## Tests
 
@@ -146,7 +155,7 @@ To run all tests:
 ```
 make test
 ```
-or 
+or
 ```
 ctest
 ```

diff --git a/ci/build_nvbench.sh b/ci/build_nvbench.sh
@@ -4,7 +4,7 @@ source "$(dirname "$0")/build_common.sh"
 
 print_environment_details
 
-PRESET="nvbench-cpp$CXX_STANDARD"
+PRESET="nvbench-ci"
 
 CMAKE_OPTIONS=""