From a0f2fab72bc26dfe919aff326028c7f134ec7146 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sat, 6 Apr 2024 22:05:40 +0000 Subject: [PATCH] Squashed commit of the following: commit c5b2fc0a8b2a5332f46edfe95f94675a03eda0c1 Author: Allison Piper Date: Sat Apr 6 21:48:20 2024 +0000 Add supported compilers and tools in README.md. commit 92fe366da54c81a812e43c33ab493a709d4d63df Author: Allison Piper Date: Sat Apr 6 20:45:30 2024 +0000 Fix issues discovered by header tests. commit f7f6c921437a41278931cd6ab59028c8802658ef Author: Allison Piper Date: Sat Apr 6 20:45:06 2024 +0000 Setup header tests, add C++20 header tests + examples. The core library will always be built with C++17, but we test our headers / examples under 17 and 20. commit 4b24f26b661e32b4d5ad0ff569d3dc4b0c1c58ec Author: Allison Piper Date: Sat Apr 6 16:21:42 2024 +0000 Pass CUDA FLAGS to install tests. commit 4fb672ae9115b19c3720253508f2744e8bc250a9 Author: Allison Piper Date: Sat Apr 6 15:43:41 2024 +0000 Add newer GCC (13) and Clang (17, 18). 
--- .../cuda12.4-llvm17/devcontainer.json | 46 ++++++++++ .../cuda12.4-llvm18/devcontainer.json | 46 ++++++++++ .../actions/compute-matrix/compute-matrix.sh | 10 +-- .github/workflows/dispatch-build-and-test.yml | 12 +-- CMakeLists.txt | 15 +++- CMakePresets.json | 30 +++---- README.md | 19 ++-- ci/build_nvbench.sh | 2 +- ci/matrix.yaml | 73 ++++++++------- ci/test_nvbench.sh | 2 +- ci/windows/build_nvbench.ps1 | 9 +- ci/windows/test_nvbench.ps1 | 9 +- cmake/DetectSupportedStandards.cmake | 65 ++++++++++++++ cmake/NVBenchHeaderTesting.cmake | 40 +++++++++ cmake/header_test.in.cxx | 57 ++++++++++++ examples/CMakeLists.txt | 89 +++++++++++-------- nvbench/axis_base.cuh | 1 + nvbench/detail/type_list_impl.cuh | 4 +- nvbench/test_kernels.cuh | 2 + testing/cmake/CMakeLists.txt | 1 + 20 files changed, 405 insertions(+), 127 deletions(-) create mode 100644 .devcontainer/cuda12.4-llvm17/devcontainer.json create mode 100644 .devcontainer/cuda12.4-llvm18/devcontainer.json create mode 100644 cmake/DetectSupportedStandards.cmake create mode 100644 cmake/NVBenchHeaderTesting.cmake create mode 100644 cmake/header_test.in.cxx diff --git a/.devcontainer/cuda12.4-llvm17/devcontainer.json b/.devcontainer/cuda12.4-llvm17/devcontainer.json new file mode 100644 index 00000000..7b9f2e54 --- /dev/null +++ b/.devcontainer/cuda12.4-llvm17/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.06-cpp-llvm17-cuda12.4-ubuntu22.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "VAULT_HOST": "https://vault.ops.k8s.rapids.ai", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.4-llvm17", + "CCCL_CUDA_VERSION": "12.4", + "CCCL_HOST_COMPILER": "llvm", + 
"CCCL_HOST_COMPILER_VERSION": "17", + "CCCL_BUILD_INFIX": "cuda12.4-llvm17" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.4-llvm17" +} diff --git a/.devcontainer/cuda12.4-llvm18/devcontainer.json b/.devcontainer/cuda12.4-llvm18/devcontainer.json new file mode 100644 index 00000000..ff2c1a78 --- /dev/null +++ b/.devcontainer/cuda12.4-llvm18/devcontainer.json @@ -0,0 +1,46 @@ +{ + "shutdownAction": "stopContainer", + "image": "rapidsai/devcontainers:24.06-cpp-llvm18-cuda12.4-ubuntu22.04", + "hostRequirements": { + "gpu": "optional" + }, + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" + ], + "containerEnv": { + "SCCACHE_REGION": "us-east-2", + "SCCACHE_BUCKET": "rapids-sccache-devs", + "VAULT_HOST": "https://vault.ops.k8s.rapids.ai", + "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", + "DEVCONTAINER_NAME": "cuda12.4-llvm18", + "CCCL_CUDA_VERSION": "12.4", + "CCCL_HOST_COMPILER": "llvm", + "CCCL_HOST_COMPILER_VERSION": "18", + "CCCL_BUILD_INFIX": "cuda12.4-llvm18" + }, + "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", + "workspaceMount": 
"source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "xaver.clang-format" + ], + "settings": { + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.executable": "/usr/local/bin/clang-format", + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}" + ] + } + } + }, + "name": "cuda12.4-llvm18" +} diff --git a/.github/actions/compute-matrix/compute-matrix.sh b/.github/actions/compute-matrix/compute-matrix.sh index 8a6d635c..cd3946f1 100755 --- a/.github/actions/compute-matrix/compute-matrix.sh +++ b/.github/actions/compute-matrix/compute-matrix.sh @@ -8,21 +8,13 @@ write_output() { echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}" } -explode_std_versions() { - jq -cr 'map(. as $o | {std: $o.std[]} + del($o.std))' -} - -explode_libs() { - jq -cr 'map(. 
as $o | {lib: $o.lib[]} + del($o.lib))' -} - extract_matrix() { local file="$1" local type="$2" local matrix=$(yq -o=json "$file" | jq -cr ".$type") write_output "DEVCONTAINER_VERSION" "$(yq -o json "$file" | jq -cr '.devcontainer_version')" - local nvcc_full_matrix="$(echo "$matrix" | jq -cr '.nvcc' | explode_std_versions )" + local nvcc_full_matrix="$(echo "$matrix" | jq -cr '.nvcc')" local per_cuda_compiler_matrix="$(echo "$nvcc_full_matrix" | jq -cr ' group_by(.cuda + .compiler.name) | map({(.[0].cuda + "-" + .[0].compiler.name): .}) | add')" write_output "PER_CUDA_COMPILER_MATRIX" "$per_cuda_compiler_matrix" write_output "PER_CUDA_COMPILER_KEYS" "$(echo "$per_cuda_compiler_matrix" | jq -r 'keys | @json')" diff --git a/.github/workflows/dispatch-build-and-test.yml b/.github/workflows/dispatch-build-and-test.yml index ce54c673..72cfb6bf 100644 --- a/.github/workflows/dispatch-build-and-test.yml +++ b/.github/workflows/dispatch-build-and-test.yml @@ -28,9 +28,9 @@ jobs: include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }} with: cpu: ${{ matrix.cpu }} - test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}} ${{matrix.extra_build_args}} - build_script: "./ci/build_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}} ${{matrix.extra_build_args}}" - test_script: "./ci/test_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}} ${{matrix.extra_build_args}}" + test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}} ${{matrix.extra_build_args}} + build_script: "./ci/build_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} ${{matrix.extra_build_args}}" + test_script: "./ci/test_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} ${{matrix.extra_build_args}}" container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} 
build_and_test_windows: @@ -45,7 +45,7 @@ jobs: matrix: include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }} with: - test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}} - build_script: "./ci/windows/build_${{ inputs.project_name }}.ps1 -std ${{matrix.std}} ${{matrix.extra_build_args}}" - test_script: "./ci/windows/test_${{ inputs.project_name }}.ps1 -std ${{matrix.std}} ${{matrix.extra_build_args}}" + test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}} + build_script: "./ci/windows/build_${{ inputs.project_name }}.ps1 ${{matrix.extra_build_args}}" + test_script: "./ci/windows/test_${{ inputs.project_name }}.ps1 ${{matrix.extra_build_args}}" container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cuda${{matrix.cuda}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-${{matrix.os}} diff --git a/CMakeLists.txt b/CMakeLists.txt index b052350f..8eb5f883 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,6 +21,11 @@ project(NVBench nvbench_init_rapids_cmake() +# Define NVBench_DETECTED_${LANG}_STANDARDS +include(cmake/DetectSupportedStandards.cmake) +detect_supported_standards(NVBench CXX 17 20) +detect_supported_standards(NVBench CUDA 17 20) + # See NVIDIA/NVBench#52 find_package(CUDAToolkit REQUIRED) set(cupti_default ON) @@ -34,6 +39,7 @@ option(NVBench_ENABLE_NVML "Build with NVML support from the Cuda Toolkit." ON) option(NVBench_ENABLE_CUPTI "Build NVBench with CUPTI." ${cupti_default}) option(NVBench_ENABLE_TESTING "Build NVBench testing suite." OFF) +option(NVBench_ENABLE_HEADER_TESTING "Build NVBench header tests (one translation unit per public header)." OFF) option(NVBench_ENABLE_DEVICE_TESTING "Include tests that require a GPU (with locked clocks)."
OFF @@ -55,7 +61,10 @@ message(STATUS "NVBench CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") add_subdirectory(nvbench) -if (NVBench_ENABLE_EXAMPLES OR NVBench_ENABLE_TESTING) +if (NVBench_ENABLE_EXAMPLES OR + NVBench_ENABLE_TESTING OR + NVBench_ENABLE_HEADER_TESTING) + include(CTest) enable_testing() endif() @@ -69,4 +78,8 @@ if (NVBench_ENABLE_TESTING) add_subdirectory(testing) endif() +if (NVBench_ENABLE_HEADER_TESTING) + include(cmake/NVBenchHeaderTesting.cmake) +endif() + nvbench_generate_exports() diff --git a/CMakePresets.json b/CMakePresets.json index 42e24428..3e66f9ad 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -17,6 +17,7 @@ "NVBench_ENABLE_CUPTI": true, "NVBench_ENABLE_DEVICE_TESTING": false, "NVBench_ENABLE_EXAMPLES": true, + "NVBench_ENABLE_HEADER_TESTING": true, "NVBench_ENABLE_INSTALL_RULES": true, "NVBench_ENABLE_NVML": true, "NVBench_ENABLE_TESTING": true, @@ -24,30 +25,27 @@ } }, { - "name": "all-dev", + "name": "nvbench-dev", + "displayName": "Developer Build", "inherits": "base", "cacheVariables": { "NVBench_ENABLE_DEVICE_TESTING": true } }, { - "name": "nvbench-cpp17", - "displayName": "nvbench_c++17", - "inherits": "base", - "cacheVariables": { - "CMAKE_CXX_STANDARD": "17", - "CMAKE_CUDA_STANDARD": "17" - } + "name": "nvbench-ci", + "displayName": "NVBench CI", + "inherits": "base" } ], "buildPresets": [ { - "name": "all-dev", - "configurePreset": "all-dev" + "name": "nvbench-dev", + "configurePreset": "nvbench-dev" }, { - "name": "nvbench-cpp17", - "configurePreset": "nvbench-cpp17" + "name": "nvbench-ci", + "configurePreset": "nvbench-ci" } ], "testPresets": [ @@ -63,13 +61,13 @@ } }, { - "name": "all-dev", - "configurePreset": "all-dev", + "name": "nvbench-dev", + "configurePreset": "nvbench-dev", "inherits": "base" }, { - "name": "nvbench-cpp17", - "configurePreset": "nvbench-cpp17", + "name": "nvbench-ci", + "configurePreset": "nvbench-ci", "inherits": "base" } ] diff --git a/README.md b/README.md index 
c1cad5ad..285213f1 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,15 @@ features: * Executes the benchmark multiple times back-to-back and records total time. * Reports the average execution time (total time / number of executions). +# Supported Compilers and Tools + +- CMake > 3.23.1 +- CUDA Toolkit + nvcc: 11.1 -> 12.4 +- g++: 7 -> 12 +- clang++: 9 -> 18 +- cl.exe: 2019 -> 2022 (19.29, 19.39) +- Headers are tested with C++17 -> C++20. + # Getting Started ## Minimal Benchmark @@ -34,7 +43,7 @@ A basic kernel benchmark can be created with just a few lines of CUDA C++: ```cpp void my_benchmark(nvbench::state& state) { - state.exec([](nvbench::launch& launch) { + state.exec([](nvbench::launch& launch) { my_kernel<<>>(); }); } @@ -72,7 +81,7 @@ mkdir -p build cd build cmake -DNVBench_ENABLE_EXAMPLES=ON -DCMAKE_CUDA_ARCHITECTURES=70 .. && make ``` -Be sure to set `CMAKE_CUDA_ARCHITECTURE` based on the GPU you are running on. +Be sure to set `CMAKE_CUDA_ARCHITECTURES` based on the GPU you are running on. Examples are built by default into `build/bin` and are prefixed with `nvbench.example`. @@ -119,7 +128,7 @@ Pass: Batch: 0.261963ms GPU, 7.18s total GPU, 27394x ## Demo Project To get started using NVBench with your own kernels, consider trying out -the [NVBench Demo Project](https://github.com/allisonvacanti/nvbench_demo). +the [NVBench Demo Project](https://github.com/allisonvacanti/nvbench_demo). `nvbench_demo` provides a simple CMake project that uses NVBench to build an example benchmark. It's a great way to experiment with the library without a lot @@ -129,7 +138,7 @@ of investment. Contributions are welcome! -For current issues, see the [issue board](https://github.com/NVIDIA/nvbench/issues). Issues labeled with [![](https://img.shields.io/github/labels/NVIDIA/nvbench/good%20first%20issue)](https://github.com/NVIDIA/nvbench/labels/good%20first%20issue) are good for first time contributors.
+For current issues, see the [issue board](https://github.com/NVIDIA/nvbench/issues). Issues labeled with [![](https://img.shields.io/github/labels/NVIDIA/nvbench/good%20first%20issue)](https://github.com/NVIDIA/nvbench/labels/good%20first%20issue) are good for first time contributors. ## Tests @@ -146,7 +155,7 @@ To run all tests: ``` make test ``` -or +or ``` ctest ``` diff --git a/ci/build_nvbench.sh b/ci/build_nvbench.sh index ecd06289..cc245d3a 100755 --- a/ci/build_nvbench.sh +++ b/ci/build_nvbench.sh @@ -4,7 +4,7 @@ source "$(dirname "$0")/build_common.sh" print_environment_details -PRESET="nvbench-cpp$CXX_STANDARD" +PRESET="nvbench-ci" CMAKE_OPTIONS="" diff --git a/ci/matrix.yaml b/ci/matrix.yaml index a1bfb570..99594730 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -14,6 +14,7 @@ gcc9: &gcc9 { name: 'gcc', version: '9', exe: 'g++' } gcc10: &gcc10 { name: 'gcc', version: '10', exe: 'g++' } gcc11: &gcc11 { name: 'gcc', version: '11', exe: 'g++' } gcc12: &gcc12 { name: 'gcc', version: '12', exe: 'g++' } +gcc13: &gcc13 { name: 'gcc', version: '13', exe: 'g++' } # LLVM Compiler configurations llvm9: &llvm9 { name: 'llvm', version: '9', exe: 'clang++' } @@ -24,6 +25,8 @@ llvm13: &llvm13 { name: 'llvm', version: '13', exe: 'clang++' } llvm14: &llvm14 { name: 'llvm', version: '14', exe: 'clang++' } llvm15: &llvm15 { name: 'llvm', version: '15', exe: 'clang++' } llvm16: &llvm16 { name: 'llvm', version: '16', exe: 'clang++' } +llvm17: &llvm17 { name: 'llvm', version: '17', exe: 'clang++' } +llvm18: &llvm18 { name: 'llvm', version: '18', exe: 'clang++' } # MSVC configs msvc2019: &msvc2019 { name: 'cl', version: '14.29', exe: 'cl++' } @@ -44,36 +47,40 @@ msvc2022: &msvc2022 { name: 'cl', version: '14.39', exe: 'cl++' } # Configurations that will run for every PR pull_request: nvcc: - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc7, std: [17], extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF'"} - - {cuda: *cuda_prev_min, os:
'ubuntu18.04', cpu: 'amd64', compiler: *gcc8, std: [17], extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF'"} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc9, std: [17], extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF'"} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm9, std: [17], extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF'"} - - {cuda: *cuda_prev_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [17]} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7, std: [17]} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8, std: [17]} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9, std: [17]} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10, std: [17]} - - {cuda: *cuda_curr_min, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [17]} - - {cuda: *cuda_curr_min, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [17]} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9, std: [17]} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10, std: [17]} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11, std: [17]} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12, std: [17]} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13, std: [17]} - - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14, std: [17]} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7, std: [17]} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8, std: [17]} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9, std: [17]} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10, std: [17]} - - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 
'amd64', compiler: *gcc11, std: [17]} - - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [17]} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9, std: [17]} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10, std: [17]} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11, std: [17]} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12, std: [17]} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13, std: [17]} - - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14, std: [17]} - - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm15, std: [17]} - - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm16, std: [17]} - - {cuda: *cuda_curr_max, os: 'windows2022', cpu: 'amd64', compiler: *msvc2019, std: [17], extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF -DNVBench_ENABLE_NVML=OFF'"} - - {cuda: *cuda_curr_max, os: 'windows2022', cpu: 'amd64', compiler: *msvc2022, std: [17], extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF -DNVBench_ENABLE_NVML=OFF'"} + - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc7, extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF'"} + - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc8, extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF'"} + - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc9, extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF'"} + - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm9, extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF'"} + - {cuda: *cuda_prev_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11} + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7} + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', 
compiler: *gcc8} + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9} + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10} + - {cuda: *cuda_curr_min, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11} + - {cuda: *cuda_curr_min, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12} + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9} + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10} + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11} + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12} + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13} + - {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14} + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7} + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8} + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9} + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10} + - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11} + - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12} + # Fails to compile simple input on CTK12.4. Try to add later. 
+ # {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc13} + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9} + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10} + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11} + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12} + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13} + - {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14} + - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm15} + - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm16} + - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm17} + - {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm18, extra_build_args: "-cmake-options '-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler'"} + - {cuda: *cuda_curr_max, os: 'windows2022', cpu: 'amd64', compiler: *msvc2019, extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF -DNVBench_ENABLE_NVML=OFF'"} + - {cuda: *cuda_curr_max, os: 'windows2022', cpu: 'amd64', compiler: *msvc2022, extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF -DNVBench_ENABLE_NVML=OFF'"} diff --git a/ci/test_nvbench.sh b/ci/test_nvbench.sh index f89c6fe6..40559eda 100755 --- a/ci/test_nvbench.sh +++ b/ci/test_nvbench.sh @@ -11,7 +11,7 @@ print_environment_details ./build_nvbench.sh "$@" -PRESET="nvbench-cpp$CXX_STANDARD" +PRESET="nvbench-ci" test_preset "NVBench" ${PRESET} diff --git a/ci/windows/build_nvbench.ps1 b/ci/windows/build_nvbench.ps1 index e2a90a25..1ac8bd16 100644 --- a/ci/windows/build_nvbench.ps1 +++ b/ci/windows/build_nvbench.ps1 @@ -1,10 +1,5 @@ Param( - [Parameter(Mandatory = $true)] - [Alias("std")] - [ValidateNotNullOrEmpty()] - [ValidateSet(17)] - [int]$CXX_STANDARD = 17, [Parameter(Mandatory = $false)] [Alias("cmake-options")] [ValidateNotNullOrEmpty()] @@ 
-18,9 +13,9 @@ If($CURRENT_PATH -ne "ci") { } Remove-Module -Name build_common -Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD +Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList 17 -$PRESET = "nvbench-cpp$CXX_STANDARD" +$PRESET = "nvbench-ci" $CMAKE_OPTIONS = "" # Append any arguments pass in on the command line diff --git a/ci/windows/test_nvbench.ps1 b/ci/windows/test_nvbench.ps1 index 57ccd8e8..bcd9f2c9 100644 --- a/ci/windows/test_nvbench.ps1 +++ b/ci/windows/test_nvbench.ps1 @@ -1,10 +1,5 @@ Param( - [Parameter(Mandatory = $true)] - [Alias("std")] - [ValidateNotNullOrEmpty()] - [ValidateSet(17)] - [int]$CXX_STANDARD = 17, [Parameter(Mandatory = $false)] [Alias("cmake-options")] [ValidateNotNullOrEmpty()] @@ -18,9 +13,9 @@ If($CURRENT_PATH -ne "ci") { } Remove-Module -Name build_common -Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD +Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList 17 -$PRESET = "nvbench-cpp$CXX_STANDARD" +$PRESET = "nvbench-ci" $CMAKE_OPTIONS = "" # Append any arguments pass in on the command line diff --git a/cmake/DetectSupportedStandards.cmake b/cmake/DetectSupportedStandards.cmake new file mode 100644 index 00000000..6a86d6ac --- /dev/null +++ b/cmake/DetectSupportedStandards.cmake @@ -0,0 +1,65 @@ +# Detect the language standards supported by the current compilers. +# +# Usage: detect_supported_standards(<var_prefix> <lang> <standards>) +# +# - var_prefix: Used to name result variables, +# e.g. ${var_prefix}_${lang}_XX_SUPPORTED will be TRUE or FALSE. Defined for +# each XX in ${standards}. +# - lang: The language to test: C, CXX, or CUDA. +# - standards: List of any standard versions. +# +# Example: detect_supported_standards(PROJ CXX 11 14 17) +# - Sets the following variables in the parent scope to TRUE or FALSE: +# - PROJ_CXX_11_SUPPORTED +# - PROJ_CXX_14_SUPPORTED +# - PROJ_CXX_17_SUPPORTED +# - Sets `PROJ_DETECTED_CXX_STANDARDS` to a list of supported standards (e.g. "11;14;17").
+function(detect_supported_standards prefix lang) + string(TOLOWER "${lang}_std" feature_prefix) + set(all_stds) + foreach(standard IN LISTS ARGN) + set(var_name "${prefix}_${lang}_${standard}_SUPPORTED") + if ("${feature_prefix}_${standard}" IN_LIST CMAKE_${lang}_COMPILE_FEATURES) + set(${var_name} TRUE) + else() + set(${var_name} FALSE) + endif() + + # Special cases: + if (standard EQUAL 17 AND + (lang STREQUAL "CXX" OR lang STREQUAL "CUDA") AND + ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND + CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7) OR + (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND + CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8))) + # gcc < 7 and clang < 8 don't fully support C++17. + # They accept the flag and have partial support, but nvcc will refuse + # to enable it and falls back to the default dialect for the current + # CXX compiler version. This breaks our CI. + # CMake's COMPILE_FEATURES var reports that these compilers support C++17, + # but we can't rely on it, so manually disable the dialect in these cases. + set(${var_name} FALSE) + endif() + + if (standard EQUAL 20 AND + (lang STREQUAL "CXX" OR lang STREQUAL "CUDA") AND + ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND + CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10) OR + (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND + CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10) OR + (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND + CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.30))) + # Similar to the above, but for C++20. MSVC compiler versions use the 19.xx form, not the 19xx MSVC_VERSION form.
+ set(${var_name} FALSE) + endif() + + if (${var_name}) + list(APPEND all_stds ${standard}) + endif() + + message(STATUS "Testing ${lang}${standard} Support: ${${var_name}}") + set(${var_name} ${${var_name}} PARENT_SCOPE) + endforeach() + + set(${prefix}_DETECTED_${lang}_STANDARDS "${all_stds}" PARENT_SCOPE) +endfunction() diff --git a/cmake/NVBenchHeaderTesting.cmake b/cmake/NVBenchHeaderTesting.cmake new file mode 100644 index 00000000..354ec84d --- /dev/null +++ b/cmake/NVBenchHeaderTesting.cmake @@ -0,0 +1,40 @@ +# For every public header, build a translation unit containing `#include
` +# with some various checks. + +set(excluded_headers_regexes + # Should never be used externally. + "^detail" + "^internal" +) + +# Meta target for all configs' header builds: +add_custom_target(nvbench.headers.all) +add_dependencies(nvbench.all nvbench.headers.all) + +file(GLOB_RECURSE header_files + RELATIVE "${NVBench_SOURCE_DIR}/nvbench/" + CONFIGURE_DEPENDS + "${NVBench_SOURCE_DIR}/nvbench/*.cuh" +) + +foreach (exclusion IN LISTS excluded_headers_regexes) + list(FILTER header_files EXCLUDE REGEX "${exclusion}") +endforeach() + +function (nvbench_add_header_target target_name cuda_std) + foreach (header IN LISTS header_files) + set(headertest_src "headers/${target_name}/${header}.cu") + set(header_str "nvbench/${header}") # Substitution used by configure_file: + configure_file("${NVBench_SOURCE_DIR}/cmake/header_test.in.cxx" "${headertest_src}") + list(APPEND headertest_srcs "${headertest_src}") + endforeach() + + add_library(${target_name} OBJECT ${headertest_srcs}) + target_link_libraries(${target_name} PUBLIC nvbench::nvbench) + set_target_properties(${target_name} PROPERTIES COMPILE_FEATURES cuda_std_${cuda_std}) + add_dependencies(nvbench.headers.all ${target_name}) +endfunction() + +foreach (std IN LISTS NVBench_DETECTED_CUDA_STANDARDS) + nvbench_add_header_target(nvbench.headers.cpp${std} ${std}) +endforeach() diff --git a/cmake/header_test.in.cxx b/cmake/header_test.in.cxx new file mode 100644 index 00000000..c26753e1 --- /dev/null +++ b/cmake/header_test.in.cxx @@ -0,0 +1,57 @@ +// This source file checks that: +// 1) Header <${header_str}> compiles without error. +// 2) Common macro collisions with platform/system headers are avoided. + +// Turn off failures for certain configurations: +#ifndef NVBench_IGNORE_MACRO_CHECKS + +// Define NVBench_MACRO_CHECK(macro, header), which emits a diagnostic indicating +// a potential macro collision and halts. +// +// Hacky way to build a string, but it works on all tested platforms. 
+#define NVBench_MACRO_CHECK(MACRO, HEADER) \ + NVBench_MACRO_CHECK_IMPL(Identifier MACRO should not be used from NVBench \ + headers due to conflicts with HEADER macros.) + +// Use raw platform checks instead of the NVBench_HOST_COMPILER macros since we +// don't want to #include any headers other than the one being tested. +// +// This is only implemented for MSVC/GCC/Clang. +#if defined(_MSC_VER) // MSVC + +// Fake up an error for MSVC +#define NVBench_MACRO_CHECK_IMPL(msg) \ + /* Print message that looks like an error: */ \ + __pragma(message(__FILE__ ":" NVBench_MACRO_CHECK_IMPL0(__LINE__) \ + ": error: " #msg)) \ + /* abort compilation due to static_assert or syntax error: */ \ + static_assert(false, #msg); +#define NVBench_MACRO_CHECK_IMPL0(x) NVBench_MACRO_CHECK_IMPL1(x) +#define NVBench_MACRO_CHECK_IMPL1(x) #x + +#elif defined(__clang__) || defined(__GNUC__) + +// GCC/clang are easy: +#define NVBench_MACRO_CHECK_IMPL(msg) NVBench_MACRO_CHECK_IMPL0(GCC error #msg) +#define NVBench_MACRO_CHECK_IMPL0(expr) _Pragma(#expr) + +#endif + +// complex.h conflicts +#define I NVBench_MACRO_CHECK('I', complex.h) + +// windows.h conflicts +#define small NVBench_MACRO_CHECK('small', windows.h) +// We can't enable these checks without breaking some builds -- some standard +// library implementations unconditionally `#undef` these macros, which then +// causes random failures later. +// Leaving these commented out as a warning: Here be dragons. +//#define min(...) NVBench_MACRO_CHECK('min', windows.h) +//#define max(...) 
NVBench_MACRO_CHECK('max', windows.h) + +// termios.h conflicts (NVIDIA/thrust#1547) +#define B0 NVBench_MACRO_CHECK("B0", termios.h) + +#endif // NVBench_IGNORE_MACRO_CHECKS + +#include <${header_str}> diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index b0f288c4..a98bcbeb 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -14,47 +14,58 @@ set(example_srcs add_custom_target(nvbench.example.all) add_dependencies(nvbench.all nvbench.example.all) -foreach(example_src IN LISTS example_srcs) - get_filename_component(example_name "${example_src}" NAME_WLE) - string(PREPEND example_name "nvbench.example.") - add_executable(${example_name} "${example_src}") - nvbench_config_target(${example_name}) - target_include_directories(${example_name} PRIVATE "${CMAKE_CURRENT_LIST_DIR}") - target_link_libraries(${example_name} PRIVATE nvbench::main) - set_target_properties(${example_name} PROPERTIES COMPILE_FEATURES cuda_std_17) - add_test(NAME ${example_name} - COMMAND "$" --timeout 0.1 --min-time 1e-5 - ) - # These should not deadlock. If they do, it may be that the CUDA context was created before - # setting CUDA_MODULE_LOAD=EAGER in main, see NVIDIA/nvbench#136. 
- set_tests_properties(${example_name} PROPERTIES - FAIL_REGULAR_EXPRESSION "Possible Deadlock Detected" - ) +function (nvbench_add_examples_target target_prefix cuda_std) + add_custom_target(${target_prefix}.all) + add_dependencies(nvbench.example.all ${target_prefix}.all) - add_dependencies(nvbench.example.all ${example_name}) -endforeach() + foreach(example_src IN LISTS example_srcs) + get_filename_component(example_name "${example_src}" NAME_WLE) + string(PREPEND example_name "${target_prefix}.") + add_executable(${example_name} "${example_src}") + nvbench_config_target(${example_name}) + target_include_directories(${example_name} PRIVATE "${CMAKE_CURRENT_LIST_DIR}") + target_link_libraries(${example_name} PRIVATE nvbench::main) + set_target_properties(${example_name} PROPERTIES COMPILE_FEATURES cuda_std_${cuda_std}) + add_test(NAME ${example_name} + COMMAND "$" --timeout 0.1 --min-time 1e-5 + ) -# Silence some warnings from old thrust headers: -set(thrust_examples - auto_throughput - axes - custom_criterion - exec_tag_sync - exec_tag_timer - skip - stream - throughput -) -foreach (example IN LISTS thrust_examples) - if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - # C4324: structure was padded due to alignment specifier - nvbench_add_cxx_flag(nvbench.example.${example} PRIVATE "/wd4324") - - # warning C4201: nonstandard extension used: nameless struct/union: - # Fixed in Thrust 1.12.0 (CTK 11.4, NV HPC 21.3) - if (${CUDAToolkit_VERSION} VERSION_LESS 11.4) - nvbench_add_cxx_flag(nvbench.example.${example} PRIVATE "/wd4201") + # These should not deadlock. If they do, it may be that the CUDA context was created before + # setting CUDA_MODULE_LOAD=EAGER in main, see NVIDIA/nvbench#136. 
+ set_tests_properties(${example_name} PROPERTIES + FAIL_REGULAR_EXPRESSION "Possible Deadlock Detected" + ) + + add_dependencies(${target_prefix}.all ${example_name}) + endforeach() + + # Silence some warnings from old thrust headers: + set(thrust_examples + auto_throughput + axes + custom_criterion + exec_tag_sync + exec_tag_timer + skip + stream + throughput + ) + foreach (example IN LISTS thrust_examples) + if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + # C4324: structure was padded due to alignment specifier + nvbench_add_cxx_flag(${target_prefix}.${example} PRIVATE "/wd4324") + + # warning C4201: nonstandard extension used: nameless struct/union: + # Fixed in Thrust 1.12.0 (CTK 11.4, NV HPC 21.3) + if (${CUDAToolkit_VERSION} VERSION_LESS 11.4) + nvbench_add_cxx_flag(${target_prefix}.${example} PRIVATE "/wd4201") + endif() endif() - endif() + endforeach() +endfunction() + + +foreach (std IN LISTS NVBench_DETECTED_CUDA_STANDARDS) + nvbench_add_examples_target(nvbench.example.cpp${std} ${std}) endforeach() diff --git a/nvbench/axis_base.cuh b/nvbench/axis_base.cuh index 85d92c7d..b3e089ec 100644 --- a/nvbench/axis_base.cuh +++ b/nvbench/axis_base.cuh @@ -21,6 +21,7 @@ #include #include #include +#include #include namespace nvbench diff --git a/nvbench/detail/type_list_impl.cuh b/nvbench/detail/type_list_impl.cuh index 8a18aa3d..e97aaaa1 100644 --- a/nvbench/detail/type_list_impl.cuh +++ b/nvbench/detail/type_list_impl.cuh @@ -22,8 +22,8 @@ namespace tl::detail template auto size(nvbench::type_list) -> std::integral_constant; -template -auto get(nvbench::type_list) -> std::tuple_element_t>; +template +auto get(nvbench::type_list) -> std::tuple_element_t>; template auto concat(nvbench::type_list, nvbench::type_list) diff --git a/nvbench/test_kernels.cuh b/nvbench/test_kernels.cuh index f01305c8..f46216dc 100644 --- a/nvbench/test_kernels.cuh +++ b/nvbench/test_kernels.cuh @@ -18,6 +18,8 @@ #pragma once +#include + #include #include diff --git 
a/testing/cmake/CMakeLists.txt b/testing/cmake/CMakeLists.txt index 2cb2f5fa..6932c00c 100644 --- a/testing/cmake/CMakeLists.txt +++ b/testing/cmake/CMakeLists.txt @@ -12,6 +12,7 @@ set(cmake_opts -D "CMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}" -D "CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" -D "CMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER}" + -D "CMAKE_CUDA_FLAGS=${CMAKE_CUDA_FLAGS}" -D "CMAKE_CUDA_ARCHITECTURES=${arches}" )