From c7897fd74bb59fc3ea60ea6a636fe5f9eccff921 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Thu, 28 Nov 2024 17:38:54 +0900 Subject: [PATCH 1/5] Introduce Kokkos::Profiling::Region to profile tpl calls --- fft/src/KokkosFFT_Cuda_plans.hpp | 19 +++++++++++++- fft/src/KokkosFFT_Cuda_transform.hpp | 12 +++++++++ fft/src/KokkosFFT_HIP_plans.hpp | 19 +++++++++++++- fft/src/KokkosFFT_HIP_transform.hpp | 12 +++++++++ fft/src/KokkosFFT_Host_plans.hpp | 8 +++++- fft/src/KokkosFFT_Host_transform.hpp | 12 +++++++++ fft/src/KokkosFFT_ROCM_plans.hpp | 9 +++++++ fft/src/KokkosFFT_ROCM_transform.hpp | 12 +++++++++ fft/src/KokkosFFT_SYCL_plans.hpp | 6 +++++ fft/src/KokkosFFT_SYCL_transform.hpp | 12 +++++++++ fft/src/KokkosFFT_Transform.hpp | 38 ++++++++++++++++++++++++++++ 11 files changed, 156 insertions(+), 3 deletions(-) diff --git a/fft/src/KokkosFFT_Cuda_plans.hpp b/fft/src/KokkosFFT_Cuda_plans.hpp index 9abd8451..35208cff 100644 --- a/fft/src/KokkosFFT_Cuda_plans.hpp +++ b/fft/src/KokkosFFT_Cuda_plans.hpp @@ -35,6 +35,8 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_cufft]"); + plan = std::make_unique(); cufftResult cufft_rt = cufftCreate(&(*plan)); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftCreate failed"); @@ -53,6 +55,8 @@ auto create_plan(const ExecutionSpace& exec_space, cufft_rt = cufftPlan1d(&(*plan), nx, type, howmany); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlan1d failed"); + Kokkos::Profiling::popRegion(); + return fft_size; } @@ -78,6 +82,8 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_cufft]"); + plan = std::make_unique(); cufftResult cufft_rt = cufftCreate(&(*plan)); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftCreate failed"); @@ -96,6 +102,8 @@ auto create_plan(const ExecutionSpace& exec_space, cufft_rt = cufftPlan2d(&(*plan), nx, ny, type); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlan2d failed"); + Kokkos::Profiling::popRegion(); + return fft_size; } @@ -121,6 +129,8 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_cufft]"); + plan = std::make_unique(); cufftResult cufft_rt = cufftCreate(&(*plan)); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftCreate failed"); @@ -141,6 +151,8 @@ auto create_plan(const ExecutionSpace& exec_space, cufft_rt = cufftPlan3d(&(*plan), nx, ny, nz, type); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlan3d failed"); + Kokkos::Profiling::popRegion(); + return fft_size; } @@ -170,7 +182,9 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - const int rank = fft_rank; + + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_cufft]"); + const int rank = fft_rank; constexpr auto type = KokkosFFT::Impl::transform_type::type(); @@ -198,6 +212,7 @@ auto create_plan(const ExecutionSpace& exec_space, out_extents.data(), ostride, odist, type, howmany); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlanMany failed"); + Kokkos::Profiling::popRegion(); return fft_size; } @@ -206,7 +221,9 @@ template , std::nullptr_t> = nullptr> void destroy_plan_and_info(std::unique_ptr& plan, InfoType&) { + Kokkos::Profiling::pushRegion("KokkosFFT::destroy_plan[TPL_cufft]"); cufftDestroy(*plan); + Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_Cuda_transform.hpp b/fft/src/KokkosFFT_Cuda_transform.hpp index 83f0cb45..f047230c 100644 --- a/fft/src/KokkosFFT_Cuda_transform.hpp +++ b/fft/src/KokkosFFT_Cuda_transform.hpp @@ -13,42 +13,54 @@ namespace Impl { template inline void exec_plan(cufftHandle& plan, cufftReal* idata, cufftComplex* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); cufftResult cufft_rt = cufftExecR2C(plan, idata, odata); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecR2C failed"); } template inline void exec_plan(cufftHandle& plan, cufftDoubleReal* idata, cufftDoubleComplex* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); cufftResult cufft_rt = cufftExecD2Z(plan, idata, odata); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecD2Z failed"); } template inline void exec_plan(cufftHandle& plan, cufftComplex* idata, cufftReal* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); cufftResult cufft_rt = cufftExecC2R(plan, idata, odata); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecC2R failed"); } template inline void exec_plan(cufftHandle& plan, cufftDoubleComplex* idata, cufftDoubleReal* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); cufftResult cufft_rt = cufftExecZ2D(plan, idata, odata); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecZ2D failed"); } template inline void exec_plan(cufftHandle& plan, cufftComplex* idata, cufftComplex* odata, int direction, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); cufftResult cufft_rt = cufftExecC2C(plan, idata, odata, direction); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecC2C failed"); } template inline void exec_plan(cufftHandle& plan, cufftDoubleComplex* idata, cufftDoubleComplex* odata, int direction, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); cufftResult cufft_rt = cufftExecZ2Z(plan, idata, odata, direction); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecZ2Z failed"); } } // namespace Impl diff --git a/fft/src/KokkosFFT_HIP_plans.hpp b/fft/src/KokkosFFT_HIP_plans.hpp index c94ed23e..c520c1d5 100644 --- a/fft/src/KokkosFFT_HIP_plans.hpp +++ b/fft/src/KokkosFFT_HIP_plans.hpp @@ -35,6 +35,8 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_hipfft]"); + plan = std::make_unique(); hipfftResult hipfft_rt = hipfftCreate(&(*plan)); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftCreate failed"); @@ -53,6 +55,8 @@ auto create_plan(const ExecutionSpace& exec_space, hipfft_rt = hipfftPlan1d(&(*plan), nx, type, howmany); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlan1d failed"); + Kokkos::Profiling::popRegion(); + return fft_size; } @@ -78,6 +82,8 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_hipfft]"); + plan = std::make_unique(); hipfftResult hipfft_rt = hipfftCreate(&(*plan)); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftCreate failed"); @@ -96,6 +102,8 @@ auto create_plan(const ExecutionSpace& exec_space, hipfft_rt = hipfftPlan2d(&(*plan), nx, ny, type); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlan2d failed"); + Kokkos::Profiling::popRegion(); + return fft_size; } @@ -121,6 +129,8 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_hipfft]"); + plan = std::make_unique(); hipfftResult hipfft_rt = hipfftCreate(&(*plan)); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftCreate failed"); @@ -141,6 +151,8 @@ auto create_plan(const ExecutionSpace& exec_space, hipfft_rt = hipfftPlan3d(&(*plan), nx, ny, nz, type); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlan3d failed"); + Kokkos::Profiling::popRegion(); + return fft_size; } @@ -170,7 +182,9 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - const int rank = fft_rank; + + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_hipfft]"); + const int rank = fft_rank; constexpr auto type = KokkosFFT::Impl::transform_type::type(); @@ -198,6 +212,7 @@ auto create_plan(const ExecutionSpace& exec_space, out_extents.data(), ostride, odist, type, howmany); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlanMany failed"); + Kokkos::Profiling::popRegion(); return fft_size; } @@ -206,7 +221,9 @@ template , std::nullptr_t> = nullptr> void destroy_plan_and_info(std::unique_ptr& plan, InfoType&) { + Kokkos::Profiling::pushRegion("KokkosFFT::destroy_plan[TPL_hipfft]"); hipfftDestroy(*plan); + Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_HIP_transform.hpp b/fft/src/KokkosFFT_HIP_transform.hpp index 6e131150..26fc6836 100644 --- a/fft/src/KokkosFFT_HIP_transform.hpp +++ b/fft/src/KokkosFFT_HIP_transform.hpp @@ -13,42 +13,54 @@ namespace Impl { template inline void exec_plan(hipfftHandle& plan, hipfftReal* idata, hipfftComplex* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); hipfftResult hipfft_rt = hipfftExecR2C(plan, idata, odata); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecR2C failed"); } template inline void exec_plan(hipfftHandle& plan, hipfftDoubleReal* idata, hipfftDoubleComplex* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_hipfft]"); hipfftResult hipfft_rt = hipfftExecD2Z(plan, idata, odata); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecD2Z failed"); } template inline void exec_plan(hipfftHandle& plan, hipfftComplex* idata, hipfftReal* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_hipfft]"); hipfftResult hipfft_rt = hipfftExecC2R(plan, idata, odata); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecC2R failed"); } template inline void exec_plan(hipfftHandle& plan, hipfftDoubleComplex* idata, hipfftDoubleReal* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_hipfft]"); hipfftResult hipfft_rt = hipfftExecZ2D(plan, idata, odata); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecZ2D failed"); } template inline void exec_plan(hipfftHandle& plan, hipfftComplex* idata, hipfftComplex* odata, int direction, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_hipfft]"); hipfftResult hipfft_rt = hipfftExecC2C(plan, idata, odata, direction); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecC2C failed"); } template inline void exec_plan(hipfftHandle& plan, hipfftDoubleComplex* idata, hipfftDoubleComplex* odata, int direction, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_hipfft]"); hipfftResult hipfft_rt = hipfftExecZ2Z(plan, idata, odata, direction); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecZ2Z failed"); } } // namespace Impl diff --git a/fft/src/KokkosFFT_Host_plans.hpp b/fft/src/KokkosFFT_Host_plans.hpp index 7b66522e..4ce21583 100644 --- a/fft/src/KokkosFFT_Host_plans.hpp +++ b/fft/src/KokkosFFT_Host_plans.hpp @@ -54,8 +54,10 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - const int rank = fft_rank; + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_fftw]"); + + const int rank = fft_rank; init_threads>( exec_space); @@ -109,6 +111,8 @@ auto create_plan(const ExecutionSpace& exec_space, idist, odata, out_extents.data(), ostride, odist, sign, FFTW_ESTIMATE); } + Kokkos::Profiling::popRegion(); + return fft_size; } @@ -116,11 +120,13 @@ template , std::nullptr_t> = nullptr> void destroy_plan_and_info(std::unique_ptr& plan, InfoType&) { + Kokkos::Profiling::pushRegion("KokkosFFT::destroy_plan[TPL_fftw]"); if constexpr (std::is_same_v) { fftwf_destroy_plan(*plan); } else { fftw_destroy_plan(*plan); } + Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_Host_transform.hpp b/fft/src/KokkosFFT_Host_transform.hpp index 4dfc04bb..147461b9 100644 --- a/fft/src/KokkosFFT_Host_transform.hpp +++ b/fft/src/KokkosFFT_Host_transform.hpp @@ -12,37 +12,49 @@ namespace Impl { template void exec_plan(PlanType& plan, float* idata, fftwf_complex* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_fftw]"); fftwf_execute_dft_r2c(plan, idata, odata); + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, double* idata, fftw_complex* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_fftw]"); fftw_execute_dft_r2c(plan, idata, odata); + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, fftwf_complex* idata, float* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_fftw]"); fftwf_execute_dft_c2r(plan, idata, odata); + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, fftw_complex* idata, double* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_fftw]"); fftw_execute_dft_c2r(plan, idata, odata); + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, fftwf_complex* idata, fftwf_complex* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_fftw]"); fftwf_execute_dft(plan, idata, odata); + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType plan, fftw_complex* idata, fftw_complex* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_fftw]"); fftw_execute_dft(plan, idata, odata); + Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_ROCM_plans.hpp b/fft/src/KokkosFFT_ROCM_plans.hpp index e1b115e9..fed385e0 100644 --- a/fft/src/KokkosFFT_ROCM_plans.hpp +++ b/fft/src/KokkosFFT_ROCM_plans.hpp @@ -109,6 +109,9 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_rocfft]"); + constexpr auto type = KokkosFFT::Impl::transform_type::type(); @@ -198,6 +201,8 @@ auto create_plan(const ExecutionSpace& exec_space, KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_plan_description_destroy failed"); + Kokkos::Profiling::popRegion(); + return fft_size; } @@ -206,8 +211,12 @@ template = nullptr> void destroy_plan_and_info(std::unique_ptr& plan, InfoType& execution_info) { + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_rocfft]"); + rocfft_execution_info_destroy(execution_info); rocfft_plan_destroy(*plan); + + Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_ROCM_transform.hpp b/fft/src/KokkosFFT_ROCM_transform.hpp index 2c6d50b8..11f1a63e 100644 --- a/fft/src/KokkosFFT_ROCM_transform.hpp +++ b/fft/src/KokkosFFT_ROCM_transform.hpp @@ -14,8 +14,10 @@ namespace Impl { inline void exec_plan(rocfft_plan& plan, float* idata, std::complex* odata, int /*direction*/, const rocfft_execution_info& execution_info) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_rocfft]"); rocfft_status status = rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for R2C failed"); } @@ -23,8 +25,10 @@ inline void exec_plan(rocfft_plan& plan, float* idata, inline void exec_plan(rocfft_plan& plan, double* idata, std::complex* odata, int /*direction*/, const rocfft_execution_info& execution_info) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_rocfft]"); rocfft_status status = rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for D2Z failed"); } @@ -32,8 +36,10 @@ inline void exec_plan(rocfft_plan& plan, double* idata, inline void exec_plan(rocfft_plan& plan, std::complex* idata, float* odata, int /*direction*/, const rocfft_execution_info& execution_info) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_rocfft]"); rocfft_status status = rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for C2R failed"); } @@ -41,8 +47,10 @@ inline void exec_plan(rocfft_plan& plan, std::complex* idata, inline void exec_plan(rocfft_plan& plan, std::complex* idata, double* odata, int /*direction*/, const rocfft_execution_info& execution_info) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_rocfft]"); rocfft_status status = rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for Z2D failed"); } @@ -50,8 +58,10 @@ inline void exec_plan(rocfft_plan& plan, std::complex* idata, inline void exec_plan(rocfft_plan& plan, std::complex* idata, std::complex* odata, int /*direction*/, const rocfft_execution_info& execution_info) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_rocfft]"); rocfft_status status = rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for C2C failed"); } @@ -59,8 +69,10 @@ inline void exec_plan(rocfft_plan& plan, std::complex* idata, inline void exec_plan(rocfft_plan& plan, std::complex* idata, std::complex* odata, int /*direction*/, const rocfft_execution_info& execution_info) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_rocfft]"); rocfft_status status = rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for Z2Z failed"); } diff --git a/fft/src/KokkosFFT_SYCL_plans.hpp b/fft/src/KokkosFFT_SYCL_plans.hpp index fa9d232c..a1ead852 100644 --- a/fft/src/KokkosFFT_SYCL_plans.hpp +++ b/fft/src/KokkosFFT_SYCL_plans.hpp @@ -68,6 +68,8 @@ auto create_plan(const ExecutionSpace& exec_space, InViewType::rank() >= fft_rank, "KokkosFFT::create_plan: Rank of View must be larger than Rank of FFT."); + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_oneMKL]"); + auto [in_extents, out_extents, fft_extents, howmany] = KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); int idist = std::accumulate(in_extents.begin(), in_extents.end(), 1, @@ -107,6 +109,8 @@ auto create_plan(const ExecutionSpace& exec_space, sycl::queue q = exec_space.sycl_queue(); plan->commit(q); + Kokkos::Profiling::popRegion(); + return fft_size; } @@ -116,6 +120,8 @@ template < std::nullptr_t> = nullptr> void destroy_plan_and_info(std::unique_ptr&, InfoType&) { // In oneMKL, plans are destroybed by destructor + Kokkos::Profiling::pushRegion("KokkosFFT::destroy_plan[TPL_oneMKL]"); + Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_SYCL_transform.hpp b/fft/src/KokkosFFT_SYCL_transform.hpp index bd85ec7f..0ee67b2e 100644 --- a/fft/src/KokkosFFT_SYCL_transform.hpp +++ b/fft/src/KokkosFFT_SYCL_transform.hpp @@ -13,49 +13,61 @@ namespace Impl { template void exec_plan(PlanType& plan, float* idata, std::complex* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_oneMKL]"); oneapi::mkl::dft::compute_forward(plan, idata, reinterpret_cast(odata)); + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, double* idata, std::complex* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_oneMKL]"); oneapi::mkl::dft::compute_forward(plan, idata, reinterpret_cast(odata)); + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, std::complex* idata, float* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_oneMKL]"); oneapi::mkl::dft::compute_backward(plan, reinterpret_cast(idata), odata); + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, std::complex* idata, double* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_oneMKL]"); oneapi::mkl::dft::compute_backward(plan, reinterpret_cast(idata), odata); + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, std::complex* idata, std::complex* odata, int direction, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_oneMKL]"); if (direction == 1) { oneapi::mkl::dft::compute_forward(plan, idata, odata); } else { oneapi::mkl::dft::compute_backward(plan, idata, odata); } + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, std::complex* idata, std::complex* odata, int direction, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_oneMKL]"); if (direction == 1) { oneapi::mkl::dft::compute_forward(plan, idata, odata); } else { oneapi::mkl::dft::compute_backward(plan, idata, odata); } + Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_Transform.hpp b/fft/src/KokkosFFT_Transform.hpp index b23886ce..629f8560 100644 --- a/fft/src/KokkosFFT_Transform.hpp +++ b/fft/src/KokkosFFT_Transform.hpp @@ -35,11 +35,14 @@ void fft(const ExecutionSpace& exec_space, const InViewType& in, "and OutViewType."); static_assert(InViewType::rank() >= 1, "fft: View rank must be larger than or equal to 1"); + + Kokkos::Profiling::pushRegion("KokkosFFT::fft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::forward, axis, n); plan.execute(in, out, norm); + Kokkos::Profiling::popRegion(); } /// \brief One dimensional FFT in backward direction @@ -65,11 +68,14 @@ void ifft(const ExecutionSpace& exec_space, const InViewType& in, "and OutViewType."); static_assert(InViewType::rank() >= 1, "ifft: View rank must be larger than or equal to 1"); + + Kokkos::Profiling::pushRegion("KokkosFFT::ifft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::backward, axis, n); plan.execute(in, out, norm); + Kokkos::Profiling::popRegion(); } /// \brief One dimensional FFT for real input @@ -103,9 +109,12 @@ void rfft(const ExecutionSpace& exec_space, const InViewType& in, "rfft: InViewType must be real"); static_assert(KokkosFFT::Impl::is_complex_v, "rfft: OutViewType must be complex"); + + Kokkos::Profiling::pushRegion("KokkosFFT::rfft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); fft(exec_space, in, out, norm, axis, n); + Kokkos::Profiling::popRegion(); } /// \brief Inverse of rfft @@ -140,9 +149,12 @@ void irfft(const ExecutionSpace& exec_space, const InViewType& in, "irfft: InViewType must be complex"); static_assert(KokkosFFT::Impl::is_real_v, "irfft: OutViewType must be real"); + + Kokkos::Profiling::pushRegion("KokkosFFT::irfft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); ifft(exec_space, in, out, norm, axis, n); + Kokkos::Profiling::popRegion(); } /// \brief One dimensional FFT of a signal that has Hermitian symmetry @@ -178,6 +190,8 @@ void hfft(const ExecutionSpace& exec_space, const InViewType& in, "hfft: InViewType must be complex"); static_assert(KokkosFFT::Impl::is_real_v, "hfft: OutViewType must be real"); + + Kokkos::Profiling::pushRegion("KokkosFFT::hfft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); auto new_norm = KokkosFFT::Impl::swap_direction(norm); @@ -187,6 +201,7 @@ void hfft(const ExecutionSpace& exec_space, const InViewType& in, InViewType in_conj; KokkosFFT::Impl::conjugate(exec_space, in, in_conj); irfft(exec_space, in_conj, out, new_norm, axis, n); + Kokkos::Profiling::popRegion(); } /// \brief Inverse of hfft @@ -220,6 +235,8 @@ void ihfft(const ExecutionSpace& exec_space, const InViewType& in, "ihfft: InViewType must be real"); static_assert(KokkosFFT::Impl::is_complex_v, "ihfft: OutViewType must be complex"); + + Kokkos::Profiling::pushRegion("KokkosFFT::ihfft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); auto new_norm = KokkosFFT::Impl::swap_direction(norm); @@ -227,6 +244,7 @@ void ihfft(const ExecutionSpace& exec_space, const InViewType& in, rfft(exec_space, in, out, new_norm, axis, n); KokkosFFT::Impl::conjugate(exec_space, out, out_conj); Kokkos::deep_copy(exec_space, out, out_conj); + Kokkos::Profiling::popRegion(); } // 2D FFT @@ -253,11 +271,14 @@ void fft2(const ExecutionSpace& exec_space, const InViewType& in, "and OutViewType."); static_assert(InViewType::rank() >= 2, "fft2: View rank must be larger than or equal to 2"); + + Kokkos::Profiling::pushRegion("KokkosFFT::fft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::forward, axes, s); plan.execute(in, out, norm); + Kokkos::Profiling::popRegion(); } /// \brief Two dimensional FFT in backward direction @@ -283,11 +304,13 @@ void ifft2(const ExecutionSpace& exec_space, const InViewType& in, "and OutViewType."); static_assert(InViewType::rank() >= 2, "ifft2: View rank must be larger than or equal to 2"); + Kokkos::Profiling::pushRegion("KokkosFFT::ifft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::backward, axes, s); plan.execute(in, out, norm); + Kokkos::Profiling::popRegion(); } /// \brief Two dimensional FFT for real input @@ -321,9 +344,11 @@ void rfft2(const ExecutionSpace& exec_space, const InViewType& in, "rfft2: InViewType must be real"); static_assert(KokkosFFT::Impl::is_complex_v, "rfft2: OutViewType must be complex"); + Kokkos::Profiling::pushRegion("KokkosFFT::rfft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); fft2(exec_space, in, out, norm, axes, s); + Kokkos::Profiling::popRegion(); } /// \brief Inverse of rfft2 @@ -357,9 +382,11 @@ void irfft2(const ExecutionSpace& exec_space, const InViewType& in, "irfft2: InViewType must be complex"); static_assert(KokkosFFT::Impl::is_real_v, "irfft2: OutViewType must be real"); + Kokkos::Profiling::pushRegion("KokkosFFT::irfft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); ifft2(exec_space, in, out, norm, axes, s); + Kokkos::Profiling::popRegion(); } // ND FFT @@ -398,11 +425,14 @@ void fftn( static_assert( InViewType::rank() >= DIM, "fftn: View rank must be larger than or equal to the Rank of FFT axes"); + + Kokkos::Profiling::pushRegion("KokkosFFT::fftn"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::forward, axes, s); plan.execute(in, out, norm); + Kokkos::Profiling::popRegion(); } /// \brief Inverse of fftn @@ -441,11 +471,14 @@ void ifftn( static_assert( InViewType::rank() >= DIM, "ifftn: View rank must be larger than or equal to the Rank of FFT axes"); + + Kokkos::Profiling::pushRegion("KokkosFFT::ifftn"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::backward, axes, s); plan.execute(in, out, norm); + Kokkos::Profiling::popRegion(); } /// \brief N-dimensional FFT for real input @@ -492,9 +525,12 @@ void rfftn( "rfftn: InViewType must be real"); static_assert(KokkosFFT::Impl::is_complex_v, "rfftn: OutViewType must be complex"); + + Kokkos::Profiling::pushRegion("KokkosFFT::rfftn"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); fftn(exec_space, in, out, axes, norm, s); + Kokkos::Profiling::popRegion(); } /// \brief Inverse of rfftn @@ -541,9 +577,11 @@ void irfftn( "irfftn: InViewType must be complex"); static_assert(KokkosFFT::Impl::is_real_v, "irfftn: OutViewType must be real"); + Kokkos::Profiling::pushRegion("KokkosFFT::irfftn"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); ifftn(exec_space, in, out, axes, norm, s); + Kokkos::Profiling::popRegion(); } } // namespace KokkosFFT From 945462fa64a35ac1e41886f731ae1edb43008df7 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Thu, 28 Nov 2024 22:38:52 +0900 Subject: [PATCH 2/5] use ScopedRegion instead of pushRegion/popRegion --- fft/src/KokkosFFT_Cuda_plans.hpp | 18 ++++-------- fft/src/KokkosFFT_Cuda_transform.hpp | 18 ++++-------- fft/src/KokkosFFT_HIP_plans.hpp | 19 ++++--------- fft/src/KokkosFFT_HIP_transform.hpp | 18 ++++-------- fft/src/KokkosFFT_Host_plans.hpp | 7 ++--- fft/src/KokkosFFT_Host_transform.hpp | 18 ++++-------- fft/src/KokkosFFT_ROCM_plans.hpp | 8 ++---- fft/src/KokkosFFT_ROCM_transform.hpp | 18 ++++-------- fft/src/KokkosFFT_SYCL_plans.hpp | 7 ++--- fft/src/KokkosFFT_SYCL_transform.hpp | 18 ++++-------- fft/src/KokkosFFT_Transform.hpp | 42 ++++++++++------------------ 11 files changed, 61 insertions(+), 130 deletions(-) diff --git a/fft/src/KokkosFFT_Cuda_plans.hpp b/fft/src/KokkosFFT_Cuda_plans.hpp index 35208cff..30b3dc12 100644 --- a/fft/src/KokkosFFT_Cuda_plans.hpp +++ b/fft/src/KokkosFFT_Cuda_plans.hpp @@ -35,7 +35,7 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_cufft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_cufft]"); plan = std::make_unique(); cufftResult cufft_rt = cufftCreate(&(*plan)); @@ -55,8 +55,6 @@ auto create_plan(const ExecutionSpace& exec_space, cufft_rt = cufftPlan1d(&(*plan), nx, type, howmany); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlan1d failed"); - Kokkos::Profiling::popRegion(); - return fft_size; } @@ -82,7 +80,7 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_cufft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_cufft]"); plan = std::make_unique(); cufftResult cufft_rt = cufftCreate(&(*plan)); @@ -102,8 +100,6 @@ auto create_plan(const ExecutionSpace& exec_space, cufft_rt = cufftPlan2d(&(*plan), nx, ny, type); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlan2d failed"); - Kokkos::Profiling::popRegion(); - return fft_size; } @@ -129,7 +125,7 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_cufft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_cufft]"); plan = std::make_unique(); cufftResult cufft_rt = cufftCreate(&(*plan)); @@ -151,8 +147,6 @@ auto create_plan(const ExecutionSpace& exec_space, cufft_rt = cufftPlan3d(&(*plan), nx, ny, nz, type); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlan3d failed"); - Kokkos::Profiling::popRegion(); - return fft_size; } @@ -183,7 +177,7 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_cufft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_cufft]"); const int rank = fft_rank; constexpr auto type = KokkosFFT::Impl::transform_type, std::nullptr_t> = nullptr> void destroy_plan_and_info(std::unique_ptr& plan, InfoType&) { - Kokkos::Profiling::pushRegion("KokkosFFT::destroy_plan[TPL_cufft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::destroy_plan[TPL_cufft]"); cufftDestroy(*plan); - Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_Cuda_transform.hpp b/fft/src/KokkosFFT_Cuda_transform.hpp index f047230c..cc76ebf5 100644 --- a/fft/src/KokkosFFT_Cuda_transform.hpp +++ b/fft/src/KokkosFFT_Cuda_transform.hpp @@ -13,54 +13,48 @@ namespace Impl { template inline void exec_plan(cufftHandle& plan, cufftReal* idata, cufftComplex* odata, int /*direction*/, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_cufft]"); cufftResult cufft_rt = cufftExecR2C(plan, idata, odata); - Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecR2C failed"); } template inline void exec_plan(cufftHandle& plan, cufftDoubleReal* idata, cufftDoubleComplex* odata, int /*direction*/, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_cufft]"); cufftResult cufft_rt = cufftExecD2Z(plan, idata, odata); - Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecD2Z failed"); } template inline void exec_plan(cufftHandle& plan, cufftComplex* idata, cufftReal* odata, int /*direction*/, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_cufft]"); cufftResult cufft_rt = cufftExecC2R(plan, idata, odata); - Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecC2R failed"); } template inline void exec_plan(cufftHandle& plan, cufftDoubleComplex* idata, cufftDoubleReal* odata, int /*direction*/, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_cufft]"); cufftResult cufft_rt = cufftExecZ2D(plan, idata, odata); - Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecZ2D failed"); } template inline void exec_plan(cufftHandle& plan, cufftComplex* idata, cufftComplex* odata, int direction, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_cufft]"); cufftResult cufft_rt = cufftExecC2C(plan, idata, odata, direction); - Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecC2C failed"); } template inline void exec_plan(cufftHandle& plan, cufftDoubleComplex* idata, cufftDoubleComplex* odata, int direction, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_cufft]"); cufftResult cufft_rt = cufftExecZ2Z(plan, idata, odata, direction); - Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecZ2Z failed"); } } // namespace Impl diff --git a/fft/src/KokkosFFT_HIP_plans.hpp b/fft/src/KokkosFFT_HIP_plans.hpp index c520c1d5..46a6d616 100644 --- a/fft/src/KokkosFFT_HIP_plans.hpp +++ b/fft/src/KokkosFFT_HIP_plans.hpp @@ -35,7 +35,7 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_hipfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_hipfft]"); plan = std::make_unique(); hipfftResult hipfft_rt = hipfftCreate(&(*plan)); @@ -55,8 +55,6 @@ auto create_plan(const ExecutionSpace& exec_space, hipfft_rt = hipfftPlan1d(&(*plan), nx, type, howmany); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlan1d failed"); - Kokkos::Profiling::popRegion(); - return fft_size; } @@ -82,7 +80,7 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_hipfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_hipfft]"); plan = std::make_unique(); hipfftResult hipfft_rt = hipfftCreate(&(*plan)); @@ -102,8 +100,6 @@ auto create_plan(const ExecutionSpace& exec_space, hipfft_rt = hipfftPlan2d(&(*plan), nx, ny, type); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlan2d failed"); - Kokkos::Profiling::popRegion(); - return fft_size; } @@ -129,7 +125,7 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_hipfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_hipfft]"); plan = std::make_unique(); hipfftResult hipfft_rt = hipfftCreate(&(*plan)); @@ -151,8 +147,6 @@ auto create_plan(const ExecutionSpace& exec_space, hipfft_rt = hipfftPlan3d(&(*plan), nx, ny, nz, type); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlan3d failed"); - Kokkos::Profiling::popRegion(); - return fft_size; } @@ -183,7 +177,8 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_hipfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_hipfft]"); + const int rank = fft_rank; constexpr auto type = KokkosFFT::Impl::transform_type, std::nullptr_t> = nullptr> void destroy_plan_and_info(std::unique_ptr& plan, InfoType&) { - Kokkos::Profiling::pushRegion("KokkosFFT::destroy_plan[TPL_hipfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::destroy_plan[TPL_hipfft]"); hipfftDestroy(*plan); - Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_HIP_transform.hpp b/fft/src/KokkosFFT_HIP_transform.hpp index 26fc6836..fe452acc 100644 --- a/fft/src/KokkosFFT_HIP_transform.hpp +++ b/fft/src/KokkosFFT_HIP_transform.hpp @@ -13,54 +13,48 @@ namespace Impl { template inline void exec_plan(hipfftHandle& plan, hipfftReal* idata, hipfftComplex* odata, int /*direction*/, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_cufft]"); hipfftResult hipfft_rt = hipfftExecR2C(plan, idata, odata); - Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecR2C failed"); } template inline void exec_plan(hipfftHandle& plan, hipfftDoubleReal* idata, hipfftDoubleComplex* odata, int /*direction*/, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_hipfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_hipfft]"); hipfftResult hipfft_rt = hipfftExecD2Z(plan, idata, odata); - Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecD2Z failed"); } template inline void exec_plan(hipfftHandle& plan, hipfftComplex* idata, hipfftReal* odata, int /*direction*/, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_hipfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_hipfft]"); hipfftResult hipfft_rt = hipfftExecC2R(plan, idata, odata); - Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecC2R failed"); } template inline void exec_plan(hipfftHandle& plan, hipfftDoubleComplex* idata, hipfftDoubleReal* odata, int /*direction*/, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_hipfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_hipfft]"); hipfftResult hipfft_rt = hipfftExecZ2D(plan, idata, odata); - Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecZ2D failed"); } template inline void exec_plan(hipfftHandle& plan, hipfftComplex* idata, hipfftComplex* odata, int direction, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_hipfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_hipfft]"); hipfftResult hipfft_rt = hipfftExecC2C(plan, idata, odata, direction); - Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecC2C failed"); } template inline void exec_plan(hipfftHandle& plan, hipfftDoubleComplex* idata, hipfftDoubleComplex* odata, int direction, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_hipfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_hipfft]"); hipfftResult hipfft_rt = hipfftExecZ2Z(plan, idata, odata, direction); - Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecZ2Z failed"); } } // namespace Impl diff --git a/fft/src/KokkosFFT_Host_plans.hpp b/fft/src/KokkosFFT_Host_plans.hpp index 4ce21583..99068f8d 100644 --- a/fft/src/KokkosFFT_Host_plans.hpp +++ b/fft/src/KokkosFFT_Host_plans.hpp @@ -55,7 +55,7 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_fftw]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_fftw]"); const int rank = fft_rank; init_threads, std::nullptr_t> = nullptr> void destroy_plan_and_info(std::unique_ptr& plan, InfoType&) { - Kokkos::Profiling::pushRegion("KokkosFFT::destroy_plan[TPL_fftw]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::destroy_plan[TPL_fftw]"); if constexpr (std::is_same_v) { fftwf_destroy_plan(*plan); } else { fftw_destroy_plan(*plan); } - Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_Host_transform.hpp b/fft/src/KokkosFFT_Host_transform.hpp index 147461b9..4b35452c 100644 --- a/fft/src/KokkosFFT_Host_transform.hpp +++ b/fft/src/KokkosFFT_Host_transform.hpp @@ -12,49 +12,43 @@ namespace Impl { template void exec_plan(PlanType& plan, float* idata, fftwf_complex* odata, int /*direction*/, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_fftw]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_fftw]"); fftwf_execute_dft_r2c(plan, idata, odata); - Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, double* idata, fftw_complex* odata, int /*direction*/, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_fftw]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_fftw]"); fftw_execute_dft_r2c(plan, idata, odata); - Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, fftwf_complex* idata, float* odata, int /*direction*/, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_fftw]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_fftw]"); fftwf_execute_dft_c2r(plan, idata, odata); - Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, fftw_complex* idata, double* odata, int /*direction*/, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_fftw]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_fftw]"); fftw_execute_dft_c2r(plan, idata, odata); - Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, fftwf_complex* idata, fftwf_complex* odata, int /*direction*/, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_fftw]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_fftw]"); fftwf_execute_dft(plan, idata, odata); - Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType plan, fftw_complex* idata, fftw_complex* odata, int /*direction*/, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_fftw]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_fftw]"); fftw_execute_dft(plan, idata, odata); - Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_ROCM_plans.hpp b/fft/src/KokkosFFT_ROCM_plans.hpp index fed385e0..24d92998 100644 --- a/fft/src/KokkosFFT_ROCM_plans.hpp +++ b/fft/src/KokkosFFT_ROCM_plans.hpp @@ -110,7 +110,7 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_rocfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_rocfft]"); constexpr auto type = KokkosFFT::Impl::transform_type = nullptr> void destroy_plan_and_info(std::unique_ptr& plan, InfoType& execution_info) { - Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_rocfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::destroy_plan[TPL_rocfft]"); rocfft_execution_info_destroy(execution_info); rocfft_plan_destroy(*plan); - - Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_ROCM_transform.hpp b/fft/src/KokkosFFT_ROCM_transform.hpp index 11f1a63e..7ad152fe 100644 --- a/fft/src/KokkosFFT_ROCM_transform.hpp +++ b/fft/src/KokkosFFT_ROCM_transform.hpp @@ -14,10 +14,9 @@ namespace Impl { inline void exec_plan(rocfft_plan& plan, float* idata, std::complex* odata, int /*direction*/, const rocfft_execution_info& execution_info) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_rocfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_rocfft]"); rocfft_status status = rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); - Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for R2C failed"); } @@ -25,10 +24,9 @@ inline void exec_plan(rocfft_plan& plan, float* idata, inline void exec_plan(rocfft_plan& plan, double* idata, std::complex* odata, int /*direction*/, const rocfft_execution_info& execution_info) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_rocfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_rocfft]"); rocfft_status status = rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); - Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for D2Z failed"); } @@ -36,10 +34,9 @@ inline void exec_plan(rocfft_plan& plan, double* idata, inline void exec_plan(rocfft_plan& plan, std::complex* idata, float* odata, int /*direction*/, const rocfft_execution_info& execution_info) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_rocfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_rocfft]"); rocfft_status status = rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); - Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for C2R failed"); } @@ -47,10 +44,9 @@ inline void exec_plan(rocfft_plan& plan, std::complex* idata, inline void exec_plan(rocfft_plan& plan, std::complex* idata, double* odata, int /*direction*/, const rocfft_execution_info& execution_info) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_rocfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_rocfft]"); rocfft_status status = rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); - Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for Z2D failed"); } @@ -58,10 +54,9 @@ inline void exec_plan(rocfft_plan& plan, std::complex* idata, inline void exec_plan(rocfft_plan& plan, std::complex* idata, std::complex* odata, int /*direction*/, const rocfft_execution_info& execution_info) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_rocfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_rocfft]"); rocfft_status status = rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); - Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for C2C failed"); } @@ -69,10 +64,9 @@ inline void exec_plan(rocfft_plan& plan, std::complex* idata, inline void exec_plan(rocfft_plan& plan, std::complex* idata, std::complex* odata, int /*direction*/, const rocfft_execution_info& execution_info) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_rocfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_rocfft]"); rocfft_status status = rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); - Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for Z2Z failed"); } diff --git a/fft/src/KokkosFFT_SYCL_plans.hpp b/fft/src/KokkosFFT_SYCL_plans.hpp index a1ead852..d84c86f2 100644 --- a/fft/src/KokkosFFT_SYCL_plans.hpp +++ b/fft/src/KokkosFFT_SYCL_plans.hpp @@ -68,7 +68,7 @@ auto create_plan(const ExecutionSpace& exec_space, InViewType::rank() >= fft_rank, "KokkosFFT::create_plan: Rank of View must be larger than Rank of FFT."); - Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_oneMKL]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_oneMKL]"); auto [in_extents, out_extents, fft_extents, howmany] = KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); @@ -109,8 +109,6 @@ auto create_plan(const ExecutionSpace& exec_space, sycl::queue q = exec_space.sycl_queue(); plan->commit(q); - Kokkos::Profiling::popRegion(); - return fft_size; } @@ -120,8 +118,7 @@ template < std::nullptr_t> = nullptr> void destroy_plan_and_info(std::unique_ptr&, InfoType&) { // In oneMKL, plans are destroybed by destructor - Kokkos::Profiling::pushRegion("KokkosFFT::destroy_plan[TPL_oneMKL]"); - Kokkos::Profiling::popRegion(); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::destroy_plan[TPL_oneMKL]"); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_SYCL_transform.hpp b/fft/src/KokkosFFT_SYCL_transform.hpp index 0ee67b2e..04d94417 100644 --- a/fft/src/KokkosFFT_SYCL_transform.hpp +++ b/fft/src/KokkosFFT_SYCL_transform.hpp @@ -13,61 +13,55 @@ namespace Impl { template void exec_plan(PlanType& plan, float* idata, std::complex* odata, int /*direction*/, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_oneMKL]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_oneMKL]"); oneapi::mkl::dft::compute_forward(plan, idata, reinterpret_cast(odata)); - Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, double* idata, std::complex* odata, int /*direction*/, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_oneMKL]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_oneMKL]"); oneapi::mkl::dft::compute_forward(plan, idata, reinterpret_cast(odata)); - Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, std::complex* idata, float* odata, int /*direction*/, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_oneMKL]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_oneMKL]"); oneapi::mkl::dft::compute_backward(plan, reinterpret_cast(idata), odata); - Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, std::complex* idata, double* odata, int /*direction*/, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_oneMKL]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_oneMKL]"); oneapi::mkl::dft::compute_backward(plan, reinterpret_cast(idata), odata); - Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, std::complex* idata, std::complex* odata, int direction, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_oneMKL]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_oneMKL]"); if (direction == 1) { oneapi::mkl::dft::compute_forward(plan, idata, odata); } else { oneapi::mkl::dft::compute_backward(plan, idata, odata); } - Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, std::complex* idata, std::complex* odata, int direction, Args...) { - Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_oneMKL]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_oneMKL]"); if (direction == 1) { oneapi::mkl::dft::compute_forward(plan, idata, odata); } else { oneapi::mkl::dft::compute_backward(plan, idata, odata); } - Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_Transform.hpp b/fft/src/KokkosFFT_Transform.hpp index 629f8560..b70bc3e3 100644 --- a/fft/src/KokkosFFT_Transform.hpp +++ b/fft/src/KokkosFFT_Transform.hpp @@ -36,13 +36,12 @@ void fft(const ExecutionSpace& exec_space, const InViewType& in, static_assert(InViewType::rank() >= 1, "fft: View rank must be larger than or equal to 1"); - Kokkos::Profiling::pushRegion("KokkosFFT::fft"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::fft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::forward, axis, n); plan.execute(in, out, norm); - Kokkos::Profiling::popRegion(); } /// \brief One dimensional FFT in backward direction @@ -69,13 +68,12 @@ void ifft(const ExecutionSpace& exec_space, const InViewType& in, static_assert(InViewType::rank() >= 1, "ifft: View rank must be larger than or equal to 1"); - Kokkos::Profiling::pushRegion("KokkosFFT::ifft"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::ifft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::backward, axis, n); plan.execute(in, out, norm); - Kokkos::Profiling::popRegion(); } /// \brief One dimensional FFT for real input @@ -110,11 +108,10 @@ void rfft(const ExecutionSpace& exec_space, const InViewType& in, static_assert(KokkosFFT::Impl::is_complex_v, "rfft: OutViewType must be complex"); - Kokkos::Profiling::pushRegion("KokkosFFT::rfft"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::rfft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); fft(exec_space, in, out, norm, axis, n); - Kokkos::Profiling::popRegion(); } /// \brief Inverse of rfft @@ -150,11 +147,10 @@ void irfft(const ExecutionSpace& exec_space, const InViewType& in, static_assert(KokkosFFT::Impl::is_real_v, "irfft: OutViewType must be real"); - Kokkos::Profiling::pushRegion("KokkosFFT::irfft"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::irfft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); ifft(exec_space, in, out, norm, axis, n); - Kokkos::Profiling::popRegion(); } /// \brief One dimensional FFT of a signal that has Hermitian symmetry @@ -191,7 +187,7 @@ void hfft(const ExecutionSpace& exec_space, const InViewType& in, static_assert(KokkosFFT::Impl::is_real_v, "hfft: OutViewType must be real"); - Kokkos::Profiling::pushRegion("KokkosFFT::hfft"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::hfft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); auto new_norm = KokkosFFT::Impl::swap_direction(norm); @@ -201,7 +197,6 @@ void hfft(const ExecutionSpace& exec_space, const InViewType& in, InViewType in_conj; KokkosFFT::Impl::conjugate(exec_space, in, in_conj); irfft(exec_space, in_conj, out, new_norm, axis, n); - Kokkos::Profiling::popRegion(); } /// \brief Inverse of hfft @@ -236,7 +231,7 @@ void ihfft(const ExecutionSpace& exec_space, const InViewType& in, static_assert(KokkosFFT::Impl::is_complex_v, "ihfft: OutViewType must be complex"); - Kokkos::Profiling::pushRegion("KokkosFFT::ihfft"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::ihfft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); auto new_norm = KokkosFFT::Impl::swap_direction(norm); @@ -244,7 +239,6 @@ void ihfft(const ExecutionSpace& exec_space, const InViewType& in, rfft(exec_space, in, out, new_norm, axis, n); KokkosFFT::Impl::conjugate(exec_space, out, out_conj); Kokkos::deep_copy(exec_space, out, out_conj); - Kokkos::Profiling::popRegion(); } // 2D FFT @@ -272,13 +266,12 @@ void fft2(const ExecutionSpace& exec_space, const InViewType& in, static_assert(InViewType::rank() >= 2, "fft2: View rank must be larger than or equal to 2"); - Kokkos::Profiling::pushRegion("KokkosFFT::fft2"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::fft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::forward, axes, s); plan.execute(in, out, norm); - Kokkos::Profiling::popRegion(); } /// \brief Two dimensional FFT in backward direction @@ -304,13 +297,12 @@ void ifft2(const ExecutionSpace& exec_space, const InViewType& in, "and OutViewType."); static_assert(InViewType::rank() >= 2, "ifft2: View rank must be larger than or equal to 2"); - Kokkos::Profiling::pushRegion("KokkosFFT::ifft2"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::ifft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::backward, axes, s); plan.execute(in, out, norm); - Kokkos::Profiling::popRegion(); } /// \brief Two dimensional FFT for real input @@ -344,11 +336,10 @@ void rfft2(const ExecutionSpace& exec_space, const InViewType& in, "rfft2: InViewType must be real"); static_assert(KokkosFFT::Impl::is_complex_v, "rfft2: OutViewType must be complex"); - Kokkos::Profiling::pushRegion("KokkosFFT::rfft2"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::rfft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); fft2(exec_space, in, out, norm, axes, s); - Kokkos::Profiling::popRegion(); } /// \brief Inverse of rfft2 @@ -382,11 +373,10 @@ void irfft2(const ExecutionSpace& exec_space, const InViewType& in, "irfft2: InViewType must be complex"); static_assert(KokkosFFT::Impl::is_real_v, "irfft2: OutViewType must be real"); - Kokkos::Profiling::pushRegion("KokkosFFT::irfft2"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::irfft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); ifft2(exec_space, in, out, norm, axes, s); - Kokkos::Profiling::popRegion(); } // ND FFT @@ -426,13 +416,12 @@ void fftn( InViewType::rank() >= DIM, "fftn: View rank must be larger than or equal to the Rank of FFT axes"); - Kokkos::Profiling::pushRegion("KokkosFFT::fftn"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::fftn"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::forward, axes, s); plan.execute(in, out, norm); - Kokkos::Profiling::popRegion(); } /// \brief Inverse of fftn @@ -472,13 +461,12 @@ void ifftn( InViewType::rank() >= DIM, "ifftn: View rank must be larger than or equal to the Rank of FFT axes"); - Kokkos::Profiling::pushRegion("KokkosFFT::ifftn"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::ifftn"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::backward, axes, s); plan.execute(in, out, norm); - Kokkos::Profiling::popRegion(); } /// \brief N-dimensional FFT for real input @@ -526,11 +514,10 @@ void rfftn( static_assert(KokkosFFT::Impl::is_complex_v, "rfftn: OutViewType must be complex"); - Kokkos::Profiling::pushRegion("KokkosFFT::rfftn"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::rfftn"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); fftn(exec_space, in, out, axes, norm, s); - Kokkos::Profiling::popRegion(); } /// \brief Inverse of rfftn @@ -577,11 +564,10 @@ void irfftn( "irfftn: InViewType must be complex"); static_assert(KokkosFFT::Impl::is_real_v, "irfftn: OutViewType must be real"); - Kokkos::Profiling::pushRegion("KokkosFFT::irfftn"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::irfftn"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); ifftn(exec_space, in, out, axes, norm, s); - Kokkos::Profiling::popRegion(); } } // namespace KokkosFFT From e6beb90abfb2fccee5ed0c4568de53b4cb73afe7 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Thu, 28 Nov 2024 22:49:52 +0900 Subject: [PATCH 3/5] Include Kokkos_Profiling_ScopedRegion.hpp --- fft/src/KokkosFFT_Cuda_plans.hpp | 1 + fft/src/KokkosFFT_Cuda_transform.hpp | 1 + fft/src/KokkosFFT_HIP_plans.hpp | 1 + fft/src/KokkosFFT_HIP_transform.hpp | 1 + fft/src/KokkosFFT_Host_plans.hpp | 1 + fft/src/KokkosFFT_Host_transform.hpp | 1 + fft/src/KokkosFFT_ROCM_plans.hpp | 1 + fft/src/KokkosFFT_ROCM_transform.hpp | 1 + fft/src/KokkosFFT_SYCL_plans.hpp | 1 + fft/src/KokkosFFT_SYCL_transform.hpp | 3 ++- fft/src/KokkosFFT_Transform.hpp | 1 + 11 files changed, 12 insertions(+), 1 deletion(-) diff --git a/fft/src/KokkosFFT_Cuda_plans.hpp b/fft/src/KokkosFFT_Cuda_plans.hpp index 30b3dc12..6c7bbd26 100644 --- a/fft/src/KokkosFFT_Cuda_plans.hpp +++ b/fft/src/KokkosFFT_Cuda_plans.hpp @@ -6,6 +6,7 @@ #define KOKKOSFFT_CUDA_PLANS_HPP #include +#include #include "KokkosFFT_Cuda_types.hpp" #include "KokkosFFT_Extents.hpp" #include "KokkosFFT_traits.hpp" diff --git a/fft/src/KokkosFFT_Cuda_transform.hpp b/fft/src/KokkosFFT_Cuda_transform.hpp index cc76ebf5..5b7a02be 100644 --- a/fft/src/KokkosFFT_Cuda_transform.hpp +++ b/fft/src/KokkosFFT_Cuda_transform.hpp @@ -6,6 +6,7 @@ #define KOKKOSFFT_CUDA_TRANSFORM_HPP #include +#include #include "KokkosFFT_asserts.hpp" namespace KokkosFFT { diff --git a/fft/src/KokkosFFT_HIP_plans.hpp b/fft/src/KokkosFFT_HIP_plans.hpp index 46a6d616..1dc3a331 100644 --- a/fft/src/KokkosFFT_HIP_plans.hpp +++ b/fft/src/KokkosFFT_HIP_plans.hpp @@ -6,6 +6,7 @@ #define KOKKOSFFT_HIP_PLANS_HPP #include +#include #include "KokkosFFT_HIP_types.hpp" #include "KokkosFFT_Extents.hpp" #include "KokkosFFT_traits.hpp" diff --git a/fft/src/KokkosFFT_HIP_transform.hpp b/fft/src/KokkosFFT_HIP_transform.hpp index fe452acc..07062f6b 100644 --- a/fft/src/KokkosFFT_HIP_transform.hpp +++ b/fft/src/KokkosFFT_HIP_transform.hpp @@ -6,6 +6,7 @@ #define KOKKOSFFT_HIP_TRANSFORM_HPP #include +#include #include "KokkosFFT_asserts.hpp" namespace KokkosFFT { diff --git a/fft/src/KokkosFFT_Host_plans.hpp b/fft/src/KokkosFFT_Host_plans.hpp index 99068f8d..7f24dde8 100644 --- a/fft/src/KokkosFFT_Host_plans.hpp +++ b/fft/src/KokkosFFT_Host_plans.hpp @@ -6,6 +6,7 @@ #define KOKKOSFFT_HOST_PLANS_HPP #include +#include #include "KokkosFFT_default_types.hpp" #include "KokkosFFT_Extents.hpp" #include "KokkosFFT_traits.hpp" diff --git a/fft/src/KokkosFFT_Host_transform.hpp b/fft/src/KokkosFFT_Host_transform.hpp index 4b35452c..749e58b3 100644 --- a/fft/src/KokkosFFT_Host_transform.hpp +++ b/fft/src/KokkosFFT_Host_transform.hpp @@ -6,6 +6,7 @@ #define KOKKOSFFT_HOST_TRANSFORM_HPP #include +#include namespace KokkosFFT { namespace Impl { diff --git a/fft/src/KokkosFFT_ROCM_plans.hpp b/fft/src/KokkosFFT_ROCM_plans.hpp index 24d92998..41d221b8 100644 --- a/fft/src/KokkosFFT_ROCM_plans.hpp +++ b/fft/src/KokkosFFT_ROCM_plans.hpp @@ -7,6 +7,7 @@ #include #include +#include #include "KokkosFFT_ROCM_types.hpp" #include "KokkosFFT_Extents.hpp" #include "KokkosFFT_traits.hpp" diff --git a/fft/src/KokkosFFT_ROCM_transform.hpp b/fft/src/KokkosFFT_ROCM_transform.hpp index 7ad152fe..d6e4860d 100644 --- a/fft/src/KokkosFFT_ROCM_transform.hpp +++ b/fft/src/KokkosFFT_ROCM_transform.hpp @@ -7,6 +7,7 @@ #include #include +#include #include "KokkosFFT_asserts.hpp" namespace KokkosFFT { diff --git a/fft/src/KokkosFFT_SYCL_plans.hpp b/fft/src/KokkosFFT_SYCL_plans.hpp index d84c86f2..e8da9fdc 100644 --- a/fft/src/KokkosFFT_SYCL_plans.hpp +++ b/fft/src/KokkosFFT_SYCL_plans.hpp @@ -7,6 +7,7 @@ #include #include +#include #include "KokkosFFT_SYCL_types.hpp" #include "KokkosFFT_Extents.hpp" #include "KokkosFFT_traits.hpp" diff --git a/fft/src/KokkosFFT_SYCL_transform.hpp b/fft/src/KokkosFFT_SYCL_transform.hpp index 04d94417..39c5d2b6 100644 --- a/fft/src/KokkosFFT_SYCL_transform.hpp +++ b/fft/src/KokkosFFT_SYCL_transform.hpp @@ -5,8 +5,9 @@ #ifndef KOKKOSFFT_SYCL_TRANSFORM_HPP #define KOKKOSFFT_SYCL_TRANSFORM_HPP -#include #include +#include +#include namespace KokkosFFT { namespace Impl { diff --git a/fft/src/KokkosFFT_Transform.hpp b/fft/src/KokkosFFT_Transform.hpp index b70bc3e3..374ff4dd 100644 --- a/fft/src/KokkosFFT_Transform.hpp +++ b/fft/src/KokkosFFT_Transform.hpp @@ -6,6 +6,7 @@ #define KOKKOSFFT_TRANSFORM_HPP #include +#include #include "KokkosFFT_traits.hpp" #include "KokkosFFT_normalization.hpp" #include "KokkosFFT_utils.hpp" From d60c6e9c995aceca60959cf090030f4f7216acd8 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Tue, 21 Jan 2025 18:53:41 +0900 Subject: [PATCH 4/5] fix: conflicts in KokkosFFT_ROCM_plans.hpp --- fft/src/KokkosFFT_ROCM_plans.hpp | 101 +++++++++++++------------------ 1 file changed, 41 insertions(+), 60 deletions(-) diff --git a/fft/src/KokkosFFT_ROCM_plans.hpp b/fft/src/KokkosFFT_ROCM_plans.hpp index 25853b58..d21ffb63 100644 --- a/fft/src/KokkosFFT_ROCM_plans.hpp +++ b/fft/src/KokkosFFT_ROCM_plans.hpp @@ -5,84 +5,65 @@ #ifndef KOKKOSFFT_ROCM_PLANS_HPP #define KOKKOSFFT_ROCM_PLANS_HPP -<<<<<<< HEAD #include #include #include - ======= ->>>>>>> main #include "KokkosFFT_ROCM_types.hpp" #include "KokkosFFT_Extents.hpp" #include "KokkosFFT_traits.hpp" #include "KokkosFFT_asserts.hpp" #include "KokkosFFT_utils.hpp" - namespace KokkosFFT { - namespace Impl { +namespace KokkosFFT { +namespace Impl { - // batched transform, over ND Views - template , - std::nullptr_t> = nullptr> - auto create_plan(const ExecutionSpace& exec_space, - std::unique_ptr& plan, const InViewType& in, - const OutViewType& out, Direction direction, - axis_type axes, shape_type s, - bool is_inplace) { - static_assert( - KokkosFFT::Impl::are_operatable_views_v, - "create_plan: InViewType and OutViewType must have the same base " - "floating point type (float/double), the same layout " - "(LayoutLeft/LayoutRight), " - "and the same rank. ExecutionSpace must be accessible to the data in " - "InViewType and OutViewType."); +// batched transform, over ND Views +template , + std::nullptr_t> = nullptr> +auto create_plan(const ExecutionSpace& exec_space, + std::unique_ptr& plan, const InViewType& in, + const OutViewType& out, Direction direction, + axis_type axes, shape_type s, + bool is_inplace) { + static_assert( + KokkosFFT::Impl::are_operatable_views_v, + "create_plan: InViewType and OutViewType must have the same base " + "floating point type (float/double), the same layout " + "(LayoutLeft/LayoutRight), " + "and the same rank. ExecutionSpace must be accessible to the data in " + "InViewType and OutViewType."); - static_assert(InViewType::rank() >= fft_rank, - "KokkosFFT::create_plan: Rank of View must be larger than " - "Rank of FFT."); + static_assert(InViewType::rank() >= fft_rank, + "KokkosFFT::create_plan: Rank of View must be larger than " + "Rank of FFT."); - using in_value_type = typename InViewType::non_const_value_type; - using out_value_type = typename OutViewType::non_const_value_type; + using in_value_type = typename InViewType::non_const_value_type; + using out_value_type = typename OutViewType::non_const_value_type; - Kokkos::Profiling::ScopedRegion region( - "KokkosFFT::create_plan[TPL_rocfft]"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_rocfft]"); - constexpr auto type = - KokkosFFT::Impl::transform_type::type(); - auto [in_extents, out_extents, fft_extents, howmany] = - KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); + constexpr auto type = + KokkosFFT::Impl::transform_type::type(); + auto [in_extents, out_extents, fft_extents, howmany] = + KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); - // Create a plan - plan = - std::make_unique(type, in_extents, out_extents, fft_extents, - howmany, direction, is_inplace); - plan->commit(exec_space); + // Create a plan + plan = std::make_unique(type, in_extents, out_extents, fft_extents, + howmany, direction, is_inplace); + plan->commit(exec_space); - // Calculate the total size of the FFT - int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, - std::multiplies<>()); + // Calculate the total size of the FFT + int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, + std::multiplies<>()); - return fft_size; - } + return fft_size; +} -<<<<<<< HEAD - template , - std::nullptr_t> = nullptr> - void destroy_plan_and_info(std::unique_ptr& plan, - InfoType& execution_info) { - Kokkos::Profiling::ScopedRegion region( - "KokkosFFT::destroy_plan[TPL_rocfft]"); - - rocfft_execution_info_destroy(execution_info); - rocfft_plan_destroy(*plan); - } -======= ->>>>>>> main - } // namespace Impl +} // namespace Impl } // namespace KokkosFFT #endif From a03bbf248667326b6d6bf00111e5f60a5865b842 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Sat, 25 Jan 2025 16:26:19 +0900 Subject: [PATCH 5/5] Add an empty line before introducing the scoped region --- fft/src/KokkosFFT_ROCM_plans.hpp | 1 - fft/src/KokkosFFT_SYCL_plans.hpp | 1 - fft/src/KokkosFFT_Transform.hpp | 4 ++++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fft/src/KokkosFFT_ROCM_plans.hpp b/fft/src/KokkosFFT_ROCM_plans.hpp index d21ffb63..198b5410 100644 --- a/fft/src/KokkosFFT_ROCM_plans.hpp +++ b/fft/src/KokkosFFT_ROCM_plans.hpp @@ -44,7 +44,6 @@ auto create_plan(const ExecutionSpace& exec_space, using out_value_type = typename OutViewType::non_const_value_type; Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_rocfft]"); - constexpr auto type = KokkosFFT::Impl::transform_type::type(); diff --git a/fft/src/KokkosFFT_SYCL_plans.hpp b/fft/src/KokkosFFT_SYCL_plans.hpp index 2fed7d18..166b634b 100644 --- a/fft/src/KokkosFFT_SYCL_plans.hpp +++ b/fft/src/KokkosFFT_SYCL_plans.hpp @@ -69,7 +69,6 @@ auto create_plan(const ExecutionSpace& exec_space, "KokkosFFT::create_plan: Rank of View must be larger than Rank of FFT."); Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_oneMKL]"); - auto [in_extents, out_extents, fft_extents, howmany] = KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); int idist = std::accumulate(in_extents.begin(), in_extents.end(), 1, diff --git a/fft/src/KokkosFFT_Transform.hpp b/fft/src/KokkosFFT_Transform.hpp index 0dba4d6e..005de032 100644 --- a/fft/src/KokkosFFT_Transform.hpp +++ b/fft/src/KokkosFFT_Transform.hpp @@ -298,6 +298,7 @@ void ifft2(const ExecutionSpace& exec_space, const InViewType& in, "and OutViewType."); static_assert(InViewType::rank() >= 2, "ifft2: View rank must be larger than or equal to 2"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::ifft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); @@ -337,6 +338,7 @@ void rfft2(const ExecutionSpace& exec_space, const InViewType& in, "rfft2: InViewType must be real"); static_assert(KokkosFFT::Impl::is_complex_v, "rfft2: OutViewType must be complex"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::rfft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); @@ -374,6 +376,7 @@ void irfft2(const ExecutionSpace& exec_space, const InViewType& in, "irfft2: InViewType must be complex"); static_assert(KokkosFFT::Impl::is_real_v, "irfft2: OutViewType must be real"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::irfft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); @@ -565,6 +568,7 @@ void irfftn( "irfftn: InViewType must be complex"); static_assert(KokkosFFT::Impl::is_real_v, "irfftn: OutViewType must be real"); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::irfftn"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views");