diff --git a/fft/src/KokkosFFT_Cuda_plans.hpp b/fft/src/KokkosFFT_Cuda_plans.hpp index dc8423c8..b745bed3 100644 --- a/fft/src/KokkosFFT_Cuda_plans.hpp +++ b/fft/src/KokkosFFT_Cuda_plans.hpp @@ -6,6 +6,7 @@ #define KOKKOSFFT_CUDA_PLANS_HPP #include +#include #include "KokkosFFT_Cuda_types.hpp" #include "KokkosFFT_Extents.hpp" #include "KokkosFFT_traits.hpp" @@ -33,6 +34,8 @@ auto create_plan(const ExecutionSpace& exec_space, "InViewType and OutViewType."); using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_cufft]"); auto type = KokkosFFT::Impl::transform_type::type(); auto [in_extents, out_extents, fft_extents, howmany] = @@ -66,6 +69,8 @@ auto create_plan(const ExecutionSpace& exec_space, "InViewType and OutViewType."); using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_cufft]"); auto type = KokkosFFT::Impl::transform_type::type(); [[maybe_unused]] auto [in_extents, out_extents, fft_extents, howmany] = @@ -99,6 +104,8 @@ auto create_plan(const ExecutionSpace& exec_space, "InViewType and OutViewType."); using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_cufft]"); auto type = KokkosFFT::Impl::transform_type::type(); [[maybe_unused]] auto [in_extents, out_extents, fft_extents, howmany] = @@ -139,7 +146,9 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - const int rank = fft_rank; + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_cufft]"); + const int rank = fft_rank; constexpr auto type = KokkosFFT::Impl::transform_type::type(); diff --git a/fft/src/KokkosFFT_Cuda_transform.hpp b/fft/src/KokkosFFT_Cuda_transform.hpp index 8a1663da..5cfb070e 100644 --- a/fft/src/KokkosFFT_Cuda_transform.hpp +++ b/fft/src/KokkosFFT_Cuda_transform.hpp @@ -6,6 +6,7 @@ #define KOKKOSFFT_CUDA_TRANSFORM_HPP #include +#include #include "KokkosFFT_asserts.hpp" #include "KokkosFFT_Cuda_types.hpp" @@ -14,6 +15,8 @@ namespace Impl { inline void exec_plan(const ScopedCufftPlan& scoped_plan, cufftReal* idata, cufftComplex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_cufftExecR2C]"); cufftResult cufft_rt = cufftExecR2C(scoped_plan.plan(), idata, odata); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecR2C failed"); } @@ -21,12 +24,16 @@ inline void exec_plan(const ScopedCufftPlan& scoped_plan, cufftReal* idata, inline void exec_plan(const ScopedCufftPlan& scoped_plan, cufftDoubleReal* idata, cufftDoubleComplex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_cufftExecD2Z]"); cufftResult cufft_rt = cufftExecD2Z(scoped_plan.plan(), idata, odata); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecD2Z failed"); } inline void exec_plan(const ScopedCufftPlan& scoped_plan, cufftComplex* idata, cufftReal* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_cufftExecC2R]"); cufftResult cufft_rt = cufftExecC2R(scoped_plan.plan(), idata, odata); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecC2R failed"); } @@ -34,12 +41,16 @@ inline void exec_plan(const ScopedCufftPlan& scoped_plan, cufftComplex* idata, inline void exec_plan(const ScopedCufftPlan& scoped_plan, cufftDoubleComplex* idata, cufftDoubleReal* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_cufftExecZ2D]"); cufftResult cufft_rt = cufftExecZ2D(scoped_plan.plan(), idata, odata); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecZ2D failed"); } inline void exec_plan(const ScopedCufftPlan& scoped_plan, cufftComplex* idata, cufftComplex* odata, int direction) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_cufftExecC2C]"); cufftResult cufft_rt = cufftExecC2C(scoped_plan.plan(), idata, odata, direction); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecC2C failed"); @@ -48,6 +59,8 @@ inline void exec_plan(const ScopedCufftPlan& scoped_plan, cufftComplex* idata, inline void exec_plan(const ScopedCufftPlan& scoped_plan, cufftDoubleComplex* idata, cufftDoubleComplex* odata, int direction) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_cufftExecZ2Z]"); cufftResult cufft_rt = cufftExecZ2Z(scoped_plan.plan(), idata, odata, direction); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecZ2Z failed"); diff --git a/fft/src/KokkosFFT_Cuda_types.hpp b/fft/src/KokkosFFT_Cuda_types.hpp index f5a1fe62..00e123a2 100644 --- a/fft/src/KokkosFFT_Cuda_types.hpp +++ b/fft/src/KokkosFFT_Cuda_types.hpp @@ -7,6 +7,7 @@ #include #include +#include #include "KokkosFFT_common_types.hpp" #include "KokkosFFT_asserts.hpp" @@ -56,6 +57,8 @@ struct ScopedCufftPlan { } ~ScopedCufftPlan() noexcept { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::cleanup_plan[TPL_cufft]"); cufftResult cufft_rt = cufftDestroy(m_plan); if (cufft_rt != CUFFT_SUCCESS) Kokkos::abort("cufftDestroy failed"); } diff --git a/fft/src/KokkosFFT_FFTW_Types.hpp b/fft/src/KokkosFFT_FFTW_Types.hpp index 8f686577..20aac85c 100644 --- a/fft/src/KokkosFFT_FFTW_Types.hpp +++ b/fft/src/KokkosFFT_FFTW_Types.hpp @@ -7,6 +7,7 @@ #include #include +#include #include "KokkosFFT_common_types.hpp" #include "KokkosFFT_utils.hpp" @@ -104,6 +105,7 @@ struct ScopedFFTWPlan { } ~ScopedFFTWPlan() noexcept { + Kokkos::Profiling::ScopedRegion region("KokkosFFT::cleanup_plan[TPL_fftw]"); if constexpr (std::is_same_v) { fftwf_destroy_plan(m_plan); } else { diff --git a/fft/src/KokkosFFT_HIP_plans.hpp b/fft/src/KokkosFFT_HIP_plans.hpp index 9859cd44..fe617e6a 100644 --- a/fft/src/KokkosFFT_HIP_plans.hpp +++ b/fft/src/KokkosFFT_HIP_plans.hpp @@ -6,6 +6,7 @@ #define KOKKOSFFT_HIP_PLANS_HPP #include +#include #include "KokkosFFT_HIP_types.hpp" #include "KokkosFFT_Extents.hpp" #include "KokkosFFT_traits.hpp" @@ -33,6 +34,8 @@ auto create_plan(const ExecutionSpace& exec_space, "InViewType and OutViewType."); using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_hipfft]"); auto type = KokkosFFT::Impl::transform_type::type(); auto [in_extents, out_extents, fft_extents, howmany] = @@ -66,6 +69,8 @@ auto create_plan(const ExecutionSpace& exec_space, "InViewType and OutViewType."); using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_hipfft]"); auto type = KokkosFFT::Impl::transform_type::type(); [[maybe_unused]] auto [in_extents, out_extents, fft_extents, howmany] = @@ -99,6 +104,8 @@ auto create_plan(const ExecutionSpace& exec_space, "InViewType and OutViewType."); using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_hipfft]"); auto type = KokkosFFT::Impl::transform_type::type(); [[maybe_unused]] auto [in_extents, out_extents, fft_extents, howmany] = @@ -139,7 +146,9 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - const int rank = fft_rank; + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_hipfft]"); + const int rank = fft_rank; constexpr auto type = KokkosFFT::Impl::transform_type::type(); diff --git a/fft/src/KokkosFFT_HIP_transform.hpp b/fft/src/KokkosFFT_HIP_transform.hpp index ba103afc..9ac5c954 100644 --- a/fft/src/KokkosFFT_HIP_transform.hpp +++ b/fft/src/KokkosFFT_HIP_transform.hpp @@ -6,6 +6,7 @@ #define KOKKOSFFT_HIP_TRANSFORM_HPP #include +#include #include "KokkosFFT_asserts.hpp" #include "KokkosFFT_HIP_types.hpp" @@ -14,6 +15,8 @@ namespace Impl { inline void exec_plan(const ScopedHIPfftPlan& scoped_plan, hipfftReal* idata, hipfftComplex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_hipfftExecR2C]"); hipfftResult hipfft_rt = hipfftExecR2C(scoped_plan.plan(), idata, odata); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecR2C failed"); } @@ -21,12 +24,16 @@ inline void exec_plan(const ScopedHIPfftPlan& scoped_plan, hipfftReal* idata, inline void exec_plan(const ScopedHIPfftPlan& scoped_plan, hipfftDoubleReal* idata, hipfftDoubleComplex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_hipfftExecD2Z]"); hipfftResult hipfft_rt = hipfftExecD2Z(scoped_plan.plan(), idata, odata); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecD2Z failed"); } inline void exec_plan(const ScopedHIPfftPlan& scoped_plan, hipfftComplex* idata, hipfftReal* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_hipfftExecC2R]"); hipfftResult hipfft_rt = hipfftExecC2R(scoped_plan.plan(), idata, odata); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecC2R failed"); } @@ -34,12 +41,16 @@ inline void exec_plan(const ScopedHIPfftPlan& scoped_plan, hipfftComplex* idata, inline void exec_plan(const ScopedHIPfftPlan& scoped_plan, hipfftDoubleComplex* idata, hipfftDoubleReal* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_hipfftExecZ2D]"); hipfftResult hipfft_rt = hipfftExecZ2D(scoped_plan.plan(), idata, odata); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecZ2D failed"); } inline void exec_plan(const ScopedHIPfftPlan& scoped_plan, hipfftComplex* idata, hipfftComplex* odata, int direction) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_hipfftExecC2C]"); hipfftResult hipfft_rt = hipfftExecC2C(scoped_plan.plan(), idata, odata, direction); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecC2C failed"); @@ -48,6 +59,8 @@ inline void exec_plan(const ScopedHIPfftPlan& scoped_plan, hipfftComplex* idata, inline void exec_plan(const ScopedHIPfftPlan& scoped_plan, hipfftDoubleComplex* idata, hipfftDoubleComplex* odata, int direction) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_hipfftExecZ2Z]"); hipfftResult hipfft_rt = hipfftExecZ2Z(scoped_plan.plan(), idata, odata, direction); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecZ2Z failed"); diff --git a/fft/src/KokkosFFT_HIP_types.hpp b/fft/src/KokkosFFT_HIP_types.hpp index 4be4d397..16e5eea1 100644 --- a/fft/src/KokkosFFT_HIP_types.hpp +++ b/fft/src/KokkosFFT_HIP_types.hpp @@ -7,6 +7,7 @@ #include #include +#include #include "KokkosFFT_common_types.hpp" #include "KokkosFFT_asserts.hpp" @@ -56,6 +57,8 @@ struct ScopedHIPfftPlan { } ~ScopedHIPfftPlan() noexcept { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::cleanup_plan[TPL_hipfft]"); hipfftResult hipfft_rt = hipfftDestroy(m_plan); if (hipfft_rt != HIPFFT_SUCCESS) Kokkos::abort("hipfftDestroy failed"); } diff --git a/fft/src/KokkosFFT_Host_plans.hpp b/fft/src/KokkosFFT_Host_plans.hpp index 00042f74..ad96c1ea 100644 --- a/fft/src/KokkosFFT_Host_plans.hpp +++ b/fft/src/KokkosFFT_Host_plans.hpp @@ -6,6 +6,7 @@ #define KOKKOSFFT_HOST_PLANS_HPP #include +#include #include "KokkosFFT_default_types.hpp" #include "KokkosFFT_Extents.hpp" #include "KokkosFFT_traits.hpp" @@ -37,8 +38,9 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - const int rank = fft_rank; + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_fftw]"); + const int rank = fft_rank; auto [in_extents, out_extents, fft_extents, howmany] = KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); int idist = std::accumulate(in_extents.begin(), in_extents.end(), 1, diff --git a/fft/src/KokkosFFT_Host_transform.hpp b/fft/src/KokkosFFT_Host_transform.hpp index 33db513e..b4a54805 100644 --- a/fft/src/KokkosFFT_Host_transform.hpp +++ b/fft/src/KokkosFFT_Host_transform.hpp @@ -6,6 +6,7 @@ #define KOKKOSFFT_HOST_TRANSFORM_HPP #include +#include namespace KokkosFFT { namespace Impl { @@ -13,36 +14,48 @@ namespace Impl { template void exec_plan(const ScopedPlanType& scoped_plan, float* idata, fftwf_complex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_fftwExecR2C]"); fftwf_execute_dft_r2c(scoped_plan.plan(), idata, odata); } template void exec_plan(const ScopedPlanType& scoped_plan, double* idata, fftw_complex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_fftwExecD2Z]"); fftw_execute_dft_r2c(scoped_plan.plan(), idata, odata); } template void exec_plan(const ScopedPlanType& scoped_plan, fftwf_complex* idata, float* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_fftwExecC2R]"); fftwf_execute_dft_c2r(scoped_plan.plan(), idata, odata); } template void exec_plan(const ScopedPlanType& scoped_plan, fftw_complex* idata, double* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_fftwExecZ2D]"); fftw_execute_dft_c2r(scoped_plan.plan(), idata, odata); } template void exec_plan(const ScopedPlanType& scoped_plan, fftwf_complex* idata, fftwf_complex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_fftwExecC2C]"); fftwf_execute_dft(scoped_plan.plan(), idata, odata); } template void exec_plan(const ScopedPlanType& scoped_plan, fftw_complex* idata, fftw_complex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_fftwExecZ2Z]"); fftw_execute_dft(scoped_plan.plan(), idata, odata); } } // namespace Impl diff --git a/fft/src/KokkosFFT_ROCM_plans.hpp b/fft/src/KokkosFFT_ROCM_plans.hpp index 56c1d6c3..198b5410 100644 --- a/fft/src/KokkosFFT_ROCM_plans.hpp +++ b/fft/src/KokkosFFT_ROCM_plans.hpp @@ -5,6 +5,9 @@ #ifndef KOKKOSFFT_ROCM_PLANS_HPP #define KOKKOSFFT_ROCM_PLANS_HPP +#include +#include +#include #include "KokkosFFT_ROCM_types.hpp" #include "KokkosFFT_Extents.hpp" #include "KokkosFFT_traits.hpp" @@ -33,12 +36,14 @@ auto create_plan(const ExecutionSpace& exec_space, "and the same rank. ExecutionSpace must be accessible to the data in " "InViewType and OutViewType."); - static_assert( - InViewType::rank() >= fft_rank, - "KokkosFFT::create_plan: Rank of View must be larger than Rank of FFT."); + static_assert(InViewType::rank() >= fft_rank, + "KokkosFFT::create_plan: Rank of View must be larger than " + "Rank of FFT."); using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_rocfft]"); constexpr auto type = KokkosFFT::Impl::transform_type::type(); diff --git a/fft/src/KokkosFFT_ROCM_transform.hpp b/fft/src/KokkosFFT_ROCM_transform.hpp index 04d1ead8..bb89baff 100644 --- a/fft/src/KokkosFFT_ROCM_transform.hpp +++ b/fft/src/KokkosFFT_ROCM_transform.hpp @@ -7,6 +7,7 @@ #include #include +#include #include "KokkosFFT_asserts.hpp" #include "KokkosFFT_ROCM_types.hpp" @@ -14,6 +15,8 @@ namespace KokkosFFT { namespace Impl { inline void exec_plan(const ScopedRocfftPlan& scoped_plan, float* idata, std::complex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_rocfftExecR2C]"); rocfft_status status = rocfft_execute(scoped_plan.plan(), (void**)&idata, (void**)&odata, scoped_plan.execution_info()); @@ -24,6 +27,8 @@ inline void exec_plan(const ScopedRocfftPlan& scoped_plan, float* idata, inline void exec_plan(const ScopedRocfftPlan& scoped_plan, double* idata, std::complex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_rocfftExecD2Z]"); rocfft_status status = rocfft_execute(scoped_plan.plan(), (void**)&idata, (void**)&odata, scoped_plan.execution_info()); @@ -34,6 +39,8 @@ inline void exec_plan(const ScopedRocfftPlan& scoped_plan, inline void exec_plan( const ScopedRocfftPlan>& scoped_plan, std::complex* idata, float* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_rocfftExecC2R]"); rocfft_status status = rocfft_execute(scoped_plan.plan(), (void**)&idata, (void**)&odata, scoped_plan.execution_info()); @@ -44,6 +51,8 @@ inline void exec_plan( inline void exec_plan( const ScopedRocfftPlan>& scoped_plan, std::complex* idata, double* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_rocfftExecZ2D]"); rocfft_status status = rocfft_execute(scoped_plan.plan(), (void**)&idata, (void**)&odata, scoped_plan.execution_info()); @@ -54,6 +63,8 @@ inline void exec_plan( inline void exec_plan( const ScopedRocfftPlan>& scoped_plan, std::complex* idata, std::complex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_rocfftExecC2C]"); rocfft_status status = rocfft_execute(scoped_plan.plan(), (void**)&idata, (void**)&odata, scoped_plan.execution_info()); @@ -65,6 +76,8 @@ inline void exec_plan( const ScopedRocfftPlan>& scoped_plan, std::complex* idata, std::complex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_rocfftExecZ2Z]"); rocfft_status status = rocfft_execute(scoped_plan.plan(), (void**)&idata, (void**)&odata, scoped_plan.execution_info()); diff --git a/fft/src/KokkosFFT_ROCM_types.hpp b/fft/src/KokkosFFT_ROCM_types.hpp index 8bf19c07..38c6682e 100644 --- a/fft/src/KokkosFFT_ROCM_types.hpp +++ b/fft/src/KokkosFFT_ROCM_types.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include "KokkosFFT_common_types.hpp" #include "KokkosFFT_traits.hpp" #include "KokkosFFT_asserts.hpp" @@ -186,6 +187,8 @@ struct ScopedRocfftPlan { "rocfft_plan_create failed"); } ~ScopedRocfftPlan() noexcept { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::cleanup_plan[TPL_rocfft]"); rocfft_status status = rocfft_plan_destroy(m_plan); if (status != rocfft_status_success) Kokkos::abort("rocfft_plan_destroy failed"); diff --git a/fft/src/KokkosFFT_SYCL_plans.hpp b/fft/src/KokkosFFT_SYCL_plans.hpp index f386c269..166b634b 100644 --- a/fft/src/KokkosFFT_SYCL_plans.hpp +++ b/fft/src/KokkosFFT_SYCL_plans.hpp @@ -7,6 +7,7 @@ #include #include +#include #include "KokkosFFT_SYCL_types.hpp" #include "KokkosFFT_Extents.hpp" #include "KokkosFFT_traits.hpp" @@ -67,6 +68,7 @@ auto create_plan(const ExecutionSpace& exec_space, InViewType::rank() >= fft_rank, "KokkosFFT::create_plan: Rank of View must be larger than Rank of FFT."); + Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_oneMKL]"); auto [in_extents, out_extents, fft_extents, howmany] = KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); int idist = std::accumulate(in_extents.begin(), in_extents.end(), 1, diff --git a/fft/src/KokkosFFT_SYCL_transform.hpp b/fft/src/KokkosFFT_SYCL_transform.hpp index bd85ec7f..3c6e354b 100644 --- a/fft/src/KokkosFFT_SYCL_transform.hpp +++ b/fft/src/KokkosFFT_SYCL_transform.hpp @@ -5,14 +5,17 @@ #ifndef KOKKOSFFT_SYCL_TRANSFORM_HPP #define KOKKOSFFT_SYCL_TRANSFORM_HPP -#include #include +#include +#include namespace KokkosFFT { namespace Impl { template void exec_plan(PlanType& plan, float* idata, std::complex* odata, int /*direction*/, Args...) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_oneMKLExecR2C]"); oneapi::mkl::dft::compute_forward(plan, idata, reinterpret_cast(odata)); } @@ -20,6 +23,8 @@ void exec_plan(PlanType& plan, float* idata, std::complex* odata, template void exec_plan(PlanType& plan, double* idata, std::complex* odata, int /*direction*/, Args...) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_oneMKLExecD2Z]"); oneapi::mkl::dft::compute_forward(plan, idata, reinterpret_cast(odata)); } @@ -27,6 +32,8 @@ void exec_plan(PlanType& plan, double* idata, std::complex* odata, template void exec_plan(PlanType& plan, std::complex* idata, float* odata, int /*direction*/, Args...) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_oneMKLExecC2R]"); oneapi::mkl::dft::compute_backward(plan, reinterpret_cast(idata), odata); } @@ -34,6 +41,8 @@ void exec_plan(PlanType& plan, std::complex* idata, float* odata, template void exec_plan(PlanType& plan, std::complex* idata, double* odata, int /*direction*/, Args...) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_oneMKLExecZ2D]"); oneapi::mkl::dft::compute_backward(plan, reinterpret_cast(idata), odata); } @@ -41,6 +50,8 @@ void exec_plan(PlanType& plan, std::complex* idata, double* odata, template void exec_plan(PlanType& plan, std::complex* idata, std::complex* odata, int direction, Args...) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_oneMKLExecC2C]"); if (direction == 1) { oneapi::mkl::dft::compute_forward(plan, idata, odata); } else { @@ -51,6 +62,8 @@ void exec_plan(PlanType& plan, std::complex* idata, template void exec_plan(PlanType& plan, std::complex* idata, std::complex* odata, int direction, Args...) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_oneMKLExecZ2Z]"); if (direction == 1) { oneapi::mkl::dft::compute_forward(plan, idata, odata); } else { diff --git a/fft/src/KokkosFFT_Transform.hpp b/fft/src/KokkosFFT_Transform.hpp index 3f319b6e..005de032 100644 --- a/fft/src/KokkosFFT_Transform.hpp +++ b/fft/src/KokkosFFT_Transform.hpp @@ -6,6 +6,7 @@ #define KOKKOSFFT_TRANSFORM_HPP #include +#include #include "KokkosFFT_traits.hpp" #include "KokkosFFT_normalization.hpp" #include "KokkosFFT_utils.hpp" @@ -35,6 +36,8 @@ void fft(const ExecutionSpace& exec_space, const InViewType& in, "and OutViewType."); static_assert(InViewType::rank() >= 1, "fft: View rank must be larger than or equal to 1"); + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::fft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::forward, axis, @@ -65,6 +68,8 @@ void ifft(const ExecutionSpace& exec_space, const InViewType& in, "and OutViewType."); static_assert(InViewType::rank() >= 1, "ifft: View rank must be larger than or equal to 1"); + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::ifft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::backward, @@ -103,6 +108,8 @@ void rfft(const ExecutionSpace& exec_space, const InViewType& in, "rfft: InViewType must be real"); static_assert(KokkosFFT::Impl::is_complex_v, "rfft: OutViewType must be complex"); + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::rfft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); fft(exec_space, in, out, norm, axis, n); @@ -140,6 +147,8 @@ void irfft(const ExecutionSpace& exec_space, const InViewType& in, "irfft: InViewType must be complex"); static_assert(KokkosFFT::Impl::is_real_v, "irfft: OutViewType must be real"); + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::irfft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); ifft(exec_space, in, out, norm, axis, n); @@ -178,6 +187,8 @@ void hfft(const ExecutionSpace& exec_space, const InViewType& in, "hfft: InViewType must be complex"); static_assert(KokkosFFT::Impl::is_real_v, "hfft: OutViewType must be real"); + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::hfft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); auto new_norm = KokkosFFT::Impl::swap_direction(norm); @@ -220,6 +231,8 @@ void ihfft(const ExecutionSpace& exec_space, const InViewType& in, "ihfft: InViewType must be real"); static_assert(KokkosFFT::Impl::is_complex_v, "ihfft: OutViewType must be complex"); + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::ihfft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); auto new_norm = KokkosFFT::Impl::swap_direction(norm); @@ -253,6 +266,8 @@ void fft2(const ExecutionSpace& exec_space, const InViewType& in, "and OutViewType."); static_assert(InViewType::rank() >= 2, "fft2: View rank must be larger than or equal to 2"); + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::fft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::forward, axes, @@ -283,6 +298,8 @@ void ifft2(const ExecutionSpace& exec_space, const InViewType& in, "and OutViewType."); static_assert(InViewType::rank() >= 2, "ifft2: View rank must be larger than or equal to 2"); + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::ifft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::backward, @@ -321,6 +338,8 @@ void rfft2(const ExecutionSpace& exec_space, const InViewType& in, "rfft2: InViewType must be real"); static_assert(KokkosFFT::Impl::is_complex_v, "rfft2: OutViewType must be complex"); + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::rfft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); fft2(exec_space, in, out, norm, axes, s); @@ -357,6 +376,8 @@ void irfft2(const ExecutionSpace& exec_space, const InViewType& in, "irfft2: InViewType must be complex"); static_assert(KokkosFFT::Impl::is_real_v, "irfft2: OutViewType must be real"); + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::irfft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); ifft2(exec_space, in, out, norm, axes, s); @@ -398,6 +419,8 @@ void fftn( static_assert( InViewType::rank() >= DIM, "fftn: View rank must be larger than or equal to the Rank of FFT axes"); + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::fftn"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::forward, axes, @@ -441,6 +464,8 @@ void ifftn( static_assert( InViewType::rank() >= DIM, "ifftn: View rank must be larger than or equal to the Rank of FFT axes"); + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::ifftn"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::backward, @@ -492,6 +517,8 @@ void rfftn( "rfftn: InViewType must be real"); static_assert(KokkosFFT::Impl::is_complex_v, "rfftn: OutViewType must be complex"); + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::rfftn"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); fftn(exec_space, in, out, axes, norm, s); @@ -541,6 +568,8 @@ void irfftn( "irfftn: InViewType must be complex"); static_assert(KokkosFFT::Impl::is_real_v, "irfftn: OutViewType must be real"); + + Kokkos::Profiling::ScopedRegion region("KokkosFFT::irfftn"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); ifftn(exec_space, in, out, axes, norm, s);