From c110bd1101830df19ec4b62dcb7f7edac76573dd Mon Sep 17 00:00:00 2001 From: Victoriya Fedotova Date: Thu, 13 Jun 2024 03:37:50 -0700 Subject: [PATCH 1/4] Fix dpc vectorization error when running with new compiler --- cpp/oneapi/dal/table/backend/csr_kernels.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cpp/oneapi/dal/table/backend/csr_kernels.cpp b/cpp/oneapi/dal/table/backend/csr_kernels.cpp index 8e5aef236b2..d365168b847 100644 --- a/cpp/oneapi/dal/table/backend/csr_kernels.cpp +++ b/cpp/oneapi/dal/table/backend/csr_kernels.cpp @@ -14,6 +14,7 @@ * limitations under the License. *******************************************************************************/ +#include "oneapi/dal/backend/common.hpp" #include "oneapi/dal/table/backend/csr_kernels.hpp" #include "oneapi/dal/table/backend/convert.hpp" @@ -411,6 +412,10 @@ bool is_sorted(sycl::queue& queue, sycl::buffer count_buf(&count_descending_pairs, sycl::range<1>(1)); + const auto count_m1 = count - 1LL; + const auto wg_size = dal::backend::device_max_wg_size(queue); + const auto local_size = (wg_size < count_m1) ? 
wg_size : count_m1; + // count the number of pairs of the subsequent elements in the data array that are sorted // in desccending order using sycl::reduction queue @@ -419,9 +424,10 @@ bool is_sorted(sycl::queue& queue, auto count_descending_reduction = sycl::reduction(count_buf, cgh, sycl::ext::oneapi::plus()); - cgh.parallel_for(sycl::range<1>{ dal::detail::integral_cast(count - 1) }, + cgh.parallel_for(sycl::nd_range<1>{ count_m1, local_size }, count_descending_reduction, - [=](sycl::id<1> i, auto& count_descending) { + [=](sycl::nd_item<1> idx, auto& count_descending) { + const auto i = idx.get_global_id(0); if (data[i] > data[i + 1]) count_descending.combine(1); }); From bbfff891c6363507ed00c8cd84c82d39ec9ababb Mon Sep 17 00:00:00 2001 From: Victoriya Fedotova Date: Fri, 14 Jun 2024 13:16:55 +0200 Subject: [PATCH 2/4] Update cpp/oneapi/dal/table/backend/csr_kernels.cpp Fix indexing error --- cpp/oneapi/dal/table/backend/csr_kernels.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/oneapi/dal/table/backend/csr_kernels.cpp b/cpp/oneapi/dal/table/backend/csr_kernels.cpp index d365168b847..24f03bf68b2 100644 --- a/cpp/oneapi/dal/table/backend/csr_kernels.cpp +++ b/cpp/oneapi/dal/table/backend/csr_kernels.cpp @@ -428,7 +428,7 @@ bool is_sorted(sycl::queue& queue, count_descending_reduction, [=](sycl::nd_item<1> idx, auto& count_descending) { const auto i = idx.get_global_id(0); - if (data[i] > data[i + 1]) + if (i < count_m1 && data[i + 1] < data[i]) count_descending.combine(1); }); }) From 51a3fc2bd8f895ef93fe6bf1f197517c69342568 Mon Sep 17 00:00:00 2001 From: Victoriya Fedotova Date: Mon, 17 Jun 2024 02:36:51 -0700 Subject: [PATCH 3/4] Fix nd_range size --- cpp/oneapi/dal/table/backend/csr_kernels.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/oneapi/dal/table/backend/csr_kernels.cpp b/cpp/oneapi/dal/table/backend/csr_kernels.cpp index 24f03bf68b2..662a6f270fc 100644 --- 
a/cpp/oneapi/dal/table/backend/csr_kernels.cpp +++ b/cpp/oneapi/dal/table/backend/csr_kernels.cpp @@ -414,7 +414,9 @@ bool is_sorted(sycl::queue& queue, const auto count_m1 = count - 1LL; const auto wg_size = dal::backend::device_max_wg_size(queue); - const auto local_size = (wg_size < count_m1) ? wg_size : count_m1; + const size_t count_m1_unsigned = static_cast<size_t>(count_m1); + + const size_t wg_count = (count_m1 % wg_size) ? count_m1 / wg_size + 1 : count_m1 / wg_size; // count the number of pairs of the subsequent elements in the data array that are sorted // in desccending order using sycl::reduction @@ -424,11 +426,11 @@ bool is_sorted(sycl::queue& queue, auto count_descending_reduction = sycl::reduction(count_buf, cgh, sycl::ext::oneapi::plus()); - cgh.parallel_for(sycl::nd_range<1>{ count_m1, local_size }, + cgh.parallel_for(sycl::nd_range<1>{ wg_count * wg_size, wg_size }, count_descending_reduction, [=](sycl::nd_item<1> idx, auto& count_descending) { const auto i = idx.get_global_id(0); - if (i < count_m1 && data[i + 1] < data[i]) + if (i < count_m1_unsigned && data[i + 1] < data[i]) count_descending.combine(1); }); }) From 52e1c39698f0519eb2d30bca18dbc91662e849a1 Mon Sep 17 00:00:00 2001 From: Victoriya Fedotova Date: Tue, 18 Jun 2024 13:06:52 +0200 Subject: [PATCH 4/4] simplify sycl range size computations Co-authored-by: Anatoly Volkov <117643568+avolkov-intel@users.noreply.github.com> --- cpp/oneapi/dal/table/backend/csr_kernels.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/oneapi/dal/table/backend/csr_kernels.cpp b/cpp/oneapi/dal/table/backend/csr_kernels.cpp index 662a6f270fc..7bf510318bd 100644 --- a/cpp/oneapi/dal/table/backend/csr_kernels.cpp +++ b/cpp/oneapi/dal/table/backend/csr_kernels.cpp @@ -416,7 +416,7 @@ bool is_sorted(sycl::queue& queue, const auto wg_size = dal::backend::device_max_wg_size(queue); const size_t count_m1_unsigned = static_cast<size_t>(count_m1); - const size_t wg_count = (count_m1 % wg_size) ? 
count_m1 / wg_size + 1 : count_m1 / wg_size; + const size_t wg_count = (count_m1 + wg_size - 1) / wg_size; // count the number of pairs of the subsequent elements in the data array that are sorted // in desccending order using sycl::reduction