Alternative work distribution #36
Can you explain somewhere in the comments what is the optimization problem you are trying to solve?
Why these loop nests in the case of a `parallel_for`?
On my machine, using one thread per pixel led to under-utilization of the CPU (each core sat at only 70–80% activity) because of scheduling issues.
On the GPU, if we have more cores than pixels, it should "degenerate" into using one core per work-item (but I think that case is not handled well: the immediate-divider search should return 1 if it reaches the end of the function, instead of 0).
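For illustration, here is a minimal sketch of such a divisor search with the fix described above; the function name, signature, and chunking policy are assumptions, not necessarily what this PR implements:

```cpp
#include <algorithm>
#include <cstddef>

// Hypothetical helper: find the largest divisor of n not exceeding max_chunk,
// so that the work can be split into equally sized chunks.
std::size_t find_immediate_divider(std::size_t n, std::size_t max_chunk) {
  for (std::size_t d = std::min(n, max_chunk); d > 1; --d)
    if (n % d == 0)
      return d;
  // Reaching the end of the function means no divisor > 1 fits (e.g. n is
  // prime). Returning 1 (one work-item per chunk) keeps the distribution
  // valid, whereas returning 0 would produce a zero-sized range.
  return 1;
}
```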
I'm waiting for your feedback on this strategy before writing those comments.
On FPGA I think the problem is more complicated, because you will want to tune the replication of the loop body to find a good trade-off between area (so that other kernels can be packed onto the same device without requiring reconfiguration) and execution time.
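As a hypothetical illustration of that trade-off (not this PR's code), with the unroll pragma understood by Intel's FPGA toolchain, the replication factor is the tuning knob:

```cpp
// Illustrative only: unrolling replicates the loop body in hardware,
// trading FPGA area for execution time. The pragma spelling is the one
// accepted by Intel's HLS/oneAPI flow; other toolchains differ.
void scale(float *data, int n) {
  // Factor 4: roughly four copies of the datapath, up to 4x the throughput.
#pragma unroll 4
  for (int i = 0; i < n; ++i)
    data[i] *= 2.0f;
}
```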
There should not be one thread per pixel.
Looking at the code again, perhaps the problem is that there is an explicit `nd_range` distribution of the parallelism. On CPU this is painful because you need to fight against possible barriers; this is why there is a macro to swear that no barriers are in use... What about just using a simple `range` `parallel_for` and trusting the runtime to distribute the work? For triSYCL you could also try the TBB runtime.
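For illustration, here is a minimal sketch of the two submission styles under discussion, using a made-up image-scaling kernel (the buffer, sizes, and kernel names are assumptions, not this PR's code):

```cpp
#include <CL/sycl.hpp>
using namespace cl::sycl;

void run(queue &q, buffer<float, 2> &img) {
  // Explicit nd_range: the programmer fixes the work-group shape, and on CPU
  // the runtime must stand ready to honour possible barriers.
  q.submit([&](handler &cgh) {
    auto a = img.get_access<access::mode::read_write>(cgh);
    cgh.parallel_for<class scale_nd>(
        nd_range<2>{range<2>{512, 512}, range<2>{8, 8}},
        [=](nd_item<2> it) { a[it.get_global_id()] *= 2.0f; });
  });

  // Simple range: no work-group shape and no barriers possible, so the
  // runtime is free to distribute the work however it likes.
  q.submit([&](handler &cgh) {
    auto a = img.get_access<access::mode::read_write>(cgh);
    cgh.parallel_for<class scale_range>(
        range<2>{512, 512}, [=](id<2> i) { a[i] *= 2.0f; });
  });
}
```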
Indeed, there was an explicit `nd_range` creating 64 work-items per group, organized in an 8×8 grid. I wasn't aware of the alternative `parallel_for` API.
That does seem better. Done in 32baf38.
However, given this alternative API, I don't see why there is a need for a "one_single_task" version for FPGA: the semantics of `parallel_for` should be sufficient, and the backend compiler would then be responsible for choosing whether to replicate the dataflow many times or to use loops.
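A sketch of the two lowerings being contrasted, on a made-up 1-D kernel (names and sizes are assumptions; under the proposal above, choosing between them would be the backend compiler's job rather than the programmer's):

```cpp
#include <CL/sycl.hpp>
#include <cstddef>
using namespace cl::sycl;

void run_fpga(queue &q, buffer<float, 1> &data, std::size_t n) {
  // Single-task form: one work-item with an in-kernel loop that an FPGA
  // toolchain can pipeline and/or unroll.
  q.submit([&](handler &cgh) {
    auto a = data.get_access<access::mode::read_write>(cgh);
    cgh.single_task<class loop_form>([=] {
      for (std::size_t i = 0; i < n; ++i)
        a[i] *= 2.0f;
    });
  });

  // parallel_for form: the same semantics expressed as a flat index space,
  // leaving the replicate-vs-loop decision to the backend.
  q.submit([&](handler &cgh) {
    auto a = data.get_access<access::mode::read_write>(cgh);
    cgh.parallel_for<class flat_form>(range<1>{n},
                                      [=](id<1> i) { a[i] *= 2.0f; });
  });
}
```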
Sure. But the `parallel_for` might be less efficient on FPGA.