From 471413d259f9be0998f8d59b4879076f43b61e47 Mon Sep 17 00:00:00 2001
From: imrn99 <95699343+imrn99@users.noreply.github.com>
Date: Thu, 23 Nov 2023 09:20:20 +0100
Subject: [PATCH 1/7] gemm skeleton

---
 Cargo.toml      |   4 ++
 benches/gemm.rs | 154 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 158 insertions(+)
 create mode 100644 benches/gemm.rs
diff --git a/Cargo.toml b/Cargo.toml
index a723995..8156bf0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -48,3 +48,7 @@ harness = false
 [[bench]]
 name = "gemv"
 harness = false
+
+[[bench]]
+name = "gemm"
+harness = false
\ No newline at end of file
diff --git a/benches/gemm.rs b/benches/gemm.rs
new file mode 100644
index 0000000..95d57a4
--- /dev/null
+++ b/benches/gemm.rs
@@ -0,0 +1,154 @@
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+use poc_kokkos_rs::{
+    functor::KernelArgs,
+    routines::{
+        parallel_for,
+        parameters::{ExecutionPolicy, ExecutionSpace, RangePolicy, Schedule},
+    },
+    view::{parameters::Layout, ViewOwned},
+};
+use rand::{
+    distributions::{Distribution, Uniform},
+    rngs::SmallRng,
+    SeedableRng,
+};
+
+// Serial GEMM
+fn f1(
+    length: usize,
+    aa_init: Vec<f64>,
+    bb_init: Vec<f64>,
+    cc_init: Vec<f64>,
+    alpha: f64,
+    beta: f64,
+) {
+    let mut aa = ViewOwned::new_from_data(aa_init, Layout::Right, [length, length]);
+    let mut bb = ViewOwned::new_from_data(bb_init, Layout::Right, [length, length]);
+    let mut cc = ViewOwned::new_from_data(cc_init, Layout::Right, [length, length]);
+    black_box(&mut aa);
+    black_box(&mut bb);
+    black_box(&mut cc);
+
+    let execp = ExecutionPolicy {
+        space: ExecutionSpace::Serial,
+        range: RangePolicy::RangePolicy(0..length),
+        schedule: Schedule::Static,
+    };
+
+    // y = alpha * A * x + beta * y
+    let gemm_kernel = |arg: KernelArgs<1>| match arg {
+        KernelArgs::Index1D(i) => {
+            todo!()
+        }
+        KernelArgs::IndexND(_) => unimplemented!(),
+        KernelArgs::Handle => unimplemented!(),
+    };
+    parallel_for(execp, gemm_kernel).unwrap();
+    black_box(&cc);
+}
+
+// DeviceCPU GEMM
+fn f2(
+    length: usize,
+    aa_init: Vec<f64>,
+    bb_init: Vec<f64>,
+    cc_init: Vec<f64>,
+    alpha: f64,
+    beta: f64,
+) {
+    let mut aa = ViewOwned::new_from_data(aa_init, Layout::Right, [length, length]);
+    let mut bb = ViewOwned::new_from_data(bb_init, Layout::Right, [length, length]);
+    let mut cc = ViewOwned::new_from_data(cc_init, Layout::Right, [length, length]);
+    black_box(&mut aa);
+    black_box(&mut bb);
+    black_box(&mut cc);
+
+    let execp = ExecutionPolicy {
+        space: ExecutionSpace::DeviceCPU,
+        range: RangePolicy::RangePolicy(0..length),
+        schedule: Schedule::Static,
+    };
+
+    // y = alpha * A * x + beta * y
+    let gemm_kernel = |arg: KernelArgs<1>| match arg {
+        KernelArgs::Index1D(i) => {
+            todo!()
+        }
+        KernelArgs::IndexND(_) => unimplemented!(),
+        KernelArgs::Handle => unimplemented!(),
+    };
+    parallel_for(execp, gemm_kernel).unwrap();
+    black_box(&cc);
+}
+
+pub fn criterion_benchmark(c: &mut Criterion) {
+    // Generate/Define the input
+    const DATA_SIZE: u32 = 12;
+    let length = 2_usize.pow(DATA_SIZE);
+    let seed: u64 = 9817498146784;
+    let mut rng = SmallRng::seed_from_u64(seed);
+    let range: Uniform<f64> = rand::distributions::Uniform::new(0.0, 100.0);
+    let aa_init: Vec<f64> = (0..length * length)
+        .map(|_| range.sample(&mut rng))
+        .collect();
+    let bb_init: Vec<f64> = (0..length * length)
+        .map(|_| range.sample(&mut rng))
+        .collect();
+    let cc_init: Vec<f64> = (0..length * length)
+        .map(|_| range.sample(&mut rng))
+        .collect();
+    let alpha: f64 = range.sample(&mut rng);
+    let beta: f64 = range.sample(&mut rng);
+
+    let mut group = c.benchmark_group("gemm");
+    group.bench_with_input(
+        BenchmarkId::new("exec-serial", ""),
+        &(
+            length,
+            aa_init.clone(),
+            bb_init.clone(),
+            cc_init.clone(),
+            alpha,
+            beta,
+        ),
+        |b, (length, aa_init, bb_init, cc_init, alpha, beta)| {
+            b.iter(|| {
+                f1(
+                    *length,
+                    aa_init.clone(),
+                    bb_init.clone(),
+                    cc_init.clone(),
+                    *alpha,
+                    *beta,
+                )
+            })
+        },
+    );
+    group.bench_with_input(
+        BenchmarkId::new("exec-devicecpu", ""),
+        &(
+            length,
+            aa_init.clone(),
+            bb_init.clone(),
+            cc_init.clone(),
+            alpha,
+            beta,
+        ),
+        |b, (length, aa_init, bb_init, cc_init, alpha, beta)| {
+            b.iter(|| {
+                f2(
+                    *length,
+                    aa_init.clone(),
+                    bb_init.clone(),
+                    cc_init.clone(),
+                    *alpha,
+                    *beta,
+                )
+            })
+        },
+    );
+    group.finish()
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);

From fa9c601fd3aeb9083f3da2566fbba9c3255051c9 Mon Sep 17 00:00:00 2001
From: imrn99 <95699343+imrn99@users.noreply.github.com>
Date: Thu, 23 Nov 2023 10:08:49 +0100
Subject: [PATCH 2/7] completed gemm

speedup > n_cores ????
---
 benches/gemm.rs | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/benches/gemm.rs b/benches/gemm.rs
index 95d57a4..9939290 100644
--- a/benches/gemm.rs
+++ b/benches/gemm.rs
@@ -23,7 +23,7 @@ fn f1(
     beta: f64,
 ) {
     let mut aa = ViewOwned::new_from_data(aa_init, Layout::Right, [length, length]);
-    let mut bb = ViewOwned::new_from_data(bb_init, Layout::Right, [length, length]);
+    let mut bb = ViewOwned::new_from_data(bb_init, Layout::Left, [length, length]); // optimal layout since we iterate inside columns :)
     let mut cc = ViewOwned::new_from_data(cc_init, Layout::Right, [length, length]);
     black_box(&mut aa);
     black_box(&mut bb);
@@ -35,10 +35,17 @@ fn f1(
         schedule: Schedule::Static,
     };
 
-    // y = alpha * A * x + beta * y
+    // C = alpha * A * B + beta * C
     let gemm_kernel = |arg: KernelArgs<1>| match arg {
+        // lines
         KernelArgs::Index1D(i) => {
-            todo!()
+            // cols
+            for j in 0..length {
+                // (A * B)[i, j] = sum_k a[i, k] * b[k, j]; same access as the DeviceCPU kernel so both compute the same thing
+                let ab_ij: f64 = (0..length).map(|k| aa.get([i, k]) * bb.get([k, j])).sum();
+                let val: f64 = alpha * ab_ij + beta * cc.get([i, j]);
+                cc.set([i, j], val);
+            }
         }
         KernelArgs::IndexND(_) => unimplemented!(),
         KernelArgs::Handle => unimplemented!(),
@@ -57,7 +64,7 @@ fn f2(
     beta: f64,
 ) {
     let mut aa = ViewOwned::new_from_data(aa_init, Layout::Right, [length, length]);
-    let mut bb = ViewOwned::new_from_data(bb_init, Layout::Right, [length, length]);
+    let mut bb = ViewOwned::new_from_data(bb_init, Layout::Left, [length, length]); // optimal layout since we iterate inside columns :)
     let mut cc = ViewOwned::new_from_data(cc_init, Layout::Right, [length, length]);
     black_box(&mut aa);
     black_box(&mut bb);
@@ -69,10 +76,17 @@ fn f2(
         schedule: Schedule::Static,
     };
 
-    // y = alpha * A * x + beta * y
+    // C = alpha * A * B + beta * C
     let gemm_kernel = |arg: KernelArgs<1>| match arg {
+        // lines
         KernelArgs::Index1D(i) => {
-            todo!()
+            // cols
+            for j in 0..length {
+                // all b[k, j] for k values are adjacent in memory thanks to the LayoutLeft
+                let ab_ij: f64 = (0..length).map(|k| aa.get([i, k]) * bb.get([k, j])).sum();
+                let val: f64 = alpha * ab_ij + beta * cc.get([i, j]);
+                cc.set([i, j], val);
+            }
         }
         KernelArgs::IndexND(_) => unimplemented!(),
         KernelArgs::Handle => unimplemented!(),
@@ -83,7 +97,7 @@ fn f2(
 
 pub fn criterion_benchmark(c: &mut Criterion) {
     // Generate/Define the input
-    const DATA_SIZE: u32 = 12;
+    const DATA_SIZE: u32 = 10;
     let length = 2_usize.pow(DATA_SIZE);
     let seed: u64 = 9817498146784;
     let mut rng = SmallRng::seed_from_u64(seed);

From e2358536e33de6b3435ade145d36842585ee67f0 Mon Sep 17 00:00:00 2001
From: imrn99 <95699343+imrn99@users.noreply.github.com>
Date: Thu, 23 Nov 2023 11:43:35 +0100
Subject: [PATCH 3/7] hardcoded gemm bench

it seems the library creates much more overhead for serial execution
than parallel
---
 Cargo.toml                |   6 ++
 benches/hardcoded_gemm.rs | 152 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 158 insertions(+)
 create mode 100644 benches/hardcoded_gemm.rs

diff --git a/Cargo.toml b/Cargo.toml
index 8156bf0..e90c9c4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -23,6 +23,8 @@ num_cpus = {version = "1.0", optional=true}
 [dev-dependencies]
 criterion = { version = "*", features = ["html_reports"] }
 rand      = { version = "*", features = ["small_rng", "alloc"] }
+atomic   = {version = "0.5.3"}
+rayon    = {version = "*"}
 
 [build-dependencies]
 cxx-build = "*"
@@ -51,4 +53,8 @@ harness = false
 
 [[bench]]
 name = "gemm"
+harness = false
+
+[[bench]]
+name = "hardcoded_gemm"
 harness = false
\ No newline at end of file
diff --git a/benches/hardcoded_gemm.rs b/benches/hardcoded_gemm.rs
new file mode 100644
index 0000000..19065fe
--- /dev/null
+++ b/benches/hardcoded_gemm.rs
@@ -0,0 +1,152 @@
+use atomic::Atomic;
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+
+use rand::{
+    distributions::{Distribution, Uniform},
+    rngs::SmallRng,
+    SeedableRng,
+};
+use rayon::iter::{IntoParallelIterator, ParallelIterator};
+
+// hardcoded serial GEMM: C = alpha * A * B + beta * C on flat Vec<f64> buffers, without the library's views
+fn serial_gemm(length: usize, aa: Vec<f64>, bb: Vec<f64>, cc: Vec<f64>, alpha: f64, beta: f64) {
+    let mut aa = aa;
+    let mut bb = bb;
+    let mut cc = cc;
+    black_box(&mut aa);
+    black_box(&mut bb);
+    black_box(&mut cc);
+
+    for i in 0..length {
+        for j in 0..length {
+            // bb is read as b[k, j] at j * length + k (column-major), so all k values are adjacent in memory
+            let ab_ij: f64 = (0..length)
+                // SAFETY: i, j, k < length; in bounds if aa/bb hold length * length elements. Unchecked to keep things fair?
+                .map(|k| unsafe {
+                    aa.get_unchecked(i * length + k) * bb.get_unchecked(j * length + k)
+                })
+                .sum();
+            let val: f64 = alpha * ab_ij + beta * cc[i * length + j];
+            cc[i * length + j] = val;
+        }
+    }
+    black_box(&cc);
+}
+
+// hardcoded rayon GEMM: same arithmetic as serial_gemm, with every element wrapped in an Atomic<f64>
+fn gemm(
+    length: usize,
+    aa_init: Vec<f64>,
+    bb_init: Vec<f64>,
+    cc_init: Vec<f64>,
+    alpha: f64,
+    beta: f64,
+) {
+    let mut aa: Vec<Atomic<f64>> = aa_init
+        .iter()
+        .map(|val| atomic::Atomic::new(*val))
+        .collect();
+    let mut bb: Vec<Atomic<f64>> = bb_init
+        .iter()
+        .map(|val| atomic::Atomic::new(*val))
+        .collect();
+    let mut cc: Vec<Atomic<f64>> = cc_init
+        .iter()
+        .map(|val| atomic::Atomic::new(*val))
+        .collect();
+    black_box(&mut aa);
+    black_box(&mut bb);
+    black_box(&mut cc);
+
+    // C = alpha * A * B + beta * C; each parallel iteration i only writes cc[i * length + j], i.e. row i
+    (0..length).into_par_iter().for_each(|i| {
+        for j in 0..length {
+            let ab_ij: f64 = (0..length)
+                // SAFETY: i, j, k < length; in bounds if aa/bb hold length * length elements. Unchecked to keep things fair?
+                .map(|k| unsafe {
+                    aa.get_unchecked(i * length + k)
+                        .load(atomic::Ordering::Relaxed)
+                        * bb.get_unchecked(j * length + k)
+                            .load(atomic::Ordering::Relaxed)
+                })
+                .sum();
+            let val: f64 =
+                alpha * ab_ij + beta * cc[i * length + j].load(atomic::Ordering::Relaxed);
+            cc[i * length + j].store(val, atomic::Ordering::Relaxed);
+        }
+    });
+    black_box(&cc);
+}
+
+pub fn criterion_benchmark(c: &mut Criterion) {
+    // Generate/Define the input: three buffers of length * length samples from Uniform::new(0.0, 100.0), fixed seed
+    const DATA_SIZE: u32 = 10;
+    let length = 2_usize.pow(DATA_SIZE);
+    let seed: u64 = 9817498146784;
+    let mut rng = SmallRng::seed_from_u64(seed);
+    let range: Uniform<f64> = rand::distributions::Uniform::new(0.0, 100.0);
+    let aa_init: Vec<f64> = (0..length * length)
+        .map(|_| range.sample(&mut rng))
+        .collect();
+    let bb_init: Vec<f64> = (0..length * length)
+        .map(|_| range.sample(&mut rng))
+        .collect();
+    let cc_init: Vec<f64> = (0..length * length)
+        .map(|_| range.sample(&mut rng))
+        .collect();
+    let alpha: f64 = range.sample(&mut rng);
+    let beta: f64 = range.sample(&mut rng);
+
+    let mut group = c.benchmark_group("hardcoded-gemm");
+    group.bench_with_input(
+        BenchmarkId::new("serial", ""),
+        &(
+            length,
+            aa_init.clone(),
+            bb_init.clone(),
+            cc_init.clone(),
+            alpha,
+            beta,
+        ),
+        |b, (length, aa_init, bb_init, cc_init, alpha, beta)| {
+            b.iter(|| {
+                serial_gemm(
+                    *length,
+                    aa_init.clone(),
+                    bb_init.clone(),
+                    cc_init.clone(),
+                    *alpha,
+                    *beta,
+                )
+            })
+        },
+    );
+    group.bench_with_input(
+        BenchmarkId::new("rayon", ""),
+        &(
+            length,
+            aa_init.clone(),
+            bb_init.clone(),
+            cc_init.clone(),
+            alpha,
+            beta,
+        ),
+        |b, (length, aa_init, bb_init, cc_init, alpha, beta)| {
+            b.iter(|| {
+                gemm(
+                    *length,
+                    aa_init.clone(),
+                    bb_init.clone(),
+                    cc_init.clone(),
+                    *alpha,
+                    *beta,
+                )
+            })
+        },
+    );
+
+    group.finish()
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);

From 374ab4935a696d81e26d84ba2fd6b42e8161416c Mon Sep 17 00:00:00 2001
From: imrn99 <95699343+imrn99@users.noreply.github.com>
Date: Thu, 23 Nov 2023 11:46:51 +0100
Subject: [PATCH 4/7] grouped blas speedup benches in a folder

---
 Cargo.toml                         | 9 +++++++++
 benches/{ => blas-speedup}/axpy.rs | 0
 benches/{ => blas-speedup}/gemm.rs | 0
 benches/{ => blas-speedup}/gemv.rs | 0
 4 files changed, 9 insertions(+)
 rename benches/{ => blas-speedup}/axpy.rs (100%)
 rename benches/{ => blas-speedup}/gemm.rs (100%)
 rename benches/{ => blas-speedup}/gemv.rs (100%)

diff --git a/Cargo.toml b/Cargo.toml
index e90c9c4..e73189e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -31,6 +31,8 @@ cxx-build = "*"
 
 # BENCHMARKS
 
+## misc 
+
 [[bench]]
 name = "layout"
 harness = false
@@ -43,18 +45,25 @@ harness = false
 name = "view_access"
 harness = false
 
+## blas speedup measures
+
 [[bench]]
 name = "axpy"
+path = "blas-speedup/axpy.rs"
 harness = false
 
 [[bench]]
 name = "gemv"
+path = "blas-speedup/gemv.rs"
 harness = false
 
 [[bench]]
 name = "gemm"
+path = "blas-speedup/gemm.rs"
 harness = false
 
+## library overhead measures
+
 [[bench]]
 name = "hardcoded_gemm"
 harness = false
\ No newline at end of file
diff --git a/benches/axpy.rs b/benches/blas-speedup/axpy.rs
similarity index 100%
rename from benches/axpy.rs
rename to benches/blas-speedup/axpy.rs
diff --git a/benches/gemm.rs b/benches/blas-speedup/gemm.rs
similarity index 100%
rename from benches/gemm.rs
rename to benches/blas-speedup/gemm.rs
diff --git a/benches/gemv.rs b/benches/blas-speedup/gemv.rs
similarity index 100%
rename from benches/gemv.rs
rename to benches/blas-speedup/gemv.rs

From 044e2456e4eddea0935fa01ea092ffeb544204fa Mon Sep 17 00:00:00 2001
From: imrn99 <95699343+imrn99@users.noreply.github.com>
Date: Thu, 23 Nov 2023 15:00:48 +0100
Subject: [PATCH 5/7] fixed bench paths

---
 Cargo.toml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index e73189e..dae8e65 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -49,17 +49,17 @@ harness = false
 
 [[bench]]
 name = "axpy"
-path = "blas-speedup/axpy.rs"
+path = "benches/blas-speedup/axpy.rs"
 harness = false
 
 [[bench]]
 name = "gemv"
-path = "blas-speedup/gemv.rs"
+path = "benches/blas-speedup/gemv.rs"
 harness = false
 
 [[bench]]
 name = "gemm"
-path = "blas-speedup/gemm.rs"
+path = "benches/blas-speedup/gemm.rs"
 harness = false
 
 ## library overhead measures

From c1c63e5fac2f700294c2d455f07f3d1459fe8ccc Mon Sep 17 00:00:00 2001
From: imrn99 <95699343+imrn99@users.noreply.github.com>
Date: Thu, 23 Nov 2023 15:12:48 +0100
Subject: [PATCH 6/7] update doc & readme

---
 README.md  | 64 ++++++++++++++++++++++++++++++++++++++++--------------
 src/lib.rs |  9 ++++----
 2 files changed, 52 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index dae8b90..77e1330 100644
--- a/README.md
+++ b/README.md
@@ -23,27 +23,66 @@ This makes limit-testing an fundamental part of the project.
 
 ## Quickstart
 
-### Example
+The PoC itself is a library, but you can run benchmarks and examples out of the box.
 
-The PoC itself is a library, but you can run examples by using the following command: 
+### Benchmarks
 
+Benchmarks can be run using the following command:
+
+```bash
+# all benchmarks
+cargo bench
+# a specific benchmark
+cargo bench --bench bench_name
 ```
+
+All results are compiled to the `target/criterion/` folder. The following
+benchmarks are available:
+
+- `layout`: Matrix-Vector product computation; This is used to put numbers on the
+  importance of data layout in memory.
+- `view_init`: Compare initialization performances of regular vectors to [Views][view]; This
+  is used to spot potential scaling issues induced by the more complex structure of Views.
+- `view_access`: Compare data access performances of regular vectors to [Views][view]; This
+  is used to spot potential scaling issues induced by the more complex structure of Views.
+- `axpy` / `gemv` / `gemm`: Measure speedup on basic BLAS implementations by running the same kernel
+  in serial mode first, then using parallelization on CPU. _Meant to be executed using features_.
+- `hardcoded_gemm`: Compute the same operations as the `gemm` benchmark, but using a hardcoded implementation
+  instead of methods from the PoC. Used to assess the additional cost induced by the library.
+
+
+### Examples
+
+```bash
 cargo run --example hello-world
 ```
 
-The following examples are available: 
+The following examples are available:
 
-- `hello-world`: ...
-- `openmp-parallel`: ...
+- `hello_world`: ...
+- `hello_world_omp`: ...
 
 
-### Documentation
+## Features
 
-A consise documentation can be generated and accessed using the following command: 
+Using `features`, the crate can be compiled to use different backends for the execution of parallel sections.
+These can also be enabled in benchmarks.
 
+```bash
+cargo build --features <FEATURE>
 ```
-cargo doc --open --no-deps
-```
+
+Available features:
+
+- `rayon`: Uses the [rayon][2] crate to handle parallelization on CPU.
+- `threads`: Uses [`std::thread`] methods to handle parallelization on CPU.
+- `gpu`: Currently used as a way to gate GPU usage as this cannot be done in pure Rust.
+
+## Compilation
+
+The build script will read the `CXX` environment variable to choose which C++ compiler to use
+for Rust/C++ interop. Note that the crate itself does not currently use C++ code, only examples
+do.
 
 ## References
 
@@ -54,16 +93,9 @@ cargo doc --open --no-deps
 - `move` keyword semantic & implementation: [link][MOVE]
 
 
-### Functor Implementation
-
-- A very specific answer to a very specific rust-lang issue: [link][FNIMPL]
-
-
-
 [1]: https://kokkos.github.io/kokkos-core-wiki/index.html
 [2]: https://docs.rs/rayon/latest/rayon/
 
 [NDARRAY]: https://docs.rs/ndarray/latest/ndarray/
 [CONSTG]: https://doc.rust-lang.org/reference/items/generics.html
-[FNIMPL]: https://github.com/rust-lang/rust/issues/29625#issuecomment-1692602873
 [MOVE]: https://stackoverflow.com/questions/30288782/what-are-move-semantics-in-rust
\ No newline at end of file
diff --git a/src/lib.rs b/src/lib.rs
index 72d1ea7..93b80b5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -38,11 +38,10 @@
 //!   is used to spot potential scaling issues induced by the more complex structure of Views.
 //! - `view_access`: Compare data access performances of regular vectors to [Views][view]; This
 //!   is used to spot potential scaling issues induced by the more complex structure of Views.
-//! - `mdrange_populate`: Compare performance of our implementation of MDRangePolicy compared to
-//!   regular implementation. Currently, only a serial implementation with no tiling is tested.
-//! - `feature`: Assess the correct usage of feature-specific backend. This one is meant to be run
-//!   multiple times, with varying features each time (e.g. no feature, then `rayon` to observe the
-//!   speedup).
+//! - `axpy` / `gemv` / `gemm`: Measure speedup on basic BLAS implementations by running the same kernel
+//!   in serial mode first, then using parallelization on CPU. _Meant to be executed using features_.
+//! - `hardcoded_gemm`: Compute the same operations as the `gemm` benchmark, but using a hardcoded implementation
+//!   instead of methods from the PoC. Used to assess the additional cost induced by the library.
 //!
 //!
 //! ### Examples

From 3d6169b3a6b79f14101b5855af29b23108897844 Mon Sep 17 00:00:00 2001
From: imrn99 <95699343+imrn99@users.noreply.github.com>
Date: Thu, 23 Nov 2023 15:31:08 +0100
Subject: [PATCH 7/7] fixed warnings when testing using parallel features

---
 src/routines/dispatch.rs | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/routines/dispatch.rs b/src/routines/dispatch.rs
index 4550975..f46335b 100644
--- a/src/routines/dispatch.rs
+++ b/src/routines/dispatch.rs
@@ -319,8 +319,14 @@ mod tests {
             routines::parameters::{ExecutionSpace, Schedule},
             view::{parameters::Layout, ViewOwned},
         };
-
-        let mut mat = ViewOwned::new_from_data(vec![0.0; 15], Layout::Right, [15]);
+        // fixes warnings when testing using a parallel feature
+        cfg_if::cfg_if! {
+            if #[cfg(any(feature = "threads", feature = "rayon"))] {
+                let mat = ViewOwned::new_from_data(vec![0.0; 15], Layout::Right, [15]);
+            } else {
+                let mut mat = ViewOwned::new_from_data(vec![0.0; 15], Layout::Right, [15]);
+            }
+        }
         let ref_mat = ViewOwned::new_from_data(vec![1.0; 15], Layout::Right, [15]);
         let rangep = RangePolicy::RangePolicy(0..15);
         let execp = ExecutionPolicy {
@@ -348,8 +354,14 @@ mod tests {
             routines::parameters::{ExecutionSpace, Schedule},
             view::{parameters::Layout, ViewOwned},
         };
-
-        let mut mat = ViewOwned::new_from_data(vec![0.0; 150], Layout::Right, [10, 15]);
+        // fixes warnings when testing using a parallel feature
+        cfg_if::cfg_if! {
+            if #[cfg(any(feature = "threads", feature = "rayon"))] {
+                let mat = ViewOwned::new_from_data(vec![0.0; 150], Layout::Right, [10, 15]);
+            } else {
+                let mut mat = ViewOwned::new_from_data(vec![0.0; 150], Layout::Right, [10, 15]);
+            }
+        }
         let ref_mat = ViewOwned::new_from_data(vec![1.0; 150], Layout::Right, [10, 15]);
         let rangep = RangePolicy::MDRangePolicy([0..10, 0..15]);
         let execp = ExecutionPolicy {
@@ -378,7 +390,14 @@ mod tests {
             view::{parameters::Layout, ViewOwned},
         };
 
-        let mut mat = ViewOwned::new_from_data(vec![0.0; 15], Layout::Right, [15]);
+        // fixes warnings when testing using a parallel feature
+        cfg_if::cfg_if! {
+            if #[cfg(any(feature = "threads", feature = "rayon"))] {
+                let mat = ViewOwned::new_from_data(vec![0.0; 15], Layout::Right, [15]);
+            } else {
+                let mut mat = ViewOwned::new_from_data(vec![0.0; 15], Layout::Right, [15]);
+            }
+        }
         let ref_mat = ViewOwned::new_from_data(vec![1.0; 15], Layout::Right, [15]);
         #[allow(clippy::single_range_in_vec_init)]
         let rangep = RangePolicy::MDRangePolicy([0..15]);