From 471413d259f9be0998f8d59b4879076f43b61e47 Mon Sep 17 00:00:00 2001 From: imrn99 <95699343+imrn99@users.noreply.github.com> Date: Thu, 23 Nov 2023 09:20:20 +0100 Subject: [PATCH 1/7] gemm skeleton --- Cargo.toml | 4 ++ benches/gemm.rs | 154 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+) create mode 100644 benches/gemm.rs diff --git a/Cargo.toml b/Cargo.toml index a723995..8156bf0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,3 +48,7 @@ harness = false [[bench]] name = "gemv" harness = false + +[[bench]] +name = "gemm" +harness = false \ No newline at end of file diff --git a/benches/gemm.rs b/benches/gemm.rs new file mode 100644 index 0000000..95d57a4 --- /dev/null +++ b/benches/gemm.rs @@ -0,0 +1,154 @@ +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; +use poc_kokkos_rs::{ + functor::KernelArgs, + routines::{ + parallel_for, + parameters::{ExecutionPolicy, ExecutionSpace, RangePolicy, Schedule}, + }, + view::{parameters::Layout, ViewOwned}, +}; +use rand::{ + distributions::{Distribution, Uniform}, + rngs::SmallRng, + SeedableRng, +}; + +// Serial GEMM +fn f1( + length: usize, + aa_init: Vec, + bb_init: Vec, + cc_init: Vec, + alpha: f64, + beta: f64, +) { + let mut aa = ViewOwned::new_from_data(aa_init, Layout::Right, [length, length]); + let mut bb = ViewOwned::new_from_data(bb_init, Layout::Right, [length, length]); + let mut cc = ViewOwned::new_from_data(cc_init, Layout::Right, [length, length]); + black_box(&mut aa); + black_box(&mut bb); + black_box(&mut cc); + + let execp = ExecutionPolicy { + space: ExecutionSpace::Serial, + range: RangePolicy::RangePolicy(0..length), + schedule: Schedule::Static, + }; + + // y = alpha * A * x + beta * y + let gemm_kernel = |arg: KernelArgs<1>| match arg { + KernelArgs::Index1D(i) => { + todo!() + } + KernelArgs::IndexND(_) => unimplemented!(), + KernelArgs::Handle => unimplemented!(), + }; + parallel_for(execp, gemm_kernel).unwrap(); + 
black_box(&cc); +} + +// DeviceCPU GEMM +fn f2( + length: usize, + aa_init: Vec, + bb_init: Vec, + cc_init: Vec, + alpha: f64, + beta: f64, +) { + let mut aa = ViewOwned::new_from_data(aa_init, Layout::Right, [length, length]); + let mut bb = ViewOwned::new_from_data(bb_init, Layout::Right, [length, length]); + let mut cc = ViewOwned::new_from_data(cc_init, Layout::Right, [length, length]); + black_box(&mut aa); + black_box(&mut bb); + black_box(&mut cc); + + let execp = ExecutionPolicy { + space: ExecutionSpace::DeviceCPU, + range: RangePolicy::RangePolicy(0..length), + schedule: Schedule::Static, + }; + + // y = alpha * A * x + beta * y + let gemm_kernel = |arg: KernelArgs<1>| match arg { + KernelArgs::Index1D(i) => { + todo!() + } + KernelArgs::IndexND(_) => unimplemented!(), + KernelArgs::Handle => unimplemented!(), + }; + parallel_for(execp, gemm_kernel).unwrap(); + black_box(&cc); +} + +pub fn criterion_benchmark(c: &mut Criterion) { + // Generate/Define the input + const DATA_SIZE: u32 = 12; + let length = 2_usize.pow(DATA_SIZE); + let seed: u64 = 9817498146784; + let mut rng = SmallRng::seed_from_u64(seed); + let range: Uniform = rand::distributions::Uniform::new(0.0, 100.0); + let aa_init: Vec = (0..length * length) + .map(|_| range.sample(&mut rng)) + .collect(); + let bb_init: Vec = (0..length * length) + .map(|_| range.sample(&mut rng)) + .collect(); + let cc_init: Vec = (0..length * length) + .map(|_| range.sample(&mut rng)) + .collect(); + let alpha: f64 = range.sample(&mut rng); + let beta: f64 = range.sample(&mut rng); + + let mut group = c.benchmark_group("gemm"); + group.bench_with_input( + BenchmarkId::new("exec-serial", ""), + &( + length, + aa_init.clone(), + bb_init.clone(), + cc_init.clone(), + alpha, + beta, + ), + |b, (length, aa_init, bb_init, cc_init, alpha, beta)| { + b.iter(|| { + f1( + *length, + aa_init.clone(), + bb_init.clone(), + cc_init.clone(), + *alpha, + *beta, + ) + }) + }, + ); + group.bench_with_input( + 
BenchmarkId::new("exec-devicecpu", ""), + &( + length, + aa_init.clone(), + bb_init.clone(), + cc_init.clone(), + alpha, + beta, + ), + |b, (length, aa_init, bb_init, cc_init, alpha, beta)| { + b.iter(|| { + f2( + *length, + aa_init.clone(), + bb_init.clone(), + cc_init.clone(), + *alpha, + *beta, + ) + }) + }, + ); + group.finish() +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); From fa9c601fd3aeb9083f3da2566fbba9c3255051c9 Mon Sep 17 00:00:00 2001 From: imrn99 <95699343+imrn99@users.noreply.github.com> Date: Thu, 23 Nov 2023 10:08:49 +0100 Subject: [PATCH 2/7] completed gemm speedup > n_cores ???? --- benches/gemm.rs | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/benches/gemm.rs b/benches/gemm.rs index 95d57a4..9939290 100644 --- a/benches/gemm.rs +++ b/benches/gemm.rs @@ -23,7 +23,7 @@ fn f1( beta: f64, ) { let mut aa = ViewOwned::new_from_data(aa_init, Layout::Right, [length, length]); - let mut bb = ViewOwned::new_from_data(bb_init, Layout::Right, [length, length]); + let mut bb = ViewOwned::new_from_data(bb_init, Layout::Left, [length, length]); // optimal layout since we iterate inside columns :) let mut cc = ViewOwned::new_from_data(cc_init, Layout::Right, [length, length]); black_box(&mut aa); black_box(&mut bb); @@ -35,10 +35,17 @@ fn f1( schedule: Schedule::Static, }; - // y = alpha * A * x + beta * y + // C = alpha * A * B + beta * C let gemm_kernel = |arg: KernelArgs<1>| match arg { + // lines KernelArgs::Index1D(i) => { - todo!() + // cols + for j in 0..length { + // b[k, j], same access as the DeviceCPU kernel: k values are adjacent in memory thanks to the LayoutLeft + let ab_ij: f64 = (0..length).map(|k| aa.get([i, k]) * bb.get([k, j])).sum(); + let val: f64 = alpha * ab_ij + beta * cc.get([i, j]); + cc.set([i, j], val); + } } KernelArgs::IndexND(_) => unimplemented!(), KernelArgs::Handle => unimplemented!(), @@ -57,7 +64,7 @@ fn f2( beta: f64, ) { let mut aa = ViewOwned::new_from_data(aa_init, Layout::Right, [length, 
length]); - let mut bb = ViewOwned::new_from_data(bb_init, Layout::Right, [length, length]); + let mut bb = ViewOwned::new_from_data(bb_init, Layout::Left, [length, length]); // optimal layout since we iterate inside columns :) let mut cc = ViewOwned::new_from_data(cc_init, Layout::Right, [length, length]); black_box(&mut aa); black_box(&mut bb); @@ -69,10 +76,17 @@ fn f2( schedule: Schedule::Static, }; - // y = alpha * A * x + beta * y + // C = alpha * A * B + beta * C let gemm_kernel = |arg: KernelArgs<1>| match arg { + // lines KernelArgs::Index1D(i) => { - todo!() + // cols + for j in 0..length { + // all b[k, j] for k values are adjacent in memory thanks to the LayoutLeft + let ab_ij: f64 = (0..length).map(|k| aa.get([i, k]) * bb.get([k, j])).sum(); + let val: f64 = alpha * ab_ij + beta * cc.get([i, j]); + cc.set([i, j], val); + } } KernelArgs::IndexND(_) => unimplemented!(), KernelArgs::Handle => unimplemented!(), @@ -83,7 +97,7 @@ fn f2( pub fn criterion_benchmark(c: &mut Criterion) { // Generate/Define the input - const DATA_SIZE: u32 = 12; + const DATA_SIZE: u32 = 10; let length = 2_usize.pow(DATA_SIZE); let seed: u64 = 9817498146784; let mut rng = SmallRng::seed_from_u64(seed); From e2358536e33de6b3435ade145d36842585ee67f0 Mon Sep 17 00:00:00 2001 From: imrn99 <95699343+imrn99@users.noreply.github.com> Date: Thu, 23 Nov 2023 11:43:35 +0100 Subject: [PATCH 3/7] hardcoded gemm bench it seems the library creates much more overhead for serial execution than parallel --- Cargo.toml | 6 ++ benches/hardcoded_gemm.rs | 152 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+) create mode 100644 benches/hardcoded_gemm.rs diff --git a/Cargo.toml b/Cargo.toml index 8156bf0..e90c9c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,8 @@ num_cpus = {version = "1.0", optional=true} [dev-dependencies] criterion = { version = "*", features = ["html_reports"] } rand = { version = "*", features = ["small_rng", "alloc"] } +atomic = {version = 
"0.5.3"} +rayon = {version = "*"} [build-dependencies] cxx-build = "*" @@ -51,4 +53,8 @@ harness = false [[bench]] name = "gemm" +harness = false + +[[bench]] +name = "hardcoded_gemm" harness = false \ No newline at end of file diff --git a/benches/hardcoded_gemm.rs b/benches/hardcoded_gemm.rs new file mode 100644 index 0000000..19065fe --- /dev/null +++ b/benches/hardcoded_gemm.rs @@ -0,0 +1,152 @@ +use atomic::Atomic; +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; + +use rand::{ + distributions::{Distribution, Uniform}, + rngs::SmallRng, + SeedableRng, +}; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; + +// hardcoded serial GEMM +fn serial_gemm(length: usize, aa: Vec, bb: Vec, cc: Vec, alpha: f64, beta: f64) { + let mut aa = aa; + let mut bb = bb; + let mut cc = cc; + black_box(&mut aa); + black_box(&mut bb); + black_box(&mut cc); + + for i in 0..length { + for j in 0..length { + // all b[k, j] for k values are adjacent in memory thanks to the LayoutLeft + let ab_ij: f64 = (0..length) + // unsafe access to keep things fair? + .map(|k| unsafe { + aa.get_unchecked(i * length + k) * bb.get_unchecked(j * length + k) + }) + .sum(); + let val: f64 = alpha * ab_ij + beta * cc[i * length + j]; + cc[i * length + j] = val; + } + } + black_box(&cc); +} + +// hardcoded rayon GEMM +fn gemm( + length: usize, + aa_init: Vec, + bb_init: Vec, + cc_init: Vec, + alpha: f64, + beta: f64, +) { + let mut aa: Vec> = aa_init + .iter() + .map(|val| atomic::Atomic::new(*val)) + .collect(); + let mut bb: Vec> = bb_init + .iter() + .map(|val| atomic::Atomic::new(*val)) + .collect(); + let mut cc: Vec> = cc_init + .iter() + .map(|val| atomic::Atomic::new(*val)) + .collect(); + black_box(&mut aa); + black_box(&mut bb); + black_box(&mut cc); + + // C = alpha * A * B + beta * C + (0..length).into_par_iter().for_each(|i| { + for j in 0..length { + let ab_ij: f64 = (0..length) + // unsafe access to keep things fair? 
+ .map(|k| unsafe { + aa.get_unchecked(i * length + k) + .load(atomic::Ordering::Relaxed) + * bb.get_unchecked(j * length + k) + .load(atomic::Ordering::Relaxed) + }) + .sum(); + let val: f64 = + alpha * ab_ij + beta * cc[i * length + j].load(atomic::Ordering::Relaxed); + cc[i * length + j].store(val, atomic::Ordering::Relaxed); + } + }); + black_box(&cc); +} + +pub fn criterion_benchmark(c: &mut Criterion) { + // Generate/Define the input + const DATA_SIZE: u32 = 10; + let length = 2_usize.pow(DATA_SIZE); + let seed: u64 = 9817498146784; + let mut rng = SmallRng::seed_from_u64(seed); + let range: Uniform = rand::distributions::Uniform::new(0.0, 100.0); + let aa_init: Vec = (0..length * length) + .map(|_| range.sample(&mut rng)) + .collect(); + let bb_init: Vec = (0..length * length) + .map(|_| range.sample(&mut rng)) + .collect(); + let cc_init: Vec = (0..length * length) + .map(|_| range.sample(&mut rng)) + .collect(); + let alpha: f64 = range.sample(&mut rng); + let beta: f64 = range.sample(&mut rng); + + let mut group = c.benchmark_group("hardcoded-gemm"); + group.bench_with_input( + BenchmarkId::new("serial", ""), + &( + length, + aa_init.clone(), + bb_init.clone(), + cc_init.clone(), + alpha, + beta, + ), + |b, (length, aa_init, bb_init, cc_init, alpha, beta)| { + b.iter(|| { + serial_gemm( + *length, + aa_init.clone(), + bb_init.clone(), + cc_init.clone(), + *alpha, + *beta, + ) + }) + }, + ); + group.bench_with_input( + BenchmarkId::new("rayon", ""), + &( + length, + aa_init.clone(), + bb_init.clone(), + cc_init.clone(), + alpha, + beta, + ), + |b, (length, aa_init, bb_init, cc_init, alpha, beta)| { + b.iter(|| { + gemm( + *length, + aa_init.clone(), + bb_init.clone(), + cc_init.clone(), + *alpha, + *beta, + ) + }) + }, + ); + + group.finish() +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); From 374ab4935a696d81e26d84ba2fd6b42e8161416c Mon Sep 17 00:00:00 2001 From: imrn99 <95699343+imrn99@users.noreply.github.com> Date: 
Thu, 23 Nov 2023 11:46:51 +0100 Subject: [PATCH 4/7] grouped blas speedup benches in a folder --- Cargo.toml | 9 +++++++++ benches/{ => blas-speedup}/axpy.rs | 0 benches/{ => blas-speedup}/gemm.rs | 0 benches/{ => blas-speedup}/gemv.rs | 0 4 files changed, 9 insertions(+) rename benches/{ => blas-speedup}/axpy.rs (100%) rename benches/{ => blas-speedup}/gemm.rs (100%) rename benches/{ => blas-speedup}/gemv.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index e90c9c4..e73189e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,8 @@ cxx-build = "*" # BENCHMARKS +## misc + [[bench]] name = "layout" harness = false @@ -43,18 +45,25 @@ harness = false name = "view_access" harness = false +## blas speedup measures + [[bench]] name = "axpy" +path = "blas-speedup/axpy.rs" harness = false [[bench]] name = "gemv" +path = "blas-speedup/gemv.rs" harness = false [[bench]] name = "gemm" +path = "blas-speedup/gemm.rs" harness = false +## library overhead measures + [[bench]] name = "hardcoded_gemm" harness = false \ No newline at end of file diff --git a/benches/axpy.rs b/benches/blas-speedup/axpy.rs similarity index 100% rename from benches/axpy.rs rename to benches/blas-speedup/axpy.rs diff --git a/benches/gemm.rs b/benches/blas-speedup/gemm.rs similarity index 100% rename from benches/gemm.rs rename to benches/blas-speedup/gemm.rs diff --git a/benches/gemv.rs b/benches/blas-speedup/gemv.rs similarity index 100% rename from benches/gemv.rs rename to benches/blas-speedup/gemv.rs From 044e2456e4eddea0935fa01ea092ffeb544204fa Mon Sep 17 00:00:00 2001 From: imrn99 <95699343+imrn99@users.noreply.github.com> Date: Thu, 23 Nov 2023 15:00:48 +0100 Subject: [PATCH 5/7] fixed bench paths --- Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e73189e..dae8e65 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,17 +49,17 @@ harness = false [[bench]] name = "axpy" -path = "blas-speedup/axpy.rs" +path = 
"benches/blas-speedup/axpy.rs" harness = false [[bench]] name = "gemv" -path = "blas-speedup/gemv.rs" +path = "benches/blas-speedup/gemv.rs" harness = false [[bench]] name = "gemm" -path = "blas-speedup/gemm.rs" +path = "benches/blas-speedup/gemm.rs" harness = false ## library overhead measures From c1c63e5fac2f700294c2d455f07f3d1459fe8ccc Mon Sep 17 00:00:00 2001 From: imrn99 <95699343+imrn99@users.noreply.github.com> Date: Thu, 23 Nov 2023 15:12:48 +0100 Subject: [PATCH 6/7] update doc & readme --- README.md | 64 ++++++++++++++++++++++++++++++++++++++++-------------- src/lib.rs | 9 ++++---- 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index dae8b90..77e1330 100644 --- a/README.md +++ b/README.md @@ -23,27 +23,66 @@ This makes limit-testing an fundamental part of the project. ## Quickstart -### Example +The PoC itself is a library, but you can run benchmarks and examples out of the box. -The PoC itself is a library, but you can run examples by using the following command: +### Benchmarks +Benchmarks can be run using the following command: + +```bash +# all benchmarks +cargo bench +# a specific benchmark +cargo bench --bench bench_name ``` + +All results are compiled to the `target/criterion/` folder. The following +benchmarks are available: + +- `layout`: Matrix-Vector product computation; This is used to put numbers on the + importance of data layout in memory. +- `view_init`: Compare initialization performances of regular vectors to [Views][view]; This + is used to spot potential scaling issues induced by the more complex structure of Views. +- `view_access`: Compare data access performances of regular vectors to [Views][view]; This + is used to spot potential scaling issues induced by the more complex structure of Views. +- `axpy` / `gemv` / `gemm`: Measure speedup on basic BLAS implementations by running the same kernel + in serial mode first, then using parallelization on CPU. _Meant to be executed using features_. 
+- `hardcoded_gemm`: Compute the same operations as the `gemm` benchmark, but using a hardcoded implementation + instead of methods from the PoC. Used to assess the additional cost induced by the library. + + +### Examples + +```bash cargo run --example hello-world ``` -The following examples are available: +The following examples are available: -- `hello-world`: ... -- `openmp-parallel`: ... +- `hello_world`: ... +- `hello_world_omp`: ... -### Documentation +## Features -A consise documentation can be generated and accessed using the following command: +Using `features`, the crate can be compiled to use different backends for the execution of parallel sections. +These can also be enabled in benchmarks. +```bash +cargo build --features feature_name ``` -cargo doc --open --no-deps -``` + +Available features: + +- `rayon`: Uses the [rayon][2] crate to handle parallelization on CPU. +- `threads`: Uses [`std::thread`] methods to handle parallelization on CPU. +- `gpu`: Currently used as a way to gate GPU usage as this cannot be done in pure Rust. + +## Compilation + +The build script will read the `CXX` environment variable to choose which C++ compiler to use +for Rust/C++ interop. Note that the crate itself does not currently use C++ code, only examples +do. 
## References @@ -54,16 +93,9 @@ cargo doc --open --no-deps - `move` keyword semantic & implementation: [link][MOVE] -### Functor Implementation - -- A very specific answer to a very specific rust-lang issue: [link][FNIMPL] - - - [1]: https://kokkos.github.io/kokkos-core-wiki/index.html [2]: https://docs.rs/rayon/latest/rayon/ [NDARRAY]: https://docs.rs/ndarray/latest/ndarray/ [CONSTG]: https://doc.rust-lang.org/reference/items/generics.html -[FNIMPL]: https://github.com/rust-lang/rust/issues/29625#issuecomment-1692602873 [MOVE]: https://stackoverflow.com/questions/30288782/what-are-move-semantics-in-rust \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 72d1ea7..93b80b5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,11 +38,10 @@ //! is used to spot potential scaling issues induced by the more complex structure of Views. //! - `view_access`: Compare data access performances of regular vectors to [Views][view]; This //! is used to spot potential scaling issues induced by the more complex structure of Views. -//! - `mdrange_populate`: Compare performance of our implementation of MDRangePolicy compared to -//! regular implementation. Currently, only a serial implementation with no tiling is tested. -//! - `feature`: Assess the correct usage of feature-specific backend. This one is meant to be run -//! multiple times, with varying features each time (e.g. no feature, then `rayon` to observe the -//! speedup). +//! - `axpy` / `gemv` / `gemm`: Measure speedup on basic BLAS implementations by running the same kernel +//! in serial mode first, then using parallelization on CPU. _Meant to be executed using features_. +//! - `hardcoded_gemm`: Compute the same operations as the `gemm` benchmark, but using a hardcoded implementation +//! instead of methods from the PoC. Used to assess the additional cost induced by the library. //! //! //! 
### Examples From 3d6169b3a6b79f14101b5855af29b23108897844 Mon Sep 17 00:00:00 2001 From: imrn99 <95699343+imrn99@users.noreply.github.com> Date: Thu, 23 Nov 2023 15:31:08 +0100 Subject: [PATCH 7/7] fixed warnings when testing using parallel features --- src/routines/dispatch.rs | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/routines/dispatch.rs b/src/routines/dispatch.rs index 4550975..f46335b 100644 --- a/src/routines/dispatch.rs +++ b/src/routines/dispatch.rs @@ -319,8 +319,14 @@ mod tests { routines::parameters::{ExecutionSpace, Schedule}, view::{parameters::Layout, ViewOwned}, }; - - let mut mat = ViewOwned::new_from_data(vec![0.0; 15], Layout::Right, [15]); + // fixes warnings when testing using a parallel feature + cfg_if::cfg_if! { + if #[cfg(any(feature = "threads", feature = "rayon"))] { + let mat = ViewOwned::new_from_data(vec![0.0; 15], Layout::Right, [15]); + } else { + let mut mat = ViewOwned::new_from_data(vec![0.0; 15], Layout::Right, [15]); + } + } let ref_mat = ViewOwned::new_from_data(vec![1.0; 15], Layout::Right, [15]); let rangep = RangePolicy::RangePolicy(0..15); let execp = ExecutionPolicy { @@ -348,8 +354,14 @@ mod tests { routines::parameters::{ExecutionSpace, Schedule}, view::{parameters::Layout, ViewOwned}, }; - - let mut mat = ViewOwned::new_from_data(vec![0.0; 150], Layout::Right, [10, 15]); + // fixes warnings when testing using a parallel feature + cfg_if::cfg_if! 
{ + if #[cfg(any(feature = "threads", feature = "rayon"))] { + let mat = ViewOwned::new_from_data(vec![0.0; 150], Layout::Right, [10, 15]); + } else { + let mut mat = ViewOwned::new_from_data(vec![0.0; 150], Layout::Right, [10, 15]); + } + } let ref_mat = ViewOwned::new_from_data(vec![1.0; 150], Layout::Right, [10, 15]); let rangep = RangePolicy::MDRangePolicy([0..10, 0..15]); let execp = ExecutionPolicy { @@ -378,7 +390,14 @@ mod tests { view::{parameters::Layout, ViewOwned}, }; - let mut mat = ViewOwned::new_from_data(vec![0.0; 15], Layout::Right, [15]); + // fixes warnings when testing using a parallel feature + cfg_if::cfg_if! { + if #[cfg(any(feature = "threads", feature = "rayon"))] { + let mat = ViewOwned::new_from_data(vec![0.0; 15], Layout::Right, [15]); + } else { + let mut mat = ViewOwned::new_from_data(vec![0.0; 15], Layout::Right, [15]); + } + } let ref_mat = ViewOwned::new_from_data(vec![1.0; 15], Layout::Right, [15]); #[allow(clippy::single_range_in_vec_init)] let rangep = RangePolicy::MDRangePolicy([0..15]);