Skip to content

Commit

Permalink
Use Rayon for 2D and 3D rasterization (#205)
Browse files Browse the repository at this point in the history
This is a step towards multithreading on the web.
  • Loading branch information
mkeeter authored Dec 1, 2024
1 parent 21b825c commit b5a3864
Show file tree
Hide file tree
Showing 17 changed files with 340 additions and 327 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,16 @@
between threads, but tapes were _already_ using an `Arc<..>` under the hood.
- Changed `Tape::recycle` from returning a `Storage` to returning an
`Option<Storage>`, as tapes may now be shared between threads.
- Use Rayon for 2D and 3D rasterization
- The `threads` member of `VoxelRenderConfig` and `ImageRenderConfig` is now
an `Option<ThreadPool>`, which can be `None` (use a single thread),
`Some(ThreadPool::Global)` (use the global Rayon pool), or
`Some(ThreadPool::Custom(..))` (use a user-provided pool)
- This is a step towards WebAssembly multithreading, using
`wasm-bindgen-rayon`.
- `ThreadCount` is moved to `fidget::mesh`, because that's the only place
it's now used
- The plan is to switch to Rayon for meshing as well, eventually

# 0.3.3
- `Function` and evaluator types now produce multiple outputs
Expand Down
12 changes: 7 additions & 5 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ notify = "7.0"
num-traits = "0.2"
ordered-float = "4"
rand = "0.8.5"
rayon = "1.10"
rhai = { version = "1.17", features = ["sync"] }
serde = { version = "1.0", features = ["derive", "rc"] }
static_assertions = "1"
Expand Down
1 change: 1 addition & 0 deletions demos/cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ env_logger.workspace = true
image.workspace = true
log.workspace = true
nalgebra.workspace = true
rayon.workspace = true

fidget.path = "../../fidget"
workspace-hack = { version = "0.1", path = "../../workspace-hack" }
Expand Down
36 changes: 32 additions & 4 deletions demos/cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ struct ImageSettings {
eval: EvalMode,

/// Number of threads to use
#[clap(short, long, default_value_t = NonZeroUsize::new(8).unwrap())]
threads: NonZeroUsize,
#[clap(short, long)]
threads: Option<NonZeroUsize>,

/// Number of times to render (for benchmarking)
#[clap(short = 'N', default_value_t = 1)]
Expand Down Expand Up @@ -119,10 +119,24 @@ fn run3d<F: fidget::eval::Function + fidget::render::RenderHints>(
if !isometric {
*mat.matrix_mut().get_mut((3, 2)).unwrap() = 0.3;
}
let pool: Option<rayon::ThreadPool>;
let threads = match settings.threads {
Some(n) if n.get() == 1 => None,
Some(n) => {
pool = Some(
rayon::ThreadPoolBuilder::new()
.num_threads(n.get())
.build()
.unwrap(),
);
pool.as_ref().map(fidget::render::ThreadPool::Custom)
}
None => Some(fidget::render::ThreadPool::Global),
};
let cfg = fidget::render::VoxelRenderConfig {
image_size: fidget::render::VoxelSize::from(settings.size),
tile_sizes: F::tile_sizes_3d(),
threads: settings.threads.into(),
threads,
..Default::default()
};
let shape = shape.apply_transform(mat.into());
Expand Down Expand Up @@ -197,10 +211,24 @@ fn run2d<F: fidget::eval::Function + fidget::render::RenderHints>(
.flat_map(|i| i.into_iter())
.collect()
} else {
let pool: Option<rayon::ThreadPool>;
let threads = match settings.threads {
Some(n) if n.get() == 1 => None,
Some(n) => {
pool = Some(
rayon::ThreadPoolBuilder::new()
.num_threads(n.get())
.build()
.unwrap(),
);
pool.as_ref().map(fidget::render::ThreadPool::Custom)
}
None => Some(fidget::render::ThreadPool::Global),
};
let cfg = fidget::render::ImageRenderConfig {
image_size: fidget::render::ImageSize::from(settings.size),
tile_sizes: F::tile_sizes_2d(),
threads: settings.threads.into(),
threads,
..Default::default()
};
if sdf {
Expand Down
1 change: 1 addition & 0 deletions fidget/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ nalgebra.workspace = true
num-traits.workspace = true
ordered-float.workspace = true
rand.workspace = true
rayon.workspace = true
serde.workspace = true
static_assertions.workspace = true
thiserror.workspace = true
Expand Down
2 changes: 1 addition & 1 deletion fidget/benches/mesh.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use criterion::{
black_box, criterion_group, criterion_main, BenchmarkId, Criterion,
};
use fidget::render::ThreadCount;
use fidget::mesh::ThreadCount;

const COLONNADE: &str = include_str!("../../models/colonnade.vm");

Expand Down
26 changes: 19 additions & 7 deletions fidget/benches/render.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use criterion::{
black_box, criterion_group, criterion_main, BenchmarkId, Criterion,
};
use fidget::render::{ImageSize, RenderHints, ThreadCount};
use fidget::render::{ImageSize, RenderHints, ThreadPool};

const PROSPERO: &str = include_str!("../../models/prospero.vm");

Expand Down Expand Up @@ -53,16 +53,28 @@ pub fn prospero_thread_sweep(c: &mut Criterion) {

let mut group =
c.benchmark_group("speed vs threads (prospero, 2d) (1024 x 1024)");
for threads in std::iter::once(ThreadCount::One).chain(
[1, 2, 4, 8, 16].map(|i| ThreadCount::Many(i.try_into().unwrap())),
) {
let pools = [1, 2, 4, 8, 16].map(|i| {
rayon::ThreadPoolBuilder::new()
.num_threads(i)
.build()
.unwrap()
});
for threads in [None, Some(ThreadPool::Global)]
.into_iter()
.chain(pools.iter().map(|p| Some(ThreadPool::Custom(p))))
{
let name = match &threads {
None => "-".to_string(),
Some(ThreadPool::Custom(i)) => i.current_num_threads().to_string(),
Some(ThreadPool::Global) => "N".to_string(),
};
let cfg = &fidget::render::ImageRenderConfig {
image_size: ImageSize::from(1024),
tile_sizes: fidget::vm::VmFunction::tile_sizes_2d(),
threads,
threads: threads.clone(),
..Default::default()
};
group.bench_function(BenchmarkId::new("vm", threads), move |b| {
group.bench_function(BenchmarkId::new("vm", &name), move |b| {
b.iter(|| {
let tape = shape_vm.clone();
black_box(cfg.run::<_, fidget::render::BitRenderMode>(tape))
Expand All @@ -76,7 +88,7 @@ pub fn prospero_thread_sweep(c: &mut Criterion) {
threads,
..Default::default()
};
group.bench_function(BenchmarkId::new("jit", threads), move |b| {
group.bench_function(BenchmarkId::new("jit", &name), move |b| {
b.iter(|| {
let tape = shape_jit.clone();
black_box(cfg.run::<_, fidget::render::BitRenderMode>(tape))
Expand Down
2 changes: 1 addition & 1 deletion fidget/src/core/eval/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ pub trait Function: Send + Sync + Clone {
/// This type must implement [`Eq`] so that traces can be compared; calling
/// [`Function::simplify`] with traces that compare equal should produce an
/// identical result and may be cached.
type Trace: Clone + Eq + Send + Trace;
type Trace: Clone + Eq + Send + Sync + Trace;

/// Associated type for storage used by the function itself
type Storage: Default + Send;
Expand Down
4 changes: 2 additions & 2 deletions fidget/src/core/vm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,11 +220,11 @@ impl<const N: usize> Function for GenericVmFunction<N> {

impl<const N: usize> RenderHints for GenericVmFunction<N> {
fn tile_sizes_3d() -> TileSizes {
TileSizes::new(&[256, 128, 64, 32, 16, 8]).unwrap()
TileSizes::new(&[128, 64, 32, 16, 8]).unwrap()
}

fn tile_sizes_2d() -> TileSizes {
TileSizes::new(&[256, 128, 64, 32, 16, 8]).unwrap()
TileSizes::new(&[128, 32, 8]).unwrap()
}
}

Expand Down
66 changes: 65 additions & 1 deletion fidget/src/mesh/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,71 @@ mod octree;
mod output;
mod qef;

use crate::render::{ThreadCount, View3};
use crate::render::View3;

/// How many threads evaluation is allowed to use
///
/// WebAssembly builds only offer [`ThreadCount::One`]; the `Many` variant is
/// compiled out when targeting `wasm32`.
#[derive(Copy, Clone, Debug)]
pub enum ThreadCount {
    /// Run every evaluation on the main thread without spawning workers
    One,

    /// Spawn the given number of worker threads for evaluation
    ///
    /// A value of `1` is accepted and spawns a single worker thread.  That is
    /// not the same as doing the work on the main thread, though it is rarely
    /// what you want.
    #[cfg(not(target_arch = "wasm32"))]
    Many(std::num::NonZeroUsize),
}

/// A count of `1` becomes [`ThreadCount::One`]; any larger count is wrapped in
/// [`ThreadCount::Many`]
#[cfg(not(target_arch = "wasm32"))]
impl From<std::num::NonZeroUsize> for ThreadCount {
    fn from(v: std::num::NonZeroUsize) -> Self {
        // `NonZeroUsize` already excludes zero, so "exactly one" is the only
        // value that needs special treatment.
        if v.get() == 1 {
            Self::One
        } else {
            Self::Many(v)
        }
    }
}

/// Prints `-` for single-threaded mode and the worker count otherwise
impl std::fmt::Display for ThreadCount {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `get()` collapses the variants into an `Option`, which keeps this
        // method free of target-specific match arms.
        match self.get() {
            None => f.write_str("-"),
            Some(count) => write!(f, "{count}"),
        }
    }
}

impl ThreadCount {
    /// Returns the number of worker threads
    ///
    /// The result is `None` when evaluation must stay single-threaded
    /// ([`ThreadCount::One`]) and `Some(n)` for `ThreadCount::Many`.
    pub fn get(&self) -> Option<usize> {
        match *self {
            Self::One => None,
            #[cfg(not(target_arch = "wasm32"))]
            Self::Many(count) => Some(count.get()),
        }
    }
}

/// Defaults to eight worker threads, or to [`ThreadCount::One`] on `wasm32`
/// where the `Many` variant does not exist
impl Default for ThreadCount {
    #[cfg(not(target_arch = "wasm32"))]
    fn default() -> Self {
        let eight = std::num::NonZeroUsize::new(8).unwrap();
        Self::Many(eight)
    }

    #[cfg(target_arch = "wasm32")]
    fn default() -> Self {
        Self::One
    }
}

#[cfg(not(target_arch = "wasm32"))]
mod mt;
Expand Down
6 changes: 3 additions & 3 deletions fidget/src/mesh/octree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ use super::{
gen::CELL_TO_VERT_TO_EDGES,
qef::QuadraticErrorSolver,
types::{Axis, Corner, Edge},
Mesh, Settings,
Mesh, Settings, ThreadCount,
};
use crate::{
eval::{BulkEvaluator, Function, TracingEvaluator},
render::{RenderHints, ThreadCount},
render::RenderHints,
shape::{Shape, ShapeBulkEval, ShapeTape, ShapeTracingEval, ShapeVars},
types::Grad,
};
Expand Down Expand Up @@ -1218,7 +1218,7 @@ mod test {
use crate::{
context::Tree,
mesh::types::{Edge, X, Y, Z},
render::{ThreadCount, View3},
render::View3,
shape::EzShape,
var::Var,
vm::{VmFunction, VmShape},
Expand Down
Loading

0 comments on commit b5a3864

Please sign in to comment.