diff --git a/benches/bench.rs b/benches/bench.rs index d3b42d8..9526829 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -1,6 +1,9 @@ use byteview::ByteView; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use std::time::Duration; +use std::{ + io::{Cursor, Read}, + time::Duration, +}; fn cmp_short(c: &mut Criterion) { let mut group = c.benchmark_group("cmp short"); @@ -198,22 +201,99 @@ fn eq_long(c: &mut Criterion) { } } -fn ctor(c: &mut Criterion) { - let mut group = c.benchmark_group("ctor long"); +fn ctor_short(c: &mut Criterion) { + let mut group = c.benchmark_group("ctor short"); + + let value = b"abcdefabcdef"; group.bench_function("Arc'd slice", |b| { b.iter(|| { - let _x: std::sync::Arc<[u8]> = - std::sync::Arc::from(nanoid::nanoid!().clone().as_bytes()); + let _x = std::sync::Arc::from(value); }); }); group.bench_function("ByteView", |b| { b.iter(|| { - let _x = ByteView::from(nanoid::nanoid!()); + let _x = ByteView::from(*value); + }); + }); +} + +fn ctor_long(c: &mut Criterion) { + let mut group = c.benchmark_group("ctor ctor_long"); + + let value = b"abcdefabcdefabcdefabcdefabcdefabcdef"; + + group.bench_function("Arc'd slice", |b| { + b.iter(|| { + let _x = std::sync::Arc::from(value); + }); + }); + + group.bench_function("ByteView", |b| { + b.iter(|| { + let _x = ByteView::from(*value); + }); + }); +} + +// Simulates `lsm-tree`-like deserializing of KV values +fn ctor_from_reader(c: &mut Criterion) { + use std::sync::Arc; + + let mut group = c.benchmark_group("ctor long from reader"); + + let value = b"abcdefabcdefabcdefabcdefabcdefabcdef"; + + group.bench_function("Arc'd slice", |b| { + b.iter(|| { + let mut c = Cursor::new(value); + let mut v = vec![0; value.len()]; + c.read_exact(&mut v).unwrap(); + let _x: Arc<[u8]> = v.into(); + }); + }); + + group.bench_function("Arc'd slice - preallocated", |b| { + b.iter(|| { + let mut c = Cursor::new(value); + + let v = vec![0; value.len()]; + let mut v: Arc<[u8]> = v.into(); + + let builder = Arc::get_mut(&mut v).unwrap(); + c.read_exact(builder).unwrap(); + }); + }); + + group.bench_function("ByteView::with_size", |b| { + b.iter(|| { + let mut c = Cursor::new(value); + + let mut x = ByteView::with_size(value.len()); + { + let mut builder = x.get_mut().unwrap(); + c.read_exact(&mut builder).unwrap(); + } + }); + }); + + group.bench_function("ByteView::from_reader", |b| { + b.iter(|| { + let mut c = Cursor::new(value); + let _x = ByteView::from_reader(&mut c, value.len()).unwrap(); }); }); } -criterion_group!(benches, eq_short, eq_long, cmp_short, cmp_long, ctor); +criterion_group!( + benches, + ctor_short, + ctor_long, + ctor_from_reader, + eq_short, + eq_long, + cmp_short, + cmp_long, +); criterion_main!(benches); diff --git a/src/byteview.rs b/src/byteview.rs index 696444a..56de552 100644 --- a/src/byteview.rs +++ b/src/byteview.rs @@ -187,7 +187,7 @@ impl std::hash::Hash for ByteView { /// RAII guard for [`ByteView::get_mut`], so the prefix gets /// updated properly when the mutation is done -pub struct Mutator<'a>(&'a mut ByteView); +pub struct Mutator<'a>(pub(crate) &'a mut ByteView); impl<'a> std::ops::Deref for Mutator<'a> { type Target = [u8]; @@ -253,6 +253,21 @@ impl ByteView { } } + /// Creates a slice and populates it with `len` bytes + /// from the given reader. + /// + /// # Errors + /// + /// Returns an error if an I/O exception occurred. + pub fn from_reader(reader: &mut R, len: usize) -> std::io::Result { + let mut s = Self::with_size(len); + { + let mut mutator = Mutator(&mut s); + reader.read_exact(&mut mutator)?; + } + Ok(s) + } + /// Creates a new zeroed, fixed-length byteview. /// /// Use [`ByteView::get_mut`] to mutate the content. @@ -344,72 +359,6 @@ impl ByteView { } view - - /* let slice_len = slice.len(); - - let Ok(len) = u32::try_from(slice_len) else { - panic!("byte slice too long"); - }; - - let mut builder = Self { - trailer: Trailer { - short: ManuallyDrop::new(ShortRepr { - len, - data: [0; INLINE_SIZE], - }), - }, - }; - - if builder.is_inline() { - // SAFETY: We check for inlinability - // so we know the the input slice fits our buffer - unsafe { - let base_ptr = std::ptr::addr_of_mut!(builder) as *mut u8; - let prefix_offset = base_ptr.add(std::mem::size_of::()); - std::ptr::copy_nonoverlapping(slice.as_ptr(), prefix_offset, slice_len); - } - } else { - unsafe { - (*builder.trailer.long) - .prefix - .copy_from_slice(&slice[0..PREFIX_SIZE]); - - let header_size = std::mem::size_of::(); - let alignment = std::mem::align_of::(); - let total_size = header_size + slice_len; - let layout = std::alloc::Layout::from_size_align(total_size, alignment).unwrap(); - - let heap_ptr = std::alloc::alloc(layout); - if heap_ptr.is_null() { - std::alloc::handle_alloc_error(layout); - } - - // SAFETY: We store a pointer to the copied slice, which comes directly after the header - (*builder.trailer.long).data = - heap_ptr.add(std::mem::size_of::()); - - // Copy byte slice into heap allocation - std::ptr::copy_nonoverlapping( - slice.as_ptr(), - (*builder.trailer.long).data.cast_mut(), - slice_len, - ); - - // Set pointer to heap allocation address - (*builder.trailer.long).heap = heap_ptr; - - // Set ref count - let heap_region = heap_ptr as *const HeapAllocationHeader; - let heap_region = &*heap_region; - heap_region.ref_count.store(1, Ordering::Release); - } - } - - debug_assert_eq!(slice, &*builder); - debug_assert_eq!(1, builder.ref_count()); - debug_assert_eq!(builder.len(), slice.len()); - - builder */ } fn get_heap_region(&self) -> &HeapAllocationHeader { @@ -736,6 +685,7 @@ mod serde { #[cfg(test)] mod tests { use super::{ByteView, HeapAllocationHeader}; + use std::io::Cursor; #[test] #[cfg(target_pointer_width = "64")] @@ -758,6 +708,17 @@ mod tests { ); } + #[test] + fn from_reader_1() -> std::io::Result<()> { + let str = b"abcdef"; + let mut cursor = Cursor::new(str); + + let a = ByteView::from_reader(&mut cursor, 6)?; + assert!(&*a == b"abcdef"); + + Ok(()) + } + #[test] fn cmp_misc_1() { let a = ByteView::from("abcdef"); diff --git a/src/lib.rs b/src/lib.rs index dc33220..bc55e13 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,7 +43,8 @@ clippy::nursery, clippy::expect_used, clippy::unwrap_used, - clippy::indexing_slicing + clippy::indexing_slicing, + clippy::needless_lifetimes )] mod byteview; diff --git a/src/strview.rs b/src/strview.rs index 41efccc..02f9032 100644 --- a/src/strview.rs +++ b/src/strview.rs @@ -1,4 +1,4 @@ -use crate::ByteView; +use crate::{byteview::Mutator, ByteView}; use std::{ops::Deref, sync::Arc}; /// An immutable, UTF-8–encoded string slice @@ -47,6 +47,21 @@ impl StrView { Self(ByteView::new(s.as_bytes())) } + /// Creates a new string and populates it with `len` bytes + /// from the given reader. + /// + /// # Errors + /// + /// Returns an error if an I/O exception occurred. + pub fn from_reader(reader: &mut R, len: usize) -> std::io::Result { + let mut s = ByteView::with_size(len); + { + let mut mutator = Mutator(&mut s); + reader.read_exact(&mut mutator)?; + } + Ok(Self(s)) + } + /// Clones the contents of this string into a string. #[must_use] pub fn to_owned(&self) -> String { @@ -176,6 +191,18 @@ mod serde { #[cfg(test)] mod tests { use super::StrView; + use std::io::Cursor; + + #[test] + fn from_reader_1() -> std::io::Result<()> { + let str = "abcdef"; + let mut cursor = Cursor::new(str); + + let a = StrView::from_reader(&mut cursor, 6)?; + assert!(&*a == "abcdef"); + + Ok(()) + } #[test] fn cmp_misc_1() {