Skip to content

Commit

Permalink
Allow user to provide a pre-hashed value
Browse files Browse the repository at this point in the history
This is ~10% faster for u64 lookups. For lookups and construction the
time is constant regardless of the true length of the input (assuming
you can amortize the hashing cost somehow externally to this library).

For example, for a 128-byte string, construction is ~2.5x faster and
lookups are ~1.7x faster.
  • Loading branch information
vlovich committed Mar 14, 2024
1 parent 6763f88 commit 6bf1804
Show file tree
Hide file tree
Showing 6 changed files with 418 additions and 113 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,7 @@ harness = false
[features]
default = ["parallel"]
parallel = ["rayon", "crossbeam-utils"]

[profile.release]
lto = true
debug = 2
115 changes: 111 additions & 4 deletions benches/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ extern crate bencher;

use bencher::Bencher;

use boomphf::Mphf;
use boomphf::{ExternallyHashed, Mphf};

fn build1_ser_u64(bench: &mut Bencher) {
let items: Vec<u64> = (0..1000000u64).map(|x| x * 2).collect();
Expand All @@ -13,13 +13,48 @@ fn build1_ser_u64(bench: &mut Bencher) {
});
}

/// Benchmark: run `Mphf::new(2.0, ..)` over 1,000,000 pre-hashed keys.
///
/// Each key is `ExternallyHashed(wyhash::wyrng(..))`, seeded with an even number in
/// `0..2_000_000`. The keys are generated once, outside the closure passed to
/// `bench.iter`, so key generation is not part of the benchmarked closure.
fn build1_ser_externally_hashed(bench: &mut Bencher) {
    const KEY_COUNT: u64 = 1_000_000;

    let mut hashed_keys: Vec<ExternallyHashed> = Vec::with_capacity(KEY_COUNT as usize);
    for n in 0..KEY_COUNT {
        // `wyrng` takes its seed by mutable reference, so give it a named local.
        let mut seed = n * 2;
        hashed_keys.push(ExternallyHashed(wyhash::wyrng(&mut seed)));
    }

    bench.iter(|| {
        std::hint::black_box(Mphf::new(2.0, &hashed_keys));
    });
}

/// Benchmark: run `Mphf::new(2.0, ..)` over 1,000,000 eight-byte array keys.
///
/// Key `n` is the little-endian byte encoding of `n * 2`. The keys are built once,
/// outside the closure passed to `bench.iter`.
fn build1_ser_slices(bench: &mut Bencher) {
    const KEY_COUNT: u64 = 1_000_000;

    let mut byte_keys: Vec<[u8; 8]> = Vec::with_capacity(KEY_COUNT as usize);
    for n in 0..KEY_COUNT {
        let doubled = n * 2;
        byte_keys.push(doubled.to_le_bytes());
    }

    bench.iter(|| {
        std::hint::black_box(Mphf::new(2.0, &byte_keys));
    });
}

/// Benchmark: run `Mphf::new(2.0, ..)` over 1,000,000 keys of 128 bytes each.
///
/// Every key is zero-filled except for its first eight bytes, which hold the
/// little-endian encoding of `n * 2`. The keys are built once, outside the closure
/// passed to `bench.iter`.
fn build1_ser_long_slices(bench: &mut Bencher) {
    const KEY_COUNT: u64 = 1_000_000;

    let mut long_keys = Vec::with_capacity(KEY_COUNT as usize);
    for n in 0..KEY_COUNT {
        let mut key = [0u8; 128];
        // Only the leading eight bytes vary between keys; the rest stay zero.
        let prefix = (n * 2).to_le_bytes();
        key[..8].copy_from_slice(&prefix);
        long_keys.push(key);
    }

    bench.iter(|| {
        std::hint::black_box(Mphf::new(2.0, &long_keys));
    });
}

/// Benchmark: run `Mphf::new(2.0, ..)` over 1,000,000 pre-hashed 128-byte keys.
///
/// Each 128-byte key (zero-filled, with the little-endian bytes of `n * 2` in its
/// first eight positions) is hashed with `wyhash::wyhash(.., 0)` and wrapped in
/// `ExternallyHashed`. Hashing happens once, outside the closure passed to
/// `bench.iter`.
fn build1_ser_long_slices_externally_hashed(bench: &mut Bencher) {
    const KEY_COUNT: u64 = 1_000_000;

    let mut hashed_keys = Vec::with_capacity(KEY_COUNT as usize);
    for n in 0..KEY_COUNT {
        let mut key = [0u8; 128];
        let prefix = (n * 2).to_le_bytes();
        key[..8].copy_from_slice(&prefix);
        // Only the 64-bit digest is stored; the 128-byte key itself is dropped.
        hashed_keys.push(ExternallyHashed(wyhash::wyhash(&key, 0)));
    }

    bench.iter(|| {
        std::hint::black_box(Mphf::new(2.0, &hashed_keys));
    });
}

#[allow(dead_code)]
fn build1_par_u64(bench: &mut Bencher) {
let items: Vec<u64> = (0..1000000u64).map(|x| x * 2).collect();
Expand All @@ -38,16 +73,88 @@ fn build1_par_slices(bench: &mut Bencher) {
});
}

fn scan1_ser(bench: &mut Bencher) {
fn scan1_ser_u64(bench: &mut Bencher) {
let items: Vec<u64> = (0..1000000u64).map(|x| x * 2).collect();
let phf = Mphf::new(2.0, &items);

bench.iter(|| {
for i in (0..1000000u64).map(|x| x * 2) {
for i in &items {
std::hint::black_box(phf.hash(&i));
}
});
}

benchmark_group!(benches, build1_ser_u64, build1_ser_slices, build1_par_u64, build1_par_slices, scan1_ser);
/// Benchmark: call `Mphf::hash` on each of 1,000,000 eight-byte array keys.
///
/// Key `n` is the little-endian byte encoding of `n * 2`. Both the keys and the
/// `Mphf` are built once, outside the closure passed to `bench.iter`; the closure
/// only performs the lookups.
fn scan1_ser_slice(bench: &mut Bencher) {
    const KEY_COUNT: u64 = 1_000_000;

    let mut byte_keys: Vec<[u8; 8]> = Vec::with_capacity(KEY_COUNT as usize);
    for n in 0..KEY_COUNT {
        byte_keys.push((n * 2).to_le_bytes());
    }
    let mphf = Mphf::new(2.0, &byte_keys);

    bench.iter(|| {
        for key in &byte_keys {
            std::hint::black_box(mphf.hash(key));
        }
    });
}

/// Benchmark: call `Mphf::hash` on each of 1,000,000 pre-hashed keys.
///
/// Each key is `ExternallyHashed(wyhash::wyrng(..))`, seeded with an even number in
/// `0..2_000_000`. Both the keys and the `Mphf` are built once, outside the closure
/// passed to `bench.iter`; the closure only performs the lookups.
fn scan1_ser_externally_hashed(bench: &mut Bencher) {
    const KEY_COUNT: u64 = 1_000_000;

    let mut hashed_keys: Vec<ExternallyHashed> = Vec::with_capacity(KEY_COUNT as usize);
    for n in 0..KEY_COUNT {
        // `wyrng` takes its seed by mutable reference, so give it a named local.
        let mut seed = n * 2;
        hashed_keys.push(ExternallyHashed(wyhash::wyrng(&mut seed)));
    }
    let mphf = Mphf::new(2.0, &hashed_keys);

    bench.iter(|| {
        for key in &hashed_keys {
            std::hint::black_box(mphf.hash(key));
        }
    });
}

/// Benchmark: call `Mphf::hash` on each of 1,000,000 keys of 128 bytes each.
///
/// Every key is zero-filled except for its first eight bytes, which hold the
/// little-endian encoding of `n * 2`. Both the keys and the `Mphf` are built once,
/// outside the closure passed to `bench.iter`; the closure only performs the lookups.
fn scan1_ser_long_key(bench: &mut Bencher) {
    const KEY_COUNT: u64 = 1_000_000;

    let mut long_keys = Vec::with_capacity(KEY_COUNT as usize);
    for n in 0..KEY_COUNT {
        let mut key = [0u8; 128];
        // Only the leading eight bytes vary between keys; the rest stay zero.
        let prefix = (n * 2).to_le_bytes();
        key[..8].copy_from_slice(&prefix);
        long_keys.push(key);
    }
    let mphf = Mphf::new(2.0, &long_keys);

    bench.iter(|| {
        for key in &long_keys {
            std::hint::black_box(mphf.hash(key));
        }
    });
}

/// Benchmark: call `Mphf::hash` on each of 1,000,000 pre-hashed 128-byte keys.
///
/// Each 128-byte key (zero-filled, with the little-endian bytes of `n * 2` in its
/// first eight positions) is hashed with `wyhash::wyhash(.., 0)` and wrapped in
/// `ExternallyHashed`. Hashing and `Mphf` construction both happen once, outside the
/// closure passed to `bench.iter`; the closure only performs the lookups.
fn scan1_ser_long_key_externally_hashed(bench: &mut Bencher) {
    const KEY_COUNT: u64 = 1_000_000;

    let mut hashed_keys: Vec<ExternallyHashed> = Vec::with_capacity(KEY_COUNT as usize);
    for n in 0..KEY_COUNT {
        let mut key = [0u8; 128];
        let prefix = (n * 2).to_le_bytes();
        key[..8].copy_from_slice(&prefix);
        // Only the 64-bit digest is stored; the 128-byte key itself is dropped.
        hashed_keys.push(ExternallyHashed(wyhash::wyhash(&key, 0)));
    }
    let mphf = Mphf::new(2.0, &hashed_keys);

    bench.iter(|| {
        for key in &hashed_keys {
            std::hint::black_box(mphf.hash(key));
        }
    });
}

// Collects the benchmark functions listed below into a group named `benches`
// and passes that group to `benchmark_main!`.
// NOTE(review): presumably `benchmark_main!` expands to this binary's `main`
// (the Cargo.toml hunk shows `harness = false`) — confirm against the bencher docs.
benchmark_group!(
benches,
build1_ser_externally_hashed,
build1_ser_u64,
build1_ser_slices,
build1_ser_long_slices,
build1_ser_long_slices_externally_hashed,
build1_par_u64,
build1_par_slices,
scan1_ser_u64,
scan1_ser_slice,
scan1_ser_externally_hashed,
scan1_ser_long_key,
scan1_ser_long_key_externally_hashed
);
benchmark_main!(benches);
2 changes: 1 addition & 1 deletion src/bitvector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ impl BitVector {
#[inline]
pub fn get_word(&self, word: usize) -> u64 {
#[cfg(feature = "parallel")]
return self.vector[word].load(Ordering::Relaxed) as u64;
return self.vector[word].load(Ordering::Relaxed);

#[cfg(not(feature = "parallel"))]
return self.vector[word] as u64;
Expand Down
Loading

0 comments on commit 6bf1804

Please sign in to comment.