From 63d3f0c0c66a5632dd3e29dfd8b2e59b0b999106 Mon Sep 17 00:00:00 2001
From: Vitali Lovich
Date: Mon, 13 Nov 2023 19:19:57 -0800
Subject: [PATCH 1/4] Add benchmarks for slices

---
 benches/build.rs | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/benches/build.rs b/benches/build.rs
index 6e0da04..8c36b8e 100644
--- a/benches/build.rs
+++ b/benches/build.rs
@@ -6,19 +6,35 @@ use bencher::Bencher;
 
 use boomphf::Mphf;
 
-fn build1_ser(bench: &mut Bencher) {
+fn build1_ser_u64(bench: &mut Bencher) {
+    let items: Vec<u64> = (0..1000000u64).map(|x| x * 2).collect();
+    bench.iter(|| {
+        std::hint::black_box(Mphf::new(2.0, &items));
+    });
+}
+
+fn build1_ser_slices(bench: &mut Bencher) {
+    let items: Vec<[u8; 8]> = (0..1000000u64).map(|x| (x * 2).to_le_bytes()).collect();
+    bench.iter(|| {
+        std::hint::black_box(Mphf::new(2.0, &items));
+    });
+}
+
+#[allow(dead_code)]
+fn build1_par_u64(bench: &mut Bencher) {
+    let items: Vec<u64> = (0..1000000u64).map(|x| x * 2).collect();
+    #[cfg(feature = "parallel")]
     bench.iter(|| {
-        let items: Vec<u64> = (0..1000000u64).map(|x| x * 2).collect();
-        let _ = Mphf::new(2.0, &items);
+        std::hint::black_box(Mphf::new_parallel(2.0, &items, None));
     });
 }
 
 #[allow(dead_code)]
-fn build1_par(bench: &mut Bencher) {
+fn build1_par_slices(bench: &mut Bencher) {
+    let items: Vec<[u8; 8]> = (0..1000000u64).map(|x| (x * 2).to_le_bytes()).collect();
     #[cfg(feature = "parallel")]
     bench.iter(|| {
-        let items: Vec<u64> = (0..1000000u64).map(|x| x * 2).collect();
-        let _ = Mphf::new_parallel(2.0, &items, None);
+        std::hint::black_box(Mphf::new_parallel(2.0, &items, None));
     });
 }
 
@@ -28,10 +44,10 @@ fn scan1_ser(bench: &mut Bencher) {
 
     bench.iter(|| {
         for i in (0..1000000u64).map(|x| x * 2) {
-            phf.hash(&i);
+            std::hint::black_box(phf.hash(&i));
         }
     });
 }
 
-benchmark_group!(benches, build1_ser, build1_par, scan1_ser);
+benchmark_group!(benches, build1_ser_u64, build1_ser_slices, build1_par_u64, build1_par_slices, scan1_ser);
 benchmark_main!(benches);

From 6763f881092ff80b4eb6f207ddd29021180f9dc8 Mon Sep 17 00:00:00 2001
From: Vitali Lovich
Date: Mon, 13 Nov 2023 20:38:34 -0800
Subject: [PATCH 2/4] Cleanup hashmod function

---
 src/lib.rs | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 32ca5fc..83c68a9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -64,11 +64,6 @@ fn hash_with_seed<T: Hash>(iter: u64, v: &T) -> u64 {
     state.finish()
 }
 
-#[inline]
-fn hash_with_seed32<T: Hash>(iter: u64, v: &T) -> u32 {
-    fold(hash_with_seed(iter, v))
-}
-
 #[inline]
 fn fastmod(hash: u32, n: u32) -> u64 {
     ((hash as u64) * (n as u64)) >> 32
@@ -78,11 +73,10 @@ fn hashmod<T: Hash>(iter: u64, v: &T, n: u64) -> u64 {
     // when n < 2^32, use the fast alternative to modulo described here:
     // https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
+    let h = hash_with_seed(iter, v);
     if n < (1 << 32) {
-        let h = hash_with_seed32(iter, v);
-        fastmod(h, n as u32) as u64
+        fastmod(fold(h), n as u32) as u64
     } else {
-        let h = hash_with_seed(iter, v);
         h % (n as u64)
     }
 }

From 5f72f7acdd43b22426ad74da2f8d3beaf701c103 Mon Sep 17 00:00:00 2001
From: Vitali Lovich
Date: Mon, 13 Nov 2023 21:12:47 -0800
Subject: [PATCH 3/4] Allow user to provide a pre-hashed value

This is ~10% faster for u64 lookups. For lookups and construction the
time is constant regardless of the true length of the input (assuming
you can amortize the hashing cost somehow externally to this library).
For example, for a 128-byte string, construction is ~2.5x faster and
lookups are ~1.7x faster.
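
A sketch of the usage this enables (illustrative only; `long_keys` and the
choice of wyhash are stand-ins for whatever deterministic 64-bit hash the
caller already computes):

    use boomphf::{ExternallyHashed, Mphf};

    // Hash each 128-byte key exactly once, outside the library...
    let items: Vec<ExternallyHashed> = long_keys
        .iter()
        .map(|k: &[u8; 128]| ExternallyHashed(wyhash::wyhash(k, 0)))
        .collect();
    let phf = Mphf::new(2.0, &items);

    // ...so construction and every subsequent lookup touch only the
    // fixed-size pre-hash, never the full key.
    let idx = phf.hash(&items[0]);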
---
 Cargo.toml       |   4 +
 benches/build.rs | 115 +++++++++++++++-
 src/bitvector.rs |   2 +-
 src/hashmap.rs   |  63 +++++----
 src/lib.rs       | 344 +++++++++++++++++++++++++++++++++++++----------
 src/par_iter.rs  |   5 +-
 6 files changed, 419 insertions(+), 114 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 2a604da..379f9a1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -30,3 +30,7 @@ harness = false
 [features]
 default = ["parallel"]
 parallel = ["rayon", "crossbeam-utils"]
+
+[profile.release]
+lto = true
+debug = 2

diff --git a/benches/build.rs b/benches/build.rs
index 8c36b8e..5daba91 100644
--- a/benches/build.rs
+++ b/benches/build.rs
@@ -4,7 +4,7 @@ extern crate bencher;
 
 use bencher::Bencher;
 
-use boomphf::Mphf;
+use boomphf::{ExternallyHashed, Mphf};
 
 fn build1_ser_u64(bench: &mut Bencher) {
     let items: Vec<u64> = (0..1000000u64).map(|x| x * 2).collect();
@@ -13,6 +13,15 @@ fn build1_ser_u64(bench: &mut Bencher) {
     });
 }
 
+fn build1_ser_externally_hashed(bench: &mut Bencher) {
+    let items: Vec<ExternallyHashed> = (0..1000000u64)
+        .map(|x| ExternallyHashed(wyhash::wyrng(&mut (x * 2))))
+        .collect();
+    bench.iter(|| {
+        std::hint::black_box(Mphf::new(2.0, &items));
+    });
+}
+
 fn build1_ser_slices(bench: &mut Bencher) {
     let items: Vec<[u8; 8]> = (0..1000000u64).map(|x| (x * 2).to_le_bytes()).collect();
     bench.iter(|| {
@@ -20,6 +29,32 @@ fn build1_ser_slices(bench: &mut Bencher) {
     });
 }
 
+fn build1_ser_long_slices(bench: &mut Bencher) {
+    let items = (0..1000000u64)
+        .map(|x| {
+            let mut long_key = [0u8; 128];
+            long_key[0..8].copy_from_slice(&(x * 2).to_le_bytes());
+            long_key
+        })
+        .collect::<Vec<_>>();
+    bench.iter(|| {
+        std::hint::black_box(Mphf::new(2.0, &items));
+    });
+}
+
+fn build1_ser_long_slices_externally_hashed(bench: &mut Bencher) {
+    let items = (0..1000000u64)
+        .map(|x| {
+            let mut long_key = [0u8; 128];
+            long_key[0..8].copy_from_slice(&(x * 2).to_le_bytes());
+            ExternallyHashed(wyhash::wyhash(&long_key, 0))
+        })
+        .collect::<Vec<_>>();
+    bench.iter(|| {
+        std::hint::black_box(Mphf::new(2.0, &items));
+    });
+}
+
 #[allow(dead_code)]
 fn build1_par_u64(bench: &mut Bencher) {
     let items: Vec<u64> = (0..1000000u64).map(|x| x * 2).collect();
@@ -38,16 +73,88 @@ fn build1_par_slices(bench: &mut Bencher) {
     });
 }
 
-fn scan1_ser(bench: &mut Bencher) {
+fn scan1_ser_u64(bench: &mut Bencher) {
     let items: Vec<u64> = (0..1000000u64).map(|x| x * 2).collect();
     let phf = Mphf::new(2.0, &items);
 
     bench.iter(|| {
-        for i in (0..1000000u64).map(|x| x * 2) {
+        for i in &items {
             std::hint::black_box(phf.hash(&i));
         }
     });
 }
 
+fn scan1_ser_slice(bench: &mut Bencher) {
+    let items: Vec<[u8; 8]> = (0..1000000u64).map(|x| (x * 2).to_le_bytes()).collect();
+    let phf = Mphf::new(2.0, &items);
+
+    bench.iter(|| {
+        for i in &items {
+            std::hint::black_box(phf.hash(i));
+        }
+    });
+}
+
+fn scan1_ser_externally_hashed(bench: &mut Bencher) {
+    let items: Vec<ExternallyHashed> = (0..1000000u64)
+        .map(|x| ExternallyHashed(wyhash::wyrng(&mut (x * 2))))
+        .collect();
+    let phf = Mphf::new(2.0, &items);
+
+    bench.iter(|| {
+        for i in &items {
+            std::hint::black_box(phf.hash(i));
+        }
+    });
+}
+
+fn scan1_ser_long_key(bench: &mut Bencher) {
+    let items = (0..1000000u64)
+        .map(|x| {
+            let mut long_key = [0u8; 128];
+            long_key[0..8].copy_from_slice(&(x * 2).to_le_bytes());
+            long_key
+        })
+        .collect::<Vec<_>>();
+    let phf = Mphf::new(2.0, &items);
+
+    bench.iter(|| {
+        for i in &items {
+            std::hint::black_box(phf.hash(i));
+        }
+    });
+}
+
+fn scan1_ser_long_key_externally_hashed(bench: &mut Bencher) {
+    let items: Vec<ExternallyHashed> = (0..1000000u64)
+        .map(|x| {
+            let mut long_key = [0u8; 128];
+            long_key[0..8].copy_from_slice(&(x * 2).to_le_bytes());
+            ExternallyHashed(wyhash::wyhash(&long_key, 0))
+        })
+        .collect();
+    let phf = Mphf::new(2.0, &items);
+
+    bench.iter(|| {
+        for i in &items {
+            std::hint::black_box(phf.hash(i));
+        }
+    });
+}
+
-benchmark_group!(benches, build1_ser_u64, build1_ser_slices, build1_par_u64, build1_par_slices, scan1_ser);
+benchmark_group!(
+    benches,
+    build1_ser_externally_hashed,
+    build1_ser_u64,
+    build1_ser_slices,
+    build1_ser_long_slices,
+    build1_ser_long_slices_externally_hashed,
+    build1_par_u64,
+    build1_par_slices,
+    scan1_ser_u64,
+    scan1_ser_slice,
+    scan1_ser_externally_hashed,
+    scan1_ser_long_key,
+    scan1_ser_long_key_externally_hashed
+);
 benchmark_main!(benches);

diff --git a/src/bitvector.rs b/src/bitvector.rs
index b6e7b34..5ead413 100644
--- a/src/bitvector.rs
+++ b/src/bitvector.rs
@@ -363,7 +363,7 @@ impl BitVector {
     #[inline]
     pub fn get_word(&self, word: usize) -> u64 {
         #[cfg(feature = "parallel")]
-        return self.vector[word].load(Ordering::Relaxed) as u64;
+        return self.vector[word].load(Ordering::Relaxed);
 
         #[cfg(not(feature = "parallel"))]
         return self.vector[word] as u64;

diff --git a/src/hashmap.rs b/src/hashmap.rs
index 49df9f0..3d54135 100644
--- a/src/hashmap.rs
+++ b/src/hashmap.rs
@@ -3,17 +3,16 @@
 #[cfg(feature = "serde")]
 use serde::{self, Deserialize, Serialize};
 
-use crate::Mphf;
+use crate::{Mphf, SeedableHash};
 use std::borrow::Borrow;
 use std::fmt::Debug;
-use std::hash::Hash;
 use std::iter::ExactSizeIterator;
 
 /// A HashMap data structure where the mapping between keys and values is encoded in a Mphf. This lets us store the keys and values in dense
 /// arrays, with ~3 bits/item overhead in the Mphf.
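+/// A sketch of typical usage (hypothetical values; construction goes through the
+/// `FromIterator` impl below):
+///
+/// ```ignore
+/// use boomphf::hashmap::BoomHashMap;
+/// let map: BoomHashMap<u64, &str> = vec![(1u64, "one"), (2, "two")]
+///     .into_iter()
+///     .collect();
+/// assert_eq!(map.get(&2), Some(&"two"));
+/// ```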
 #[derive(Debug, Clone)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-pub struct BoomHashMap<K: Hash, D> {
+pub struct BoomHashMap<K: SeedableHash, D> {
     mphf: Mphf<K>,
     pub(crate) keys: Vec<K>,
     pub(crate) values: Vec<D>,
 }
 
 impl<K, D> BoomHashMap<K, D>
 where
-    K: Hash + Debug + PartialEq,
+    K: SeedableHash + Debug + PartialEq,
     D: Debug,
 {
     fn create_map(mut keys: Vec<K>, mut values: Vec<D>, mphf: Mphf<K>) -> BoomHashMap<K, D> {
@@ -49,7 +48,7 @@ where
     pub fn get<Q: ?Sized>(&self, kmer: &Q) -> Option<&D>
     where
         K: Borrow<Q>,
-        Q: Hash + Eq,
+        Q: SeedableHash + Eq,
     {
         let maybe_pos = self.mphf.try_hash(kmer);
         match maybe_pos {
@@ -69,7 +68,7 @@ where
     pub fn get_mut<Q: ?Sized>(&mut self, kmer: &Q) -> Option<&mut D>
     where
         K: Borrow<Q>,
-        Q: Hash + Eq,
+        Q: SeedableHash + Eq,
     {
         let maybe_pos = self.mphf.try_hash(kmer);
         match maybe_pos {
@@ -89,7 +88,7 @@ where
     pub fn get_key_id<Q: ?Sized>(&self, kmer: &Q) -> Option<usize>
     where
         K: Borrow<Q>,
-        Q: Hash + Eq,
+        Q: SeedableHash + Eq,
     {
         let maybe_pos = self.mphf.try_hash(kmer);
         match maybe_pos {
@@ -133,7 +132,7 @@ where
 
 impl<K, D> core::iter::FromIterator<(K, D)> for BoomHashMap<K, D>
 where
-    K: Hash + Debug + PartialEq,
+    K: SeedableHash + Debug + PartialEq,
     D: Debug,
 {
     fn from_iter<I: IntoIterator<Item = (K, D)>>(iter: I) -> Self {
@@ -149,21 +148,21 @@ where
 }
 
 #[cfg(feature = "parallel")]
-pub trait ConstructibleKey: Hash + Debug + PartialEq + Send + Sync {}
+pub trait ConstructibleKey: SeedableHash + Debug + PartialEq + Send + Sync {}
 
 #[cfg(feature = "parallel")]
-impl<T> ConstructibleKey for T where T: Hash + Debug + PartialEq + Send + Sync {}
+impl<T> ConstructibleKey for T where T: SeedableHash + Debug + PartialEq + Send + Sync {}
 
 #[cfg(not(feature = "parallel"))]
-pub trait ConstructibleKey: Hash + Debug + PartialEq {}
+pub trait ConstructibleKey: SeedableHash + Debug + PartialEq {}
 
 #[cfg(not(feature = "parallel"))]
-impl<T> ConstructibleKey for T where T: Hash + Debug + PartialEq {}
+impl<T> ConstructibleKey for T where T: SeedableHash + Debug + PartialEq {}
 
 #[cfg(feature = "parallel")]
 impl<K, D> BoomHashMap<K, D>
 where
-    K: Hash + Debug + PartialEq + Send + Sync,
+    K: SeedableHash + Debug + PartialEq + Send + Sync,
     D: Debug,
 {
     /// Create a new hash map from the parallel array `keys` and `values`, using a parallelized method to construct the Mphf.
@@ -174,12 +173,12 @@ where
 }
 
 /// Iterate over key-value pairs in a BoomHashMap
-pub struct BoomIterator<'a, K: Hash + 'a, D: 'a> {
+pub struct BoomIterator<'a, K: SeedableHash + 'a, D: 'a> {
     hash: &'a BoomHashMap<K, D>,
     index: usize,
 }
 
-impl<'a, K: Hash, D> Iterator for BoomIterator<'a, K, D> {
+impl<'a, K: SeedableHash, D> Iterator for BoomIterator<'a, K, D> {
     type Item = (&'a K, &'a D);
 
     fn next(&mut self) -> Option<Self::Item> {
@@ -199,9 +198,9 @@ impl<'a, K: Hash, D> Iterator for BoomIterator<'a, K, D> {
     }
 }
 
-impl<'a, K: Hash, D1> ExactSizeIterator for BoomIterator<'a, K, D1> {}
+impl<'a, K: SeedableHash, D1> ExactSizeIterator for BoomIterator<'a, K, D1> {}
 
-impl<'a, K: Hash, D> IntoIterator for &'a BoomHashMap<K, D> {
+impl<'a, K: SeedableHash, D> IntoIterator for &'a BoomHashMap<K, D> {
     type Item = (&'a K, &'a D);
     type IntoIter = BoomIterator<'a, K, D>;
 
@@ -219,19 +218,19 @@ impl<'a, K: Hash, D> IntoIterator for &'a BoomHashMap<K, D> {
 /// arrays, with ~3 bits/item overhead in the Mphf.
 #[derive(Debug, Clone)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-pub struct BoomHashMap2<K: Hash, D1, D2> {
+pub struct BoomHashMap2<K: SeedableHash, D1, D2> {
     mphf: Mphf<K>,
     keys: Vec<K>,
     values: Vec<D1>,
     aux_values: Vec<D2>,
 }
 
-pub struct Boom2Iterator<'a, K: Hash + 'a, D1: 'a, D2: 'a> {
+pub struct Boom2Iterator<'a, K: SeedableHash + 'a, D1: 'a, D2: 'a> {
     hash: &'a BoomHashMap2<K, D1, D2>,
     index: usize,
 }
 
-impl<'a, K: Hash, D1, D2> Iterator for Boom2Iterator<'a, K, D1, D2> {
+impl<'a, K: SeedableHash, D1, D2> Iterator for Boom2Iterator<'a, K, D1, D2> {
     type Item = (&'a K, &'a D1, &'a D2);
 
     fn next(&mut self) -> Option<Self::Item> {
@@ -254,9 +253,9 @@ impl<'a, K: SeedableHash, D1, D2> Iterator for Boom2Iterator<'a, K, D1, D2> {
     }
 }
 
-impl<'a, K: Hash, D1, D2> ExactSizeIterator for Boom2Iterator<'a, K, D1, D2> {}
+impl<'a, K: SeedableHash, D1, D2> ExactSizeIterator for Boom2Iterator<'a, K, D1, D2> {}
 
-impl<'a, K: Hash, D1, D2> IntoIterator for &'a BoomHashMap2<K, D1, D2> {
+impl<'a, K: SeedableHash, D1, D2> IntoIterator for &'a BoomHashMap2<K, D1, D2> {
     type Item = (&'a K, &'a D1, &'a D2);
     type IntoIter = Boom2Iterator<'a, K, D1, D2>;
 
@@ -270,7 +269,7 @@ impl<'a, K: Hash, D1, D2> IntoIterator for &'a BoomHashMap2<K, D1, D2> {
 
 impl<K, D1, D2> BoomHashMap2<K, D1, D2>
 where
-    K: Hash + Debug + PartialEq,
+    K: SeedableHash + Debug + PartialEq,
     D1: Debug,
     D2: Debug,
 {
@@ -310,7 +309,7 @@ where
     pub fn get<Q: ?Sized>(&self, kmer: &Q) -> Option<(&D1, &D2)>
     where
         K: Borrow<Q>,
-        Q: Hash + Eq,
+        Q: SeedableHash + Eq,
     {
         let maybe_pos = self.mphf.try_hash(kmer);
         match maybe_pos {
@@ -329,7 +328,7 @@ where
     pub fn get_mut<Q: ?Sized>(&mut self, kmer: &Q) -> Option<(&mut D1, &mut D2)>
     where
         K: Borrow<Q>,
-        Q: Hash + Eq,
+        Q: SeedableHash + Eq,
     {
         let maybe_pos = self.mphf.try_hash(kmer);
         match maybe_pos {
@@ -351,7 +350,7 @@ where
     pub fn get_key_id<Q: ?Sized>(&self, kmer: &Q) -> Option<usize>
     where
         K: Borrow<Q>,
-        Q: Hash + Eq,
+        Q: SeedableHash + Eq,
     {
         let maybe_pos = self.mphf.try_hash(kmer);
         match maybe_pos {
@@ -395,7 +394,7 @@ where
 
 impl<K, D1, D2> core::iter::FromIterator<(K, D1, D2)> for BoomHashMap2<K, D1, D2>
 where
-    K: Hash + Debug + PartialEq,
+    K: SeedableHash + Debug + PartialEq,
     D1: Debug,
     D2: Debug,
 {
@@ -416,7 +415,7 @@ where
 #[cfg(feature = "parallel")]
 impl<K, D1, D2> BoomHashMap2<K, D1, D2>
 where
-    K: Hash + Debug + PartialEq + Send + Sync,
+    K: SeedableHash + Debug + PartialEq + Send + Sync,
     D1: Debug,
     D2: Debug,
 {
@@ -500,7 +499,7 @@ where
     pub fn get<Q: ?Sized>(&self, kmer: &Q) -> Option<&D1>
     where
         K: Borrow<Q>,
-        Q: Hash + Eq,
+        Q: SeedableHash + Eq,
     {
         let maybe_pos = self.mphf.try_hash(kmer);
         match maybe_pos {
@@ -513,7 +512,7 @@ where
     pub fn get_mut<Q: ?Sized>(&mut self, kmer: &Q) -> Option<&mut D1>
     where
         K: Borrow<Q>,
-        Q: Hash + Eq,
+        Q: SeedableHash + Eq,
     {
         let maybe_pos = self.mphf.try_hash(kmer);
         match maybe_pos {
@@ -619,7 +618,7 @@ where
     pub fn get<Q: ?Sized>(&self, kmer: &Q) -> Option<(&D1, &D2)>
     where
         K: Borrow<Q>,
-        Q: Hash + Eq,
+        Q: SeedableHash + Eq,
     {
         let maybe_pos = self.mphf.try_hash(kmer);
         maybe_pos.map(|pos| (&self.values[pos as usize], &self.aux_values[pos as usize]))
@@ -629,7 +628,7 @@ where
     pub fn get_mut<Q: ?Sized>(&mut self, kmer: &Q) -> Option<(&mut D1, &mut D2)>
    where
         K: Borrow<Q>,
-        Q: Hash + Eq,
+        Q: SeedableHash + Eq,
     {
         let maybe_pos = self.mphf.try_hash(kmer);
         maybe_pos.map(|pos| {

diff --git a/src/lib.rs b/src/lib.rs
index 83c68a9..49eccb1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -52,32 +52,200 @@ use std::sync::{Arc, Mutex};
 
 #[cfg(feature = "serde")]
 use serde::{self, Deserialize, Serialize};
 
-#[inline]
-fn fold(v: u64) -> u32 {
-    ((v & 0xFFFFFFFF) as u32) ^ ((v >> 32) as u32)
-}
+/// hash_with_seed used to construct the wyhash seed as 1 << (iter + iter). However, for external hashing
+/// there's a faster path available via lookup tables if we just pass in iters. This method is
+/// to ensure that pre-existing hashes continue to work as before when not using ExternallyHashed.
+#[inline(always)]
+fn default_seed_correction(seed: u64) -> u64 {
+    1 << (seed + seed)
+}
 
-#[inline]
-fn hash_with_seed<T: Hash>(iter: u64, v: &T) -> u64 {
-    let mut state = wyhash::WyHash::with_seed(1 << (iter + iter));
-    v.hash(&mut state);
+fn default_hash_with_seed<T: Hash + ?Sized>(value: &T, seed: u64) -> u64 {
+    let mut state = wyhash::WyHash::with_seed(1 << (seed + seed));
+    value.hash(&mut state);
     state.finish()
 }
 
+// This custom trait allows us to fast-path &[u8] to avoid constructing the temporary Hasher object.
+// Can be simplified once specialization is stabilized.
+pub trait SeedableHash {
+    fn hash_with_seed(&self, seed: u64) -> u64;
+}
+
+impl SeedableHash for [u8] {
+    fn hash_with_seed(&self, seed: u64) -> u64 {
+        wyhash::wyhash(self, default_seed_correction(seed))
+    }
+}
+
+impl<const N: usize> SeedableHash for [u8; N] {
+    fn hash_with_seed(&self, seed: u64) -> u64 {
+        wyhash::wyhash(self, default_seed_correction(seed))
+    }
+}
+
+impl SeedableHash for u8 {
+    fn hash_with_seed(&self, seed: u64) -> u64 {
+        wyhash::wyhash(&[*self], default_seed_correction(seed))
+    }
+}
+
+impl SeedableHash for i16 {
+    fn hash_with_seed(&self, seed: u64) -> u64 {
+        wyhash::wyhash(&self.to_le_bytes(), default_seed_correction(seed))
+    }
+}
+
+impl SeedableHash for u16 {
+    fn hash_with_seed(&self, seed: u64) -> u64 {
+        wyhash::wyhash(&self.to_le_bytes(), default_seed_correction(seed))
+    }
+}
+
+impl SeedableHash for i32 {
+    fn hash_with_seed(&self, seed: u64) -> u64 {
+        wyhash::wyhash(&self.to_le_bytes(), default_seed_correction(seed))
+    }
+}
+
+impl SeedableHash for u32 {
+    fn hash_with_seed(&self, seed: u64) -> u64 {
+        wyhash::wyhash(&self.to_le_bytes(), default_seed_correction(seed))
+    }
+}
+
+impl SeedableHash for i64 {
+    fn hash_with_seed(&self, seed: u64) -> u64 {
+        wyhash::wyhash(&self.to_le_bytes(), default_seed_correction(seed))
+    }
+}
+
+impl SeedableHash for u64 {
+    fn hash_with_seed(&self, seed: u64) -> u64 {
+        wyhash::wyhash(&self.to_le_bytes(), default_seed_correction(seed))
+    }
+}
+
+impl SeedableHash for isize {
+    fn hash_with_seed(&self, seed: u64) -> u64 {
+        wyhash::wyhash(&self.to_le_bytes(), default_seed_correction(seed))
+    }
+}
+
+impl SeedableHash for usize {
+    fn hash_with_seed(&self, seed: u64) -> u64 {
+        wyhash::wyhash(&self.to_le_bytes(), default_seed_correction(seed))
+    }
+}
+
+impl<T: SeedableHash + ?Sized> SeedableHash for &T {
+    fn hash_with_seed(&self, seed: u64) -> u64 {
+        (**self).hash_with_seed(seed)
+    }
+}
+
+impl<T: Hash> SeedableHash for &[T] {
+    fn hash_with_seed(&self, seed: u64) -> u64 {
+        default_hash_with_seed(self, seed)
+    }
+}
+
+impl<T: Hash> SeedableHash for Vec<T> {
+    fn hash_with_seed(&self, seed: u64) -> u64 {
+        default_hash_with_seed(self, seed)
+    }
+}
+
+impl SeedableHash for &str {
+    fn hash_with_seed(&self, seed: u64) -> u64 {
+        default_hash_with_seed(self, seed)
+    }
+}
+
+impl SeedableHash for String {
+    fn hash_with_seed(&self, seed: u64) -> u64 {
+        default_hash_with_seed(self, seed)
+    }
+}
+
+/// This is a fast-path where the hash for an entry is known externally. That way we can skip hashing the
+/// key for building / lookups which provides savings as keys grow longer or you need to do a lookup of the
+/// same key across multiple perfect hashes. It's the user's responsibility to construct this with a value
+/// that is deterministically derived from a key.
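+///
+/// A sketch of the contract (hypothetical usage; any deterministic 64-bit hasher
+/// such as wyhash works, but the same hasher must be used when building the Mphf
+/// and when looking keys up, and distinct keys must yield distinct hashes):
+///
+/// ```ignore
+/// use boomphf::{ExternallyHashed, Mphf};
+/// let pre_hash = |key: &str| ExternallyHashed(wyhash::wyhash(key.as_bytes(), 0));
+/// let keys = vec![pre_hash("a"), pre_hash("b"), pre_hash("c")];
+/// let phf = Mphf::new(1.7, &keys);
+/// assert!(phf.try_hash(&pre_hash("b")).is_some());
+/// ```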
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct ExternallyHashed(pub u64);
+
+impl ExternallyHashed {
+    // Helper function for wyrng.
+    const fn wymum(a: u64, b: u64) -> u64 {
+        let mul = a as u128 * b as u128;
+        ((mul >> 64) ^ mul) as u64
+    }
+
+    // wyrng, except a constified version.
+    const fn wyrng(seed: u64) -> u64 {
+        const P0: u64 = 0xa076_1d64_78bd_642f;
+        const P1: u64 = 0xe703_7ed1_a0b4_28db;
+
+        let seed = seed.wrapping_add(P0);
+        Self::wymum(seed ^ P1, seed)
+    }
+
+    // Generate lookup tables to map the hash seed to a random value.
+    const fn gen_seed_lookups() -> [u64; MAX_ITERS as usize + 1] {
+        let mut result = [0; MAX_ITERS as usize + 1];
+        let mut i = 0;
+        while i <= MAX_ITERS {
+            result[i as usize] = Self::wyrng(i);
+            i += 1;
+        }
+        result
+    }
+    const SEED_HASH_LOOKUP_TABLES: [u64; MAX_ITERS as usize + 1] = Self::gen_seed_lookups();
+
+    // Helper utility to convert the seed passed in from hashmod (which is in 0..=MAX_ITERS) into a hash.
+    fn fast_seed_hash(x: u64) -> u64 {
+        debug_assert!(x <= MAX_ITERS);
+        Self::SEED_HASH_LOOKUP_TABLES[x as usize]
+    }
+
+    // Quickly combine two hashes. Because .0 represents a hash, we know it's random and doesn't need to be
+    // independently hashed again, so we just need to combine it uniquely with iters.
+    fn hash_combine(h1: u64, h2: u64) -> u64 {
+        // https://stackoverflow.com/questions/5889238/why-is-xor-the-default-way-to-combine-hashes
+        h1 ^ (h2
+            .wrapping_add(0x517cc1b727220a95)
+            .wrapping_add(h1 << 6)
+            .wrapping_add(h1 >> 2))
+    }
+}
+
+impl SeedableHash for ExternallyHashed {
+    #[inline(always)]
+    fn hash_with_seed(&self, seed: u64) -> u64 {
+        Self::hash_combine(self.0, Self::fast_seed_hash(seed))
+    }
+}
+
+#[inline]
+fn fold(v: u64) -> u32 {
+    ((v & 0xFFFFFFFF) as u32) ^ ((v >> 32) as u32)
+}
+
 #[inline]
 fn fastmod(hash: u32, n: u32) -> u64 {
     ((hash as u64) * (n as u64)) >> 32
 }
 
 #[inline]
-fn hashmod<T: Hash>(iter: u64, v: &T, n: u64) -> u64 {
+fn hashmod<T: SeedableHash + ?Sized>(iter: u64, v: &T, n: u64) -> u64 {
     // when n < 2^32, use the fast alternative to modulo described here:
     // https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
-    let h = hash_with_seed(iter, v);
+    let h = v.hash_with_seed(iter);
     if n < (1 << 32) {
         fastmod(fold(h), n as u32) as u64
     } else {
-        h % (n as u64)
+        h % n
     }
 }
 
@@ -91,7 +259,52 @@ pub struct Mphf<T> {
 
 const MAX_ITERS: u64 = 100;
 
-impl<'a, T: 'a + Hash + Debug> Mphf<T> {
+impl<T> Mphf<T> {
+    fn compute_ranks(bvs: Vec<BitVector>) -> Box<[(BitVector, Box<[u64]>)]> {
+        let mut ranks = Vec::new();
+        let mut pop = 0_u64;
+
+        for bv in bvs {
+            let mut rank: Vec<u64> = Vec::new();
+            for i in 0..bv.num_words() {
+                let v = bv.get_word(i);
+
+                if i % 8 == 0 {
+                    rank.push(pop)
+                }
+
+                pop += v.count_ones() as u64;
+            }
+
+            ranks.push((bv, rank.into_boxed_slice()))
+        }
+
+        ranks.into_boxed_slice()
+    }
+
+    #[inline]
+    fn get_rank(&self, hash: u64, i: usize) -> u64 {
+        let idx = hash as usize;
+        let (bv, ranks) = self.bitvecs.get(i).expect("that level doesn't exist");
+
+        // Last pre-computed rank
+        let mut rank = ranks[idx / 512];
+
+        // Add rank of intervening words
+        for j in (idx / 64) & !7..idx / 64 {
+            rank += bv.get_word(j).count_ones() as u64;
+        }
+
+        // Add rank of final word up to hash
+        let final_word = bv.get_word(idx / 64);
+        if idx % 64 > 0 {
+            rank += (final_word << (64 - (idx % 64))).count_ones() as u64;
+        }
+        rank
+    }
+}
+
+impl<'a, T: 'a + SeedableHash + Debug> Mphf<T> {
     /// Constructs an MPHF from a (possibly lazy) iterator over iterators.
     /// This allows construction of very large MPHFs without holding all the keys
     /// in memory simultaneously.
@@ -121,7 +334,7 @@ impl<'a, T: 'a + Hash + Debug> Mphf<T> {
         loop {
             if iter > MAX_ITERS {
                 error!("ran out of key space. items: {:?}", done_keys.len());
-                panic!("counldn't find unique hashes");
+                panic!("couldn't find unique hashes");
             }
 
             let keys_remaining = if iter == 0 {
@@ -193,7 +406,7 @@ impl<'a, T: 'a + Hash + Debug> Mphf<T> {
 
                     object_pos = object_index + 1;
 
-                    let idx = hashmod(seed, &key, size);
+                    let idx = hashmod(seed, &&key, size);
 
                     if collide.contains(idx) {
                         a.remove(idx);
@@ -220,7 +433,7 @@ impl<'a, T: 'a + Hash + Debug> Mphf<T> {
     }
 }
 
-impl<T: Hash + Debug> Mphf<T> {
+impl<T: SeedableHash + Debug> Mphf<T> {
     /// Generate a minimal perfect hash function for the set of `objects`.
     /// `objects` must not contain any duplicate items.
     /// `gamma` controls the tradeoff between the construction-time and run-time speed,
@@ -268,49 +481,6 @@ impl<T: Hash + Debug> Mphf<T> {
         }
     }
 
-    fn compute_ranks(bvs: Vec<BitVector>) -> Box<[(BitVector, Box<[u64]>)]> {
-        let mut ranks = Vec::new();
-        let mut pop = 0_u64;
-
-        for bv in bvs {
-            let mut rank: Vec<u64> = Vec::new();
-            for i in 0..bv.num_words() {
-                let v = bv.get_word(i);
-
-                if i % 8 == 0 {
-                    rank.push(pop)
-                }
-
-                pop += v.count_ones() as u64;
-            }
-
-            ranks.push((bv, rank.into_boxed_slice()))
-        }
-
-        ranks.into_boxed_slice()
-    }
-
-    #[inline]
-    fn get_rank(&self, hash: u64, i: usize) -> u64 {
-        let idx = hash as usize;
-        let (bv, ranks) = self.bitvecs.get(i).expect("that level doesn't exist");
-
-        // Last pre-computed rank
-        let mut rank = ranks[idx / 512];
-
-        // Add rank of intervening words
-        for j in (idx / 64) & !7..idx / 64 {
-            rank += bv.get_word(j).count_ones() as u64;
-        }
-
-        // Add rank of final word up to hash
-        let final_word = bv.get_word(idx / 64);
-        if idx % 64 > 0 {
-            rank += (final_word << (64 - (idx % 64))).count_ones() as u64;
-        }
-        rank
-    }
-
     /// Compute the hash value of `item`. This method should only be used
     /// with items known to be in construction set. Use `try_hash` if you cannot
     /// guarantee that `item` was in the construction set. If `item` was not present
     /// in the construction set this function may panic.
     pub fn hash(&self, item: &T) -> u64 {
         for i in 0..self.bitvecs.len() {
             let (bv, _) = &self.bitvecs[i];
-            let hash = hashmod(i as u64, item, bv.capacity() as u64);
+            let hash = hashmod(i as u64, item, bv.capacity());
 
             if bv.contains(hash) {
                 return self.get_rank(hash, i);
@@ -334,11 +504,11 @@ impl<T: Hash + Debug> Mphf<T> {
     pub fn try_hash<Q>(&self, item: &Q) -> Option<u64>
     where
         T: Borrow<Q>,
-        Q: ?Sized + Hash,
+        Q: ?Sized + SeedableHash,
     {
         for i in 0..self.bitvecs.len() {
             let (bv, _) = &(self.bitvecs)[i];
-            let hash = hashmod(i as u64, item, bv.capacity() as u64);
+            let hash = hashmod(i as u64, item, bv.capacity());
 
             if bv.contains(hash) {
                 return Some(self.get_rank(hash, i));
@@ -350,7 +520,7 @@ impl<T: Hash + Debug> Mphf<T> {
 }
 
 #[cfg(feature = "parallel")]
-impl<T: Hash + Debug + Sync + Send> Mphf<T> {
+impl<T: SeedableHash + Debug + Sync + Send> Mphf<T> {
     /// Same as `new`, but parallelizes work on the rayon default Rayon threadpool.
     /// Configure the number of threads on that threadpool to control CPU usage.
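+    ///
+    /// A sketch of typical usage (hypothetical; requires the `parallel` feature):
+    ///
+    /// ```ignore
+    /// let keys: Vec<u64> = (0..1_000_000u64).collect();
+    /// let phf = boomphf::Mphf::new_parallel(1.7, &keys, None);
+    /// ```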
     #[cfg(feature = "parallel")]
     pub fn new_parallel(
@@ -412,7 +582,7 @@ struct Context {
 impl Context {
     fn new(size: u64, seed: u64) -> Self {
         Self {
-            size: size as u64,
+            size,
             seed,
             a: BitVector::new(size),
             collide: BitVector::new(size),
@@ -420,14 +590,14 @@ impl Context {
     }
 
     #[cfg(feature = "parallel")]
-    fn find_collisions<T: Hash>(&self, v: &T) {
+    fn find_collisions<T: SeedableHash>(&self, v: &T) {
         let idx = hashmod(self.seed, v, self.size);
         if !self.collide.contains(idx) && !self.a.insert(idx) {
             self.collide.insert(idx);
         }
     }
 
-    fn find_collisions_sync<T: Hash>(&mut self, v: &T) {
+    fn find_collisions_sync<T: SeedableHash>(&mut self, v: &T) {
         let idx = hashmod(self.seed, v, self.size);
         if !self.collide.contains(idx) && !self.a.insert_sync(idx) {
             self.collide.insert_sync(idx);
@@ -435,7 +605,7 @@ impl Context {
     }
 
     #[cfg(feature = "parallel")]
-    fn filter<'t, T: Hash>(&self, v: &'t T) -> Option<&'t T> {
+    fn filter<'t, T: SeedableHash>(&self, v: &'t T) -> Option<&'t T> {
         let idx = hashmod(self.seed, v, self.size);
         if self.collide.contains(idx) {
             self.a.remove(idx);
@@ -446,7 +616,7 @@ impl Context {
     }
 
     #[cfg(not(feature = "parallel"))]
-    fn filter<'t, T: Hash>(&mut self, v: &'t T) -> Option<&'t T> {
+    fn filter<'t, T: SeedableHash>(&mut self, v: &'t T) -> Option<&'t T> {
         let idx = hashmod(self.seed, v, self.size);
         if self.collide.contains(idx) {
             self.a.remove(idx);
@@ -527,7 +697,10 @@ where
 }
 
 #[cfg(feature = "parallel")]
-impl<'a, T: 'a + Hash + Debug + Send + Sync> Mphf<T> {
+impl<'a, T: 'a + SeedableHash + Debug + Send + Sync> Mphf<T>
+where
+    &'a T: SeedableHash,
+{
     /// Same as to `from_chunked_iterator` but parallelizes work over `num_threads` threads.
     #[cfg(feature = "parallel")]
     pub fn from_chunked_iterator_parallel(
@@ -563,7 +736,7 @@ impl<'a, T: 'a + Hash + Debug + Send + Sync> Mphf<T> {
         loop {
             if max_iters.is_some() && iter > max_iters.unwrap() {
items: {:?}", global.done_keys.len()); - panic!("counldn't find unique hashes"); + panic!("couldn't find unique hashes"); } let keys_remaining = if iter == 0 { @@ -695,7 +868,7 @@ mod tests { /// Check that a Minimal perfect hash function (MPHF) is generated for the set xs fn check_mphf(xs: HashSet) -> bool where - T: Sync + Hash + PartialEq + Eq + Debug + Send, + T: Sync + SeedableHash + PartialEq + Eq + Debug + Send, { let xsv: Vec = xs.into_iter().collect(); @@ -706,7 +879,7 @@ mod tests { /// Check that a Minimal perfect hash function (MPHF) is generated for the set xs fn check_mphf_serial(xsv: &[T]) -> bool where - T: Hash + PartialEq + Eq + Debug, + T: SeedableHash + PartialEq + Eq + Debug, { // Generate the MPHF let phf = Mphf::new(1.7, xsv); @@ -725,7 +898,7 @@ mod tests { #[cfg(feature = "parallel")] fn check_mphf_parallel(xsv: &[T]) -> bool where - T: Sync + Hash + PartialEq + Eq + Debug + Send, + T: Sync + SeedableHash + PartialEq + Eq + Debug + Send, { // Generate the MPHF let phf = Mphf::new_parallel(1.7, xsv, None); @@ -743,14 +916,14 @@ mod tests { #[cfg(not(feature = "parallel"))] fn check_mphf_parallel(_xsv: &[T]) -> bool where - T: Hash + PartialEq + Eq + Debug, + T: SeedableHash + PartialEq + Eq + Debug, { true } fn check_chunked_mphf(values: Vec>, total: u64) -> bool where - T: Sync + Hash + PartialEq + Eq + Debug + Send, + T: Sync + SeedableHash + PartialEq + Eq + Debug + Send, { let phf = Mphf::from_chunked_iterator(1.7, &values, total); @@ -770,7 +943,7 @@ mod tests { #[cfg(feature = "parallel")] fn check_chunked_mphf_parallel(values: Vec>, total: u64) -> bool where - T: Sync + Hash + PartialEq + Eq + Debug + Send, + T: Sync + SeedableHash + PartialEq + Eq + Debug + Send, { let phf = Mphf::from_chunked_iterator_parallel(1.7, &values, None, total, 2); @@ -877,4 +1050,27 @@ mod tests { let items = (0..1000000).map(|x| x * 2); assert!(check_mphf(HashSet::from_iter(items))); } + + #[test] + fn externally_hashed() { + let total = 1000000; + // User gets to pick the hash function. + let entries = (0..total) + .map(|x| ExternallyHashed(wyhash::wyrng(&mut (x * 2)))) + .collect::>(); + let phf = Mphf::new(1.7, &entries); + + let mut hashes = entries.iter().map(|eh| phf.hash(eh)).collect::>(); + hashes.sort_unstable(); + + let gt = (0..total as u64).collect::>(); + assert_eq!(hashes, gt); + + // Hand-picked a value that fails to hash since it's not in the original set that it's built from. + // It's not ideal that this assertion is sensitive to the implementation details internal to Mphf. + assert_eq!( + phf.try_hash(&ExternallyHashed(wyhash::wyrng(&mut 1000129))), + None + ); + } } diff --git a/src/par_iter.rs b/src/par_iter.rs index 0ced54d..a00c530 100644 --- a/src/par_iter.rs +++ b/src/par_iter.rs @@ -1,12 +1,11 @@ -use std::hash::Hash; - use crate::hashmap::BoomHashMap; +use crate::SeedableHash; use rayon::iter::plumbing::{bridge, Consumer, Producer, ProducerCallback, UnindexedConsumer}; use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator}; impl<'data, K, V> IntoParallelIterator for &'data BoomHashMap where - K: Hash + Sync + 'data, + K: SeedableHash + Sync + 'data, V: Sync + 'data, { type Item = (&'data K, &'data V); From dab764c10d70450c667ffdd2809d03b575e12e90 Mon Sep 17 00:00:00 2001 From: Vitali Lovich Date: Thu, 14 Mar 2024 10:29:31 -0700 Subject: [PATCH 4/4] Bump rust versions to fix CI failures Latest rayon requires at least Rust 1.63, regex requires 1.65, and std::hint::black_box was stabilized in 1.66, so bump that. 
Also bump the second (stable) version so that CI validates against a
newer Rust, since that's presumably its intent.
---
 .github/workflows/test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c7fc43f..d4bd33a 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -17,8 +17,8 @@ jobs:
     strategy:
       matrix:
         rust:
-          - "1.60.0"
-          - "1.65.0"
+          - "1.66.0"
+          - "1.76.0"
     steps:
       - uses: dtolnay/rust-toolchain@master
         with: