From e51bcac6cb22808b1f050dfda236effedf128712 Mon Sep 17 00:00:00 2001 From: Nazar Mokrynskyi Date: Sun, 5 Jan 2025 18:14:53 +0200 Subject: [PATCH 1/2] Hide unnecessarily public function like in other platforms --- src/portable.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/portable.rs b/src/portable.rs index 7af6828b0..8cd1ec393 100644 --- a/src/portable.rs +++ b/src/portable.rs @@ -120,7 +120,7 @@ pub fn compress_xof( crate::platform::le_bytes_from_words_64(&state) } -pub fn hash1( +fn hash1( input: &[u8; N], key: &CVWords, counter: u64, From d80acb358f93b4ac6cddc587fdf29190a7da0e3a Mon Sep 17 00:00:00 2001 From: Nazar Mokrynskyi Date: Sun, 5 Jan 2025 18:58:46 +0200 Subject: [PATCH 2/2] Make portable implementation `const` --- src/lib.rs | 6 +-- src/platform.rs | 127 ++++++++++++++++++++++++++++-------------------- src/portable.rs | 64 ++++++++++++------------ 3 files changed, 110 insertions(+), 87 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 37c2c0b31..325829c7e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -185,12 +185,12 @@ const DERIVE_KEY_CONTEXT: u8 = 1 << 5; const DERIVE_KEY_MATERIAL: u8 = 1 << 6; #[inline] -fn counter_low(counter: u64) -> u32 { +const fn counter_low(counter: u64) -> u32 { counter as u32 } #[inline] -fn counter_high(counter: u64) -> u32 { +const fn counter_high(counter: u64) -> u32 { (counter >> 32) as u32 } @@ -623,7 +623,7 @@ pub enum IncrementCounter { impl IncrementCounter { #[inline] - fn yes(&self) -> bool { + const fn yes(&self) -> bool { match self { IncrementCounter::Yes => true, IncrementCounter::No => false, diff --git a/src/platform.rs b/src/platform.rs index cd8ef63d2..18dab214f 100644 --- a/src/platform.rs +++ b/src/platform.rs @@ -1,5 +1,4 @@ use crate::{portable, CVWords, IncrementCounter, BLOCK_LEN}; -use arrayref::{array_mut_ref, array_ref}; cfg_if::cfg_if! { if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { @@ -473,74 +472,96 @@ pub fn sse2_detected() -> bool { false } +macro_rules! extract_u32_from_byte_chunks { + ($src:ident, $chunk_index:literal) => { + u32::from_le_bytes([ + $src[$chunk_index * 4 + 0], + $src[$chunk_index * 4 + 1], + $src[$chunk_index * 4 + 2], + $src[$chunk_index * 4 + 3], + ]) + }; +} + +macro_rules! store_u32_to_by_chunks { + ($src:ident, $dst:ident, $chunk_index:literal) => { + [ + $dst[$chunk_index * 4 + 0], + $dst[$chunk_index * 4 + 1], + $dst[$chunk_index * 4 + 2], + $dst[$chunk_index * 4 + 3], + ] = $src[$chunk_index].to_le_bytes(); + }; +} + #[inline(always)] -pub fn words_from_le_bytes_32(bytes: &[u8; 32]) -> [u32; 8] { +pub const fn words_from_le_bytes_32(bytes: &[u8; 32]) -> [u32; 8] { let mut out = [0; 8]; - out[0] = u32::from_le_bytes(*array_ref!(bytes, 0 * 4, 4)); - out[1] = u32::from_le_bytes(*array_ref!(bytes, 1 * 4, 4)); - out[2] = u32::from_le_bytes(*array_ref!(bytes, 2 * 4, 4)); - out[3] = u32::from_le_bytes(*array_ref!(bytes, 3 * 4, 4)); - out[4] = u32::from_le_bytes(*array_ref!(bytes, 4 * 4, 4)); - out[5] = u32::from_le_bytes(*array_ref!(bytes, 5 * 4, 4)); - out[6] = u32::from_le_bytes(*array_ref!(bytes, 6 * 4, 4)); - out[7] = u32::from_le_bytes(*array_ref!(bytes, 7 * 4, 4)); + out[0] = extract_u32_from_byte_chunks!(bytes, 0); + out[1] = extract_u32_from_byte_chunks!(bytes, 1); + out[2] = extract_u32_from_byte_chunks!(bytes, 2); + out[3] = extract_u32_from_byte_chunks!(bytes, 3); + out[4] = extract_u32_from_byte_chunks!(bytes, 4); + out[5] = extract_u32_from_byte_chunks!(bytes, 5); + out[6] = extract_u32_from_byte_chunks!(bytes, 6); + out[7] = extract_u32_from_byte_chunks!(bytes, 7); out } #[inline(always)] -pub fn words_from_le_bytes_64(bytes: &[u8; 64]) -> [u32; 16] { +pub const fn words_from_le_bytes_64(bytes: &[u8; 64]) -> [u32; 16] { let mut out = [0; 16]; - out[0] = u32::from_le_bytes(*array_ref!(bytes, 0 * 4, 4)); - out[1] = u32::from_le_bytes(*array_ref!(bytes, 1 * 4, 4)); - out[2] = u32::from_le_bytes(*array_ref!(bytes, 2 * 4, 4)); - out[3] = u32::from_le_bytes(*array_ref!(bytes, 3 * 4, 4)); - out[4] = u32::from_le_bytes(*array_ref!(bytes, 4 * 4, 4)); - out[5] = u32::from_le_bytes(*array_ref!(bytes, 5 * 4, 4)); - out[6] = u32::from_le_bytes(*array_ref!(bytes, 6 * 4, 4)); - out[7] = u32::from_le_bytes(*array_ref!(bytes, 7 * 4, 4)); - out[8] = u32::from_le_bytes(*array_ref!(bytes, 8 * 4, 4)); - out[9] = u32::from_le_bytes(*array_ref!(bytes, 9 * 4, 4)); - out[10] = u32::from_le_bytes(*array_ref!(bytes, 10 * 4, 4)); - out[11] = u32::from_le_bytes(*array_ref!(bytes, 11 * 4, 4)); - out[12] = u32::from_le_bytes(*array_ref!(bytes, 12 * 4, 4)); - out[13] = u32::from_le_bytes(*array_ref!(bytes, 13 * 4, 4)); - out[14] = u32::from_le_bytes(*array_ref!(bytes, 14 * 4, 4)); - out[15] = u32::from_le_bytes(*array_ref!(bytes, 15 * 4, 4)); + out[0] = extract_u32_from_byte_chunks!(bytes, 0); + out[1] = extract_u32_from_byte_chunks!(bytes, 1); + out[2] = extract_u32_from_byte_chunks!(bytes, 2); + out[3] = extract_u32_from_byte_chunks!(bytes, 3); + out[4] = extract_u32_from_byte_chunks!(bytes, 4); + out[5] = extract_u32_from_byte_chunks!(bytes, 5); + out[6] = extract_u32_from_byte_chunks!(bytes, 6); + out[7] = extract_u32_from_byte_chunks!(bytes, 7); + out[8] = extract_u32_from_byte_chunks!(bytes, 8); + out[9] = extract_u32_from_byte_chunks!(bytes, 9); + out[10] = extract_u32_from_byte_chunks!(bytes, 10); + out[11] = extract_u32_from_byte_chunks!(bytes, 11); + out[12] = extract_u32_from_byte_chunks!(bytes, 12); + out[13] = extract_u32_from_byte_chunks!(bytes, 13); + out[14] = extract_u32_from_byte_chunks!(bytes, 14); + out[15] = extract_u32_from_byte_chunks!(bytes, 15); out } #[inline(always)] -pub fn le_bytes_from_words_32(words: &[u32; 8]) -> [u8; 32] { +pub const fn le_bytes_from_words_32(words: &[u32; 8]) -> [u8; 32] { let mut out = [0; 32]; - *array_mut_ref!(out, 0 * 4, 4) = words[0].to_le_bytes(); - *array_mut_ref!(out, 1 * 4, 4) = words[1].to_le_bytes(); - *array_mut_ref!(out, 2 * 4, 4) = words[2].to_le_bytes(); - *array_mut_ref!(out, 3 * 4, 4) = words[3].to_le_bytes(); - *array_mut_ref!(out, 4 * 4, 4) = words[4].to_le_bytes(); - *array_mut_ref!(out, 5 * 4, 4) = words[5].to_le_bytes(); - *array_mut_ref!(out, 6 * 4, 4) = words[6].to_le_bytes(); - *array_mut_ref!(out, 7 * 4, 4) = words[7].to_le_bytes(); + store_u32_to_by_chunks!(words, out, 0); + store_u32_to_by_chunks!(words, out, 1); + store_u32_to_by_chunks!(words, out, 2); + store_u32_to_by_chunks!(words, out, 3); + store_u32_to_by_chunks!(words, out, 4); + store_u32_to_by_chunks!(words, out, 5); + store_u32_to_by_chunks!(words, out, 6); + store_u32_to_by_chunks!(words, out, 7); out } #[inline(always)] -pub fn le_bytes_from_words_64(words: &[u32; 16]) -> [u8; 64] { +pub const fn le_bytes_from_words_64(words: &[u32; 16]) -> [u8; 64] { let mut out = [0; 64]; - *array_mut_ref!(out, 0 * 4, 4) = words[0].to_le_bytes(); - *array_mut_ref!(out, 1 * 4, 4) = words[1].to_le_bytes(); - *array_mut_ref!(out, 2 * 4, 4) = words[2].to_le_bytes(); - *array_mut_ref!(out, 3 * 4, 4) = words[3].to_le_bytes(); - *array_mut_ref!(out, 4 * 4, 4) = words[4].to_le_bytes(); - *array_mut_ref!(out, 5 * 4, 4) = words[5].to_le_bytes(); - *array_mut_ref!(out, 6 * 4, 4) = words[6].to_le_bytes(); - *array_mut_ref!(out, 7 * 4, 4) = words[7].to_le_bytes(); - *array_mut_ref!(out, 8 * 4, 4) = words[8].to_le_bytes(); - *array_mut_ref!(out, 9 * 4, 4) = words[9].to_le_bytes(); - *array_mut_ref!(out, 10 * 4, 4) = words[10].to_le_bytes(); - *array_mut_ref!(out, 11 * 4, 4) = words[11].to_le_bytes(); - *array_mut_ref!(out, 12 * 4, 4) = words[12].to_le_bytes(); - *array_mut_ref!(out, 13 * 4, 4) = words[13].to_le_bytes(); - *array_mut_ref!(out, 14 * 4, 4) = words[14].to_le_bytes(); - *array_mut_ref!(out, 15 * 4, 4) = words[15].to_le_bytes(); + store_u32_to_by_chunks!(words, out, 0); + store_u32_to_by_chunks!(words, out, 1); + store_u32_to_by_chunks!(words, out, 2); + store_u32_to_by_chunks!(words, out, 3); + store_u32_to_by_chunks!(words, out, 4); + store_u32_to_by_chunks!(words, out, 5); + store_u32_to_by_chunks!(words, out, 6); + store_u32_to_by_chunks!(words, out, 7); + store_u32_to_by_chunks!(words, out, 8); + store_u32_to_by_chunks!(words, out, 9); + store_u32_to_by_chunks!(words, out, 10); + store_u32_to_by_chunks!(words, out, 11); + store_u32_to_by_chunks!(words, out, 12); + store_u32_to_by_chunks!(words, out, 13); + store_u32_to_by_chunks!(words, out, 14); + store_u32_to_by_chunks!(words, out, 15); out } diff --git a/src/portable.rs b/src/portable.rs index 8cd1ec393..7d48ccc90 100644 --- a/src/portable.rs +++ b/src/portable.rs @@ -2,10 +2,9 @@ use crate::{ counter_high, counter_low, CVBytes, CVWords, IncrementCounter, BLOCK_LEN, IV, MSG_SCHEDULE, OUT_LEN, }; -use arrayref::{array_mut_ref, array_ref}; #[inline(always)] -fn g(state: &mut [u32; 16], a: usize, b: usize, c: usize, d: usize, x: u32, y: u32) { +const fn g(state: &mut [u32; 16], a: usize, b: usize, c: usize, d: usize, x: u32, y: u32) { state[a] = state[a].wrapping_add(state[b]).wrapping_add(x); state[d] = (state[d] ^ state[a]).rotate_right(16); state[c] = state[c].wrapping_add(state[d]); @@ -17,7 +16,7 @@ fn g(state: &mut [u32; 16], a: usize, b: usize, c: usize, d: usize, x: u32, y: u } #[inline(always)] -fn round(state: &mut [u32; 16], msg: &[u32; 16], round: usize) { +const fn round(state: &mut [u32; 16], msg: &[u32; 16], round: usize) { // Select the message schedule based on the round. let schedule = MSG_SCHEDULE[round]; @@ -35,7 +34,7 @@ fn round(state: &mut [u32; 16], msg: &[u32; 16], round: usize) { } #[inline(always)] -fn compress_pre( +const fn compress_pre( cv: &CVWords, block: &[u8; BLOCK_LEN], block_len: u8, @@ -74,7 +73,7 @@ fn compress_pre( state } -pub fn compress_in_place( +pub const fn compress_in_place( cv: &mut CVWords, block: &[u8; BLOCK_LEN], block_len: u8, @@ -93,7 +92,7 @@ pub fn compress_in_place( cv[7] = state[7] ^ state[15]; } -pub fn compress_xof( +pub const fn compress_xof( cv: &CVWords, block: &[u8; BLOCK_LEN], block_len: u8, @@ -120,7 +119,7 @@ pub fn compress_xof( crate::platform::le_bytes_from_words_64(&state) } -fn hash1( +const fn hash1( input: &[u8; N], key: &CVWords, counter: u64, @@ -129,48 +128,51 @@ fn hash1( flags_end: u8, out: &mut CVBytes, ) { - debug_assert_eq!(N % BLOCK_LEN, 0, "uneven blocks"); + debug_assert!(N % BLOCK_LEN == 0, "uneven blocks"); let mut cv = *key; let mut block_flags = flags | flags_start; - let mut slice = &input[..]; + let mut slice = input.as_slice(); while slice.len() >= BLOCK_LEN { - if slice.len() == BLOCK_LEN { + let block; + (block, slice) = slice.split_at(BLOCK_LEN); + if slice.is_empty() { block_flags |= flags_end; } - compress_in_place( - &mut cv, - array_ref!(slice, 0, BLOCK_LEN), - BLOCK_LEN as u8, - counter, - block_flags, - ); + let block = { + let ptr = block.as_ptr() as *const [u8; BLOCK_LEN]; + // SAFETY: Sliced off correct length above + unsafe { &*ptr } + }; + + compress_in_place(&mut cv, block, BLOCK_LEN as u8, counter, block_flags); block_flags = flags; - slice = &slice[BLOCK_LEN..]; } *out = crate::platform::le_bytes_from_words_32(&cv); } -pub fn hash_many( - inputs: &[&[u8; N]], +pub const fn hash_many( + mut inputs: &[&[u8; N]], key: &CVWords, mut counter: u64, increment_counter: IncrementCounter, flags: u8, flags_start: u8, flags_end: u8, - out: &mut [u8], + mut out: &mut [u8], ) { debug_assert!(out.len() >= inputs.len() * OUT_LEN, "out too short"); - for (&input, output) in inputs.iter().zip(out.chunks_exact_mut(OUT_LEN)) { - hash1( - input, - key, - counter, - flags, - flags_start, - flags_end, - array_mut_ref!(output, 0, OUT_LEN), - ); + while !inputs.is_empty() { + let input; + (input, inputs) = inputs.split_first().expect("Not empty; qed"); + let o; + (o, out) = out.split_at_mut(OUT_LEN); + let o = { + let ptr = o.as_mut_ptr() as *mut [u8; OUT_LEN]; + // SAFETY: Sliced off correct length above + unsafe { &mut *ptr } + }; + + hash1(input, key, counter, flags, flags_start, flags_end, o); if increment_counter.yes() { counter += 1; }