Skip to content

Commit

Permalink
Use Barrett reduction
Browse files Browse the repository at this point in the history
Use Barrett reduction for the CRC calculation.
For now, the prototype dogfoods sse2neon's own intrinsics.
  • Loading branch information
Cuda-Chen committed Jan 25, 2024
1 parent 3c2791f commit ea2a572
Showing 1 changed file with 22 additions and 0 deletions.
22 changes: 22 additions & 0 deletions sse2neon.h
Original file line number Diff line number Diff line change
Expand Up @@ -8498,6 +8498,28 @@ FORCE_INLINE uint32_t _mm_crc32_u8(uint32_t crc, uint8_t v)
#elif ((__ARM_ARCH == 8) && defined(__ARM_FEATURE_CRC32)) || \
(defined(_M_ARM64) && !defined(__clang__))
crc = __crc32cb(crc, v);
#elif defined(__ARM_FEATURE_PMULL)
// Barrett reduction
__m128i orig = _mm_set_epi64x(0, (uint64_t) (crc ^ v) << (32 - 8));
__m128i tmp = orig;

uint64_t p = 0x105EC76F1;
uint64_t mu =
0x1dea713f1; // Barrett Reduction constant (u64`) = u` = (x**64 /
// P(x))` = 0x11f91caf6 0b 1 0001 1111 1001 0001 1100 1010
// 1111 0110 0x 1 f 3 1 7 a e d0

// Multiply by mu_{64}
tmp = _mm_clmulepi64_si128(tmp, _mm_set_epi64x(0, mu), 0x00);
// Divide by 2^64 (mask away the unnecessary bits)
tmp = _mm_and_si128(tmp, _mm_set_epi64x(0, 0xFFFFFFFF));
// Multiply by p (shifted left by 1 for alignment reasons)
tmp = _mm_clmulepi64_si128(tmp, _mm_set_epi64x(0, p), 0x00);
// Subtract original from result
tmp = _mm_xor_si128(tmp, orig);

// Extract the 'lower' (in bit-reflected sense) 32 bits
crc = (uint32_t) _mm_extract_epi32(tmp, 1);
#else
crc ^= v;

Expand Down

0 comments on commit ea2a572

Please sign in to comment.