From 0dc32c8ed68b062f5f2adfbc4b37e699608f1179 Mon Sep 17 00:00:00 2001 From: Clayton Smith Date: Tue, 24 Oct 2023 10:08:28 -0400 Subject: [PATCH 1/3] Remove redundant a_generic kernels Also, rename u_generic to generic. Signed-off-by: Clayton Smith --- kernels/volk/volk_16i_convert_8i.h | 16 ------- kernels/volk/volk_16i_s32f_convert_32f.h | 17 -------- kernels/volk/volk_16u_byteswap.h | 15 ------- kernels/volk/volk_32f_asin_32f.h | 2 +- kernels/volk/volk_32f_convert_64f.h | 17 -------- kernels/volk/volk_32f_exp_32f.h | 18 +------- kernels/volk/volk_32f_log2_32f.h | 24 ++--------- kernels/volk/volk_32f_s32f_convert_16i.h | 25 ----------- kernels/volk/volk_32f_s32f_convert_32i.h | 12 ------ kernels/volk/volk_32f_s32f_multiply_32f.h | 19 -------- kernels/volk/volk_32f_x2_add_32f.h | 18 -------- kernels/volk/volk_32f_x2_dot_prod_32f.h | 24 ----------- kernels/volk/volk_32f_x2_multiply_32f.h | 19 -------- kernels/volk/volk_32fc_conjugate_32fc.h | 17 -------- kernels/volk/volk_32fc_deinterleave_64f_x2.h | 19 -------- kernels/volk/volk_32fc_magnitude_32f.h | 18 -------- .../volk/volk_32fc_magnitude_squared_32f.h | 16 ------- kernels/volk/volk_32fc_s32fc_multiply_32fc.h | 30 ------------- .../volk_32fc_x2_conjugate_dot_prod_32fc.h | 41 ------------------ kernels/volk/volk_32fc_x2_dot_prod_32fc.h | 43 +------------------ kernels/volk/volk_32fc_x2_multiply_32fc.h | 19 -------- .../volk_32fc_x2_multiply_conjugate_32fc.h | 20 --------- kernels/volk/volk_32i_s32f_convert_32f.h | 19 -------- kernels/volk/volk_32u_byteswap.h | 20 --------- kernels/volk/volk_64f_convert_32f.h | 17 -------- kernels/volk/volk_64u_byteswap.h | 23 ---------- kernels/volk/volk_8i_convert_16i.h | 17 -------- kernels/volk/volk_8i_s32f_convert_32f.h | 19 -------- 28 files changed, 6 insertions(+), 558 deletions(-) diff --git a/kernels/volk/volk_16i_convert_8i.h b/kernels/volk/volk_16i_convert_8i.h index 652d24cec..cb7168ef8 100644 --- a/kernels/volk/volk_16i_convert_8i.h +++ b/kernels/volk/volk_16i_convert_8i.h @@ -276,20 +276,4 @@ static inline void volk_16i_convert_8i_neon(int8_t* outputVector, #endif /* LV_HAVE_NEON */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_16i_convert_8i_a_generic(int8_t* outputVector, - const int16_t* inputVector, - unsigned int num_points) -{ - int8_t* outputVectorPtr = outputVector; - const int16_t* inputVectorPtr = inputVector; - unsigned int number = 0; - - for (number = 0; number < num_points; number++) { - *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++ >> 8)); - } -} -#endif /* LV_HAVE_GENERIC */ - #endif /* INCLUDED_volk_16i_convert_8i_a_H */ diff --git a/kernels/volk/volk_16i_s32f_convert_32f.h b/kernels/volk/volk_16i_s32f_convert_32f.h index 22afd6849..817ecd22f 100644 --- a/kernels/volk/volk_16i_s32f_convert_32f.h +++ b/kernels/volk/volk_16i_s32f_convert_32f.h @@ -483,21 +483,4 @@ static inline void volk_16i_s32f_convert_32f_a_sse(float* outputVector, } #endif /* LV_HAVE_SSE */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_16i_s32f_convert_32f_a_generic(float* outputVector, - const int16_t* inputVector, - const float scalar, - unsigned int num_points) -{ - float* outputVectorPtr = outputVector; - const int16_t* inputVectorPtr = inputVector; - unsigned int number = 0; - - for (number = 0; number < num_points; number++) { - *outputVectorPtr++ = ((float)(*inputVectorPtr++)) / scalar; - } -} -#endif /* LV_HAVE_GENERIC */ - #endif /* INCLUDED_volk_16i_s32f_convert_32f_a_H */ diff --git a/kernels/volk/volk_16u_byteswap.h b/kernels/volk/volk_16u_byteswap.h index 56a11feb8..010ff81d8 100644 --- a/kernels/volk/volk_16u_byteswap.h +++ b/kernels/volk/volk_16u_byteswap.h @@ -271,21 +271,6 @@ static inline void volk_16u_byteswap_neon_table(uint16_t* intsToSwap, } #endif /* LV_HAVE_NEON */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_16u_byteswap_a_generic(uint16_t* intsToSwap, - unsigned int num_points) -{ - uint16_t* inputPtr = intsToSwap; - for (unsigned int point = 0; point < num_points; point++) { - uint16_t output = *inputPtr; - output = (((output >> 8) & 0xff) | ((output << 8) & 0xff00)); - *inputPtr = output; - inputPtr++; - } -} -#endif /* LV_HAVE_GENERIC */ - #ifdef LV_HAVE_ORC extern void volk_16u_byteswap_a_orc_impl(uint16_t* intsToSwap, unsigned int num_points); diff --git a/kernels/volk/volk_32f_asin_32f.h b/kernels/volk/volk_32f_asin_32f.h index 9c797d81f..093771639 100644 --- a/kernels/volk/volk_32f_asin_32f.h +++ b/kernels/volk/volk_32f_asin_32f.h @@ -474,7 +474,7 @@ volk_32f_asin_32f_u_sse4_1(float* bVector, const float* aVector, unsigned int nu #ifdef LV_HAVE_GENERIC static inline void -volk_32f_asin_32f_u_generic(float* bVector, const float* aVector, unsigned int num_points) +volk_32f_asin_32f_generic(float* bVector, const float* aVector, unsigned int num_points) { float* bPtr = bVector; const float* aPtr = aVector; diff --git a/kernels/volk/volk_32f_convert_64f.h b/kernels/volk/volk_32f_convert_64f.h index 0570a604c..93d1c6110 100644 --- a/kernels/volk/volk_32f_convert_64f.h +++ b/kernels/volk/volk_32f_convert_64f.h @@ -231,21 +231,4 @@ static inline void volk_32f_convert_64f_a_sse2(double* outputVector, #endif /* LV_HAVE_SSE2 */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_32f_convert_64f_a_generic(double* outputVector, - const float* inputVector, - unsigned int num_points) -{ - double* outputVectorPtr = outputVector; - const float* inputVectorPtr = inputVector; - unsigned int number = 0; - - for (number = 0; number < num_points; number++) { - *outputVectorPtr++ = ((double)(*inputVectorPtr++)); - } -} -#endif /* LV_HAVE_GENERIC */ - - #endif /* INCLUDED_volk_32f_convert_64f_a_H */ diff --git a/kernels/volk/volk_32f_exp_32f.h b/kernels/volk/volk_32f_exp_32f.h index c1cee5ea1..13d21201f 100644 --- a/kernels/volk/volk_32f_exp_32f.h +++ b/kernels/volk/volk_32f_exp_32f.h @@ -165,22 +165,6 @@ volk_32f_exp_32f_a_sse2(float* bVector, const float* aVector, unsigned int num_p #endif /* LV_HAVE_SSE2 for aligned */ -#ifdef LV_HAVE_GENERIC - -static inline void -volk_32f_exp_32f_a_generic(float* bVector, const float* aVector, unsigned int num_points) -{ - float* bPtr = bVector; - const float* aPtr = aVector; - unsigned int number = 0; - - for (number = 0; number < num_points; number++) { - *bPtr++ = expf(*aPtr++); - } -} - -#endif /* LV_HAVE_GENERIC */ - #endif /* INCLUDED_volk_32f_exp_32f_a_H */ #ifndef INCLUDED_volk_32f_exp_32f_u_H @@ -269,7 +253,7 @@ volk_32f_exp_32f_u_sse2(float* bVector, const float* aVector, unsigned int num_p #ifdef LV_HAVE_GENERIC static inline void -volk_32f_exp_32f_u_generic(float* bVector, const float* aVector, unsigned int num_points) +volk_32f_exp_32f_generic(float* bVector, const float* aVector, unsigned int num_points) { float* bPtr = bVector; const float* aPtr = aVector; diff --git a/kernels/volk/volk_32f_log2_32f.h b/kernels/volk/volk_32f_log2_32f.h index 8b96dab13..0443e56e4 100644 --- a/kernels/volk/volk_32f_log2_32f.h +++ b/kernels/volk/volk_32f_log2_32f.h @@ -459,24 +459,6 @@ volk_32f_log2_32f_neon(float* bVector, const float* aVector, unsigned int num_po #define INCLUDED_volk_32f_log2_32f_u_H -#ifdef LV_HAVE_GENERIC - -static inline void -volk_32f_log2_32f_u_generic(float* bVector, const float* aVector, unsigned int num_points) -{ - float* bPtr = bVector; - const float* aPtr = aVector; - unsigned int number = 0; - - for (number = 0; number < num_points; number++) { - float const result = log2f(*aPtr++); - *bPtr++ = isinf(result) ? -127.0f : result; - } -} - -#endif /* LV_HAVE_GENERIC */ - - #ifdef LV_HAVE_SSE4_1 #include @@ -555,7 +537,7 @@ volk_32f_log2_32f_u_sse4_1(float* bVector, const float* aVector, unsigned int nu } number = quarterPoints * 4; - volk_32f_log2_32f_u_generic(bPtr, aPtr, num_points - number); + volk_32f_log2_32f_generic(bPtr, aPtr, num_points - number); } #endif /* LV_HAVE_SSE4_1 for unaligned */ @@ -643,7 +625,7 @@ static inline void volk_32f_log2_32f_u_avx2_fma(float* bVector, } number = eighthPoints * 8; - volk_32f_log2_32f_u_generic(bPtr, aPtr, num_points - number); + volk_32f_log2_32f_generic(bPtr, aPtr, num_points - number); } #endif /* LV_HAVE_AVX2 && LV_HAVE_FMA for unaligned */ @@ -731,7 +713,7 @@ volk_32f_log2_32f_u_avx2(float* bVector, const float* aVector, unsigned int num_ } number = eighthPoints * 8; - volk_32f_log2_32f_u_generic(bPtr, aPtr, num_points - number); + volk_32f_log2_32f_generic(bPtr, aPtr, num_points - number); } #endif /* LV_HAVE_AVX2 for unaligned */ diff --git a/kernels/volk/volk_32f_s32f_convert_16i.h b/kernels/volk/volk_32f_s32f_convert_16i.h index 7d34fcd83..fe5a31b3f 100644 --- a/kernels/volk/volk_32f_s32f_convert_16i.h +++ b/kernels/volk/volk_32f_s32f_convert_16i.h @@ -553,29 +553,4 @@ static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, #endif /* LV_HAVE_SSE */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_32f_s32f_convert_16i_a_generic(int16_t* outputVector, - const float* inputVector, - const float scalar, - unsigned int num_points) -{ - int16_t* outputVectorPtr = outputVector; - const float* inputVectorPtr = inputVector; - unsigned int number = 0; - float min_val = SHRT_MIN; - float max_val = SHRT_MAX; - float r; - - for (number = 0; number < num_points; number++) { - r = *inputVectorPtr++ * scalar; - if (r < min_val) - r = min_val; - else if (r > max_val) - r = max_val; - *outputVectorPtr++ = (int16_t)rintf(r); - } -} -#endif /* LV_HAVE_GENERIC */ - #endif /* INCLUDED_volk_32f_s32f_convert_16i_a_H */ diff --git a/kernels/volk/volk_32f_s32f_convert_32i.h b/kernels/volk/volk_32f_s32f_convert_32i.h index 4592fd933..0cd9dee8e 100644 --- a/kernels/volk/volk_32f_s32f_convert_32i.h +++ b/kernels/volk/volk_32f_s32f_convert_32i.h @@ -406,16 +406,4 @@ static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, #endif /* LV_HAVE_SSE */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector, - const float* inputVector, - const float scalar, - unsigned int num_points) -{ - volk_32f_s32f_convert_32i_generic(outputVector, inputVector, scalar, num_points); -} - -#endif /* LV_HAVE_GENERIC */ - #endif /* INCLUDED_volk_32f_s32f_convert_32i_a_H */ diff --git a/kernels/volk/volk_32f_s32f_multiply_32f.h b/kernels/volk/volk_32f_s32f_multiply_32f.h index 28dc14eee..1cdb6a1af 100644 --- a/kernels/volk/volk_32f_s32f_multiply_32f.h +++ b/kernels/volk/volk_32f_s32f_multiply_32f.h @@ -258,25 +258,6 @@ static inline void volk_32f_s32f_multiply_32f_u_neon(float* cVector, #endif /* LV_HAVE_NEON */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_32f_s32f_multiply_32f_a_generic(float* cVector, - const float* aVector, - const float scalar, - unsigned int num_points) -{ - unsigned int number = 0; - const float* inputPtr = aVector; - float* outputPtr = cVector; - for (number = 0; number < num_points; number++) { - *outputPtr = (*inputPtr) * scalar; - inputPtr++; - outputPtr++; - } -} -#endif /* LV_HAVE_GENERIC */ - - #ifdef LV_HAVE_ORC extern void volk_32f_s32f_multiply_32f_a_orc_impl(float* dst, diff --git a/kernels/volk/volk_32f_x2_add_32f.h b/kernels/volk/volk_32f_x2_add_32f.h index 3d3da0a0c..b2b4bb34d 100644 --- a/kernels/volk/volk_32f_x2_add_32f.h +++ b/kernels/volk/volk_32f_x2_add_32f.h @@ -373,24 +373,6 @@ extern void volk_32f_x2_add_32f_a_neonpipeline(float* cVector, unsigned int num_points); #endif /* LV_HAVE_NEONV7 */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_32f_x2_add_32f_a_generic(float* cVector, - const float* aVector, - const float* bVector, - unsigned int num_points) -{ - float* cPtr = cVector; - const float* aPtr = aVector; - const float* bPtr = bVector; - unsigned int number = 0; - - for (number = 0; number < num_points; number++) { - *cPtr++ = (*aPtr++) + (*bPtr++); - } -} -#endif /* LV_HAVE_GENERIC */ - #ifdef LV_HAVE_ORC diff --git a/kernels/volk/volk_32f_x2_dot_prod_32f.h b/kernels/volk/volk_32f_x2_dot_prod_32f.h index 4f9d8f387..5bdb72ced 100644 --- a/kernels/volk/volk_32f_x2_dot_prod_32f.h +++ b/kernels/volk/volk_32f_x2_dot_prod_32f.h @@ -470,30 +470,6 @@ static inline void volk_32f_x2_dot_prod_32f_u_avx512f(float* result, #include -#ifdef LV_HAVE_GENERIC - - -static inline void volk_32f_x2_dot_prod_32f_a_generic(float* result, - const float* input, - const float* taps, - unsigned int num_points) -{ - - float dotProduct = 0; - const float* aPtr = input; - const float* bPtr = taps; - unsigned int number = 0; - - for (number = 0; number < num_points; number++) { - dotProduct += ((*aPtr++) * (*bPtr++)); - } - - *result = dotProduct; -} - -#endif /*LV_HAVE_GENERIC*/ - - #ifdef LV_HAVE_SSE diff --git a/kernels/volk/volk_32f_x2_multiply_32f.h b/kernels/volk/volk_32f_x2_multiply_32f.h index b18484748..e8f76055b 100644 --- a/kernels/volk/volk_32f_x2_multiply_32f.h +++ b/kernels/volk/volk_32f_x2_multiply_32f.h @@ -341,25 +341,6 @@ static inline void volk_32f_x2_multiply_32f_neon(float* cVector, #endif /* LV_HAVE_NEON */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_32f_x2_multiply_32f_a_generic(float* cVector, - const float* aVector, - const float* bVector, - unsigned int num_points) -{ - float* cPtr = cVector; - const float* aPtr = aVector; - const float* bPtr = bVector; - unsigned int number = 0; - - for (number = 0; number < num_points; number++) { - *cPtr++ = (*aPtr++) * (*bPtr++); - } -} -#endif /* LV_HAVE_GENERIC */ - - #ifdef LV_HAVE_ORC extern void volk_32f_x2_multiply_32f_a_orc_impl(float* cVector, const float* aVector, diff --git a/kernels/volk/volk_32fc_conjugate_32fc.h b/kernels/volk/volk_32fc_conjugate_32fc.h index 265c27ee9..aa1134abd 100644 --- a/kernels/volk/volk_32fc_conjugate_32fc.h +++ b/kernels/volk/volk_32fc_conjugate_32fc.h @@ -260,21 +260,4 @@ static inline void volk_32fc_conjugate_32fc_a_neon(lv_32fc_t* cVector, #endif /* LV_HAVE_NEON */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_32fc_conjugate_32fc_a_generic(lv_32fc_t* cVector, - const lv_32fc_t* aVector, - unsigned int num_points) -{ - lv_32fc_t* cPtr = cVector; - const lv_32fc_t* aPtr = aVector; - unsigned int number = 0; - - for (number = 0; number < num_points; number++) { - *cPtr++ = lv_conj(*aPtr++); - } -} -#endif /* LV_HAVE_GENERIC */ - - #endif /* INCLUDED_volk_32fc_conjugate_32fc_a_H */ diff --git a/kernels/volk/volk_32fc_deinterleave_64f_x2.h b/kernels/volk/volk_32fc_deinterleave_64f_x2.h index 51718dde6..1af5098f7 100644 --- a/kernels/volk/volk_32fc_deinterleave_64f_x2.h +++ b/kernels/volk/volk_32fc_deinterleave_64f_x2.h @@ -277,25 +277,6 @@ static inline void volk_32fc_deinterleave_64f_x2_a_sse2(double* iBuffer, } #endif /* LV_HAVE_SSE */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_32fc_deinterleave_64f_x2_a_generic(double* iBuffer, - double* qBuffer, - const lv_32fc_t* complexVector, - unsigned int num_points) -{ - unsigned int number = 0; - const float* complexVectorPtr = (float*)complexVector; - double* iBufferPtr = iBuffer; - double* qBufferPtr = qBuffer; - - for (number = 0; number < num_points; number++) { - *iBufferPtr++ = (double)*complexVectorPtr++; - *qBufferPtr++ = (double)*complexVectorPtr++; - } -} -#endif /* LV_HAVE_GENERIC */ - #ifdef LV_HAVE_NEONV8 #include diff --git a/kernels/volk/volk_32fc_magnitude_32f.h b/kernels/volk/volk_32fc_magnitude_32f.h index 607e05d19..eca00e246 100644 --- a/kernels/volk/volk_32fc_magnitude_32f.h +++ b/kernels/volk/volk_32fc_magnitude_32f.h @@ -308,24 +308,6 @@ static inline void volk_32fc_magnitude_32f_a_sse(float* magnitudeVector, #endif /* LV_HAVE_SSE */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_32fc_magnitude_32f_a_generic(float* magnitudeVector, - const lv_32fc_t* complexVector, - unsigned int num_points) -{ - const float* complexVectorPtr = (float*)complexVector; - float* magnitudeVectorPtr = magnitudeVector; - unsigned int number = 0; - for (number = 0; number < num_points; number++) { - const float real = *complexVectorPtr++; - const float imag = *complexVectorPtr++; - *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag)); - } -} -#endif /* LV_HAVE_GENERIC */ - - #ifdef LV_HAVE_NEON #include diff --git a/kernels/volk/volk_32fc_magnitude_squared_32f.h b/kernels/volk/volk_32fc_magnitude_squared_32f.h index 39dc3fedc..e7b11ae96 100644 --- a/kernels/volk/volk_32fc_magnitude_squared_32f.h +++ b/kernels/volk/volk_32fc_magnitude_squared_32f.h @@ -351,20 +351,4 @@ static inline void volk_32fc_magnitude_squared_32f_neon(float* magnitudeVector, #endif /* LV_HAVE_NEON */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_32fc_magnitude_squared_32f_a_generic( - float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points) -{ - const float* complexVectorPtr = (float*)complexVector; - float* magnitudeVectorPtr = magnitudeVector; - unsigned int number = 0; - for (number = 0; number < num_points; number++) { - const float real = *complexVectorPtr++; - const float imag = *complexVectorPtr++; - *magnitudeVectorPtr++ = (real * real) + (imag * imag); - } -} -#endif /* LV_HAVE_GENERIC */ - #endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */ diff --git a/kernels/volk/volk_32fc_s32fc_multiply_32fc.h b/kernels/volk/volk_32fc_s32fc_multiply_32fc.h index f560b5e07..1593b7cbe 100644 --- a/kernels/volk/volk_32fc_s32fc_multiply_32fc.h +++ b/kernels/volk/volk_32fc_s32fc_multiply_32fc.h @@ -410,34 +410,4 @@ static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector, } #endif /* LV_HAVE_NEON */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, - const lv_32fc_t* aVector, - const lv_32fc_t scalar, - unsigned int num_points) -{ - lv_32fc_t* cPtr = cVector; - const lv_32fc_t* aPtr = aVector; - unsigned int number = num_points; - - // unwrap loop - while (number >= 8) { - *cPtr++ = (*aPtr++) * scalar; - *cPtr++ = (*aPtr++) * scalar; - *cPtr++ = (*aPtr++) * scalar; - *cPtr++ = (*aPtr++) * scalar; - *cPtr++ = (*aPtr++) * scalar; - *cPtr++ = (*aPtr++) * scalar; - *cPtr++ = (*aPtr++) * scalar; - *cPtr++ = (*aPtr++) * scalar; - number -= 8; - } - - // clean up any remaining - while (number-- > 0) - *cPtr++ = *aPtr++ * scalar; -} -#endif /* LV_HAVE_GENERIC */ - #endif /* INCLUDED_volk_32fc_x2_multiply_32fc_a_H */ diff --git a/kernels/volk/volk_32fc_x2_conjugate_dot_prod_32fc.h b/kernels/volk/volk_32fc_x2_conjugate_dot_prod_32fc.h index a7fd48ea3..b1c1938f7 100644 --- a/kernels/volk/volk_32fc_x2_conjugate_dot_prod_32fc.h +++ b/kernels/volk/volk_32fc_x2_conjugate_dot_prod_32fc.h @@ -422,47 +422,6 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse3(lv_32fc_t* result #endif /*LV_HAVE_SSE3*/ -#ifdef LV_HAVE_GENERIC - - -static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_generic(lv_32fc_t* result, - const lv_32fc_t* input, - const lv_32fc_t* taps, - unsigned int num_points) -{ - - const unsigned int num_bytes = num_points * 8; - - float* res = (float*)result; - float* in = (float*)input; - float* tp = (float*)taps; - unsigned int n_2_ccomplex_blocks = num_bytes >> 4; - - float sum0[2] = { 0, 0 }; - float sum1[2] = { 0, 0 }; - unsigned int i = 0; - - for (i = 0; i < n_2_ccomplex_blocks; ++i) { - sum0[0] += in[0] * tp[0] + in[1] * tp[1]; - sum0[1] += (-in[0] * tp[1]) + in[1] * tp[0]; - sum1[0] += in[2] * tp[2] + in[3] * tp[3]; - sum1[1] += (-in[2] * tp[3]) + in[3] * tp[2]; - - in += 4; - tp += 4; - } - - res[0] = sum0[0] + sum1[0]; - res[1] = sum0[1] + sum1[1]; - - if (num_bytes >> 3 & 1) { - *result += input[(num_bytes >> 3) - 1] * lv_conj(taps[(num_bytes >> 3) - 1]); - } -} - -#endif /*LV_HAVE_GENERIC*/ - - #if LV_HAVE_SSE && LV_HAVE_64 static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse(lv_32fc_t* result, diff --git a/kernels/volk/volk_32fc_x2_dot_prod_32fc.h b/kernels/volk/volk_32fc_x2_dot_prod_32fc.h index 5d8722b99..16851f80e 100644 --- a/kernels/volk/volk_32fc_x2_dot_prod_32fc.h +++ b/kernels/volk/volk_32fc_x2_dot_prod_32fc.h @@ -519,47 +519,6 @@ static inline void volk_32fc_x2_dot_prod_32fc_u_avx_fma(lv_32fc_t* result, #include -#ifdef LV_HAVE_GENERIC - - -static inline void volk_32fc_x2_dot_prod_32fc_a_generic(lv_32fc_t* result, - const lv_32fc_t* input, - const lv_32fc_t* taps, - unsigned int num_points) -{ - - const unsigned int num_bytes = num_points * 8; - - float* res = (float*)result; - float* in = (float*)input; - float* tp = (float*)taps; - unsigned int n_2_ccomplex_blocks = num_bytes >> 4; - - float sum0[2] = { 0, 0 }; - float sum1[2] = { 0, 0 }; - unsigned int i = 0; - - for (i = 0; i < n_2_ccomplex_blocks; ++i) { - sum0[0] += in[0] * tp[0] - in[1] * tp[1]; - sum0[1] += in[0] * tp[1] + in[1] * tp[0]; - sum1[0] += in[2] * tp[2] - in[3] * tp[3]; - sum1[1] += in[2] * tp[3] + in[3] * tp[2]; - - in += 4; - tp += 4; - } - - res[0] = sum0[0] + sum1[0]; - res[1] = sum0[1] + sum1[1]; - - if (num_points & 1) { - *result += input[num_points - 1] * taps[num_points - 1]; - } -} - -#endif /*LV_HAVE_GENERIC*/ - - #if LV_HAVE_SSE && LV_HAVE_64 @@ -700,7 +659,7 @@ static inline void volk_32fc_x2_dot_prod_32fc_a_sse_32(lv_32fc_t* result, unsigned int num_points) { - volk_32fc_x2_dot_prod_32fc_a_generic(result, input, taps, num_points); + volk_32fc_x2_dot_prod_32fc_generic(result, input, taps, num_points); #if 0 const unsigned int num_bytes = num_points*8; diff --git a/kernels/volk/volk_32fc_x2_multiply_32fc.h b/kernels/volk/volk_32fc_x2_multiply_32fc.h index 44ddc6384..0d63ecd8a 100644 --- a/kernels/volk/volk_32fc_x2_multiply_32fc.h +++ b/kernels/volk/volk_32fc_x2_multiply_32fc.h @@ -341,25 +341,6 @@ static inline void volk_32fc_x2_multiply_32fc_a_sse3(lv_32fc_t* cVector, #endif /* LV_HAVE_SSE */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_32fc_x2_multiply_32fc_a_generic(lv_32fc_t* cVector, - const lv_32fc_t* aVector, - const lv_32fc_t* bVector, - unsigned int num_points) -{ - lv_32fc_t* cPtr = cVector; - const lv_32fc_t* aPtr = aVector; - const lv_32fc_t* bPtr = bVector; - unsigned int number = 0; - - for (number = 0; number < num_points; number++) { - *cPtr++ = (*aPtr++) * (*bPtr++); - } -} -#endif /* LV_HAVE_GENERIC */ - - #ifdef LV_HAVE_NEON #include diff --git a/kernels/volk/volk_32fc_x2_multiply_conjugate_32fc.h b/kernels/volk/volk_32fc_x2_multiply_conjugate_32fc.h index 478c4c26e..12e4948a0 100644 --- a/kernels/volk/volk_32fc_x2_multiply_conjugate_32fc.h +++ b/kernels/volk/volk_32fc_x2_multiply_conjugate_32fc.h @@ -288,24 +288,4 @@ static inline void volk_32fc_x2_multiply_conjugate_32fc_neon(lv_32fc_t* cVector, #endif /* LV_HAVE_NEON */ -#ifdef LV_HAVE_GENERIC - -static inline void -volk_32fc_x2_multiply_conjugate_32fc_a_generic(lv_32fc_t* cVector, - const lv_32fc_t* aVector, - const lv_32fc_t* bVector, - unsigned int num_points) -{ - lv_32fc_t* cPtr = cVector; - const lv_32fc_t* aPtr = aVector; - const lv_32fc_t* bPtr = bVector; - unsigned int number = 0; - - for (number = 0; number < num_points; number++) { - *cPtr++ = (*aPtr++) * lv_conj(*bPtr++); - } -} -#endif /* LV_HAVE_GENERIC */ - - #endif /* INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H */ diff --git a/kernels/volk/volk_32i_s32f_convert_32f.h b/kernels/volk/volk_32i_s32f_convert_32f.h index 0bcd9fb86..678290fc8 100644 --- a/kernels/volk/volk_32i_s32f_convert_32f.h +++ b/kernels/volk/volk_32i_s32f_convert_32f.h @@ -314,23 +314,4 @@ static inline void volk_32i_s32f_convert_32f_a_sse2(float* outputVector, #endif /* LV_HAVE_SSE2 */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_32i_s32f_convert_32f_a_generic(float* outputVector, - const int32_t* inputVector, - const float scalar, - unsigned int num_points) -{ - float* outputVectorPtr = outputVector; - const int32_t* inputVectorPtr = inputVector; - unsigned int number = 0; - const float iScalar = 1.0 / scalar; - - for (number = 0; number < num_points; number++) { - *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar; - } -} -#endif /* LV_HAVE_GENERIC */ - - #endif /* INCLUDED_volk_32i_s32f_convert_32f_a_H */ diff --git a/kernels/volk/volk_32u_byteswap.h b/kernels/volk/volk_32u_byteswap.h index 150fc0d7d..a6ec86f80 100644 --- a/kernels/volk/volk_32u_byteswap.h +++ b/kernels/volk/volk_32u_byteswap.h @@ -344,24 +344,4 @@ static inline void volk_32u_byteswap_a_sse2(uint32_t* intsToSwap, unsigned int n #endif /* LV_HAVE_SSE2 */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_32u_byteswap_a_generic(uint32_t* intsToSwap, - unsigned int num_points) -{ - uint32_t* inputPtr = intsToSwap; - - unsigned int point; - for (point = 0; point < num_points; point++) { - uint32_t output = *inputPtr; - output = (((output >> 24) & 0xff) | ((output >> 8) & 0x0000ff00) | - ((output << 8) & 0x00ff0000) | ((output << 24) & 0xff000000)); - - *inputPtr = output; - inputPtr++; - } -} -#endif /* LV_HAVE_GENERIC */ - - #endif /* INCLUDED_volk_32u_byteswap_a_H */ diff --git a/kernels/volk/volk_64f_convert_32f.h b/kernels/volk/volk_64f_convert_32f.h index f68f3aff3..b5f9b5070 100644 --- a/kernels/volk/volk_64f_convert_32f.h +++ b/kernels/volk/volk_64f_convert_32f.h @@ -316,21 +316,4 @@ static inline void volk_64f_convert_32f_a_sse2(float* outputVector, #endif /* LV_HAVE_SSE2 */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_64f_convert_32f_a_generic(float* outputVector, - const double* inputVector, - unsigned int num_points) -{ - float* outputVectorPtr = outputVector; - const double* inputVectorPtr = inputVector; - unsigned int number = 0; - - for (number = 0; number < num_points; number++) { - *outputVectorPtr++ = ((float)(*inputVectorPtr++)); - } -} -#endif /* LV_HAVE_GENERIC */ - - #endif /* INCLUDED_volk_64f_convert_32f_a_H */ diff --git a/kernels/volk/volk_64u_byteswap.h b/kernels/volk/volk_64u_byteswap.h index 22bccab2d..f7c4d9501 100644 --- a/kernels/volk/volk_64u_byteswap.h +++ b/kernels/volk/volk_64u_byteswap.h @@ -474,28 +474,5 @@ static inline void volk_64u_byteswap_u_ssse3(uint64_t* intsToSwap, } #endif /* LV_HAVE_SSSE3 */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_64u_byteswap_a_generic(uint64_t* intsToSwap, - unsigned int num_points) -{ - uint32_t* inputPtr = (uint32_t*)intsToSwap; - unsigned int point; - for (point = 0; point < num_points; point++) { - uint32_t output1 = *inputPtr; - uint32_t output2 = inputPtr[1]; - - output1 = (((output1 >> 24) & 0xff) | ((output1 >> 8) & 0x0000ff00) | - ((output1 << 8) & 0x00ff0000) | ((output1 << 24) & 0xff000000)); - - output2 = (((output2 >> 24) & 0xff) | ((output2 >> 8) & 0x0000ff00) | - ((output2 << 8) & 0x00ff0000) | ((output2 << 24) & 0xff000000)); - - *inputPtr++ = output2; - *inputPtr++ = output1; - } -} -#endif /* LV_HAVE_GENERIC */ - #endif /* INCLUDED_volk_64u_byteswap_a_H */ diff --git a/kernels/volk/volk_8i_convert_16i.h b/kernels/volk/volk_8i_convert_16i.h index d52d4a65c..1005c2578 100644 --- a/kernels/volk/volk_8i_convert_16i.h +++ b/kernels/volk/volk_8i_convert_16i.h @@ -217,23 +217,6 @@ static inline void volk_8i_convert_16i_a_sse4_1(int16_t* outputVector, #endif /* LV_HAVE_SSE4_1 */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_8i_convert_16i_a_generic(int16_t* outputVector, - const int8_t* inputVector, - unsigned int num_points) -{ - int16_t* outputVectorPtr = outputVector; - const int8_t* inputVectorPtr = inputVector; - unsigned int number = 0; - - for (number = 0; number < num_points; number++) { - *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256; - } -} -#endif /* LV_HAVE_GENERIC */ - - #ifdef LV_HAVE_NEON #include diff --git a/kernels/volk/volk_8i_s32f_convert_32f.h b/kernels/volk/volk_8i_s32f_convert_32f.h index bed006ebd..bdd572886 100644 --- a/kernels/volk/volk_8i_s32f_convert_32f.h +++ b/kernels/volk/volk_8i_s32f_convert_32f.h @@ -334,25 +334,6 @@ static inline void volk_8i_s32f_convert_32f_neon(float* outputVector, #endif /* LV_HAVE_NEON */ -#ifdef LV_HAVE_GENERIC - -static inline void volk_8i_s32f_convert_32f_a_generic(float* outputVector, - const int8_t* inputVector, - const float scalar, - unsigned int num_points) -{ - float* outputVectorPtr = outputVector; - const int8_t* inputVectorPtr = inputVector; - unsigned int number = 0; - const float iScalar = 1.0 / scalar; - - for (number = 0; number < num_points; number++) { - *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar; - } -} -#endif /* LV_HAVE_GENERIC */ - - #ifdef LV_HAVE_ORC extern void volk_8i_s32f_convert_32f_a_orc_impl(float* outputVector, const int8_t* inputVector, From 36a571ae0daa27d89d19545c1f98d4e7eeb3b612 Mon Sep 17 00:00:00 2001 From: Clayton Smith Date: Tue, 24 Oct 2023 14:02:53 -0400 Subject: [PATCH 2/3] reverse: Rename dword_shuffle to generic Signed-off-by: Clayton Smith --- kernels/volk/volk_32u_reverse_32u.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernels/volk/volk_32u_reverse_32u.h b/kernels/volk/volk_32u_reverse_32u.h index 6448b839d..909824d27 100644 --- a/kernels/volk/volk_32u_reverse_32u.h +++ b/kernels/volk/volk_32u_reverse_32u.h @@ -97,9 +97,8 @@ static const unsigned char BitReverseTable256[] = { 0x3F, 0xBF, 0x7F, 0xFF }; #ifdef LV_HAVE_GENERIC -static inline void volk_32u_reverse_32u_dword_shuffle(uint32_t* out, - const uint32_t* in, - unsigned int num_points) +static inline void +volk_32u_reverse_32u_generic(uint32_t* out, const uint32_t* in, unsigned int num_points) { const struct dword_split* in_ptr = (const struct dword_split*)in; struct dword_split* out_ptr = (struct dword_split*)out; From 15e45b978a3491310457004eeabbf705d3b73fbd Mon Sep 17 00:00:00 2001 From: Clayton Smith Date: Tue, 24 Oct 2023 14:14:03 -0400 Subject: [PATCH 3/3] Require all kernels to have a generic implementation Signed-off-by: Clayton Smith --- gen/volk_kernel_defs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gen/volk_kernel_defs.py b/gen/volk_kernel_defs.py index 994f1cbf4..36e36132e 100644 --- a/gen/volk_kernel_defs.py +++ b/gen/volk_kernel_defs.py @@ -162,6 +162,8 @@ def __init__(self, kernel_file): kern_name=self.name, header=sub_hdr, body=body, )) assert(self._impls) + if "generic" not in [impl.name for impl in self._impls]: + raise Exception(f"{self.name} does not have a generic protokernel.") self.has_dispatcher = False for impl in self._impls: if impl.name == 'dispatcher': @@ -194,4 +196,3 @@ def __repr__(self): if __name__ == '__main__': print(kernels) -