Skip to content

Commit

Permalink
Merge pull request #685 from argilo/fix-generic
Browse files: browse the repository at this point in the history
Remove redundant a_generic kernels
  • Loading branch information
jdemel authored Nov 4, 2023
2 parents fd20770 + 15e45b9 commit fd0dec8
Show file tree
Hide file tree
Showing 29 changed files with 10 additions and 543 deletions.
3 changes: 2 additions & 1 deletion gen/volk_kernel_defs.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ def __init__(self, kernel_file):
kern_name=self.name, header=sub_hdr, body=body,
))
assert(self._impls)
if "generic" not in [impl.name for impl in self._impls]:
raise Exception(f"{self.name} does not have a generic protokernel.")
self.has_dispatcher = False
for impl in self._impls:
if impl.name == 'dispatcher':
Expand Down Expand Up @@ -194,4 +196,3 @@ def __repr__(self):

if __name__ == '__main__':
print(kernels)

16 changes: 0 additions & 16 deletions kernels/volk/volk_16i_convert_8i.h
Original file line number Diff line number Diff line change
Expand Up @@ -276,20 +276,4 @@ static inline void volk_16i_convert_8i_neon(int8_t* outputVector,
#endif /* LV_HAVE_NEON */


#ifdef LV_HAVE_GENERIC

/*
 * Narrow each 16-bit sample to 8 bits by keeping only its upper byte.
 *
 * outputVector: receives num_points int8_t values.
 * inputVector:  supplies num_points int16_t values.
 * num_points:   number of samples to convert; 0 leaves the output untouched.
 */
static inline void volk_16i_convert_8i_a_generic(int8_t* outputVector,
                                                 const int16_t* inputVector,
                                                 unsigned int num_points)
{
    for (unsigned int i = 0; i < num_points; i++) {
        const int16_t sample = inputVector[i];
        /* Shift the high byte down into the low byte before narrowing. */
        outputVector[i] = (int8_t)(sample >> 8);
    }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_16i_convert_8i_a_H */
17 changes: 0 additions & 17 deletions kernels/volk/volk_16i_s32f_convert_32f.h
Original file line number Diff line number Diff line change
Expand Up @@ -483,21 +483,4 @@ static inline void volk_16i_s32f_convert_32f_a_sse(float* outputVector,
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_GENERIC

/*
 * Convert 16-bit integer samples to float and divide each one by scalar.
 *
 * outputVector: receives num_points float values.
 * inputVector:  supplies num_points int16_t values.
 * scalar:       divisor applied to every converted sample.
 * num_points:   number of samples to convert.
 */
static inline void volk_16i_s32f_convert_32f_a_generic(float* outputVector,
                                                       const int16_t* inputVector,
                                                       const float scalar,
                                                       unsigned int num_points)
{
    for (unsigned int idx = 0; idx < num_points; ++idx) {
        const float sample = (float)inputVector[idx];
        outputVector[idx] = sample / scalar;
    }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_16i_s32f_convert_32f_a_H */
15 changes: 0 additions & 15 deletions kernels/volk/volk_16u_byteswap.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,21 +271,6 @@ static inline void volk_16u_byteswap_neon_table(uint16_t* intsToSwap,
}
#endif /* LV_HAVE_NEON */

#ifdef LV_HAVE_GENERIC

/*
 * Swap the two bytes of every 16-bit word, in place.
 *
 * intsToSwap: buffer of num_points uint16_t values; overwritten with the
 *             byte-swapped values.
 * num_points: number of words to process.
 */
static inline void volk_16u_byteswap_a_generic(uint16_t* intsToSwap,
                                               unsigned int num_points)
{
    uint16_t* word = intsToSwap;
    unsigned int remaining = num_points;

    while (remaining > 0) {
        const uint16_t original = *word;
        /* Old high byte becomes the new low byte, and vice versa. */
        const uint16_t new_low = (original >> 8) & 0xff;
        const uint16_t new_high = (original << 8) & 0xff00;
        *word = (new_low | new_high);
        word++;
        remaining--;
    }
}
#endif /* LV_HAVE_GENERIC */

#ifdef LV_HAVE_ORC

extern void volk_16u_byteswap_a_orc_impl(uint16_t* intsToSwap, unsigned int num_points);
Expand Down
2 changes: 1 addition & 1 deletion kernels/volk/volk_32f_asin_32f.h
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ volk_32f_asin_32f_u_sse4_1(float* bVector, const float* aVector, unsigned int nu
#ifdef LV_HAVE_GENERIC

static inline void
volk_32f_asin_32f_u_generic(float* bVector, const float* aVector, unsigned int num_points)
volk_32f_asin_32f_generic(float* bVector, const float* aVector, unsigned int num_points)
{
float* bPtr = bVector;
const float* aPtr = aVector;
Expand Down
17 changes: 0 additions & 17 deletions kernels/volk/volk_32f_convert_64f.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,21 +231,4 @@ static inline void volk_32f_convert_64f_a_sse2(double* outputVector,
#endif /* LV_HAVE_SSE2 */


#ifdef LV_HAVE_GENERIC

/*
 * Widen single-precision samples to double precision, one for one.
 *
 * outputVector: receives num_points double values.
 * inputVector:  supplies num_points float values.
 * num_points:   number of samples to convert.
 */
static inline void volk_32f_convert_64f_a_generic(double* outputVector,
                                                  const float* inputVector,
                                                  unsigned int num_points)
{
    for (unsigned int i = 0; i < num_points; i++) {
        outputVector[i] = (double)inputVector[i];
    }
}
#endif /* LV_HAVE_GENERIC */


#endif /* INCLUDED_volk_32f_convert_64f_a_H */
18 changes: 1 addition & 17 deletions kernels/volk/volk_32f_exp_32f.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,22 +165,6 @@ volk_32f_exp_32f_a_sse2(float* bVector, const float* aVector, unsigned int num_p
#endif /* LV_HAVE_SSE2 for aligned */


#ifdef LV_HAVE_GENERIC

static inline void
volk_32f_exp_32f_a_generic(float* bVector, const float* aVector, unsigned int num_points)
{
float* bPtr = bVector;
const float* aPtr = aVector;
unsigned int number = 0;

for (number = 0; number < num_points; number++) {
*bPtr++ = expf(*aPtr++);
}
}

#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_32f_exp_32f_a_H */

#ifndef INCLUDED_volk_32f_exp_32f_u_H
Expand Down Expand Up @@ -269,7 +253,7 @@ volk_32f_exp_32f_u_sse2(float* bVector, const float* aVector, unsigned int num_p
#ifdef LV_HAVE_GENERIC

static inline void
volk_32f_exp_32f_u_generic(float* bVector, const float* aVector, unsigned int num_points)
volk_32f_exp_32f_generic(float* bVector, const float* aVector, unsigned int num_points)
{
float* bPtr = bVector;
const float* aPtr = aVector;
Expand Down
24 changes: 3 additions & 21 deletions kernels/volk/volk_32f_log2_32f.h
Original file line number Diff line number Diff line change
Expand Up @@ -459,24 +459,6 @@ volk_32f_log2_32f_neon(float* bVector, const float* aVector, unsigned int num_po
#define INCLUDED_volk_32f_log2_32f_u_H


#ifdef LV_HAVE_GENERIC

static inline void
volk_32f_log2_32f_u_generic(float* bVector, const float* aVector, unsigned int num_points)
{
float* bPtr = bVector;
const float* aPtr = aVector;
unsigned int number = 0;

for (number = 0; number < num_points; number++) {
float const result = log2f(*aPtr++);
*bPtr++ = isinf(result) ? -127.0f : result;
}
}

#endif /* LV_HAVE_GENERIC */


#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>

Expand Down Expand Up @@ -555,7 +537,7 @@ volk_32f_log2_32f_u_sse4_1(float* bVector, const float* aVector, unsigned int nu
}

number = quarterPoints * 4;
volk_32f_log2_32f_u_generic(bPtr, aPtr, num_points - number);
volk_32f_log2_32f_generic(bPtr, aPtr, num_points - number);
}

#endif /* LV_HAVE_SSE4_1 for unaligned */
Expand Down Expand Up @@ -643,7 +625,7 @@ static inline void volk_32f_log2_32f_u_avx2_fma(float* bVector,
}

number = eighthPoints * 8;
volk_32f_log2_32f_u_generic(bPtr, aPtr, num_points - number);
volk_32f_log2_32f_generic(bPtr, aPtr, num_points - number);
}

#endif /* LV_HAVE_AVX2 && LV_HAVE_FMA for unaligned */
Expand Down Expand Up @@ -731,7 +713,7 @@ volk_32f_log2_32f_u_avx2(float* bVector, const float* aVector, unsigned int num_
}

number = eighthPoints * 8;
volk_32f_log2_32f_u_generic(bPtr, aPtr, num_points - number);
volk_32f_log2_32f_generic(bPtr, aPtr, num_points - number);
}

#endif /* LV_HAVE_AVX2 for unaligned */
Expand Down
25 changes: 0 additions & 25 deletions kernels/volk/volk_32f_s32f_convert_16i.h
Original file line number Diff line number Diff line change
Expand Up @@ -553,29 +553,4 @@ static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector,
#endif /* LV_HAVE_SSE */


#ifdef LV_HAVE_GENERIC

static inline void volk_32f_s32f_convert_16i_a_generic(int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points)
{
int16_t* outputVectorPtr = outputVector;
const float* inputVectorPtr = inputVector;
unsigned int number = 0;
float min_val = SHRT_MIN;
float max_val = SHRT_MAX;
float r;

for (number = 0; number < num_points; number++) {
r = *inputVectorPtr++ * scalar;
if (r < min_val)
r = min_val;
else if (r > max_val)
r = max_val;
*outputVectorPtr++ = (int16_t)rintf(r);
}
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_32f_s32f_convert_16i_a_H */
12 changes: 0 additions & 12 deletions kernels/volk/volk_32f_s32f_convert_32i.h
Original file line number Diff line number Diff line change
Expand Up @@ -406,16 +406,4 @@ static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector,
#endif /* LV_HAVE_SSE */


#ifdef LV_HAVE_GENERIC

/*
 * The `_a_generic` variant of the float-to-int32 conversion kernel.
 *
 * It contains no logic of its own: all four arguments are forwarded unchanged
 * to volk_32f_s32f_convert_32i_generic(), which is defined outside this
 * block and performs the actual conversion.
 */
static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points)
{
volk_32f_s32f_convert_32i_generic(outputVector, inputVector, scalar, num_points);
}

#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_32f_s32f_convert_32i_a_H */
18 changes: 0 additions & 18 deletions kernels/volk/volk_32f_x2_add_32f.h
Original file line number Diff line number Diff line change
Expand Up @@ -373,24 +373,6 @@ extern void volk_32f_x2_add_32f_a_neonpipeline(float* cVector,
unsigned int num_points);
#endif /* LV_HAVE_NEONV7 */

#ifdef LV_HAVE_GENERIC

/*
 * Add two float vectors element by element.
 *
 * cVector:    receives num_points sums, cVector[i] = aVector[i] + bVector[i].
 * aVector:    first addend vector.
 * bVector:    second addend vector.
 * num_points: number of elements to process.
 */
static inline void volk_32f_x2_add_32f_a_generic(float* cVector,
                                                 const float* aVector,
                                                 const float* bVector,
                                                 unsigned int num_points)
{
    for (unsigned int i = 0; i < num_points; i++) {
        cVector[i] = aVector[i] + bVector[i];
    }
}
#endif /* LV_HAVE_GENERIC */


#ifdef LV_HAVE_ORC

Expand Down
24 changes: 0 additions & 24 deletions kernels/volk/volk_32f_x2_dot_prod_32f.h
Original file line number Diff line number Diff line change
Expand Up @@ -470,30 +470,6 @@ static inline void volk_32f_x2_dot_prod_32f_u_avx512f(float* result,
#include <volk/volk_common.h>


#ifdef LV_HAVE_GENERIC


/*
 * Compute the dot product of two float vectors.
 *
 * result:     receives the single accumulated sum; 0 when num_points is 0.
 * input:      first vector of num_points floats.
 * taps:       second vector of num_points floats.
 * num_points: number of element pairs to multiply and accumulate.
 *
 * Products are accumulated in index order into a single float.
 */
static inline void volk_32f_x2_dot_prod_32f_a_generic(float* result,
                                                      const float* input,
                                                      const float* taps,
                                                      unsigned int num_points)
{
    float accumulator = 0;

    for (unsigned int i = 0; i < num_points; i++) {
        accumulator += (input[i] * taps[i]);
    }

    *result = accumulator;
}

#endif /*LV_HAVE_GENERIC*/


#ifdef LV_HAVE_SSE


Expand Down
19 changes: 0 additions & 19 deletions kernels/volk/volk_32f_x2_multiply_32f.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,25 +341,6 @@ static inline void volk_32f_x2_multiply_32f_neon(float* cVector,
#endif /* LV_HAVE_NEON */


#ifdef LV_HAVE_GENERIC

/*
 * Multiply two float vectors element by element.
 *
 * cVector:    receives num_points products, cVector[i] = aVector[i] * bVector[i].
 * aVector:    first factor vector.
 * bVector:    second factor vector.
 * num_points: number of elements to process.
 */
static inline void volk_32f_x2_multiply_32f_a_generic(float* cVector,
                                                      const float* aVector,
                                                      const float* bVector,
                                                      unsigned int num_points)
{
    for (unsigned int i = 0; i < num_points; i++) {
        cVector[i] = aVector[i] * bVector[i];
    }
}
#endif /* LV_HAVE_GENERIC */


#ifdef LV_HAVE_ORC
extern void volk_32f_x2_multiply_32f_a_orc_impl(float* cVector,
const float* aVector,
Expand Down
17 changes: 0 additions & 17 deletions kernels/volk/volk_32fc_conjugate_32fc.h
Original file line number Diff line number Diff line change
Expand Up @@ -260,21 +260,4 @@ static inline void volk_32fc_conjugate_32fc_a_neon(lv_32fc_t* cVector,
#endif /* LV_HAVE_NEON */


#ifdef LV_HAVE_GENERIC

/*
 * Apply lv_conj() to every element of a complex vector.
 *
 * cVector:    receives num_points results, cVector[i] = lv_conj(aVector[i]).
 * aVector:    supplies num_points complex inputs.
 * num_points: number of elements to process.
 */
static inline void volk_32fc_conjugate_32fc_a_generic(lv_32fc_t* cVector,
                                                      const lv_32fc_t* aVector,
                                                      unsigned int num_points)
{
    for (unsigned int i = 0; i < num_points; i++) {
        cVector[i] = lv_conj(aVector[i]);
    }
}
#endif /* LV_HAVE_GENERIC */


#endif /* INCLUDED_volk_32fc_conjugate_32fc_a_H */
19 changes: 0 additions & 19 deletions kernels/volk/volk_32fc_deinterleave_64f_x2.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,25 +277,6 @@ static inline void volk_32fc_deinterleave_64f_x2_a_sse2(double* iBuffer,
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_GENERIC

/*
 * Split a complex vector into two separate double-precision buffers.
 *
 * The input is read as a flat array of floats taken two at a time: the first
 * float of each pair is widened into iBuffer, the second into qBuffer.
 *
 * iBuffer:       receives num_points doubles (first float of each pair).
 * qBuffer:       receives num_points doubles (second float of each pair).
 * complexVector: supplies num_points complex samples.
 * num_points:    number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_64f_x2_a_generic(double* iBuffer,
                                                           double* qBuffer,
                                                           const lv_32fc_t* complexVector,
                                                           unsigned int num_points)
{
    const float* pair = (const float*)complexVector;

    for (unsigned int i = 0; i < num_points; i++, pair += 2) {
        iBuffer[i] = (double)pair[0];
        qBuffer[i] = (double)pair[1];
    }
}
#endif /* LV_HAVE_GENERIC */

#ifdef LV_HAVE_NEONV8
#include <arm_neon.h>

Expand Down
18 changes: 0 additions & 18 deletions kernels/volk/volk_32fc_magnitude_32f.h
Original file line number Diff line number Diff line change
Expand Up @@ -308,24 +308,6 @@ static inline void volk_32fc_magnitude_32f_a_sse(float* magnitudeVector,
#endif /* LV_HAVE_SSE */


#ifdef LV_HAVE_GENERIC

/*
 * Compute the magnitude of every complex sample.
 *
 * The input is read as a flat array of floats taken two at a time; each
 * output is sqrtf(first * first + second * second) of one pair.
 *
 * magnitudeVector: receives num_points float magnitudes.
 * complexVector:   supplies num_points complex samples.
 * num_points:      number of complex samples to process.
 */
static inline void volk_32fc_magnitude_32f_a_generic(float* magnitudeVector,
                                                     const lv_32fc_t* complexVector,
                                                     unsigned int num_points)
{
    const float* pair = (const float*)complexVector;

    for (unsigned int i = 0; i < num_points; i++, pair += 2) {
        const float re = pair[0];
        const float im = pair[1];
        magnitudeVector[i] = sqrtf((re * re) + (im * im));
    }
}
#endif /* LV_HAVE_GENERIC */


#ifdef LV_HAVE_NEON
#include <arm_neon.h>

Expand Down
Loading

0 comments on commit fd0dec8

Please sign in to comment.