From ace5fdd1d84be284f84fddfa968c921771704b97 Mon Sep 17 00:00:00 2001 From: Hidayat Ullah Khan Date: Sun, 10 May 2020 03:30:38 +0530 Subject: [PATCH 01/14] sse4.2: first attempt at implementing mm_cmpestra --- simde/x86/sse4.2.h | 198 +++++++++++++++++++++++++++++++++++++++++++++ test/x86/sse4.2.c | 107 ++++++++++++++++++++++++ 2 files changed, 305 insertions(+) diff --git a/simde/x86/sse4.2.h b/simde/x86/sse4.2.h index 243dac381..29d49251d 100644 --- a/simde/x86/sse4.2.h +++ b/simde/x86/sse4.2.h @@ -37,6 +37,204 @@ SIMDE__BEGIN_DECLS # define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES #endif +SIMDE__FUNCTION_ATTRIBUTES +int +simde_mm_cmpestra(simde__m128i a, int la, simde__m128i b, int lb, const int imm8){ +#if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_cmpestra(a, la, b, lb, imm8); +#else + simde__m128i_private + BoolRes_ = simde__m128i_to_private(simde_mm_setzero_si128()), + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + if(imm8&1){ + int UpperBound = (128 / 16) - 1; + int aInvalid = 0; + int bInvalid = 0; + SIMDE__VECTORIZE + for (size_t i = 0 ; i < (UpperBound) ; i++) { + for(size_t j = 0; j< (UpperBound) ; j++) + { + int bitvalue = ( (a_.i16[i]==b_.i16[j]) ? 1 : 0 ); + BoolRes_.i16[i] |= ( ( bitvalue ) << j); + if(i == la) + aInvalid = 1; + if(j == lb) + bInvalid = 1; + switch( ((imm8 & 12)>>2) ){ + case 0: + break; + case 1: + break; + case 2: + if(aInvalid && bInvalid) + BoolRes_.i16[i] |= (1<>2) ) { + case 0: + SIMDE__VECTORIZE + for (size_t i = 0 ; i < (UpperBound) ; i++){ + for (size_t j = 0 ; j < (UpperBound) ; j++){ + IntRes1 |= ( ((BoolRes_.i16[i] >> j)&1) << i) ; + } + } + break; + case 1: + SIMDE__VECTORIZE + for (size_t i = 0 ; i < (UpperBound) ; i++){ + for (size_t j = 0 ; j < (UpperBound) ; j++){ + IntRes1 |= ( ( ((BoolRes_.i16[i] >> j)&1) & ( (BoolRes_.i16[i] >> (j+1))&1) ) << i); + j+=2; + } + } + break; + case 2: + SIMDE__VECTORIZE + for (size_t i = 0 ; i < (UpperBound) ; i++){ + for (size_t j = 0 ; j < (UpperBound) ; j++){ + IntRes1 |= ( ((BoolRes_.i16[i] >> i)&1) << i); + } + } + break; + case 3: + IntRes1 = (imm8 & 1) ? 0xff : 0xffff; + SIMDE__VECTORIZE + for (size_t i = 0 ; i < (UpperBound) ; i++){ + size_t k=i; + for (size_t j = 0 ; j < (UpperBound-i) ; j++){ + IntRes1 &= ( ( (BoolRes_.i16[k] >> j)&1 ) << i) ; + k+=1; + } + } + } + for(size_t i = 0; i < (UpperBound) ; i++){ + if((imm8>>4)&1){ + if((imm8>>5)&1) { + if (i >= lb) { + IntRes2 |= ( ((IntRes1>>i)&1) << i); + } + else { + IntRes2 |= ( ( ((IntRes1>>i)&1)^(-1)) << i); + } + } + else{ + IntRes2 |= ( ( ((IntRes1>>i)&1)^(-1)) << i); + } + } + else{ + IntRes2 |= ( ((IntRes1>>i)&1) << i); + } + } + return ( (IntRes2==0) & (lb > UpperBound) ); + } + else{ + int UpperBound = (128 / 8) - 1; + int aInvalid = 0; + int bInvalid = 0; + SIMDE__VECTORIZE + for (size_t i = 0 ; i < (UpperBound) ; i++) { + for(size_t j = 0; j< (UpperBound) ; j++) + { + int bitvalue = ( (a_.i8[i]==b_.i8[j]) ? 1 : 0 ); + BoolRes_.i8[i] |= ( ( bitvalue ) << j); + if(i == la) + aInvalid = 1; + if(j == lb) + bInvalid = 1; + switch( ((imm8 & 12)>>2) ){ + case 0: + break; + case 1: + break; + case 2: + if(aInvalid && bInvalid) + BoolRes_.i8[i] |= (1<>2) ) { + case 0: + SIMDE__VECTORIZE + for (size_t i = 0 ; i < (UpperBound) ; i++){ + for (size_t j = 0 ; j < (UpperBound) ; j++){ + IntRes1 |= ( ((BoolRes_.i8[i] >> j)&1) << i) ; + } + } + break; + case 1: + SIMDE__VECTORIZE + for (size_t i = 0 ; i < (UpperBound) ; i++){ + for (size_t j = 0 ; j < (UpperBound) ; j++){ + IntRes1 |= ( ( ((BoolRes_.i8[i] >> j)&1) & ( (BoolRes_.i8[i] >> (j+1))&1) ) << i); + j+=2; + } + } + break; + case 2: + SIMDE__VECTORIZE + for (size_t i = 0 ; i < (UpperBound) ; i++){ + for (size_t j = 0 ; j < (UpperBound) ; j++){ + IntRes1 |= ( ((BoolRes_.i8[i] >> i)&1) << i); + } + } + break; + case 3: + IntRes1 = (imm8 & 1) ? 0xff : 0xffff; + SIMDE__VECTORIZE + for (size_t i = 0 ; i < (UpperBound) ; i++){ + size_t k=i; + for (size_t j = 0 ; j < (UpperBound-i) ; j++){ + IntRes1 &= ( ( (BoolRes_.i8[k] >> j)&1 ) << i) ; + k+=1; + } + } + } + for(size_t i = 0; i < (UpperBound) ; i++){ + if((imm8>>4)&1){ + if((imm8>>5)&1) { + if (i >= lb) { + IntRes2 |= ( ((IntRes1>>i)&1) << i); + } + else { + IntRes2 |= ( ( ((IntRes1>>i)&1)^(-1)) << i); + } + } + else{ + IntRes2 |= ( ( ((IntRes1>>i)&1)^(-1)) << i); + } + } + else{ + IntRes2 |= ( ((IntRes1>>i)&1) << i); + } + } + return ( (IntRes2==0) & (lb > UpperBound) ); + } +#endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) +# define _mm_cmpestra(a, la, b, lb, imm8) simde_mm_cmpestra(a, la, b, lb, imm8) +#endif + SIMDE__FUNCTION_ATTRIBUTES simde__m128i simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { diff --git a/test/x86/sse4.2.c b/test/x86/sse4.2.c index d00fe0fe2..e111d50d7 100644 --- a/test/x86/sse4.2.c +++ b/test/x86/sse4.2.c @@ -24,9 +24,116 @@ #define SIMDE_TESTS_CURRENT_ISAX sse4_2 #include #include +#include #if defined(SIMDE_X86_SSE4_2_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) +static MunitResult +test_simde_mm_odd_cmpestra(const MunitParameter params[], void* data) { + (void) params; + (void) data; + + const struct { + simde__m128i a; + int la; + simde__m128i b; + int lb; + const int imm8; + int r; + } test_vec[8] = { + + }; + + printf("\n"); + for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { + simde__m128i_private a, b; + int la, lb, r; + const int imm8 = (munit_rand_int_range(0, UINT8_MAX) | 1); + + munit_rand_memory(sizeof(a), (uint8_t*) &a); + munit_rand_memory(sizeof(b), (uint8_t*) &b); + la = munit_rand_int_range(0, 128/16); + lb = munit_rand_int_range(0, 128/16); + + r = simde_mm_cmpestra(simde__m128i_from_private(a), la, simde__m128i_from_private(b), lb, imm8); + + printf(" { simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n" + " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n", + a.i16[7], a.i16[6], a.i16[5], a.i16[4], a.i16[3], a.i16[2], a.i16[1], a.i16[0]); + printf(" %d ,\n",la); + printf(" simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n" + " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n", + b.i16[7], b.i16[6], b.i16[5], b.i16[4], b.i16[3], b.i16[2], b.i16[1], b.i16[0]); + printf(" %d ,\n",lb); + printf(" %d ,\n",imm8); + printf(" %d },\n",r); + } + return MUNIT_FAIL; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + int r = simde_mm_cmpestra(test_vec[i].a, test_vec[i].la, test_vec[i].b, test_vec[i].lb, test_vec[i].imm8); + assert(r == test_vec[i].r); + } + + return MUNIT_OK; +} + +static MunitResult +test_simde_mm_even_cmpestra(const MunitParameter params[], void* data) { + (void) params; + (void) data; + + const struct { + simde__m128i a; + int la; + simde__m128i b; + int lb; + const int imm8; + int r; + } test_vec[8] = { + + }; + + printf("\n"); + for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { + simde__m128i_private a, b; + int la, lb, r; + const int imm8 = (munit_rand_int_range(0, UINT8_MAX) & 0); + + munit_rand_memory(sizeof(a), (uint8_t*) &a); + munit_rand_memory(sizeof(b), (uint8_t*) &b); + la = munit_rand_int_range(0, 128/8); + lb = munit_rand_int_range(0, 128/8); + + r = simde_mm_cmpestra(simde__m128i_from_private(a), la, simde__m128i_from_private(b), lb, imm8); + + printf(" { simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n" + " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n" + " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n" + " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n", + a.i8[15], a.i8[14], a.i8[13], a.i8[12], a.i8[11], a.i8[10], a.i8[ 9], a.i8[ 8], + a.i8[ 7], a.i8[ 6], a.i8[ 5], a.i8[ 4], a.i8[ 3], a.i8[ 2], a.i8[ 1], a.i8[ 0]); + printf(" %d ,\n",la); + printf(" simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n" + " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n" + " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n" + " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n", + b.i8[15], b.i8[14], b.i8[13], b.i8[12], b.i8[11], b.i8[10], b.i8[ 9], b.i8[ 8], + b.i8[ 7], b.i8[ 6], b.i8[ 5], b.i8[ 4], b.i8[ 3], b.i8[ 2], b.i8[ 1], b.i8[ 0]); + printf(" %d ,\n",lb); + printf(" %d ,\n",imm8); + printf(" %d },\n",r); + } + return MUNIT_FAIL; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + int r = simde_mm_cmpestra(test_vec[i].a, test_vec[i].la, test_vec[i].b, test_vec[i].lb, test_vec[i].imm8); + assert(r == test_vec[i].r); + } + + return MUNIT_OK; +} + static MunitResult test_simde_mm_cmpgt_epi64(const MunitParameter params[], void* data) { (void) params; From 14a3c455c9a8fb404c65c6b49c82e1791e5a0b96 Mon Sep 17 00:00:00 2001 From: Hidayat Ullah Khan Date: Mon, 11 May 2020 12:38:34 +0530 Subject: [PATCH 02/14] sse4.2 : made few changes --- simde/x86/sse4.2.h | 354 ++++++++++++++++++++++++--------------------- 1 file changed, 191 insertions(+), 163 deletions(-) diff --git a/simde/x86/sse4.2.h b/simde/x86/sse4.2.h index 29d49251d..a8cf78c86 100644 --- a/simde/x86/sse4.2.h +++ b/simde/x86/sse4.2.h @@ -37,198 +37,226 @@ SIMDE__BEGIN_DECLS # define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES #endif +#define SIMDE_SIDD_CMP_EQUAL_ANY 0 +#define SIMDE_SIDD_CMP_RANGES 1 +#define SIMDE_SIDD_CMP_EQUAL_EACH 2 +#define SIMDE_SIDD_CMP_EQUAL_ORDERED 3 + +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) +#define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY +#define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES +#define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH +#define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED +#endif + SIMDE__FUNCTION_ATTRIBUTES int -simde_mm_cmpestra(simde__m128i a, int la, simde__m128i b, int lb, const int imm8){ -#if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_cmpestra(a, la, b, lb, imm8); -#else +simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8) { + const int cmp_op = imm8 & 0x06; + const int polarity = imm8 & 0x30; simde__m128i_private - BoolRes_ = simde__m128i_to_private(simde_mm_setzero_si128()), - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - if(imm8&1){ - int UpperBound = (128 / 16) - 1; - int aInvalid = 0; - int bInvalid = 0; - SIMDE__VECTORIZE - for (size_t i = 0 ; i < (UpperBound) ; i++) { - for(size_t j = 0; j< (UpperBound) ; j++) - { - int bitvalue = ( (a_.i16[i]==b_.i16[j]) ? 1 : 0 ); - BoolRes_.i16[i] |= ( ( bitvalue ) << j); - if(i == la) - aInvalid = 1; - if(j == lb) - bInvalid = 1; - switch( ((imm8 & 12)>>2) ){ - case 0: - break; - case 1: - break; - case 2: - if(aInvalid && bInvalid) - BoolRes_.i16[i] |= (1<> 2)){ + case SIMDE_SIDD_CMP_EQUAL_ANY: + break; + case SIMDE_SIDD_CMP_RANGES: + break; + case SIMDE_SIDD_CMP_EQUAL_EACH: + if(a_invalid && b_invalid) + bool_res_.i8[i] |= (1 << j); + break; + case SIMDE_SIDD_CMP_EQUAL_ORDERED: + if(a_invalid && !b_invalid) + bool_res_.i8[i] |= (1 << j); + else if(a_invalid && b_invalid) + bool_res_.i8[i] |= (1 << j); + break; } } - int16_t IntRes1 = 0; - int16_t IntRes2 = 0; - switch( ((imm8 & 12)>>2) ) { - case 0: - SIMDE__VECTORIZE - for (size_t i = 0 ; i < (UpperBound) ; i++){ - for (size_t j = 0 ; j < (UpperBound) ; j++){ - IntRes1 |= ( ((BoolRes_.i16[i] >> j)&1) << i) ; - } + } + int8_t int_res_1 = 0; + int8_t int_res_2 = 0; + switch( ((imm8 & 12) >> 2) ) { + case SIMDE_SIDD_CMP_EQUAL_ANY: + for(size_t i = 0 ; i < (upper_bound) ; i++){ + SIMDE__VECTORIZE_REDUCTION(|:int_res_1) + for (size_t j = 0 ; j < (upper_bound) ; j++){ + int_res_1 |= (((bool_res_.i8[i] >> j) & 1) << i); } - break; - case 1: - SIMDE__VECTORIZE - for (size_t i = 0 ; i < (UpperBound) ; i++){ - for (size_t j = 0 ; j < (UpperBound) ; j++){ - IntRes1 |= ( ( ((BoolRes_.i16[i] >> j)&1) & ( (BoolRes_.i16[i] >> (j+1))&1) ) << i); - j+=2; - } + } + break; + case SIMDE_SIDD_CMP_RANGES: + for(size_t i = 0 ; i < (upper_bound) ; i++){ + SIMDE__VECTORIZE_REDUCTION(|:int_res_1) + for (size_t j = 0 ; j < (upper_bound) ; j++){ + int_res_1 |= ((((bool_res_.i8[i] >> j) & 1) & ((bool_res_.i8[i] >> (j + 1)) & 1)) << i); + j += 2; } - break; - case 2: - SIMDE__VECTORIZE - for (size_t i = 0 ; i < (UpperBound) ; i++){ - for (size_t j = 0 ; j < (UpperBound) ; j++){ - IntRes1 |= ( ((BoolRes_.i16[i] >> i)&1) << i); - } + } + break; + case SIMDE_SIDD_CMP_EQUAL_EACH: + for (size_t i = 0 ; i < (upper_bound) ; i++){ + SIMDE__VECTORIZE_REDUCTION(|:int_res_1) + for (size_t j = 0 ; j < (upper_bound) ; j++){ + int_res_1 |= (((bool_res_.i8[i] >> i) & 1) << i); } - break; - case 3: - IntRes1 = (imm8 & 1) ? 0xff : 0xffff; - SIMDE__VECTORIZE - for (size_t i = 0 ; i < (UpperBound) ; i++){ - size_t k=i; - for (size_t j = 0 ; j < (UpperBound-i) ; j++){ - IntRes1 &= ( ( (BoolRes_.i16[k] >> j)&1 ) << i) ; - k+=1; - } + } + break; + case SIMDE_SIDD_CMP_EQUAL_ORDERED: + int_res_1 = (imm8 & 1) ? 0xff : 0xffff; + for(size_t i = 0 ; i < (upper_bound) ; i++){ + size_t k = i; + SIMDE__VECTORIZE_REDUCTION(|:int_res_1) + for (size_t j = 0 ; j < (upper_bound-i) ; j++){ + int_res_1 &= (((bool_res_.i8[k] >> j) & 1 ) << i) ; + k += 1; } - } - for(size_t i = 0; i < (UpperBound) ; i++){ - if((imm8>>4)&1){ - if((imm8>>5)&1) { - if (i >= lb) { - IntRes2 |= ( ((IntRes1>>i)&1) << i); - } - else { - IntRes2 |= ( ( ((IntRes1>>i)&1)^(-1)) << i); - } + } + } + for(size_t i = 0; i < (upper_bound) ; i++){ + if((imm8 >> 4) & 1){ + if((imm8 >> 5) & 1) { + if (i >= lb) { + int_res_2 |= (((int_res_1 >> i) & 1) << i); } - else{ - IntRes2 |= ( ( ((IntRes1>>i)&1)^(-1)) << i); + else { + int_res_2 |= ((((int_res_1 >> i) & 1) ^ (-1)) << i); } } else{ - IntRes2 |= ( ((IntRes1>>i)&1) << i); + int_res_2 |= ((((int_res_1 >> i) & 1) ^ (-1)) << i); } } - return ( (IntRes2==0) & (lb > UpperBound) ); + else{ + int_res_2 |= ( ((int_res_1 >> i) & 1) << i); + } } - else{ - int UpperBound = (128 / 8) - 1; - int aInvalid = 0; - int bInvalid = 0; - SIMDE__VECTORIZE - for (size_t i = 0 ; i < (UpperBound) ; i++) { - for(size_t j = 0; j< (UpperBound) ; j++) - { - int bitvalue = ( (a_.i8[i]==b_.i8[j]) ? 1 : 0 ); - BoolRes_.i8[i] |= ( ( bitvalue ) << j); - if(i == la) - aInvalid = 1; - if(j == lb) - bInvalid = 1; - switch( ((imm8 & 12)>>2) ){ - case 0: - break; - case 1: - break; - case 2: - if(aInvalid && bInvalid) - BoolRes_.i8[i] |= (1< upper_bound) ); +} + +SIMDE__FUNCTION_ATTRIBUTES +int +simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8) { + const int cmp_op = imm8 & 0x06; + const int polarity = imm8 & 0x30; + simde__m128i_private + bool_res_ = simde__m128i_to_private(simde_mm_setzero_si128()), + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + const int upper_bound = (128 / 16) - 1; + int a_invalid = 0; + int b_invalid = 0; + for(size_t i = 0 ; i < (upper_bound) ; i++) { + for(size_t j = 0; j< (upper_bound) ; j++) + { + int bitvalue = ((a_.i16[i] == b_.i16[j]) ? 1 : 0); + bool_res_.i16[i] |= ((bitvalue) << j); + if(i == la) + a_invalid = 1; + if(j == lb) + b_invalid = 1; + switch(((imm8 & 12) >> 2)){ + case SIMDE_SIDD_CMP_EQUAL_ANY: + break; + case SIMDE_SIDD_CMP_RANGES: + break; + case SIMDE_SIDD_CMP_EQUAL_EACH: + if(a_invalid && b_invalid) + bool_res_.i16[i] |= (1 << j); + break; + case SIMDE_SIDD_CMP_EQUAL_ORDERED: + if(a_invalid && !b_invalid) + bool_res_.i16[i] |= (1 << j); + else if(a_invalid && b_invalid) + bool_res_.i16[i] |= (1 << j); + break; } } - int8_t IntRes1 = 0; - int8_t IntRes2 = 0; - switch( ((imm8 & 12)>>2) ) { - case 0: - SIMDE__VECTORIZE - for (size_t i = 0 ; i < (UpperBound) ; i++){ - for (size_t j = 0 ; j < (UpperBound) ; j++){ - IntRes1 |= ( ((BoolRes_.i8[i] >> j)&1) << i) ; - } + } + int16_t int_res_1 = 0; + int16_t int_res_2 = 0; + switch(((imm8 & 12) >> 2)) { + case SIMDE_SIDD_CMP_EQUAL_ANY: + for(size_t i = 0 ; i < (upper_bound) ; i++){ + SIMDE__VECTORIZE_REDUCTION(|:int_res_1) + for (size_t j = 0 ; j < (upper_bound) ; j++){ + int_res_1 |= (((bool_res_.i16[i] >> j) & 1) << i) ; } - break; - case 1: - SIMDE__VECTORIZE - for (size_t i = 0 ; i < (UpperBound) ; i++){ - for (size_t j = 0 ; j < (UpperBound) ; j++){ - IntRes1 |= ( ( ((BoolRes_.i8[i] >> j)&1) & ( (BoolRes_.i8[i] >> (j+1))&1) ) << i); - j+=2; - } + } + break; + case SIMDE_SIDD_CMP_RANGES: + for(size_t i = 0 ; i < (upper_bound) ; i++){ + SIMDE__VECTORIZE_REDUCTION(|:int_res_1) + for(size_t j = 0 ; j < (upper_bound) ; j++){ + int_res_1 |= ((((bool_res_.i16[i] >> j) & 1) & ((bool_res_.i16[i] >> (j + 1)) & 1)) << i); + j += 2; } - break; - case 2: - SIMDE__VECTORIZE - for (size_t i = 0 ; i < (UpperBound) ; i++){ - for (size_t j = 0 ; j < (UpperBound) ; j++){ - IntRes1 |= ( ((BoolRes_.i8[i] >> i)&1) << i); - } + } + break; + case SIMDE_SIDD_CMP_EQUAL_EACH: + for (size_t i = 0 ; i < (upper_bound) ; i++){ + SIMDE__VECTORIZE_REDUCTION(|:int_res_1) + for (size_t j = 0 ; j < (upper_bound) ; j++){ + int_res_1 |= (((bool_res_.i16[i] >> i) & 1) << i); } - break; - case 3: - IntRes1 = (imm8 & 1) ? 0xff : 0xffff; - SIMDE__VECTORIZE - for (size_t i = 0 ; i < (UpperBound) ; i++){ - size_t k=i; - for (size_t j = 0 ; j < (UpperBound-i) ; j++){ - IntRes1 &= ( ( (BoolRes_.i8[k] >> j)&1 ) << i) ; - k+=1; - } + } + break; + case SIMDE_SIDD_CMP_EQUAL_ORDERED: + int_res_1 = (imm8 & 1) ? 0xff : 0xffff; + for(size_t i = 0 ; i < (upper_bound) ; i++){ + size_t k = i; + SIMDE__VECTORIZE_REDUCTION(|:int_res_1) + for(size_t j = 0 ; j < (upper_bound-i) ; j++){ + int_res_1 &= (((bool_res_.i16[k] >> j) & 1) << i) ; + k += 1; } - } - for(size_t i = 0; i < (UpperBound) ; i++){ - if((imm8>>4)&1){ - if((imm8>>5)&1) { - if (i >= lb) { - IntRes2 |= ( ((IntRes1>>i)&1) << i); - } - else { - IntRes2 |= ( ( ((IntRes1>>i)&1)^(-1)) << i); - } + } + } + for(size_t i = 0; i < (upper_bound) ; i++){ + if((imm8 >> 4) & 1){ + if((imm8 >> 5) & 1) { + if (i >= lb) { + int_res_2 |= (((int_res_1 >> i) & 1) << i); } - else{ - IntRes2 |= ( ( ((IntRes1>>i)&1)^(-1)) << i); + else { + int_res_2 |= ((((int_res_1 >> i) & 1) ^ (-1)) << i); } } else{ - IntRes2 |= ( ((IntRes1>>i)&1) << i); + int_res_2 |= ((((int_res_1 >> i) & 1) ^ (-1)) << i); } } - return ( (IntRes2==0) & (lb > UpperBound) ); + else{ + int_res_2 |= (((int_res_1 >> i) & 1) << i); + } } + return ((int_res_2 == 0) & (lb > upper_bound)); +} + +SIMDE__FUNCTION_ATTRIBUTES +int +simde_mm_cmpestra(simde__m128i a, int la, simde__m128i b, int lb, const int imm8){ +#if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_cmpestra(a, la, b, lb, imm8); +#else + const int character_type = imm8 & 0x03; + if(character_type & 1) + return simde_mm_cmpestra_8_(a, la, b, lb, imm8); + else + return simde_mm_cmpestra_16_(a, la, b, lb, imm8); #endif } #if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) From 9cb5384b9568ef151f55adf016cf7b1187ea1e41 Mon Sep 17 00:00:00 2001 From: Hidayat Ullah Khan Date: Mon, 11 May 2020 12:45:58 +0530 Subject: [PATCH 03/14] sse4.2 : made few changes --- simde/x86/sse4.2.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/simde/x86/sse4.2.h b/simde/x86/sse4.2.h index a8cf78c86..9e6ab319d 100644 --- a/simde/x86/sse4.2.h +++ b/simde/x86/sse4.2.h @@ -69,7 +69,7 @@ simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int i a_invalid = 1; if(j == lb) b_invalid = 1; - switch(((imm8 & 12) >> 2)){ + switch(cmp_op){ case SIMDE_SIDD_CMP_EQUAL_ANY: break; case SIMDE_SIDD_CMP_RANGES: @@ -89,7 +89,7 @@ simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int i } int8_t int_res_1 = 0; int8_t int_res_2 = 0; - switch( ((imm8 & 12) >> 2) ) { + switch(cmp_op) { case SIMDE_SIDD_CMP_EQUAL_ANY: for(size_t i = 0 ; i < (upper_bound) ; i++){ SIMDE__VECTORIZE_REDUCTION(|:int_res_1) @@ -168,7 +168,7 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int a_invalid = 1; if(j == lb) b_invalid = 1; - switch(((imm8 & 12) >> 2)){ + switch(cmp_op){ case SIMDE_SIDD_CMP_EQUAL_ANY: break; case SIMDE_SIDD_CMP_RANGES: @@ -188,7 +188,7 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int } int16_t int_res_1 = 0; int16_t int_res_2 = 0; - switch(((imm8 & 12) >> 2)) { + switch(cmp_op) { case SIMDE_SIDD_CMP_EQUAL_ANY: for(size_t i = 0 ; i < (upper_bound) ; i++){ SIMDE__VECTORIZE_REDUCTION(|:int_res_1) From 5ba826229840728ac2088057003d75d03ec214c5 Mon Sep 17 00:00:00 2001 From: Hidayat Ullah Khan Date: Mon, 11 May 2020 12:58:11 +0530 Subject: [PATCH 04/14] sse4.2 : made few changes --- test/x86/sse4.2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/x86/sse4.2.c b/test/x86/sse4.2.c index e111d50d7..699f50bae 100644 --- a/test/x86/sse4.2.c +++ b/test/x86/sse4.2.c @@ -29,7 +29,7 @@ #if defined(SIMDE_X86_SSE4_2_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) static MunitResult -test_simde_mm_odd_cmpestra(const MunitParameter params[], void* data) { +test_simde_mm_cmpestra_16(const MunitParameter params[], void* data) { (void) params; (void) data; @@ -79,7 +79,7 @@ test_simde_mm_odd_cmpestra(const MunitParameter params[], void* data) { } static MunitResult -test_simde_mm_even_cmpestra(const MunitParameter params[], void* data) { +test_simde_mm_cmpestra_8(const MunitParameter params[], void* data) { (void) params; (void) data; From dae6bc3abd058d454ffeb19a55669b9bc4e96651 Mon Sep 17 00:00:00 2001 From: Hidayat Ullah Khan Date: Mon, 11 May 2020 13:40:19 +0530 Subject: [PATCH 05/14] sse4.2 : made few changes --- simde/x86/sse4.2.h | 56 +++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/simde/x86/sse4.2.h b/simde/x86/sse4.2.h index 9e6ab319d..563923e90 100644 --- a/simde/x86/sse4.2.h +++ b/simde/x86/sse4.2.h @@ -61,8 +61,8 @@ simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int i const int upper_bound = (128 / 8) - 1; int a_invalid = 0; int b_invalid = 0; - for (size_t i = 0 ; i < (upper_bound) ; i++) { - for(size_t j = 0; j< (upper_bound) ; j++){ + for(int i = 0 ; i < (upper_bound) ; i++) { + for(int j = 0; j< (upper_bound) ; j++){ int bitvalue = ((a_.i8[i] == b_.i8[j]) ? 1 : 0); bool_res_.i8[i] |= (( bitvalue ) << j); if(i == la) @@ -91,44 +91,44 @@ simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int i int8_t int_res_2 = 0; switch(cmp_op) { case SIMDE_SIDD_CMP_EQUAL_ANY: - for(size_t i = 0 ; i < (upper_bound) ; i++){ + for(int i = 0 ; i < (upper_bound) ; i++){ SIMDE__VECTORIZE_REDUCTION(|:int_res_1) - for (size_t j = 0 ; j < (upper_bound) ; j++){ + for(int j = 0 ; j < (upper_bound) ; j++){ int_res_1 |= (((bool_res_.i8[i] >> j) & 1) << i); } } break; case SIMDE_SIDD_CMP_RANGES: - for(size_t i = 0 ; i < (upper_bound) ; i++){ + for(int i = 0 ; i < (upper_bound) ; i++){ SIMDE__VECTORIZE_REDUCTION(|:int_res_1) - for (size_t j = 0 ; j < (upper_bound) ; j++){ + for(int j = 0 ; j < (upper_bound) ; j++){ int_res_1 |= ((((bool_res_.i8[i] >> j) & 1) & ((bool_res_.i8[i] >> (j + 1)) & 1)) << i); j += 2; } } break; case SIMDE_SIDD_CMP_EQUAL_EACH: - for (size_t i = 0 ; i < (upper_bound) ; i++){ + for(int i = 0 ; i < (upper_bound) ; i++){ SIMDE__VECTORIZE_REDUCTION(|:int_res_1) - for (size_t j = 0 ; j < (upper_bound) ; j++){ + for(int j = 0 ; j < (upper_bound) ; j++){ int_res_1 |= (((bool_res_.i8[i] >> i) & 1) << i); } } break; case SIMDE_SIDD_CMP_EQUAL_ORDERED: int_res_1 = (imm8 & 1) ? 0xff : 0xffff; - for(size_t i = 0 ; i < (upper_bound) ; i++){ - size_t k = i; + for(int i = 0 ; i < (upper_bound) ; i++){ + int k = i; SIMDE__VECTORIZE_REDUCTION(|:int_res_1) - for (size_t j = 0 ; j < (upper_bound-i) ; j++){ + for(int j = 0 ; j < (upper_bound-i) ; j++){ int_res_1 &= (((bool_res_.i8[k] >> j) & 1 ) << i) ; k += 1; } } } - for(size_t i = 0; i < (upper_bound) ; i++){ - if((imm8 >> 4) & 1){ - if((imm8 >> 5) & 1) { + for(int i = 0; i < (upper_bound) ; i++){ + if(polarity & 1){ + if((polarity >> 1) & 1) { if (i >= lb) { int_res_2 |= (((int_res_1 >> i) & 1) << i); } @@ -159,8 +159,8 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int const int upper_bound = (128 / 16) - 1; int a_invalid = 0; int b_invalid = 0; - for(size_t i = 0 ; i < (upper_bound) ; i++) { - for(size_t j = 0; j< (upper_bound) ; j++) + for(int i = 0 ; i < (upper_bound) ; i++) { + for(int j = 0; j< (upper_bound) ; j++) { int bitvalue = ((a_.i16[i] == b_.i16[j]) ? 1 : 0); bool_res_.i16[i] |= ((bitvalue) << j); @@ -190,44 +190,44 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int int16_t int_res_2 = 0; switch(cmp_op) { case SIMDE_SIDD_CMP_EQUAL_ANY: - for(size_t i = 0 ; i < (upper_bound) ; i++){ + for(int i = 0 ; i < (upper_bound) ; i++){ SIMDE__VECTORIZE_REDUCTION(|:int_res_1) - for (size_t j = 0 ; j < (upper_bound) ; j++){ + for (int j = 0 ; j < (upper_bound) ; j++){ int_res_1 |= (((bool_res_.i16[i] >> j) & 1) << i) ; } } break; case SIMDE_SIDD_CMP_RANGES: - for(size_t i = 0 ; i < (upper_bound) ; i++){ + for(int i = 0 ; i < (upper_bound) ; i++){ SIMDE__VECTORIZE_REDUCTION(|:int_res_1) - for(size_t j = 0 ; j < (upper_bound) ; j++){ + for(int j = 0 ; j < (upper_bound) ; j++){ int_res_1 |= ((((bool_res_.i16[i] >> j) & 1) & ((bool_res_.i16[i] >> (j + 1)) & 1)) << i); j += 2; } } break; case SIMDE_SIDD_CMP_EQUAL_EACH: - for (size_t i = 0 ; i < (upper_bound) ; i++){ + for(int i = 0 ; i < (upper_bound) ; i++){ SIMDE__VECTORIZE_REDUCTION(|:int_res_1) - for (size_t j = 0 ; j < (upper_bound) ; j++){ + for(int j = 0 ; j < (upper_bound) ; j++){ int_res_1 |= (((bool_res_.i16[i] >> i) & 1) << i); } } break; case SIMDE_SIDD_CMP_EQUAL_ORDERED: int_res_1 = (imm8 & 1) ? 0xff : 0xffff; - for(size_t i = 0 ; i < (upper_bound) ; i++){ - size_t k = i; + for(int i = 0 ; i < (upper_bound) ; i++){ + int k = i; SIMDE__VECTORIZE_REDUCTION(|:int_res_1) - for(size_t j = 0 ; j < (upper_bound-i) ; j++){ + for(int j = 0 ; j < (upper_bound-i) ; j++){ int_res_1 &= (((bool_res_.i16[k] >> j) & 1) << i) ; k += 1; } } } - for(size_t i = 0; i < (upper_bound) ; i++){ - if((imm8 >> 4) & 1){ - if((imm8 >> 5) & 1) { + for(int i = 0; i < (upper_bound) ; i++){ + if(polarity & 1){ + if((polarity >> 1) & 1) { if (i >= lb) { int_res_2 |= (((int_res_1 >> i) & 1) << i); } From 453d5e45b0ffc4984f45bde6b43abf18df9c5ee5 Mon Sep 17 00:00:00 2001 From: Hidayat Ullah Khan Date: Mon, 11 May 2020 13:43:12 +0530 Subject: [PATCH 06/14] sse4.2 : made few changes --- simde/x86/sse4.2.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/simde/x86/sse4.2.h b/simde/x86/sse4.2.h index 563923e90..3bedf4e61 100644 --- a/simde/x86/sse4.2.h +++ b/simde/x86/sse4.2.h @@ -87,8 +87,8 @@ simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int i } } } - int8_t int_res_1 = 0; - int8_t int_res_2 = 0; + int32_t int_res_1 = 0; + int32_t int_res_2 = 0; switch(cmp_op) { case SIMDE_SIDD_CMP_EQUAL_ANY: for(int i = 0 ; i < (upper_bound) ; i++){ @@ -186,8 +186,8 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int } } } - int16_t int_res_1 = 0; - int16_t int_res_2 = 0; + int32_t int_res_1 = 0; + int32_t int_res_2 = 0; switch(cmp_op) { case SIMDE_SIDD_CMP_EQUAL_ANY: for(int i = 0 ; i < (upper_bound) ; i++){ From d9b4aa9f6ae41a6bbf50d3026c1f1010ba366a92 Mon Sep 17 00:00:00 2001 From: Hidayat Ullah Khan Date: Mon, 11 May 2020 14:39:58 +0530 Subject: [PATCH 07/14] sse4.2 : made few changes --- simde/x86/sse4.2.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/simde/x86/sse4.2.h b/simde/x86/sse4.2.h index 3bedf4e61..9a6d65b3f 100644 --- a/simde/x86/sse4.2.h +++ b/simde/x86/sse4.2.h @@ -249,16 +249,15 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int SIMDE__FUNCTION_ATTRIBUTES int simde_mm_cmpestra(simde__m128i a, int la, simde__m128i b, int lb, const int imm8){ -#if defined(SIMDE_X86_SSE4_2_NATIVE) - return _mm_cmpestra(a, la, b, lb, imm8); -#else const int character_type = imm8 & 0x03; if(character_type & 1) return simde_mm_cmpestra_8_(a, la, b, lb, imm8); else return simde_mm_cmpestra_16_(a, la, b, lb, imm8); -#endif } +#if defined(SIMDE_X86_SSE4_2_NATIVE) +# define simde_mm_cmpestra(a, la, b, lb, imm8) _mm_cmpestra(a, la, b, lb, imm8) +#endif #if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) # define _mm_cmpestra(a, la, b, lb, imm8) simde_mm_cmpestra(a, la, b, lb, imm8) #endif From fd4cd19f3ce2a802cbaadc8ad133e16782967e7e Mon Sep 17 00:00:00 2001 From: Hidayat Ullah Khan Date: Mon, 11 May 2020 14:45:06 +0530 Subject: [PATCH 08/14] sse4.2 : made few changes --- test/x86/sse4.2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/x86/sse4.2.c b/test/x86/sse4.2.c index 699f50bae..5201d62c5 100644 --- a/test/x86/sse4.2.c +++ b/test/x86/sse4.2.c @@ -27,7 +27,7 @@ #include #if defined(SIMDE_X86_SSE4_2_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) - +/* static MunitResult test_simde_mm_cmpestra_16(const MunitParameter params[], void* data) { (void) params; @@ -133,7 +133,7 @@ test_simde_mm_cmpestra_8(const MunitParameter params[], void* data) { return MUNIT_OK; } - +*/ static MunitResult test_simde_mm_cmpgt_epi64(const MunitParameter params[], void* data) { (void) params; From cfe4b7fc0d0f21d15cd2d7aa3740e7e47a587bd0 Mon Sep 17 00:00:00 2001 From: Hidayat Ullah Khan Date: Tue, 12 May 2020 15:12:52 +0530 Subject: [PATCH 09/14] sse4.2: made few changes --- simde/x86/sse4.2.h | 138 ++++++++++++++++++++++++++++----------------- 1 file changed, 86 insertions(+), 52 deletions(-) diff --git a/simde/x86/sse4.2.h b/simde/x86/sse4.2.h index 9a6d65b3f..75e1adcf1 100644 --- a/simde/x86/sse4.2.h +++ b/simde/x86/sse4.2.h @@ -37,16 +37,41 @@ SIMDE__BEGIN_DECLS # define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES #endif -#define SIMDE_SIDD_CMP_EQUAL_ANY 0 -#define SIMDE_SIDD_CMP_RANGES 1 -#define SIMDE_SIDD_CMP_EQUAL_EACH 2 -#define SIMDE_SIDD_CMP_EQUAL_ORDERED 3 +#if defined(SIMDE_X86_SSE4_2_NATIVE) +# define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS +# define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS +# define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS +# define SIMDE__SIDD_SWORD_OPS _SIDD_SWORD_OPS +# define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY +# define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES +# define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH +# define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED +# define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY +# define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY +# define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY +# define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY +# define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT +# define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT +# define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK +# define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) -#define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY -#define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES -#define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH -#define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED +#else +# define SIMDE_SIDD_UBYTE_OPS 0x00 +# define SIMDE_SIDD_UWORD_OPS 0x01 +# define SIMDE_SIDD_SBYTE_OPS 0x02 +# define SIMDE_SIDD_SWORD_OPS 0x03 +# define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 +# define SIMDE_SIDD_CMP_RANGES 0x04 +# define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 +# define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c +# define SIMDE_SIDD_POSITIVE_POLARITY 0x00 +# define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 +# define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 +# define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 +# define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 +# define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 +# define SIMDE_SIDD_BIT_MASK 0x00 +# define SIMDE_SIDD_UNIT_MASK 0x40 #endif SIMDE__FUNCTION_ATTRIBUTES @@ -61,72 +86,79 @@ simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int i const int upper_bound = (128 / 8) - 1; int a_invalid = 0; int b_invalid = 0; - for(int i = 0 ; i < (upper_bound) ; i++) { - for(int j = 0; j< (upper_bound) ; j++){ + for(int i = 0 ; i < upper_bound ; i++) { + for(int j = 0; j< upper_bound ; j++){ int bitvalue = ((a_.i8[i] == b_.i8[j]) ? 1 : 0); - bool_res_.i8[i] |= (( bitvalue ) << j); if(i == la) a_invalid = 1; if(j == lb) b_invalid = 1; switch(cmp_op){ case SIMDE_SIDD_CMP_EQUAL_ANY: + bitvalue = 0; break; case SIMDE_SIDD_CMP_RANGES: + bitvalue = 0; break; case SIMDE_SIDD_CMP_EQUAL_EACH: if(a_invalid && b_invalid) - bool_res_.i8[i] |= (1 << j); + bitvalue = 1; + else + bitvalue = 0; break; case SIMDE_SIDD_CMP_EQUAL_ORDERED: if(a_invalid && !b_invalid) - bool_res_.i8[i] |= (1 << j); + bitvalue = 1; else if(a_invalid && b_invalid) - bool_res_.i8[i] |= (1 << j); + bitvalue = 1; + else + bitvalue = 0; break; } + bool_res_.i8[i] |= (bitvalue << j); } } int32_t int_res_1 = 0; int32_t int_res_2 = 0; switch(cmp_op) { case SIMDE_SIDD_CMP_EQUAL_ANY: - for(int i = 0 ; i < (upper_bound) ; i++){ + for(int i = 0 ; i < upper_bound ; i++){ SIMDE__VECTORIZE_REDUCTION(|:int_res_1) - for(int j = 0 ; j < (upper_bound) ; j++){ + for(int j = 0 ; j < upper_bound ; j++){ int_res_1 |= (((bool_res_.i8[i] >> j) & 1) << i); } } break; case SIMDE_SIDD_CMP_RANGES: - for(int i = 0 ; i < (upper_bound) ; i++){ + for(int i = 0 ; i < upper_bound ; i++){ SIMDE__VECTORIZE_REDUCTION(|:int_res_1) - for(int j = 0 ; j < (upper_bound) ; j++){ + for(int j = 0 ; j < upper_bound ; j++){ int_res_1 |= ((((bool_res_.i8[i] >> j) & 1) & ((bool_res_.i8[i] >> (j + 1)) & 1)) << i); j += 2; } } break; case SIMDE_SIDD_CMP_EQUAL_EACH: - for(int i = 0 ; i < (upper_bound) ; i++){ + for(int i = 0 ; i < upper_bound ; i++){ SIMDE__VECTORIZE_REDUCTION(|:int_res_1) - for(int j = 0 ; j < (upper_bound) ; j++){ + for(int j = 0 ; j < upper_bound ; j++){ int_res_1 |= (((bool_res_.i8[i] >> i) & 1) << i); } } break; case SIMDE_SIDD_CMP_EQUAL_ORDERED: - int_res_1 = (imm8 & 1) ? 0xff : 0xffff; - for(int i = 0 ; i < (upper_bound) ; i++){ + int_res_1 = 0xff; + for(int i = 0 ; i < upper_bound ; i++){ int k = i; - SIMDE__VECTORIZE_REDUCTION(|:int_res_1) + SIMDE__VECTORIZE_REDUCTION(&:int_res_1) for(int j = 0 ; j < (upper_bound-i) ; j++){ int_res_1 &= (((bool_res_.i8[k] >> j) & 1 ) << i) ; k += 1; } } + break; } - for(int i = 0; i < (upper_bound) ; i++){ + for(int i = 0; i < upper_bound ; i++){ if(polarity & 1){ if((polarity >> 1) & 1) { if (i >= lb) { @@ -144,7 +176,7 @@ simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int i int_res_2 |= ( ((int_res_1 >> i) & 1) << i); } } - return ( (int_res_2 == 0) & (lb > upper_bound) ); + return !int_res_2 & (lb > upper_bound); } SIMDE__FUNCTION_ATTRIBUTES @@ -159,73 +191,80 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int const int upper_bound = (128 / 16) - 1; int a_invalid = 0; int b_invalid = 0; - for(int i = 0 ; i < (upper_bound) ; i++) { - for(int j = 0; j< (upper_bound) ; j++) + for(int i = 0 ; i < upper_bound ; i++) { + for(int j = 0; j< upper_bound ; j++) { int bitvalue = ((a_.i16[i] == b_.i16[j]) ? 1 : 0); - bool_res_.i16[i] |= ((bitvalue) << j); if(i == la) a_invalid = 1; if(j == lb) b_invalid = 1; switch(cmp_op){ case SIMDE_SIDD_CMP_EQUAL_ANY: + bitvalue = 0; break; case SIMDE_SIDD_CMP_RANGES: + bitvalue = 0; break; case SIMDE_SIDD_CMP_EQUAL_EACH: if(a_invalid && b_invalid) - bool_res_.i16[i] |= (1 << j); + bitvalue = 1; + else + bitvalue = 0; break; case SIMDE_SIDD_CMP_EQUAL_ORDERED: if(a_invalid && !b_invalid) - bool_res_.i16[i] |= (1 << j); + bitvalue = 1; else if(a_invalid && b_invalid) - bool_res_.i16[i] |= (1 << j); + bitvalue = 1; + else + bitvalue = 0; break; } + bool_res_.i16[i] |= (bitvalue << j); } } int32_t int_res_1 = 0; int32_t int_res_2 = 0; switch(cmp_op) { case SIMDE_SIDD_CMP_EQUAL_ANY: - for(int i = 0 ; i < (upper_bound) ; i++){ + for(int i = 0 ; i < upper_bound ; i++){ SIMDE__VECTORIZE_REDUCTION(|:int_res_1) - for (int j = 0 ; j < (upper_bound) ; j++){ + for (int j = 0 ; j < upper_bound ; j++){ int_res_1 |= (((bool_res_.i16[i] >> j) & 1) << i) ; } } break; case SIMDE_SIDD_CMP_RANGES: - for(int i = 0 ; i < (upper_bound) ; i++){ + for(int i = 0 ; i < upper_bound ; i++){ SIMDE__VECTORIZE_REDUCTION(|:int_res_1) - for(int j = 0 ; j < (upper_bound) ; j++){ + for(int j = 0 ; j < upper_bound ; j++){ int_res_1 |= ((((bool_res_.i16[i] >> j) & 1) & ((bool_res_.i16[i] >> (j + 1)) & 1)) << i); j += 2; } } break; case SIMDE_SIDD_CMP_EQUAL_EACH: - for(int i = 0 ; i < (upper_bound) ; i++){ + for(int i = 0 ; i < upper_bound ; i++){ SIMDE__VECTORIZE_REDUCTION(|:int_res_1) - for(int j = 0 ; j < (upper_bound) ; j++){ + for(int j = 0 ; j < upper_bound ; j++){ int_res_1 |= (((bool_res_.i16[i] >> i) & 1) << i); } } break; case SIMDE_SIDD_CMP_EQUAL_ORDERED: - int_res_1 = (imm8 & 1) ? 0xff : 0xffff; - for(int i = 0 ; i < (upper_bound) ; i++){ + int_res_1 = 0xffff; + for(int i = 0 ; i < upper_bound ; i++){ int k = i; - SIMDE__VECTORIZE_REDUCTION(|:int_res_1) + SIMDE__VECTORIZE_REDUCTION(&:int_res_1) for(int j = 0 ; j < (upper_bound-i) ; j++){ int_res_1 &= (((bool_res_.i16[k] >> j) & 1) << i) ; k += 1; } } + break; } - for(int i = 0; i < (upper_bound) ; i++){ + for(int i = 0; i < upper_bound ; i++){ if(polarity & 1){ if((polarity >> 1) & 1) { if (i >= lb) { @@ -243,22 +282,17 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int int_res_2 |= (((int_res_1 >> i) & 1) << i); } } - return ((int_res_2 == 0) & (lb > upper_bound)); + return !int_res_2 & (lb > upper_bound); } -SIMDE__FUNCTION_ATTRIBUTES -int -simde_mm_cmpestra(simde__m128i a, int la, simde__m128i b, int lb, const int imm8){ - const int character_type = imm8 & 0x03; - if(character_type & 1) - return simde_mm_cmpestra_8_(a, la, b, lb, imm8); - else - return simde_mm_cmpestra_16_(a, la, b, lb, imm8); -} #if defined(SIMDE_X86_SSE4_2_NATIVE) # define simde_mm_cmpestra(a, la, b, lb, imm8) _mm_cmpestra(a, la, b, lb, imm8) #endif #if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) +# define simde_mm_cmpestra(a, la, b, lb, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? simde_mm_cmpestra_16_((a), (la), (b), (lb), (imm8)) \ + : simde_mm_cmpestra_8_((a), (la), (b), (lb), (imm8))) # define _mm_cmpestra(a, la, b, lb, imm8) simde_mm_cmpestra(a, la, b, lb, imm8) #endif From 9984e03a3851740fbcd5e33b011620076d58bfda Mon Sep 17 00:00:00 2001 From: Hidayat Ullah Khan Date: Tue, 12 May 2020 15:20:46 +0530 Subject: [PATCH 10/14] sse4.2: made few changes --- simde/x86/sse4.2.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/simde/x86/sse4.2.h b/simde/x86/sse4.2.h index 75e1adcf1..7588bf153 100644 --- a/simde/x86/sse4.2.h +++ b/simde/x86/sse4.2.h @@ -56,22 +56,22 @@ SIMDE__BEGIN_DECLS # define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK #else -# define SIMDE_SIDD_UBYTE_OPS 0x00 -# define SIMDE_SIDD_UWORD_OPS 0x01 -# define SIMDE_SIDD_SBYTE_OPS 0x02 -# define SIMDE_SIDD_SWORD_OPS 0x03 -# define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 -# define SIMDE_SIDD_CMP_RANGES 0x04 -# define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 -# define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c -# define SIMDE_SIDD_POSITIVE_POLARITY 0x00 -# define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 -# define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 -# define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 -# define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 -# define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 -# define SIMDE_SIDD_BIT_MASK 0x00 -# define SIMDE_SIDD_UNIT_MASK 0x40 +# define SIMDE_SIDD_UBYTE_OPS 0x00 +# define SIMDE_SIDD_UWORD_OPS 0x01 +# define SIMDE_SIDD_SBYTE_OPS 0x02 +# define SIMDE_SIDD_SWORD_OPS 0x03 +# define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 +# define SIMDE_SIDD_CMP_RANGES 0x04 +# define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 +# define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c +# define SIMDE_SIDD_POSITIVE_POLARITY 0x00 +# define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 +# define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 +# define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 +# define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 +# define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 +# define SIMDE_SIDD_BIT_MASK 0x00 +# define SIMDE_SIDD_UNIT_MASK 0x40 #endif SIMDE__FUNCTION_ATTRIBUTES From 1384ba185ff22dfc3f9fa085fd4506d3c141c1b2 Mon Sep 17 00:00:00 2001 From: Hidayat Ullah Khan Date: Tue, 12 May 2020 15:25:53 +0530 Subject: [PATCH 11/14] sse4.2: made few changes --- simde/x86/sse4.2.h | 64 +++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/simde/x86/sse4.2.h b/simde/x86/sse4.2.h index 7588bf153..8f891e53f 100644 --- a/simde/x86/sse4.2.h +++ b/simde/x86/sse4.2.h @@ -38,40 +38,40 @@ SIMDE__BEGIN_DECLS #endif #if defined(SIMDE_X86_SSE4_2_NATIVE) -# define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS -# define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS -# define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS -# define SIMDE__SIDD_SWORD_OPS _SIDD_SWORD_OPS -# define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY -# define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES -# define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH -# define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED -# define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY -# define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY -# define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY -# define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY -# define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT -# define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT -# define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK -# define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK +#define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS +#define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS +#define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS +#define SIMDE__SIDD_SWORD_OPS _SIDD_SWORD_OPS +#define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY +#define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES +#define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH +#define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED +#define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY +#define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY +#define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY +#define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY +#define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT +#define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT +#define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK +#define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK #else -# define SIMDE_SIDD_UBYTE_OPS 0x00 -# define SIMDE_SIDD_UWORD_OPS 0x01 -# define SIMDE_SIDD_SBYTE_OPS 0x02 -# define SIMDE_SIDD_SWORD_OPS 0x03 -# define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 -# define SIMDE_SIDD_CMP_RANGES 0x04 -# define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 -# define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c -# define SIMDE_SIDD_POSITIVE_POLARITY 0x00 -# define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 -# define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 -# define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 -# define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 -# define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 -# define SIMDE_SIDD_BIT_MASK 0x00 -# define SIMDE_SIDD_UNIT_MASK 0x40 +#define SIMDE_SIDD_UBYTE_OPS 0x00 +#define SIMDE_SIDD_UWORD_OPS 0x01 +#define SIMDE_SIDD_SBYTE_OPS 0x02 +#define SIMDE_SIDD_SWORD_OPS 0x03 +#define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 +#define SIMDE_SIDD_CMP_RANGES 0x04 +#define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 +#define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c +#define SIMDE_SIDD_POSITIVE_POLARITY 0x00 +#define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 +#define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 +#define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 +#define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 +#define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 +#define SIMDE_SIDD_BIT_MASK 0x00 +#define SIMDE_SIDD_UNIT_MASK 0x40 #endif SIMDE__FUNCTION_ATTRIBUTES From 9a4dbcc6f81be96835e60cc16238ece53e86156d Mon Sep 17 00:00:00 2001 From: Hidayat Ullah Khan Date: Tue, 12 May 2020 18:25:34 +0530 Subject: [PATCH 12/14] sse4.2: made some changes --- simde/x86/sse4.2.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/simde/x86/sse4.2.h b/simde/x86/sse4.2.h index 8f891e53f..fc948f221 100644 --- a/simde/x86/sse4.2.h +++ b/simde/x86/sse4.2.h @@ -285,14 +285,14 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int return !int_res_2 & (lb > upper_bound); } -#if defined(SIMDE_X86_SSE4_2_NATIVE) -# define simde_mm_cmpestra(a, la, b, lb, imm8) _mm_cmpestra(a, la, b, lb, imm8) -#endif -#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) # define simde_mm_cmpestra(a, la, b, lb, imm8) \ (((imm8) & SIMDE_SIDD_UWORD_OPS) \ ? simde_mm_cmpestra_16_((a), (la), (b), (lb), (imm8)) \ : simde_mm_cmpestra_8_((a), (la), (b), (lb), (imm8))) +#if defined(SIMDE_X86_SSE4_2_NATIVE) +# define simde_mm_cmpestra(a, la, b, lb, imm8) _mm_cmpestra(a, la, b, lb, imm8) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) # define _mm_cmpestra(a, la, b, lb, imm8) simde_mm_cmpestra(a, la, b, lb, imm8) #endif From 08a35766c9bb7119468402971d515de542d71aee Mon Sep 17 00:00:00 2001 From: Hidayat Ullah Khan Date: Wed, 13 May 2020 08:22:36 +0530 Subject: [PATCH 13/14] sse4.2: made few changes --- simde/x86/sse4.2.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/simde/x86/sse4.2.h b/simde/x86/sse4.2.h index fc948f221..51c68c4c6 100644 --- a/simde/x86/sse4.2.h +++ b/simde/x86/sse4.2.h @@ -285,15 +285,21 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int return !int_res_2 & (lb > upper_bound); } -# define simde_mm_cmpestra(a, la, b, lb, imm8) \ +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #define simde_mm_cmpestra(a, la, b, lb, imm8) \ + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 0x7f) \ + _mm_cmpestra(a, la, b, lb, imm8) +#else + #define simde_mm_cmpestra(a, la, b, lb, imm8) \ + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 0x7f) \ (((imm8) & SIMDE_SIDD_UWORD_OPS) \ ? simde_mm_cmpestra_16_((a), (la), (b), (lb), (imm8)) \ : simde_mm_cmpestra_8_((a), (la), (b), (lb), (imm8))) -#if defined(SIMDE_X86_SSE4_2_NATIVE) -# define simde_mm_cmpestra(a, la, b, lb, imm8) _mm_cmpestra(a, la, b, lb, imm8) #endif #if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) -# define _mm_cmpestra(a, la, b, lb, imm8) simde_mm_cmpestra(a, la, b, lb, imm8) + #define _mm_cmpestra(a, la, b, lb, imm8) \ + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 0x7f) \ + simde_mm_cmpestra(a, la, b, lb, imm8) #endif SIMDE__FUNCTION_ATTRIBUTES From a6a53fefd64c9fc07d336e0e8a113a1235cd0f3d Mon Sep 17 00:00:00 2001 From: Hidayat Ullah Khan Date: Thu, 14 May 2020 13:22:12 +0530 Subject: [PATCH 14/14] sse4.2: made few changes --- simde/x86/sse4.2.h | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/simde/x86/sse4.2.h b/simde/x86/sse4.2.h index 51c68c4c6..c296ff0d7 100644 --- a/simde/x86/sse4.2.h +++ b/simde/x86/sse4.2.h @@ -76,7 +76,8 @@ SIMDE__BEGIN_DECLS SIMDE__FUNCTION_ATTRIBUTES int -simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8) { +simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 127) { const int cmp_op = imm8 & 0x06; const int polarity = imm8 & 0x30; simde__m128i_private @@ -181,7 +182,8 @@ simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int i SIMDE__FUNCTION_ATTRIBUTES int -simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8) { +simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 127) { const int cmp_op = imm8 & 0x06; const int polarity = imm8 & 0x30; simde__m128i_private @@ -286,20 +288,15 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int } #if defined(SIMDE_X86_SSE4_2_NATIVE) - #define simde_mm_cmpestra(a, la, b, lb, imm8) \ - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 0x7f) \ - _mm_cmpestra(a, la, b, lb, imm8) + #define simde_mm_cmpestra(a, la, b, lb, imm8) _mm_cmpestra(a, la, b, lb, imm8) #else #define simde_mm_cmpestra(a, la, b, lb, imm8) \ - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 0x7f) \ (((imm8) & SIMDE_SIDD_UWORD_OPS) \ ? simde_mm_cmpestra_16_((a), (la), (b), (lb), (imm8)) \ : simde_mm_cmpestra_8_((a), (la), (b), (lb), (imm8))) #endif #if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) - #define _mm_cmpestra(a, la, b, lb, imm8) \ - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 0x7f) \ - simde_mm_cmpestra(a, la, b, lb, imm8) + #define _mm_cmpestra(a, la, b, lb, imm8) simde_mm_cmpestra(a, la, b, lb, imm8) #endif SIMDE__FUNCTION_ATTRIBUTES