Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sse4.2: first attempt at implementing mm_cmpestra #280

Closed
265 changes: 265 additions & 0 deletions simde/x86/sse4.2.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,271 @@ SIMDE__BEGIN_DECLS
# define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES
#endif

/* Control-byte (imm8) flags for the SSE4.2 string-comparison functions.
 *
 * When SIMDE_X86_SSE4_2_NATIVE is defined every SIMDE_SIDD_* name forwards
 * to the matching _SIDD_* macro; otherwise the numeric values are spelled
 * out here.  Both branches must define exactly the same set of names. */
#if defined(SIMDE_X86_SSE4_2_NATIVE)
/* bits 1:0 - element format */
#define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS
#define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS
#define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS
#define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS
/* bits 3:2 - comparison / aggregation operation */
#define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY
#define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES
#define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH
#define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED
/* bits 5:4 - polarity */
#define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY
#define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY
#define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY
#define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY
/* bit 6 - output selection */
#define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT
#define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT
#define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK
#define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK
#else
/* bits 1:0 - element format */
#define SIMDE_SIDD_UBYTE_OPS 0x00
#define SIMDE_SIDD_UWORD_OPS 0x01
#define SIMDE_SIDD_SBYTE_OPS 0x02
#define SIMDE_SIDD_SWORD_OPS 0x03
/* bits 3:2 - comparison / aggregation operation */
#define SIMDE_SIDD_CMP_EQUAL_ANY 0x00
#define SIMDE_SIDD_CMP_RANGES 0x04
#define SIMDE_SIDD_CMP_EQUAL_EACH 0x08
#define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c
/* bits 5:4 - polarity */
#define SIMDE_SIDD_POSITIVE_POLARITY 0x00
#define SIMDE_SIDD_NEGATIVE_POLARITY 0x10
#define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20
#define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30
/* bit 6 - output selection */
#define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00
#define SIMDE_SIDD_MOST_SIGNIFICANT 0x40
#define SIMDE_SIDD_BIT_MASK 0x00
#define SIMDE_SIDD_UNIT_MASK 0x40
#endif

/* Portable fallback for _mm_cmpestra on 8-bit elements.
 *
 * `a` and `b` are treated as string fragments of up to 16 characters whose
 * explicit lengths are |la| and |lb| (each saturated to 16).  The fragments
 * are compared as selected by `imm8`; the function returns 1 when the
 * resulting mask is zero AND `b` fills the whole vector (|lb| >= 16), and
 * 0 otherwise.
 *
 * imm8 bits read here:
 *   bit  1   - elements are signed (only matters for SIMDE_SIDD_CMP_RANGES)
 *   bits 3:2 - SIMDE_SIDD_CMP_EQUAL_ANY / _RANGES / _EQUAL_EACH / _EQUAL_ORDERED
 *   bit  4   - negate the intermediate result
 *   bit  5   - when negating, only flip bits that belong to valid
 *              elements of `b`
 * Bit 0 (element width) is expected to have been resolved by the caller;
 * bit 6 is not used.
 *
 * Bit j of the intermediate result always refers to element j of `b`.
 * Elements at or past a fragment's length are "invalid" and override the
 * raw comparison:
 *   EQUAL_ANY / RANGES : any invalid operand           -> 0
 *   EQUAL_EACH         : both invalid -> 1, one invalid -> 0
 *   EQUAL_ORDERED      : `a` invalid  -> 1, only `b` invalid -> 0
 */
SIMDE__FUNCTION_ATTRIBUTES
int
simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8) {
  const int num_elems = 128 / 8;
  const int cmp_op = imm8 & 0x0c;
  const int is_signed = (imm8 & 0x02) != 0;
  const int negate = (imm8 & 0x10) != 0;
  const int masked = (imm8 & 0x20) != 0;
  /* |length| saturated to num_elems; the range test comes first so that
   * INT_MIN is never negated. */
  const int a_len = ((la <= -num_elems) || (la >= num_elems)) ? num_elems : ((la < 0) ? -la : la);
  const int b_len = ((lb <= -num_elems) || (lb >= num_elems)) ? num_elems : ((lb < 0) ? -lb : lb);
  simde__m128i_private
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);
  uint32_t int_res_1 = 0;

  for (int j = 0 ; j < num_elems ; j++) {
    const int b_valid = (j < b_len);
    int bit = 0;

    switch (cmp_op) {
      case SIMDE_SIDD_CMP_EQUAL_ANY:
        /* Is b[j] equal to any valid character of the set in a? */
        if (b_valid) {
          for (int i = 0 ; i < a_len ; i++) {
            bit |= (a_.i8[i] == b_.i8[j]);
          }
        }
        break;
      case SIMDE_SIDD_CMP_RANGES:
        /* a holds (low, high) pairs; an incomplete trailing pair never matches. */
        if (b_valid) {
          const int b_val = is_signed ? (int) b_.i8[j] : (int) (uint8_t) b_.i8[j];
          for (int i = 0 ; (i + 1) < a_len ; i += 2) {
            const int lo = is_signed ? (int) a_.i8[i] : (int) (uint8_t) a_.i8[i];
            const int hi = is_signed ? (int) a_.i8[i + 1] : (int) (uint8_t) a_.i8[i + 1];
            bit |= ((b_val >= lo) && (b_val <= hi));
          }
        }
        break;
      case SIMDE_SIDD_CMP_EQUAL_EACH:
        if ((j < a_len) && b_valid) {
          bit = (a_.i8[j] == b_.i8[j]);
        } else {
          bit = ((j >= a_len) && !b_valid);
        }
        break;
      case SIMDE_SIDD_CMP_EQUAL_ORDERED:
      default:
        /* Does a (the needle) match b starting at position j?  Needle
         * characters past a_len match anything, and a match that runs off
         * the end of the vector counts as a (partial) match. */
        bit = 1;
        for (int i = 0 ; (i < (num_elems - j)) && (i < a_len) ; i++) {
          if (((j + i) >= b_len) || (a_.i8[i] != b_.i8[j + i])) {
            bit = 0;
            break;
          }
        }
        break;
    }

    int_res_1 |= ((uint32_t) bit) << j;
  }

  /* Polarity: flip every element bit, or only those of valid b elements. */
  const uint32_t all_bits = (((uint32_t) 1) << num_elems) - 1;
  const uint32_t valid_bits = (((uint32_t) 1) << b_len) - 1;
  uint32_t int_res_2 = int_res_1;
  if (negate) {
    int_res_2 ^= masked ? valid_bits : all_bits;
  }

  return (int_res_2 == 0) && (b_len == num_elems);
}

/* Portable fallback for _mm_cmpestra on 16-bit elements.
 *
 * `a` and `b` are treated as string fragments of up to 8 characters whose
 * explicit lengths are |la| and |lb| (each saturated to 8).  The fragments
 * are compared as selected by `imm8`; the function returns 1 when the
 * resulting mask is zero AND `b` fills the whole vector (|lb| >= 8), and
 * 0 otherwise.
 *
 * imm8 bits read here:
 *   bit  1   - elements are signed (only matters for SIMDE_SIDD_CMP_RANGES)
 *   bits 3:2 - SIMDE_SIDD_CMP_EQUAL_ANY / _RANGES / _EQUAL_EACH / _EQUAL_ORDERED
 *   bit  4   - negate the intermediate result
 *   bit  5   - when negating, only flip bits that belong to valid
 *              elements of `b`
 * Bit 0 (element width) is expected to have been resolved by the caller;
 * bit 6 is not used.
 *
 * Bit j of the intermediate result always refers to element j of `b`.
 * Elements at or past a fragment's length are "invalid" and override the
 * raw comparison:
 *   EQUAL_ANY / RANGES : any invalid operand           -> 0
 *   EQUAL_EACH         : both invalid -> 1, one invalid -> 0
 *   EQUAL_ORDERED      : `a` invalid  -> 1, only `b` invalid -> 0
 */
SIMDE__FUNCTION_ATTRIBUTES
int
simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8) {
  const int num_elems = 128 / 16;
  const int cmp_op = imm8 & 0x0c;
  const int is_signed = (imm8 & 0x02) != 0;
  const int negate = (imm8 & 0x10) != 0;
  const int masked = (imm8 & 0x20) != 0;
  /* |length| saturated to num_elems; the range test comes first so that
   * INT_MIN is never negated. */
  const int a_len = ((la <= -num_elems) || (la >= num_elems)) ? num_elems : ((la < 0) ? -la : la);
  const int b_len = ((lb <= -num_elems) || (lb >= num_elems)) ? num_elems : ((lb < 0) ? -lb : lb);
  simde__m128i_private
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);
  uint32_t int_res_1 = 0;

  for (int j = 0 ; j < num_elems ; j++) {
    const int b_valid = (j < b_len);
    int bit = 0;

    switch (cmp_op) {
      case SIMDE_SIDD_CMP_EQUAL_ANY:
        /* Is b[j] equal to any valid character of the set in a? */
        if (b_valid) {
          for (int i = 0 ; i < a_len ; i++) {
            bit |= (a_.i16[i] == b_.i16[j]);
          }
        }
        break;
      case SIMDE_SIDD_CMP_RANGES:
        /* a holds (low, high) pairs; an incomplete trailing pair never matches. */
        if (b_valid) {
          const int b_val = is_signed ? (int) b_.i16[j] : (int) (uint16_t) b_.i16[j];
          for (int i = 0 ; (i + 1) < a_len ; i += 2) {
            const int lo = is_signed ? (int) a_.i16[i] : (int) (uint16_t) a_.i16[i];
            const int hi = is_signed ? (int) a_.i16[i + 1] : (int) (uint16_t) a_.i16[i + 1];
            bit |= ((b_val >= lo) && (b_val <= hi));
          }
        }
        break;
      case SIMDE_SIDD_CMP_EQUAL_EACH:
        if ((j < a_len) && b_valid) {
          bit = (a_.i16[j] == b_.i16[j]);
        } else {
          bit = ((j >= a_len) && !b_valid);
        }
        break;
      case SIMDE_SIDD_CMP_EQUAL_ORDERED:
      default:
        /* Does a (the needle) match b starting at position j?  Needle
         * characters past a_len match anything, and a match that runs off
         * the end of the vector counts as a (partial) match. */
        bit = 1;
        for (int i = 0 ; (i < (num_elems - j)) && (i < a_len) ; i++) {
          if (((j + i) >= b_len) || (a_.i16[i] != b_.i16[j + i])) {
            bit = 0;
            break;
          }
        }
        break;
    }

    int_res_1 |= ((uint32_t) bit) << j;
  }

  /* Polarity: flip every element bit, or only those of valid b elements. */
  const uint32_t all_bits = (((uint32_t) 1) << num_elems) - 1;
  const uint32_t valid_bits = (((uint32_t) 1) << b_len) - 1;
  uint32_t int_res_2 = int_res_1;
  if (negate) {
    int_res_2 ^= masked ? valid_bits : all_bits;
  }

  return (int_res_2 == 0) && (b_len == num_elems);
}

/* simde_mm_cmpestra(a, la, b, lb, imm8)
 *
 * With native SSE4.2 this forwards straight to _mm_cmpestra.  Otherwise
 * bit 0 of imm8 (SIMDE_SIDD_UWORD_OPS) selects the 16-bit or the 8-bit
 * portable implementation.
 *
 * These are expression macros, so nothing may precede the call in the
 * expansion.  The SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 0x7f) token that
 * used to sit in front of each call has been removed: as a bare prefix it
 * would become part of every expression the macro is used in.
 * NOTE(review): presumably that macro is a declaration-level annotation;
 * if a range check on imm8 is still wanted it belongs on a function
 * declaration, not here - confirm against its definition. */
#if defined(SIMDE_X86_SSE4_2_NATIVE)
#define simde_mm_cmpestra(a, la, b, lb, imm8) \
  _mm_cmpestra(a, la, b, lb, imm8)
#else
#define simde_mm_cmpestra(a, la, b, lb, imm8) \
  (((imm8) & SIMDE_SIDD_UWORD_OPS) \
    ? simde_mm_cmpestra_16_((a), (la), (b), (lb), (imm8)) \
    : simde_mm_cmpestra_8_((a), (la), (b), (lb), (imm8)))
#endif
#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)
#define _mm_cmpestra(a, la, b, lb, imm8) \
  simde_mm_cmpestra(a, la, b, lb, imm8)
#endif

SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) {
Expand Down
107 changes: 107 additions & 0 deletions test/x86/sse4.2.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,116 @@
#define SIMDE_TESTS_CURRENT_ISAX sse4_2
#include <test/x86/test-x86-internal.h>
#include <simde/x86/sse4.2.h>
#include <assert.h>

#if defined(SIMDE_X86_SSE4_2_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
/*
static MunitResult
test_simde_mm_cmpestra_16(const MunitParameter params[], void* data) {
(void) params;
(void) data;

const struct {
simde__m128i a;
int la;
simde__m128i b;
int lb;
const int imm8;
int r;
} test_vec[8] = {

};

printf("\n");
for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
simde__m128i_private a, b;
int la, lb, r;
const int imm8 = (munit_rand_int_range(0, UINT8_MAX) | 1);

munit_rand_memory(sizeof(a), (uint8_t*) &a);
munit_rand_memory(sizeof(b), (uint8_t*) &b);
la = munit_rand_int_range(0, 128/16);
lb = munit_rand_int_range(0, 128/16);

r = simde_mm_cmpestra(simde__m128i_from_private(a), la, simde__m128i_from_private(b), lb, imm8);

printf(" { simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
" INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n",
a.i16[7], a.i16[6], a.i16[5], a.i16[4], a.i16[3], a.i16[2], a.i16[1], a.i16[0]);
printf(" %d ,\n",la);
printf(" simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
" INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n",
b.i16[7], b.i16[6], b.i16[5], b.i16[4], b.i16[3], b.i16[2], b.i16[1], b.i16[0]);
printf(" %d ,\n",lb);
printf(" %d ,\n",imm8);
nemequ marked this conversation as resolved.
Show resolved Hide resolved
printf(" %d },\n",r);
}
return MUNIT_FAIL;

for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
int r = simde_mm_cmpestra(test_vec[i].a, test_vec[i].la, test_vec[i].b, test_vec[i].lb, test_vec[i].imm8);
assert(r == test_vec[i].r);
}

return MUNIT_OK;
}

static MunitResult
test_simde_mm_cmpestra_8(const MunitParameter params[], void* data) {
(void) params;
(void) data;

const struct {
simde__m128i a;
int la;
simde__m128i b;
int lb;
const int imm8;
int r;
} test_vec[8] = {

};

printf("\n");
for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
simde__m128i_private a, b;
int la, lb, r;
const int imm8 = (munit_rand_int_range(0, UINT8_MAX) & 0);

munit_rand_memory(sizeof(a), (uint8_t*) &a);
munit_rand_memory(sizeof(b), (uint8_t*) &b);
la = munit_rand_int_range(0, 128/8);
lb = munit_rand_int_range(0, 128/8);

r = simde_mm_cmpestra(simde__m128i_from_private(a), la, simde__m128i_from_private(b), lb, imm8);

printf(" { simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
" INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
" INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
" INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
a.i8[15], a.i8[14], a.i8[13], a.i8[12], a.i8[11], a.i8[10], a.i8[ 9], a.i8[ 8],
a.i8[ 7], a.i8[ 6], a.i8[ 5], a.i8[ 4], a.i8[ 3], a.i8[ 2], a.i8[ 1], a.i8[ 0]);
printf(" %d ,\n",la);
printf(" simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
" INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
" INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
" INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
b.i8[15], b.i8[14], b.i8[13], b.i8[12], b.i8[11], b.i8[10], b.i8[ 9], b.i8[ 8],
b.i8[ 7], b.i8[ 6], b.i8[ 5], b.i8[ 4], b.i8[ 3], b.i8[ 2], b.i8[ 1], b.i8[ 0]);
printf(" %d ,\n",lb);
printf(" %d ,\n",imm8);
printf(" %d },\n",r);
}
return MUNIT_FAIL;

for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
int r = simde_mm_cmpestra(test_vec[i].a, test_vec[i].la, test_vec[i].b, test_vec[i].lb, test_vec[i].imm8);
assert(r == test_vec[i].r);
}

return MUNIT_OK;
}
*/
static MunitResult
test_simde_mm_cmpgt_epi64(const MunitParameter params[], void* data) {
(void) params;
Expand Down