Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sse4.2: first attempt at implementing mm_cmpestra #280

Closed
226 changes: 226 additions & 0 deletions simde/x86/sse4.2.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,232 @@ SIMDE__BEGIN_DECLS
# define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES
#endif

/* Aggregation-operation selectors for the SSE4.2 string-compare control
 * byte.  The selector occupies bits [3:2] of imm8, so the values are
 * multiples of 4 and can be OR'ed directly into imm8; they match the
 * _SIDD_CMP_* constants from <nmmintrin.h>. */
#define SIMDE_SIDD_CMP_EQUAL_ANY 0x00
#define SIMDE_SIDD_CMP_RANGES 0x04
#define SIMDE_SIDD_CMP_EQUAL_EACH 0x08
#define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c
nemequ marked this conversation as resolved.
Show resolved Hide resolved

/* When native aliases are enabled, also expose the aggregation selectors
 * under their unprefixed _SIDD_* names. */
#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)
#define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY
#define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES
#define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH
#define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED
#endif

/* Portable fallback for the explicit-length string compare "above" result,
 * treating a and b as sixteen 8-bit elements.
 *
 * la, lb: explicit lengths of a and b in elements.  The magnitude is used
 *         and clamped to 16; elements at or past the length are invalid.
 * imm8:   control byte, decoded as
 *           bit 1       signed element comparison (only affects ranges)
 *           bits [3:2]  aggregation: 0 equal-any, 1 ranges,
 *                       2 equal-each, 3 equal-ordered
 *           bits [5:4]  polarity: bit 4 negates the result, bit 5
 *                       restricts the negation to valid positions of b
 *
 * Returns 1 when the post-polarity result mask is zero and b is full
 * length (|lb| >= 16), otherwise 0.
 *
 * NOTE(review): written from the PCMPESTRI/PCMPESTRM description in
 * Intel's SDM; confirm against native output once test vectors exist. */
SIMDE__FUNCTION_ATTRIBUTES
int
simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8) {
  enum { n_elem = 128 / 8 };
  const int upper_bound = n_elem - 1;
  const int32_t all_ones = (1 << n_elem) - 1;

  /* The selectors are decoded to small integers here so the switch
   * statements below do not depend on how the selector macros are encoded. */
  const int is_signed = (imm8 >> 1) & 1;
  const int cmp_op = (imm8 >> 2) & 3;
  const int polarity = (imm8 >> 4) & 3;

  /* |length| saturated to the element count; written without negating
   * values below -n_elem so INT_MIN cannot overflow. */
  const int a_len = (la < 0) ? ((la < -n_elem) ? n_elem : -la) : ((la > n_elem) ? n_elem : la);
  const int b_len = (lb < 0) ? ((lb < -n_elem) ? n_elem : -lb) : ((lb > n_elem) ? n_elem : lb);

  simde__m128i_private
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);

  /* bool_res[j] bit i holds the comparison of b element j against a
   * element i.  Sixteen 16-bit rows do not fit in a 128-bit vector, so a
   * plain array is used. */
  uint16_t bool_res[n_elem];
  int32_t int_res_1 = 0;
  int32_t int_res_2 = 0;

  for (int j = 0 ; j <= upper_bound ; j++) {
    const int b_invalid = (j >= b_len);
    const int bv = is_signed ? (int) b_.i8[j] : (int) (uint8_t) b_.i8[j];
    uint16_t row = 0;

    for (int i = 0 ; i <= upper_bound ; i++) {
      const int a_invalid = (i >= a_len);
      const int av = is_signed ? (int) a_.i8[i] : (int) (uint8_t) a_.i8[i];
      int bit;

      if (a_invalid || b_invalid) {
        /* Forced results for comparisons that involve an invalid element. */
        switch (cmp_op) {
          case 2: /* equal each: true only when both sides have ended */
            bit = (a_invalid && b_invalid);
            break;
          case 3: /* equal ordered: true once a (the needle) has ended */
            bit = a_invalid;
            break;
          default: /* equal any, ranges: always false */
            bit = 0;
            break;
        }
      } else if (cmp_op == 1) {
        /* ranges: even a elements are lower bounds, odd ones upper bounds */
        bit = (i & 1) ? (bv <= av) : (bv >= av);
      } else {
        bit = (bv == av);
      }

      row |= (uint16_t) (bit << i);
    }

    bool_res[j] = row;
  }

  /* Aggregate each row into one result bit per element of b. */
  switch (cmp_op) {
    case 0: /* equal any: b[j] matches some element of a */
      for (int j = 0 ; j <= upper_bound ; j++) {
        if (bool_res[j] != 0)
          int_res_1 |= (1 << j);
      }
      break;
    case 1: /* ranges: b[j] lies inside some (a[i], a[i + 1]) pair */
      for (int j = 0 ; j <= upper_bound ; j++) {
        for (int i = 0 ; i < upper_bound ; i += 2) {
          if ((bool_res[j] >> i) & (bool_res[j] >> (i + 1)) & 1)
            int_res_1 |= (1 << j);
        }
      }
      break;
    case 2: /* equal each: element-wise comparison, i.e. the diagonal */
      for (int j = 0 ; j <= upper_bound ; j++) {
        int_res_1 |= (((bool_res[j] >> j) & 1) << j);
      }
      break;
    default: /* equal ordered: a matches b starting at position j */
      int_res_1 = all_ones;
      for (int j = 0 ; j <= upper_bound ; j++) {
        int k = j;
        for (int i = 0 ; i <= (upper_bound - j) ; i++, k++) {
          if (!((bool_res[k] >> i) & 1))
            int_res_1 &= ~(1 << j);
        }
      }
      break;
  }

  /* Apply the polarity. */
  switch (polarity) {
    case 1: /* negate every bit */
      int_res_2 = ~int_res_1 & all_ones;
      break;
    case 3: /* negate only the bits for valid elements of b */
      int_res_2 = (int_res_1 ^ ((1 << b_len) - 1)) & all_ones;
      break;
    default: /* bit 4 clear: leave the result as-is */
      int_res_2 = int_res_1;
      break;
  }

  return ((int_res_2 == 0) & (b_len > upper_bound));
}

/* Portable fallback for the explicit-length string compare "above" result,
 * treating a and b as eight 16-bit elements.
 *
 * la, lb: explicit lengths of a and b in elements.  The magnitude is used
 *         and clamped to 8; elements at or past the length are invalid.
 * imm8:   control byte, decoded as
 *           bit 1       signed element comparison (only affects ranges)
 *           bits [3:2]  aggregation: 0 equal-any, 1 ranges,
 *                       2 equal-each, 3 equal-ordered
 *           bits [5:4]  polarity: bit 4 negates the result, bit 5
 *                       restricts the negation to valid positions of b
 *
 * Returns 1 when the post-polarity result mask is zero and b is full
 * length (|lb| >= 8), otherwise 0.
 *
 * NOTE(review): written from the PCMPESTRI/PCMPESTRM description in
 * Intel's SDM; confirm against native output once test vectors exist. */
SIMDE__FUNCTION_ATTRIBUTES
int
simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8) {
  enum { n_elem = 128 / 16 };
  const int upper_bound = n_elem - 1;
  const int32_t all_ones = (1 << n_elem) - 1;

  /* The selectors are decoded to small integers here so the switch
   * statements below do not depend on how the selector macros are encoded. */
  const int is_signed = (imm8 >> 1) & 1;
  const int cmp_op = (imm8 >> 2) & 3;
  const int polarity = (imm8 >> 4) & 3;

  /* |length| saturated to the element count; written without negating
   * values below -n_elem so INT_MIN cannot overflow. */
  const int a_len = (la < 0) ? ((la < -n_elem) ? n_elem : -la) : ((la > n_elem) ? n_elem : la);
  const int b_len = (lb < 0) ? ((lb < -n_elem) ? n_elem : -lb) : ((lb > n_elem) ? n_elem : lb);

  simde__m128i_private
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);

  /* bool_res[j] bit i holds the comparison of b element j against a
   * element i. */
  uint16_t bool_res[n_elem];
  int32_t int_res_1 = 0;
  int32_t int_res_2 = 0;

  for (int j = 0 ; j <= upper_bound ; j++) {
    const int b_invalid = (j >= b_len);
    const int bv = is_signed ? (int) b_.i16[j] : (int) (uint16_t) b_.i16[j];
    uint16_t row = 0;

    for (int i = 0 ; i <= upper_bound ; i++) {
      const int a_invalid = (i >= a_len);
      const int av = is_signed ? (int) a_.i16[i] : (int) (uint16_t) a_.i16[i];
      int bit;

      if (a_invalid || b_invalid) {
        /* Forced results for comparisons that involve an invalid element. */
        switch (cmp_op) {
          case 2: /* equal each: true only when both sides have ended */
            bit = (a_invalid && b_invalid);
            break;
          case 3: /* equal ordered: true once a (the needle) has ended */
            bit = a_invalid;
            break;
          default: /* equal any, ranges: always false */
            bit = 0;
            break;
        }
      } else if (cmp_op == 1) {
        /* ranges: even a elements are lower bounds, odd ones upper bounds */
        bit = (i & 1) ? (bv <= av) : (bv >= av);
      } else {
        bit = (bv == av);
      }

      row |= (uint16_t) (bit << i);
    }

    bool_res[j] = row;
  }

  /* Aggregate each row into one result bit per element of b. */
  switch (cmp_op) {
    case 0: /* equal any: b[j] matches some element of a */
      for (int j = 0 ; j <= upper_bound ; j++) {
        if (bool_res[j] != 0)
          int_res_1 |= (1 << j);
      }
      break;
    case 1: /* ranges: b[j] lies inside some (a[i], a[i + 1]) pair */
      for (int j = 0 ; j <= upper_bound ; j++) {
        for (int i = 0 ; i < upper_bound ; i += 2) {
          if ((bool_res[j] >> i) & (bool_res[j] >> (i + 1)) & 1)
            int_res_1 |= (1 << j);
        }
      }
      break;
    case 2: /* equal each: element-wise comparison, i.e. the diagonal */
      for (int j = 0 ; j <= upper_bound ; j++) {
        int_res_1 |= (((bool_res[j] >> j) & 1) << j);
      }
      break;
    default: /* equal ordered: a matches b starting at position j */
      int_res_1 = all_ones;
      for (int j = 0 ; j <= upper_bound ; j++) {
        int k = j;
        for (int i = 0 ; i <= (upper_bound - j) ; i++, k++) {
          if (!((bool_res[k] >> i) & 1))
            int_res_1 &= ~(1 << j);
        }
      }
      break;
  }

  /* Apply the polarity. */
  switch (polarity) {
    case 1: /* negate every bit */
      int_res_2 = ~int_res_1 & all_ones;
      break;
    case 3: /* negate only the bits for valid elements of b */
      int_res_2 = (int_res_1 ^ ((1 << b_len) - 1)) & all_ones;
      break;
    default: /* bit 4 clear: leave the result as-is */
      int_res_2 = int_res_1;
      break;
  }

  return ((int_res_2 == 0) & (b_len > upper_bound));
}

/* Explicit-length string compare returning the "above" result.
 *
 * Uses the native _mm_cmpestra when SIMDE_X86_SSE4_2_NATIVE is defined;
 * otherwise dispatches to the portable helper for the element width that
 * imm8 selects.  Bit 0 of imm8 set means 16-bit elements, bit 0 clear
 * means 8-bit elements. */
SIMDE__FUNCTION_ATTRIBUTES
int
simde_mm_cmpestra(simde__m128i a, int la, simde__m128i b, int lb, const int imm8){
  #if defined(SIMDE_X86_SSE4_2_NATIVE)
    return _mm_cmpestra(a, la, b, lb, imm8);
  #else
    if (imm8 & 1)
      return simde_mm_cmpestra_16_(a, la, b, lb, imm8);
    return simde_mm_cmpestra_8_(a, la, b, lb, imm8);
  #endif
}
#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)
#  define _mm_cmpestra(a, la, b, lb, imm8) simde_mm_cmpestra(a, la, b, lb, imm8)
#endif

SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) {
Expand Down
107 changes: 107 additions & 0 deletions test/x86/sse4.2.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,116 @@
#define SIMDE_TESTS_CURRENT_ISAX sse4_2
#include <test/x86/test-x86-internal.h>
#include <simde/x86/sse4.2.h>
#include <assert.h>

#if defined(SIMDE_X86_SSE4_2_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)

static MunitResult
test_simde_mm_cmpestra_16(const MunitParameter params[], void* data) {
(void) params;
(void) data;

const struct {
simde__m128i a;
int la;
simde__m128i b;
int lb;
const int imm8;
int r;
} test_vec[8] = {

};

printf("\n");
for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
simde__m128i_private a, b;
int la, lb, r;
const int imm8 = (munit_rand_int_range(0, UINT8_MAX) | 1);

munit_rand_memory(sizeof(a), (uint8_t*) &a);
munit_rand_memory(sizeof(b), (uint8_t*) &b);
la = munit_rand_int_range(0, 128/16);
lb = munit_rand_int_range(0, 128/16);

r = simde_mm_cmpestra(simde__m128i_from_private(a), la, simde__m128i_from_private(b), lb, imm8);

printf(" { simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
" INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n",
a.i16[7], a.i16[6], a.i16[5], a.i16[4], a.i16[3], a.i16[2], a.i16[1], a.i16[0]);
printf(" %d ,\n",la);
printf(" simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
" INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n",
b.i16[7], b.i16[6], b.i16[5], b.i16[4], b.i16[3], b.i16[2], b.i16[1], b.i16[0]);
printf(" %d ,\n",lb);
printf(" %d ,\n",imm8);
nemequ marked this conversation as resolved.
Show resolved Hide resolved
printf(" %d },\n",r);
}
return MUNIT_FAIL;

for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
int r = simde_mm_cmpestra(test_vec[i].a, test_vec[i].la, test_vec[i].b, test_vec[i].lb, test_vec[i].imm8);
assert(r == test_vec[i].r);
}

return MUNIT_OK;
}

/* Test for simde_mm_cmpestra on 8-bit elements.
 *
 * test_vec is still an empty placeholder.  The first loop is a generator:
 * it prints randomly produced inputs together with the result of
 * simde_mm_cmpestra in the shape of test_vec entries, then the function
 * returns MUNIT_FAIL, so the verification loop below it is not reached
 * until the generator part is removed. */
static MunitResult
test_simde_mm_cmpestra_8(const MunitParameter params[], void* data) {
  (void) params;
  (void) data;

  const struct {
    simde__m128i a;
    int la;
    simde__m128i b;
    int lb;
    const int imm8;
    int r;
  } test_vec[8] = {

  };

  printf("\n");
  for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
    simde__m128i_private a, b;
    int la, lb, r;
    /* Clear only bit 0 so the 8-bit element format is selected while the
     * remaining control bits stay random ("& 0" zeroed the whole byte). */
    const int imm8 = (munit_rand_int_range(0, UINT8_MAX) & ~1);

    munit_rand_memory(sizeof(a), (uint8_t*) &a);
    munit_rand_memory(sizeof(b), (uint8_t*) &b);
    la = munit_rand_int_range(0, 128/8);
    lb = munit_rand_int_range(0, 128/8);

    r = simde_mm_cmpestra(simde__m128i_from_private(a), la, simde__m128i_from_private(b), lb, imm8);

    printf(" { simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
           " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
           " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
           " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
           a.i8[15], a.i8[14], a.i8[13], a.i8[12], a.i8[11], a.i8[10], a.i8[ 9], a.i8[ 8],
           a.i8[ 7], a.i8[ 6], a.i8[ 5], a.i8[ 4], a.i8[ 3], a.i8[ 2], a.i8[ 1], a.i8[ 0]);
    printf(" %d ,\n",la);
    printf(" simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
           " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
           " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
           " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
           b.i8[15], b.i8[14], b.i8[13], b.i8[12], b.i8[11], b.i8[10], b.i8[ 9], b.i8[ 8],
           b.i8[ 7], b.i8[ 6], b.i8[ 5], b.i8[ 4], b.i8[ 3], b.i8[ 2], b.i8[ 1], b.i8[ 0]);
    printf(" %d ,\n",lb);
    printf(" %d ,\n",imm8);
    printf(" %d },\n",r);
  }
  return MUNIT_FAIL;

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    int r = simde_mm_cmpestra(test_vec[i].a, test_vec[i].la, test_vec[i].b, test_vec[i].lb, test_vec[i].imm8);
    /* Report through the test framework; a plain assert() would vanish
     * under NDEBUG and abort the whole runner on failure. */
    munit_assert_int(r, ==, test_vec[i].r);
  }

  return MUNIT_OK;
}

static MunitResult
test_simde_mm_cmpgt_epi64(const MunitParameter params[], void* data) {
(void) params;
Expand Down