Skip to content

Commit

Permalink
sse4.2: added the implementation for mm_cmpestra
Browse files Browse the repository at this point in the history
  • Loading branch information
masterchef2209 committed May 16, 2020
1 parent 15a47fc commit e8628a7
Show file tree
Hide file tree
Showing 2 changed files with 635 additions and 0 deletions.
225 changes: 225 additions & 0 deletions simde/x86/sse4.2.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,231 @@ SIMDE_BEGIN_DECLS_
#define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK
#endif

/* Portable fallback for _mm_cmpestra operating on 8-bit elements.
 *
 * Compares the explicit-length strings held in a (la elements) and b
 * (lb elements) as selected by imm8, then returns 1 when the final
 * result mask is zero AND every element of b is valid (|lb| >= 16);
 * otherwise returns 0.
 *
 * imm8 bits consumed here:
 *   bit  1   : elements are signed (only affects SIMDE_SIDD_CMP_RANGES)
 *   bits 3:2 : aggregation mode, one of the SIMDE_SIDD_CMP_* values
 *   bit  4   : negate the intermediate mask
 *   bit  5   : when negating, leave bits of invalid b elements untouched
 *
 * Lengths are interpreted as absolute values saturated to the element
 * count.  Bit j of the intermediate mask always describes element j of b. */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 127) {
  const int cmp_op = imm8 & 0x0c;
  const int is_signed = (imm8 & 0x02) != 0;
  const int negate = (imm8 & 0x10) != 0;
  const int masked = (imm8 & 0x20) != 0;
  const int count = 128 / 8;
  /* |length| saturated to count; written so that INT_MIN is never negated. */
  const int a_len = (la < 0) ? ((la < -count) ? count : -la) : ((la > count) ? count : la);
  const int b_len = (lb < 0) ? ((lb < -count) ? count : -lb) : ((lb > count) ? count : lb);
  simde__m128i_private
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);
  int32_t int_res_1 = 0;
  int32_t int_res_2;

  switch (cmp_op) {
    case SIMDE_SIDD_CMP_EQUAL_ANY:
      /* Bit j is set when b[j] equals any valid element of a.  Any pair
       * involving an invalid element is forced to false, so only the
       * valid prefixes need to be visited. */
      for (int j = 0 ; j < b_len ; j++) {
        for (int i = 0 ; i < a_len ; i++) {
          if (a_.i8[i] == b_.i8[j]) {
            int_res_1 |= (1 << j);
            break;
          }
        }
      }
      break;

    case SIMDE_SIDD_CMP_RANGES:
      /* a holds (low, high) pairs; bit j is set when b[j] lies inside at
       * least one fully valid pair.  "& 0xff" yields the unsigned value
       * of an element without needing a cast. */
      for (int j = 0 ; j < b_len ; j++) {
        const int bv = is_signed ? b_.i8[j] : (b_.i8[j] & 0xff);
        for (int i = 0 ; (i + 1) < a_len ; i += 2) {
          const int lo = is_signed ? a_.i8[i] : (a_.i8[i] & 0xff);
          const int hi = is_signed ? a_.i8[i + 1] : (a_.i8[i + 1] & 0xff);
          if ((bv >= lo) && (bv <= hi)) {
            int_res_1 |= (1 << j);
            break;
          }
        }
      }
      break;

    case SIMDE_SIDD_CMP_EQUAL_EACH:
      /* Position-wise equality.  Two invalid elements count as equal, a
       * valid/invalid mix counts as different. */
      for (int i = 0 ; i < count ; i++) {
        const int a_valid = i < a_len;
        const int b_valid = i < b_len;
        const int bit = (a_valid && b_valid) ? (a_.i8[i] == b_.i8[i]) : (!a_valid && !b_valid);
        int_res_1 |= (bit << i);
      }
      break;

    default: /* SIMDE_SIDD_CMP_EQUAL_ORDERED */
      /* Bit j is set when the valid part of a matches b starting at
       * position j.  Invalid elements of a match anything (so the scan
       * stops at a_len); a valid element of a never matches an invalid
       * element of b.  Positions past the end of the vector are ignored. */
      for (int j = 0 ; j < count ; j++) {
        int match = 1;
        for (int i = 0 ; (i < a_len) && ((j + i) < count) ; i++) {
          if (((j + i) >= b_len) || (a_.i8[i] != b_.i8[j + i])) {
            match = 0;
            break;
          }
        }
        int_res_1 |= (match << j);
      }
      break;
  }

  /* Optional negation: either every bit, or only the bits that belong to
   * valid elements of b when the "masked" flag is set. */
  int_res_2 = int_res_1;
  if (negate) {
    int_res_2 ^= masked ? ((1 << b_len) - 1) : ((1 << count) - 1);
  }

  return (int_res_2 == 0) && (b_len == count);
}

/* Portable fallback for _mm_cmpestra operating on 16-bit elements.
 *
 * Compares the explicit-length strings held in a (la elements) and b
 * (lb elements) as selected by imm8, then returns 1 when the final
 * result mask is zero AND every element of b is valid (|lb| >= 8);
 * otherwise returns 0.
 *
 * imm8 bits consumed here:
 *   bit  1   : elements are signed (only affects SIMDE_SIDD_CMP_RANGES)
 *   bits 3:2 : aggregation mode, one of the SIMDE_SIDD_CMP_* values
 *   bit  4   : negate the intermediate mask
 *   bit  5   : when negating, leave bits of invalid b elements untouched
 *
 * Lengths are interpreted as absolute values saturated to the element
 * count.  Bit j of the intermediate mask always describes element j of b. */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 127) {
  const int cmp_op = imm8 & 0x0c;
  const int is_signed = (imm8 & 0x02) != 0;
  const int negate = (imm8 & 0x10) != 0;
  const int masked = (imm8 & 0x20) != 0;
  const int count = 128 / 16;
  /* |length| saturated to count; written so that INT_MIN is never negated. */
  const int a_len = (la < 0) ? ((la < -count) ? count : -la) : ((la > count) ? count : la);
  const int b_len = (lb < 0) ? ((lb < -count) ? count : -lb) : ((lb > count) ? count : lb);
  simde__m128i_private
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);
  int32_t int_res_1 = 0;
  int32_t int_res_2;

  switch (cmp_op) {
    case SIMDE_SIDD_CMP_EQUAL_ANY:
      /* Bit j is set when b[j] equals any valid element of a.  Any pair
       * involving an invalid element is forced to false, so only the
       * valid prefixes need to be visited. */
      for (int j = 0 ; j < b_len ; j++) {
        for (int i = 0 ; i < a_len ; i++) {
          if (a_.i16[i] == b_.i16[j]) {
            int_res_1 |= (1 << j);
            break;
          }
        }
      }
      break;

    case SIMDE_SIDD_CMP_RANGES:
      /* a holds (low, high) pairs; bit j is set when b[j] lies inside at
       * least one fully valid pair.  "& 0xffff" yields the unsigned value
       * of an element without needing a cast. */
      for (int j = 0 ; j < b_len ; j++) {
        const int bv = is_signed ? b_.i16[j] : (b_.i16[j] & 0xffff);
        for (int i = 0 ; (i + 1) < a_len ; i += 2) {
          const int lo = is_signed ? a_.i16[i] : (a_.i16[i] & 0xffff);
          const int hi = is_signed ? a_.i16[i + 1] : (a_.i16[i + 1] & 0xffff);
          if ((bv >= lo) && (bv <= hi)) {
            int_res_1 |= (1 << j);
            break;
          }
        }
      }
      break;

    case SIMDE_SIDD_CMP_EQUAL_EACH:
      /* Position-wise equality.  Two invalid elements count as equal, a
       * valid/invalid mix counts as different. */
      for (int i = 0 ; i < count ; i++) {
        const int a_valid = i < a_len;
        const int b_valid = i < b_len;
        const int bit = (a_valid && b_valid) ? (a_.i16[i] == b_.i16[i]) : (!a_valid && !b_valid);
        int_res_1 |= (bit << i);
      }
      break;

    default: /* SIMDE_SIDD_CMP_EQUAL_ORDERED */
      /* Bit j is set when the valid part of a matches b starting at
       * position j.  Invalid elements of a match anything (so the scan
       * stops at a_len); a valid element of a never matches an invalid
       * element of b.  Positions past the end of the vector are ignored. */
      for (int j = 0 ; j < count ; j++) {
        int match = 1;
        for (int i = 0 ; (i < a_len) && ((j + i) < count) ; i++) {
          if (((j + i) >= b_len) || (a_.i16[i] != b_.i16[j + i])) {
            match = 0;
            break;
          }
        }
        int_res_1 |= (match << j);
      }
      break;
  }

  /* Optional negation: either every bit, or only the bits that belong to
   * valid elements of b when the "masked" flag is set. */
  int_res_2 = int_res_1;
  if (negate) {
    int_res_2 ^= masked ? ((1 << b_len) - 1) : ((1 << count) - 1);
  }

  return (int_res_2 == 0) && (b_len == count);
}

/* simde_mm_cmpestra: forwards to the native _mm_cmpestra when
 * SIMDE_X86_SSE4_2_NATIVE is defined; otherwise dispatches on the
 * SIMDE_SIDD_UWORD_OPS bit of imm8 to the 16-bit or the 8-bit portable
 * helper. */
#if !defined(SIMDE_X86_SSE4_2_NATIVE)
  #define simde_mm_cmpestra(a, la, b, lb, imm8) \
    ((((imm8) & SIMDE_SIDD_UWORD_OPS) == 0) \
      ? simde_mm_cmpestra_8_((a), (la), (b), (lb), (imm8)) \
      : simde_mm_cmpestra_16_((a), (la), (b), (lb), (imm8)))
#else
  #define simde_mm_cmpestra(a, la, b, lb, imm8) _mm_cmpestra(a, la, b, lb, imm8)
#endif
/* Expose the Intel spelling when native aliases are requested. */
#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)
  #define _mm_cmpestra(a, la, b, lb, imm8) simde_mm_cmpestra(a, la, b, lb, imm8)
#endif

SIMDE_FUNCTION_ATTRIBUTES
int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8)
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 127) {
Expand Down
Loading

0 comments on commit e8628a7

Please sign in to comment.