Skip to content

Commit

Permalink
Optimize equality tests with known literals
Browse files Browse the repository at this point in the history
Inline equality test with lists of a single immediate element (such
as `[42]` or `[a]`). Call a helper code fragment for longer lists
of immediates.

Inline equality test with an empty bitstring even if the type of the
other operand is unknown.
  • Loading branch information
bjorng committed Nov 27, 2023
1 parent db59e04 commit ffe652b
Show file tree
Hide file tree
Showing 2 changed files with 172 additions and 8 deletions.
1 change: 1 addition & 0 deletions erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
int128_to_big_shared
int_div_rem_body_shared
int_div_rem_guard_shared
is_eq_exact_list_shared
is_in_range_shared
is_ge_lt_shared
minus_body_shared
Expand Down
179 changes: 171 additions & 8 deletions erts/emulator/beam/jit/arm/instr_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1480,20 +1480,123 @@ void BeamModuleAssembler::emit_i_test_arity(const ArgLabel &Fail,
a.b_ne(resolve_beam_label(Fail, disp1MB));
}


/*
* Shared code fragment that tests a term for exact equality with a
* literal list by walking both lists in lockstep.
*
* ARG1 = First operand
* ARG2 = Literal list
*
* The result is returned in the Z flag.
*
* Z is set when the terms are equal and clear when they are not.
* ARG1, ARG2, TMP1 and TMP2 are overwritten.
*
* List elements are compared as raw words only. NOTE(review): the
* callers visible in this file only use the fragment after checking
* that the literal is a list of immediates; confirm before reusing it
* for other literals.
*/
void BeamGlobalAssembler::emit_is_eq_exact_list_shared() {
Label loop = a.newLabel(), mid = a.newLabel(), done = a.newLabel();

/* Start with the word comparison at `mid`; no list cell is read
* until both terms are known to be conses. */
a.b(mid);

a.bind(loop);
/* Both terms are conses here. Remove the tags, then load each cell
* with one ldp: the first word (head) goes to TMP1/TMP2 and the
* second word (tail) replaces ARG1/ARG2 for the next iteration. */
emit_untag_ptr(ARG1, ARG1);
emit_untag_ptr(ARG2, ARG2);
a.ldp(TMP1, ARG1, arm::Mem(ARG1));
a.ldp(TMP2, ARG2, arm::Mem(ARG2));
/* Different heads: leave with Z clear from this cmp. */
a.cmp(TMP1, TMP2);
a.b_ne(done);

a.bind(mid);
/* Identical words (for instance both tails are the same immediate):
* the terms are equal, leave with Z set from this cmp. */
a.cmp(ARG1, ARG2);
a.b_eq(done);

/* If not equal, both terms must be CONSes. */
/* The two terms are OR:ed together so that a single tag test covers
* both. The compile-time assertions check that OR:ing a list tag with
* a small or boxed tag does not produce a list tag. */
ERTS_CT_ASSERT(!is_list(make_small(0) | make_list(0)));
ERTS_CT_ASSERT(!is_list(make_boxed(0) | make_list(0)));
a.orr(TMP1, ARG1, ARG2);
/* NOTE(review): presumably branches to `loop` when TMP1 has the list
* tag and falls through otherwise -- confirm against the helper. */
emit_is_not_cons(loop, TMP1);

/* Not equal. Clear Z flag. */
/* NOTE(review): this relies on TMP1 being non-zero here, i.e. on the
* OR of the two tagged terms never being 0 -- confirm. */
a.cmp(TMP1, imm(0));

a.bind(done);
/* Return to the address in the link register (x30). */
a.ret(a64::x30);
}

/*
 * Returns true when `term` is a chain of cons cells that ends in NIL
 * and every element in the chain is an immediate. Returns false as
 * soon as a non-immediate element is found, or when the chain ends
 * in anything other than NIL.
 */
static bool is_list_of_immediates(Eterm term) {
    for (;;) {
        if (!is_list(term)) {
            /* End of the chain: only a NIL tail qualifies. */
            return is_nil(term);
        }

        Eterm *cell = list_val(term);

        if (is_immed(CAR(cell))) {
            /* Element is fine; continue with the tail. */
            term = CDR(cell);
        } else {
            return false;
        }
    }
}

void BeamModuleAssembler::emit_is_eq_exact(const ArgLabel &Fail,
const ArgSource &X,
const ArgSource &Y) {
auto x = load_source(X, ARG1);

if (exact_type<BeamTypeId::Bitstring>(X) && Y.isLiteral()) {
if (Y.isLiteral()) {
Eterm literal = beamfile_get_literal(beam, Y.as<ArgLiteral>().get());

if (is_bitstring(literal) && bitstring_size(literal) == 0) {
arm::Gp boxed_ptr;

comment("simplified equality test with empty binary");
emit_is_boxed(resolve_beam_label(Fail, dispUnknown), X, x.reg);
boxed_ptr = emit_ptr_val(ARG1, x.reg);
if (masked_types<BeamTypeId::MaybeBoxed>(X) ==
BeamTypeId::Bitstring) {
comment("eliminated check for known bitstring");
a.ldur(TMP1, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
a.cbnz(TMP1, resolve_beam_label(Fail, disp1MB));
} else {
a.sub(TMP1, boxed_ptr, imm(TAG_PRIMARY_BOXED));
a.ldp(TMP1, TMP2, arm::Mem(TMP1));
/* The header mask with the binary sub tag bits
* removed (0b110011) is not possible to use as an
* immediate operand for 'and'. (See the note at the
* beginning of the file.) Therefore, use a simpler
* mask (0b110000) that will also clear the primary
* tag bits. That works because we KNOW that a boxed
* pointer always points to a header word and that the
* primary tag for a header is 0. */
const auto mask = _BITSTRING_TAG_MASK & ~_TAG_PRIMARY_MASK;
ERTS_CT_ASSERT(TAG_PRIMARY_HEADER == 0);
ERTS_CT_ASSERT(_TAG_HEADER_HEAP_BITS ==
(_TAG_HEADER_HEAP_BITS & mask));
a.and_(TMP1, TMP1, imm(mask));
a.cmp(TMP1, imm(_TAG_HEADER_HEAP_BITS));
a.ccmp(TMP2, imm(0), imm(NZCV::kNone), imm(arm::CondCode::kEQ));
a.b_ne(resolve_beam_label(Fail, disp1MB));
}

return;
} else if (is_list(literal) && is_immed(CAR(list_val(literal))) &&
is_nil(CDR(list_val(literal)))) {
/* Inline the equality test if the RHS argument is a list
* of one immediate value such as `[42]` or `[a]`. */
arm::Gp cons_ptr;

comment("inlined equality test with %T", literal);
if (!exact_type<BeamTypeId::Cons>(X)) {
emit_is_cons(resolve_beam_label(Fail, dispUnknown), x.reg);
}
cons_ptr = emit_ptr_val(TMP1, x.reg);
a.sub(TMP1, cons_ptr, imm(TAG_PRIMARY_LIST));
a.ldp(TMP2, TMP3, arm::Mem(TMP1));
cmp(TMP2, CAR(list_val(literal)));
mov_imm(TMP4, NIL);
a.ccmp(TMP3, TMP4, imm(NZCV::kNone), imm(arm::CondCode::kEQ));
a.b_ne(resolve_beam_label(Fail, disp1MB));

return;
} else if (is_list_of_immediates(literal)) {
auto y = load_source(Y, ARG2);

comment("optimized equality test with %T", literal);
mov_var(ARG1, x);
mov_var(ARG2, y);
fragment_call(ga->get_is_eq_exact_list_shared());
a.b_ne(resolve_beam_label(Fail, disp1MB));

arm::Gp boxed_ptr = emit_ptr_val(ARG1, x.reg);
a.ldur(TMP1, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
a.cbnz(TMP1, resolve_beam_label(Fail, disp1MB));
return;
}
}
Expand Down Expand Up @@ -1553,15 +1656,75 @@ void BeamModuleAssembler::emit_is_ne_exact(const ArgLabel &Fail,
const ArgSource &Y) {
auto x = load_source(X, ARG1);

if (exact_type<BeamTypeId::Bitstring>(X) && Y.isLiteral()) {
if (Y.isLiteral()) {
Eterm literal = beamfile_get_literal(beam, Y.as<ArgLiteral>().get());

if (is_bitstring(literal) && bitstring_size(literal) == 0) {
arm::Gp boxed_ptr = emit_ptr_val(ARG1, x.reg);
arm::Gp boxed_ptr;
Label next = a.newLabel();

comment("simplified non-equality test with empty binary");
a.ldur(TMP1, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
a.cbz(TMP1, resolve_beam_label(Fail, disp1MB));
emit_is_boxed(next, X, x.reg);
boxed_ptr = emit_ptr_val(ARG1, x.reg);
if (masked_types<BeamTypeId::MaybeBoxed>(X) ==
BeamTypeId::Bitstring) {
comment("eliminated check for known bitstring");
a.ldur(TMP1, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
a.cbz(TMP1, resolve_beam_label(Fail, disp1MB));
} else {
a.sub(TMP1, boxed_ptr, imm(TAG_PRIMARY_BOXED));
a.ldp(TMP1, TMP2, arm::Mem(TMP1));
/* The header mask with the binary sub tag bits
* removed (0b110011) is not possible to use as an
* immediate operand for 'and'. (See the note at the
* beginning of the file.) Therefore, use a simpler
* mask (0b110000) that will also clear the primary
* tag bits. That works because we KNOW that a boxed
* pointer always points to a header word and that the
* primary tag for a header is 0. */
const auto mask = _BITSTRING_TAG_MASK & ~_TAG_PRIMARY_MASK;
ERTS_CT_ASSERT(TAG_PRIMARY_HEADER == 0);
ERTS_CT_ASSERT(_TAG_HEADER_HEAP_BITS ==
(_TAG_HEADER_HEAP_BITS & mask));
a.and_(TMP1, TMP1, imm(mask));
a.cmp(TMP1, imm(_TAG_HEADER_HEAP_BITS));
a.ccmp(TMP2, imm(0), imm(NZCV::kNone), imm(arm::CondCode::kEQ));
a.b_eq(resolve_beam_label(Fail, disp1MB));
}

a.bind(next);

return;
} else if (is_list(literal) && is_immed(CAR(list_val(literal))) &&
is_nil(CDR(list_val(literal)))) {
arm::Gp cons_ptr;
Label next = a.newLabel();

/* Inline the equality test if the RHS argument is a list
* of one immediate value such as `[42]` or `[a]`. */
comment("inlined non-equality test with %T", literal);
if (!exact_type<BeamTypeId::Cons>(X)) {
emit_is_cons(next, x.reg);
}
cons_ptr = emit_ptr_val(TMP1, x.reg);
a.sub(TMP1, cons_ptr, imm(TAG_PRIMARY_LIST));
a.ldp(TMP2, TMP3, arm::Mem(TMP1));
cmp(TMP2, CAR(list_val(literal)));
mov_imm(TMP4, NIL);
a.ccmp(TMP3, TMP4, imm(NZCV::kNone), imm(arm::CondCode::kEQ));
a.b_eq(resolve_beam_label(Fail, disp1MB));

a.bind(next);

return;
} else if (is_list_of_immediates(literal)) {
auto y = load_source(Y, ARG2);

comment("optimized non-equality test with %T", literal);
mov_var(ARG1, x);
mov_var(ARG2, y);
fragment_call(ga->get_is_eq_exact_list_shared());
a.b_eq(resolve_beam_label(Fail, disp1MB));

return;
}
Expand Down

0 comments on commit ffe652b

Please sign in to comment.