diff --git a/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl b/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl index 0f5f8446888e..b6490a22237a 100644 --- a/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl +++ b/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl @@ -92,6 +92,7 @@ int128_to_big_shared int_div_rem_body_shared int_div_rem_guard_shared + is_eq_exact_list_shared is_in_range_shared is_ge_lt_shared minus_body_shared diff --git a/erts/emulator/beam/jit/arm/instr_common.cpp b/erts/emulator/beam/jit/arm/instr_common.cpp index 393aa0b55403..7e46de956aad 100644 --- a/erts/emulator/beam/jit/arm/instr_common.cpp +++ b/erts/emulator/beam/jit/arm/instr_common.cpp @@ -1480,20 +1480,123 @@ void BeamModuleAssembler::emit_i_test_arity(const ArgLabel &Fail, a.b_ne(resolve_beam_label(Fail, disp1MB)); } + +/* + * ARG1 = First operand + * ARG2 = Literal list + * + * The result is returned in the Z flag (set when the terms are equal, clear otherwise). + */ +void BeamGlobalAssembler::emit_is_eq_exact_list_shared() { + Label loop = a.newLabel(), mid = a.newLabel(), done = a.newLabel(); + + a.b(mid); + + a.bind(loop); + emit_untag_ptr(ARG1, ARG1); + emit_untag_ptr(ARG2, ARG2); + a.ldp(TMP1, ARG1, arm::Mem(ARG1)); + a.ldp(TMP2, ARG2, arm::Mem(ARG2)); + a.cmp(TMP1, TMP2); + a.b_ne(done); + + a.bind(mid); + a.cmp(ARG1, ARG2); + a.b_eq(done); + + /* If not equal, both terms must be CONSes. */ + ERTS_CT_ASSERT(!is_list(make_small(0) | make_list(0))); + ERTS_CT_ASSERT(!is_list(make_boxed(0) | make_list(0))); + a.orr(TMP1, ARG1, ARG2); + emit_is_not_cons(loop, TMP1); + + /* Not equal. Clear Z flag. 
*/ + a.cmp(TMP1, imm(0)); + + a.bind(done); + a.ret(a64::x30); +} + +static bool is_list_of_immediates(Eterm term) { /* True only for a NIL-terminated list whose elements are all immediates. */ + while (is_list(term)) { + Eterm* cons = list_val(term); + if (!is_immed(CAR(cons))) { + return false; + } + term = CDR(cons); + } + return is_nil(term); +} + void BeamModuleAssembler::emit_is_eq_exact(const ArgLabel &Fail, const ArgSource &X, const ArgSource &Y) { auto x = load_source(X, ARG1); - if (exact_type(X) && Y.isLiteral()) { + if (Y.isLiteral()) { Eterm literal = beamfile_get_literal(beam, Y.as().get()); if (is_bitstring(literal) && bitstring_size(literal) == 0) { + arm::Gp boxed_ptr; + comment("simplified equality test with empty binary"); + emit_is_boxed(resolve_beam_label(Fail, dispUnknown), X, x.reg); + boxed_ptr = emit_ptr_val(ARG1, x.reg); + if (masked_types(X) == + BeamTypeId::Bitstring) { + comment("eliminated check for known bitstring"); + a.ldur(TMP1, emit_boxed_val(boxed_ptr, sizeof(Eterm))); + a.cbnz(TMP1, resolve_beam_label(Fail, disp1MB)); + } else { + a.sub(TMP1, boxed_ptr, imm(TAG_PRIMARY_BOXED)); + a.ldp(TMP1, TMP2, arm::Mem(TMP1)); + /* The header mask with the binary sub tag bits + * removed (0b110011) is not possible to use as an + * immediate operand for 'and'. (See the note at the + * beginning of the file.) Therefore, use a simpler + * mask (0b110000) that will also clear the primary + * tag bits. That works because we KNOW that a boxed + * pointer always points to a header word and that the + * primary tag for a header is 0. 
*/ + const auto mask = _BITSTRING_TAG_MASK & ~_TAG_PRIMARY_MASK; + ERTS_CT_ASSERT(TAG_PRIMARY_HEADER == 0); + ERTS_CT_ASSERT(_TAG_HEADER_HEAP_BITS == + (_TAG_HEADER_HEAP_BITS & mask)); + a.and_(TMP1, TMP1, imm(mask)); + a.cmp(TMP1, imm(_TAG_HEADER_HEAP_BITS)); + a.ccmp(TMP2, imm(0), imm(NZCV::kNone), imm(arm::CondCode::kEQ)); + a.b_ne(resolve_beam_label(Fail, disp1MB)); + } + + return; + } else if (is_list(literal) && is_immed(CAR(list_val(literal))) && + is_nil(CDR(list_val(literal)))) { + /* Inline the equality test if the RHS argument is a list + * of one immediate value such as `[42]` or `[a]`. */ + arm::Gp cons_ptr; + + comment("inlined equality test with %T", literal); + if (!exact_type(X)) { + emit_is_cons(resolve_beam_label(Fail, dispUnknown), x.reg); + } + cons_ptr = emit_ptr_val(TMP1, x.reg); + a.sub(TMP1, cons_ptr, imm(TAG_PRIMARY_LIST)); + a.ldp(TMP2, TMP3, arm::Mem(TMP1)); + cmp(TMP2, CAR(list_val(literal))); + mov_imm(TMP4, NIL); + a.ccmp(TMP3, TMP4, imm(NZCV::kNone), imm(arm::CondCode::kEQ)); + a.b_ne(resolve_beam_label(Fail, disp1MB)); + + return; + } else if (is_list_of_immediates(literal)) { + auto y = load_source(Y, ARG2); + + comment("optimized equality test with %T", literal); + mov_var(ARG1, x); + mov_var(ARG2, y); + fragment_call(ga->get_is_eq_exact_list_shared()); + a.b_ne(resolve_beam_label(Fail, disp1MB)); - arm::Gp boxed_ptr = emit_ptr_val(ARG1, x.reg); - a.ldur(TMP1, emit_boxed_val(boxed_ptr, sizeof(Eterm))); - a.cbnz(TMP1, resolve_beam_label(Fail, disp1MB)); return; } } @@ -1553,15 +1656,75 @@ void BeamModuleAssembler::emit_is_ne_exact(const ArgLabel &Fail, const ArgSource &Y) { auto x = load_source(X, ARG1); - if (exact_type(X) && Y.isLiteral()) { + if (Y.isLiteral()) { Eterm literal = beamfile_get_literal(beam, Y.as().get()); if (is_bitstring(literal) && bitstring_size(literal) == 0) { - arm::Gp boxed_ptr = emit_ptr_val(ARG1, x.reg); + arm::Gp boxed_ptr; + Label next = a.newLabel(); comment("simplified non-equality test with empty 
binary"); - a.ldur(TMP1, emit_boxed_val(boxed_ptr, sizeof(Eterm))); - a.cbz(TMP1, resolve_beam_label(Fail, disp1MB)); + emit_is_boxed(next, X, x.reg); + boxed_ptr = emit_ptr_val(ARG1, x.reg); + if (masked_types(X) == + BeamTypeId::Bitstring) { + comment("eliminated check for known bitstring"); + a.ldur(TMP1, emit_boxed_val(boxed_ptr, sizeof(Eterm))); + a.cbz(TMP1, resolve_beam_label(Fail, disp1MB)); + } else { + a.sub(TMP1, boxed_ptr, imm(TAG_PRIMARY_BOXED)); + a.ldp(TMP1, TMP2, arm::Mem(TMP1)); + /* The header mask with the binary sub tag bits + * removed (0b110011) is not possible to use as an + * immediate operand for 'and'. (See the note at the + * beginning of the file.) Therefore, use a simpler + * mask (0b110000) that will also clear the primary + * tag bits. That works because we KNOW that a boxed + * pointer always points to a header word and that the + * primary tag for a header is 0. */ + const auto mask = _BITSTRING_TAG_MASK & ~_TAG_PRIMARY_MASK; + ERTS_CT_ASSERT(TAG_PRIMARY_HEADER == 0); + ERTS_CT_ASSERT(_TAG_HEADER_HEAP_BITS == + (_TAG_HEADER_HEAP_BITS & mask)); + a.and_(TMP1, TMP1, imm(mask)); + a.cmp(TMP1, imm(_TAG_HEADER_HEAP_BITS)); + a.ccmp(TMP2, imm(0), imm(NZCV::kNone), imm(arm::CondCode::kEQ)); + a.b_eq(resolve_beam_label(Fail, disp1MB)); + } + + a.bind(next); + + return; + } else if (is_list(literal) && is_immed(CAR(list_val(literal))) && + is_nil(CDR(list_val(literal)))) { + arm::Gp cons_ptr; + Label next = a.newLabel(); + + /* Inline the non-equality test if the RHS argument is a list + * of one immediate value such as `[42]` or `[a]`. 
*/ + comment("inlined non-equality test with %T", literal); + if (!exact_type(X)) { + emit_is_cons(next, x.reg); + } + cons_ptr = emit_ptr_val(TMP1, x.reg); + a.sub(TMP1, cons_ptr, imm(TAG_PRIMARY_LIST)); + a.ldp(TMP2, TMP3, arm::Mem(TMP1)); + cmp(TMP2, CAR(list_val(literal))); + mov_imm(TMP4, NIL); + a.ccmp(TMP3, TMP4, imm(NZCV::kNone), imm(arm::CondCode::kEQ)); + a.b_eq(resolve_beam_label(Fail, disp1MB)); + + a.bind(next); + + return; + } else if (is_list_of_immediates(literal)) { + auto y = load_source(Y, ARG2); + + comment("optimized non-equality test with %T", literal); + mov_var(ARG1, x); + mov_var(ARG2, y); + fragment_call(ga->get_is_eq_exact_list_shared()); + a.b_eq(resolve_beam_label(Fail, disp1MB)); return; }