From e68b2ab3096c18a4c00dc746f8bbfbbf7ee18724 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?=
Date: Thu, 19 Oct 2023 16:51:02 +0200
Subject: [PATCH] AArch64: Combine move + move + trim

---
Notes (below the "---" marker, so not part of the commit message):

 * emit_move_two_trim() asserts that Src1/Src2 are consecutive Y
   registers and loads both with a single LDP. When Src1 is y0 and the
   byte count passes Support::isInt9(), the LDP is post-indexed on E so
   it also performs the trim, and the destinations are re-resolved with
   trimmed() before being flushed. Otherwise safe_ldp() is used and the
   trim is folded into a pre-indexed STR when one destination is the Y
   register whose index equals the trim count; failing that, E is
   adjusted with a separate add.
 * ArgRegister::trimmed(n) masks val with REG_MASK, subtracts n and
   rebuilds a YReg ArgVal for Y registers; any other register is
   returned unchanged. emit_move_trim() now calls it instead of
   branching on the destination kind.
 * ops.tab gains two rules (one per ordering of the two moves) that
   rewrite move + move + trim into move_two_trim.

 erts/emulator/beam/jit/arm/instr_common.cpp | 63 +++++++++++++++++----
 erts/emulator/beam/jit/arm/ops.tab          |  8 +++
 erts/emulator/beam/jit/beam_jit_args.hpp    |  8 +++
 3 files changed, 69 insertions(+), 10 deletions(-)

diff --git a/erts/emulator/beam/jit/arm/instr_common.cpp b/erts/emulator/beam/jit/arm/instr_common.cpp
index e7741bde665d..283f327277b9 100644
--- a/erts/emulator/beam/jit/arm/instr_common.cpp
+++ b/erts/emulator/beam/jit/arm/instr_common.cpp
@@ -515,6 +515,56 @@ void BeamModuleAssembler::emit_i_move(const ArgSource &Src,
     mov_arg(Dst, Src);
 }
 
+void BeamModuleAssembler::emit_move_two_trim(const ArgYRegister &Src1,
+                                             const ArgRegister &Dst1,
+                                             const ArgYRegister &Src2,
+                                             const ArgRegister &Dst2,
+                                             const ArgWord &Words) {
+    auto dst1 = init_destination(Dst1, TMP1);
+    auto dst2 = init_destination(Dst2, TMP2);
+    arm::Mem mem = getArgRef(Src1);
+    Sint trim = Words.get() * sizeof(Eterm);
+    auto src_index = Src1.as<ArgYRegister>().get();
+
+    ASSERT(ArgVal::memory_relation(Src1, Src2) ==
+           ArgVal::Relation::consecutive);
+
+    if (src_index == 0 && Support::isInt9(trim)) {
+        /* Combine fetching of y0 and y1 with trimming. */
+        mem = arm::Mem(E).post(trim);
+        a.ldp(dst1.reg, dst2.reg, mem);
+        dst1 = init_destination(Dst1.trimmed(Words.get()), TMP1);
+        dst2 = init_destination(Dst2.trimmed(Words.get()), TMP2);
+        flush_vars(dst1, dst2);
+    } else {
+        safe_ldp(dst1.reg, dst2.reg, Src1, Src2);
+
+        /* Try to combine trimming with storing to one of destination
+         * registers. */
+
+        if (Dst1.isYRegister() &&
+            Dst1.as<ArgYRegister>().get() == Words.get() &&
+            Support::isInt9(trim)) {
+            const arm::Mem dst_ref = arm::Mem(E, trim).pre();
+            flush_var(dst2);
+            a.str(dst1.reg, dst_ref);
+        } else if (Dst2.isYRegister() &&
+                   Dst2.as<ArgYRegister>().get() == Words.get() &&
+                   Support::isInt9(trim)) {
+            const arm::Mem dst_ref = arm::Mem(E, trim).pre();
+            flush_var(dst1);
+            a.str(dst2.reg, dst_ref);
+        } else {
+            flush_vars(dst1, dst2);
+
+            ASSERT(Words.get() <= 1023);
+            if (Words.get() > 0) {
+                add(E, E, Words.get() * sizeof(Eterm));
+            }
+        }
+    }
+}
+
 void BeamModuleAssembler::emit_move_trim(const ArgSource &Src,
                                          const ArgRegister &Dst,
                                          const ArgWord &Words) {
@@ -525,16 +575,9 @@ void BeamModuleAssembler::emit_move_trim(const ArgSource &Src,
         auto src_index = Src.as<ArgYRegister>().get();
         if (src_index == 0 && Support::isInt9(trim)) {
             const arm::Mem src_ref = arm::Mem(E).post(trim);
-            if (Dst.isXRegister()) {
-                auto dst = init_destination(Dst, TMP1);
-                a.ldr(dst.reg, src_ref);
-                flush_var(dst);
-            } else {
-                auto dst_index = Dst.as<ArgYRegister>().get() - Words.get();
-                auto dst = init_destination(ArgYRegister(dst_index), TMP1);
-                a.ldr(dst.reg, src_ref);
-                flush_var(dst);
-            }
+            auto dst = init_destination(Dst.trimmed(Words.get()), TMP1);
+            a.ldr(dst.reg, src_ref);
+            flush_var(dst);
             return;
         }
 
diff --git a/erts/emulator/beam/jit/arm/ops.tab b/erts/emulator/beam/jit/arm/ops.tab
index d2c2190e78d2..bc0d9c4f1ce5 100644
--- a/erts/emulator/beam/jit/arm/ops.tab
+++ b/erts/emulator/beam/jit/arm/ops.tab
@@ -351,8 +351,16 @@ move S1 D1=y | move S2 D2=y | consecutive_words(D1, D2) =>
 move S1 D1=y | move S2 D2=y | consecutive_words(D2, D1) =>
     store_two_values S2 D2 S1 D1
 
+move S1=y D1 | move S2=y D2 | consecutive_words(S1, S2) | trim N u =>
+    move_two_trim S1 D1 S2 D2 N
+
+move S2=y D2 | move S1=y D1 | consecutive_words(S1, S2) | trim N u =>
+    move_two_trim S1 D1 S2 D2 N
+
 move Src Dst | trim N u => move_trim Src Dst N
 
+move_two_trim y d y d t
+
 move_trim s d t
 
 move Src Dst => i_move Src Dst
diff --git a/erts/emulator/beam/jit/beam_jit_args.hpp b/erts/emulator/beam/jit/beam_jit_args.hpp
index 4dba1b3f4fd7..9705019ee2b9 100644
--- a/erts/emulator/beam/jit/beam_jit_args.hpp
+++ b/erts/emulator/beam/jit/beam_jit_args.hpp
@@ -247,6 +247,14 @@ struct ArgRegister : public ArgSource {
         return (int)(val >> 10);
     }
 
+    constexpr ArgVal trimmed(int n) const {
+        if (isYRegister()) {
+            return ArgVal(TYPE::YReg, UWord((val & REG_MASK) - n));
+        } else {
+            return *this;
+        }
+    }
+
     template<typename T>
     constexpr T copy(int n) const {
         return T(n | (val & ~REG_MASK));