diff --git a/erts/emulator/beam/jit/x86/beam_asm.hpp b/erts/emulator/beam/jit/x86/beam_asm.hpp index 105c9aa41a64..0bff1e73824c 100644 --- a/erts/emulator/beam/jit/x86/beam_asm.hpp +++ b/erts/emulator/beam/jit/x86/beam_asm.hpp @@ -1323,6 +1323,12 @@ class BeamModuleAssembler : public BeamAssembler, x86::Gp size_reg); bool need_mask(const ArgVal Val, Sint size); void set_zero(Sint effectiveSize); + void emit_accumulate(ArgVal src, + Sint effectiveSize, + x86::Gp bin_data, + x86::Gp tmp, + x86::Gp value, + bool isFirst); bool bs_maybe_enter_runtime(bool entered); void bs_maybe_leave_runtime(bool entered); void emit_construct_utf8_shared(); diff --git a/erts/emulator/beam/jit/x86/instr_bs.cpp b/erts/emulator/beam/jit/x86/instr_bs.cpp index 8097a2db0e4d..427ad786095e 100644 --- a/erts/emulator/beam/jit/x86/instr_bs.cpp +++ b/erts/emulator/beam/jit/x86/instr_bs.cpp @@ -1380,6 +1380,47 @@ void BeamModuleAssembler::set_zero(Sint effectiveSize) { } } +/* + * Efficiently accumulate a value for a binary segment, + * using the smallest possible instructions. + * + * src is only consulted through need_mask() to decide whether masking can be skipped. effectiveSize is the segment size in bits and must be less than 64 unless isFirst is set. + * + * When isFirst is true, value is moved into bin_data (with a 32-bit move when effectiveSize is at most 32). Otherwise value is masked in place when needed and OR-ed into bin_data; tmp is only written when the mask is wider than 32 bits. + */ +void BeamModuleAssembler::emit_accumulate(ArgVal src, + Sint effectiveSize, + x86::Gp bin_data, + x86::Gp tmp, + x86::Gp value, + bool isFirst) { + if (isFirst) { + /* There is no need to mask the first value being + * accumulated. 
*/ + if (effectiveSize > 32) { + a.mov(bin_data, value); + } else { + a.mov(bin_data.r32(), value.r32()); + } + return; + } + + ASSERT(effectiveSize < 64); + + if (!need_mask(src, effectiveSize)) { + comment("skipped masking because the value always fits"); + } else if (effectiveSize == 32) { + a.mov(value.r32(), value.r32()); /* a 32-bit register write clears bits 63:32, so this acts as the mask */ + } else if (effectiveSize == 16) { + a.movzx(value.r32(), value.r16()); + } else if (effectiveSize == 8) { + a.movzx(value.r32(), value.r8()); + } else if (effectiveSize < 32) { + a.and_(value.r32(), (1ULL << effectiveSize) - 1); + } else { + mov_imm(tmp, (1ULL << effectiveSize) - 1); /* mask is wider than 32 bits here, so it is materialized in tmp first */ + a.and_(value, tmp); + } + + a.or_(bin_data, value); +} + /* * In: * @@ -2359,12 +2400,13 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, x86::Gp bin_data = ARG5; comment("accumulate value for integer segment"); - if (seg.action == BscSegment::action::ACCUMULATE_FIRST) { - mov_imm(bin_data, 0); - } else if (seg.effectiveSize < 64) { + if (seg.action != BscSegment::action::ACCUMULATE_FIRST && + seg.effectiveSize < 64) { a.shl(bin_data, imm(seg.effectiveSize)); } - mov_arg(ARG1, seg.src); + if (!seg.src.isSmall()) { + mov_arg(ARG1, seg.src); + } if (!always_small(seg.src)) { if (always_one_of().get()); + mov_imm(ARG1, val); + } else if (seg.effectiveSize + _TAG_IMMED1_SIZE <= 32) { + a.shr(ARG1d, imm(_TAG_IMMED1_SIZE)); + } else { + a.sar(ARG1, imm(_TAG_IMMED1_SIZE)); + } /* Mask (if needed) and accumulate. 
*/ a.bind(accumulate); - if (seg.effectiveSize == 64) { - a.mov(bin_data, ARG1); - } else if (!need_mask(seg.src, seg.effectiveSize)) { - comment("skipped masking because the value always fits"); - a.or_(bin_data, ARG1); - } else if (seg.effectiveSize == 32) { - a.mov(ARG1d, ARG1d); - a.or_(bin_data, ARG1); - } else if (seg.effectiveSize < 32) { - a.and_(ARG1, (1ULL << seg.effectiveSize) - 1); - a.or_(bin_data, ARG1); - } else { - mov_imm(tmp, (1ULL << seg.effectiveSize) - 1); - a.and_(ARG1, tmp); - a.or_(bin_data, ARG1); - } + emit_accumulate(seg.src, + seg.effectiveSize, + bin_data, + tmp, + ARG1, + seg.action == + BscSegment::action::ACCUMULATE_FIRST); break; } case BscSegment::action::STORE: {