Skip to content

Commit

Permalink
x86_64: Optimize accumulating of integer segments
Browse files Browse the repository at this point in the history
When accumulating a fixed-size integer, be sure to use the smallest
possible native instructions. Also simplify storing the first value
into the accumulator.
  • Loading branch information
bjorng committed Dec 20, 2024
1 parent 3b6b64b commit aeb36f1
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 21 deletions.
6 changes: 6 additions & 0 deletions erts/emulator/beam/jit/x86/beam_asm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1323,6 +1323,12 @@ class BeamModuleAssembler : public BeamAssembler,
x86::Gp size_reg);
bool need_mask(const ArgVal Val, Sint size);
void set_zero(Sint effectiveSize);
/* Emits code that merges `value` into the accumulator register
 * `bin_data` for an integer segment that is `effectiveSize` bits
 * wide. When `isFirst` is true the value is moved into `bin_data`;
 * otherwise it is masked (if `need_mask(src, effectiveSize)` says so)
 * and OR:ed in. `tmp` is used as a scratch register for masks wider
 * than 32 bits. */
void emit_accumulate(ArgVal src,
Sint effectiveSize,
x86::Gp bin_data,
x86::Gp tmp,
x86::Gp value,
bool isFirst);
bool bs_maybe_enter_runtime(bool entered);
void bs_maybe_leave_runtime(bool entered);
void emit_construct_utf8_shared();
Expand Down
82 changes: 61 additions & 21 deletions erts/emulator/beam/jit/x86/instr_bs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1380,6 +1380,47 @@ void BeamModuleAssembler::set_zero(Sint effectiveSize) {
}
}

/*
 * Emit code that folds `value` into the accumulator `bin_data` for
 * an integer segment that is `effectiveSize` bits wide, picking the
 * narrowest instruction form that does the job.
 *
 *   src           - the source argument; only consulted (through
 *                   need_mask()) to decide whether masking can be
 *                   skipped.
 *   effectiveSize - width of the segment in bits. Must be less than
 *                   64 unless `isFirst` is set.
 *   bin_data      - accumulator register.
 *   tmp           - scratch register, clobbered only when a mask
 *                   wider than 32 bits has to be materialized.
 *   value         - register holding the untagged value; it may be
 *                   modified by the masking.
 *   isFirst       - true for the first value of an accumulation, in
 *                   which case `bin_data` is overwritten instead of
 *                   being OR:ed into.
 */
void BeamModuleAssembler::emit_accumulate(ArgVal src,
                                          Sint effectiveSize,
                                          x86::Gp bin_data,
                                          x86::Gp tmp,
                                          x86::Gp value,
                                          bool isFirst) {
    if (isFirst) {
        /* The first value is stored as is, without any masking. A
         * 32-bit move suffices when the segment is at most 32 bits
         * wide. */
        if (effectiveSize <= 32) {
            a.mov(bin_data.r32(), value.r32());
        } else {
            a.mov(bin_data, value);
        }
        return;
    }

    ASSERT(effectiveSize < 64);

    if (need_mask(src, effectiveSize)) {
        switch (effectiveSize) {
        case 32:
            /* Writing a 32-bit register clears the upper half. */
            a.mov(value.r32(), value.r32());
            break;
        case 16:
            a.movzx(value.r32(), value.r16());
            break;
        case 8:
            a.movzx(value.r32(), value.r8());
            break;
        default:
            if (effectiveSize < 32) {
                /* The mask fits in a 32-bit immediate. */
                a.and_(value.r32(), (1ULL << effectiveSize) - 1);
            } else {
                /* The mask is too wide for an immediate operand, so
                 * go through the scratch register. */
                mov_imm(tmp, (1ULL << effectiveSize) - 1);
                a.and_(value, tmp);
            }
            break;
        }
    } else {
        comment("skipped masking because the value always fits");
    }

    a.or_(bin_data, value);
}

/*
* In:
*
Expand Down Expand Up @@ -2359,12 +2400,13 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
x86::Gp bin_data = ARG5;

comment("accumulate value for integer segment");
if (seg.action == BscSegment::action::ACCUMULATE_FIRST) {
mov_imm(bin_data, 0);
} else if (seg.effectiveSize < 64) {
if (seg.action != BscSegment::action::ACCUMULATE_FIRST &&
seg.effectiveSize < 64) {
a.shl(bin_data, imm(seg.effectiveSize));
}
mov_arg(ARG1, seg.src);
if (!seg.src.isSmall()) {
mov_arg(ARG1, seg.src);
}

if (!always_small(seg.src)) {
if (always_one_of<BeamTypeId::Integer,
Expand Down Expand Up @@ -2401,26 +2443,24 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
}

a.bind(value_is_small);
a.sar(ARG1, imm(_TAG_IMMED1_SIZE));
if (seg.src.isSmall()) {
Sint val = signed_val(seg.src.as<ArgSmall>().get());
mov_imm(ARG1, val);
} else if (seg.effectiveSize + _TAG_IMMED1_SIZE <= 32) {
a.shr(ARG1d, imm(_TAG_IMMED1_SIZE));
} else {
a.sar(ARG1, imm(_TAG_IMMED1_SIZE));
}

/* Mask (if needed) and accumulate. */
a.bind(accumulate);
if (seg.effectiveSize == 64) {
a.mov(bin_data, ARG1);
} else if (!need_mask(seg.src, seg.effectiveSize)) {
comment("skipped masking because the value always fits");
a.or_(bin_data, ARG1);
} else if (seg.effectiveSize == 32) {
a.mov(ARG1d, ARG1d);
a.or_(bin_data, ARG1);
} else if (seg.effectiveSize < 32) {
a.and_(ARG1, (1ULL << seg.effectiveSize) - 1);
a.or_(bin_data, ARG1);
} else {
mov_imm(tmp, (1ULL << seg.effectiveSize) - 1);
a.and_(ARG1, tmp);
a.or_(bin_data, ARG1);
}
emit_accumulate(seg.src,
seg.effectiveSize,
bin_data,
tmp,
ARG1,
seg.action ==
BscSegment::action::ACCUMULATE_FIRST);
break;
}
case BscSegment::action::STORE: {
Expand Down

0 comments on commit aeb36f1

Please sign in to comment.