diff --git a/erts/emulator/beam/emu/bs_instrs.tab b/erts/emulator/beam/emu/bs_instrs.tab index 1150b1172781..1c41b5b3ccb7 100644 --- a/erts/emulator/beam/emu/bs_instrs.tab +++ b/erts/emulator/beam/emu/bs_instrs.tab @@ -373,7 +373,7 @@ i_bs_create_bin(Fail, Alloc, Live, Dst, N) { const BeamInstr* p; Uint alloc = $Alloc; Eterm new_binary; - ERL_BITS_DECLARE_STATEP; /* Has to be last declaration */ + ErlBitsState* EBS = ERL_BITS_EBS_FROM_REG(reg); /* We count the total number of bits in an unsigned integer. To avoid * having to check for overflow when adding to `num_bits`, we ensure that @@ -546,7 +546,6 @@ i_bs_create_bin(Fail, Alloc, Live, Dst, N) { } /* Allocate binary. */ - ERL_BITS_RELOAD_STATEP(c_p); p = p_start; if (p[0] == BSC_APPEND) { Uint live = $Live; @@ -565,15 +564,13 @@ i_bs_create_bin(Fail, Alloc, Live, Dst, N) { } p_start += BSC_NUM_ARGS; } else if (p[0] == BSC_PRIVATE_APPEND) { - Uint unit; Eterm Src; $test_heap(alloc, $Live); - $BS_LOAD_UNIT(p, unit); $BS_LOAD_SRC(p, Src); - new_binary = erts_bs_private_append_checked(c_p, Src, num_bits, unit); + new_binary = erts_bs_private_append_checked(EBS, c_p, Src, num_bits); if (is_non_value(new_binary)) { $BS_FAIL_INFO($Fail, c_p->freason, c_p->fvalue, Src); @@ -589,7 +586,7 @@ i_bs_create_bin(Fail, Alloc, Live, Dst, N) { /* num_bits = Number of bits to build * alloc = Total number of words to allocate on heap */ - erts_bin_offset = 0; + EBS->erts_bin_offset = 0; if (num_bits <= ERL_ONHEAP_BITS_LIMIT) { ErlHeapBits *hb; @@ -598,7 +595,7 @@ i_bs_create_bin(Fail, Alloc, Live, Dst, N) { HTOP += heap_bits_size(num_bits); hb->thing_word = header_heap_bits(num_bits); hb->size = num_bits; - erts_current_bin = (byte *) hb->data; + EBS->erts_current_bin = (byte *) hb->data; new_binary = make_bitstring(hb); } else { Binary* bptr; @@ -608,7 +605,7 @@ i_bs_create_bin(Fail, Alloc, Live, Dst, N) { $Live); bptr = erts_bin_nrml_alloc(NBYTES(num_bits)); - erts_current_bin = (byte *)bptr->orig_bytes; + EBS->erts_current_bin = (byte *)bptr->orig_bytes; LIGHT_SWAPOUT; @@ -616,7 +613,7 @@ i_bs_create_bin(Fail, Alloc, Live, Dst, N) { &MSO(c_p).overhead, &HEAP_TOP(c_p), bptr, - erts_current_bin, + EBS->erts_current_bin, 0, num_bits); @@ -640,7 +637,7 @@ i_bs_create_bin(Fail, Alloc, Live, Dst, N) { byte* string; $BS_LOAD_STRING_SRC(p, string); $BS_LOAD_FIXED_SIZE(p, Size); - erts_new_bs_put_string(ERL_BITS_ARGS_2(string, Size)); + erts_bs_put_string(EBS, string, Size); continue; } @@ -649,7 +646,7 @@ i_bs_create_bin(Fail, Alloc, Live, Dst, N) { switch (p[0]) { case BSC_BINARY_ALL: $BS_LOAD_UNIT(p, unit); - if (!erts_new_bs_put_binary_all(c_p, Src, unit)) { + if (!erts_bs_put_binary_all(EBS, c_p, Src, unit)) { $BS_FAIL_INFO($Fail, BADARG, am_unit, Src); } break; @@ -658,14 +655,14 @@ i_bs_create_bin(Fail, Alloc, Live, Dst, N) { $BS_LOAD_FLAGS(p, flags); $BS_LOAD_SIZE(p, Size); $BS_GET_UNCHECKED_FIELD_SIZE(Size, unit, $BADARG($Fail), _size); - if (!erts_new_bs_put_binary(c_p, Src, _size)) { + if (!erts_bs_put_binary(EBS, c_p, Src, _size)) { Eterm reason = is_bitstring(Src) ? am_short : am_type; $BS_FAIL_INFO($Fail, BADARG, reason, Src); } break; case BSC_BINARY_FIXED_SIZE: $BS_LOAD_FIXED_SIZE(p, Size); - if (!erts_new_bs_put_binary(c_p, Src, Size)) { + if (!erts_bs_put_binary(EBS, c_p, Src, Size)) { Eterm reason = is_bitstring(Src) ? am_short : am_type; $BS_FAIL_INFO($Fail, BADARG, reason, Src); } @@ -675,7 +672,7 @@ i_bs_create_bin(Fail, Alloc, Live, Dst, N) { $BS_LOAD_FLAGS(p, flags); $BS_LOAD_SIZE(p, Size); $BS_GET_UNCHECKED_FIELD_SIZE(Size, unit, $BADARG($Fail), _size); - Src = erts_new_bs_put_float(c_p, Src, _size, flags); + Src = erts_bs_put_float(EBS, c_p, Src, _size, flags); if (is_value(Src)) { $BS_FAIL_INFO($Fail, BADARG, c_p->fvalue, Src); } @@ -683,7 +680,7 @@ i_bs_create_bin(Fail, Alloc, Live, Dst, N) { case BSC_FLOAT_FIXED_SIZE: $BS_LOAD_FLAGS(p, flags); $BS_LOAD_FIXED_SIZE(p, Size); - Src = erts_new_bs_put_float(c_p, Src, Size, flags); + Src = erts_bs_put_float(EBS, c_p, Src, Size, flags); if (is_value(Src)) { $BS_FAIL_INFO($Fail, BADARG, c_p->fvalue, Src); } @@ -691,33 +688,48 @@ i_bs_create_bin(Fail, Alloc, Live, Dst, N) { case BSC_INTEGER: { Sint _size; + int result; $BS_LOAD_UNIT(p, unit); $BS_LOAD_FLAGS(p, flags); $BS_LOAD_SIZE(p, Size); $BS_GET_UNCHECKED_FIELD_SIZE(Size, unit, $BADARG($Fail), _size); - if (!erts_new_bs_put_integer(ERL_BITS_ARGS_3(Src, _size, flags))) { + if (flags & BSF_LITTLE) { + result = erts_bs_put_integer_le(EBS, Src, _size); + } else { + result = erts_bs_put_integer_be(EBS, Src, _size); + } + if (!result) { $BS_FAIL_INFO($Fail, BADARG, am_type, Src); } } break; case BSC_INTEGER_FIXED_SIZE: case BSC_UTF32: - $BS_LOAD_FLAGS(p, flags); - $BS_LOAD_FIXED_SIZE(p, Size); - if (!erts_new_bs_put_integer(ERL_BITS_ARGS_3(Src, Size, flags))) { - $BS_FAIL_INFO($Fail, BADARG, am_type, Src); + { + int result; + + $BS_LOAD_FLAGS(p, flags); + $BS_LOAD_FIXED_SIZE(p, Size); + if (flags & BSF_LITTLE) { + result = erts_bs_put_integer_le(EBS, Src, Size); + } else { + result = erts_bs_put_integer_be(EBS, Src, Size); + } + if (!result) { + $BS_FAIL_INFO($Fail, BADARG, am_type, Src); + } } break; case BSC_UTF8: - if (!erts_bs_put_utf8(ERL_BITS_ARGS_1(Src))) { + if (!erts_bs_put_utf8(EBS, Src)) { $BS_FAIL_INFO($Fail, BADARG, am_type, Src); } break; case BSC_UTF16: $BS_LOAD_FLAGS(p, flags); $BS_LOAD_SRC(p, Src); - if (!erts_bs_put_utf16(ERL_BITS_ARGS_2(Src, flags))) { + if (!erts_bs_put_utf16(EBS, Src, flags)) { $BS_FAIL_INFO($Fail, BADARG, am_type, Src); } break; diff --git a/erts/emulator/beam/erl_bits.c b/erts/emulator/beam/erl_bits.c index 75413225b9cf..a3858841283a 100644 --- a/erts/emulator/beam/erl_bits.c +++ b/erts/emulator/beam/erl_bits.c @@ -302,15 +302,15 @@ Process *p, Uint num_bits, unsigned flags, ErlSubBits *sb) * Move bits to temporary buffer. We want the buffer to be stored in * little-endian order, since bignums are little-endian. */ - + if (flags & BSF_LITTLE) { - erts_copy_bits(erl_sub_bits_get_base(sb), sb->start, 1, - LSB, 0, 1, num_bits); + erts_copy_bits_fwd(erl_sub_bits_get_base(sb), sb->start, + LSB, 0, num_bits); *MSB >>= offs; /* adjust msb */ } else { *MSB = 0; - erts_copy_bits(erl_sub_bits_get_base(sb), sb->start, 1, - MSB, offs, -1, num_bits); + erts_copy_bits_rev(erl_sub_bits_get_base(sb), sb->start, + MSB, offs, num_bits); } sb->start += num_bits; @@ -472,13 +472,13 @@ erts_bs_get_float_2(Process *p, Uint num_bits, unsigned flags, ErlSubBits *sb) } if (BIT_IS_MACHINE_ENDIAN(flags)) { - erts_copy_bits(erl_sub_bits_get_base(sb), sb->start, 1, - fptr, 0, 1, - num_bits); + erts_copy_bits_fwd(erl_sub_bits_get_base(sb), sb->start, + fptr, 0, + num_bits); } else { - erts_copy_bits(erl_sub_bits_get_base(sb), sb->start, 1, - fptr + NBYTES(num_bits) - 1, 0, -1, - num_bits); + erts_copy_bits_rev(erl_sub_bits_get_base(sb), sb->start, + fptr + NBYTES(num_bits) - 1, 0, + num_bits); } ERTS_FP_CHECK_INIT(p); if (num_bits == 16) { @@ -532,237 +532,438 @@ erts_bs_get_binary_all_2(Process *p, ErlSubBits *sb) ****************************************************************/ -/* COPY_VAL: - * copy sz byte from val to dst buffer, - * dst, val are updated!!! +/* FMT_COPY_VAL: + * Copy sz bytes from val to dst buffer; + * dst and val are updated. */ -#define COPY_VAL(dst,ddir,val,sz) do { \ - Uint __sz = (sz); \ - while(__sz) { \ - switch(__sz) { \ - default: \ - case 4: *dst = (val&0xff); dst += ddir; val >>= 8; __sz--; \ - case 3: *dst = (val&0xff); dst += ddir; val >>= 8; __sz--; \ - case 2: *dst = (val&0xff); dst += ddir; val >>= 8; __sz--; \ - case 1: *dst = (val&0xff); dst += ddir; val >>= 8; __sz--; \ - } \ - } \ +#define FMT_COPY_VAL(dst,ddir,val,sz) do { \ + Uint __sz = (sz); \ + while (__sz) { \ + switch(__sz) { \ + default: \ + case 8: *dst = val; dst += ddir; val >>= 8; __sz--; \ + case 7: *dst = val; dst += ddir; val >>= 8; __sz--; \ + case 6: *dst = val; dst += ddir; val >>= 8; __sz--; \ + case 5: *dst = val; dst += ddir; val >>= 8; __sz--; \ + case 4: *dst = val; dst += ddir; val >>= 8; __sz--; \ + case 3: *dst = val; dst += ddir; val >>= 8; __sz--; \ + case 2: *dst = val; dst += ddir; val >>= 8; __sz--; \ + case 1: *dst = val; dst += ddir; val >>= 8; __sz--; \ + } \ + } \ } while(0) static void -fmt_small(byte *buf, Uint num_bytes, Eterm arg, Uint num_bits, Uint flags) +fmt_small_be(byte *buf, Eterm arg, Uint num_bits) { Uint bit_offset; Sint val; + Uint num_bytes; ASSERT(is_small(arg)); ASSERT(num_bits != 0); /* Tested by caller */ + num_bytes = NBYTES(num_bits); bit_offset = BIT_OFFSET(num_bits); val = signed_val(arg); - if (flags & BSF_LITTLE) { /* Little endian */ + buf += num_bytes - 1; + if (bit_offset) { + *buf-- = val << (8-bit_offset); num_bytes--; - COPY_VAL(buf, 1, val, num_bytes); - *buf = bit_offset ? (val << (8-bit_offset)) : val; - } else { /* Big endian */ - buf += num_bytes - 1; - if (bit_offset) { - *buf-- = val << (8-bit_offset); - num_bytes--; - val >>= bit_offset; - } - COPY_VAL(buf, -1, val, num_bytes); + val >>= bit_offset; } + FMT_COPY_VAL(buf, -1, val, num_bytes); } -/* calculate a - *cp (carry) (store result in b), *cp is updated! */ -#define SUBc(a, cp, b) do { \ - byte __x = (a); \ - byte __y = (__x - (*(cp))); \ - (*cp) = (__y > __x); \ - *(b) = ~__y; \ - } while(0) +static void +fmt_small_le(byte *buf, Eterm arg, Uint num_bits) +{ + Uint bit_offset; + Sint val; + Uint num_bytes; + + ASSERT(is_small(arg)); + ASSERT(num_bits != 0); /* Tested by caller */ + + num_bytes = NBYTES(num_bits); + bit_offset = BIT_OFFSET(num_bits); + val = signed_val(arg); + num_bytes--; + FMT_COPY_VAL(buf, 1, val, num_bytes); + *buf = bit_offset ? (val << (8-bit_offset)) : val; +} + +#undef FMT_COPY_VAL + +/* + * Calculate a - c (carry), storing the result in b; set c + * to the new carry. + */ +#if __has_builtin(__builtin_subc) && !defined(DEBUG) +#define SUBc(a, c, b) \ + do { \ + *(b) = __builtin_subc(0, (a), c, &c); \ + } while(0) +#else +#define SUBc(a, c, b) \ + do { \ + byte __x = (a); \ + byte __y = (__x - !(c)); \ + c = !(__y > __x); \ + *(b) = ~__y; \ + } while(0) +#endif static void -fmt_big(byte *buf, Uint num_bytes, Eterm val, Uint num_bits, Uint flags) +fmt_big_be(byte *buf, Eterm val, Uint num_bits) { unsigned long offs; int sign; Uint ds; ErtsDigit* dp; int n; + ErtsDigit acc = 0; + ErtsDigit d; + Uint num_bytes; ASSERT(is_big(val)); + ASSERT(num_bits != 0); - if (num_bits == 0) { - return; - } - + num_bytes = NBYTES(num_bits); sign = big_sign(val); ds = big_size(val)*sizeof(ErtsDigit); /* number of digits bytes */ dp = big_v(val); n = MIN(num_bytes, ds); offs = BIT_OFFSET(num_bits); - if (flags & BSF_LITTLE) { - num_bytes -= n; /* pad with this amount */ - if (sign) { /* negative */ - int c = 1; - while (n >= sizeof(ErtsDigit)) { - ErtsDigit d = *dp++; - int i; - for (i = 0; i < sizeof(ErtsDigit); i++) { - SUBc(d & 0xff, &c, buf); - buf++; - d >>= 8; - } - n -= sizeof(ErtsDigit); + + buf += num_bytes - 1; /* end of buffer */ + num_bytes -= n; /* pad with this amount */ + offs = offs ? (8-offs) : 0; /* shift offset */ + + if (sign) { /* negative bignum */ + unsigned int c = 0; + + while (n >= sizeof(ErtsDigit)) { + int i; + + d = *dp++; + acc |= d << offs; + SUBc(acc & 0xff, c, buf); + buf--; + acc = d >> (8-offs); + for (i = 0; i < sizeof(ErtsDigit)-1; i++) { + SUBc(acc & 0xff, c, buf); + buf--; + acc >>= 8; } - if (n) { - ErtsDigit d = *dp; - do { - SUBc(d & 0xff, &c, buf); - buf++; - d >>= 8; - } while (--n > 0); + n -= sizeof(ErtsDigit); + } + if (n) { + acc |= ((ErtsDigit)*dp << offs); + do { + SUBc(acc & 0xff, c, buf); + buf--; + acc >>= 8; + } while (--n > 0); + } + /* pad */ + while (num_bytes--) { + SUBc(acc & 0xff, c, buf); + buf--; + acc >>= 8; + } + } else { /* positive bignum */ + while (n >= sizeof(ErtsDigit)) { + int i; + + d = *dp++; + acc |= d << offs; + *buf-- = acc; + acc = d >> (8-offs); + for (i = 0; i < sizeof(ErtsDigit)-1; i++) { + *buf-- = acc; + acc >>= 8; } - /* pad */ - while (num_bytes--) { - SUBc(0, &c, buf); + n -= sizeof(ErtsDigit); + } + if (n) { + acc |= (*dp << offs); + do { + *buf-- = acc; + acc >>= 8; + } while (--n > 0); + } + while (num_bytes--) { + *buf-- = acc; + acc >>= 8; + } + } +} + +static void +fmt_big_le(byte *buf, Eterm val, Uint num_bits) +{ + unsigned long offs; + int sign; + Uint ds; + ErtsDigit* dp; + int n; + Uint num_bytes; + + ASSERT(is_big(val)); + ASSERT(num_bits != 0); + + num_bytes = NBYTES(num_bits); + sign = big_sign(val); + ds = big_size(val)*sizeof(ErtsDigit); /* number of digits bytes */ + dp = big_v(val); + n = MIN(num_bytes, ds); + + offs = BIT_OFFSET(num_bits); + num_bytes -= n; /* pad with this amount */ + if (sign) { /* negative */ + unsigned int c = 0; + while (n >= sizeof(ErtsDigit)) { + ErtsDigit d = *dp++; + int i; + for (i = 0; i < sizeof(ErtsDigit); i++) { + SUBc(d & 0xff, c, buf); buf++; + d >>= 8; } - } else { /* positive */ - while (n >= sizeof(ErtsDigit)) { - ErtsDigit d = *dp++; - int i; - for(i = 0; i < sizeof(ErtsDigit); i++) { - *buf++ = d; - d >>= 8; - } - n -= sizeof(ErtsDigit); - } - if (n) { - ErtsDigit d = *dp; - do { - *buf++ = d; - d >>= 8; - } while (--n > 0); - } - /* pad */ - while (num_bytes) { - *buf++ = 0; - num_bytes--; + n -= sizeof(ErtsDigit); + } + if (n) { + ErtsDigit d = *dp; + do { + SUBc(d & 0xff, c, buf); + buf++; + d >>= 8; + } while (--n > 0); + } + /* pad */ + while (num_bytes--) { + SUBc(0, c, buf); + buf++; + } + } else { /* positive */ + while (n >= sizeof(ErtsDigit)) { + ErtsDigit d = *dp++; + int i; + for(i = 0; i < sizeof(ErtsDigit); i++) { + *buf++ = d; + d >>= 8; } + n -= sizeof(ErtsDigit); + } + if (n) { + ErtsDigit d = *dp; + do { + *buf++ = d; + d >>= 8; + } while (--n > 0); + } + /* pad */ + while (num_bytes) { + *buf++ = 0; + num_bytes--; } + } - /* adjust MSB */ - if (offs) { - buf--; - *buf <<= (8 - offs); + /* adjust MSB */ + if (offs) { + buf--; + *buf <<= (8 - offs); + } +} + +#undef SUBc + +static void +restore_and_shift(byte *buf, Uint orig_byte, Uint bit_offset, Uint num_bits) +{ + Uint rshift = bit_offset; + Uint lshift = 8 - bit_offset; + Uint deoffs = BIT_OFFSET(bit_offset + num_bits); + Uint lmask = MAKE_MASK(8 - bit_offset); + Uint count = (num_bits - lshift) / 8; + Uint bits, bits1; + + ASSERT(num_bits - lshift >= 0); + + bits = *buf; + bits1 = bits >> rshift; + *buf = MASK_BITS(bits1, orig_byte, lmask); + buf++; + + while (count--) { + bits1 = bits << lshift; + bits = *buf; + *buf++ = bits1 | (bits >> rshift); + } + + if (deoffs) { + Uint rmask = MAKE_MASK(deoffs) << (8 - deoffs); + + bits1 = bits << lshift; + if ((rmask << rshift) & 0xff) { + bits = *buf; + bits1 |= (bits >> rshift); } - } else { /* BIG ENDIAN */ - ErtsDigit acc = 0; - ErtsDigit d; + *buf = MASK_BITS(bits1, *buf, rmask); + } +} - buf += num_bytes - 1; /* end of buffer */ - num_bytes -= n; /* pad with this amount */ - offs = offs ? (8-offs) : 0; /* shift offset */ +int +erts_bs_put_integer_be(ErlBitsState *EBS, Eterm arg, Uint num_bits) +{ + byte* dst_bin = EBS->erts_current_bin; + Uint bin_offset = EBS->erts_bin_offset; + Uint bit_offset; + byte b; + byte *iptr; - if (sign) { /* negative bignum */ - int c = 1; + if (ERTS_UNLIKELY(num_bits == 0)) { + return is_small(arg) || is_big(arg); + } - while (n >= sizeof(ErtsDigit)) { - int i; + iptr = dst_bin + BYTE_OFFSET(bin_offset); + bit_offset = BIT_OFFSET(bin_offset); + if (is_small(arg)) { + Uint rbits = 8 - bit_offset; - d = *dp++; - acc |= d << offs; - SUBc(acc & 0xff, &c, buf); - buf--; - acc = d >> (8-offs); - for (i = 0; i < sizeof(ErtsDigit)-1; i++) { - SUBc(acc & 0xff, &c, buf); - buf--; - acc >>= 8; - } - n -= sizeof(ErtsDigit); - } - if (n) { - acc |= ((ErtsDigit)*dp << offs); - do { - SUBc(acc & 0xff, &c, buf); - buf--; - acc >>= 8; - } while (--n > 0); - } - /* pad */ - while (num_bytes--) { - SUBc(acc & 0xff, &c, buf); - buf--; - acc >>= 8; + if (bit_offset == 0) { + /* Aligned on a byte boundary. */ + if (num_bits <= 8) { + /* All bits are in the same byte. */ + b = (signed_val(arg) & MAKE_MASK(num_bits)) << (rbits-num_bits); + *iptr = b; + } else { + /* More than one byte. */ + fmt_small_be(iptr, arg, num_bits); } - } else { /* positive bignum */ - while (n >= sizeof(ErtsDigit)) { - int i; + } else if (bit_offset + num_bits <= 8) { + /* + * All bits are in the same byte. + */ + b = *iptr & (0xff << rbits); + b |= (signed_val(arg) & MAKE_MASK(num_bits)) << (rbits-num_bits); + *iptr = b; + } else { /* Big endian */ + /* + * Big-endian, more than one byte, but not aligned on a byte boundary. + * Handle the bits up to the next byte boundary specially, + * then let fmt_small_be() handle the rest. + */ + Uint shift_count = num_bits - rbits; + Sint val = signed_val(arg); - d = *dp++; - acc |= d << offs; - *buf-- = acc; - acc = d >> (8-offs); - for (i = 0; i < sizeof(ErtsDigit)-1; i++) { - *buf-- = acc; - acc >>= 8; - } - n -= sizeof(ErtsDigit); - } - if (n) { - acc |= (*dp << offs); - do { - *buf-- = acc; - acc >>= 8; - } while (--n > 0); - } - while (num_bytes--) { - *buf-- = acc; - acc >>= 8; + ASSERT(num_bits > rbits); + b = *iptr & (0xff << rbits); + + /* + * Shifting with a shift count greater than or equal to the word + * size may be a no-op (instead of 0 the result may be the unshifted + * value). Therefore, only do the shift and the OR if the shift count + * is less than the word size if the number is positive; if negative, + * we must simulate the sign extension. + */ + if (shift_count < sizeof(Uint)*8) { + b |= (val >> shift_count) & MAKE_MASK(rbits); + } else if (val < 0) { + /* Simulate sign extension. */ + b |= (-1) & MAKE_MASK(rbits); } + *iptr++ = b; + + fmt_small_be(iptr, arg, shift_count); } + } else if (is_big(arg) && bit_offset == 0) { + /* + * Big number, aligned on a byte boundary. We can format the + * integer directly into the binary. + */ + fmt_big_be(iptr, arg, num_bits); + } else if (is_big(arg) && bit_offset + num_bits <= 8) { + /* + * All bits are in the same byte. + */ + Uint rbits = 8 - bit_offset; + Sint sign = big_sign(arg); + ErtsDigit* dp = big_v(arg); + Uint val = sign ? -*dp : *dp; + + b = *iptr & (0xff << rbits); + b |= (val & MAKE_MASK(num_bits)) << (rbits-num_bits); + *iptr = b; + } else if (is_big(arg)) { + /* + * Big number, not aligned on a byte boundary. + * + * Format the integer byte-aligned using the binary itself as + * a temporary buffer. + */ + b = *iptr; + fmt_big_be(iptr, arg, num_bits); + + /* + * Now restore the overwritten bits of the first byte and + * shift everything to the right. + */ + restore_and_shift(iptr, b, bit_offset, num_bits); + } else { + /* Not an integer. */ + return 0; } + EBS->erts_bin_offset = bin_offset + num_bits; + return 1; } int -erts_new_bs_put_integer(ERL_BITS_PROTO_3(Eterm arg, Uint num_bits, unsigned flags)) +erts_bs_put_integer_le(ErlBitsState *EBS, Eterm arg, Uint num_bits) { - Uint bin_offset = erts_bin_offset; + byte* dst_bin = EBS->erts_current_bin; + Uint bin_offset = EBS->erts_bin_offset; Uint bit_offset; - Uint b; + byte b; byte *iptr; + if (ERTS_UNLIKELY(num_bits == 0)) { + return is_small(arg) || is_big(arg); + } + + iptr = dst_bin + BYTE_OFFSET(bin_offset); bit_offset = BIT_OFFSET(bin_offset); if (is_small(arg)) { - Uint rbits = 8 - bit_offset; - - if (num_bits == 0) { - return 1; - } else if (bit_offset + num_bits <= 8) { - /* - * All bits are in the same byte. - */ - iptr = erts_current_bin+BYTE_OFFSET(bin_offset); - b = *iptr & (0xff << rbits); - b |= (signed_val(arg) & ((1 << num_bits)-1)) << (rbits-num_bits); - *iptr = b; - } else if (bit_offset == 0) { - /* - * More than one bit, starting at a byte boundary. - */ - iptr = erts_current_bin + BYTE_OFFSET(bin_offset); - fmt_small(iptr, NBYTES(num_bits), arg, num_bits, flags); - } else if (flags & BSF_LITTLE) { + Uint rbits = 8 - bit_offset; + + if (bit_offset == 0) { + /* Aligned on a byte boundary. */ + if (num_bits <= 8) { + /* All bits are in the same byte. */ + b = (signed_val(arg) & MAKE_MASK(num_bits)) << (rbits-num_bits); + *iptr = b; + } else { + /* More than one byte. */ + fmt_small_le(iptr, arg, num_bits); + } + } else if (bit_offset + num_bits <= 8) { + /* + * All bits are in the same byte. + */ + b = *iptr & (0xff << rbits); + b |= (signed_val(arg) & MAKE_MASK(num_bits)) << (rbits-num_bits); + *iptr = b; + } else if (BIT_OFFSET(num_bits) == 0) { /* * Little endian small in more than one byte, not - * aligned on a byte boundary. + * aligned on a byte boundary. The size is evenly + * divisible by 8, which means that there will be + * one partial byte, followed by zero or more + * complete bytes, followed by a final partial byte. */ Sint val = signed_val(arg); Uint rshift = bit_offset; @@ -771,92 +972,51 @@ erts_new_bs_put_integer(ERL_BITS_PROTO_3(Eterm arg, Uint num_bits, unsigned flag Uint count = (num_bits - rbits) / 8; Uint bits, bits1; - iptr = erts_current_bin+BYTE_OFFSET(bin_offset); - - if (BIT_OFFSET(num_bits) == 0) { - bits = val; - bits1 = bits >> rshift; - *iptr = MASK_BITS(bits1, *iptr, lmask); - iptr++; + /* Handle the first partial byte. */ + bits = val; + bits1 = bits >> rshift; + *iptr = MASK_BITS(bits1, *iptr, lmask); + iptr++; + val >>= 8; + + /* Handle all complete bytes. */ + while (count--) { + bits1 = bits << lshift; + bits = val & 0xff; + *iptr++ = bits1 | (bits >> rshift); val >>= 8; - - while (count--) { - bits1 = bits << lshift; - bits = val & 0xff; - *iptr++ = bits1 | (bits >> rshift); - val >>= 8; - } - - *iptr = bits << lshift; - } else { - Sint num_bytes = NBYTES(num_bits) - 1; - Uint deoffs = BIT_OFFSET(bit_offset + num_bits); - - if (num_bytes-- > 0) { - bits = val; - } else { - bits = (val << (8 - BIT_OFFSET(num_bits))); - } - bits1 = bits >> rshift; - *iptr = MASK_BITS(bits1, *iptr, lmask); - iptr++; - val >>= 8; - - while (count--) { - bits1 = bits << lshift; - if (num_bytes-- > 0) { - bits = val & 0xff; - } else { - bits = (val << (8 - BIT_OFFSET(num_bits))) & 0xff; - } - *iptr++ = bits1 | (bits >> rshift); - val >>= 8; - } - - if (deoffs) { - bits1 = bits << lshift; - if (rshift < deoffs) { - bits = (val << (8 - BIT_OFFSET(num_bits))) & 0xff; - bits1 |= bits >> rshift; - } - *iptr = bits1; - } } - } else { /* Big endian */ - /* - * Big-endian, more than one byte, but not aligned on a byte boundary. - * Handle the bits up to the next byte boundary specially, - * then let fmt_int() handle the rest. - */ - Uint shift_count = num_bits - rbits; - Sint val = signed_val(arg); - iptr = erts_current_bin+BYTE_OFFSET(bin_offset); - b = *iptr & (0xff << rbits); - - /* - * Shifting with a shift count greater than or equal to the word - * size may be a no-op (instead of 0 the result may be the unshifted - * value). Therefore, only do the shift and the OR if the shift count - * is less than the word size if the number is positive; if negative, - * we must simulate the sign extension. - */ - if (shift_count < sizeof(Uint)*8) { - b |= (val >> shift_count) & ((1 << rbits) - 1); - } else if (val < 0) { - /* Simulate sign extension. */ - b |= (-1) & ((1 << rbits) - 1); - } - *iptr++ = b; - fmt_small(iptr, NBYTES(num_bits-rbits), arg, num_bits-rbits, flags); - } + /* Handle the final partial byte. */ + *iptr = bits << lshift; + } else { + /* + * Little endian small in more than one byte, not aligned + * on a byte boundary, and the size is not evenly + * divisible by 8. + * + * Now this gets complicated. We used to handle this + * directly, but since this case is presumably uncommon, + * we do this in a simpler way in two steps. + * + * First format the integer byte-aligned using the binary + * itself as a temporary buffer. + */ + b = *iptr; + fmt_small_le(iptr, arg, num_bits); + + /* + * Now restore the overwritten bits of the first byte and + * shift everything to the right. + */ + restore_and_shift(iptr, b, bit_offset, num_bits); + } } else if (is_big(arg) && bit_offset == 0) { - /* - * Big number, aligned on a byte boundary. We can format the - * integer directly into the binary. - */ - fmt_big(erts_current_bin+BYTE_OFFSET(bin_offset), - NBYTES(num_bits), arg, num_bits, flags); + /* + * Big number, aligned on a byte boundary. We can format the + * integer directly into the binary. + */ + fmt_big_le(iptr, arg, num_bits); } else if (is_big(arg) && bit_offset + num_bits <= 8) { /* * All bits are in the same byte. @@ -866,68 +1026,37 @@ erts_new_bs_put_integer(ERL_BITS_PROTO_3(Eterm arg, Uint num_bits, unsigned flag ErtsDigit* dp = big_v(arg); Uint val = sign ? -*dp : *dp; - iptr = erts_current_bin+BYTE_OFFSET(bin_offset); b = *iptr & (0xff << rbits); - b |= (val & ((1 << num_bits)-1)) << (rbits-num_bits); + b |= (val & MAKE_MASK(num_bits)) << (rbits-num_bits); *iptr = b; } else if (is_big(arg)) { /* * Big number, not aligned on a byte boundary. - */ - Uint rshift = bit_offset; - Uint lshift = 8 - bit_offset; - Uint deoffs = BIT_OFFSET(bit_offset + num_bits); - Uint lmask = MAKE_MASK(8 - bit_offset); - Uint rmask = (deoffs) ? (MAKE_MASK(deoffs)<<(8-deoffs)) : 0; - Uint count = (num_bits - lshift) / 8; - Uint bits, bits1; - - ASSERT(num_bits - lshift >= 0); - - /* + * * Format the integer byte-aligned using the binary itself as * a temporary buffer. */ - iptr = erts_current_bin + BYTE_OFFSET(bin_offset); b = *iptr; - fmt_big(iptr, NBYTES(num_bits), arg, num_bits, flags); + fmt_big_le(iptr, arg, num_bits); /* * Now restore the overwritten bits of the first byte and * shift everything to the right. */ - bits = *iptr; - bits1 = bits >> rshift; - *iptr = MASK_BITS(bits1, b, lmask); - iptr++; - - while (count--) { - bits1 = bits << lshift; - bits = *iptr; - *iptr++ = bits1 | (bits >> rshift); - } - - if (rmask) { - bits1 = bits << lshift; - if ((rmask << rshift) & 0xff) { - bits = *iptr; - bits1 |= (bits >> rshift); - } - *iptr = MASK_BITS(bits1, *iptr, rmask); - } + restore_and_shift(iptr, b, bit_offset, num_bits); } else { /* Not an integer. */ return 0; } - erts_bin_offset = bin_offset + num_bits; + EBS->erts_bin_offset = bin_offset + num_bits; return 1; } #if !defined(BEAMASM) int -erts_bs_put_utf8(ERL_BITS_PROTO_1(Eterm arg)) +erts_bs_put_utf8(ErlBitsState *EBS, Eterm arg) { - Uint bin_offset = erts_bin_offset; + Uint bin_offset = EBS->erts_bin_offset; Uint bit_offset; Uint num_bits; byte tmp_buf[4]; @@ -944,7 +1073,7 @@ erts_bs_put_utf8(ERL_BITS_PROTO_1(Eterm arg)) if ((bit_offset = BIT_OFFSET(bin_offset)) == 0) { /* We can write directly into the destination binary. */ - dst = erts_current_bin+BYTE_OFFSET(bin_offset); + dst = EBS->erts_current_bin + BYTE_OFFSET(bin_offset); } else { /* Unaligned destination binary. Must use a temporary buffer. */ dst = tmp_buf; @@ -975,19 +1104,19 @@ erts_bs_put_utf8(ERL_BITS_PROTO_1(Eterm arg)) } if (bin_offset != 0) { - erts_copy_bits(dst, 0, 1, erts_current_bin, bin_offset, 1, num_bits); + erts_copy_bits_fwd(dst, 0, EBS->erts_current_bin, bin_offset, num_bits); } - erts_bin_offset += num_bits; + EBS->erts_bin_offset += num_bits; return 1; } #endif int -erts_bs_put_utf16(ERL_BITS_PROTO_2(Eterm arg, Uint flags)) +erts_bs_put_utf16(ErlBitsState *EBS, Eterm arg, Uint flags) { - Uint bin_offset = erts_bin_offset; + Uint bin_offset = EBS->erts_bin_offset; Uint bit_offset; Uint num_bits; byte tmp_buf[4]; @@ -1004,7 +1133,7 @@ erts_bs_put_utf16(ERL_BITS_PROTO_2(Eterm arg, Uint flags)) if ((bit_offset = BIT_OFFSET(bin_offset)) == 0) { /* We can write directly into the destination binary. */ - dst = erts_current_bin+BYTE_OFFSET(bin_offset); + dst = EBS->erts_current_bin + BYTE_OFFSET(bin_offset); } else { /* Unaligned destination binary. Must use a temporary buffer. */ dst = tmp_buf; @@ -1040,17 +1169,16 @@ erts_bs_put_utf16(ERL_BITS_PROTO_2(Eterm arg, Uint flags)) } if (bin_offset != 0) { - erts_copy_bits(dst, 0, 1, erts_current_bin, bin_offset, 1, num_bits); + erts_copy_bits_fwd(dst, 0, EBS->erts_current_bin, bin_offset, num_bits); } - erts_bin_offset += num_bits; + EBS->erts_bin_offset += num_bits; return 1; } int -erts_new_bs_put_binary(Process *c_p, Eterm arg, Uint num_bits) +erts_bs_put_binary(ErlBitsState *EBS, Process *c_p, Eterm arg, Uint num_bits) { - ERL_BITS_DEFINE_STATEP(c_p); Uint offset, size; byte *base; @@ -1066,18 +1194,17 @@ erts_new_bs_put_binary(Process *c_p, Eterm arg, Uint num_bits) return 0; } - copy_binary_to_buffer(erts_current_bin, erts_bin_offset, + copy_binary_to_buffer(EBS->erts_current_bin, EBS->erts_bin_offset, base, offset, num_bits); - erts_bin_offset += num_bits; + EBS->erts_bin_offset += num_bits; BUMP_REDS(c_p, num_bits / BITS_PER_REDUCTION); return 1; } int -erts_new_bs_put_binary_all(Process *c_p, Eterm arg, Uint unit) +erts_bs_put_binary_all(ErlBitsState *EBS, Process *c_p, Eterm arg, Uint unit) { - ERL_BITS_DEFINE_STATEP(c_p); Uint offset, size; byte *base; @@ -1091,9 +1218,9 @@ erts_new_bs_put_binary_all(Process *c_p, Eterm arg, Uint unit) return 0; } - copy_binary_to_buffer(erts_current_bin, erts_bin_offset, + copy_binary_to_buffer(EBS->erts_current_bin, EBS->erts_bin_offset, base, offset, size); - erts_bin_offset += size; + EBS->erts_bin_offset += size; BUMP_REDS(c_p, size / BITS_PER_REDUCTION); return 1; @@ -1106,11 +1233,9 @@ erts_new_bs_put_binary_all(Process *c_p, Eterm arg, Uint unit) * and sets c_p-fvalue to 'type', 'no_float', or 'invalid'. */ Eterm -erts_new_bs_put_float(Process *c_p, Eterm arg, Uint num_bits, int flags) +erts_bs_put_float(ErlBitsState *EBS, Process *c_p, Eterm arg, Uint num_bits, int flags) { - ERL_BITS_DEFINE_STATEP(c_p); - - if (BIT_OFFSET(erts_bin_offset) == 0) { + if (BIT_OFFSET(EBS->erts_bin_offset) == 0) { Uint32 a; Uint32 b; @@ -1222,7 +1347,7 @@ erts_new_bs_put_float(Process *c_p, Eterm arg, Uint num_bits, int flags) } if (BIT_IS_MACHINE_ENDIAN(flags)) { - byte* t = erts_current_bin+BYTE_OFFSET(erts_bin_offset); + byte* t = EBS->erts_current_bin + BYTE_OFFSET(EBS->erts_bin_offset); #ifdef WORDS_BIGENDIAN if (num_bits == 16) { t[0] = a >> 8; @@ -1255,7 +1380,9 @@ erts_new_bs_put_float(Process *c_p, Eterm arg, Uint num_bits, int flags) } #endif } else { - byte* t = erts_current_bin+BYTE_OFFSET(erts_bin_offset) + NBYTES(num_bits); + byte* t = EBS->erts_current_bin + + BYTE_OFFSET(EBS->erts_bin_offset) + + NBYTES(num_bits); #ifdef WORDS_BIGENDIAN if (num_bits == 16) { t[-1] = a >> 8; @@ -1384,28 +1511,41 @@ erts_new_bs_put_float(Process *c_p, Eterm arg, Uint num_bits, int flags) return make_small(num_bits); } if (BIT_IS_MACHINE_ENDIAN(flags)) { - erts_copy_bits(bptr, 0, 1, - erts_current_bin, - erts_bin_offset, 1, num_bits); + erts_copy_bits_fwd(bptr, 0, + EBS->erts_current_bin, + EBS->erts_bin_offset, num_bits); } else { - erts_copy_bits(bptr+NBYTES(num_bits)-1, 0, -1, - erts_current_bin, erts_bin_offset, 1, - num_bits); + byte tmp_buf[8]; + Uint n = BYTE_OFFSET(num_bits); + byte *dst = tmp_buf + n; + + do { + *--dst = *bptr++; + } while (--n != 0); + + erts_copy_bits_fwd(tmp_buf, 0, + EBS->erts_current_bin, + EBS->erts_bin_offset, num_bits); } } - erts_bin_offset += num_bits; + EBS->erts_bin_offset += num_bits; return THE_NON_VALUE; } -void -erts_new_bs_put_string(ERL_BITS_PROTO_2(byte* iptr, Uint num_bytes)) +void +erts_bs_put_string(ErlBitsState* EBS, byte* iptr, Uint num_bytes) { - if (BIT_OFFSET(erts_bin_offset) != 0) { - erts_copy_bits(iptr, 0, 1, erts_current_bin, erts_bin_offset, 1, num_bytes*8); + byte* dst_bin = EBS->erts_current_bin; + Uint dst_offset = EBS->erts_bin_offset; + + EBS->erts_bin_offset = dst_offset + num_bytes * 8; + if (BIT_OFFSET(dst_offset) != 0) { + erts_copy_bits_fwd(iptr, 0, + dst_bin, dst_offset, + num_bytes * 8); } else { - sys_memcpy(erts_current_bin+BYTE_OFFSET(erts_bin_offset), iptr, num_bytes); + sys_memcpy(dst_bin + BYTE_OFFSET(dst_offset), iptr, num_bytes); } - erts_bin_offset += num_bytes*8; } static ERTS_INLINE @@ -1488,8 +1628,9 @@ erts_bs_append_checked(Process* c_p, Eterm* reg, Uint live, BinRef* br; Binary* binp; Uint heap_need; + Uint position; Uint used_size_in_bits; - ERL_BITS_DEFINE_STATEP(c_p); + ErlBitsState* EBS = ERL_BITS_EBS_FROM_REG(reg); /* * Check the binary argument. @@ -1528,10 +1669,10 @@ erts_bs_append_checked(Process* c_p, Eterm* reg, Uint live, * OK, the binary is writable. */ ASSERT(sb->start == 0); - erts_bin_offset = sb->end; + EBS->erts_bin_offset = position = sb->end; if (unit > 1) { - if ((unit == 8 && (erts_bin_offset & 7) != 0) || - (unit != 8 && (erts_bin_offset % unit) != 0)) { + if ((unit == 8 && (position & 7) != 0) || + (unit != 8 && (position % unit) != 0)) { c_p->fvalue = am_unit; goto badarg; } @@ -1548,13 +1689,13 @@ erts_bs_append_checked(Process* c_p, Eterm* reg, Uint live, return bin; } - if((ERTS_UINT_MAX - build_size_in_bits) < erts_bin_offset) { + if ((ERTS_UINT_MAX - build_size_in_bits) < position) { c_p->fvalue = am_size; c_p->freason = SYSTEM_LIMIT; return THE_NON_VALUE; } - used_size_in_bits = erts_bin_offset + build_size_in_bits; + used_size_in_bits = position + build_size_in_bits; /* Make sure that no one else can append to the incoming bitstring. */ erl_sub_bits_clear_writable(sb); @@ -1569,11 +1710,11 @@ erts_bs_append_checked(Process* c_p, Eterm* reg, Uint live, binp = erts_bin_realloc(binp, new_size); br->val = binp; - BUMP_REDS(c_p, erts_bin_offset / BITS_PER_REDUCTION); + BUMP_REDS(c_p, position / BITS_PER_REDUCTION); } binp->intern.apparent_size = NBYTES(used_size_in_bits); - erts_current_bin = (byte*)binp->orig_bytes; + EBS->erts_current_bin = (byte*)binp->orig_bytes; /* Allocate heap space and build a new sub binary. */ reg[live] = sb->orig; @@ -1592,7 +1733,7 @@ erts_bs_append_checked(Process* c_p, Eterm* reg, Uint live, erl_sub_bits_init(sb, ERL_SUB_BITS_FLAGS_WRITABLE, reg[live], - erts_current_bin, + EBS->erts_current_bin, 0, used_size_in_bits); @@ -1656,10 +1797,10 @@ erts_bs_append_checked(Process* c_p, Eterm* reg, Uint live, &br, &sb); - erts_current_bin = (byte*)(br->val)->orig_bytes; - erts_bin_offset = src_size; + EBS->erts_current_bin = (byte*)(br->val)->orig_bytes; + EBS->erts_bin_offset = src_size; - copy_binary_to_buffer(erts_current_bin, + copy_binary_to_buffer(EBS->erts_current_bin, 0, src_bytes, src_offset, @@ -1671,15 +1812,14 @@ erts_bs_append_checked(Process* c_p, Eterm* reg, Uint live, } Eterm -erts_bs_private_append_checked(Process* p, Eterm bin, Uint build_size_in_bits, Uint unit) +erts_bs_private_append_checked(ErlBitsState* EBS, Process* p, + Eterm bin, Uint build_size_in_bits) { - Uint new_position, new_size, used_size; + Uint old_position, new_position, used_size; Binary *refc_binary; ErlSubBits *sb; BinRef *br; - ERL_BITS_DEFINE_STATEP(p); - sb = (ErlSubBits*)bitstring_val(bin); ASSERT(sb->thing_word == HEADER_SUB_BITS); @@ -1688,21 +1828,24 @@ erts_bs_private_append_checked(Process* p, Eterm bin, Uint build_size_in_bits, U /* Calculate new size in bits. */ ASSERT(sb->start == 0); - erts_bin_offset = sb->end; + EBS->erts_bin_offset = old_position = sb->end; - if((ERTS_UINT_MAX - build_size_in_bits) < erts_bin_offset) { +#ifdef BEAMASM + ASSERT(ERTS_UINT_MAX - build_size_in_bits >= old_position); +#else + if (ERTS_UINT_MAX - build_size_in_bits < old_position) { p->fvalue = am_size; p->freason = SYSTEM_LIMIT; return THE_NON_VALUE; } +#endif refc_binary = br->val; - new_position = erts_bin_offset + build_size_in_bits; - update_wb_overhead(p, br, sb->end, new_position); - + new_position = old_position + build_size_in_bits; used_size = NBYTES(new_position); - new_size = GROW_PROC_BIN_SIZE(used_size); + + update_wb_overhead(p, br, old_position, new_position); if (refc_binary->intern.flags & BIN_FLAG_WRITABLE) { /* This is the normal case - the binary is writable. There are no other @@ -1711,10 +1854,11 @@ erts_bs_private_append_checked(Process* p, Eterm bin, Uint build_size_in_bits, U ASSERT(erl_sub_bits_is_writable(sb)); ASSERT(erts_refc_read(&refc_binary->intern.refc, 1) == 1); if (refc_binary->orig_size < used_size) { + Uint new_size = GROW_PROC_BIN_SIZE(used_size); refc_binary = erts_bin_realloc(refc_binary, new_size); br->val = refc_binary; - BUMP_REDS(p, erts_bin_offset / BITS_PER_REDUCTION); + BUMP_REDS(p, EBS->erts_bin_offset / BITS_PER_REDUCTION); } ASSERT(sb->start == 0); @@ -1730,10 +1874,11 @@ erts_bs_private_append_checked(Process* p, Eterm bin, Uint build_size_in_bits, U * binary and make a copy of the data. * * We'll also make a new BinRef as the old one may have been moved from - * the `wrt_bins` list to the regular `off_heap` list by the GC. To + * the `wrt_bins` list to the regular `off_heap` list by the GC. * To move it back would mean traversing the `off_heap` list from the * start, so we'll create a new BinRef instead for this (hopefully) * rare case. */ + Uint new_size = GROW_PROC_BIN_SIZE(used_size); Binary *new_binary = erts_bin_nrml_alloc(new_size); Eterm *hp = HeapFragOnlyAlloc(p, ERL_REFC_BITS_SIZE); @@ -1749,12 +1894,12 @@ erts_bs_private_append_checked(Process* p, Eterm bin, Uint build_size_in_bits, U refc_binary->orig_bytes, MIN(refc_binary->orig_size, new_size)); - BUMP_REDS(p, erts_bin_offset / BITS_PER_REDUCTION); + BUMP_REDS(p, EBS->erts_bin_offset / BITS_PER_REDUCTION); refc_binary = new_binary; } ASSERT(refc_binary->intern.flags & BIN_FLAG_WRITABLE); - erts_current_bin = (byte*)&refc_binary->orig_bytes[0]; + EBS->erts_current_bin = (byte*)&refc_binary->orig_bytes[0]; return make_bitstring(sb); } @@ -1829,7 +1974,7 @@ erts_bs_get_unaligned_uint32(ErlSubBits* sb) byte bigbuf[4]; byte* LSB; byte* MSB; - + CHECK_MATCH_BUFFER(sb); ASSERT((sb->start & 7) != 0); ASSERT(sb->end - sb->start >= 32); @@ -1841,7 +1986,7 @@ erts_bs_get_unaligned_uint32(ErlSubBits* sb) MSB = LSB + bytes - 1; *MSB = 0; - erts_copy_bits(erl_sub_bits_get_base(sb), sb->start, 1, MSB, offs, -1, 32); + erts_copy_bits_rev(erl_sub_bits_get_base(sb), sb->start, MSB, offs, 32); return LSB[0] | (LSB[1]<<8) | (LSB[2]<<16) | (LSB[3]<<24); } @@ -1864,7 +2009,7 @@ erts_align_utf8_bytes(ErlSubBits *sb, byte* buf) } else { bits = 16; } - erts_copy_bits(erl_sub_bits_get_base(sb), sb->start, 1, buf, 0, 1, bits); + erts_copy_bits_fwd(erl_sub_bits_get_base(sb), sb->start, buf, 0, bits); } Eterm @@ -1989,8 +2134,8 @@ erts_bs_get_utf16(ErlSubBits *sb, Uint flags) * get 4 bytes, otherwise two bytes. */ Uint n = num_bits < 32 ? 16 : 32; - erts_copy_bits(erl_sub_bits_get_base(sb), sb->start, 1, - tmp_buf, 0, 1, n); + erts_copy_bits_fwd(erl_sub_bits_get_base(sb), sb->start, + tmp_buf, 0, n); src = tmp_buf; } @@ -2124,18 +2269,141 @@ int erts_cmp_bits__(const byte *a_ptr, /* * The basic bit copy operation. Copies n bits from the source buffer to - * the destination buffer. Depending on the directions, it can reverse the - * copied bits. + * the destination buffer. */ +void +erts_copy_bits_fwd(const byte* src, /* Base pointer to source. */ + size_t soffs, /* Bit offset for source relative to src. */ + byte* dst, /* Base pointer to destination. */ + size_t doffs, /* Bit offset for destination relative to dst. */ + size_t n) /* Number of bits to copy. */ +{ + Uint lmask; + Uint rmask; + Uint count; + Uint deoffs; + + if (n == 0) { + return; + } + + src += BYTE_OFFSET(soffs); + dst += BYTE_OFFSET(doffs); + soffs = BIT_OFFSET(soffs); + doffs = BIT_OFFSET(doffs); + deoffs = BIT_OFFSET(doffs+n); + lmask = (doffs) ? MAKE_MASK(8-doffs) : 0; + rmask = (deoffs) ? (MAKE_MASK(deoffs)<<(8-deoffs)) : 0; + + /* + * Take care of the case that all bits are in the same byte. + */ + + if (doffs+n < 8) { /* All bits are in the same byte */ + lmask = (lmask & rmask) ? (lmask & rmask) : (lmask | rmask); + + if (soffs == doffs) { + *dst = MASK_BITS(*src, *dst, lmask); + } else if (soffs > doffs) { + Uint bits = (*src << (soffs-doffs)); + if (soffs+n > 8) { + src++; + bits |= (*src >> (8-(soffs-doffs))); + } + *dst = MASK_BITS(bits, *dst, lmask); + } else { + *dst = MASK_BITS((*src >> (doffs-soffs)), *dst, lmask); + } + return; /* We are done! */ + } + + /* + * At this point, we know that the bits are in 2 or more bytes. + */ + + count = (lmask ? (n - (8 - doffs)) : n) >> 3; + + if (soffs == doffs) { + /* + * The bits are aligned in the same way. We can just copy the bytes + * (except for the first and last bytes). + */ + + if (lmask) { + *dst = MASK_BITS(*src, *dst, lmask); + dst++, src++; + } + + sys_memcpy(dst, src, count); -void -erts_copy_bits(const byte* src, /* Base pointer to source. */ + if (rmask) { + dst += count; + src += count; + *dst = MASK_BITS(*src, *dst, rmask); + } + } else { + Uint bits; + Uint bits1; + Uint rshift; + Uint lshift; + + /* + * The tricky case. The bits must be shifted into position. + */ + + if (soffs > doffs) { + lshift = soffs - doffs; + rshift = 8 - lshift; + bits = *src; + if (soffs + n > 8) { + src++; + } + } else { + rshift = doffs - soffs; + lshift = 8 - rshift; + bits = 0; + } + + if (lmask) { + bits1 = bits << lshift; + bits = *src++; + bits1 |= (bits >> rshift); + *dst = MASK_BITS(bits1, *dst, lmask); + dst++; + } + + while (count--) { + bits1 = bits << lshift; + bits = *src++; + *dst = bits1 | (bits >> rshift); + dst++; + } + + if (rmask) { + bits1 = bits << lshift; + if ((rmask << rshift) & 0xff) { + bits = *src; + bits1 |= (bits >> rshift); + } + *dst = MASK_BITS(bits1, *dst, rmask); + } + } +} + +/* + * The reverse bit copy operation. Copies n bits from the source + * buffer to the destination buffer. The bits are read 8 bits at the + * time from the source buffer, while incrementing the source buffer + * pointer. The 8 bit groups are stored into the destination buffer, + * while decrementing the destination buffer pointer. + */ + +void +erts_copy_bits_rev(const byte* src, /* Base pointer to source. */ size_t soffs, /* Bit offset for source relative to src. */ - int sdir, /* Direction: 1 (forward) or -1 (backward). */ byte* dst, /* Base pointer to destination. */ size_t doffs, /* Bit offset for destination relative to dst. */ - int ddir, /* Direction: 1 (forward) or -1 (backward). */ size_t n) /* Number of bits to copy. */ { Uint lmask; @@ -2144,11 +2412,11 @@ erts_copy_bits(const byte* src, /* Base pointer to source. */ Uint deoffs; if (n == 0) { - return; + return; } - src += sdir*BYTE_OFFSET(soffs); - dst += ddir*BYTE_OFFSET(doffs); + src += BYTE_OFFSET(soffs); + dst -= BYTE_OFFSET(doffs); soffs = BIT_OFFSET(soffs); doffs = BIT_OFFSET(doffs); deoffs = BIT_OFFSET(doffs+n); @@ -2160,21 +2428,21 @@ erts_copy_bits(const byte* src, /* Base pointer to source. */ */ if (doffs+n < 8) { /* All bits are in the same byte */ - lmask = (lmask & rmask) ? (lmask & rmask) : (lmask | rmask); - - if (soffs == doffs) { - *dst = MASK_BITS(*src,*dst,lmask); - } else if (soffs > doffs) { - Uint bits = (*src << (soffs-doffs)); - if (soffs+n > 8) { - src += sdir; - bits |= (*src >> (8-(soffs-doffs))); - } - *dst = MASK_BITS(bits,*dst,lmask); - } else { - *dst = MASK_BITS((*src >> (doffs-soffs)),*dst,lmask); - } - return; /* We are done! */ + lmask = (lmask & rmask) ? (lmask & rmask) : (lmask | rmask); + + if (soffs == doffs) { + *dst = MASK_BITS(*src,*dst,lmask); + } else if (soffs > doffs) { + Uint bits = (*src << (soffs-doffs)); + if (soffs+n > 8) { + src++; + bits |= (*src >> (8-(soffs-doffs))); + } + *dst = MASK_BITS(bits,*dst,lmask); + } else { + *dst = MASK_BITS((*src >> (doffs-soffs)),*dst,lmask); + } + return; /* We are done! */ } /* @@ -2184,75 +2452,70 @@ erts_copy_bits(const byte* src, /* Base pointer to source. */ count = ((lmask) ? (n - (8 - doffs)) : n) >> 3; if (soffs == doffs) { - /* - * The bits are aligned in the same way. We can just copy the bytes - * (except for the first and last bytes). Note that the directions - * might be different, so we can't just use memcpy(). - */ + /* + * The bits are aligned in the same way. We can just copy the bytes + * (except for the first and last bytes). + */ - if (lmask) { - *dst = MASK_BITS(*src, *dst, lmask); - dst += ddir; - src += sdir; - } + if (lmask) { + *dst = MASK_BITS(*src, *dst, lmask); + dst--, src++; + } - while (count--) { - *dst = *src; - dst += ddir; - src += sdir; - } + while (count--) { + *dst-- = *src++; + } - if (rmask) { - *dst = MASK_BITS(*src,*dst,rmask); - } + if (rmask) { + *dst = MASK_BITS(*src, *dst, rmask); + } } else { - Uint bits; - Uint bits1; - Uint rshift; - Uint lshift; + Uint bits; + Uint bits1; + Uint rshift; + Uint lshift; - /* - * The tricky case. The bits must be shifted into position. - */ - - if (soffs > doffs) { - lshift = (soffs - doffs); - rshift = 8 - lshift; - bits = *src; - if (soffs + n > 8) { - src += sdir; - } - } else { - rshift = (doffs - soffs); - lshift = 8 - rshift; - bits = 0; - } - - if (lmask) { - bits1 = bits << lshift; - bits = *src; - src += sdir; - bits1 |= (bits >> rshift); - *dst = MASK_BITS(bits1,*dst,lmask); - dst += ddir; - } + /* + * The tricky case. The bits must be shifted into position. + */ - while (count--) { - bits1 = bits << lshift; - bits = *src; - src += sdir; - *dst = bits1 | (bits >> rshift); - dst += ddir; - } - - if (rmask) { - bits1 = bits << lshift; - if ((rmask << rshift) & 0xff) { - bits = *src; - bits1 |= (bits >> rshift); - } - *dst = MASK_BITS(bits1,*dst,rmask); - } + if (soffs > doffs) { + lshift = (soffs - doffs); + rshift = 8 - lshift; + bits = *src; + if (soffs + n > 8) { + src++; + } + } else { + rshift = doffs - soffs; + lshift = 8 - rshift; + bits = 0; + } + + if (lmask) { + bits1 = bits << lshift; + bits = *src; + src++; + bits1 |= (bits >> rshift); + *dst = MASK_BITS(bits1, *dst, lmask); + dst--; + } + + while (count--) { + bits1 = bits << lshift; + bits = *src++; + *dst = bits1 | (bits >> rshift); + dst--; + } + + if (rmask) { + bits1 = bits << lshift; + if ((rmask << rshift) & 0xff) { + bits = *src; + bits1 |= (bits >> rshift); + } + *dst = MASK_BITS(bits1, *dst, rmask); + } } } diff --git a/erts/emulator/beam/erl_bits.h b/erts/emulator/beam/erl_bits.h index 619f6157e4ec..6d4200e4a2b1 100644 --- a/erts/emulator/beam/erl_bits.h +++ b/erts/emulator/beam/erl_bits.h @@ -168,43 +168,28 @@ struct erl_bits_state { /* * Pointer to the beginning of the current binary. */ - byte* erts_current_bin_; + byte* erts_current_bin; /* * Offset in bits into the current binary. */ - Uint erts_bin_offset_; + Uint erts_bin_offset; }; +typedef struct erl_bits_state ErlBitsState; + /* - * Reentrant API with the state passed as a parameter. - * (Except when the current Process* already is a parameter.) + * The bit syntax construction state resides in the current process's + * schduler data. The following macro retrieves the pointer to that + * state given a pointer to the X register array. */ -/* the state resides in the current process' scheduler data */ -#define ERL_BITS_DECLARE_STATEP struct erl_bits_state *EBS - -#define ERL_BITS_RELOAD_STATEP(P) \ - do { \ - EBS = &erts_proc_sched_data((P))->registers->aux_regs.d.erl_bits_state; \ - } while(0) - -#define ERL_BITS_DEFINE_STATEP(P) \ - struct erl_bits_state *EBS = \ - &erts_proc_sched_data((P))->registers->aux_regs.d.erl_bits_state -#define ErlBitsState (*EBS) - -#define ERL_BITS_PROTO_0 struct erl_bits_state *EBS -#define ERL_BITS_PROTO_1(PARM1) struct erl_bits_state *EBS, PARM1 -#define ERL_BITS_PROTO_2(PARM1,PARM2) struct erl_bits_state *EBS, PARM1, PARM2 -#define ERL_BITS_PROTO_3(PARM1,PARM2,PARM3) struct erl_bits_state *EBS, PARM1, PARM2, PARM3 -#define ERL_BITS_ARGS_0 EBS -#define ERL_BITS_ARGS_1(ARG1) EBS, ARG1 -#define ERL_BITS_ARGS_2(ARG1,ARG2) EBS, ARG1, ARG2 -#define ERL_BITS_ARGS_3(ARG1,ARG2,ARG3) EBS, ARG1, ARG2, ARG3 - -#define erts_bin_offset (ErlBitsState.erts_bin_offset_) -#define erts_current_bin (ErlBitsState.erts_current_bin_) +#define ERL_BITS_EBS_FROM_REG(Reg) \ + ((ErlBitsState *) ((char *)(Reg) + \ + (offsetof(ErtsSchedulerRegisters, \ + aux_regs.d.erl_bits_state) - \ + offsetof(ErtsSchedulerRegisters, \ + x_reg_array.d)))) /* * Return number of Eterm words needed for allocation with HAlloc(), @@ -231,22 +216,25 @@ Eterm erts_bs_get_binary_2(Process *p, Uint num_bits, unsigned flags, ErlSubBits Eterm erts_bs_get_binary_all_2(Process *p, ErlSubBits* sb); /* Binary construction, new instruction set. */ -int erts_new_bs_put_integer(ERL_BITS_PROTO_3(Eterm Integer, Uint num_bits, unsigned flags)); +int erts_bs_put_integer_be(ErlBitsState *EBS, Eterm Integer, Uint num_bits); +int erts_bs_put_integer_le(ErlBitsState *EBS, Eterm Integer, Uint num_bits); #if !defined(BEAMASM) -int erts_bs_put_utf8(ERL_BITS_PROTO_1(Eterm Integer)); +int erts_bs_put_utf8(ErlBitsState *EBS, Eterm Integer); #endif -int erts_bs_put_utf16(ERL_BITS_PROTO_2(Eterm Integer, Uint flags)); -int erts_new_bs_put_binary(Process *c_p, Eterm Bin, Uint num_bits); -int erts_new_bs_put_binary_all(Process *c_p, Eterm Bin, Uint unit); -Eterm erts_new_bs_put_float(Process *c_p, Eterm Float, Uint num_bits, int flags); -void erts_new_bs_put_string(ERL_BITS_PROTO_2(byte* iptr, Uint num_bytes)); +int erts_bs_put_utf16(ErlBitsState *EBS, Eterm Integer, Uint flags); +int erts_bs_put_binary(ErlBitsState *EBS, Process *c_p, Eterm Bin, Uint num_bits); +int erts_bs_put_binary_all(ErlBitsState* EBS, Process *c_p, Eterm Bin, Uint unit); +Eterm erts_bs_put_float(ErlBitsState *EBS, Process *c_p, Eterm Float, + Uint num_bits, int flags); +void erts_bs_put_string(ErlBitsState *EBS, byte* iptr, Uint num_bytes); Uint32 erts_bs_get_unaligned_uint32(ErlSubBits* sb); Eterm erts_bs_get_utf8(ErlSubBits* sb); Eterm erts_bs_get_utf16(ErlSubBits* sb, Uint flags); Eterm erts_bs_append_checked(Process* p, Eterm* reg, Uint live, Uint size, Uint extra_words, Uint unit); -Eterm erts_bs_private_append_checked(Process* p, Eterm bin, Uint size, Uint unit); +Eterm erts_bs_private_append_checked(ErlBitsState* EBS, Process* p, + Eterm bin, Uint size); Eterm erts_bs_init_writable(Process* p, Eterm sz); /* ************************************************************************* */ @@ -257,8 +245,10 @@ copy_binary_to_buffer(byte *dst_base, Uint dst_offset, const byte *src_base, Uint src_offset, Uint size); -void erts_copy_bits(const byte* src, size_t soffs, int sdir, - byte* dst, size_t doffs, int ddir, size_t n); +void erts_copy_bits_fwd(const byte* src, size_t soffs, + byte* dst, size_t doffs, size_t n); +void erts_copy_bits_rev(const byte* src, size_t soffs, + byte* dst, size_t doffs, size_t n); ERTS_GLB_INLINE int erts_cmp_bits(const byte* a_ptr, Uint a_offs, @@ -544,9 +534,9 @@ copy_binary_to_buffer(byte *dst_base, Uint dst_offset, if (((dst_offset | src_offset | size) & 7) == 0) { sys_memcpy(dst_base, src_base, BYTE_SIZE(size)); } else { - erts_copy_bits(src_base, BIT_OFFSET(src_offset), 1, - dst_base, BIT_OFFSET(dst_offset), 1, - size); + erts_copy_bits_fwd(src_base, BIT_OFFSET(src_offset), + dst_base, BIT_OFFSET(dst_offset), + size); } } } @@ -618,7 +608,7 @@ erts_get_aligned_binary_bytes_extra(Eterm bin, NBYTES(size) + extra); *base_ptr = bytes; - erts_copy_bits(base, offset, 1, &bytes[extra], 0, 1, size); + erts_copy_bits_fwd(base, offset, &bytes[extra], 0, size); return &bytes[extra]; } diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index 2563d40ad563..0e554a336a99 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -1352,7 +1352,7 @@ int enif_inspect_binary(ErlNifEnv* env, Eterm bin_term, ErlNifBinary* bin) env->tmp_obj_list = tmp_obj; bin->data = (byte*)&tmp_obj[1]; - erts_copy_bits(base, offset, 1, bin->data, 0, 1, size); + erts_copy_bits_fwd(base, offset, bin->data, 0, size); } else { bin->data = &base[BYTE_OFFSET(offset)]; } diff --git a/erts/emulator/beam/erl_term_hashing.c b/erts/emulator/beam/erl_term_hashing.c index 796dd9574b0c..dae3bba60f64 100644 --- a/erts/emulator/beam/erl_term_hashing.c +++ b/erts/emulator/beam/erl_term_hashing.c @@ -1276,8 +1276,8 @@ make_hash2_helper(Eterm term_param, const int can_trap, Eterm* state_mref_write_ byte *buf = erts_alloc(ERTS_ALC_T_TMP, nr_of_bytes); Uint nr_of_bits_to_copy = ctx.sz*BYTE_BITS+ctx.bitsize; if (can_trap) iterations_until_trap -= iters_for_bin; - erts_copy_bits(ctx.bptr, - ctx.bitoffs, 1, buf, 0, 1, nr_of_bits_to_copy); + erts_copy_bits_fwd(ctx.bptr, ctx.bitoffs, + buf, 0, nr_of_bits_to_copy); hash = block_hash(buf, ctx.sz, con); if (ctx.bitsize > 0) { UINT32_HASH_2(ctx.bitsize, @@ -1312,9 +1312,9 @@ make_hash2_helper(Eterm term_param, const int can_trap, Eterm* state_mref_write_ Uint nr_of_bits_to_copy = MIN(nr_of_bits_left, BINARY_BUF_SIZE_BITS); ctx.done = nr_of_bits_left == nr_of_bits_to_copy; - erts_copy_bits(ctx.bptr + ctx.no_bytes_processed, - ctx.bitoffs, 1, ctx.buf, 0, 1, - nr_of_bits_to_copy); + erts_copy_bits_fwd(ctx.bptr + ctx.no_bytes_processed, + ctx.bitoffs, ctx.buf, 0, + nr_of_bits_to_copy); block_hash_buffer(ctx.buf, bytes_to_process, block_hash_ctx); @@ -1948,7 +1948,7 @@ make_internal_hash(Eterm term, erts_ihash_t salt) if (BIT_OFFSET(offset) != 0) { byte *tmp = (byte*)erts_alloc(ERTS_ALC_T_TMP, NBYTES(size)); - erts_copy_bits(data, offset, 1, tmp, 0, 1, size); + erts_copy_bits_fwd(data, offset, tmp, 0, size); bytes = tmp; } else { bytes = &data[BYTE_OFFSET(offset)]; diff --git a/erts/emulator/beam/jit/arm/instr_bs.cpp b/erts/emulator/beam/jit/arm/instr_bs.cpp index f6a84591c998..d28c40321cce 100644 --- a/erts/emulator/beam/jit/arm/instr_bs.cpp +++ b/erts/emulator/beam/jit/arm/instr_bs.cpp @@ -1199,7 +1199,7 @@ void BeamModuleAssembler::update_bin_state(a64::Gp bin_offset, Sint size, a64::Gp size_reg) { int cur_bin_offset = offsetof(ErtsSchedulerRegisters, - aux_regs.d.erl_bits_state.erts_current_bin_); + aux_regs.d.erl_bits_state.erts_current_bin); arm::Mem mem_bin_base = arm::Mem(scheduler_registers, cur_bin_offset); arm::Mem mem_bin_offset = arm::Mem(scheduler_registers, cur_bin_offset + sizeof(Eterm)); @@ -1207,8 +1207,8 @@ void BeamModuleAssembler::update_bin_state(a64::Gp bin_offset, if (bit_offset % 8 != 0) { /* The bit offset is unknown or not byte-aligned. */ ERTS_CT_ASSERT_FIELD_PAIR(struct erl_bits_state, - erts_current_bin_, - erts_bin_offset_); + erts_current_bin, + erts_bin_offset); a.ldp(TMP2, bin_offset, mem_bin_base); if (size_reg.isValid()) { @@ -2021,14 +2021,14 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, BscSegment seg = segments[0]; comment("private append to binary"); ASSERT(Alloc.get() == 0); - mov_arg(ARG2, seg.src); + load_erl_bits_state(ARG1); + a.mov(ARG2, c_p); + mov_arg(ARG3, seg.src); if (sizeReg.isValid()) { - a.mov(ARG3, sizeReg); + a.mov(ARG4, sizeReg); } else { - mov_imm(ARG3, num_bits); + mov_imm(ARG4, num_bits); } - a.mov(ARG4, seg.unit); - a.mov(ARG1, c_p); emit_enter_runtime(Live.get()); runtime_call<4>(erts_bs_private_append_checked); emit_leave_runtime(Live.get()); @@ -2036,7 +2036,7 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, } else if (estimated_num_bits <= ERL_ONHEAP_BITS_LIMIT) { static constexpr auto cur_bin_offset = offsetof(ErtsSchedulerRegisters, aux_regs.d.erl_bits_state) + - offsetof(struct erl_bits_state, erts_current_bin_); + offsetof(struct erl_bits_state, erts_current_bin); Uint need; arm::Mem mem_bin_base = arm::Mem(scheduler_registers, cur_bin_offset); @@ -2104,8 +2104,8 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, /* Initialize the erl_bin_state struct. */ ERTS_CT_ASSERT_FIELD_PAIR(struct erl_bits_state, - erts_current_bin_, - erts_bin_offset_); + erts_current_bin, + erts_bin_offset); a.stp(HTOP, ZERO, mem_bin_base); /* Update HTOP. */ @@ -2158,11 +2158,12 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, comment("construct a binary segment"); if (seg.effectiveSize >= 0) { /* The segment has a literal size. */ - mov_imm(ARG3, seg.effectiveSize); - mov_arg(ARG2, seg.src); - a.mov(ARG1, c_p); + load_erl_bits_state(ARG1); + a.mov(ARG2, c_p); + mov_arg(ARG3, seg.src); + mov_imm(ARG4, seg.effectiveSize); emit_enter_runtime(Live.get()); - runtime_call<3>(erts_new_bs_put_binary); + runtime_call<4>(erts_bs_put_binary); emit_leave_runtime(Live.get()); error_info = beam_jit_update_bsc_reason_info(seg.error_info, BSC_REASON_BADARG, @@ -2172,12 +2173,13 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, seg.size.as().get() == am_all) { /* Include the entire binary/bitstring in the * resulting binary. */ - a.mov(ARG3, seg.unit); - mov_arg(ARG2, seg.src); - a.mov(ARG1, c_p); + load_erl_bits_state(ARG1); + a.mov(ARG2, c_p); + mov_arg(ARG3, seg.src); + mov_imm(ARG4, seg.unit); emit_enter_runtime(Live.get()); - runtime_call<3>(erts_new_bs_put_binary_all); + runtime_call<4>(erts_bs_put_binary_all); emit_leave_runtime(Live.get()); error_info = beam_jit_update_bsc_reason_info(seg.error_info, @@ -2195,17 +2197,18 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, * the value is a non-negative small in the * appropriate range. Multiply the size with the * unit. */ - auto r = load_source(seg.size, ARG3); - a.asr(ARG3, r.reg, imm(_TAG_IMMED1_SIZE)); + auto r = load_source(seg.size, ARG4); + a.asr(ARG4, r.reg, imm(_TAG_IMMED1_SIZE)); if (seg.unit != 1) { mov_imm(TMP1, seg.unit); - a.mul(ARG3, ARG3, TMP1); + a.mul(ARG4, ARG4, TMP1); } - mov_arg(ARG2, seg.src); - a.mov(ARG1, c_p); + load_erl_bits_state(ARG1); + a.mov(ARG2, c_p); + mov_arg(ARG3, seg.src); emit_enter_runtime(Live.get()); - runtime_call<3>(erts_new_bs_put_binary); + runtime_call<4>(erts_bs_put_binary); emit_leave_runtime(Live.get()); error_info = beam_jit_update_bsc_reason_info(seg.error_info, @@ -2225,21 +2228,22 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, case am_float: comment("construct float segment"); if (seg.effectiveSize >= 0) { - mov_imm(ARG3, seg.effectiveSize); + mov_imm(ARG4, seg.effectiveSize); } else { - auto r = load_source(seg.size, ARG3); - a.asr(ARG3, r.reg, imm(_TAG_IMMED1_SIZE)); + auto r = load_source(seg.size, ARG4); + a.asr(ARG4, r.reg, imm(_TAG_IMMED1_SIZE)); if (seg.unit != 1) { mov_imm(TMP1, seg.unit); - a.mul(ARG3, ARG3, TMP1); + a.mul(ARG4, ARG4, TMP1); } } - mov_arg(ARG2, seg.src); - mov_imm(ARG4, seg.flags); - a.mov(ARG1, c_p); + load_erl_bits_state(ARG1); + a.mov(ARG2, c_p); + mov_arg(ARG3, seg.src); + mov_imm(ARG5, seg.flags); emit_enter_runtime(Live.get()); - runtime_call<4>(erts_new_bs_put_float); + runtime_call<5>(erts_bs_put_float); emit_leave_runtime(Live.get()); if (Fail.get() == 0) { @@ -2490,12 +2494,15 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, } else { /* Call the helper function to fetch and store the * integer into the binary. */ - mov_arg(ARG2, seg.src); - mov_imm(ARG4, seg.flags); load_erl_bits_state(ARG1); + mov_arg(ARG2, seg.src); emit_enter_runtime(Live.get()); - runtime_call<4>(erts_new_bs_put_integer); + if (seg.flags & BSF_LITTLE) { + runtime_call<3>(erts_bs_put_integer_le); + } else { + runtime_call<3>(erts_bs_put_integer_be); + } emit_leave_runtime(Live.get()); if (exact_type(seg.src)) { @@ -2522,12 +2529,12 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, comment("insert string"); ASSERT(seg.effectiveSize >= 0); - mov_imm(ARG3, seg.effectiveSize / 8); - mov_arg(ARG2, string_ptr); load_erl_bits_state(ARG1); + mov_arg(ARG2, string_ptr); + mov_imm(ARG3, seg.effectiveSize / 8); emit_enter_runtime(Live.get()); - runtime_call<3>(erts_new_bs_put_string); + runtime_call<3>(erts_bs_put_string); emit_leave_runtime(Live.get()); break; } @@ -2556,13 +2563,16 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, a.cbz(ARG1, resolve_label(error, disp1MB)); break; case am_utf32: + load_erl_bits_state(ARG1); mov_arg(ARG2, seg.src); mov_imm(ARG3, 4 * 8); - a.mov(ARG4, seg.flags); - load_erl_bits_state(ARG1); emit_enter_runtime(Live.get()); - runtime_call<4>(erts_new_bs_put_integer); + if (seg.flags & BSF_LITTLE) { + runtime_call<3>(erts_bs_put_integer_le); + } else { + runtime_call<3>(erts_bs_put_integer_be); + } emit_leave_runtime(Live.get()); if (Fail.get() == 0) { diff --git a/erts/emulator/beam/jit/beam_jit_common.cpp b/erts/emulator/beam/jit/beam_jit_common.cpp index fde678606ffa..3b348db4b9ad 100644 --- a/erts/emulator/beam/jit/beam_jit_common.cpp +++ b/erts/emulator/beam/jit/beam_jit_common.cpp @@ -808,7 +808,7 @@ void beam_jit_bs_add_argument_error(Process *c_p, Eterm A, Eterm B) { Eterm beam_jit_bs_init_bits(Process *c_p, Eterm *reg, - ERL_BITS_DECLARE_STATEP, + ErlBitsState *EBS, Uint num_bits, Uint alloc, unsigned Live) { @@ -818,7 +818,7 @@ Eterm beam_jit_bs_init_bits(Process *c_p, alloc += ERL_REFC_BITS_SIZE; } - erts_bin_offset = 0; + EBS->erts_bin_offset = 0; if (num_bits <= ERL_ONHEAP_BITS_LIMIT) { ErlHeapBits *hb; @@ -830,7 +830,7 @@ Eterm beam_jit_bs_init_bits(Process *c_p, hb->thing_word = header_heap_bits(num_bits); hb->size = num_bits; - erts_current_bin = (byte *)hb->data; + EBS->erts_current_bin = (byte *)hb->data; return make_bitstring(hb); } else { const Uint num_bytes = NBYTES(num_bits); @@ -839,13 +839,13 @@ Eterm beam_jit_bs_init_bits(Process *c_p, test_bin_vheap(c_p, reg, num_bytes / sizeof(Eterm), alloc, Live); new_binary = erts_bin_nrml_alloc(num_bytes); - erts_current_bin = (byte *)new_binary->orig_bytes; + EBS->erts_current_bin = (byte *)new_binary->orig_bytes; return erts_wrap_refc_bitstring(&MSO(c_p).first, &MSO(c_p).overhead, &HEAP_TOP(c_p), new_binary, - erts_current_bin, + EBS->erts_current_bin, 0, num_bits); } diff --git a/erts/emulator/beam/jit/beam_jit_common.hpp b/erts/emulator/beam/jit/beam_jit_common.hpp index 934821d8f532..3d34674153e0 100644 --- a/erts/emulator/beam/jit/beam_jit_common.hpp +++ b/erts/emulator/beam/jit/beam_jit_common.hpp @@ -600,7 +600,7 @@ void beam_jit_bs_field_size_argument_error(Process *c_p, Eterm size); void beam_jit_bs_add_argument_error(Process *c_p, Eterm A, Eterm B); Eterm beam_jit_bs_init_bits(Process *c_p, Eterm *reg, - ERL_BITS_DECLARE_STATEP, + ErlBitsState *EBS, Uint num_bits, Uint alloc, unsigned Live); diff --git a/erts/emulator/beam/jit/x86/instr_bs.cpp b/erts/emulator/beam/jit/x86/instr_bs.cpp index d3c9e1c050d6..4a7f54771750 100644 --- a/erts/emulator/beam/jit/x86/instr_bs.cpp +++ b/erts/emulator/beam/jit/x86/instr_bs.cpp @@ -1262,10 +1262,10 @@ void BeamModuleAssembler::update_bin_state(x86::Gp bin_offset, const int x_reg_offset = offsetof(ErtsSchedulerRegisters, x_reg_array.d); const int cur_bin_base = offsetof(ErtsSchedulerRegisters, aux_regs.d.erl_bits_state) + - offsetof(struct erl_bits_state, erts_current_bin_); + offsetof(struct erl_bits_state, erts_current_bin); const int cur_bin_offset = offsetof(ErtsSchedulerRegisters, aux_regs.d.erl_bits_state) + - offsetof(struct erl_bits_state, erts_bin_offset_); + offsetof(struct erl_bits_state, erts_bin_offset); x86::Mem mem_bin_base = x86::Mem(registers, cur_bin_base - x_reg_offset, sizeof(UWord)); @@ -1822,10 +1822,10 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, offsetof(ErtsSchedulerRegisters, x_reg_array.d); const int cur_bin_base = offsetof(ErtsSchedulerRegisters, aux_regs.d.erl_bits_state) + - offsetof(struct erl_bits_state, erts_current_bin_); + offsetof(struct erl_bits_state, erts_current_bin); const int cur_bin_offset = offsetof(ErtsSchedulerRegisters, aux_regs.d.erl_bits_state) + - offsetof(struct erl_bits_state, erts_bin_offset_); + offsetof(struct erl_bits_state, erts_bin_offset); x86::Mem mem_bin_base = x86::qword_ptr(registers, cur_bin_base - x_reg_offset); x86::Mem mem_bin_offset = @@ -2195,14 +2195,14 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, runtime_entered = bs_maybe_enter_runtime(runtime_entered); comment("private append to binary"); ASSERT(Alloc.get() == 0); - mov_arg(ARG2, seg.src); + mov_arg(ARG3, seg.src); if (sizeReg.isValid()) { - a.mov(ARG3, sizeReg); + a.mov(ARG4, sizeReg); } else { - mov_imm(ARG3, num_bits); + mov_imm(ARG4, num_bits); } - a.mov(ARG4, seg.unit); - a.mov(ARG1, c_p); + a.mov(ARG2, c_p); + load_erl_bits_state(ARG1); runtime_call<4>(erts_bs_private_append_checked); /* There is no way the call can fail on a 64-bit architecture. */ a.mov(TMP_MEM1q, RET); @@ -2251,10 +2251,11 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, comment("construct a binary segment"); if (seg.effectiveSize >= 0) { /* The segment has a literal size. */ - mov_imm(ARG3, seg.effectiveSize); - mov_arg(ARG2, seg.src); - a.mov(ARG1, c_p); - runtime_call<3>(erts_new_bs_put_binary); + mov_imm(ARG4, seg.effectiveSize); + mov_arg(ARG3, seg.src); + a.mov(ARG2, c_p); + load_erl_bits_state(ARG1); + runtime_call<4>(erts_bs_put_binary); error_info = beam_jit_update_bsc_reason_info(seg.error_info, BSC_REASON_BADARG, BSC_INFO_DEPENDS, @@ -2263,10 +2264,11 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, seg.size.as().get() == am_all) { /* Include the entire binary/bitstring in the * resulting binary. */ - a.mov(ARG3, seg.unit); - mov_arg(ARG2, seg.src); - a.mov(ARG1, c_p); - runtime_call<3>(erts_new_bs_put_binary_all); + mov_imm(ARG4, seg.unit); + mov_arg(ARG3, seg.src); + a.mov(ARG2, c_p); + load_erl_bits_state(ARG1); + runtime_call<4>(erts_bs_put_binary_all); error_info = beam_jit_update_bsc_reason_info(seg.error_info, BSC_REASON_BADARG, BSC_INFO_UNIT, @@ -2282,16 +2284,17 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, * the value is a non-negative small in the * appropriate range. Multiply the size with the * unit. */ - mov_arg(ARG3, seg.size); - a.sar(ARG3, imm(_TAG_IMMED1_SIZE)); + mov_arg(ARG4, seg.size); + a.sar(ARG4, imm(_TAG_IMMED1_SIZE)); if (seg.unit != 1) { mov_imm(RET, seg.unit); - a.mul(ARG3); /* CLOBBERS RDX = ARG3! */ - a.mov(ARG3, RET); + a.mul(ARG4); /* CLOBBERS RDX = ARG3! */ + a.mov(ARG4, RET); } - mov_arg(ARG2, seg.src); - a.mov(ARG1, c_p); - runtime_call<3>(erts_new_bs_put_binary); + mov_arg(ARG3, seg.src); + a.mov(ARG2, c_p); + load_erl_bits_state(ARG1); + runtime_call<4>(erts_bs_put_binary); error_info = beam_jit_update_bsc_reason_info(seg.error_info, BSC_REASON_BADARG, BSC_INFO_DEPENDS, @@ -2311,20 +2314,21 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, runtime_entered = bs_maybe_enter_runtime(runtime_entered); comment("construct float segment"); if (seg.effectiveSize >= 0) { - mov_imm(ARG3, seg.effectiveSize); + mov_imm(ARG4, seg.effectiveSize); } else { - mov_arg(ARG3, seg.size); - a.sar(ARG3, imm(_TAG_IMMED1_SIZE)); + mov_arg(ARG4, seg.size); + a.sar(ARG4, imm(_TAG_IMMED1_SIZE)); if (seg.unit != 1) { mov_imm(RET, seg.unit); - a.mul(ARG3); /* CLOBBERS RDX = ARG3! */ - a.mov(ARG3, RET); + a.mul(ARG4); /* CLOBBERS RDX = ARG3! */ + a.mov(ARG4, RET); } } - mov_arg(ARG2, seg.src); - mov_imm(ARG4, seg.flags); - a.mov(ARG1, c_p); - runtime_call<4>(erts_new_bs_put_float); + mov_arg(ARG3, seg.src); + mov_imm(ARG5, seg.flags); + a.mov(ARG2, c_p); + load_erl_bits_state(ARG1); + runtime_call<5>(erts_bs_put_float); if (Fail.get() == 0) { mov_imm(ARG4, beam_jit_update_bsc_reason_info(seg.error_info, @@ -2595,9 +2599,12 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, * integer into the binary. */ runtime_entered = bs_maybe_enter_runtime(runtime_entered); mov_arg(ARG2, seg.src); - mov_imm(ARG4, seg.flags); load_erl_bits_state(ARG1); - runtime_call<4>(erts_new_bs_put_integer); + if (seg.flags & BSF_LITTLE) { + runtime_call<3>(erts_bs_put_integer_le); + } else { + runtime_call<3>(erts_bs_put_integer_be); + } if (exact_type(seg.src)) { comment("skipped test for success because construction " "can't fail"); @@ -2628,7 +2635,7 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, mov_imm(ARG3, seg.effectiveSize / 8); mov_arg(ARG2, string_ptr); load_erl_bits_state(ARG1); - runtime_call<3>(erts_new_bs_put_string); + runtime_call<3>(erts_bs_put_string); } break; case am_utf8: { runtime_entered = bs_maybe_enter_runtime(runtime_entered); @@ -2656,9 +2663,12 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, runtime_entered = bs_maybe_enter_runtime(runtime_entered); mov_arg(ARG2, seg.src); mov_imm(ARG3, 4 * 8); - a.mov(ARG4, seg.flags); load_erl_bits_state(ARG1); - runtime_call<4>(erts_new_bs_put_integer); + if (seg.flags & BSF_LITTLE) { + runtime_call<3>(erts_bs_put_integer_le); + } else { + runtime_call<3>(erts_bs_put_integer_be); + } if (Fail.get() == 0) { mov_arg(ARG1, seg.src); mov_imm(ARG4, diff --git a/erts/emulator/test/bs_construct_SUITE.erl b/erts/emulator/test/bs_construct_SUITE.erl index 42de143791a5..8fbada37e6ac 100644 --- a/erts/emulator/test/bs_construct_SUITE.erl +++ b/erts/emulator/test/bs_construct_SUITE.erl @@ -30,7 +30,7 @@ otp_7422/1, zero_width/1, bad_append/1, bs_append_overflow/1, bs_append_offheap/1, reductions/1, fp16/1, zero_init/1, error_info/1, little/1, - heap_binary_unit/1 + heap_binary_unit/1, floats/1 ]). -include_lib("common_test/include/ct.hrl"). @@ -46,7 +46,7 @@ all() -> copy_writable_binary, kostis, dynamic, otp_7422, zero_width, bad_append, bs_append_overflow, bs_append_offheap, reductions, fp16, zero_init, - error_info, little, heap_binary_unit]. + error_info, little, heap_binary_unit, floats]. init_per_suite(Config) -> Config. @@ -1634,6 +1634,254 @@ heap_binary_unit_2(Variant, Rest) -> {error2, Bin2} end. +floats(_Config) -> + _ = rand:uniform(), %Seed generator + io:format("Seed: ~p", [rand:export_seed()]), + + %% Random floats. + _ = [do_float(rand:uniform() * math:pow(10.0, rand:uniform(20))) || + _ <- lists:seq(1, 20)], + + %% Random floats with powers of 10 near the upper limit representable + %% as a 64-bit float. + _ = [do_float(rand:uniform() * math:pow(10.0, 300 + rand:uniform(7))) || + _ <- lists:seq(1, 10)], + + %% Random small integers. + _ = [do_float(rand:uniform(1_000_000)) || _ <- lists:seq(1, 10)], + + %% Random big integers. + _ = [do_float(rand:uniform(1_000_000) bsl 64) || _ <- lists:seq(1, 10)], + + do_float(-0.0), + do_float(+0.0), + + ok. + +do_float(F) -> + do_float(F, 0). + +do_float(_F, 32) -> + ok; +do_float(F, N) -> + Pad = rand:uniform(1 bsl N) - 1, + true = is_integer(Pad), + + do_float_be_16(F, N, Pad), + do_float_be_32(F, N, Pad), + do_float_be_64(F, N, Pad), + + do_float_le_16(F, N, Pad), + do_float_le_32(F, N, Pad), + do_float_le_64(F, N, Pad), + + do_float(F, N + 1). + +do_float_be_16(F, N, Pad) -> + FloatBin = id(<>), + Bin = id(<>), + Bin = id(<>), + Bin = <>, + Bin = <>, + <> = Bin, + + if + is_float(F) -> + %% Construct float segment of a known float. + FloatBin = id(<>), + Bin = <>, + Bin = case N of + 15 -> <>; + 21 -> <>; + _ -> <> + end; + is_integer(F) -> + %% Construct float segment of a known integer. + FloatBin = id(<>), + Bin = <>, + Bin = case N of + 1 -> <>; + 19 -> <>; + _ -> <> + end; + true -> + ok + end, + + ok. + +do_float_be_32(F, N, Pad) -> + FloatBin = id(<>), + Bin = id(<>), + Bin = id(<>), + Bin = <>, + Bin = <>, + <> = Bin, + + if + is_float(F) -> + %% Construct float segment of a known float. + FloatBin = id(<>), + Bin = <>, + Bin = case N of + 1 -> <>; + 6 -> <>; + _ -> <> + end; + is_integer(F) -> + %% Construct float segment of a known integer. + FloatBin = id(<>), + Bin = <>, + Bin = case N of + 8 -> <>; + 12 -> <>; + _ -> <> + end; + true -> + ok + end, + + ok. + +do_float_be_64(F, N, Pad) -> + FloatBin = id(<>), + Bin = id(<>), + Bin = id(<>), + Bin = <>, + Bin = <>, + <> = Bin, + + if + is_float(F) -> + %% Construct float segment of a known float. + FloatBin = id(<>), + Bin = <>, + Bin = case N of + 7 -> <>; + 13 -> <>; + _ -> <> + end, + + %% Match out the original float. + <> = Bin; + is_integer(F) -> + %% Construct float segment of a known integer. + FloatBin = id(<>), + Bin = <>, + Bin = case N of + 7 -> <>; + 13 -> <>; + _ -> <> + end; + true -> + ok + end, + + ok. + +do_float_le_16(F, N, Pad) -> + FloatBin = id(<>), + Bin = id(<>), + Bin = id(<>), + Bin = <>, + Bin = <>, + <> = Bin, + + if + is_float(F) -> + %% Construct float segment of a known float. + FloatBin = id(<>), + Bin = <>, + Bin = case N of + 11 -> <>; + 27 -> <>; + _ -> <> + end; + is_integer(F) -> + %% Construct float segment of a known integer. + FloatBin = id(<>), + Bin = <>, + Bin = case N of + 7 -> <>; + 13 -> <>; + _ -> <> + end; + true -> + ok + end, + + ok. + +do_float_le_32(F, N, Pad) -> + FloatBin = id(<>), + Bin = id(<>), + Bin = id(<>), + Bin = <>, + Bin = <>, + <> = Bin, + + if + is_float(F) -> + %% Construct float segment of a known float. + FloatBin = id(<>), + Bin = <>, + Bin = case N of + 9 -> <>; + 29 -> <>; + _ -> <> + end; + is_integer(F) -> + %% Construct float segment of a known integer. + FloatBin = id(<>), + Bin = <>, + Bin = case N of + 7 -> <>; + 13 -> <>; + _ -> <> + end; + true -> + ok + end, + + ok. + +do_float_le_64(F, N, Pad) -> + FloatBin = id(<>), + Bin = id(<>), + Bin = id(<>), + Bin = <>, + Bin = <>, + <> = Bin, + + if + is_float(F) -> + %% Construct float segment of a known float. + FloatBin = id(<>), + Bin = <>, + Bin = case N of + 9 -> <>; + 29 -> <>; + _ -> <> + end, + + %% Match out the original float. + <> = Bin; + is_integer(F) -> + %% Construct float segment of a known integer. + FloatBin = id(<>), + Bin = <>, + Bin = case N of + 7 -> <>; + 13 -> <>; + _ -> <> + end; + true -> + ok + end, + + ok. + + %%% %%% Common utilities. %%%