fix[codegen]: fix overcopying of bytes in make_setter #4419

Open
wants to merge 7 commits into base: master
15 changes: 15 additions & 0 deletions tests/unit/compiler/venom/test_memmerging.py
@@ -766,6 +766,21 @@ def test_memmerging_double_use():
_check_pre_post(pre, post)


def test_existing_mcopy_overlap_nochange():
"""
Check that an mcopy whose source and destination already overlap does not get optimized
"""
if not version_check(begin="cancun"):
return

pre = """
_global:
mcopy 32, 33, 2
return %1
"""
_check_no_change(pre)


@pytest.mark.parametrize("load_opcode,copy_opcode", LOAD_COPY)
def test_memmerging_load(load_opcode, copy_opcode):
pre = f"""
43 changes: 42 additions & 1 deletion vyper/codegen/core.py
@@ -177,10 +177,37 @@ def make_byte_array_copier(dst, src):
# batch copy the bytearray (including length word) using copy_bytes
len_ = add_ofst(get_bytearray_length(src), 32)
max_bytes = src.typ.maxlen + 32

if _prefer_copy_maxbound_heuristic(dst, src, item_size=1):
len_ = max_bytes

# batch copy the entire dynarray, including length word
ret = copy_bytes(dst, src, len_, max_bytes)
return b1.resolve(ret)


# heuristic to choose whether to copy up to the maxbound instead of the exact runtime length
def _prefer_copy_maxbound_heuristic(dst, src, item_size):
if dst.location != MEMORY:
return False

# a heuristic - it's cheaper to just copy the extra buffer bytes
# than calculate the number of bytes
# copy(dst, src, 32 + itemsize*load(src))
# =>
# copy(dst, src, bound)
# (32 + itemsize*(load(src))) costs 4*3+5 gas
length_calc_cost = 6 * (item_size != 1) # PUSH MUL
src_bound = src.typ.memory_bytes_required
if src.location in (CALLDATA, MEMORY) and src_bound <= (5 + length_calc_cost) * 32:
return True
# threshold is 6 words of data (+ 1 length word that we need to copy anyway)
# dload(src) costs additional 7*3 gas
if src.location == DATA and src_bound <= (12 + length_calc_cost) * 32:
return True
return False


def bytes_data_ptr(ptr):
if ptr.location is None: # pragma: nocover
raise CompilerPanic("tried to modify non-pointer type")
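For readers puzzling over the threshold constants, here is a rough back-of-the-envelope reading of the base case (item_size == 1), using only the gas figures quoted in the comments above; it is an illustration of the trade-off, not code from the compiler:

# illustrative only: the marginal cost of copying one extra 32-byte word
# with calldatacopy/mcopy/codecopy is roughly 3 gas (ignoring memory expansion)
COPY_GAS_PER_WORD = 3

# the runtime length computation 32 + load(src) costs about 4*3 + 5 gas,
# and a source living in code (DATA) pays roughly an extra 7*3 gas for dload
LENGTH_CALC_GAS = 4 * 3 + 5
DLOAD_EXTRA_GAS = 7 * 3

def break_even_words(length_calc_gas: int) -> int:
    # how many extra words can be copied before computing the runtime
    # length would have been the cheaper option
    return length_calc_gas // COPY_GAS_PER_WORD

print(break_even_words(LENGTH_CALC_GAS))                    # 5, cf. the (5 + ...) * 32 threshold
print(break_even_words(LENGTH_CALC_GAS + DLOAD_EXTRA_GAS))  # 12, cf. the (12 + ...) * 32 threshold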
@@ -276,6 +303,9 @@ def _dynarray_make_setter(dst, src, hi=None):
n_bytes = add_ofst(_mul(count, element_size), 32)
max_bytes = 32 + src.typ.count * element_size

if _prefer_copy_maxbound_heuristic(dst, src, element_size):
n_bytes = max_bytes
Review comment (Collaborator):
codecov reports this isn't covered - can you please fix that?

@charles-cooper (Member, Author) replied on Jan 2, 2025:
it's covered -- you can verify by running the test suite with the following patch:

diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py
index 64c6a62a8..74cfebc79 100644
--- a/vyper/codegen/core.py
+++ b/vyper/codegen/core.py
@@ -304,6 +304,7 @@ def _dynarray_make_setter(dst, src, hi=None):
                 max_bytes = 32 + src.typ.count * element_size
 
                 if _prefer_copy_maxbound_heuristic(dst, src, element_size):
+                    raise Exception("covered")
                     n_bytes = max_bytes
 
                 # batch copy the entire dynarray, including length word

for me, this quickly raises an exception at "tests/functional/codegen/types/test_dynamic_array.py::test_dynarray_length_no_clobber[\n@external\ndef should_revert():\n c: DynArray[DynArray[uint256, 1], 2] = [[]]\n c[0] = c.pop()\n ]".
[attached screenshot: Screenshot from 2025-01-02 17-11-40]


# batch copy the entire dynarray, including length word
ret.append(copy_bytes(dst, src, n_bytes, max_bytes))

@@ -1038,7 +1068,18 @@ def _complex_make_setter(left, right, hi=None):
assert is_tuple_like(left.typ)
keys = left.typ.tuple_keys()

if left.is_pointer and right.is_pointer and right.encoding == Encoding.VYPER:
# performance: if there is any dynamic data, there might be
# unused space between the end of the dynarray and the end of the buffer.
# for instance DynArray[uint256, 100] with runtime length of 5.
# in these cases, we recurse to dynarray make_setter which has its own
# heuristic for when to copy all data.

# use abi_type.is_dynamic since it is identical to the query "do any children
# have dynamic size"
has_dynamic_data = right.typ.abi_type.is_dynamic()
simple_encoding = right.encoding == Encoding.VYPER

if left.is_pointer and right.is_pointer and simple_encoding and not has_dynamic_data:
# both left and right are pointers, see if we want to batch copy
# instead of unrolling the loop.
assert left.encoding == Encoding.VYPER
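A small numeric sketch of the unused-space concern that motivates this check, reusing the DynArray[uint256, 100] example from the comment above (layout assumption: one length word followed by count element slots; the numbers are purely illustrative):

# DynArray[uint256, 100] holding only 5 elements at runtime
MAXLEN = 100
RUNTIME_LEN = 5
ELEMENT_SIZE = 32  # uint256

buffer_bytes = 32 + MAXLEN * ELEMENT_SIZE     # 3232 bytes reserved for the member
used_bytes = 32 + RUNTIME_LEN * ELEMENT_SIZE  # 192 bytes actually populated

# batch-copying the enclosing tuple/struct at its max bound would drag the
# dead space along, so the setter recurses and lets the dynarray heuristic decide
print(buffer_bytes - used_bytes)  # 3040 bytes of slack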
14 changes: 10 additions & 4 deletions vyper/venom/passes/memmerging.py
@@ -86,7 +86,7 @@ def merge(self, other: "_Copy"):
self.insts.extend(other.insts)

def __repr__(self) -> str:
return f"({self.src}, {self.src_end}, {self.length}, {self.dst}, {self.dst_end})"
return f"({self.src}, {self.dst}, {self.length})"


class MemMergePass(IRPass):
@@ -114,7 +114,7 @@ def _optimize_copy(self, bb: IRBasicBlock, copy_opcode: str, load_opcode: str):
copy.insts.sort(key=bb.instructions.index)

if copy_opcode == "mcopy":
assert not copy.overwrites_self_src()
assert not copy.overwrites_self_src(), (copy, copy.insts, bb)

pin_inst = None
inst = copy.insts[-1]
@@ -277,8 +277,14 @@ def _barrier():
if self._write_after_write_hazard(n_copy):
_barrier()
# check that the new copy does not overwrite existing data
if not allow_dst_overlaps_src and self._read_after_write_hazard(n_copy):
_barrier()
if not allow_dst_overlaps_src:
if n_copy.overwrites_self_src():
# continue otherwise we will get an assertion failure
# in _optimize_copy
continue
if self._read_after_write_hazard(n_copy):
_barrier()

self._add_copy(n_copy)

elif _volatile_memory(inst):
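For context on why the candidate is skipped rather than just barriered: the hazard is, presumably, a plain interval intersection between what the merged copy would read and what it would write. A minimal sketch of such a predicate (a hypothetical standalone helper; the real check lives in _Copy.overwrites_self_src on the pass's copy descriptor):

def overwrites_own_source(dst: int, src: int, length: int) -> bool:
    # the copy writes [dst, dst + length) while reading [src, src + length);
    # if the two intervals intersect, folding the instruction into a merged
    # mcopy would trip the assertion in _optimize_copy, so the candidate
    # is dropped instead of merely emitting a barrier
    return dst < src + length and src < dst + length

print(overwrites_own_source(32, 33, 2))   # True: a 2-byte copy between offsets 32 and 33 overlaps itself
print(overwrites_own_source(0, 64, 32))   # False: disjoint regions are fine to merge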