From 4b99a199735e524337d3b208b18e419c09458891 Mon Sep 17 00:00:00 2001
From: Martin Blicha <martin.blicha@gmail.com>
Date: Wed, 23 Oct 2024 10:16:35 +0200
Subject: [PATCH] Revert wraparound semantics for reading from buffers

Recently, we have changed the semantics of reading beyond index 2^256
from a buffer. Previously, any read beyond buffer length would return
all zeroes. However, to match the semantics of SMT, we changed that so
that if reading from multiple consecutive indices would overflow the
index (at 2^256), the indices would wrap around and start from 0 again.
Unfortunately, it turns out that the interpreters (such as evmtool) do
not behave this way, they follow the original "just return all zeroes"
strategy.

In order to match the behaviour of real interpreters, we revert the
recent changes and return to the original behaviour.

This also introduces the problem of incompatibility with our SMT
encoding, which should be mitigated by modifying the fuzzer to not
generate these corner cases.
---
 CHANGELOG.md    |  1 -
 src/EVM/Expr.hs | 25 +++++--------------------
 test/test.hs    | 12 ++++++------
 3 files changed, 11 insertions(+), 27 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index beca62b32..6e57474b8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -60,7 +60,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   and hence lead to false negatives through trivially UNSAT SMT expressions
 - Respect --smt-timeout in equivalence checking
 - Fixed the handling of returndata with an abstract size during transaction finalization
-- Fixed handling of indices overflowing 2^256 when reading from buffers
 - Error handling for user-facing cli commands is much improved
 
 ## [0.53.0] - 2024-02-23
diff --git a/src/EVM/Expr.hs b/src/EVM/Expr.hs
index e12bb440f..7a0c8a9ce 100644
--- a/src/EVM/Expr.hs
+++ b/src/EVM/Expr.hs
@@ -290,16 +290,10 @@ readWord i b = readWordFromBytes i b
 -- Attempts to read a concrete word from a buffer by reading 32 individual bytes and joining them together
 -- returns an abstract ReadWord expression if a concrete word cannot be constructed
 readWordFromBytes :: Expr EWord -> Expr Buf -> Expr EWord
-readWordFromBytes (Lit idx) (ConcreteBuf bs)
-  | idx + 32 < idx = let
-    -- Overflowing, part of the read will be from the beginning of src
-      hd = BS.replicate (unsafeInto (maxWord256 - idx) + 1) 0
-      tl = BS.take (32 - BS.length hd) bs
-    in Lit $ word $ hd <> tl
-  | otherwise =
-    case tryInto idx of
-      Left _ -> Lit 0
-      Right i -> Lit $ word $ padRight 32 $ BS.take 32 $ BS.drop i bs
+readWordFromBytes (Lit idx) (ConcreteBuf bs) =
+  case tryInto idx of
+    Left _ -> Lit 0
+    Right i -> Lit $ word $ padRight 32 $ BS.take 32 $ BS.drop i bs
 readWordFromBytes i@(Lit idx) buf = let
     bytes = [readByte (Lit i') buf | i' <- [idx .. idx + 31]]
   in if all isLitByte bytes
@@ -344,16 +338,7 @@ copySlice a@(Lit srcOffset) b@(Lit dstOffset) c@(Lit size) d@(ConcreteBuf src) e
   , size < maxBytes =
       let hd = padRight (unsafeInto dstOffset) $ BS.take (unsafeInto dstOffset) dst
           sl = if srcOffset > unsafeInto (BS.length src)
-            then
-              if srcOffset + size < srcOffset -- overflow, read will continue from begining of the buffer
-                then
-                  let headSize = (maxWord256 - srcOffset) + 1
-                      tailSize = size - headSize
-                      sliceHead = BS.replicate (unsafeInto headSize) 0
-                      sliceTail = padRight (unsafeInto tailSize) $ BS.take (unsafeInto tailSize) src
-                   in
-                    sliceHead <> sliceTail
-                else BS.replicate (unsafeInto size) 0
+            then BS.replicate (unsafeInto size) 0
             else padRight (unsafeInto size) $ BS.take (unsafeInto size) (BS.drop (unsafeInto srcOffset) src)
           tl = BS.drop (unsafeInto dstOffset + unsafeInto size) dst
       in ConcreteBuf $ hd <> sl <> tl
diff --git a/test/test.hs b/test/test.hs
index 213bc1830..c7a0d9acc 100644
--- a/test/test.hs
+++ b/test/test.hs
@@ -705,6 +705,11 @@ tests = testGroup "hevm"
         y <- checkEquivAndLHS x simplified
         assertBoolM "Must be equal" y
     ]
+  {- NOTE: These tests were designed to test behaviour on reading from a buffer such that the indices overflow 2^256.
+           However, such scenarios are impossible in the real world (the operation would run out of gas). The problem
+           is that the behaviour of bytecode interpreters does not match the semantics of SMT. Intrepreters just
+           return all zeroes for any read beyond buffer size, while in SMT reading multiple bytes may lead to overflow
+           on indices and subsequently to reading from the beginning of the buffer (wrap-around semantics).
   , testGroup "concrete-buffer-simplification-large-index" [
       test "copy-slice-large-index-nooverflow" $ do
         let
@@ -736,13 +741,8 @@ tests = testGroup "hevm"
           s = Expr.simplify e
         equal <- checkEquiv e s
         assertEqualM "Must be equal" True equal
-    , test "copy-slice-overflowing-into-too-short-buffer" $ do
-        let
-          e = BufLength (CopySlice (Lit 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff) (Lit 0x00) (Lit 0x02) (ConcreteBuf "") (ConcreteBuf ""))
-          s = Expr.simplify e
-        equal <- checkEquiv e s
-        assertEqualM "Must be equal" True equal
   ]
+  -}
   , testGroup "isUnsat-concrete-tests" [
       test "disjunction-left-false" $ do
         let