Skip to content

Commit

Permalink
Merge pull request #436 from ethereum/per-mapping-smt-array
Browse files Browse the repository at this point in the history
Isolated SMT arrays for logically seperate storage
  • Loading branch information
d-xo authored Jan 16, 2024
2 parents 7a828a5 + a07ebff commit 8ac711c
Show file tree
Hide file tree
Showing 10 changed files with 344 additions and 39 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Keccak concretization is now done only after all simplifications are performed. This helps with simplification pre-concretization

## Added

- Optimized smt queries that significantly improve performance when dealing with solidity mappings and arrays
- Support for using Bitwuzla as a solver
- Symbolic tests now support statically sized arrays as parameters
- `hevm test` now has a `num-solvers` parameter that controls how many solver instances to spawn
Expand Down
11 changes: 6 additions & 5 deletions src/EVM.hs
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ makeVm o = do
unknownContract :: Expr EAddr -> Contract
unknownContract addr = Contract
{ code = UnknownCode addr
, storage = AbstractStore addr
, origStorage = AbstractStore addr
, storage = AbstractStore addr Nothing
, origStorage = AbstractStore addr Nothing
, balance = Balance addr
, nonce = Nothing
, codehash = hashcode (UnknownCode addr)
Expand All @@ -182,8 +182,8 @@ unknownContract addr = Contract
abstractContract :: ContractCode -> Expr EAddr -> Contract
abstractContract code addr = Contract
{ code = code
, storage = AbstractStore addr
, origStorage = AbstractStore addr
, storage = AbstractStore addr Nothing
, origStorage = AbstractStore addr Nothing
, balance = Balance addr
, nonce = if isCreation code then Just 1 else Just 0
, codehash = hashcode code
Expand Down Expand Up @@ -1824,8 +1824,9 @@ create self this xSize xGas xValue xs newAddr initCode = do
resetStorage :: Expr Storage -> Expr Storage
resetStorage = \case
ConcreteStore _ -> ConcreteStore mempty
AbstractStore a -> AbstractStore a
AbstractStore a Nothing -> AbstractStore a Nothing
SStore _ _ p -> resetStorage p
AbstractStore _ (Just _) -> internalError "unexpected logical store in EVM.hs"
GVar _ -> internalError "unexpected global variable"

modifying (#env % #contracts % ix newAddr % #storage) resetStorage
Expand Down
134 changes: 131 additions & 3 deletions src/EVM/Expr.hs
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,7 @@ readStorage w st = go (simplify w) st
where
go :: Expr EWord -> Expr Storage -> Maybe (Expr EWord)
go _ (GVar _) = internalError "Can't read from a GVar"
go slot s@(AbstractStore _) = Just $ SLoad slot s
go slot s@(AbstractStore _ _) = Just $ SLoad slot s
go (Lit l) (ConcreteStore s) = Lit <$> Map.lookup l s
go slot store@(ConcreteStore _) = Just $ SLoad slot store
go slot s@(SStore prevSlot val prev) = case (prevSlot, slot) of
Expand Down Expand Up @@ -644,7 +644,11 @@ readStorage w st = go (simplify w) st
-- for our purposes. This lets us completely simplify reads from write
-- chains involving writes to arrays at literal offsets.
(Lit a, Add (Lit b) (Keccak _) ) | a < 256, b < maxW32 -> go slot prev
(Add (Lit a) (Keccak _) , Lit b) | a < 256, b < maxW32 -> go slot prev
(Add (Lit a) (Keccak _) , Lit b) | b < 256, a < maxW32 -> go slot prev

--- NOTE these are needed to succeed in rewriting arrays with a variable index
-- (Lit a, Add (Keccak _) (Var _) ) | a < 256 -> go slot prev
-- (Add (Keccak _) (Var _) , Lit b) | b < 256 -> go slot prev

-- Finding two Keccaks that are < 256 away from each other should be VERY hard
-- This simplification allows us to deal with maps of structs
Expand Down Expand Up @@ -756,14 +760,138 @@ writeStorage key val store = SStore key val store

getAddr :: Expr Storage -> Maybe (Expr EAddr)
getAddr (SStore _ _ p) = getAddr p
getAddr (AbstractStore a) = Just a
getAddr (AbstractStore a _) = Just a
getAddr (ConcreteStore _) = Nothing
getAddr (GVar _) = internalError "cannot determine addr of a GVar"

getLogicalIdx :: Expr Storage -> Maybe W256
getLogicalIdx (SStore _ _ p) = getLogicalIdx p
getLogicalIdx (AbstractStore _ idx) = idx
getLogicalIdx (ConcreteStore _) = Nothing
getLogicalIdx (GVar _) = internalError "cannot determine addr of a GVar"


-- ** Whole Expression Simplification ** -----------------------------------------------------------


data StorageType = SmallSlot | Array | Map | Mixed | UNK
deriving (Show, Eq)

safeToDecompose :: Expr a -> Bool
safeToDecompose inp = result /= Mixed
where
result = execState (safeToDecomposeRunner inp) UNK

safeToDecomposeRunner :: forall a. Expr a -> State StorageType ()
safeToDecomposeRunner a = go a

go :: forall b. Expr b -> State StorageType ()
go e@(SLoad (MappingSlot {}) _) = setMap e
go e@(SLoad (ArraySlotZero {}) _) = setArray e
go e@(SLoad (ArraySlotWithOffset {}) _) = setArray e
go e@(SLoad (Lit x) _) | x < 256 = setSmall e
go e@(SLoad _ _) = setMixed e
go e@(SStore (MappingSlot {}) _ _) = setMap e
go e@(SStore (ArraySlotZero {}) _ _) = setArray e
go e@(SStore (ArraySlotWithOffset {}) _ _) = setArray e
go e@(SStore (Lit x) _ _) | x < 256 = setSmall e
go e@(SStore _ _ _) = setMixed e
go _ = pure ()

-- Helper functions for detecting mixed load/store
setMixed _ = do
put Mixed
pure ()
setMap _ = do
s <- get
case s of
Array -> put Mixed
SmallSlot -> put Mixed
UNK -> put Map
_ -> pure ()
pure ()
setArray _ = do
s <- get
case s of
Map -> put Mixed
SmallSlot -> put Mixed
UNK -> put Array
_ -> pure ()
pure ()
setSmall _ = do
s <- get
case s of
Map -> put Mixed
Array -> put Mixed
UNK -> put SmallSlot
_ -> pure ()
pure ()

-- | Splits storage into logical sub-stores if (1) all SLoad->SStore* chains are one of:
-- (1a) Lit < 256, (1b) MappingSlot, (1c) ArraySlotWithOffset, (1d) ArraySlotZero
-- and (2) there is no mixing of different types (e.g. Map with Array) within
-- the same SStore -> SLoad* chain, and (3) there is no mixing of different array/map slots.
--
-- Mixing (2) and (3) are attempted to be prevented (if possible) as part of the rewrites
-- done by the `readStorage` function that is ran before this. If there is still mixing here,
-- we abort with a Nothing.
--
-- We do NOT rewrite stand-alone `SStore`-s (i.e. SStores that are not read), since
-- they are often used to describe a post-state, and are not dispatched as-is to
-- the solver
decomposeStorage :: Expr a -> Maybe (Expr a)
decomposeStorage = go
where
go :: Expr a -> Maybe (Expr a)
go e@(SLoad origKey store) = if Prelude.not (safeToDecompose e) then Nothing else tryRewrite origKey store
go e = Just e

tryRewrite :: Expr EWord -> Expr Storage -> Maybe (Expr EWord)
tryRewrite origKey store = case inferLogicalIdx origKey of
Just (idx, key) -> do
base <- setLogicalBase idx store
pure (SLoad key base)
_ -> Nothing

-- NOTE: we use (Maybe W256) for idx here, because for small slot numbers we want to keep the
-- Logical Store value a Nothing
inferLogicalIdx :: Expr EWord -> Maybe (Maybe W256, Expr EWord)
inferLogicalIdx = \case
Lit a | a >= 256 -> Nothing
Lit a -> Just (Nothing, Lit a)
(MappingSlot idx key) | BS.length idx == 64 -> Just (Just $ idxToWord idx, key)
(ArraySlotWithOffset idx offset) | BS.length idx == 32 -> Just (Just $ idxToWord64 idx, offset)
(ArraySlotZero idx) | BS.length idx == 32 -> Just (Just $ idxToWord64 idx, Lit 0)
_ -> Nothing

idxToWord :: ByteString -> W256
idxToWord = W256 . word256 . (BS.takeEnd 32)
-- Arrays take the whole `id` and keccak it. It's supposed to be 64B
idxToWord64 :: ByteString -> W256
idxToWord64 = W256 . word256 . (BS.takeEnd 64)

-- Updates the logical base store of the given expression if it is safe to do so
setLogicalBase :: Maybe W256 -> Expr Storage -> Maybe (Expr Storage)

-- abstract bases get their logical idx set to the new value
setLogicalBase idx (AbstractStore addr _) = Just $ AbstractStore addr idx
setLogicalBase idx (SStore i v s) = do
b <- setLogicalBase idx s
(idx2, key2) <- inferLogicalIdx i
if idx == idx2 then Just (SStore key2 v b) else Nothing

-- empty concrete base is safe to reuse without any rewriting
setLogicalBase _ s@(ConcreteStore m) | Map.null m = Just s

-- if the existing base is concrete but we have writes to only keys < 256
-- then we can safely rewrite the base to an empty ConcreteStore (safe because we assume keccack(x) > 256)
setLogicalBase _ (ConcreteStore store) =
if all (< 256) (Map.keys store)
then Just (ConcreteStore mempty)
else Nothing
setLogicalBase _ (GVar _) = internalError "Unexpected GVar"


-- | Simple recursive match based AST simplification
-- Note: may not terminate!
simplify :: Expr a -> Expr a
Expand Down
4 changes: 2 additions & 2 deletions src/EVM/Format.hs
Original file line number Diff line number Diff line change
Expand Up @@ -673,8 +673,8 @@ formatExpr = go
, ")"
, formatExpr prev
]
AbstractStore a ->
"(AbstractStore " <> formatExpr a <> ")"
AbstractStore a idx ->
"(AbstractStore " <> formatExpr a <> " " <> T.pack (show idx) <> ")"
ConcreteStore s -> if null s
then "(ConcreteStore <empty>)"
else T.unlines
Expand Down
13 changes: 7 additions & 6 deletions src/EVM/Fuzz.hs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ substituteStores :: Map (Expr 'EAddr) (Map W256 W256) -> Prop -> Prop
substituteStores valMap p = mapProp go p
where
go :: Expr a -> Expr a
go (AbstractStore a) = case valMap !? a of
-- TODO: Is this OK??
go (AbstractStore a _) = case valMap !? a of
Just m -> ConcreteStore m
Nothing -> ConcreteStore mempty
go a = a
Expand Down Expand Up @@ -141,7 +142,7 @@ findBufProp p = mapPropM go p
e -> pure e

--- Store extraction
data CollectStorage = CollectStorage { addrs :: Set.Set (Expr EAddr)
data CollectStorage = CollectStorage { addrs :: Set.Set (Expr EAddr, Maybe W256)
, keys :: Set.Set W256
, vals :: Set.Set W256
}
Expand Down Expand Up @@ -181,9 +182,9 @@ findStoragePropInner p = mapPropM go p
where
go :: forall a. Expr a -> State CollectStorage (Expr a)
go = \case
e@(AbstractStore a) -> do
e@(AbstractStore a idx) -> do
s <- get
put s {addrs=Set.insert a s.addrs}
put s {addrs=Set.insert (a, idx) s.addrs}
pure e
e@(SLoad (Lit val) _) -> do
s <- get
Expand Down Expand Up @@ -219,9 +220,9 @@ getvals vars = do
getStores :: CollectStorage -> Gen (Map (Expr EAddr) (Map W256 W256))
getStores storesLoads = go (Set.toList storesLoads.addrs) mempty
where
go :: [Expr EAddr] -> Map (Expr EAddr) (Map W256 W256) -> Gen (Map (Expr EAddr) (Map W256 W256))
go :: [(Expr EAddr, Maybe W256)] -> Map (Expr EAddr) (Map W256 W256) -> Gen (Map (Expr EAddr) (Map W256 W256))
go [] addrToValsMap = pure addrToValsMap
go (addr:ax) addrToValsMap = do
go ((addr, _):ax) addrToValsMap = do
-- number of elements inserted into storage
numElems :: Int <- frequency [(1, pure 0)
,(10, choose (1, 10))
Expand Down
32 changes: 19 additions & 13 deletions src/EVM/SMT.hs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ data CexVars = CexVars
-- | buffer names and guesses at their maximum size
, buffers :: Map Text (Expr EWord)
-- | reads from abstract storage
, storeReads :: Map (Expr EAddr) (Set (Expr EWord))
, storeReads :: Map (Expr EAddr, Maybe W256) (Set (Expr EWord))
-- | the names of any block context variables
, blockContext :: [Text]
-- | the names of any tx context variables
Expand Down Expand Up @@ -207,7 +207,10 @@ assertProps :: Config -> [Prop] -> SMT2
-- simplify to rewrite sload/sstore combos
-- notice: it is VERY important not to concretize, because Keccak assumptions
-- will be generated by assertPropsNoSimp, and that needs unconcretized Props
assertProps conf ps = assertPropsNoSimp conf $ Expr.simplifyProps ps
assertProps conf ps = assertPropsNoSimp conf (decompose . Expr.simplifyProps $ ps)
where
decompose :: [Prop] -> [Prop]
decompose props = fromMaybe props (mapM (mapPropM Expr.decomposeStorage) props)

-- Note: we need a version that does NOT call simplify,
-- because we make use of it to verify the correctness of our simplification
Expand Down Expand Up @@ -290,7 +293,7 @@ referencedAbstractStores :: TraversableTerm a => a -> Set Builder
referencedAbstractStores term = foldTerm go mempty term
where
go = \case
AbstractStore s -> Set.singleton (storeName s)
AbstractStore s idx -> Set.singleton (storeName s idx)
_ -> mempty

referencedWAddrs :: TraversableTerm a => a -> Set Builder
Expand Down Expand Up @@ -347,16 +350,16 @@ referencedBlockContext expr = nubOrd $ foldTerm go [] expr
-- the store (e.g, SLoad addr idx (SStore addr idx val AbstractStore)).
-- However, we expect that most of such reads will have been
-- simplified away.
findStorageReads :: Prop -> Map (Expr EAddr) (Set (Expr EWord))
findStorageReads :: Prop -> Map (Expr EAddr, Maybe W256) (Set (Expr EWord))
findStorageReads p = Map.fromListWith (<>) $ foldProp go mempty p
where
go :: Expr a -> [(Expr EAddr, Set (Expr EWord))]
go :: Expr a -> [((Expr EAddr, Maybe W256), Set (Expr EWord))]
go = \case
SLoad slot store ->
[((fromMaybe (error $ "Internal Error: could not extract address from: " <> show store) (Expr.getAddr store)), Set.singleton slot) | containsNode isAbstractStore store]
[((fromMaybe (internalError $ "could not extract address from: " <> show store) (Expr.getAddr store), Expr.getLogicalIdx store), Set.singleton slot) | containsNode isAbstractStore store]
_ -> []

isAbstractStore (AbstractStore _) = True
isAbstractStore (AbstractStore _ _) = True
isAbstractStore _ = False

findBufferAccess :: TraversableTerm a => [a] -> [(Expr EWord, Expr EWord, Expr Buf)]
Expand Down Expand Up @@ -815,8 +818,10 @@ exprToSMT = \case
"(writeWord " <> encIdx `sp` encVal `sp` encPrev <> ")"
CopySlice srcIdx dstIdx size src dst ->
copySlice srcIdx dstIdx size (exprToSMT src) (exprToSMT dst)

-- we need to do a bit of processing here.
ConcreteStore s -> encodeConcreteStore s
AbstractStore a -> storeName a
AbstractStore a idx -> storeName a idx
SStore idx val prev ->
let encIdx = exprToSMT idx
encVal = exprToSMT val
Expand Down Expand Up @@ -936,8 +941,9 @@ encodeConcreteStore s = foldl encodeWrite "((as const Storage) #x000000000000000
encVal = exprToSMT (Lit val)
in "(store " <> prev `sp` encKey `sp` encVal <> ")"

storeName :: Expr EAddr -> Builder
storeName a = fromString ("baseStore_") <> formatEAddr a
storeName :: Expr EAddr -> Maybe W256 -> Builder
storeName a Nothing = fromString ("baseStore_") <> formatEAddr a
storeName a (Just idx) = fromString ("baseStore_") <> formatEAddr a <> "_" <> (fromString $ show idx)

formatEAddr :: Expr EAddr -> Builder
formatEAddr = \case
Expand Down Expand Up @@ -1097,11 +1103,11 @@ getBufs getVal bufs = foldM getBuf mempty bufs
-- concretized storage
getStore
:: (Text -> IO Text)
-> Map (Expr EAddr) (Set (Expr EWord))
-> Map (Expr EAddr, Maybe W256) (Set (Expr EWord))
-> IO (Map (Expr EAddr) (Map W256 W256))
getStore getVal abstractReads =
fmap Map.fromList $ forM (Map.toList abstractReads) $ \(addr, slots) -> do
let name = toLazyText (storeName addr)
fmap Map.fromList $ forM (Map.toList abstractReads) $ \((addr, idx), slots) -> do
let name = toLazyText (storeName addr idx)
raw <- getVal name
let parsed = case parseCommentFreeFileMsg getValueRes (T.toStrict raw) of
Right (ResSpecific (valParsed :| [])) -> valParsed
Expand Down
2 changes: 1 addition & 1 deletion src/EVM/SymExec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1008,7 +1008,7 @@ subStores model b = Map.foldlWithKey subStore b model
where
go :: Expr a -> Expr a
go = \case
v@(AbstractStore a)
v@(AbstractStore a _)
-> if a == var
then ConcreteStore val
else v
Expand Down
4 changes: 2 additions & 2 deletions src/EVM/Traversals.hs
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ foldExpr f acc expr = acc <> (go expr)
-- storage

e@(ConcreteStore _) -> f e
e@(AbstractStore _) -> f e
e@(AbstractStore _ _) -> f e
e@(SLoad a b) -> f e <> (go a) <> (go b)
e@(SStore a b c) -> f e <> (go a) <> (go b) <> (go c)

Expand Down Expand Up @@ -512,7 +512,7 @@ mapExprM f expr = case expr of
-- storage

ConcreteStore b -> f (ConcreteStore b)
AbstractStore a -> f (AbstractStore a)
AbstractStore a b -> f (AbstractStore a b)
SLoad a b -> do
a' <- mapExprM f a
b' <- mapExprM f b
Expand Down
4 changes: 3 additions & 1 deletion src/EVM/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,9 @@ data Expr (a :: EType) where
-- storage

ConcreteStore :: (Map W256 W256) -> Expr Storage
AbstractStore :: Expr EAddr -> Expr Storage
AbstractStore :: Expr EAddr -- which contract is this store for?
-> Maybe W256 -- which logical store does this refer to? (e.g. solidity mappings / arrays)
-> Expr Storage

SLoad :: Expr EWord -- key
-> Expr Storage -- storage
Expand Down
Loading

0 comments on commit 8ac711c

Please sign in to comment.