Initial fuzzing work

ethereum · Oct 27, 2023 · 4de97ba · 4de97ba
1 parent 61aa81b
commit 4de97ba
Show file tree

Hide file tree

Showing 8 changed files with 228 additions and 31 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -72,6 +72,7 @@ This release also includes many small bugfixes:
 - we now generate correct counterexamples for branches where only a subset of input variables are referenced by the path conditions
 - `vm.prank` now works correctly when passed a symbolic address
 - storage layout information will now be parsed from the output of `forge build` if it is available
+- Added concrete fuzzer that can be controlled via `--num-cex-fuzz`
 
 ## API Changes
 

diff --git a/cli/cli.hs b/cli/cli.hs
@@ -90,6 +90,7 @@ data Command w
       , trace         :: w ::: Bool               <?> "Dump trace"
       , assertions    :: w ::: Maybe [Word256]    <?> "Comma separated list of solc panic codes to check for (default: user defined assertion violations only)"
       , askSmtIterations :: w ::: Integer         <!> "1" <?> "Number of times we may revisit a particular branching point before we consult the smt solver to check reachability (default: 1)"
+      , numCexFuzz    :: w ::: Integer            <!> "3" <?> "Number of fuzzing tries to do to generate a counterexample (default: 3)"
       , numSolvers    :: w ::: Maybe Natural      <?> "Number of solver instances to use (default: number of cpu cores)"
       , loopDetectionHeuristic :: w ::: LoopHeuristic <!> "StackBased" <?> "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive"
       , abstractArithmetic    :: w ::: Bool             <?> "Use abstraction-refinement for complicated arithmetic functions such as MulMod. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem"
@@ -109,6 +110,7 @@ data Command w
       , debug         :: w ::: Bool             <?> "Debug printing of internal behaviour"
       , trace         :: w ::: Bool             <?> "Dump trace"
       , askSmtIterations :: w ::: Integer       <!> "1" <?> "Number of times we may revisit a particular branching point before we consult the smt solver to check reachability (default: 1)"
+      , numCexFuzz    :: w ::: Integer          <!> "3" <?> "Number of fuzzing tries to do to generate a counterexample (default: 3)"
       , loopDetectionHeuristic :: w ::: LoopHeuristic <!> "StackBased" <?> "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive"
       , abstractArithmetic    :: w ::: Bool             <?> "Use abstraction-refinement for complicated arithmetic functions such as MulMod. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem"
       , abstractMemory    :: w ::: Bool                      <?> "Use abstraction-refinement for Memory. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem"
@@ -158,6 +160,7 @@ data Command w
       , loopDetectionHeuristic :: w ::: LoopHeuristic   <!> "StackBased" <?> "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive"
       , abstractArithmetic    :: w ::: Bool             <?> "Use abstraction-refinement for complicated arithmetic functions such as MulMod. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem"
       , abstractMemory    :: w ::: Bool                      <?> "Use abstraction-refinement for Memory. This runs the solver first with abstraction turned on, and if it returns a potential counterexample, the counterexample is refined to make sure it is a counterexample for the actual (not the abstracted) problem"
+      , numCexFuzz    :: w ::: Integer                  <!> "3" <?> "Number of fuzzing tries to do to generate a counterexample (default: 3)"
       , askSmtIterations :: w ::: Integer               <!> "1" <?> "Number of times we may revisit a particular branching point before we consult the smt solver to check reachability (default: 1)"
       }
   | Version
@@ -196,7 +199,8 @@ main = do
   cmd <- Options.unwrapRecord "hevm -- Ethereum evaluator"
   let env = Env { config = defaultConfig
     { dumpQueries = cmd.smtdebug
-    , debug = cmd.debug
+    , verbose = fromMaybe 0 cmd.verbose
+    , numCexFuzz = cmd.numCexFuzz
     , abstRefineMem = cmd.abstractMemory
     , abstRefineArith = cmd.abstractArithmetic
     , dumpTrace = cmd.trace

diff --git a/hevm.cabal b/hevm.cabal
@@ -83,6 +83,7 @@ library
     EVM.Expr,
     EVM.SMT,
     EVM.Solvers,
+    EVM.Fuzz,
     EVM.Exec,
     EVM.Format,
     EVM.Fetch,
@@ -180,7 +181,8 @@ library
     spawn                             >= 0.3 && < 0.4,
     filepattern                       >= 0.1.2 && < 0.2,
     witch                             >= 1.1 && < 1.3,
-    unliftio-core                     >= 0.2.1.0
+    unliftio-core                     >= 0.2.1.0,
+    MonadRandom                       >= 0.5.1,
   if !os(windows)
     build-depends:
       brick                           >= 1.4 && < 2.0,

diff --git a/src/EVM/Effects.hs b/src/EVM/Effects.hs
@@ -50,6 +50,7 @@ data Config = Config
   , abstRefineArith :: Bool
   , abstRefineMem   :: Bool
   , dumpTrace       :: Bool
+  , numCexFuzz      :: Integer
   }
   deriving (Show, Eq)
 
@@ -62,6 +63,7 @@ defaultConfig = Config
   , abstRefineArith = False
   , abstRefineMem   = False
   , dumpTrace = False
+  , numCexFuzz = 3
   }
 
 writeTraceDapp :: App m => DappInfo -> VM RealWorld -> m ()

diff --git a/src/EVM/Expr.hs b/src/EVM/Expr.hs
@@ -788,7 +788,7 @@ simplify e = if (mapExpr go e == e)
     go (ReadWord idx buf) = readWord idx buf
     go o@(ReadByte (Lit _) _) = simplifyReads o
     go (ReadByte idx buf) = readByte idx buf
-    go (BufLength buf) = bufLength buf
+    go (BufLength a) = bufLength a
 
     -- We can zero out any bytes in a base ConcreteBuf that we know will be overwritten by a later write
     -- TODO: make this fully general for entire write chains, not just a single write.

diff --git a/src/EVM/Fuzz.hs b/src/EVM/Fuzz.hs
@@ -0,0 +1,151 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+
+{- |
+    Module: EVM.Fuzz
+    Description: Concrete Fuzzer of Exprs
+-}
+
+module EVM.Fuzz where
+
+import Prelude hiding (LT, GT, lookup)
+import Control.Monad.State
+import Data.Map.Strict as Map (Map, (!), (!?), insert)
+import EVM.Expr qualified as Expr
+import EVM.Expr (bytesToW256)
+import Data.Set as Set (insert, Set, empty, toList)
+import EVM.Traversals
+import Data.ByteString qualified as BS
+import Data.Word (Word8)
+import Control.Monad.Random.Strict qualified as CMR
+
+import EVM.Types (Prop(..), W256, Expr(..), EType(..), internalError)
+import EVM.SMT (BufModel(..), SMTCex(..))
+
+tryCexFuzz :: [Prop] -> Integer -> Maybe (SMTCex)
+tryCexFuzz ps tries = CMR.evalRand (testVals tries) (CMR.mkStdGen 1337)
+  where
+    vars = extractVars ps
+    bufs = extractBufs ps
+    testVals :: CMR.MonadRandom m => Integer -> m (Maybe SMTCex)
+    testVals 0 = pure Nothing
+    testVals todo = do
+      varVals <- getVals vars
+      bufVals <- getBufs bufs
+      let
+        ret =  map (substituteEWord varVals . substituteBuf bufVals) ps
+        retSimp =  Expr.simplifyProps ret
+      if null retSimp then pure $ Just (SMTCex {
+                                    vars = varVals
+                                    , addrs = mempty
+                                    , buffers = bufVals
+                                    , store = mempty
+                                    , blockContext = mempty
+                                    , txContext = mempty
+                                    })
+                                    else testVals (todo-1)
+
+
+substituteEWord :: Map (Expr EWord) W256 -> Prop -> Prop
+substituteEWord valMap p = mapProp go p
+  where
+    go :: Expr a -> Expr a
+    go orig@(Var _) = Lit (valMap ! orig)
+    go orig@(TxValue) = Lit (valMap ! orig)
+    go a = a
+
+
+substituteBuf :: Map (Expr Buf) BufModel -> Prop -> Prop
+substituteBuf valMap p = mapProp go p
+  where
+    go :: Expr a -> Expr a
+    go orig@(AbstractBuf _) = case (valMap !? orig) of
+                                Just (Flat x) -> ConcreteBuf x
+                                Just (Comp _) -> internalError "No compressed allowed in fuzz"
+                                Nothing -> orig
+    go a = a
+
+
+-- Var extraction
+newtype CollectVars = CollectVars { vs :: Set.Set (Expr EWord) }
+  deriving (Show)
+
+initVarsState :: CollectVars
+initVarsState = CollectVars { vs = Set.empty }
+
+findVarProp :: Prop -> State CollectVars Prop
+findVarProp p = mapPropM go p
+  where
+    go :: forall a. Expr a -> State CollectVars (Expr a)
+    go = \case
+      e@(Var a) -> do
+        s <- get
+        put $ s{vs=Set.insert (Var a) s.vs}
+        pure e
+      e -> pure e
+
+
+extractVars :: [Prop] -> [Expr EWord]
+extractVars ps = Set.toList vars
+ where
+  CollectVars vars = execState (mapM_ findVarProp ps) initVarsState
+
+
+--- Buf extraction
+newtype CollectBufs = CollectBufs { bufs :: Set.Set (Expr Buf) }
+  deriving (Show)
+
+initBufsState :: CollectBufs
+initBufsState = CollectBufs { bufs = Set.empty }
+
+extractBufs :: [Prop] -> [Expr Buf]
+extractBufs ps = Set.toList bufs
+ where
+  CollectBufs bufs = execState (mapM_ findBufProp ps) initBufsState
+
+
+findBufProp :: Prop -> State CollectBufs Prop
+findBufProp p = mapPropM go p
+  where
+    go :: forall a. Expr a -> State CollectBufs (Expr a)
+    go = \case
+      e@(AbstractBuf a) -> do
+        s <- get
+        put $ s{bufs=Set.insert (AbstractBuf a) s.bufs}
+        pure e
+      e -> pure e
+
+
+-- Var value generation
+getVals :: CMR.MonadRandom m => [Expr EWord] -> m (Map (Expr EWord) W256)
+getVals vars = do
+    bufs <- go vars mempty
+    addTxStuff bufs
+  where
+    addTxStuff :: CMR.MonadRandom m => Map (Expr EWord) W256 -> m (Map (Expr EWord) W256)
+    addTxStuff a = pure $ Map.insert TxValue 0 a -- TODO change from 0 sometimes
+    go :: CMR.MonadRandom m => [Expr EWord] -> Map (Expr EWord) W256 -> m (Map (Expr EWord) W256)
+    go [] valMap = pure valMap
+    go (a:ax) valMap = do
+      val <- getRndW8s 32
+      go ax (Map.insert a (bytesToW256 val) valMap)
+
+
+-- Buf value generation
+getBufs :: CMR.MonadRandom m => [Expr Buf] -> m (Map (Expr Buf) BufModel)
+getBufs bufs = go bufs mempty
+  where
+    go :: CMR.MonadRandom m => [Expr Buf] -> Map (Expr Buf) BufModel -> m (Map (Expr Buf) BufModel)
+    go [] valMap = pure valMap
+    go (a:ax) valMap = do
+      emptySize :: Bool <- CMR.getRandom
+      size <- if emptySize then (pure 0)
+                           else getRndW8
+      bytes <- getRndW8s (fromIntegral size)
+      go ax (Map.insert a (Flat $ BS.pack bytes) valMap)
+
+
+getRndW8 :: CMR.MonadRandom m => m Word8
+getRndW8 = do CMR.getRandom
+
+getRndW8s :: CMR.MonadRandom m => Int -> m [Word8]
+getRndW8s n = replicateM n getRndW8
diff --git a/src/EVM/Solvers.hs b/src/EVM/Solvers.hs
@@ -16,7 +16,7 @@ import Control.Monad.IO.Unlift
 import Data.Char (isSpace)
 import Data.Map (Map)
 import Data.Map qualified as Map
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, isJust, fromJust)
 import Data.Text qualified as TS
 import Data.Text.Lazy (Text)
 import Data.Text.Lazy qualified as T
@@ -25,6 +25,7 @@ import Data.Text.Lazy.Builder
 import System.Process (createProcess, cleanupProcess, proc, ProcessHandle, std_in, std_out, std_err, StdStream(..))
 import Witch (into)
 import EVM.Effects
+import EVM.Fuzz (tryCexFuzz)
 
 import EVM.SMT
 import EVM.Types (W256, Expr(AbstractBuf), internalError)
@@ -126,35 +127,42 @@ withSolvers solver count timeout cont = do
     runTask :: (MonadIO m, ReadConfig m) => Task -> SolverInstance -> Chan SolverInstance -> Int -> m ()
     runTask (Task smt2@(SMT2 cmds (RefinementEqs refineEqs refps) cexvars ps) r) inst availableInstances fileCounter = do
       conf <- readConfig
+      let fuzzResult = tryCexFuzz ps conf.numCexFuzz
       liftIO $ do
         when (conf.dumpQueries) $ writeSMT2File smt2 fileCounter "abstracted"
-        -- reset solver and send all lines of provided script
-        out <- sendScript inst (SMT2 ("(reset)" : cmds) mempty mempty ps)
-        case out of
-          -- if we got an error then return it
-          Left e -> writeChan r (Error ("error while writing SMT to solver: " <> T.toStrict e))
-          -- otherwise call (check-sat), parse the result, and send it down the result channel
-          Right () -> do
-            sat <- sendLine inst "(check-sat)"
-            res <- do
-                case sat of
-                  "unsat" -> pure Unsat
-                  "timeout" -> pure Unknown
-                  "unknown" -> pure Unknown
-                  "sat" -> if null refineEqs then Sat <$> getModel inst cexvars
-                           else do
-                                let refinedSMT2 = SMT2 refineEqs mempty mempty (ps <> refps)
-                                writeSMT2File refinedSMT2 fileCounter "refined"
-                                _ <- sendScript inst refinedSMT2
-                                sat2 <- sendLine inst "(check-sat)"
-                                case sat2 of
-                                  "unsat" -> pure Unsat
-                                  "timeout" -> pure Unknown
-                                  "unknown" -> pure Unknown
-                                  "sat" -> Sat <$> getModel inst cexvars
-                                  _ -> pure . Error $ T.toStrict $ "Unable to parse solver output: " <> sat2
-                  _ -> pure . Error $ T.toStrict $ "Unable to parse solver output: " <> sat
-            writeChan r res
+        if (isJust fuzzResult)
+          then do
+            when (conf.verbose) $ putStrLn $ "Cex found via fuzzing:" <> (show fuzzResult)
+            writeChan r (Sat $ fromJust fuzzResult)
+          else do
+              when (conf.verbose) $ putStrLn $ "Fuzzing failed to find a Cex"
+              -- reset solver and send all lines of provided script
+              out <- sendScript inst (SMT2 ("(reset)" : cmds) mempty mempty ps)
+              case out of
+                -- if we got an error then return it
+                Left e -> writeChan r (Error ("error while writing SMT to solver: " <> T.toStrict e))
+                -- otherwise call (check-sat), parse the result, and send it down the result channel
+                Right () -> do
+                  sat <- sendLine inst "(check-sat)"
+                  res <- do
+                      case sat of
+                        "unsat" -> pure Unsat
+                        "timeout" -> pure Unknown
+                        "unknown" -> pure Unknown
+                        "sat" -> if null refineEqs then Sat <$> getModel inst cexvars
+                                 else do
+                                      let refinedSMT2 = SMT2 refineEqs mempty mempty (ps <> refps)
+                                      writeSMT2File refinedSMT2 fileCounter "refined"
+                                      _ <- sendScript inst refinedSMT2
+                                      sat2 <- sendLine inst "(check-sat)"
+                                      case sat2 of
+                                        "unsat" -> pure Unsat
+                                        "timeout" -> pure Unknown
+                                        "unknown" -> pure Unknown
+                                        "sat" -> Sat <$> getModel inst cexvars
+                                        _ -> pure . Error $ T.toStrict $ "Unable to parse solver output: " <> sat2
+                        _ -> pure . Error $ T.toStrict $ "Unable to parse solver output: " <> sat
+                  writeChan r res
 
         -- put the instance back in the list of available instances
         writeChan availableInstances inst

diff --git a/test/test.hs b/test/test.hs
@@ -2640,6 +2640,35 @@ tests = testGroup "hevm"
           assertBoolM "Did not find expected storage cex" testCex
           putStrLnM "Expected counterexample found"
   ]
+  , testGroup "concr-fuzz"
+    [ test "fuzz-complicated-mul" $ do
+      Just c <- solcRuntime "MyContract"
+        [i|
+        contract MyContract {
+          function complicated(uint x, uint y, uint z) public {
+            uint a;
+            uint b;
+            unchecked {
+              a = x * x * x * y * y * y * z;
+              b = x * x * x * x * y * y * z * z;
+            }
+            assert(a == b);
+          }
+        }
+        |]
+      let sig = (Sig "complicated(uint256,uint256,uint256)" [AbiUIntType 256, AbiUIntType 256, AbiUIntType 256])
+      (_, [Cex (_, ctr)]) <- withSolvers CVC5 1 Nothing $ \s -> checkAssert s defaultPanicCodes c (Just sig) [] defaultVeriOpts
+      let
+        x = getVar ctr "arg1"
+        y = getVar ctr "arg2"
+        z = getVar ctr "arg3"
+        a = x * x * x * y * y * y * z;
+        b = x * x * x * x * y * y * z * z;
+        val = a == b
+      assertBoolM "Must fail" (not val)
+      putStrLnM  $ "expected counterexample found, x:  " <> (show x) <> " y: " <> (show y) <> " z: " <> (show z)
+      putStrLnM  $ "cex a: " <> (show a) <> " b: " <> (show b)
+  ]
   , testGroup "simplification-working"
   [
     test "prop-simp-bool1" $ do