diff --git a/CHANGELOG.md b/CHANGELOG.md index 0bebb07a8..b9ec37ed4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 and continue running, whenever possible - STATICCALL abstraction is now performed in case of symbolic arguments - Better error messages for JSON parsing +- Multiple solutions are allowed for a single symbolic expression ## Fixed - We now try to simplify expressions fully before trying to cast them to a concrete value diff --git a/cli/cli.hs b/cli/cli.hs index e0bd4009d..436c2a203 100644 --- a/cli/cli.hs +++ b/cli/cli.hs @@ -102,6 +102,7 @@ data Command w , solverThreads :: w ::: Maybe Natural "Number of threads for each solver instance. Only respected for Z3 (default: 1)" , loopDetectionHeuristic :: w ::: LoopHeuristic "StackBased" "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive" , noDecompose :: w ::: Bool "Don't decompose storage slots into separate arrays" + , maxBranch :: w ::: Int "10" "Max number of branches to explore when encountering a symbolic value (default: 10)" } | Equivalence -- prove equivalence between two programs { codeA :: w ::: ByteString "Bytecode of the first program" @@ -122,6 +123,7 @@ data Command w , solverThreads :: w ::: Maybe Natural "Number of threads for each solver instance. Only respected for Z3 (default: 1)" , loopDetectionHeuristic :: w ::: LoopHeuristic "StackBased" "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive" , noDecompose :: w ::: Bool "Don't decompose storage slots into separate arrays" + , maxBranch :: w ::: Int "10" "Max number of branches to explore when encountering a symbolic value (default: 10)" } | Exec -- Execute a given program with specified env & calldata { code :: w ::: Maybe ByteString "Program bytecode" @@ -171,6 +173,7 @@ data Command w , maxIterations :: w ::: Maybe Integer "Number of times we may revisit a particular branching point. For no bound, set -1 (default: 5)" , loopDetectionHeuristic :: w ::: LoopHeuristic "StackBased" "Which heuristic should be used to determine if we are in a loop: StackBased (default) or Naive" , noDecompose :: w ::: Bool "Don't decompose storage slots into separate arrays" + , maxBranch :: w ::: Int "10" "Max number of branches to explore when encountering a symbolic value (default: 10)" , numCexFuzz :: w ::: Integer "3" "Number of fuzzing tries to do to generate a counterexample (default: 3)" , askSmtIterations :: w ::: Integer "1" "Number of times we may revisit a particular branching point before we consult the smt solver to check reachability (default: 1)" } @@ -216,6 +219,7 @@ main = withUtf8 $ do , numCexFuzz = cmd.numCexFuzz , dumpTrace = cmd.trace , decomposeStorage = Prelude.not cmd.noDecompose + , maxNumBranch = cmd.maxBranch } } case cmd of Version {} ->putStrLn getFullVersion diff --git a/src/EVM.hs b/src/EVM.hs index 4279fd273..cae4cce85 100644 --- a/src/EVM.hs +++ b/src/EVM.hs @@ -810,7 +810,7 @@ exec1 = do OpJump -> case stk of x:xs -> - burn g_mid $ forceConcrete x "JUMP: symbolic jumpdest" $ \x' -> + burn g_mid $ forceConcreteLimitSz x 2 "JUMP: symbolic jumpdest" $ \x' -> case tryInto x' of Left _ -> vmError BadJumpDestination Right i -> checkJump i xs @@ -818,7 +818,7 @@ exec1 = do OpJumpi -> case stk of - x:y:xs -> forceConcrete x "JUMPI: symbolic jumpdest" $ \x' -> + x:y:xs -> forceConcreteLimitSz x 2 "JUMPI: symbolic jumpdest" $ \x' -> burn g_high $ let jump :: Bool -> EVM t s () jump False = assign (#state % #stack) xs >> next @@ -887,7 +887,7 @@ exec1 = do Left _ -> vmError IllegalOverflow Right gas -> do overrideC <- use $ #state % #overrideCaller - delegateCall this gas xTo xTo xValue xInOffset xInSize xOutOffset xOutSize xs $ + delegateCall this gas xTo xTo xValue xInOffset xInSize xOutOffset xOutSize xs unknownCodeFallback $ \callee -> do let from' = fromMaybe self overrideC zoom #state $ do @@ -908,7 +908,7 @@ exec1 = do Left _ -> vmError IllegalOverflow Right gas -> do overrideC <- use $ #state % #overrideCaller - delegateCall this gas xTo self xValue xInOffset xInSize xOutOffset xOutSize xs $ \_ -> do + delegateCall this gas xTo self xValue xInOffset xInSize xOutOffset xOutSize xs unknownCodeFallback $ \_ -> do zoom #state $ do assign #callvalue xValue assign #caller $ fromMaybe self overrideC @@ -963,7 +963,7 @@ exec1 = do Right gas -> -- NOTE: we don't update overrideCaller in this case because -- forge-std doesn't. see: https://github.com/foundry-rs/foundry/pull/8863 - delegateCall this gas xTo' self (Lit 0) xInOffset xInSize xOutOffset xOutSize xs $ + delegateCall this gas xTo' self (Lit 0) xInOffset xInSize xOutOffset xOutSize xs unknownCodeFallback $ \_ -> touchAccount self _ -> underrun @@ -994,15 +994,15 @@ exec1 = do case stk of xGas:xTo:xInOffset:xInSize:xOutOffset:xOutSize:xs -> case wordToAddr xTo of - Nothing -> fallback + Nothing -> fallback undefined Just xTo' -> do case gasTryFrom xGas of Left _ -> vmError IllegalOverflow Right gas -> canFetchAccount xTo' >>= \case - False -> fallback + False -> fallback undefined True -> do overrideC <- use $ #state % #overrideCaller - delegateCall this gas xTo' xTo' (Lit 0) xInOffset xInSize xOutOffset xOutSize xs $ + delegateCall this gas xTo' xTo' (Lit 0) xInOffset xInSize xOutOffset xOutSize xs fallback $ \callee -> do zoom #state $ do assign #callvalue (Lit 0) @@ -1012,8 +1012,8 @@ exec1 = do touchAccount self touchAccount callee where - fallback :: EVM t s () - fallback = do + fallback :: a -> EVM t s () + fallback _ = do -- Reset caller if needed resetCaller <- use $ #state % #resetCaller when resetCaller $ assign (#state % #overrideCaller) Nothing @@ -1605,7 +1605,7 @@ notStatic continue = do forceAddr :: VMOps t => Expr EWord -> String -> (Expr EAddr -> EVM t s ()) -> EVM t s () forceAddr n msg continue = case wordToAddr n of - Nothing -> oneSolution n $ \case + Nothing -> manySolutions n 20 $ \case Just sol -> continue $ LitAddr (truncateToAddr sol) Nothing -> fallback Just c -> continue c @@ -1615,7 +1615,15 @@ forceAddr n msg continue = case wordToAddr n of forceConcrete :: VMOps t => Expr EWord -> String -> (W256 -> EVM t s ()) -> EVM t s () forceConcrete n msg continue = case maybeLitWordSimp n of - Nothing -> oneSolution n $ maybe fallback continue + Nothing -> manySolutions n 32 $ maybe fallback continue + Just c -> continue c + where fallback = do + vm <- get + partial $ UnexpectedSymbolicArg vm.state.pc (getOpName vm.state) msg (wrap [n]) + +forceConcreteLimitSz :: VMOps t => Expr EWord -> Int -> String -> (W256 -> EVM t s ()) -> EVM t s () +forceConcreteLimitSz n bytes msg continue = case maybeLitWordSimp n of + Nothing -> manySolutions n bytes $ maybe fallback continue Just c -> continue c where fallback = do vm <- get @@ -1623,7 +1631,7 @@ forceConcrete n msg continue = case maybeLitWordSimp n of forceConcreteAddr :: VMOps t => Expr EAddr -> String -> (Addr -> EVM t s ()) -> EVM t s () forceConcreteAddr n msg continue = case maybeLitAddrSimp n of - Nothing -> oneSolution (WAddr n) $ maybe fallback $ \c -> continue $ unsafeInto c + Nothing -> manySolutions (WAddr n) 20 $ maybe fallback $ \c -> continue (truncateToAddr c) Just c -> continue c where fallback = do vm <- get @@ -1706,7 +1714,7 @@ cheatCode :: Expr EAddr cheatCode = LitAddr $ unsafeInto (keccak' "hevm cheat code") cheat - :: (?op :: Word8, VMOps t) + :: forall t s . (?op :: Word8, VMOps t) => Gas t -> (Expr EWord, Expr EWord) -> (Expr EWord, Expr EWord) -> [Expr EWord] -> EVM t s () cheat gas (inOffset, inSize) (outOffset, outSize) xs = do @@ -1724,11 +1732,12 @@ cheat gas (inOffset, inSize) (outOffset, outSize) xs = do , context = newContext } case maybeLitWordSimp abi of - Nothing -> oneSolution abi $ \case + Nothing -> manySolutions abi 4 $ \case Just concAbi -> runCheat concAbi input Nothing -> partial $ UnexpectedSymbolicArg vm.state.pc (getOpName vm.state) "symbolic cheatcode selector" (wrap [abi]) Just concAbi -> runCheat concAbi input where + runCheat :: W256 -> Expr 'Buf -> EVM t s () runCheat abi input = do let abi' = unsafeInto abi case Map.lookup abi' cheatActions of @@ -2046,6 +2055,11 @@ cheatActions = Map.fromList assertLe = genAssert (<=) Expr.leq ">" "assertLe" assertGe = genAssert (>=) Expr.geq "<" "assertGe" +unknownCodeFallback :: VMOps t => Expr EAddr -> EVM t s () +unknownCodeFallback xTo = do + pc <- use (#state % #pc) + state <- use #state + partial $ UnexpectedSymbolicArg pc (getOpName state) "call target has unknown code" (wrap [xTo]) -- * General call implementation ("delegateCall") -- note that the continuation is ignored in the precompile case @@ -2061,9 +2075,10 @@ delegateCall -> Expr EWord -> Expr EWord -> [Expr EWord] - -> (Expr EAddr -> EVM t s ()) + -> (Expr EAddr -> EVM t s ()) -- fallback + -> (Expr EAddr -> EVM t s ()) -- continue -> EVM t s () -delegateCall this gasGiven xTo xContext xValue xInOffset xInSize xOutOffset xOutSize xs continue +delegateCall this gasGiven xTo xContext xValue xInOffset xInSize xOutOffset xOutSize xs fallback continue | isPrecompileAddr xTo = forceConcreteAddr2 (xTo, xContext) "Cannot call precompile with symbolic addresses" $ \(xTo', xContext') -> @@ -2077,10 +2092,7 @@ delegateCall this gasGiven xTo xContext xValue xInOffset xInSize xOutOffset xOut when resetCaller $ assign (#state % #overrideCaller) Nothing vm0 <- get fetchAccount xTo $ \target -> case target.code of - UnknownCode _ -> do - pc <- use (#state % #pc) - state <- use #state - partial $ UnexpectedSymbolicArg pc (getOpName state) "call target has unknown code" (wrap [xTo]) + UnknownCode _ -> fallback xTo _ -> do burn' xGas $ do calldata <- readMemory xInOffset xInSize @@ -2984,16 +2996,31 @@ instance VMOps Symbolic where choosePath loc Unknown = choose . PleaseChoosePath condSimp $ choosePath loc . Case - oneSolution ewordExpr continue = do + manySolutions ewordExpr numBytes continue = do pathconds <- use #constraints - query $ PleaseGetSol ewordExpr pathconds $ \case - Just concVal -> do + query $ PleaseGetSols ewordExpr numBytes pathconds $ \case + Just concVals -> do assign #result Nothing - pushTo #constraints $ Expr.simplifyProp (ewordExpr .== (Lit concVal)) - continue $ Just concVal + case (length concVals) of + 0 -> continue Nothing + 1 -> runOne $ head concVals + _ -> choose . PleaseChoosePath ewordExpr $ runMore concVals Nothing -> do assign #result Nothing continue Nothing + where + runMore vals leftOrRight = do + case length vals of + -- if 2, we run both, otherwise, we run 1st and run ourselves with the rest + 2 -> if leftOrRight then runOne $ head vals + else runOne (head $ tail vals) + _ -> if leftOrRight then runOne $ head vals + else choose . PleaseChoosePath ewordExpr $ runMore (tail vals) + runOne val = do + assign #result Nothing + pushTo #constraints $ Expr.simplifyProp (ewordExpr .== (Lit val)) + continue $ Just val + instance VMOps Concrete where burn' n continue = do @@ -3122,7 +3149,7 @@ instance VMOps Concrete where whenSymbolicElse _ a = a partial _ = internalError "won't happen during concrete exec" branch (forceLit -> cond) continue = continue (cond > 0) - oneSolution _ _ = internalError "SMT solver should never be needed in concrete mode" + manySolutions _ _ _ = internalError "SMT solver should never be needed in concrete mode" -- Create symbolic VM from concrete VM symbolify :: VM Concrete s -> VM Symbolic s diff --git a/src/EVM/Effects.hs b/src/EVM/Effects.hs index 41354e551..735b8004a 100644 --- a/src/EVM/Effects.hs +++ b/src/EVM/Effects.hs @@ -43,6 +43,7 @@ data Config = Config -- Returns Unknown if the Cex cannot be found via fuzzing , onlyCexFuzz :: Bool , decomposeStorage :: Bool + , maxNumBranch :: Int } deriving (Show, Eq) @@ -56,6 +57,7 @@ defaultConfig = Config , numCexFuzz = 10 , onlyCexFuzz = False , decomposeStorage = True + , maxNumBranch = 100 } -- Write to the console diff --git a/src/EVM/Fetch.hs b/src/EVM/Fetch.hs index f6c23cc91..3508e97bb 100644 --- a/src/EVM/Fetch.hs +++ b/src/EVM/Fetch.hs @@ -29,6 +29,9 @@ import System.Process import Control.Monad.IO.Class import Control.Monad (when) import EVM.Effects +import qualified EVM.Expr as Expr +import Numeric (showHex,readHex) +import Data.Bits ((.&.)) -- | Abstract representation of an RPC fetch request data RpcQuery a where @@ -213,9 +216,9 @@ oracle solvers info q = do -- Is is possible to satisfy the condition? continue <$> checkBranch solvers (branchcondition ./= (Lit 0)) pathconds - PleaseGetSol symAddr pathconditions continue -> do + PleaseGetSols symExpr numBytes pathconditions continue -> do let pathconds = foldl' PAnd (PBool True) pathconditions - continue <$> getSolution solvers symAddr pathconds + continue <$> getSolutions solvers symExpr numBytes pathconds PleaseFetchContract addr base continue -> do contract <- case info of @@ -244,33 +247,41 @@ oracle solvers info q = do type Fetcher t m s = App m => Query t s -> m (EVM t s ()) -getSolution :: forall m . App m => SolverGroup -> Expr EWord -> Prop -> m (Maybe W256) -getSolution solvers symAddr pathconditions = do +getSolutions :: forall m . App m => SolverGroup -> Expr EWord -> Int -> Prop -> m (Maybe [W256]) +getSolutions solvers symExprPreSimp numBytes pathconditions = do conf <- readConfig liftIO $ do - ret <- collectSolutions [] 1 pathconditions conf + let symExpr = Expr.concKeccakSimpExpr symExprPreSimp + when conf.debug $ putStrLn $ "Collecting solutions to symbolic query: " <> show symExpr + ret <- collectSolutions [] conf.maxNumBranch symExpr pathconditions conf case ret of Nothing -> pure Nothing Just r -> case length r of 0 -> pure Nothing - -- Temporary, a future improvement can deal with more than one solution - 1 -> pure $ Just (r !! 0) - _ -> pure Nothing + _ -> pure $ Just r where - collectSolutions :: [W256] -> Int -> Prop -> Config -> IO (Maybe [W256]) - collectSolutions addrs maxSols conds conf = do - if (length addrs > maxSols) then pure Nothing + createHexValue k = + let hexString = concat (replicate k "ff") + in fst . head $ readHex hexString + collectSolutions :: [W256] -> Int -> Expr EWord -> Prop -> Config -> IO (Maybe [W256]) + collectSolutions vals maxSols symExpr conds conf = do + if (length vals > maxSols) then pure Nothing else - checkSat solvers (assertProps conf [(PEq (Var "addrQuery") symAddr) .&& conds]) >>= \case + checkSat solvers (assertProps conf [(PEq (Var "addrQuery") symExpr) .&& conds]) >>= \case Sat (SMTCex vars _ _ _ _ _) -> case (Map.lookup (Var "addrQuery") vars) of - Just addr -> do - let newConds = PAnd conds (symAddr ./= (Lit addr)) - when conf.debug $ putStrLn $ "Got one solution to symbolic query:" <> show addr <> " now have " <> show (length addrs + 1) <> " solution(s), max is: " <> show maxSols - collectSolutions (addr:addrs) maxSols newConds conf + Just v -> do + let hexMask = createHexValue numBytes + maskedVal = v .&. hexMask + cond = (And symExpr (Lit hexMask)) ./= Lit maskedVal + newConds = Expr.simplifyProp $ PAnd conds cond + showedVal = "val: 0x" <> (showHex maskedVal "") + when conf.debug $ putStrLn $ "Got one solution to symbolic query," <> showedVal <> " now have " <> + show (length vals + 1) <> " solution(s), max is: " <> show maxSols + collectSolutions (maskedVal:vals) maxSols symExpr newConds conf _ -> internalError "No solution to symbolic query" Unsat -> do when conf.debug $ putStrLn "No more solution(s) to symbolic query." - pure $ Just addrs + pure $ Just vals -- Error or timeout, we need to be conservative res -> do when conf.debug $ putStrLn $ "Symbolic query result is neither SAT nor UNSAT:" <> show res diff --git a/src/EVM/Solvers.hs b/src/EVM/Solvers.hs index fe21f7e2f..3e506f8d1 100644 --- a/src/EVM/Solvers.hs +++ b/src/EVM/Solvers.hs @@ -131,7 +131,6 @@ withSolvers solver count threads timeout cont = do when (conf.debug) $ putStrLn $ " Cex found via fuzzing:" <> (show fuzzResult) writeChan r (Sat $ fromJust fuzzResult) else if not conf.onlyCexFuzz then do - when (conf.debug) $ putStrLn " Fuzzing failed to find a Cex" -- reset solver and send all lines of provided script out <- sendScript inst (SMT2 ("(reset)" : cmds) mempty ps) case out of diff --git a/src/EVM/SymExec.hs b/src/EVM/SymExec.hs index a69d11001..50cfe39fc 100644 --- a/src/EVM/SymExec.hs +++ b/src/EVM/SymExec.hs @@ -687,6 +687,8 @@ equivalenceCheck solvers bytecodeA bytecodeB opts calldata = do branchesB <- getBranches bytecodeB when conf.debug $ liftIO $ do putStrLn "bytecodeA and bytecodeB are different, checking for equivalence" + -- putStrLn $ "endstateA: -> " <> (T.unpack $ T.intercalate "\nendStateA: -> " $ map formatExpr branchesA) + -- putStrLn $ "endstateB: -> " <> (T.unpack $ T.intercalate "\nendstateB: -> " $ map formatExpr branchesB) equivalenceCheck' solvers branchesA branchesB where -- decompiles the given bytecode into a list of branches @@ -963,7 +965,7 @@ prettyCalldata cex buf sig types = headErr errSig (T.splitOn "(" sig) <> "(" <> ConcreteBuf c -> T.pack (bsToHex c) _ -> err SAbi _ -> err - headErr e l = fromMaybe e $ listToMaybe l + headErr e l = fromMaybe e $ listToMaybe l err = internalError $ "unable to produce a concrete model for calldata: " <> show buf errSig = internalError $ "unable to split sig: " <> show sig diff --git a/src/EVM/Types.hs b/src/EVM/Types.hs index 13aac47f3..edabc051a 100644 --- a/src/EVM/Types.hs +++ b/src/EVM/Types.hs @@ -594,7 +594,7 @@ data Query t s where PleaseFetchContract :: Addr -> BaseState -> (Contract -> EVM t s ()) -> Query t s PleaseFetchSlot :: Addr -> W256 -> (W256 -> EVM t s ()) -> Query t s PleaseAskSMT :: Expr EWord -> [Prop] -> (BranchCondition -> EVM Symbolic s ()) -> Query Symbolic s - PleaseGetSol :: Expr EWord -> [Prop] -> (Maybe W256 -> EVM Symbolic s ()) -> Query Symbolic s + PleaseGetSols :: Expr EWord -> Int -> [Prop] -> (Maybe [W256] -> EVM Symbolic s ()) -> Query Symbolic s PleaseDoFFI :: [String] -> Map String String -> (ByteString -> EVM t s ()) -> Query t s PleaseReadEnv :: String -> (String -> EVM t s ()) -> Query t s @@ -618,8 +618,10 @@ instance Show (Query t s) where (("") ++) - PleaseGetSol expr constraints _ -> - ((" + (("") ++) PleaseDoFFI cmd env _ -> @@ -630,7 +632,7 @@ instance Show (Query t s) where instance Show (Choose s) where showsPrec _ = \case PleaseChoosePath _ _ -> - ((" EVM t s () branch :: Expr EWord -> (Bool -> EVM t s ()) -> EVM t s () - oneSolution :: Expr EWord -> (Maybe W256 -> EVM t s ()) -> EVM t s () + manySolutions :: Expr EWord -> Int -> (Maybe W256 -> EVM t s ()) -> EVM t s () -- Bytecode Representations ------------------------------------------------------------------------