From 46d992de21d9f1ff7c82a29fe23a17f6ae34c6bb Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Thu, 2 Nov 2023 11:17:59 -0400 Subject: [PATCH 1/5] crucible-llvm-syntax: Make the parser extensible --- .../src/Lang/Crucible/LLVM/Syntax.hs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax.hs b/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax.hs index fde5c2557..b32e22236 100644 --- a/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax.hs +++ b/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax.hs @@ -12,7 +12,7 @@ module Lang.Crucible.LLVM.Syntax , pointerTypeParser ) where -import Control.Applicative (empty) +import Control.Applicative ((<|>), empty) import Control.Monad (unless) import Control.Monad.IO.Class (MonadIO(..)) import Control.Monad.State.Strict (MonadState(..)) @@ -51,12 +51,18 @@ import Lang.Crucible.Syntax.ExprParse qualified as Parse llvmParserHooks :: Mem.HasPtrWidth w => + -- | Hooks with which to further extend this parser + ParserHooks LLVM -> GlobalVar Mem -> ParserHooks LLVM -llvmParserHooks mvar = +llvmParserHooks hooks mvar = ParserHooks - { extensionTypeParser = llvmTypeParser - , extensionParser = llvmAtomParser mvar + { extensionTypeParser = + Parse.describe "LLVM type" $ + Parse.call (llvmTypeParser <|> extensionTypeParser hooks) + , extensionParser = + Parse.describe "LLVM operation" $ + Parse.call (llvmAtomParser mvar <|> extensionParser hooks) } --------------------------------------------------------------------- From d3128bca7945a846d7a7a55d3b2a85b1151bb33c Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Thu, 2 Nov 2023 11:49:14 -0400 Subject: [PATCH 2/5] crucible-llvm-syntax: Add type alias parser extension --- crucible-llvm-syntax/README.md | 6 ++ .../crucible-llvm-syntax.cabal | 2 + .../src/Lang/Crucible/LLVM/Syntax.hs | 19 ++-- .../Lang/Crucible/LLVM/Syntax/TypeAlias.hs | 95 +++++++++++++++++++ crucible-llvm-syntax/test-data/type.cbl | 13 +++ crucible-llvm-syntax/test-data/type.out.good | 33 +++++++ crucible-llvm-syntax/test/Test.hs | 4 +- 7 files changed, 164 insertions(+), 8 deletions(-) create mode 100644 crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs create mode 100644 crucible-llvm-syntax/test-data/type.cbl create mode 100644 crucible-llvm-syntax/test-data/type.out.good diff --git a/crucible-llvm-syntax/README.md b/crucible-llvm-syntax/README.md index 7bccded00..eecc9bf11 100644 --- a/crucible-llvm-syntax/README.md +++ b/crucible-llvm-syntax/README.md @@ -34,4 +34,10 @@ If the numeral `w` is the width of a pointer and `n` is an arbitrary numeral, - `load : Alignment -> LLVMType -> Ptr w -> T`: load a value from memory, where the type `T` is determined by the `LLVMType` - `store : Alignment -> LLVMType -> Ptr w -> T -> Unit`: store a value to memory, where the type `T` is determined by the `LLVMType` +## Further extensions + +The parser hooks can be further customized by passing yet another `ParserHooks` +to them. The `TypeAlias` module implements one such example, for translating +types like `Long` into `(Ptr n)` or `(Bitvector n)` for appropriate `n`. + [syn]: ../crucible-syntax diff --git a/crucible-llvm-syntax/crucible-llvm-syntax.cabal b/crucible-llvm-syntax/crucible-llvm-syntax.cabal index aca0ae3a1..e5fa453b0 100644 --- a/crucible-llvm-syntax/crucible-llvm-syntax.cabal +++ b/crucible-llvm-syntax/crucible-llvm-syntax.cabal @@ -93,6 +93,7 @@ library build-depends: base >= 4.13, + containers, crucible >= 0.1, crucible-llvm, crucible-syntax, @@ -106,6 +107,7 @@ library exposed-modules: Lang.Crucible.LLVM.Syntax + Lang.Crucible.LLVM.Syntax.TypeAlias test-suite crucible-llvm-syntax-tests import: shared diff --git a/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax.hs b/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax.hs index b32e22236..6e818807c 100644 --- a/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax.hs +++ b/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax.hs @@ -8,7 +8,8 @@ {-# LANGUAGE PatternSynonyms #-} module Lang.Crucible.LLVM.Syntax - ( llvmParserHooks + ( emptyParserHooks + , llvmParserHooks , pointerTypeParser ) where @@ -49,7 +50,11 @@ import Lang.Crucible.Syntax.Concrete qualified as Parse import Lang.Crucible.Syntax.ExprParse (MonadSyntax) import Lang.Crucible.Syntax.ExprParse qualified as Parse -llvmParserHooks :: +-- | A 'ParserHooks' instance that adds no further extensions to the language. +emptyParserHooks :: ParserHooks ext +emptyParserHooks = ParserHooks empty empty + +llvmParserHooks :: Mem.HasPtrWidth w => -- | Hooks with which to further extend this parser ParserHooks LLVM -> @@ -57,10 +62,10 @@ llvmParserHooks :: ParserHooks LLVM llvmParserHooks hooks mvar = ParserHooks - { extensionTypeParser = + { extensionTypeParser = Parse.describe "LLVM type" $ Parse.call (llvmTypeParser <|> extensionTypeParser hooks) - , extensionParser = + , extensionParser = Parse.describe "LLVM operation" $ Parse.call (llvmAtomParser mvar <|> extensionParser hooks) } @@ -97,7 +102,7 @@ llvmAtomParser :: , ?parserHooks :: ParserHooks LLVM , Mem.HasPtrWidth w ) => - GlobalVar Mem -> + GlobalVar Mem -> m (Some (Atom s)) llvmAtomParser mvar = Parse.depCons Parse.atomName $ @@ -140,7 +145,7 @@ llvmAtomParser mvar = Atom.AtomName "alloca" -> Parse.describe "LLVM alloca arguments" $ do loc <- Parse.position - (align, assign) <- + (align, assign) <- Parse.cons parseAlign (Parse.operands (Ctx.Empty Ctx.:> BVRepr ?ptrWidth)) @@ -150,7 +155,7 @@ llvmAtomParser mvar = Atom.AtomName "load" -> Parse.describe "LLVM load arguments" $ do loc <- Parse.position - (align, (memTy, assign)) <- + (align, (memTy, assign)) <- Parse.cons parseAlign (Parse.cons diff --git a/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs b/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs new file mode 100644 index 000000000..78f329b8b --- /dev/null +++ b/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs @@ -0,0 +1,95 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE TypeApplications #-} + +module Lang.Crucible.LLVM.Syntax.TypeAlias + ( TypeAlias(..) + , TypeLookup(..) + , aarch32LinuxTypes + , x86_64LinuxTypes + , typeAliasParser + , typeAliasParserHooks + ) where + +import Control.Applicative ( Alternative(empty) ) +import qualified Data.Map as Map +import qualified Data.Text as Text + +import qualified Data.Parameterized.NatRepr as PN +import Data.Parameterized.Some ( Some(..) ) + +import qualified Lang.Crucible.LLVM.MemModel as LCLM +import qualified Lang.Crucible.Syntax.Atoms as LCSA +import qualified Lang.Crucible.Syntax.Concrete as LCSC +import qualified Lang.Crucible.Syntax.ExprParse as LCSE +import qualified Lang.Crucible.Types as LCT + +-- | Additional types beyond those built into crucible-llvm-syntax. +data TypeAlias = Byte | Int | Long | PidT | Pointer | Short | SizeT | UidT + deriving (Bounded, Enum, Eq, Show) + +-- | Lookup function from a 'TypeAlias' to the underlying crucible type it +-- represents. +newtype TypeLookup = TypeLookup (TypeAlias -> (Some LCT.TypeRepr)) + +-- | A lookup function from 'AFE.TypeAlias' to types with the appropriate width +-- on Arm32 Linux. +aarch32LinuxTypes :: TypeLookup +aarch32LinuxTypes = + TypeLookup $ + \case + Byte -> Some (LCT.BVRepr (PN.knownNat @8)) + Int -> Some (LCT.BVRepr (PN.knownNat @32)) + Long -> Some (LCT.BVRepr (PN.knownNat @32)) + PidT -> Some (LCT.BVRepr (PN.knownNat @32)) + Pointer -> Some (LCLM.LLVMPointerRepr (PN.knownNat @32)) + Short -> Some (LCT.BVRepr (PN.knownNat @16)) + SizeT -> Some (LCT.BVRepr (PN.knownNat @32)) + UidT -> Some (LCT.BVRepr (PN.knownNat @32)) + +-- | A lookup function from 'TypeAlias' to types with the appropriate width on +-- X86_64 Linux. +x86_64LinuxTypes :: TypeLookup +x86_64LinuxTypes = + TypeLookup $ + \case + Byte -> Some (LCT.BVRepr (PN.knownNat @8)) + Int -> Some (LCT.BVRepr (PN.knownNat @32)) + Long -> Some (LCT.BVRepr (PN.knownNat @64)) + PidT -> Some (LCT.BVRepr (PN.knownNat @32)) + Pointer -> Some (LCLM.LLVMPointerRepr (PN.knownNat @64)) + Short -> Some (LCT.BVRepr (PN.knownNat @16)) + SizeT -> Some (LCT.BVRepr (PN.knownNat @64)) + UidT -> Some (LCT.BVRepr (PN.knownNat @32)) + +-- | Parser for type extensions to Crucible syntax +typeMapParser :: + LCSE.MonadSyntax LCSA.Atomic m => + -- | A mapping from type names to the crucible types they represent + Map.Map LCSA.AtomName (Some LCT.TypeRepr) -> + m (Some LCT.TypeRepr) +typeMapParser types = do + name <- LCSC.atomName + case Map.lookup name types of + Just someTypeRepr -> return someTypeRepr + Nothing -> empty + +-- | Parser for type extensions to Crucible syntax +typeAliasParser :: + LCSE.MonadSyntax LCSA.Atomic m => + TypeLookup -> + m (Some LCT.TypeRepr) +typeAliasParser (TypeLookup lookupFn) = + typeMapParser $ + Map.fromList + [ (LCSA.AtomName (Text.pack (show t)), lookupFn t) + | t <- [minBound..maxBound] + ] + +typeAliasParserHooks :: TypeLookup -> LCSC.ParserHooks ext +typeAliasParserHooks lookupFn = + LCSC.ParserHooks + { LCSC.extensionTypeParser = typeAliasParser lookupFn + , LCSC.extensionParser = empty + } diff --git a/crucible-llvm-syntax/test-data/type.cbl b/crucible-llvm-syntax/test-data/type.cbl new file mode 100644 index 000000000..85cc448e6 --- /dev/null +++ b/crucible-llvm-syntax/test-data/type.cbl @@ -0,0 +1,13 @@ +(defun @test-type () Unit + (start start: + (let byte (the Byte (bv 8 0))) + (let int (the Int (bv 32 0))) + (let long (the Long (bv 64 0))) + (let pid (the PidT (bv 32 0))) + (let blk (the Nat 0)) + (let off (bv 64 0)) + (let p (ptr 64 blk off)) + (let ptr (the Pointer p)) + (let short (the Short (bv 16 0))) + (let size (the SizeT (bv 64 0))) + (return ()))) diff --git a/crucible-llvm-syntax/test-data/type.out.good b/crucible-llvm-syntax/test-data/type.out.good new file mode 100644 index 000000000..d261613e3 --- /dev/null +++ b/crucible-llvm-syntax/test-data/type.out.good @@ -0,0 +1,33 @@ +(defun @test-type () Unit + (start start: + (let byte (the Byte (bv 8 0))) + (let int (the Int (bv 32 0))) + (let long (the Long (bv 64 0))) + (let pid (the PidT (bv 32 0))) + (let blk (the Nat 0)) + (let off (bv 64 0)) + (let p (ptr 64 blk off)) + (let ptr (the Pointer p)) + (let short (the Short (bv 16 0))) + (let size (the SizeT (bv 64 0))) + (return ()))) + +test-type +%0 + % 3:15 + $0 = bVLit(8, BV 0) + % 4:14 + $1 = bVLit(32, BV 0) + % 5:15 + $2 = bVLit(64, BV 0) + % 7:14 + $3 = natLit(0) + % 9:16 + $4 = extensionApp(pointerExpr $3 $2) + % 11:16 + $5 = bVLit(16, BV 0) + % 13:13 + $6 = emptyApp() + % 13:5 + return $6 + % no postdom diff --git a/crucible-llvm-syntax/test/Test.hs b/crucible-llvm-syntax/test/Test.hs index 507e1c6e0..8561eebb6 100644 --- a/crucible-llvm-syntax/test/Test.hs +++ b/crucible-llvm-syntax/test/Test.hs @@ -25,6 +25,7 @@ import Lang.Crucible.Syntax.Concrete (ParserHooks) import Lang.Crucible.Syntax.Prog (doParseCheck) import Lang.Crucible.LLVM.Syntax (llvmParserHooks) +import Lang.Crucible.LLVM.Syntax.TypeAlias (typeAliasParserHooks, x86_64LinuxTypes) main :: IO () main = do @@ -56,7 +57,8 @@ parserHooks = do halloc <- newHandleAllocator memVar <- mkMemVar "crucible-llvm-syntax-test-memory" halloc let ?ptrWidth = knownNat @64 - return (llvmParserHooks memVar) + let hooks = typeAliasParserHooks x86_64LinuxTypes + return (llvmParserHooks hooks memVar) testParser :: FilePath -> FilePath -> IO () testParser inFile outFile = From b48d753af1fc665ccc1132855486d5cc3781b7cc Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Thu, 2 Nov 2023 11:51:55 -0400 Subject: [PATCH 3/5] crucible-llvm-syntax: A bit of work on haddocks --- .../src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs b/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs index 78f329b8b..8e8c890ca 100644 --- a/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs +++ b/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs @@ -36,7 +36,7 @@ newtype TypeLookup = TypeLookup (TypeAlias -> (Some LCT.TypeRepr)) -- | A lookup function from 'AFE.TypeAlias' to types with the appropriate width -- on Arm32 Linux. aarch32LinuxTypes :: TypeLookup -aarch32LinuxTypes = +aarch32LinuxTypes = TypeLookup $ \case Byte -> Some (LCT.BVRepr (PN.knownNat @8)) @@ -62,9 +62,9 @@ x86_64LinuxTypes = Short -> Some (LCT.BVRepr (PN.knownNat @16)) SizeT -> Some (LCT.BVRepr (PN.knownNat @64)) UidT -> Some (LCT.BVRepr (PN.knownNat @32)) - + -- | Parser for type extensions to Crucible syntax -typeMapParser :: +typeMapParser :: LCSE.MonadSyntax LCSA.Atomic m => -- | A mapping from type names to the crucible types they represent Map.Map LCSA.AtomName (Some LCT.TypeRepr) -> @@ -75,8 +75,8 @@ typeMapParser types = do Just someTypeRepr -> return someTypeRepr Nothing -> empty --- | Parser for type extensions to Crucible syntax -typeAliasParser :: +-- | Parser for type aliases for the Crucible-LLVM syntax +typeAliasParser :: LCSE.MonadSyntax LCSA.Atomic m => TypeLookup -> m (Some LCT.TypeRepr) @@ -87,6 +87,7 @@ typeAliasParser (TypeLookup lookupFn) = | t <- [minBound..maxBound] ] +-- | Parser hooks with 'LCSC.extensionTypeParser' set to 'typeAliasParser' typeAliasParserHooks :: TypeLookup -> LCSC.ParserHooks ext typeAliasParserHooks lookupFn = LCSC.ParserHooks From adcb3f6e0a84db83ca45669fd6b9a486e99627f1 Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Thu, 2 Nov 2023 15:49:08 -0400 Subject: [PATCH 4/5] Documentation tweaks Co-authored-by: Ryan Scott --- crucible-llvm-syntax/README.md | 2 +- crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crucible-llvm-syntax/README.md b/crucible-llvm-syntax/README.md index eecc9bf11..e48b78850 100644 --- a/crucible-llvm-syntax/README.md +++ b/crucible-llvm-syntax/README.md @@ -36,7 +36,7 @@ If the numeral `w` is the width of a pointer and `n` is an arbitrary numeral, ## Further extensions -The parser hooks can be further customized by passing yet another `ParserHooks` +The LLVM parser hooks can be further customized by passing yet another `ParserHooks` to them. The `TypeAlias` module implements one such example, for translating types like `Long` into `(Ptr n)` or `(Bitvector n)` for appropriate `n`. diff --git a/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs b/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs index 8e8c890ca..4b329089f 100644 --- a/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs +++ b/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs @@ -33,7 +33,7 @@ data TypeAlias = Byte | Int | Long | PidT | Pointer | Short | SizeT | UidT -- represents. newtype TypeLookup = TypeLookup (TypeAlias -> (Some LCT.TypeRepr)) --- | A lookup function from 'AFE.TypeAlias' to types with the appropriate width +-- | A lookup function from 'TypeAlias' to types with the appropriate width -- on Arm32 Linux. aarch32LinuxTypes :: TypeLookup aarch32LinuxTypes = From 22bd7b7ee0044bd6d749010d8011731d84472213 Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Thu, 2 Nov 2023 16:15:41 -0400 Subject: [PATCH 5/5] crucible-llvm-syntax: Additional documentation --- crucible-llvm-syntax/README.md | 3 ++- .../src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/crucible-llvm-syntax/README.md b/crucible-llvm-syntax/README.md index e48b78850..702b6e07e 100644 --- a/crucible-llvm-syntax/README.md +++ b/crucible-llvm-syntax/README.md @@ -38,6 +38,7 @@ If the numeral `w` is the width of a pointer and `n` is an arbitrary numeral, The LLVM parser hooks can be further customized by passing yet another `ParserHooks` to them. The `TypeAlias` module implements one such example, for translating -types like `Long` into `(Ptr n)` or `(Bitvector n)` for appropriate `n`. +types like `Long` into `(Bitvector n)` and `Pointer` into `(Ptr n)` for +appropriate `n`. [syn]: ../crucible-syntax diff --git a/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs b/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs index 4b329089f..092d61231 100644 --- a/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs +++ b/crucible-llvm-syntax/src/Lang/Crucible/LLVM/Syntax/TypeAlias.hs @@ -1,3 +1,19 @@ +{- +Module : Lang.Crucible.LLVM.Syntax.TypeAlias +Copyright : (c) Galois, Inc 2023 +Maintainer : Langston Barrett +License : BSD3 + +This module provides facilities for parsing C-like types and translating them +to appropriate Crucible-LLVM types, given a target triple. For example, the +syntax @Long@ is parsed as the Crucible-LLVM 64-bit bitvector type for the +x86_64 Linux target ('x86_64LinuxTypes'), but the 32-bit bitvector type for +32-bit ARM Linux targets ('aarch32LinuxTypes'). This can be useful if you want +to write Crucible CFGs that can be simulated in the context of LLVM modules +for several different architectures, for example if you want to override system +calls. +-} + {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE LambdaCase #-}