Преглед на файлове

Modified parser to use s-cargot and made some grammar modifications to example

Getty Ritter преди 9 години
родител
ревизия
e98c6220e1
променени са 3 файла, в които са добавени 252 реда и са изтрити 263 реда
  1. 3 2
      gidl.cabal
  2. 230 238
      src/Gidl/Parse.hs
  3. 19 23
      tests/example.idl

+ 3 - 2
gidl.cabal

@@ -42,7 +42,9 @@ library
                        parsec,
                        pretty-show,
                        transformers,
-                       ivory-artifact
+                       ivory-artifact,
+                       s-cargot,
+                       text
   hs-source-dirs:      src
   default-language:    Haskell2010
   ghc-options:         -Wall
@@ -56,4 +58,3 @@ executable             gidl
 
   default-language:    Haskell2010
   ghc-options:         -Wall
-

+ 230 - 238
src/Gidl/Parse.hs

@@ -1,242 +1,234 @@
-
-module Gidl.Parse where
-
-import Data.List
-import Data.Functor.Identity
-import Control.Monad
-import Text.Parsec.Prim
-import Text.Parsec.Char
-import Text.Parsec.Token
-import Text.Parsec.Combinator
-import Text.Parsec.Language
-import Text.Parsec.Error
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE PatternSynonyms #-}
+
+module Gidl.Parse (parseDecls) where
+
+import           Control.Applicative ((<$>), (<*>))
+import           Control.Monad ((>=>), guard)
+import           Data.List (nub, partition)
+import           Data.SCargot.Comments (withHaskellComments)
+import           Data.SCargot.General ( SExprSpec
+                                      , convertSpec
+                                      , decode
+                                      , asWellFormed
+                                      )
+import           Data.SCargot.HaskLike (HaskLikeAtom(..), haskLikeSpec)
+import           Data.SCargot.Repr.WellFormed
+import           Data.Text (unpack, pack)
 
 import Gidl.Types
 import Gidl.Interface
 
-type Parser u a = ParsecT String u Identity a
-type ParseEnv = (TypeEnv, InterfaceEnv)
-
-emptyParseEnv :: ParseEnv
-emptyParseEnv = (emptyTypeEnv, emptyInterfaceEnv)
-
-getTypeEnv :: Parser ParseEnv TypeEnv
-getTypeEnv = fmap fst getState
-
-getInterfaceEnv :: Parser ParseEnv InterfaceEnv
-getInterfaceEnv = fmap snd getState
-
-setTypeEnv :: TypeEnv -> Parser ParseEnv ()
-setTypeEnv te = do
-  (_, ie) <- getState
-  setState (te, ie)
-
-setInterfaceEnv :: InterfaceEnv -> Parser ParseEnv ()
-setInterfaceEnv ie = do
-  (te, _) <- getState
-  setState (te, ie)
-
----
-
-lexer :: GenTokenParser String u Identity
-lexer = makeTokenParser haskellDef
-
-tWhiteSpace :: Parser u ()
-tWhiteSpace = whiteSpace lexer
-
-tInteger :: Parser u Integer
-tInteger = (integer lexer) <?> "integer"
-
-tNatural :: Parser u Integer
-tNatural = do
-  i <- tInteger
-  case i < 0 of
-    True -> fail "expected positive integer"
-    False -> return i
-
-tFloat :: Parser u Double
-tFloat = (float lexer) <?> "floating point number"
-
-tString :: Parser u String
-tString = (stringLiteral lexer) <?> "string"
-
-tSymbol :: Parser u String
-tSymbol = (many1 $ noneOf "()\" \t\n\r") <?> "symbol"
-
-tIdentifier :: String -> Parser u ()
-tIdentifier i = do
-  s <- tSymbol
-  case s == i of
-    True -> return ()
-    False -> fail ("expected identifier " ++ i)
-
-tList :: Parser u a -> Parser u a
-tList c = do
-  tWhiteSpace
-  void $ char '('
-  tWhiteSpace
-  r <- c
-  tWhiteSpace
-  void $ char ')'
-  return r
-  <?> "list"
-
-
-tPair :: Parser u a
-      -> Parser u b
-      -> Parser u (a, b)
-tPair a b = tList $ do
-  ra <- a
-  tWhiteSpace
-  rb <- b
-  return (ra, rb)
-
-tKnownPrimType  :: Parser ParseEnv PrimType
-tKnownPrimType  = do
-  t <- tKnownType
-  case t of
-    PrimType p -> return p
-    StructType n _ -> fail ("expected a known primitive type name, got " ++ n)
-
-tKnownType :: Parser ParseEnv Type
-tKnownType = do
-  s <- tSymbol
-  te <- getTypeEnv
-  case lookupTypeName s te of
-    Just t -> return t
-    Nothing -> fail ("expected a known type name, got " ++ s)
-
-tStructRow :: Parser ParseEnv (Identifier, PrimType)
-tStructRow = tPair tSymbol tKnownPrimType
-  <?> "struct row"
-
-tStructBody :: Parser ParseEnv [(Identifier, PrimType)]
-tStructBody = tList (many1 (tWhiteSpace >> tStructRow))
-  <?> "struct body"
-
-tStructDecl :: Parser ParseEnv (TypeName, Type)
-tStructDecl = tList $ do
-  tIdentifier "def-struct"
-  tWhiteSpace
-  n <- tSymbol
-  b <- tStructBody
-  return (n, StructType n b)
-
-defineType :: (TypeName, Type) -> Parser ParseEnv ()
-defineType (tn, t) = do
-  te <- getTypeEnv
-  case lookupTypeName tn te of
-    Just _ -> fail ("type named '" ++ tn ++ "' already exists")
-    Nothing -> setTypeEnv (insertType tn t te)
-
-defineInterface :: Interface -> Parser ParseEnv ()
-defineInterface i = do
-  ie <- getInterfaceEnv
-  case lookupInterface ina ie of
-    Just _ -> fail ("interface named '" ++ ina ++ "' already exists")
-    Nothing -> setInterfaceEnv (insertInterface i ie)
-  where (Interface ina _ _) = i
-tNewtypeDecl :: Parser ParseEnv (TypeName, Type)
-tNewtypeDecl = tList $ do
-  tIdentifier "def-newtype"
-  tWhiteSpace
-  n <- tSymbol
-  tWhiteSpace
-  c <- tKnownPrimType
-  return (n, PrimType (Newtype n c))
-
-tEnumDecl :: Parser ParseEnv (TypeName, Type)
-tEnumDecl = tList $ do
-  tIdentifier "def-enum"
-  tWhiteSpace
-  n <- tSymbol
-  w  <- optionMaybe (try tInteger)
-  width <- case w of
-    Nothing -> return Bits32
-    Just 8 -> return  Bits8
-    Just 16 -> return Bits16
-    Just 32 -> return Bits32
-    Just 64 -> return Bits64
-    _ -> fail "Expected enum bit size to be 8, 16, 32, or 64"
-
-  vs <- tList $ many1 $ tPair tSymbol tNatural
-  when (not_unique (map fst vs)) $
-    fail "enum keys were not unique"
-  when (not_unique (map snd vs)) $
-    fail "enum values were not unique"
-  -- XXX make it possible to implicitly assign numbers
-  return (n, PrimType (EnumType n width vs))
-
-not_unique :: (Eq a) => [a] -> Bool
-not_unique l = nub l /= l
-
-tPermission :: Parser a Perm
-tPermission = do
-  s <- tSymbol
-  case s of
-    "read"      -> return Read
-    "r"         -> return Read
-    "write"     -> return Write
-    "w"         -> return Write
-    "readwrite" -> return ReadWrite
-    "rw"        -> return ReadWrite
-    _           -> fail "expected permission"
-
-tInterfaceMethod :: Parser ParseEnv (MethodName, Method)
-tInterfaceMethod = tList $ do
-  n <- tSymbol
-  m <- choice [ try tAttr, try tStream ]
-  return (n, m)
-  where
-  tAttr = tList $ do
-    tIdentifier "attr"
-    tWhiteSpace
-    p <- tPermission
-    tWhiteSpace
-    tn <- tKnownType
-    return (AttrMethod p tn)
-  tStream = tList $ do
-    tIdentifier "stream"
-    tWhiteSpace
-    r <- tInteger
-    tWhiteSpace
-    tn <- tKnownType
-    return (StreamMethod r tn)
-
-tKnownInterface :: Parser ParseEnv Interface
-tKnownInterface  = do
-  n <- tSymbol
-  ie <- getInterfaceEnv
-  case lookupInterface n ie of
-    Just i -> return i
-    Nothing -> fail ("expected a known interface name, got " ++ n)
-
-tInterfaceDecl :: Parser ParseEnv Interface
-tInterfaceDecl = tList $ do
-  tIdentifier "def-interface"
-  tWhiteSpace
-  n <- tSymbol
-  tWhiteSpace
-  ms <- tList (many1 tInterfaceMethod)
-  when (not_unique (map fst ms)) $
-    fail "expected unique interface method names"
-  tWhiteSpace
-  ps <- optionMaybe (tList (many1 tKnownInterface))
-  -- XXX require the ms not shadow names in inherited interfaces
-  case ps of
-    Just p -> return (Interface  n p ms)
-    Nothing -> return (Interface n [] ms)
-
-tDecls :: Parser ParseEnv ParseEnv
-tDecls = do
-  _ <- many (choice [ try tStructDecl    >>= defineType
-                    , try tNewtypeDecl   >>= defineType
-                    , try tEnumDecl      >>= defineType
-                    , try tInterfaceDecl >>= defineInterface
-                    ])
-  tWhiteSpace >> eof
-  getState
-
-parseDecls :: String -> Either ParseError ParseEnv
-parseDecls s = runP tDecls emptyParseEnv "" s
-
+-- We parse into this abstract structure before converting it to the
+-- structures Gidl uses elsewhere. That way, we can separate our
+-- parsing and our checking.
+data Decl
+  = NewtypeDecl Identifier Identifier
+  | EnumDecl (Identifier, Bits) [(Identifier, Integer)]
+  | StructDecl Identifier [(Identifier, Identifier)]
+  | InterfaceDecl Identifier [Identifier] [(Identifier, MethodDecl)]
+    deriving (Eq, Show)
+
+data MethodDecl
+  = AttrDecl Perm Identifier
+  | StreamDecl Integer Identifier
+    deriving (Eq, Show)
+
+check :: Bool -> String -> Either String ()
+check True _ = return ()
+check _ msg  = throw msg
+
+-- Here's a function to convert those decls.
+toEnv :: [Decl] -> Either String (TypeEnv, InterfaceEnv)
+toEnv decls = do
+  check (unique typeNames) "duplicate type names"
+  check (unique interfaceNames) "duplicate interface names"
+  typs <- mapM getTypePair typeNames
+  ifcs <- mapM getIfacePair interfaceNames
+  return (TypeEnv typs, InterfaceEnv ifcs)
+  where (typDs, ifcDs) = partition isTypeDecl decls
+
+        builtins = let TypeEnv bs = baseTypeEnv in bs
+
+        typeNames = map fst builtins ++ map getName typDs
+        interfaceNames = map getName ifcDs
+
+        typMap = [(getName d, toType d) | d <- typDs] ++
+                 [(n, return (n, t)) | (n, t) <- builtins ]
+        ifcMap = [(getName i, toInterface i) | i <- ifcDs]
+
+        -- this is gross because I'm trying to make sure declarations
+        -- can happen in any order. XXX: prevent recursion!
+        getType n = snd `fmap` getTypePair n
+        getTypePair n = case lookup n typMap of
+            Just (Right t) -> return t
+            Just (Left l)  -> Left l
+            Nothing        -> throw ("Unknown primitive type: " ++ n)
+
+        getIface n = snd `fmap` getIfacePair n
+        getIfacePair n = case lookup n ifcMap of
+          Just (Right i) -> return i
+          Just (Left l)  -> Left l
+          Nothing        -> throw ("Unknown interface: " ++ n)
+
+        getPrimType n = do
+          t <- getType n
+          case t of
+            PrimType t' -> return t'
+            _ -> throw ("Expected primitive type but got " ++ show t)
+
+        -- converts a decl to an actual type
+        toType (NewtypeDecl n t) = do
+          t' <- getPrimType t
+          return (n, PrimType (Newtype n t'))
+        toType (EnumDecl (n, s) ts) = do
+          guard (unique (map fst ts)) /?/ "baz"
+          return (n, PrimType (EnumType n s ts))
+        toType (StructDecl n ss) = do
+          ps <- mapM (getPrimType . snd) ss
+          return (n, StructType n (zip (map fst ss) ps))
+        toType _ = error "[unreachable]"
+
+        toMethod (n, AttrDecl perm t) = do
+          t' <- getType t
+          return (n, AttrMethod perm t')
+        toMethod (n, StreamDecl rate t) = do
+          t' <- getType t
+          return (n, StreamMethod rate t')
+
+        toInterface (InterfaceDecl n is ms) = do
+          ms' <- mapM toMethod ms
+          is' <- mapM getIface is
+          return (n, Interface n is' ms')
+        toInterface _ = error "[unreachable]"
+
+        getName (NewtypeDecl n _)     = n
+        getName (EnumDecl (n, _) _)   = n
+        getName (StructDecl n _)      = n
+        getName (InterfaceDecl n _ _) = n
+
+        isTypeDecl InterfaceDecl {} = False
+        isTypeDecl _                = True
+
+        unique l = nub l == l
+
+
+parseDecls :: String -> Either String (TypeEnv, InterfaceEnv)
+parseDecls = return . pack >=> decode gidlSpec >=> toEnv
+
+gidlSpec :: SExprSpec HaskLikeAtom Decl
+gidlSpec
+  = withHaskellComments
+  $ convertSpec tDecl ppDecl
+  $ asWellFormed haskLikeSpec
+
+-- utility aliases and helpers
+type Parse a = WellFormedSExpr HaskLikeAtom -> Either String a
+
+throw :: String -> Either String a
+throw = Left
+
+infix 9 /?/
+(/?/) :: Either String a -> String -> Either String a
+Left msg /?/ ctx = throw (msg ++ "\n  in parsing " ++ ctx)
+r        /?/ _   = r
+
+-- basic parsing of things (these might show up in s-cargot
+--  proper eventually?)
+tSymbol :: Parse String
+tSymbol = asAtom go
+  where go (HSIdent i) = return (unpack i)
+        go sx          = throw ("Expected identifier; got " ++ show sx)
+
+tType :: Parse String
+tType = tSymbol
+
+tInteger :: Parse Integer
+tInteger = asAtom go
+  where go (HSInt n) = return n
+        go sx        = throw ("Expected integer; got " ++ show sx)
+
+-- some parsing of gidl-specific types
+tBits :: Parse Bits
+tBits = tInteger >=> tWidth
+
+tWidth :: Integer -> Either String Bits
+tWidth 8  = return Bits8
+tWidth 16 = return Bits16
+tWidth 32 = return Bits32
+tWidth 64 = return Bits64
+tWidth _  = throw "Expected enum bit size to be 8, 16, 32, or 64"
+
+tPermission :: Parse Perm
+tPermission = asAtom go
+  where go token
+          | token == "read"      || token == "r"  = return Read
+          | token == "write"     || token == "w"  = return Write
+          | token == "readwrite" || token == "rw" = return ReadWrite
+          | otherwise = throw ("unknown permission: " ++ show token)
+
+-- newtypes
+tNewtypeDecl :: Parse Decl
+tNewtypeDecl = asList $ \ls -> do
+  car (isAtom "def-newtype") ls
+  name <- cdr (car tSymbol) ls       /?/ "newtype name"
+  typ  <- cdr (cdr (car tSymbol)) ls /?/ "newtype type"
+  return (NewtypeDecl name typ)
+
+-- structs
+tStructDecl :: Parse Decl
+tStructDecl = asList go
+  where go ("def-struct":name:body) =
+            StructDecl <$> (tSymbol name         /?/ "structure name")
+                       <*> (mapM tStructRow body /?/ "structure body")
+        go _ = throw "invalid struct decl"
+
+tStructRow :: Parse (Identifier, Identifier)
+tStructRow = fromPair tSymbol tType
+
+-- enums
+tEnumDecl :: Parse Decl
+tEnumDecl = asList go
+  where go ("def-enum" : name : body) =
+          EnumDecl <$> tEnumName name      /?/ "enum name"
+                   <*> mapM tEnumBody body /?/ "enum body"
+        go _ = throw "invalid enum decl"
+
+tEnumName :: Parse (Identifier, Bits)
+tEnumName (L [name, size]) = (,) <$> tSymbol name <*> tBits size
+tEnumName name             = (,) <$> tSymbol name <*> return Bits32
+
+tEnumBody :: Parse (Identifier, Integer)
+tEnumBody = fromPair tSymbol tInteger
+
+-- interface declarations
+tInterfaceDecl :: Parse Decl
+tInterfaceDecl = asList go
+  where go ("def-interface":name:parents:body) =
+          InterfaceDecl
+            <$> tSymbol name                  /?/ "interface name"
+            <*> asList (mapM tSymbol) parents /?/ "interface supers"
+            <*> mapM tInterfaceMethod body    /?/ "interface methods"
+        go _ = throw "invalid interface decl"
+
+tInterfaceMethod :: Parse (Identifier, MethodDecl)
+tInterfaceMethod = fromPair tSymbol (asList go)
+  where go ["attr",   p, t] = AttrDecl   <$> tPermission p <*> tType t
+        go ["stream", n, t] = StreamDecl <$> tInteger n    <*> tType t
+        go (x:_) = throw ("unknown interface type: " ++ show x)
+        go []    = throw "empty interface decl"
+
+-- declarations in general
+tDecl :: Parse Decl
+tDecl ls@(L ("def-struct"    : _)) = tStructDecl ls    /?/ "struct"
+tDecl ls@(L ("def-newtype"   : _)) = tNewtypeDecl ls   /?/ "newtype"
+tDecl ls@(L ("def-enum"      : _)) = tEnumDecl ls      /?/ "enum"
+tDecl ls@(L ("def-interface" : _)) = tInterfaceDecl ls /?/ "interface"
+tDecl (L (A name : _)) =
+  throw ("unknown declaration type: " ++ show name)
+tDecl item = do
+  throw ("invalid top-level item " ++ show item)
+
+-- For now, no pretty-printing (but it will come soon!)
+ppDecl :: Decl -> WellFormedSExpr HaskLikeAtom
+ppDecl = const "<unimplemented>"

+ 19 - 23
tests/example.idl

@@ -1,22 +1,20 @@
-
 (def-newtype time_micros_t sint64_t)
 
 -- comments should be haskell style, because we're suing parsec's haskell lexer
 
 -- enums default to 32 bit width if you don't really care
 (def-enum mode_t
- ((stabilize 0)
-  (auto 1)))
+ (stabilize 0)
+ (auto 1))
 
 -- or you can specify a width
-(def-enum armed_t 
- 8
- ((disarmed 0)
-  (armed 1)))
+(def-enum (armed_t 8)
+ (disarmed 0)
+ (armed 1))
 
 (def-struct heartbeat_t
- ((time time_micros_t)
-  (armed armed_t)))
+ (time time_micros_t)
+ (armed armed_t))
 
 (def-newtype lat_t sint32_t)
 (def-newtype lon_t sint32_t)
@@ -31,15 +29,15 @@
 (def-newtype meters_t float_t)
 
 (def-struct coordinate_t
- ((lat lat_t)
-  (lon lon_t)
-  (alt meters_t)))
+ (lat lat_t)
+ (lon lon_t)
+ (alt meters_t))
 
 (def-struct timed_coord_t
-  ((lat lat_t)
-   (lon lon_t)
-   (alt meters_t)
-   (time time_micros_t)))
+  (lat lat_t)
+  (lon lon_t)
+  (alt meters_t)
+  (time time_micros_t))
 
 -- Todo: the following interface syntax and semantics are a strawman.
 -- Interfaces have methods that are either streams or attrs.
@@ -48,16 +46,15 @@
 -- to zero). they also implicitly define an attr $(steamname)-stream-rate,
 -- which permits changing the stream rate at runtime.
 
-(def-interface vehicle_i
- ((heartbeat (stream 10 heartbeat_t))))
+(def-interface vehicle_i ()
+ (heartbeat (stream 10 heartbeat_t)))
 
 -- Interfaces implement java-style inheritance. No shadowing of inherited method
 -- names permitted.
 
-(def-interface controllable_vehicle_i
-  ((current_waypoint (attr read      coordinate_t))
-   (next_waypoint    (attr readwrite timed_coord_t)))
-  (vehicle_i)) -- Inherits from interface vehicle
+(def-interface controllable_vehicle_i (vehicle_i) -- Inherits from interface vehicle
+  (current_waypoint (attr read      coordinate_t))
+  (next_waypoint    (attr readwrite timed_coord_t)))
 
 -- The idea here is that, when negotiating a gidl connection, the client can
 -- specify or negotiate what interface is supported.
@@ -68,4 +65,3 @@
 -- This allows us to specify various kinds of vehicles might exist without
 -- repeating ourselves, and provides a way to
 -- manage new functionality without breaking legacy code.
-