浏览代码

S-Expression parser added and working, although in the middle of restructuring how comments work

Getty Ritter 9 年之前
父节点
当前提交
d01604d72f
共有 6 个文件被更改,包括 215 次插入57 次删除
  1. 15 4
      Data/SCargot/CommonLisp.hs
  2. 155 34
      Data/SCargot/General.hs
  3. 30 9
      Data/SCargot/Repr.hs
  4. 3 3
      Data/SCargot/Repr/Rich.hs
  5. 1 1
      Data/SCargot/Repr/WellFormed.hs
  6. 11 6
      s-cargot.cabal

+ 15 - 4
Data/SCargot/CommonLisp.hs

@@ -4,7 +4,15 @@
 --   macro definitions, this module should successfully parse and
 --   desugar even quoted lists and vector literals.
 
-module Data.SCargot.CommonLisp where
+module Data.SCargot.CommonLisp
+       ( CLAtom(..)
+       , CommonLispSpec
+       , withComments
+       , withQuote
+       , withVectors
+       , decode
+       , encode
+       ) where
 
 data CLAtom
   = CLSymbol Text
@@ -14,7 +22,10 @@ data CLAtom
   | CLFloat Double
     deriving (Eq, Show, Read)
 
-type CommonLispSpec carrier = SExprSpec CLAtom carrier
+data CommonLispSpec carrier = CommonLispSpec
+ { sexprSpec    :: SExprSpec CLAtom carrier
+ , poundReaders :: ReaderMacroMap CLAtom
+ }
 
 withComments :: CommonLispSpec c -> CommonLispSpec c
 withComments = addCommentType (const () <$> (char ';' *> restOfLine))
@@ -28,5 +39,5 @@ withQuote = addReader '\'' (go <$> parse)
 withVectors :: CommonLispSpec c -> CommonLispSpec c
 withVectors = addReader '#' (go <$> parse)
 
-parse :: CommonLispSpec c -> Text -> Either String c
-serialize :: CommonLispSpec c -> c -> Text
+decode :: CommonLispSpec c -> Text -> Either String c
+encode :: CommonLispSpec c -> c -> Text

+ 155 - 34
Data/SCargot/General.hs

@@ -1,30 +1,54 @@
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+
 module Data.SCargot.General
   ( -- * SExprSpec
     SExprSpec
   , mkSpec
   , convertSpec
   , addReader
-  , addCommentType
+  , addComment
+    -- * Specific SExprSpec Conversions
   , asRich
   , asWellFormed
-    -- * A Few Standard Reader Macros
-  , quote
-  , vector
+  , withSemicolonComments
+  , withQuote
     -- * Using a SExprSpec
-  , parseSExpr
-  , serializeSExpr
+  , decode
+  , decodeOne
+  , encode
+    -- * Useful Type Aliases
+  , Reader
+  , Comment
+  , Serializer
   ) where
 
-import           Control.Applicative
+import           Control.Applicative ((<*))
+import           Control.Monad ((>=>))
 import           Data.Attoparsec.Text
-import           Data.Map.String (Map)
-import qualified Data.Map.String as M
+import           Data.Char (isAlpha)
+import           Data.Map.Strict (Map)
+import qualified Data.Map.Strict as M
+import           Data.Text (Text)
+
+import           Prelude hiding (takeWhile)
 
 import           Data.SCargot.Repr
 
 type ReaderMacroMap atom = Map Char (Reader atom)
-type CommentMap = Map Char (Parser ())
+type CommentMap = Map Char Comment
+
+-- | A 'Reader' represents a reader macro: it takes a parser for
+--   the S-Expression type and performs as much or as little
+--   parsing as it would like, and then returns an S-expression.
 type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))
+
+-- | A 'Comment' represents any kind of skippable comment.
+type Comment = Parser ()
+
+-- | A 'Serializer' is any function which can serialize an Atom
+--   to 'Text'.
 type Serializer atom = atom -> Text
 
 -- | A 'SExprSpec' describes a parser and emitter for a particular
@@ -37,23 +61,24 @@ data SExprSpec atom carrier = SExprSpec
   { sesPAtom   :: Parser atom
   , sesSAtom   :: Serializer atom
   , readerMap  :: ReaderMacroMap atom
-  , commentMap :: CommentMap
+  , comment    :: Comment
   , postparse  :: SExpr atom -> Either String carrier
   , preserial  :: carrier -> SExpr atom
   }
 
+-- | Create a basic 'SExprSpec' when given a parser and serializer
 --   for an atom type.
 mkSpec :: Parser atom -> Serializer atom -> SExprSpec atom (SExpr atom)
 mkSpec p s = SExprSpec
-  { sesPAtom  = p
-  , sesSAtom  = s
-  , rmMap     = M.empty
-  , postparse = return
-  , preserial = id
+  { sesPAtom   = p
+  , sesSAtom   = s
+  , readerMap  = M.empty
+  , commentMap = skipSpace
+  , postparse  = return
+  , preserial  = id
   }
 
+-- | Modify the carrier type for a 'SExprSpec'. This is
 --   used internally to convert between various 'SExpr' representations,
 --   but could also be used externally to add an extra conversion layer
 --   onto a 'SExprSpec', e.g. for a custom Lisp-like language:
@@ -64,33 +89,125 @@ mkSpec p s = SExprSpec
 convertSpec :: (b -> Either String c) -> (c -> b) -> SExprSpec a b -> SExprSpec a c
 convertSpec f g spec = spec
   { postparse = postparse spec >=> f
-  , preserial = g . preserial spec
+  , preserial = preserial spec . g
   }
 
-addReader :: Char -> Reader a -> SExprSpec a c -> SExprSpec a c
-addReader c reader spec = spec { rmMap = insert c reader (rmMap spec) }
-
-addCommentType :: Char -> Comment -> SExprSpec a c -> SExprSpec a c
-addCommentType c comment spec = spec { }
-
-quote :: atom -> Reader atom
-quote q parse = go <$> parse
-  where go v = SCons q (SCons v SNil)
-
+-- | Convert the final output representation from the 'SExpr' type
+--   to the 'RichSExpr' type.
 asRich :: SExprSpec a (SExpr b) -> SExprSpec a (RichSExpr b)
 asRich = convertSpec (return . toRich) fromRich
 
+-- | Convert the final output representation from the 'SExpr' type
+--   to the 'WellFormedSExpr' type.
 asWellFormed :: SExprSpec a (SExpr b) -> SExprSpec a (WellFormedSExpr b)
 asWellFormed = convertSpec toWellFormed fromWellFormed
 
-parseGenericSExpr :: Parser atom  -> ReaderMacroMap atom -> CommentMap -> Parser (SExpr atom)
-parseGenericSExpr atom reader comment =
-  char '(' *> 
+-- | Add the ability to execute some particular reader macro, as
+--   defined by its initial character and the 'Parser' which returns
+--   the parsed S-Expression. The 'Reader' is passed a 'Parser' which
+--   can be recursively called to parse more S-Expressions, and begins
+--   parsing after the reader character has been removed from the
+--   stream.
+addReader :: Char -> Reader a -> SExprSpec a c -> SExprSpec a c
+addReader c reader spec = spec
+  { readerMap = M.insert c reader (readerMap spec) }
+
+-- | Add the ability to ignore some kind of comment. If the comment
+--   parser overlaps with a reader macro or the atom parser, then the
+--   former will be tried first.
+setComment :: Comment -> SExprSpec a c -> SExprSpec a c
+setComment c spec = spec { comment = c }
+
+-- | Add the ability to skip line comments beginning with a semicolon.
+withSemicolonComments :: SExprSpec a c -> SExprSpec a c
+withSemicolonComments = addComment ';' (skipWhile (\ c -> c /= '\n'))
+
+-- | Add the ability to understand a quoted S-Expression. In general,
+--   many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This is
+--   a convenience function which allows you to easily add quoted
+--   expressions to a 'SExprSpec', provided that you supply which
+--   atom you want substituted in for the symbol @quote@.
+withQuote :: a -> SExprSpec a (SExpr a) -> SExprSpec a (SExpr a)
+withQuote q = addReader '\'' prs
+  where prs p = go `fmap` p
+        go s  = SCons (SAtom q) (SCons s SNil)
+
+parseGenericSExpr ::
+  Parser atom  -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
+parseGenericSExpr atom reader skip = do
+  let sExpr = parseGenericSExpr atom reader skip
+  skip
+  c <- peekChar
+  r <- case c of
+    Nothing -> fail "Unexpected end of input"
+    Just '(' -> char '(' >> skip >> parseList sExpr skip
+    Just (flip M.lookup reader -> Just r) -> anyChar >> r sExpr
+    _ -> SAtom `fmap` atom
+  skip
+  return r
+
+parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom)
+parseList sExpr skip = do
+  i <- peekChar
+  case i of
+    Nothing  -> fail "Unexpected end of input"
+    Just ')' -> char ')' >> return SNil
+    _        -> do
+      car <- sExpr
+      skip
+      c <- peekChar
+      case c of
+        Just '.' -> do
+          char '.'
+          cdr <- sExpr
+          skip
+          char ')'
+          skip
+          return (SCons car cdr)
+        Just ')' -> do
+          char ')'
+          skip
+          return (SCons car SNil)
+        _ -> do
+          cdr <- parseList sExpr skip
+          return (SCons car cdr)
+
+-- | Given a CommentMap, create the corresponding parser to
+--   skip those comments (if they exist).
+buildSkip :: CommentMap -> Parser ()
+buildSkip m = skipSpace >> comments >> skipSpace
+  where comments = do
+          c <- peekChar
+          case c of
+            Nothing -> return ()
+            Just c' -> case M.lookup c' m of
+              Just p  -> anyChar >> p
+              Nothing -> return ()
+
+(#) :: a -> (a -> b) -> b
+(#) = flip ($)
+
+testSpec :: SExprSpec Text (SExpr Text)
+testSpec = mkSpec (takeWhile1 isAlpha) id
+         # withQuote "quote"
+         # addReader '#' (\ p -> SCons (SAtom "vector") `fmap` p)
+
+-- | Decode a single S-expression. If any trailing input is left after
+--   the S-expression (ignoring comments or whitespace) then this
+--   will fail: for those cases, use 'decode', which returns a list of
+--   all the S-expressions found at the top level.
+decodeOne :: SExprSpec atom carrier -> Text -> Either String carrier
+decodeOne SExprSpec { .. } = parseOnly (parser <* endOfInput) >=> postparse
+  where parser = parseGenericSExpr sesPAtom readerMap (buildSkip commentMap)
 
-parseSExpr :: SExprSpec atom carrier -> Text -> Either String carrier
-parseSExpr spec = undefined
+-- | Decode several S-expressions according to a given 'SExprSpec'. This
+--   will return a list of every S-expression that appears at the top-level
+--   of the document.
+decode :: SExprSpec atom carrier -> Text -> Either String [carrier]
+decode SExprSpec { .. } =
+  parseOnly (many1 parser <* endOfInput) >=> mapM postparse
+    where parser = parseGenericSExpr sesPAtom readerMap (buildSkip commentMap)
 
-serializeSExpr :: SExprSpec atom carrier -> carrier -> Text
-serializeSExpr spec = serializeGenericSExpr ses . preserial
+-- | Emit an S-Expression in a machine-readable way. This
+encode :: SExprSpec atom carrier -> carrier -> Text
+encode SExprSpec { .. } = undefined

+ 30 - 9
Data/SCargot/Repr.hs

@@ -1,8 +1,11 @@
 module Data.SCargot.Repr
-       ( SExpr(..)
+       ( -- * Elementary SExpr representation
+         SExpr(..)
+         -- * Rich SExpr representation
        , RichSExpr(..)
        , toRich
        , fromRich
+         -- * Well-Formed SExpr representation
        , WellFormedSExpr(..)
        , toWellFormed
        , fromWellFormed
@@ -23,28 +26,31 @@ data SExpr atom
 --   exposed. In this case, we have 'RSList' to
 --   represent a well-formed cons list, and 'RSDotted'
 --   to represent an improper list of the form
+--   @(a b c . d)@. This representation is based on
+--   the shape of the parsed S-Expression, and not on
+--   how it was represented, so @(a . (b))@ is going to
+--   be represented as @RSList[RSAtom a, RSAtom b]@
+--   despite having been originally represented as a
+--   dotted list.
 data RichSExpr atom
   = RSList [RichSExpr atom]
   | RSDotted [RichSExpr atom] atom
   | RSAtom atom
     deriving (Eq, Show, Read)
 
+-- |  It should always be true that
 --
+--   > fromRich (toRich x) == x
 --
 --   and that
 --
+--   > toRich (fromRich x) == x
 toRich :: SExpr atom -> RichSExpr atom
 toRich (SAtom a) = RSAtom a
-toRich (SCons x xs) = go xs [toRich x]
-  where go (SAtom a) rs    = RSDotted rs a
-        go SNil rs         = RSList rs
-        go (SCons x xs) rs = go xs (toRich x:rs)
+toRich (SCons x xs) = go xs (toRich x:)
+  where go (SAtom a) rs    = RSDotted (rs []) a
+        go SNil rs         = RSList (rs [])
+        go (SCons x xs) rs = go xs (rs . (toRich x:))
 
 -- | This follows the same laws as 'toRich'.
 fromRich :: RichSExpr atom -> SExpr atom
@@ -62,23 +68,30 @@ data WellFormedSExpr atom
   | WFSAtom atom
     deriving (Eq, Show, Read)
 
+-- | This will be @Nothing@ if the argument contains an
 --   improper list. It should hold that
 --
+--   > toWellFormed (fromWellFormed x) == Right x
+--
+--   and also (more tediously) that
+--
+--   > case fromWellFormed (toWellFormed x) of
+--   >   Left _  -> True
+--   >   Right y -> x == y
 toWellFormed :: SExpr atom -> Either String (WellFormedSExpr atom)
+toWellFormed SNil      = return (WFSList [])
 toWellFormed (SAtom a) = return (WFSAtom a)
 toWellFormed (SCons x xs) = do
   x' <- toWellFormed x
-  go xs [x']
+  go xs (x':)
   where go (SAtom a) rs = Left "Found atom in cdr position"
-        go SNil rs      = return (WFSList rs)
+        go SNil rs      = return (WFSList (rs []))
         go (SCons x xs) rs = do
           x' <- toWellFormed x
-          go xs (x':rs)
+          go xs (rs . (x':))
 
 -- | Convert a WellFormedSExpr back into a SExpr.
 fromWellFormed :: WellFormedSExpr atom -> SExpr atom
 fromWellFormed (WFSAtom a)  = SAtom a
 fromWellFormed (WFSList xs) =
-  foldr SCons SNil (map fromWellFormed xs)
+  foldl SCons SNil (map fromWellFormed xs)

+ 3 - 3
Data/SCargot/Repr/Rich.hs

@@ -13,6 +13,6 @@ module Data.SCargot.Repr.Rich
 
 import Data.SCargot.Repr as R
 
-pattern List xs = R.RSList xs
-pattern DotList xs = R.RSDotted xs
-pattern Atom a = R.RSAtom a
+pattern Atom a       = R.RSAtom a
+pattern List xs      = R.RSList xs
+pattern DotList xs x = R.RSDotted xs x

+ 1 - 1
Data/SCargot/Repr/WellFormed.hs

@@ -1,6 +1,6 @@
 {-# LANGUAGE PatternSynonyms #-}
 
-module Data.SCargot.Repr.Rich
+module Data.SCargot.Repr.WellFormed
        ( -- * 'WellFormedSExpr' representation
          R.WellFormedSExpr(..)
        , R.toWellFormed

+ 11 - 6
s-cargot.cabal

@@ -13,9 +13,14 @@ build-type:          Simple
 cabal-version:       >=1.10
 
 library
-  -- exposed-modules:     
-  -- other-modules:       
-  -- other-extensions:    
-  build-depends:       base >=4.7 && <4.8
-  -- hs-source-dirs:      
-  default-language:    Haskell2012
+  exposed-modules:     Data.SCargot.Repr,
+                       Data.SCargot.Repr.Basic,
+                       Data.SCargot.Repr.Rich,
+                       Data.SCargot.Repr.WellFormed,
+                       Data.SCargot.General,
+                       Data.SCargot.Tutorial
+  -- other-modules:
+  -- other-extensions:
+  build-depends:       base >=4.7 && <4.8, attoparsec, text, containers
+  -- hs-source-dirs:
+  default-language:    Haskell2010