Explorar el Código

Switched from AttoParsec to Parsec for somewhat better error messages

Getty Ritter hace 9 años
Se han modificado 5 ficheros con 66 adiciones y 58 borrados
  1. 11 12
  2. 8 9
  3. 16 6
  4. 30 30
  5. 1 1

+ 11 - 12

@@ -10,9 +10,10 @@ module Data.SCargot.Basic
   , withQuote
   ) where
+import           Control.Applicative ((<$>))
 import           Data.Char (isAlphaNum)
-import           Data.Attoparsec.Text (Parser, takeWhile1)
-import           Data.Text (Text)
+import           Text.Parsec -- (Parser, takeWhile1)
+import           Data.Text (Text, pack)
 import           Data.SCargot.Repr.Basic
 import           Data.SCargot.General
@@ -20,22 +21,16 @@ import           Data.SCargot.Comments (withLispComments)
 isAtomChar :: Char -> Bool
 isAtomChar c = isAlphaNum c
-               || c == '-'
-               || c == '*'
-               || c == '/'
-               || c == '+'
-               || c == '<'
-               || c == '>'
-               || c == '='
-               || c == '!'
-               || c == '?'
+  || c == '-' || c == '*' || c == '/'
+  || c == '+' || c == '<' || c == '>'
+  || c == '=' || c == '!' || c == '?'
 -- | A 'SExprSpec' that understands atoms to be sequences of
 --   alphanumeric characters as well as the punctuation
+--   characters @[-*/+<>=!?]@, and does no processing of them.
 --   This is not quite representative of actual lisps, which
+--   would, for example, accept various kinds of string
+--   and numeric literals.
 basicSpec :: SExprSpec Text (SExpr Text)
-basicSpec = mkSpec (takeWhile1 isAtomChar) id
+basicSpec = mkSpec pToken id
+  where pToken = pack <$> many1 (satisfy isAtomChar)

+ 8 - 9

@@ -24,10 +24,9 @@ module Data.SCargot.Comments
   , simpleBlockComment
   ) where
-import           Control.Applicative ((<|>))
 import           Control.Monad (void)
-import           Data.Attoparsec.Text
 import           Data.Text (Text)
+import           Text.Parsec
 import           Prelude hiding (takeWhile)
@@ -36,8 +35,8 @@ import Data.SCargot.General
 -- | Given a string, produce a comment parser that matches that
 --   initial string and ignores everything until the end of the
 --   line.
-lineComment :: Text -> Comment
-lineComment s = string s >> takeWhile (/= '\n') >> return ()
+lineComment :: String -> Comment
+lineComment s = string s >> skipMany (noneOf "\n") >> return ()
 -- | Given two strings, a begin and an end delimiter, produce a
 --   parser that matches the beginning delimiter and then ignores
@@ -57,7 +56,7 @@ lineComment s = string s >> takeWhile (/= '\n') >> return ()
 -- > /* this /* comment */
 -- is a complete comment.
-simpleBlockComment :: Text -> Text -> Comment
+simpleBlockComment :: String -> String -> Comment
 simpleBlockComment begin end =
   string begin >>
   manyTill anyChar (string end) >>
@@ -129,13 +128,13 @@ For example:
 We can then use these to parse s-expressions with different kinds of
 comment syntaxes:
-> decode mySpec "(foo ; a lisp comment\n  bar)\n"
+> > decode mySpec "(foo ; a lisp comment\n  bar)\n"
 > Left "Failed reading: takeWhile1"
-> decode myLispySpec "(foo ; a lisp comment\n  bar)\n"
+> > decode myLispySpec "(foo ; a lisp comment\n  bar)\n"
 > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]]
-> decode mySpec "(foo /* a c-like\n   comment */ bar)\n"
+> > decode mySpec "(foo /* a c-like\n   comment */ bar)\n"
 > Left "Failed reading: takeWhile1"
-> decode myCLikeSpec "(foo /* a c-like\n   comment */ bar)\n"
+> > decode myCLikeSpec "(foo /* a c-like\n   comment */ bar)\n"
 > Right [WFSList [WFSAtom "foo", WFSAtom "bar"]]

+ 16 - 6

@@ -22,9 +22,8 @@ module Data.SCargot.General
   , Serializer
   ) where
-import           Control.Applicative ((<*), (*>), (<|>), (<*>), (<$>), pure)
+import           Control.Applicative ((<*), (*>), (<*>), (<$>), pure)
 import           Control.Monad ((>=>))
-import           Data.Attoparsec.Text
 import           Data.Char (isAlpha, isDigit, isAlphaNum)
 import           Data.Map.Strict (Map)
 import qualified Data.Map.Strict as M
@@ -32,6 +31,9 @@ import           Data.Monoid ((<>))
 import           Data.String (IsString)
 import           Data.Text (Text, pack, unpack)
 import qualified Data.Text as T
+import           Text.Parsec
+import           Text.Parsec.Char (anyChar, space)
+import           Text.Parsec.Text
 import           Prelude hiding (takeWhile)
@@ -172,6 +174,9 @@ withQuote :: IsString t => SExprSpec t (SExpr t) -> SExprSpec t (SExpr t)
 withQuote = addReader '\'' (fmap go)
   where go s  = SCons "quote" (SCons s SNil)
+peekChar :: Parser (Maybe Char)
+peekChar = Just <$> lookAhead anyChar <|> pure Nothing
 parseGenericSExpr ::
   Parser atom  -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
 parseGenericSExpr atom reader skip = do
@@ -215,16 +220,21 @@ parseList sExpr skip = do
 -- | Given a CommentMap, create the corresponding parser to
 --   skip those comments (if they exist).
 buildSkip :: Maybe (Parser ()) -> Parser ()
-buildSkip Nothing  = skipSpace
+buildSkip Nothing  = skipMany space
 buildSkip (Just c) = alternate
-  where alternate = skipSpace >> ((c >> alternate) <|> return ())
+  where alternate = skipMany space >> ((c >> alternate) <|> return ())
+doParse :: Parser a -> Text -> Either String a
+doParse p t = case runParser p () "" t of
+  Left err -> Left (show err)
+  Right x  -> Right x
 -- | Decode a single S-expression. If any trailing input is left after
 --   the S-expression (ignoring comments or whitespace) then this
 --   will fail: for those cases, use 'decode', which returns a list of
 --   all the S-expressions found at the top level.
 decodeOne :: SExprSpec atom carrier -> Text -> Either String carrier
-decodeOne spec = parseOnly (parser <* endOfInput) >=> (postparse spec)
+decodeOne spec = doParse (parser <* eof) >=> (postparse spec)
   where parser = parseGenericSExpr
                    (sesPAtom spec)
                    (readerMap spec)
@@ -235,7 +245,7 @@ decodeOne spec = parseOnly (parser <* endOfInput) >=> (postparse spec)
 --   of the document.
 decode :: SExprSpec atom carrier -> Text -> Either String [carrier]
 decode spec =
-  parseOnly (many1 parser <* endOfInput) >=> mapM (postparse spec)
+  doParse (many1 parser <* eof) >=> mapM (postparse spec)
     where parser = parseGenericSExpr
                      (sesPAtom spec)
                      (readerMap spec)

+ 30 - 30

@@ -68,17 +68,17 @@ representation using the `asRich` and `asWellFormed`
-*Data.SCargot.General> decode spec "(a b)"
+> decode spec "(a b)"
 Right [SCons (SAtom "a") (SCons (SAtom "b") SNil)]
-*Data.SCargot.General> decode (asRich spec) "(a b)"
+> decode (asRich spec) "(a b)"
 Right [RSList [RSAtom "a",RSAtom "b"]]
-*Data.SCargot.General> decode (asWellFormed spec) "(a b)"
+> decode (asWellFormed spec) "(a b)"
 Right [WFSList [WFSAtom "a",WFSAtom "b"]]
-*Data.SCargot.General> decode spec "(a . b)"
+> decode spec "(a . b)"
 Right [SCons (SAtom "a") (SAtom "b")]
-*Data.SCargot.General> decode (asRich spec) "(a . b)"
+> decode (asRich spec) "(a . b)"
 Right [RSDotted [RSAtom "a"] "b"]
-*Data.SCargot.General> decode (asWellFormed spec) "(a . b)"
+> decode (asWellFormed spec) "(a . b)"
 Left "Found atom in cdr position"
@@ -90,24 +90,24 @@ with each other, so it's recommended to only import the type that
 you plan on working with:
-*Data.SCargot.Repr.Basic> A 2 ::: A 3 ::: A 4 ::: Nil
+> A 2 ::: A 3 ::: A 4 ::: Nil
 SCons (SCons (SCons (SAtom 2) (SAtom 3)) (SAtom 4)) SNil
-*Data.SCargot.Repr.WellFormed> L [A 1,A 2,A 3]
+> L [A 1,A 2,A 3]
 WFSList [WFSAtom 1,WFSAtom 2,WFSAtom 3]
-*Data.SCargot.Repr.WellFormed> let sexprSum (L xs) = sum (map sexprSum xs); sexprSum (A n) = n
-*Data.SCargot.Repr Data.SCargot.Repr.WellFormed> :t sexprSum
+> let sexprSum (L xs) = sum (map sexprSum xs); sexprSum (A n) = n
+> :t sexprSum
 sexprSum :: Num a => WellFormedSExpr a -> a
-*Data.SCargot.Repr.WellFormed> sexprSum (L [A 2, L [A 3, A 4]])
+> sexprSum (L [A 2, L [A 3, A 4]])
 ## Atom Types
 Any type can serve as an underlying atom type provided that it has
-an AttoParsec parser and a serializer (i.e. a way of turning it
+a Parsec parser and a serializer (i.e. a way of turning it
 into `Text`.) For these examples, I'm going to use a very simple
 serializer that is roughly like the one found in `Data.SCargot.Basic`,
 which parses symbolic tokens of letters, numbers, and some
@@ -146,9 +146,9 @@ We can then use this newly created atom type within an S-expression
 for both parsing and serialization:
-*Data.SCargot.General T> decode mySpec "(foo 1)"
+> decode mySpec "(foo 1)"
 Right [SCons (SAtom (Ident "foo")) (SCons (SAtom (Num 1)) SNil)]
-*Data.SCargot.General T> encode mySpec [SCons (SAtom (Num 0)) SNil]
+> encode mySpec [SCons (SAtom (Num 0)) SNil]
@@ -187,9 +187,9 @@ then we could use the `convertSpec` function to add this directly to
 the `SExprSpec`:
-*Data.SCargot.General T> decode (convertSpec toExpr fromExpr (asRich spec)) "(+ 1 2)"
+> decode (convertSpec toExpr fromExpr (asRich spec)) "(+ 1 2)"
 Right [Add (Num 1) (Num 2)]
-*Data.SCargot.General T> decode (convertSpec toExpr fromExpr (asRich spec)) "(0 1 2)"
+> decode (convertSpec toExpr fromExpr (asRich spec)) "(0 1 2)"
 Left "Unrecognized s-expr"
@@ -200,14 +200,14 @@ the provided `withSemicolonComments` function will cause it to understand
 traditional Lisp line-oriented comments that begin with a semicolon:
-*Data.SCargot.General> decode spec "(this ; has a comment\n inside)\n"
+> decode spec "(this ; has a comment\n inside)\n"
 Left "Failed reading: takeWhile1"
-*Data.SCargot.General> decode (withSemicolonComments spec) "(this ; has a comment\n inside)\n"
+> decode (withSemicolonComments spec) "(this ; has a comment\n inside)\n"
 Right [SCons (SAtom "this") (SCons (SAtom "inside") SNil)]
 Additionally, you can provide your own comment syntax in the form of an
-AttoParsec parser. Any AttoParsec parser can be used, so long as it meets
+Parsec parser. Any Parsec parser can be used, so long as it meets
 the following criteria:
 - it is capable of failing (as it is called until SCargot believes that there
 are no more comments)
@@ -217,8 +217,8 @@ wrapping the parser in a call to `try`
 For example, the following adds C++-style comments to an S-expression format:
-*Data.SCargot.General> let cppComment = string "//" >> takeWhile (/= '\n') >> return ()
-*Data.SCargot.General> decode (setComment cppComment spec) "(a //comment\n  b)\n"
+> let cppComment = string "//" >> skipMany (noneOf "\n") >> return ()
+> decode (setComment cppComment spec) "(a //comment\n  b)\n"
 Right [SCons (SAtom "a") (SCons (SAtom "b") SNil)]
@@ -228,14 +228,14 @@ A _reader macro_ is a Lisp macro which is invoked during read time. This
 allows the _lexical_ syntax of a Lisp to be modified. The most commonly
 seen reader macro is the quote, which allows the syntax `'expr` to stand
 in for the s-expression `(quote expr)`. The S-Cargot library enables this
-by keeping a map of characters to AttoParsec parsers that can be used as
+by keeping a map of characters to Parsec parsers that can be used as
 readers. There is a special case for the aforementioned quote, but that
 could easily be written by hand as
-*Data.SCargot.General> let doQuote c = SCons (SAtom "quote") (SCons c SNil)
-*Data.SCargot.General> let qReader = addReader '\'' (\ p -> fmap doQuote p)
-*Data.SCargot.General> decode (qReader mySpec) "'foo"
+> let quoteExpr c = SCons (SAtom "quote") (SCons c SNil)
+> let withQuote = addReader '\'' (\ p -> fmap quoteExpr p)
+> decode (withQuote mySpec) "'foo"
 Right [SCons (SAtom "quote") (SCons (SAtom "foo") SNil)]
@@ -246,8 +246,8 @@ would like; for example, the following reader macro does not bother
 parsing anything else and merely returns a new token:
-*Data.SCargot.General> let qmReader = addReader '?' (\ _ -> pure (SAtom "huh"))
-*Data.SCargot.General> decode (qmReader mySpec) "(?1 2)"
+> let qmReader = addReader '?' (\ _ -> pure (SAtom "huh"))
+> decode (qmReader mySpec) "(?1 2)"
 Right [SCons (SAtom "huh") (SCons (SAtom "1") (SCons (SAtom "2") SNil))]
@@ -259,9 +259,9 @@ proper lists, we could define a reader macro that is initialized by the
 is reached:
-*Data.SCargot.General> let pVec p = (char ']' *> pure SNil) <|> (SCons <$> p <*> pVec p)
-*Data.SCargot.General> let vec = addReader '[' pVec
-*Data.SCargot.General> decode (asRich (vec mySpec)) "(1 [2 3])"
+> let pVec p = (char ']' *> pure SNil) <|> (SCons <$> p <*> pVec p)
+> let vec = addReader '[' pVec
+> decode (asRich (vec mySpec)) "(1 [2 3])"
 Right [RSList [RSAtom "1",RSList [RSAtom "2",RSAtom "3"]]]

+ 1 - 1

@@ -23,6 +23,6 @@ library
   -- other-modules:
   -- other-extensions:
-  build-depends:       base >=4.7 && <4.8, attoparsec, text, containers
+  build-depends:       base >=4.7 && <4.8, parsec, text, containers
   -- hs-source-dirs:
   default-language:    Haskell2010