| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261 | 
							- {-# LANGUAGE ViewPatterns #-}
 
- {-# LANGUAGE OverloadedStrings #-}
 
- module Data.SCargot.Parse
 
-   ( -- * Parsing
 
-     decode
 
-   , decodeOne
 
-     -- * Parsing Control
 
-   , SExprParser
 
-   , Reader
 
-   , Comment
 
-   , mkParser
 
-   , setCarrier
 
-   , addReader
 
-   , setComment
 
-     -- * Specific SExprParser Conversions
 
-   , asRich
 
-   , asWellFormed
 
-   , withQuote
 
-   ) where
 
- #if !MIN_VERSION_base(4,8,0)
 
- import Control.Applicative ((<$>), (<*), pure)
 
- #endif
 
- import           Control.Monad ((>=>))
 
- import           Data.Map.Strict (Map)
 
- import qualified Data.Map.Strict as M
 
- import           Data.Text (Text)
 
- import           Data.String (IsString)
 
- import           Text.Parsec ( (<|>)
 
-                              , (<?>)
 
-                              , char
 
-                              , eof
 
-                              , lookAhead
 
-                              , many1
 
-                              , runParser
 
-                              , skipMany
 
-                              )
 
- import           Text.Parsec.Char (anyChar, space)
 
- import           Text.Parsec.Text (Parser)
 
- import           Data.SCargot.Repr ( SExpr(..)
 
-                                    , RichSExpr
 
-                                    , WellFormedSExpr
 
-                                    , toRich
 
-                                    , toWellFormed
 
-                                    )
 
- type ReaderMacroMap atom = Map Char (Reader atom)
 
- -- | A 'Reader' represents a reader macro: it takes a parser for
 
- --   the S-Expression type and performs as much or as little
 
- --   parsing as it would like, and then returns an S-expression.
 
- type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))
 
- -- | A 'Comment' represents any kind of skippable comment. This
 
- --   parser __must__ be able to fail if a comment is not being
 
- --   recognized, and it __must__ not consume any input in case
 
- --   of failure.
 
- type Comment = Parser ()
 
- -- | A 'SExprParser' describes a parser for a particular value
 
- --   that has been serialized as an s-expression. The @atom@ parameter
 
- --   corresponds to a Haskell type used to represent the atoms,
 
- --   and the @carrier@ parameter corresponds to the parsed S-Expression
 
- --   structure.
 
- data SExprParser atom carrier = SExprParser
 
-   { sesPAtom   :: Parser atom
 
-   , readerMap  :: ReaderMacroMap atom
 
-   , comment    :: Maybe Comment
 
-   , postparse  :: SExpr atom -> Either String carrier
 
-   }
 
- -- | Create a basic 'SExprParser' when given a parser
 
- --   for an atom type.
 
- --
 
- --   >>> import Text.Parsec (alphaNum, many1)
 
- --   >>> let parser = mkParser (many1 alphaNum)
 
- --   >>> decode parser "(ele phant)"
 
- --   Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]
 
- mkParser :: Parser atom -> SExprParser atom (SExpr atom)
 
- mkParser parser = SExprParser
 
-   { sesPAtom   = parser
 
-   , readerMap  = M.empty
 
-   , comment    = Nothing
 
-   , postparse  = return
 
-   }
 
- -- | Modify the carrier type for a 'SExprParser'. This is
 
- --   used internally to convert between various 'SExpr' representations,
 
- --   but could also be used externally to add an extra conversion layer
 
- --   onto a 'SExprParser'.
 
- --
 
- -- >>> import Text.Parsec (alphaNum, many1)
 
- -- >>> import Data.SCargot.Repr (toRich)
 
- -- >>> let parser = setCarrier (return . toRich) (mkParser (many1 alphaNum))
 
- -- >>> decode parser "(ele phant)"
 
- -- Right [RSlist [RSAtom "ele",RSAtom "phant"]]
 
- setCarrier :: (b -> Either String c) -> SExprParser a b -> SExprParser a c
 
- setCarrier f spec = spec { postparse = postparse spec >=> f }
 
- -- | Convert the final output representation from the 'SExpr' type
 
- --   to the 'RichSExpr' type.
 
- --
 
- -- >>> import Text.Parsec (alphaNum, many1)
 
- -- >>> let parser = asRich (mkParser (many1 alphaNum))
 
- -- >>> decode parser "(ele phant)"
 
- -- Right [RSlist [RSAtom "ele",RSAtom "phant"]]
 
- asRich :: SExprParser a (SExpr b) -> SExprParser a (RichSExpr b)
 
- asRich = setCarrier (return . toRich)
 
- -- | Convert the final output representation from the 'SExpr' type
 
- --   to the 'WellFormedSExpr' type.
 
- --
 
- -- >>> import Text.Parsec (alphaNum, many1)
 
- -- >>> let parser = asWellFormed (mkParser (many1 alphaNum))
 
- -- >>> decode parser "(ele phant)"
 
- -- Right [WFSList [WFSAtom "ele",WFSAtom "phant"]]
 
- asWellFormed :: SExprParser a (SExpr b) -> SExprParser a (WellFormedSExpr b)
 
- asWellFormed = setCarrier toWellFormed
 
- -- | Add the ability to execute some particular reader macro, as
 
- --   defined by its initial character and the 'Parser' which returns
 
- --   the parsed S-Expression. The 'Reader' is passed a 'Parser' which
 
- --   can be recursively called to parse more S-Expressions, and begins
 
- --   parsing after the reader character has been removed from the
 
- --   stream.
 
- --
 
- -- >>> import Text.Parsec (alphaNum, char, many1)
 
- -- >>> let vecReader p = (char ']' *> pure SNil) <|> (SCons <$> p <*> vecReader p)
 
- -- >>> let parser = addReader '[' vecReader (mkParser (many1 alphaNum))
 
- -- >>> decode parser "(an [ele phant])"
 
- -- Right [SCons (SAtom "an") (SCons (SCons (SAtom "ele") (SCons (SAtom "phant") SNil)) SNil)]
 
- addReader :: Char -> Reader a -> SExprParser a c -> SExprParser a c
 
- addReader c reader spec = spec
 
-   { readerMap = M.insert c reader (readerMap spec) }
 
- -- | Add the ability to ignore some kind of comment. This gets
 
- --   factored into whitespace parsing, and it's very important that
 
- --   the parser supplied __be able to fail__ (as otherwise it will
 
- --   cause an infinite loop), and also that it __not consume any input__
 
- --   (which may require it to be wrapped in 'try'.)
 
- --
 
- -- >>> import Text.Parsec (alphaNum, anyChar, manyTill, many1, string)
 
- -- >>> let comment = string "//" *> manyTill anyChar newline *> pure ()
 
- -- >>> let parser = setComment comment (mkParser (many1 alphaNum))
 
- -- >>> decode parser "(ele //a comment\n  phant)"
 
- -- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]
 
- setComment :: Comment -> SExprParser a c -> SExprParser a c
 
- setComment c spec = spec { comment = Just (c <?> "comment") }
 
- -- | Add the ability to understand a quoted S-Expression.
 
- --   Many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This
 
- --   assumes that the underlying atom type implements the "IsString"
 
- --   class, and will create the @quote@ atom using @fromString "quote"@.
 
- --
 
- -- >>> import Text.Parsec (alphaNum, many1)
 
- -- >>> let parser = withQuote (mkParser (many1 alphaNum))
 
- -- >>> decode parser "'elephant"
 
- -- Right [SCons (SAtom "quote") (SCons (SAtom "foo") SNil)]
 
- withQuote :: IsString t => SExprParser t (SExpr t) -> SExprParser t (SExpr t)
 
- withQuote = addReader '\'' (fmap go)
 
-   where go s  = SCons "quote" (SCons s SNil)
 
- peekChar :: Parser (Maybe Char)
 
- peekChar = Just <$> lookAhead anyChar <|> pure Nothing
 
- parseGenericSExpr ::
 
-   Parser atom  -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
 
- parseGenericSExpr atom reader skip = do
 
-   let sExpr = parseGenericSExpr atom reader skip <?> "s-expr"
 
-   skip
 
-   c <- peekChar
 
-   r <- case c of
 
-     Nothing -> fail "Unexpected end of input"
 
-     Just '(' -> char '(' >> skip >> parseList sExpr skip
 
-     Just (flip M.lookup reader -> Just r) -> anyChar >> r sExpr
 
-     _ -> SAtom `fmap` atom
 
-   skip
 
-   return r
 
- parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom)
 
- parseList sExpr skip = do
 
-   i <- peekChar
 
-   case i of
 
-     Nothing  -> fail "Unexpected end of input"
 
-     Just ')' -> char ')' >> return SNil
 
-     _        -> do
 
-       car <- sExpr
 
-       skip
 
-       c <- peekChar
 
-       case c of
 
-         Just '.' -> do
 
-           _ <- char '.'
 
-           cdr <- sExpr
 
-           skip
 
-           _ <- char ')'
 
-           skip
 
-           return (SCons car cdr)
 
-         Just ')' -> do
 
-           _ <- char ')'
 
-           skip
 
-           return (SCons car SNil)
 
-         _ -> do
 
-           cdr <- parseList sExpr skip
 
-           return (SCons car cdr)
 
- -- | Given a CommentMap, create the corresponding parser to
 
- --   skip those comments (if they exist).
 
- buildSkip :: Maybe (Parser ()) -> Parser ()
 
- buildSkip Nothing  = skipMany space
 
- buildSkip (Just c) = alternate
 
-   where alternate = skipMany space >> ((c >> alternate) <|> return ())
 
- doParse :: Parser a -> Text -> Either String a
 
- doParse p t = case runParser p () "" t of
 
-   Left err -> Left (show err)
 
-   Right x  -> Right x
 
- -- | Decode a single S-expression. If any trailing input is left after
 
- --   the S-expression (ignoring comments or whitespace) then this
 
- --   will fail: for those cases, use 'decode', which returns a list of
 
- --   all the S-expressions found at the top level.
 
- decodeOne :: SExprParser atom carrier -> Text -> Either String carrier
 
- decodeOne spec = doParse (parser <* eof) >=> (postparse spec)
 
-   where parser = parseGenericSExpr
 
-                    (sesPAtom spec)
 
-                    (readerMap spec)
 
-                    (buildSkip (comment spec))
 
- -- | Decode several S-expressions according to a given 'SExprParser'. This
 
- --   will return a list of every S-expression that appears at the top-level
 
- --   of the document.
 
- decode :: SExprParser atom carrier -> Text -> Either String [carrier]
 
- decode spec =
 
-   doParse (many1 parser <* eof) >=> mapM (postparse spec)
 
-     where parser = parseGenericSExpr
 
-                      (sesPAtom spec)
 
-                      (readerMap spec)
 
-                      (buildSkip (comment spec))
 
- {-
 
- -- | Encode (without newlines) a single S-expression.
 
- encodeSExpr :: SExpr atom -> (atom -> Text) -> Text
 
- encodeSExpr SNil _         = "()"
 
- encodeSExpr (SAtom s) t    = t s
 
- encodeSExpr (SCons x xs) t = go xs (encodeSExpr x t)
 
-   where go (SAtom s) rs = "(" <> rs <> " . " <> t s <> ")"
 
-         go SNil rs      = "(" <> rs <> ")"
 
-         go (SCons x xs) rs = go xs (rs <> " " <> encodeSExpr x t)
 
- -- | Emit an S-Expression in a machine-readable way. This does no
 
- --   pretty-printing or indentation, and produces no comments.
 
- encodeOne :: SExprParser atom carrier -> carrier -> Text
 
- encodeOne spec c = encodeSExpr (preserial spec c) (sesSAtom spec)
 
- encode :: SExprParser atom carrier -> [carrier] -> Text
 
- encode spec cs = T.concat (map (encodeOne spec) cs)
 
- -}
 
 
  |