123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261 |
- {-# LANGUAGE ViewPatterns #-}
- {-# LANGUAGE OverloadedStrings #-}
- module Data.SCargot.Parse
- ( -- * Parsing
- decode
- , decodeOne
- -- * Parsing Control
- , SExprParser
- , Reader
- , Comment
- , mkParser
- , setCarrier
- , addReader
- , setComment
- -- * Specific SExprParser Conversions
- , asRich
- , asWellFormed
- , withQuote
- ) where
- #if !MIN_VERSION_base(4,8,0)
- import Control.Applicative ((<$>), (<*), pure)
- #endif
- import Control.Monad ((>=>))
- import Data.Map.Strict (Map)
- import qualified Data.Map.Strict as M
- import Data.Text (Text)
- import Data.String (IsString)
- import Text.Parsec ( (<|>)
- , (<?>)
- , char
- , eof
- , lookAhead
- , many1
- , runParser
- , skipMany
- )
- import Text.Parsec.Char (anyChar, space)
- import Text.Parsec.Text (Parser)
- import Data.SCargot.Repr ( SExpr(..)
- , RichSExpr
- , WellFormedSExpr
- , toRich
- , toWellFormed
- )
- type ReaderMacroMap atom = Map Char (Reader atom)
- -- | A 'Reader' represents a reader macro: it takes a parser for
- -- the S-Expression type and performs as much or as little
- -- parsing as it would like, and then returns an S-expression.
- type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))
- -- | A 'Comment' represents any kind of skippable comment. This
- -- parser __must__ be able to fail if a comment is not being
- -- recognized, and it __must__ not consume any input in case
- -- of failure.
- type Comment = Parser ()
- -- | A 'SExprParser' describes a parser for a particular value
- -- that has been serialized as an s-expression. The @atom@ parameter
- -- corresponds to a Haskell type used to represent the atoms,
- -- and the @carrier@ parameter corresponds to the parsed S-Expression
- -- structure.
- data SExprParser atom carrier = SExprParser
- { sesPAtom :: Parser atom
- , readerMap :: ReaderMacroMap atom
- , comment :: Maybe Comment
- , postparse :: SExpr atom -> Either String carrier
- }
- -- | Create a basic 'SExprParser' when given a parser
- -- for an atom type.
- --
- -- >>> import Text.Parsec (alphaNum, many1)
- -- >>> let parser = mkParser (many1 alphaNum)
- -- >>> decode parser "(ele phant)"
- -- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]
- mkParser :: Parser atom -> SExprParser atom (SExpr atom)
- mkParser parser = SExprParser
- { sesPAtom = parser
- , readerMap = M.empty
- , comment = Nothing
- , postparse = return
- }
- -- | Modify the carrier type for a 'SExprParser'. This is
- -- used internally to convert between various 'SExpr' representations,
- -- but could also be used externally to add an extra conversion layer
- -- onto a 'SExprParser'.
- --
- -- >>> import Text.Parsec (alphaNum, many1)
- -- >>> import Data.SCargot.Repr (toRich)
- -- >>> let parser = setCarrier (return . toRich) (mkParser (many1 alphaNum))
- -- >>> decode parser "(ele phant)"
- -- Right [RSlist [RSAtom "ele",RSAtom "phant"]]
- setCarrier :: (b -> Either String c) -> SExprParser a b -> SExprParser a c
- setCarrier f spec = spec { postparse = postparse spec >=> f }
- -- | Convert the final output representation from the 'SExpr' type
- -- to the 'RichSExpr' type.
- --
- -- >>> import Text.Parsec (alphaNum, many1)
- -- >>> let parser = asRich (mkParser (many1 alphaNum))
- -- >>> decode parser "(ele phant)"
- -- Right [RSlist [RSAtom "ele",RSAtom "phant"]]
- asRich :: SExprParser a (SExpr b) -> SExprParser a (RichSExpr b)
- asRich = setCarrier (return . toRich)
- -- | Convert the final output representation from the 'SExpr' type
- -- to the 'WellFormedSExpr' type.
- --
- -- >>> import Text.Parsec (alphaNum, many1)
- -- >>> let parser = asWellFormed (mkParser (many1 alphaNum))
- -- >>> decode parser "(ele phant)"
- -- Right [WFSList [WFSAtom "ele",WFSAtom "phant"]]
- asWellFormed :: SExprParser a (SExpr b) -> SExprParser a (WellFormedSExpr b)
- asWellFormed = setCarrier toWellFormed
- -- | Add the ability to execute some particular reader macro, as
- -- defined by its initial character and the 'Parser' which returns
- -- the parsed S-Expression. The 'Reader' is passed a 'Parser' which
- -- can be recursively called to parse more S-Expressions, and begins
- -- parsing after the reader character has been removed from the
- -- stream.
- --
- -- >>> import Text.Parsec (alphaNum, char, many1)
- -- >>> let vecReader p = (char ']' *> pure SNil) <|> (SCons <$> p <*> vecReader p)
- -- >>> let parser = addReader '[' vecReader (mkParser (many1 alphaNum))
- -- >>> decode parser "(an [ele phant])"
- -- Right [SCons (SAtom "an") (SCons (SCons (SAtom "ele") (SCons (SAtom "phant") SNil)) SNil)]
- addReader :: Char -> Reader a -> SExprParser a c -> SExprParser a c
- addReader c reader spec = spec
- { readerMap = M.insert c reader (readerMap spec) }
- -- | Add the ability to ignore some kind of comment. This gets
- -- factored into whitespace parsing, and it's very important that
- -- the parser supplied __be able to fail__ (as otherwise it will
- -- cause an infinite loop), and also that it __not consume any input__
- -- (which may require it to be wrapped in 'try'.)
- --
- -- >>> import Text.Parsec (alphaNum, anyChar, manyTill, many1, string)
- -- >>> let comment = string "//" *> manyTill anyChar newline *> pure ()
- -- >>> let parser = setComment comment (mkParser (many1 alphaNum))
- -- >>> decode parser "(ele //a comment\n phant)"
- -- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]
- setComment :: Comment -> SExprParser a c -> SExprParser a c
- setComment c spec = spec { comment = Just (c <?> "comment") }
- -- | Add the ability to understand a quoted S-Expression.
- -- Many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This
- -- assumes that the underlying atom type implements the "IsString"
- -- class, and will create the @quote@ atom using @fromString "quote"@.
- --
- -- >>> import Text.Parsec (alphaNum, many1)
- -- >>> let parser = withQuote (mkParser (many1 alphaNum))
- -- >>> decode parser "'elephant"
- -- Right [SCons (SAtom "quote") (SCons (SAtom "foo") SNil)]
- withQuote :: IsString t => SExprParser t (SExpr t) -> SExprParser t (SExpr t)
- withQuote = addReader '\'' (fmap go)
- where go s = SCons "quote" (SCons s SNil)
- peekChar :: Parser (Maybe Char)
- peekChar = Just <$> lookAhead anyChar <|> pure Nothing
- parseGenericSExpr ::
- Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
- parseGenericSExpr atom reader skip = do
- let sExpr = parseGenericSExpr atom reader skip <?> "s-expr"
- skip
- c <- peekChar
- r <- case c of
- Nothing -> fail "Unexpected end of input"
- Just '(' -> char '(' >> skip >> parseList sExpr skip
- Just (flip M.lookup reader -> Just r) -> anyChar >> r sExpr
- _ -> SAtom `fmap` atom
- skip
- return r
- parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom)
- parseList sExpr skip = do
- i <- peekChar
- case i of
- Nothing -> fail "Unexpected end of input"
- Just ')' -> char ')' >> return SNil
- _ -> do
- car <- sExpr
- skip
- c <- peekChar
- case c of
- Just '.' -> do
- _ <- char '.'
- cdr <- sExpr
- skip
- _ <- char ')'
- skip
- return (SCons car cdr)
- Just ')' -> do
- _ <- char ')'
- skip
- return (SCons car SNil)
- _ -> do
- cdr <- parseList sExpr skip
- return (SCons car cdr)
- -- | Given a CommentMap, create the corresponding parser to
- -- skip those comments (if they exist).
- buildSkip :: Maybe (Parser ()) -> Parser ()
- buildSkip Nothing = skipMany space
- buildSkip (Just c) = alternate
- where alternate = skipMany space >> ((c >> alternate) <|> return ())
- doParse :: Parser a -> Text -> Either String a
- doParse p t = case runParser p () "" t of
- Left err -> Left (show err)
- Right x -> Right x
- -- | Decode a single S-expression. If any trailing input is left after
- -- the S-expression (ignoring comments or whitespace) then this
- -- will fail: for those cases, use 'decode', which returns a list of
- -- all the S-expressions found at the top level.
- decodeOne :: SExprParser atom carrier -> Text -> Either String carrier
- decodeOne spec = doParse (parser <* eof) >=> (postparse spec)
- where parser = parseGenericSExpr
- (sesPAtom spec)
- (readerMap spec)
- (buildSkip (comment spec))
- -- | Decode several S-expressions according to a given 'SExprParser'. This
- -- will return a list of every S-expression that appears at the top-level
- -- of the document.
- decode :: SExprParser atom carrier -> Text -> Either String [carrier]
- decode spec =
- doParse (many1 parser <* eof) >=> mapM (postparse spec)
- where parser = parseGenericSExpr
- (sesPAtom spec)
- (readerMap spec)
- (buildSkip (comment spec))
- {-
- -- | Encode (without newlines) a single S-expression.
- encodeSExpr :: SExpr atom -> (atom -> Text) -> Text
- encodeSExpr SNil _ = "()"
- encodeSExpr (SAtom s) t = t s
- encodeSExpr (SCons x xs) t = go xs (encodeSExpr x t)
- where go (SAtom s) rs = "(" <> rs <> " . " <> t s <> ")"
- go SNil rs = "(" <> rs <> ")"
- go (SCons x xs) rs = go xs (rs <> " " <> encodeSExpr x t)
- -- | Emit an S-Expression in a machine-readable way. This does no
- -- pretty-printing or indentation, and produces no comments.
- encodeOne :: SExprParser atom carrier -> carrier -> Text
- encodeOne spec c = encodeSExpr (preserial spec c) (sesSAtom spec)
- encode :: SExprParser atom carrier -> [carrier] -> Text
- encode spec cs = T.concat (map (encodeOne spec) cs)
- -}
|