|
@@ -1,26 +1,22 @@
|
|
{-# LANGUAGE ViewPatterns #-}
|
|
{-# LANGUAGE ViewPatterns #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
|
|
-module Data.SCargot.General
|
|
|
|
- ( -- * SExprSpec
|
|
|
|
- SExprSpec
|
|
|
|
- , mkSpec
|
|
|
|
- , convertSpec
|
|
|
|
|
|
+module Data.SCargot.Parse
|
|
|
|
+ ( -- * Parsing
|
|
|
|
+ decode
|
|
|
|
+ , decodeOne
|
|
|
|
+ -- * Parsing Control
|
|
|
|
+ , SExprParser
|
|
|
|
+ , Reader
|
|
|
|
+ , Comment
|
|
|
|
+ , mkParser
|
|
|
|
+ , setCarrier
|
|
, addReader
|
|
, addReader
|
|
, setComment
|
|
, setComment
|
|
- -- * Specific SExprSpec Conversions
|
|
|
|
|
|
+ -- * Specific SExprParser Conversions
|
|
, asRich
|
|
, asRich
|
|
, asWellFormed
|
|
, asWellFormed
|
|
, withQuote
|
|
, withQuote
|
|
- -- * Using a SExprSpec
|
|
|
|
- , decode
|
|
|
|
- , decodeOne
|
|
|
|
- , encode
|
|
|
|
- , encodeOne
|
|
|
|
- -- * Useful Type Aliases
|
|
|
|
- , Reader
|
|
|
|
- , Comment
|
|
|
|
- , Serializer
|
|
|
|
) where
|
|
) where
|
|
|
|
|
|
import Control.Applicative ((<*), (*>), (<*>), (<$>), pure)
|
|
import Control.Applicative ((<*), (*>), (<*>), (<$>), pure)
|
|
@@ -63,87 +59,69 @@ type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))
|
|
|
|
|
|
-- | A 'Comment' represents any kind of skippable comment. This
|
|
-- | A 'Comment' represents any kind of skippable comment. This
|
|
-- parser __must__ be able to fail if a comment is not being
|
|
-- parser __must__ be able to fail if a comment is not being
|
|
|
|
+-- recognized, and it __must__ not consume any input in case
|
|
|
|
+-- of failure.
|
|
type Comment = Parser ()
|
|
type Comment = Parser ()
|
|
|
|
|
|
-type Serializer atom = atom -> Text
|
|
|
|
-
|
|
|
|
-data SExprSpec atom carrier = SExprSpec
|
|
|
|
|
|
+-- | A 'SExprParser' describes a parser for a particular value
|
|
|
|
+-- that has been serialized as an s-expression. The @atom@ parameter
|
|
|
|
+-- corresponds to a Haskell type used to represent the atoms,
|
|
|
|
+-- and the @carrier@ parameter corresponds to the parsed S-Expression
|
|
|
|
+-- structure.
|
|
|
|
+data SExprParser atom carrier = SExprParser
|
|
{ sesPAtom :: Parser atom
|
|
{ sesPAtom :: Parser atom
|
|
- , sesSAtom :: Serializer atom
|
|
|
|
, readerMap :: ReaderMacroMap atom
|
|
, readerMap :: ReaderMacroMap atom
|
|
, comment :: Maybe Comment
|
|
, comment :: Maybe Comment
|
|
, postparse :: SExpr atom -> Either String carrier
|
|
, postparse :: SExpr atom -> Either String carrier
|
|
- , preserial :: carrier -> SExpr atom
|
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+-- | Create a basic 'SExprParser' when given a parser
|
|
|
|
+-- for an atom type.
|
|
--
|
|
--
|
|
-mkSpec :: Parser atom -> Serializer atom -> SExprSpec atom (SExpr atom)
|
|
|
|
-mkSpec p s = SExprSpec
|
|
|
|
- { sesPAtom = p <?> "atom"
|
|
|
|
- , sesSAtom = s
|
|
|
|
|
|
+-- >>> import Text.Parsec (alphaNum, many1)
|
|
|
|
+-- >>> let parser = mkParser (many1 alphaNum)
|
|
|
|
+-- >>> decode parser "(ele phant)"
|
|
|
|
+-- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]
|
|
|
|
+mkParser :: Parser atom -> SExprParser atom (SExpr atom)
|
|
|
|
+mkParser parser = SExprParser
|
|
|
|
+ { sesPAtom = parser
|
|
, readerMap = M.empty
|
|
, readerMap = M.empty
|
|
, comment = Nothing
|
|
, comment = Nothing
|
|
, postparse = return
|
|
, postparse = return
|
|
- , preserial = id
|
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+-- | Modify the carrier type for a 'SExprParser'. This is
|
|
-- used internally to convert between various 'SExpr' representations,
|
|
-- used internally to convert between various 'SExpr' representations,
|
|
-- but could also be used externally to add an extra conversion layer
|
|
-- but could also be used externally to add an extra conversion layer
|
|
---
|
|
|
|
|
|
+-- onto a 'SExprParser'.
|
|
--
|
|
--
|
|
-convertSpec :: (b -> Either String c) -> (c -> b)
|
|
|
|
- -> SExprSpec a b -> SExprSpec a c
|
|
|
|
-convertSpec f g spec = spec
|
|
|
|
- { postparse = postparse spec >=> f
|
|
|
|
- , preserial = preserial spec . g
|
|
|
|
- }
|
|
|
|
|
|
+-- >>> import Text.Parsec (alphaNum, many1)
|
|
|
|
+-- >>> import Data.SCargot.Repr (toRich)
|
|
|
|
+-- >>> let parser = setCarrier (return . toRich) (mkParser (many1 alphaNum))
|
|
|
|
+-- >>> decode parser "(ele phant)"
|
|
|
|
+-- Right [RSlist [RSAtom "ele",RSAtom "phant"]]
|
|
|
|
+setCarrier :: (b -> Either String c) -> SExprParser a b -> SExprParser a c
|
|
|
|
+setCarrier f spec = spec { postparse = postparse spec >=> f }
|
|
|
|
|
|
-- | Convert the final output representation from the 'SExpr' type
|
|
-- | Convert the final output representation from the 'SExpr' type
|
|
-- to the 'RichSExpr' type.
|
|
-- to the 'RichSExpr' type.
|
|
-asRich :: SExprSpec a (SExpr b) -> SExprSpec a (RichSExpr b)
|
|
|
|
-asRich = convertSpec (return . toRich) fromRich
|
|
|
|
|
|
+--
|
|
|
|
+-- >>> import Text.Parsec (alphaNum, many1)
|
|
|
|
+-- >>> let parser = asRich (mkParser (many1 alphaNum))
|
|
|
|
+-- >>> decode parser "(ele phant)"
|
|
|
|
+-- Right [RSlist [RSAtom "ele",RSAtom "phant"]]
|
|
|
|
+asRich :: SExprParser a (SExpr b) -> SExprParser a (RichSExpr b)
|
|
|
|
+asRich = setCarrier (return . toRich)
|
|
|
|
|
|
-- | Convert the final output representation from the 'SExpr' type
|
|
-- | Convert the final output representation from the 'SExpr' type
|
|
-- to the 'WellFormedSExpr' type.
|
|
-- to the 'WellFormedSExpr' type.
|
|
-asWellFormed :: SExprSpec a (SExpr b) -> SExprSpec a (WellFormedSExpr b)
|
|
|
|
-asWellFormed = convertSpec toWellFormed fromWellFormed
|
|
|
|
|
|
+--
|
|
|
|
+-- >>> import Text.Parsec (alphaNum, many1)
|
|
|
|
+-- >>> let parser = asWellFormed (mkParser (many1 alphaNum))
|
|
|
|
+-- >>> decode parser "(ele phant)"
|
|
|
|
+-- Right [WFSList [WFSAtom "ele",WFSAtom "phant"]]
|
|
|
|
+asWellFormed :: SExprParser a (SExpr b) -> SExprParser a (WellFormedSExpr b)
|
|
|
|
+asWellFormed = setCarrier toWellFormed
|
|
|
|
|
|
-- | Add the ability to execute some particular reader macro, as
|
|
-- | Add the ability to execute some particular reader macro, as
|
|
-- defined by its initial character and the 'Parser' which returns
|
|
-- defined by its initial character and the 'Parser' which returns
|
|
@@ -152,16 +130,13 @@ asWellFormed = convertSpec toWellFormed fromWellFormed
|
|
-- parsing after the reader character has been removed from the
|
|
-- parsing after the reader character has been removed from the
|
|
-- stream.
|
|
-- stream.
|
|
--
|
|
--
|
|
---
|
|
|
|
-addReader :: Char -> Reader a -> SExprSpec a c -> SExprSpec a c
|
|
|
|
|
|
+-- >>> import Text.Parsec (alphaNum, char, many1)
|
|
|
|
+-- >>> let vecReader p = (char ']' *> pure SNil) <|> (SCons <$> p <*> vecReader p)
|
|
|
|
+-- >>> let parser = addReader '[' vecReader (mkParser (many1 alphaNum))
|
|
|
|
+-- >>> decode parser "(an [ele phant])"
|
|
|
|
+-- Right [SCons (SAtom "an") (SCons (SCons (SAtom "ele") (SCons (SAtom "phant") SNil)) SNil)]
|
|
|
|
+
|
|
|
|
+addReader :: Char -> Reader a -> SExprParser a c -> SExprParser a c
|
|
addReader c reader spec = spec
|
|
addReader c reader spec = spec
|
|
{ readerMap = M.insert c reader (readerMap spec) }
|
|
{ readerMap = M.insert c reader (readerMap spec) }
|
|
|
|
|
|
@@ -171,23 +146,25 @@ addReader c reader spec = spec
|
|
-- cause an infinite loop), and also that it __not consume any input__
|
|
-- cause an infinite loop), and also that it __not consume any input__
|
|
-- (which may require it to be wrapped in 'try'.)
|
|
-- (which may require it to be wrapped in 'try'.)
|
|
--
|
|
--
|
|
---
|
|
|
|
|
|
+-- >>> import Text.Parsec (alphaNum, anyChar, manyTill, many1, string)
|
|
|
|
+-- >>> let comment = string "//" *> manyTill anyChar newline *> pure ()
|
|
|
|
+-- >>> let parser = setComment comment (mkParser (many1 alphaNum))
|
|
|
|
+-- >>> decode parser "(ele //a comment\n phant)"
|
|
|
|
+-- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]
|
|
|
|
|
|
-setComment :: Comment -> SExprSpec a c -> SExprSpec a c
|
|
|
|
|
|
+setComment :: Comment -> SExprParser a c -> SExprParser a c
|
|
setComment c spec = spec { comment = Just (c <?> "comment") }
|
|
setComment c spec = spec { comment = Just (c <?> "comment") }
|
|
|
|
|
|
-withQuote :: IsString t => SExprSpec t (SExpr t) -> SExprSpec t (SExpr t)
|
|
|
|
|
|
+-- | Add the ability to understand a quoted S-Expression.
|
|
|
|
+-- Many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This
|
|
|
|
+-- assumes that the underlying atom type implements the "IsString"
|
|
|
|
+-- class, and will create the @quote@ atom using @fromString "quote"@.
|
|
|
|
+--
|
|
|
|
+-- >>> import Text.Parsec (alphaNum, many1)
|
|
|
|
+-- >>> let parser = withQuote (mkParser (many1 alphaNum))
|
|
|
|
+-- >>> decode parser "'elephant"
|
|
|
|
+-- Right [SCons (SAtom "quote") (SCons (SAtom "foo") SNil)]
|
|
|
|
+withQuote :: IsString t => SExprParser t (SExpr t) -> SExprParser t (SExpr t)
|
|
withQuote = addReader '\'' (fmap go)
|
|
withQuote = addReader '\'' (fmap go)
|
|
where go s = SCons "quote" (SCons s SNil)
|
|
where go s = SCons "quote" (SCons s SNil)
|
|
|
|
|
|
@@ -250,17 +227,17 @@ doParse p t = case runParser p () "" t of
|
|
-- the S-expression (ignoring comments or whitespace) then this
|
|
-- the S-expression (ignoring comments or whitespace) then this
|
|
-- will fail: for those cases, use 'decode', which returns a list of
|
|
-- will fail: for those cases, use 'decode', which returns a list of
|
|
-- all the S-expressions found at the top level.
|
|
-- all the S-expressions found at the top level.
|
|
-decodeOne :: SExprSpec atom carrier -> Text -> Either String carrier
|
|
|
|
|
|
+decodeOne :: SExprParser atom carrier -> Text -> Either String carrier
|
|
decodeOne spec = doParse (parser <* eof) >=> (postparse spec)
|
|
decodeOne spec = doParse (parser <* eof) >=> (postparse spec)
|
|
where parser = parseGenericSExpr
|
|
where parser = parseGenericSExpr
|
|
(sesPAtom spec)
|
|
(sesPAtom spec)
|
|
(readerMap spec)
|
|
(readerMap spec)
|
|
(buildSkip (comment spec))
|
|
(buildSkip (comment spec))
|
|
|
|
|
|
|
|
+-- | Decode several S-expressions according to a given 'SExprParser'. This
|
|
-- will return a list of every S-expression that appears at the top-level
|
|
-- will return a list of every S-expression that appears at the top-level
|
|
-- of the document.
|
|
-- of the document.
|
|
-decode :: SExprSpec atom carrier -> Text -> Either String [carrier]
|
|
|
|
|
|
+decode :: SExprParser atom carrier -> Text -> Either String [carrier]
|
|
decode spec =
|
|
decode spec =
|
|
doParse (many1 parser <* eof) >=> mapM (postparse spec)
|
|
doParse (many1 parser <* eof) >=> mapM (postparse spec)
|
|
where parser = parseGenericSExpr
|
|
where parser = parseGenericSExpr
|
|
@@ -268,6 +245,7 @@ decode spec =
|
|
(readerMap spec)
|
|
(readerMap spec)
|
|
(buildSkip (comment spec))
|
|
(buildSkip (comment spec))
|
|
|
|
|
|
|
|
+{-
|
|
-- | Encode (without newlines) a single S-expression.
|
|
-- | Encode (without newlines) a single S-expression.
|
|
encodeSExpr :: SExpr atom -> (atom -> Text) -> Text
|
|
encodeSExpr :: SExpr atom -> (atom -> Text) -> Text
|
|
encodeSExpr SNil _ = "()"
|
|
encodeSExpr SNil _ = "()"
|
|
@@ -279,8 +257,9 @@ encodeSExpr (SCons x xs) t = go xs (encodeSExpr x t)
|
|
|
|
|
|
-- | Emit an S-Expression in a machine-readable way. This does no
|
|
-- | Emit an S-Expression in a machine-readable way. This does no
|
|
-- pretty-printing or indentation, and produces no comments.
|
|
-- pretty-printing or indentation, and produces no comments.
|
|
-encodeOne :: SExprSpec atom carrier -> carrier -> Text
|
|
|
|
|
|
+encodeOne :: SExprParser atom carrier -> carrier -> Text
|
|
encodeOne spec c = encodeSExpr (preserial spec c) (sesSAtom spec)
|
|
encodeOne spec c = encodeSExpr (preserial spec c) (sesSAtom spec)
|
|
|
|
|
|
-encode :: SExprSpec atom carrier -> [carrier] -> Text
|
|
|
|
|
|
+encode :: SExprParser atom carrier -> [carrier] -> Text
|
|
encode spec cs = T.concat (map (encodeOne spec) cs)
|
|
encode spec cs = T.concat (map (encodeOne spec) cs)
|
|
|
|
+-}
|