Parse.hs 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. {-# LANGUAGE ViewPatterns #-}
  2. {-# LANGUAGE OverloadedStrings #-}
  3. module Data.SCargot.Parse
  4. ( -- * Parsing
  5. decode
  6. , decodeOne
  7. -- * Parsing Control
  8. , SExprParser
  9. , Reader
  10. , Comment
  11. , mkParser
  12. , setCarrier
  13. , addReader
  14. , setComment
  15. -- * Specific SExprParser Conversions
  16. , asRich
  17. , asWellFormed
  18. , withQuote
  19. ) where
  20. import Control.Applicative ((<*), (*>), (<*>), (<$>), pure)
  21. import Control.Monad ((>=>))
  22. import Data.Char (isAlpha, isDigit, isAlphaNum)
  23. import Data.Map.Strict (Map)
  24. import qualified Data.Map.Strict as M
  25. import Data.Maybe (fromJust)
  26. import Data.Monoid ((<>))
  27. import Data.String (IsString)
  28. import Data.Text (Text, pack, unpack)
  29. import qualified Data.Text as T
  30. import Text.Parsec ( (<|>)
  31. , (<?>)
  32. , char
  33. , eof
  34. , lookAhead
  35. , many1
  36. , runParser
  37. , skipMany
  38. )
  39. import Text.Parsec.Char (anyChar, space)
  40. import Text.Parsec.Text (Parser)
  41. import Data.SCargot.Repr ( SExpr(..)
  42. , RichSExpr
  43. , WellFormedSExpr
  44. , fromRich
  45. , toRich
  46. , fromWellFormed
  47. , toWellFormed
  48. )
  49. type ReaderMacroMap atom = Map Char (Reader atom)
  50. -- | A 'Reader' represents a reader macro: it takes a parser for
  51. -- the S-Expression type and performs as much or as little
  52. -- parsing as it would like, and then returns an S-expression.
  53. type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))
  54. -- | A 'Comment' represents any kind of skippable comment. This
  55. -- parser __must__ be able to fail if a comment is not being
  56. -- recognized, and it __must__ not consume any input in case
  57. -- of failure.
  58. type Comment = Parser ()
  59. -- | A 'SExprParser' describes a parser for a particular value
  60. -- that has been serialized as an s-expression. The @atom@ parameter
  61. -- corresponds to a Haskell type used to represent the atoms,
  62. -- and the @carrier@ parameter corresponds to the parsed S-Expression
  63. -- structure.
  64. data SExprParser atom carrier = SExprParser
  65. { sesPAtom :: Parser atom
  66. , readerMap :: ReaderMacroMap atom
  67. , comment :: Maybe Comment
  68. , postparse :: SExpr atom -> Either String carrier
  69. }
  70. -- | Create a basic 'SExprParser' when given a parser
  71. -- for an atom type.
  72. --
  73. -- >>> import Text.Parsec (alphaNum, many1)
  74. -- >>> let parser = mkParser (many1 alphaNum)
  75. -- >>> decode parser "(ele phant)"
  76. -- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]
  77. mkParser :: Parser atom -> SExprParser atom (SExpr atom)
  78. mkParser parser = SExprParser
  79. { sesPAtom = parser
  80. , readerMap = M.empty
  81. , comment = Nothing
  82. , postparse = return
  83. }
  84. -- | Modify the carrier type for a 'SExprParser'. This is
  85. -- used internally to convert between various 'SExpr' representations,
  86. -- but could also be used externally to add an extra conversion layer
  87. -- onto a 'SExprParser'.
  88. --
  89. -- >>> import Text.Parsec (alphaNum, many1)
  90. -- >>> import Data.SCargot.Repr (toRich)
  91. -- >>> let parser = setCarrier (return . toRich) (mkParser (many1 alphaNum))
  92. -- >>> decode parser "(ele phant)"
  93. -- Right [RSlist [RSAtom "ele",RSAtom "phant"]]
  94. setCarrier :: (b -> Either String c) -> SExprParser a b -> SExprParser a c
  95. setCarrier f spec = spec { postparse = postparse spec >=> f }
  96. -- | Convert the final output representation from the 'SExpr' type
  97. -- to the 'RichSExpr' type.
  98. --
  99. -- >>> import Text.Parsec (alphaNum, many1)
  100. -- >>> let parser = asRich (mkParser (many1 alphaNum))
  101. -- >>> decode parser "(ele phant)"
  102. -- Right [RSlist [RSAtom "ele",RSAtom "phant"]]
  103. asRich :: SExprParser a (SExpr b) -> SExprParser a (RichSExpr b)
  104. asRich = setCarrier (return . toRich)
  105. -- | Convert the final output representation from the 'SExpr' type
  106. -- to the 'WellFormedSExpr' type.
  107. --
  108. -- >>> import Text.Parsec (alphaNum, many1)
  109. -- >>> let parser = asWellFormed (mkParser (many1 alphaNum))
  110. -- >>> decode parser "(ele phant)"
  111. -- Right [WFSList [WFSAtom "ele",WFSAtom "phant"]]
  112. asWellFormed :: SExprParser a (SExpr b) -> SExprParser a (WellFormedSExpr b)
  113. asWellFormed = setCarrier toWellFormed
  114. -- | Add the ability to execute some particular reader macro, as
  115. -- defined by its initial character and the 'Parser' which returns
  116. -- the parsed S-Expression. The 'Reader' is passed a 'Parser' which
  117. -- can be recursively called to parse more S-Expressions, and begins
  118. -- parsing after the reader character has been removed from the
  119. -- stream.
  120. --
  121. -- >>> import Text.Parsec (alphaNum, char, many1)
  122. -- >>> let vecReader p = (char ']' *> pure SNil) <|> (SCons <$> p <*> vecReader p)
  123. -- >>> let parser = addReader '[' vecReader (mkParser (many1 alphaNum))
  124. -- >>> decode parser "(an [ele phant])"
  125. -- Right [SCons (SAtom "an") (SCons (SCons (SAtom "ele") (SCons (SAtom "phant") SNil)) SNil)]
  126. addReader :: Char -> Reader a -> SExprParser a c -> SExprParser a c
  127. addReader c reader spec = spec
  128. { readerMap = M.insert c reader (readerMap spec) }
  129. -- | Add the ability to ignore some kind of comment. This gets
  130. -- factored into whitespace parsing, and it's very important that
  131. -- the parser supplied __be able to fail__ (as otherwise it will
  132. -- cause an infinite loop), and also that it __not consume any input__
  133. -- (which may require it to be wrapped in 'try'.)
  134. --
  135. -- >>> import Text.Parsec (alphaNum, anyChar, manyTill, many1, string)
  136. -- >>> let comment = string "//" *> manyTill anyChar newline *> pure ()
  137. -- >>> let parser = setComment comment (mkParser (many1 alphaNum))
  138. -- >>> decode parser "(ele //a comment\n phant)"
  139. -- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]
  140. setComment :: Comment -> SExprParser a c -> SExprParser a c
  141. setComment c spec = spec { comment = Just (c <?> "comment") }
  142. -- | Add the ability to understand a quoted S-Expression.
  143. -- Many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This
  144. -- assumes that the underlying atom type implements the "IsString"
  145. -- class, and will create the @quote@ atom using @fromString "quote"@.
  146. --
  147. -- >>> import Text.Parsec (alphaNum, many1)
  148. -- >>> let parser = withQuote (mkParser (many1 alphaNum))
  149. -- >>> decode parser "'elephant"
  150. -- Right [SCons (SAtom "quote") (SCons (SAtom "foo") SNil)]
  151. withQuote :: IsString t => SExprParser t (SExpr t) -> SExprParser t (SExpr t)
  152. withQuote = addReader '\'' (fmap go)
  153. where go s = SCons "quote" (SCons s SNil)
  154. peekChar :: Parser (Maybe Char)
  155. peekChar = Just <$> lookAhead anyChar <|> pure Nothing
  156. parseGenericSExpr ::
  157. Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
  158. parseGenericSExpr atom reader skip = do
  159. let sExpr = parseGenericSExpr atom reader skip <?> "s-expr"
  160. skip
  161. c <- peekChar
  162. r <- case c of
  163. Nothing -> fail "Unexpected end of input"
  164. Just '(' -> char '(' >> skip >> parseList sExpr skip
  165. Just (flip M.lookup reader -> Just r) -> anyChar >> r sExpr
  166. _ -> SAtom `fmap` atom
  167. skip
  168. return r
  169. parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom)
  170. parseList sExpr skip = do
  171. i <- peekChar
  172. case i of
  173. Nothing -> fail "Unexpected end of input"
  174. Just ')' -> char ')' >> return SNil
  175. _ -> do
  176. car <- sExpr
  177. skip
  178. c <- peekChar
  179. case c of
  180. Just '.' -> do
  181. char '.'
  182. cdr <- sExpr
  183. skip
  184. char ')'
  185. skip
  186. return (SCons car cdr)
  187. Just ')' -> do
  188. char ')'
  189. skip
  190. return (SCons car SNil)
  191. _ -> do
  192. cdr <- parseList sExpr skip
  193. return (SCons car cdr)
  194. -- | Given a CommentMap, create the corresponding parser to
  195. -- skip those comments (if they exist).
  196. buildSkip :: Maybe (Parser ()) -> Parser ()
  197. buildSkip Nothing = skipMany space
  198. buildSkip (Just c) = alternate
  199. where alternate = skipMany space >> ((c >> alternate) <|> return ())
  200. doParse :: Parser a -> Text -> Either String a
  201. doParse p t = case runParser p () "" t of
  202. Left err -> Left (show err)
  203. Right x -> Right x
  204. -- | Decode a single S-expression. If any trailing input is left after
  205. -- the S-expression (ignoring comments or whitespace) then this
  206. -- will fail: for those cases, use 'decode', which returns a list of
  207. -- all the S-expressions found at the top level.
  208. decodeOne :: SExprParser atom carrier -> Text -> Either String carrier
  209. decodeOne spec = doParse (parser <* eof) >=> (postparse spec)
  210. where parser = parseGenericSExpr
  211. (sesPAtom spec)
  212. (readerMap spec)
  213. (buildSkip (comment spec))
  214. -- | Decode several S-expressions according to a given 'SExprParser'. This
  215. -- will return a list of every S-expression that appears at the top-level
  216. -- of the document.
  217. decode :: SExprParser atom carrier -> Text -> Either String [carrier]
  218. decode spec =
  219. doParse (many1 parser <* eof) >=> mapM (postparse spec)
  220. where parser = parseGenericSExpr
  221. (sesPAtom spec)
  222. (readerMap spec)
  223. (buildSkip (comment spec))
  224. {-
  225. -- | Encode (without newlines) a single S-expression.
  226. encodeSExpr :: SExpr atom -> (atom -> Text) -> Text
  227. encodeSExpr SNil _ = "()"
  228. encodeSExpr (SAtom s) t = t s
  229. encodeSExpr (SCons x xs) t = go xs (encodeSExpr x t)
  230. where go (SAtom s) rs = "(" <> rs <> " . " <> t s <> ")"
  231. go SNil rs = "(" <> rs <> ")"
  232. go (SCons x xs) rs = go xs (rs <> " " <> encodeSExpr x t)
  233. -- | Emit an S-Expression in a machine-readable way. This does no
  234. -- pretty-printing or indentation, and produces no comments.
  235. encodeOne :: SExprParser atom carrier -> carrier -> Text
  236. encodeOne spec c = encodeSExpr (preserial spec c) (sesSAtom spec)
  237. encode :: SExprParser atom carrier -> [carrier] -> Text
  238. encode spec cs = T.concat (map (encodeOne spec) cs)
  239. -}