General.hs 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. {-# LANGUAGE RecordWildCards #-}
  2. {-# LANGUAGE ViewPatterns #-}
  3. {-# LANGUAGE OverloadedStrings #-}
  4. module Data.SCargot.General
  5. ( -- * SExprSpec
  6. SExprSpec
  7. , mkSpec
  8. , convertSpec
  9. , addReader
  10. , addComment
  11. -- * Specific SExprSpec Conversions
  12. , asRich
  13. , asWellFormed
  14. , withSemicolonComments
  15. , withQuote
  16. -- * Using a SExprSpec
  17. , decode
  18. , decodeOne
  19. , encode
  20. -- * Useful Type Aliases
  21. , Reader
  22. , Comment
  23. , Serializer
  24. ) where
  25. import Control.Applicative ((<*))
  26. import Control.Monad ((>=>))
  27. import Data.Attoparsec.Text
  28. import Data.Char (isAlpha)
  29. import Data.Map.Strict (Map)
  30. import qualified Data.Map.Strict as M
  31. import Data.Text (Text)
  32. import Prelude hiding (takeWhile)
  33. import Data.SCargot.Repr
  34. type ReaderMacroMap atom = Map Char (Reader atom)
  35. type CommentMap = Map Char Comment
  36. -- | A 'Reader' represents a reader macro: it takes a parser for
  37. -- the S-Expression type and performs as much or as little
  38. -- parsing as it would like, and then returns an S-expression.
  39. type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))
  40. -- | A 'Comment' represents any kind of skippable comment.
  41. type Comment = Parser ()
  42. -- | A 'Serializer' is any function which can serialize an Atom
  43. -- to 'Text'.
  44. type Serializer atom = atom -> Text
  45. -- | A 'SExprSpec' describes a parser and emitter for a particular
  46. -- variant of S-Expressions. The @atom@ type corresponds to a
  47. -- Haskell type used to represent the atoms, and the @carrier@
  48. -- type corresponds to the parsed S-Expression structure. The
  49. -- 'SExprSpec' type is deliberately opaque so that it must be
  50. -- constructed and modified with other helper functions.
  51. data SExprSpec atom carrier = SExprSpec
  52. { sesPAtom :: Parser atom
  53. , sesSAtom :: Serializer atom
  54. , readerMap :: ReaderMacroMap atom
  55. , comment :: Comment
  56. , postparse :: SExpr atom -> Either String carrier
  57. , preserial :: carrier -> SExpr atom
  58. }
  59. -- | Create a basic 'SExprSpec' when given a parser and serializer
  60. -- for an atom type.
  61. mkSpec :: Parser atom -> Serializer atom -> SExprSpec atom (SExpr atom)
  62. mkSpec p s = SExprSpec
  63. { sesPAtom = p
  64. , sesSAtom = s
  65. , readerMap = M.empty
  66. , commentMap = skipSpace
  67. , postparse = return
  68. , preserial = id
  69. }
  70. -- | Modify the carrier type for a 'SExprSpec'. This is
  71. -- used internally to convert between various 'SExpr' representations,
  72. -- but could also be used externally to add an extra conversion layer
  73. -- onto a 'SExprSpec', e.g. for a custom Lisp-like language:
  74. --
  75. -- > mySpec :: SExprSpec MyAtomType MyAST
  76. -- > mySpec = convertSpec sexprToMyAST myASTToSexpr spec
  77. -- > where spec = mkSpec myParser mySerializer
  78. convertSpec :: (b -> Either String c) -> (c -> b) -> SExprSpec a b -> SExprSpec a c
  79. convertSpec f g spec = spec
  80. { postparse = postparse spec >=> f
  81. , preserial = preserial spec . g
  82. }
  83. -- | Convert the final output representation from the 'SExpr' type
  84. -- to the 'RichSExpr' type.
  85. asRich :: SExprSpec a (SExpr b) -> SExprSpec a (RichSExpr b)
  86. asRich = convertSpec (return . toRich) fromRich
  87. -- | Convert the final output representation from the 'SExpr' type
  88. -- to the 'WellFormedSExpr' type.
  89. asWellFormed :: SExprSpec a (SExpr b) -> SExprSpec a (WellFormedSExpr b)
  90. asWellFormed = convertSpec toWellFormed fromWellFormed
  91. -- | Add the ability to execute some particular reader macro, as
  92. -- defined by its initial character and the 'Parser' which returns
  93. -- the parsed S-Expression. The 'Reader' is passed a 'Parser' which
  94. -- can be recursively called to parse more S-Expressions, and begins
  95. -- parsing after the reader character has been removed from the
  96. -- stream.
  97. addReader :: Char -> Reader a -> SExprSpec a c -> SExprSpec a c
  98. addReader c reader spec = spec
  99. { readerMap = M.insert c reader (readerMap spec) }
  100. -- | Add the ability to ignore some kind of comment. If the comment
  101. -- parser overlaps with a reader macro or the atom parser, then the
  102. -- former will be tried first.
  103. setComment :: Comment -> SExprSpec a c -> SExprSpec a c
  104. setComment c spec = spec { comment = c }
  105. -- | Add the ability to skip line comments beginning with a semicolon.
  106. withSemicolonComments :: SExprSpec a c -> SExprSpec a c
  107. withSemicolonComments = addComment ';' (skipWhile (\ c -> c /= '\n'))
  108. -- | Add the ability to understand a quoted S-Expression. In general,
  109. -- many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This is
  110. -- a convenience function which allows you to easily add quoted
  111. -- expressions to a 'SExprSpec', provided that you supply which
  112. -- atom you want substituted in for the symbol @quote@.
  113. withQuote :: a -> SExprSpec a (SExpr a) -> SExprSpec a (SExpr a)
  114. withQuote q = addReader '\'' prs
  115. where prs p = go `fmap` p
  116. go s = SCons (SAtom q) (SCons s SNil)
  117. parseGenericSExpr ::
  118. Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
  119. parseGenericSExpr atom reader skip = do
  120. let sExpr = parseGenericSExpr atom reader skip
  121. skip
  122. c <- peekChar
  123. r <- case c of
  124. Nothing -> fail "Unexpected end of input"
  125. Just '(' -> char '(' >> skip >> parseList sExpr skip
  126. Just (flip M.lookup reader -> Just r) -> anyChar >> r sExpr
  127. _ -> SAtom `fmap` atom
  128. skip
  129. return r
  130. parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom)
  131. parseList sExpr skip = do
  132. i <- peekChar
  133. case i of
  134. Nothing -> fail "Unexpected end of input"
  135. Just ')' -> char ')' >> return SNil
  136. _ -> do
  137. car <- sExpr
  138. skip
  139. c <- peekChar
  140. case c of
  141. Just '.' -> do
  142. char '.'
  143. cdr <- sExpr
  144. skip
  145. char ')'
  146. skip
  147. return (SCons car cdr)
  148. Just ')' -> do
  149. char ')'
  150. skip
  151. return (SCons car SNil)
  152. _ -> do
  153. cdr <- parseList sExpr skip
  154. return (SCons car cdr)
  155. -- | Given a CommentMap, create the corresponding parser to
  156. -- skip those comments (if they exist).
  157. buildSkip :: CommentMap -> Parser ()
  158. buildSkip m = skipSpace >> comments >> skipSpace
  159. where comments = do
  160. c <- peekChar
  161. case c of
  162. Nothing -> return ()
  163. Just c' -> case M.lookup c' m of
  164. Just p -> anyChar >> p
  165. Nothing -> return ()
  166. (#) :: a -> (a -> b) -> b
  167. (#) = flip ($)
  168. testSpec :: SExprSpec Text (SExpr Text)
  169. testSpec = mkSpec (takeWhile1 isAlpha) id
  170. # withQuote "quote"
  171. # addReader '#' (\ p -> SCons (SAtom "vector") `fmap` p)
  172. -- | Decode a single S-expression. If any trailing input is left after
  173. -- the S-expression (ignoring comments or whitespace) then this
  174. -- will fail: for those cases, use 'decode', which returns a list of
  175. -- all the S-expressions found at the top level.
  176. decodeOne :: SExprSpec atom carrier -> Text -> Either String carrier
  177. decodeOne SExprSpec { .. } = parseOnly (parser <* endOfInput) >=> postparse
  178. where parser = parseGenericSExpr sesPAtom readerMap (buildSkip commentMap)
  179. -- | Decode several S-expressions according to a given 'SExprSpec'. This
  180. -- will return a list of every S-expression that appears at the top-level
  181. -- of the document.
  182. decode :: SExprSpec atom carrier -> Text -> Either String [carrier]
  183. decode SExprSpec { .. } =
  184. parseOnly (many1 parser <* endOfInput) >=> mapM postparse
  185. where parser = parseGenericSExpr sesPAtom readerMap (buildSkip commentMap)
  186. -- | Emit an S-Expression in a machine-readable way. This
  187. encode :: SExprSpec atom carrier -> carrier -> Text
  188. encode SExprSpec { .. } = undefined