General.hs 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. {-# LANGUAGE RecordWildCards #-}
  2. {-# LANGUAGE ViewPatterns #-}
  3. {-# LANGUAGE OverloadedStrings #-}
  4. module Data.SCargot.General
  5. ( -- * SExprSpec
  6. SExprSpec
  7. , mkSpec
  8. , convertSpec
  9. , addReader
  10. , setComment
  11. -- * Specific SExprSpec Conversions
  12. , asRich
  13. , asWellFormed
  14. , withSemicolonComments
  15. , withQuote
  16. -- * Using a SExprSpec
  17. , decode
  18. , decodeOne
  19. , encode
  20. -- * Useful Type Aliases
  21. , Reader
  22. , Comment
  23. , Serializer
  24. ) where
  25. import Control.Applicative ((<*), (*>), (<|>), (<*>), (<$>), pure)
  26. import Control.Monad ((>=>))
  27. import Data.Attoparsec.Text
  28. import Data.Char (isAlpha, isDigit, isAlphaNum)
  29. import Data.Map.Strict (Map)
  30. import qualified Data.Map.Strict as M
  31. import Data.Text (Text, pack, unpack)
  32. import Prelude hiding (takeWhile)
  33. import Data.SCargot.Repr
  34. type ReaderMacroMap atom = Map Char (Reader atom)
  35. -- | A 'Reader' represents a reader macro: it takes a parser for
  36. -- the S-Expression type and performs as much or as little
  37. -- parsing as it would like, and then returns an S-expression.
  38. type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))
  39. -- | A 'Comment' represents any kind of skippable comment. This
  40. -- parser __must__ be able to fail if a comment is not being
  41. -- recognized, and it __must__ not consume any input.
  42. type Comment = Parser ()
  43. -- | A 'Serializer' is any function which can serialize an Atom
  44. -- to 'Text'.
  45. type Serializer atom = atom -> Text
  46. -- | A 'SExprSpec' describes a parser and emitter for a particular
  47. -- variant of S-Expressions. The @atom@ type corresponds to a
  48. -- Haskell type used to represent the atoms, and the @carrier@
  49. -- type corresponds to the parsed S-Expression structure. The
  50. -- 'SExprSpec' type is deliberately opaque so that it must be
  51. -- constructed and modified with other helper functions.
  52. data SExprSpec atom carrier = SExprSpec
  53. { sesPAtom :: Parser atom
  54. , sesSAtom :: Serializer atom
  55. , readerMap :: ReaderMacroMap atom
  56. , comment :: Maybe Comment
  57. , postparse :: SExpr atom -> Either String carrier
  58. , preserial :: carrier -> SExpr atom
  59. }
  60. -- | Create a basic 'SExprSpec' when given a parser and serializer
  61. -- for an atom type. A small minimal 'SExprSpec' that recognizes
  62. -- any alphanumeric sequence as a valid atom looks like:
  63. --
  64. -- > simpleSpec :: SExprSpec Text (SExpr Text)
  65. -- > simpleSpec = mkSpec (takeWhile1 isAlphaNum) id
  66. mkSpec :: Parser atom -> Serializer atom -> SExprSpec atom (SExpr atom)
  67. mkSpec p s = SExprSpec
  68. { sesPAtom = p
  69. , sesSAtom = s
  70. , readerMap = M.empty
  71. , comment = Nothing
  72. , postparse = return
  73. , preserial = id
  74. }
  75. -- | Modify the carrier type for a 'SExprSpec'. This is
  76. -- used internally to convert between various 'SExpr' representations,
  77. -- but could also be used externally to add an extra conversion layer
  78. -- onto a 'SExprSpec'.
  79. --
  80. -- The following defines an S-expression spec that recognizes the
  81. -- language of binary addition trees. It does so by first transforming
  82. -- the internal S-expression representation using 'asWellFormed', and
  83. -- then providing a conversion between the 'WellFormedSExpr' type and
  84. -- an @Expr@ AST. Notice that the below parser uses 'String' as its
  85. -- underlying atom type.
  86. --
  87. -- > data Expr = Add Expr Expr | Num Int deriving (Eq, Show)
  88. -- >
  89. -- > toExpr :: WellFormedSExpr String -> Either String Expr
  90. -- > toExpr (WFSList [WFSAtom "+", l, r]) = Add <$> toExpr l <*> toExpr r
  91. -- > toExpr (WFSAtom c) | all isDigit c = pure (Num (read c))
  92. -- > toExpr c = Left ("Invalid expr: " ++ show c)
  93. -- >
  94. -- > fromExpr :: Expr -> WellFormedSExpr String
  95. -- > fromExpr (Add l r) = WFSList [WFSAtom "+", fromExpr l, fromExpr r]
  96. -- > fromExpr (Num n) = WFSAtom (show n)
  97. -- >
  98. -- > mySpec :: SExprSpec String Expr
  99. -- > mySpec = convertSpec toExpr fromExpr $ asWellFormed $ mkSpec parser pack
  100. -- > where parser = unpack <$> takeWhile1 isValidChar
  101. -- > isValidChar c = isDigit c || c == '+'
  102. convertSpec :: (b -> Either String c) -> (c -> b)
  103. -> SExprSpec a b -> SExprSpec a c
  104. convertSpec f g spec = spec
  105. { postparse = postparse spec >=> f
  106. , preserial = preserial spec . g
  107. }
  108. -- | Convert the final output representation from the 'SExpr' type
  109. -- to the 'RichSExpr' type.
  110. asRich :: SExprSpec a (SExpr b) -> SExprSpec a (RichSExpr b)
  111. asRich = convertSpec (return . toRich) fromRich
  112. -- | Convert the final output representation from the 'SExpr' type
  113. -- to the 'WellFormedSExpr' type.
  114. asWellFormed :: SExprSpec a (SExpr b) -> SExprSpec a (WellFormedSExpr b)
  115. asWellFormed = convertSpec toWellFormed fromWellFormed
  116. -- | Add the ability to execute some particular reader macro, as
  117. -- defined by its initial character and the 'Parser' which returns
  118. -- the parsed S-Expression. The 'Reader' is passed a 'Parser' which
  119. -- can be recursively called to parse more S-Expressions, and begins
  120. -- parsing after the reader character has been removed from the
  121. -- stream.
  122. --
  123. -- The following defines an S-expression variant that treats
  124. -- @'expr@ as being sugar for @(quote expr)@:
  125. --
  126. -- > mySpec :: SExprSpec Text (SExpr Text)
  127. -- > mySpec = addReader '\'' reader $ mkSpec (takeWhile1 isAlphaNum) id
  128. -- > where reader p = quote <$> p
  129. -- > quote e = SCons (SAtom "quote") (SCons e SNil)
  130. addReader :: Char -> Reader a -> SExprSpec a c -> SExprSpec a c
  131. addReader c reader spec = spec
  132. { readerMap = M.insert c reader (readerMap spec) }
  133. -- | Add the ability to ignore some kind of comment. This gets
  134. -- factored into whitespace parsing, and it's very important that
  135. -- the parser supplied __be able to fail__ (as otherwise it will
  136. -- cause an infinite loop), and also that it __not consume any input__
  137. -- (which may require it to be wrapped in 'try'.)
  138. --
  139. -- The following code defines an S-expression variant that skips
  140. -- C++-style comments, i.e. those which begin with @//@ and last
  141. -- until the end of a line:
  142. --
  143. -- > t :: SExprSpec Text (SExpr Text)
  144. -- > t = setComment comm $ mkSpec (takeWhile1 isAlphaNum) id
  145. -- > where comm = try (string "//" *> takeWhile (/= '\n') *> pure ())
  146. setComment :: Comment -> SExprSpec a c -> SExprSpec a c
  147. setComment c spec = spec { comment = Just c }
  148. -- | Add the ability to skip line comments beginning with a semicolon.
  149. withSemicolonComments :: SExprSpec a c -> SExprSpec a c
  150. withSemicolonComments = setComment (char ';' >> takeWhile (/='\n') >> return ())
  151. -- | Add the ability to understand a quoted S-Expression. In general,
  152. -- many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This is
  153. -- a convenience function which allows you to easily add quoted
  154. -- expressions to a 'SExprSpec', provided that you supply which
  155. -- atom you want substituted in for the symbol @quote@.
  156. withQuote :: a -> SExprSpec a (SExpr a) -> SExprSpec a (SExpr a)
  157. withQuote q = addReader '\'' (fmap go)
  158. where go s = SCons (SAtom q) (SCons s SNil)
  159. parseGenericSExpr ::
  160. Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
  161. parseGenericSExpr atom reader skip = do
  162. let sExpr = parseGenericSExpr atom reader skip
  163. skip
  164. c <- peekChar
  165. r <- case c of
  166. Nothing -> fail "Unexpected end of input"
  167. Just '(' -> char '(' >> skip >> parseList sExpr skip
  168. Just (flip M.lookup reader -> Just r) -> anyChar >> r sExpr
  169. _ -> SAtom `fmap` atom
  170. skip
  171. return r
  172. parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom)
  173. parseList sExpr skip = do
  174. i <- peekChar
  175. case i of
  176. Nothing -> fail "Unexpected end of input"
  177. Just ')' -> char ')' >> return SNil
  178. _ -> do
  179. car <- sExpr
  180. skip
  181. c <- peekChar
  182. case c of
  183. Just '.' -> do
  184. char '.'
  185. cdr <- sExpr
  186. skip
  187. char ')'
  188. skip
  189. return (SCons car cdr)
  190. Just ')' -> do
  191. char ')'
  192. skip
  193. return (SCons car SNil)
  194. _ -> do
  195. cdr <- parseList sExpr skip
  196. return (SCons car cdr)
  197. -- | Given a CommentMap, create the corresponding parser to
  198. -- skip those comments (if they exist).
  199. buildSkip :: Maybe (Parser ()) -> Parser ()
  200. buildSkip Nothing = skipSpace
  201. buildSkip (Just c) = alternate
  202. where alternate = skipSpace >> ((c >> alternate) <|> return ())
  203. -- | Decode a single S-expression. If any trailing input is left after
  204. -- the S-expression (ignoring comments or whitespace) then this
  205. -- will fail: for those cases, use 'decode', which returns a list of
  206. -- all the S-expressions found at the top level.
  207. decodeOne :: SExprSpec atom carrier -> Text -> Either String carrier
  208. decodeOne SExprSpec { .. } = parseOnly (parser <* endOfInput) >=> postparse
  209. where parser = parseGenericSExpr sesPAtom readerMap (buildSkip comment)
  210. -- | Decode several S-expressions according to a given 'SExprSpec'. This
  211. -- will return a list of every S-expression that appears at the top-level
  212. -- of the document.
  213. decode :: SExprSpec atom carrier -> Text -> Either String [carrier]
  214. decode SExprSpec { .. } =
  215. parseOnly (many1 parser <* endOfInput) >=> mapM postparse
  216. where parser = parseGenericSExpr sesPAtom readerMap (buildSkip comment)
  217. -- | Emit an S-Expression in a machine-readable way. This
  218. encode :: SExprSpec atom carrier -> carrier -> Text
  219. encode SExprSpec { .. } = undefined