HaskLike.hs 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. {-# LANGUAGE FlexibleInstances #-}
  2. {-# LANGUAGE OverloadedStrings #-}
  3. module Data.SCargot.Language.HaskLike
  4. ( -- $info
  5. HaskLikeAtom(..)
  6. , haskLikeParser
  7. , haskLikePrinter
  8. , locatedHaskLikeParser
  9. , locatedHaskLikePrinter
  10. -- * Individual Parsers
  11. , parseHaskellString
  12. , parseHaskellFloat
  13. , parseHaskellInt
  14. ) where
  15. #if !MIN_VERSION_base(4,8,0)
  16. import Control.Applicative ((<$>), (<$))
  17. #endif
  18. import Data.Maybe (catMaybes)
  19. import Data.String (IsString(..))
  20. import Data.Text (Text, pack)
  21. import Text.Parsec
  22. import Text.Parsec.Text (Parser)
  23. import Prelude hiding (concatMap)
  24. import Data.SCargot.Common
  25. import Data.SCargot.Repr.Basic (SExpr)
  26. import Data.SCargot (SExprParser, SExprPrinter, mkParser, flatPrint)
  27. {- $info
  28. This module is intended for simple, ad-hoc configuration or data
  29. formats that might not need their on rich structure but might benefit
  30. from a few various kinds of literals. The 'haskLikeParser' understands
  31. identifiers as defined by R5RS, as well as string, integer, and
  32. floating-point literals as defined by the Haskell 2010 spec. It does
  33. __not__ natively understand other data types, such as booleans,
  34. vectors, bitstrings.
  35. -}
  36. -- | An atom type that understands Haskell-like values as well as
  37. -- Scheme-like identifiers.
  38. data HaskLikeAtom
  39. = HSIdent Text -- ^ An identifier, parsed according to the R5RS Scheme
  40. -- standard
  41. | HSString Text -- ^ A string, parsed according to the syntax for string
  42. -- literals in the Haskell report
  43. | HSInt Integer -- ^ An arbitrary-sized integer value, parsed according to
  44. -- the syntax for integer literals in the Haskell report
  45. | HSFloat Double -- ^ A double-precision floating-point value, parsed
  46. -- according to the syntax for floats in the Haskell
  47. -- report
  48. deriving (Eq, Show)
  49. instance IsString HaskLikeAtom where
  50. fromString = HSIdent . fromString
  51. instance IsString (Located HaskLikeAtom) where
  52. fromString = (At dLocation) . HSIdent . fromString
  53. -- | Parse a Haskell string literal as defined by the Haskell 2010
  54. -- language specification.
  55. parseHaskellString :: Parser Text
  56. parseHaskellString = pack . catMaybes <$> between (char '"') (char '"') (many (val <|> esc))
  57. where val = Just <$> satisfy (\ c -> c /= '"' && c /= '\\' && c > '\026')
  58. esc = do _ <- char '\\'
  59. Nothing <$ (gap <|> char '&') <|>
  60. Just <$> code
  61. gap = many1 space >> char '\\'
  62. code = eEsc <|> eNum <|> eCtrl <|> eAscii
  63. eCtrl = char '^' >> unCtrl <$> upper
  64. eNum = (toEnum . fromInteger) <$>
  65. (decNumber <|> (char 'o' >> octNumber)
  66. <|> (char 'x' >> hexNumber))
  67. eEsc = choice [ char a >> return b | (a, b) <- escMap ]
  68. eAscii = choice [ try (string a >> return b)
  69. | (a, b) <- asciiMap ]
  70. unCtrl c = toEnum (fromEnum c - fromEnum 'A' + 1)
  71. escMap :: [(Char, Char)]
  72. escMap = zip "abfntv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"
  73. asciiMap :: [(String, Char)]
  74. asciiMap = zip
  75. ["BS","HT","LF","VT","FF","CR","SO","SI","EM"
  76. ,"FS","GS","RS","US","SP","NUL","SOH","STX","ETX"
  77. ,"EOT","ENQ","ACK","BEL","DLE","DC1","DC2","DC3"
  78. ,"DC4","NAK","SYN","ETB","CAN","SUB","ESC","DEL"]
  79. ("\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP\NUL\SOH" ++
  80. "\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK" ++
  81. "\SYN\ETB\CAN\SUB\ESC\DEL")
  82. -- | Parse a Haskell floating-point number as defined by the Haskell
  83. -- 2010 language specification.
  84. parseHaskellFloat :: Parser Double
  85. parseHaskellFloat = do
  86. n <- decNumber
  87. withDot n <|> noDot n
  88. where withDot n = do
  89. _ <- char '.'
  90. m <- decNumber
  91. e <- option 1.0 expn
  92. return ((fromIntegral n + asDec m 0) * e)
  93. noDot n = do
  94. e <- expn
  95. return (fromIntegral n * e)
  96. expn = do
  97. _ <- oneOf "eE"
  98. s <- power
  99. x <- decNumber
  100. return (10 ** s (fromIntegral x))
  101. asDec 0 k = k
  102. asDec n k =
  103. asDec (n `div` 10) ((fromIntegral (n `rem` 10) + k) * 0.1)
  104. power :: Num a => Parser (a -> a)
  105. power = negate <$ char '-' <|> id <$ char '+' <|> return id
  106. -- | Parse a Haskell integer literal as defined by the Haskell 2010
  107. -- language specification.
  108. parseHaskellInt :: Parser Integer
  109. parseHaskellInt = do
  110. s <- power
  111. n <- pZeroNum <|> decNumber
  112. return (fromIntegral (s n))
  113. pZeroNum :: Parser Integer
  114. pZeroNum = char '0' >>
  115. ( (oneOf "xX" >> hexNumber)
  116. <|> (oneOf "oO" >> octNumber)
  117. <|> decNumber
  118. <|> return 0
  119. )
  120. pHaskLikeAtom :: Parser HaskLikeAtom
  121. pHaskLikeAtom
  122. = HSFloat <$> (try parseHaskellFloat <?> "float")
  123. <|> HSInt <$> (try parseHaskellInt <?> "integer")
  124. <|> HSString <$> (parseHaskellString <?> "string literal")
  125. <|> HSIdent <$> (parseR5RSIdent <?> "token")
  126. sHaskLikeAtom :: HaskLikeAtom -> Text
  127. sHaskLikeAtom (HSIdent t) = t
  128. sHaskLikeAtom (HSString s) = pack (show s)
  129. sHaskLikeAtom (HSInt i) = pack (show i)
  130. sHaskLikeAtom (HSFloat f) = pack (show f)
  131. -- | This `SExprParser` understands s-expressions that contain
  132. -- Scheme-like tokens, as well as string literals, integer
  133. -- literals, and floating-point literals. Each of these values
  134. -- is parsed according to the lexical rules in the Haskell
  135. -- report, so the same set of string escapes, numeric bases,
  136. -- and floating-point options are available. This spec does
  137. -- not parse comments and does not understand any reader
  138. -- macros.
  139. --
  140. -- >>> decode haskLikeParser "(0x01 \"\\x65lephant\")"
  141. -- Right [SCons (SAtom (HSInt 1)) (SCons (SAtom (HSString "elephant")) SNil)]
  142. haskLikeParser :: SExprParser HaskLikeAtom (SExpr HaskLikeAtom)
  143. haskLikeParser = mkParser pHaskLikeAtom
  144. -- | A 'haskLikeParser' which produces 'Located' values
  145. --
  146. -- >>> decode locatedHaskLikeParser $ pack "(0x01 \"\\x65lephant\")"
  147. -- Right [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 6)) (HSInt 1))) (SCons (SAtom (At (Span (line 1, column 7) (line 1, column 20)) (HSString "elephant"))) SNil)]
  148. --
  149. -- >>> decode locatedHaskLikeParser $ pack "(1 elephant)"
  150. -- Right [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 3)) (HSInt 1))) (SCons (SAtom (At (Span (line 1, column 4) (line 1, column 12)) (HSIdent "elephant"))) SNil)]
  151. locatedHaskLikeParser :: SExprParser (Located HaskLikeAtom) (SExpr (Located HaskLikeAtom))
  152. locatedHaskLikeParser = mkParser $ located pHaskLikeAtom
  153. -- | This 'SExprPrinter' emits s-expressions that contain Scheme-like
  154. -- tokens as well as string literals, integer literals, and floating-point
  155. -- literals, which will be emitted as the literals produced by Haskell's
  156. -- 'show' function. This printer will produce a flat s-expression with
  157. -- no indentation of any kind.
  158. --
  159. -- >>> encode haskLikePrinter [L [A (HSInt 1), A (HSString "elephant")]]
  160. -- "(1 \"elephant\")"
  161. haskLikePrinter :: SExprPrinter HaskLikeAtom (SExpr HaskLikeAtom)
  162. haskLikePrinter = flatPrint sHaskLikeAtom
  163. -- | Ignore location tags when packing values into text
  164. sLocatedHasklikeAtom :: Located HaskLikeAtom -> Text
  165. sLocatedHasklikeAtom (At _loc e) = sHaskLikeAtom e
  166. -- | A 'SExprPrinter' for 'Located' values. Works exactly like 'haskLikePrinter'
  167. -- It ignores the location tags when printing the result.
  168. --
  169. -- >>> let (Right dec) = decode locatedHaskLikeParser $ pack "(1 elephant)"
  170. -- [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 3)) (HSInt 1))) (SCons (SAtom (At (Span (line 1, column 4) (line 1, column 12)) (HSIdent "elephant"))) SNil)]
  171. --
  172. -- >>> encode locatedHaskLikePrinter dec
  173. -- "(1 elephant)"
  174. locatedHaskLikePrinter :: SExprPrinter (Located HaskLikeAtom) (SExpr (Located HaskLikeAtom))
  175. locatedHaskLikePrinter = flatPrint sLocatedHasklikeAtom