HaskLike.hs 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. {-# LANGUAGE FlexibleInstances #-}
  2. {-# LANGUAGE OverloadedStrings #-}
  3. module Data.SCargot.Language.HaskLike
  4. ( -- $info
  5. HaskLikeAtom(..)
  6. , haskLikeParser
  7. , haskLikePrinter
  8. -- * Individual Parsers
  9. , parseHaskellString
  10. , parseHaskellFloat
  11. , parseHaskellInt
  12. , locatedHaskLikeParser
  13. ) where
  14. #if !MIN_VERSION_base(4,8,0)
  15. import Control.Applicative ((<$>), (<$))
  16. #endif
  17. import Data.Maybe (catMaybes)
  18. import Data.String (IsString(..))
  19. import Data.Text (Text, pack)
  20. import Text.Parsec
  21. import Text.Parsec.Text (Parser)
  22. import Prelude hiding (concatMap)
  23. import Data.SCargot.Common
  24. import Data.SCargot.Repr.Basic (SExpr)
  25. import Data.SCargot (SExprParser, SExprPrinter, mkParser, flatPrint)
  26. {- $info
  27. This module is intended for simple, ad-hoc configuration or data
  28. formats that might not need their on rich structure but might benefit
  29. from a few various kinds of literals. The 'haskLikeParser' understands
  30. identifiers as defined by R5RS, as well as string, integer, and
  31. floating-point literals as defined by the Haskell 2010 spec. It does
  32. __not__ natively understand other data types, such as booleans,
  33. vectors, bitstrings.
  34. -}
  35. -- | An atom type that understands Haskell-like values as well as
  36. -- Scheme-like identifiers.
  37. data HaskLikeAtom
  38. = HSIdent Text -- ^ An identifier, parsed according to the R5RS Scheme
  39. -- standard
  40. | HSString Text -- ^ A string, parsed according to the syntax for string
  41. -- literals in the Haskell report
  42. | HSInt Integer -- ^ An arbitrary-sized integer value, parsed according to
  43. -- the syntax for integer literals in the Haskell report
  44. | HSFloat Double -- ^ A double-precision floating-point value, parsed
  45. -- according to the syntax for floats in the Haskell
  46. -- report
  47. deriving (Eq, Show)
  48. instance IsString HaskLikeAtom where
  49. fromString = HSIdent . fromString
  50. instance IsString (Located HaskLikeAtom) where
  51. fromString = (At dLocation) . HSIdent . fromString
  52. -- | Parse a Haskell string literal as defined by the Haskell 2010
  53. -- language specification.
  54. parseHaskellString :: Parser Text
  55. parseHaskellString = pack . catMaybes <$> between (char '"') (char '"') (many (val <|> esc))
  56. where val = Just <$> satisfy (\ c -> c /= '"' && c /= '\\' && c > '\026')
  57. esc = do _ <- char '\\'
  58. Nothing <$ (gap <|> char '&') <|>
  59. Just <$> code
  60. gap = many1 space >> char '\\'
  61. code = eEsc <|> eNum <|> eCtrl <|> eAscii
  62. eCtrl = char '^' >> unCtrl <$> upper
  63. eNum = (toEnum . fromInteger) <$>
  64. (decNumber <|> (char 'o' >> octNumber)
  65. <|> (char 'x' >> hexNumber))
  66. eEsc = choice [ char a >> return b | (a, b) <- escMap ]
  67. eAscii = choice [ try (string a >> return b)
  68. | (a, b) <- asciiMap ]
  69. unCtrl c = toEnum (fromEnum c - fromEnum 'A' + 1)
  70. escMap :: [(Char, Char)]
  71. escMap = zip "abfntv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"
  72. asciiMap :: [(String, Char)]
  73. asciiMap = zip
  74. ["BS","HT","LF","VT","FF","CR","SO","SI","EM"
  75. ,"FS","GS","RS","US","SP","NUL","SOH","STX","ETX"
  76. ,"EOT","ENQ","ACK","BEL","DLE","DC1","DC2","DC3"
  77. ,"DC4","NAK","SYN","ETB","CAN","SUB","ESC","DEL"]
  78. ("\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP\NUL\SOH" ++
  79. "\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK" ++
  80. "\SYN\ETB\CAN\SUB\ESC\DEL")
  81. -- | Parse a Haskell floating-point number as defined by the Haskell
  82. -- 2010 language specification.
  83. parseHaskellFloat :: Parser Double
  84. parseHaskellFloat = do
  85. n <- decNumber
  86. withDot n <|> noDot n
  87. where withDot n = do
  88. _ <- char '.'
  89. m <- decNumber
  90. e <- option 1.0 expn
  91. return ((fromIntegral n + asDec m 0) * e)
  92. noDot n = do
  93. e <- expn
  94. return (fromIntegral n * e)
  95. expn = do
  96. _ <- oneOf "eE"
  97. s <- power
  98. x <- decNumber
  99. return (10 ** s (fromIntegral x))
  100. asDec 0 k = k
  101. asDec n k =
  102. asDec (n `div` 10) ((fromIntegral (n `rem` 10) + k) * 0.1)
  103. power :: Num a => Parser (a -> a)
  104. power = negate <$ char '-' <|> id <$ char '+' <|> return id
  105. -- | Parse a Haskell integer literal as defined by the Haskell 2010
  106. -- language specification.
  107. parseHaskellInt :: Parser Integer
  108. parseHaskellInt = do
  109. s <- power
  110. n <- pZeroNum <|> decNumber
  111. return (fromIntegral (s n))
  112. pZeroNum :: Parser Integer
  113. pZeroNum = char '0' >>
  114. ( (oneOf "xX" >> hexNumber)
  115. <|> (oneOf "oO" >> octNumber)
  116. <|> decNumber
  117. <|> return 0
  118. )
  119. pHaskLikeAtom :: Parser HaskLikeAtom
  120. pHaskLikeAtom
  121. = HSFloat <$> (try parseHaskellFloat <?> "float")
  122. <|> HSInt <$> (try parseHaskellInt <?> "integer")
  123. <|> HSString <$> (parseHaskellString <?> "string literal")
  124. <|> HSIdent <$> (parseR5RSIdent <?> "token")
  125. sHaskLikeAtom :: HaskLikeAtom -> Text
  126. sHaskLikeAtom (HSIdent t) = t
  127. sHaskLikeAtom (HSString s) = pack (show s)
  128. sHaskLikeAtom (HSInt i) = pack (show i)
  129. sHaskLikeAtom (HSFloat f) = pack (show f)
  130. -- | This `SExprParser` understands s-expressions that contain
  131. -- Scheme-like tokens, as well as string literals, integer
  132. -- literals, and floating-point literals. Each of these values
  133. -- is parsed according to the lexical rules in the Haskell
  134. -- report, so the same set of string escapes, numeric bases,
  135. -- and floating-point options are available. This spec does
  136. -- not parse comments and does not understand any reader
  137. -- macros.
  138. --
  139. -- >>> decode haskLikeParser "(0x01 \"\\x65lephant\")"
  140. -- Right [SCons (SAtom (HSInt 1)) (SCons (SAtom (HSString "elephant")) SNil)]
  141. haskLikeParser :: SExprParser HaskLikeAtom (SExpr HaskLikeAtom)
  142. haskLikeParser = mkParser pHaskLikeAtom
  143. -- | A 'haskLikeParser' which produces 'Located' values
  144. --
  145. -- >>> decode locatedHaskLikeParser $ pack "(0x01 \"\\x65lephant\")"
  146. -- Right [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 6)) (HSInt 1))) (SCons (SAtom (At (Span (line 1, column 7) (line 1, column 20)) (HSString "elephant"))) SNil)]
  147. --
  148. -- >>> decode locatedHaskLikeParser $ pack "(1 elephant)"
  149. -- Right [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 3)) (HSInt 1))) (SCons (SAtom (At (Span (line 1, column 4) (line 1, column 12)) (HSIdent "elephant"))) SNil)]
  150. locatedHaskLikeParser :: SExprParser (Located HaskLikeAtom) (SExpr (Located HaskLikeAtom))
  151. locatedHaskLikeParser = mkParser $ located pHaskLikeAtom
  152. -- | This 'SExprPrinter' emits s-expressions that contain Scheme-like
  153. -- tokens as well as string literals, integer literals, and floating-point
  154. -- literals, which will be emitted as the literals produced by Haskell's
  155. -- 'show' function. This printer will produce a flat s-expression with
  156. -- no indentation of any kind.
  157. --
  158. -- >>> encode haskLikePrinter [L [A (HSInt 1), A (HSString "elephant")]]
  159. -- "(1 \"elephant\")"
  160. haskLikePrinter :: SExprPrinter HaskLikeAtom (SExpr HaskLikeAtom)
  161. haskLikePrinter = flatPrint sHaskLikeAtom