HaskLike.hs 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. {-# LANGUAGE OverloadedStrings #-}
  2. module Data.SCargot.Language.HaskLike
  3. ( -- $info
  4. HaskLikeAtom(..)
  5. , haskLikeParser
  6. , haskLikePrinter
  7. -- * Individual Parsers
  8. , parseHaskellString
  9. , parseHaskellFloat
  10. , parseHaskellInt
  11. , locatedHaskLikeParser
  12. ) where
  13. #if !MIN_VERSION_base(4,8,0)
  14. import Control.Applicative ((<$>), (<$))
  15. #endif
  16. import Data.Maybe (catMaybes)
  17. import Data.String (IsString(..))
  18. import Data.Text (Text, pack)
  19. import Text.Parsec
  20. import Text.Parsec.Text (Parser)
  21. import Prelude hiding (concatMap)
  22. import Data.SCargot.Common
  23. import Data.SCargot.Repr.Basic (SExpr)
  24. import Data.SCargot (SExprParser, SExprPrinter, mkParser, flatPrint)
  25. {- $info
  26. This module is intended for simple, ad-hoc configuration or data
  27. formats that might not need their on rich structure but might benefit
  28. from a few various kinds of literals. The 'haskLikeParser' understands
  29. identifiers as defined by R5RS, as well as string, integer, and
  30. floating-point literals as defined by the Haskell 2010 spec. It does
  31. __not__ natively understand other data types, such as booleans,
  32. vectors, bitstrings.
  33. -}
  34. -- | An atom type that understands Haskell-like values as well as
  35. -- Scheme-like identifiers.
  36. data HaskLikeAtom
  37. = HSIdent Text -- ^ An identifier, parsed according to the R5RS Scheme
  38. -- standard
  39. | HSString Text -- ^ A string, parsed according to the syntax for string
  40. -- literals in the Haskell report
  41. | HSInt Integer -- ^ An arbitrary-sized integer value, parsed according to
  42. -- the syntax for integer literals in the Haskell report
  43. | HSFloat Double -- ^ A double-precision floating-point value, parsed
  44. -- according to the syntax for floats in the Haskell
  45. -- report
  46. deriving (Eq, Show)
  47. instance IsString HaskLikeAtom where
  48. fromString = HSIdent . fromString
  49. -- | Parse a Haskell string literal as defined by the Haskell 2010
  50. -- language specification.
  51. parseHaskellString :: Parser Text
  52. parseHaskellString = pack . catMaybes <$> between (char '"') (char '"') (many (val <|> esc))
  53. where val = Just <$> satisfy (\ c -> c /= '"' && c /= '\\' && c > '\026')
  54. esc = do _ <- char '\\'
  55. Nothing <$ (gap <|> char '&') <|>
  56. Just <$> code
  57. gap = many1 space >> char '\\'
  58. code = eEsc <|> eNum <|> eCtrl <|> eAscii
  59. eCtrl = char '^' >> unCtrl <$> upper
  60. eNum = (toEnum . fromInteger) <$>
  61. (decNumber <|> (char 'o' >> octNumber)
  62. <|> (char 'x' >> hexNumber))
  63. eEsc = choice [ char a >> return b | (a, b) <- escMap ]
  64. eAscii = choice [ try (string a >> return b)
  65. | (a, b) <- asciiMap ]
  66. unCtrl c = toEnum (fromEnum c - fromEnum 'A' + 1)
  67. escMap :: [(Char, Char)]
  68. escMap = zip "abfntv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"
  69. asciiMap :: [(String, Char)]
  70. asciiMap = zip
  71. ["BS","HT","LF","VT","FF","CR","SO","SI","EM"
  72. ,"FS","GS","RS","US","SP","NUL","SOH","STX","ETX"
  73. ,"EOT","ENQ","ACK","BEL","DLE","DC1","DC2","DC3"
  74. ,"DC4","NAK","SYN","ETB","CAN","SUB","ESC","DEL"]
  75. ("\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP\NUL\SOH" ++
  76. "\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK" ++
  77. "\SYN\ETB\CAN\SUB\ESC\DEL")
  78. -- | Parse a Haskell floating-point number as defined by the Haskell
  79. -- 2010 language specification.
  80. parseHaskellFloat :: Parser Double
  81. parseHaskellFloat = do
  82. n <- decNumber
  83. withDot n <|> noDot n
  84. where withDot n = do
  85. _ <- char '.'
  86. m <- decNumber
  87. e <- option 1.0 expn
  88. return ((fromIntegral n + asDec m 0) * e)
  89. noDot n = do
  90. e <- expn
  91. return (fromIntegral n * e)
  92. expn = do
  93. _ <- oneOf "eE"
  94. s <- power
  95. x <- decNumber
  96. return (10 ** s (fromIntegral x))
  97. asDec 0 k = k
  98. asDec n k =
  99. asDec (n `div` 10) ((fromIntegral (n `rem` 10) + k) * 0.1)
  100. power :: Num a => Parser (a -> a)
  101. power = negate <$ char '-' <|> id <$ char '+' <|> return id
  102. -- | Parse a Haskell integer literal as defined by the Haskell 2010
  103. -- language specification.
  104. parseHaskellInt :: Parser Integer
  105. parseHaskellInt = do
  106. s <- power
  107. n <- pZeroNum <|> decNumber
  108. return (fromIntegral (s n))
  109. pZeroNum :: Parser Integer
  110. pZeroNum = char '0' >>
  111. ( (oneOf "xX" >> hexNumber)
  112. <|> (oneOf "oO" >> octNumber)
  113. <|> decNumber
  114. <|> return 0
  115. )
  116. pHaskLikeAtom :: Parser HaskLikeAtom
  117. pHaskLikeAtom
  118. = HSFloat <$> (try parseHaskellFloat <?> "float")
  119. <|> HSInt <$> (try parseHaskellInt <?> "integer")
  120. <|> HSString <$> (parseHaskellString <?> "string literal")
  121. <|> HSIdent <$> (parseR5RSIdent <?> "token")
  122. sHaskLikeAtom :: HaskLikeAtom -> Text
  123. sHaskLikeAtom (HSIdent t) = t
  124. sHaskLikeAtom (HSString s) = pack (show s)
  125. sHaskLikeAtom (HSInt i) = pack (show i)
  126. sHaskLikeAtom (HSFloat f) = pack (show f)
  127. -- | This `SExprParser` understands s-expressions that contain
  128. -- Scheme-like tokens, as well as string literals, integer
  129. -- literals, and floating-point literals. Each of these values
  130. -- is parsed according to the lexical rules in the Haskell
  131. -- report, so the same set of string escapes, numeric bases,
  132. -- and floating-point options are available. This spec does
  133. -- not parse comments and does not understand any reader
  134. -- macros.
  135. --
  136. -- >>> decode haskLikeParser "(0x01 \"\\x65lephant\")"
  137. -- Right [SCons (SAtom (HSInt 1)) (SCons (SAtom (HSString "elephant")) SNil)]
  138. haskLikeParser :: SExprParser HaskLikeAtom (SExpr HaskLikeAtom)
  139. haskLikeParser = mkParser pHaskLikeAtom
  140. -- | A 'haskLikeParser' which produces 'Located' values
  141. --
  142. -- >>> decode locatedHaskLikeParser $ pack "(0x01 \"\\x65lephant\")"
  143. -- Right [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 6)) (HSInt 1))) (SCons (SAtom (At (Span (line 1, column 7) (line 1, column 20)) (HSString "elephant"))) SNil)]
  144. --
  145. -- >>> decode locatedHaskLikeParser $ pack "(1 elephant)"
  146. -- Right [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 3)) (HSInt 1))) (SCons (SAtom (At (Span (line 1, column 4) (line 1, column 12)) (HSIdent "elephant"))) SNil)]
  147. locatedHaskLikeParser :: SExprParser (Located HaskLikeAtom) (SExpr (Located HaskLikeAtom))
  148. locatedHaskLikeParser = mkParser $ located pHaskLikeAtom
  149. -- | This 'SExprPrinter' emits s-expressions that contain Scheme-like
  150. -- tokens as well as string literals, integer literals, and floating-point
  151. -- literals, which will be emitted as the literals produced by Haskell's
  152. -- 'show' function. This printer will produce a flat s-expression with
  153. -- no indentation of any kind.
  154. --
  155. -- >>> encode haskLikePrinter [L [A (HSInt 1), A (HSString "elephant")]]
  156. -- "(1 \"elephant\")"
  157. haskLikePrinter :: SExprPrinter HaskLikeAtom (SExpr HaskLikeAtom)
  158. haskLikePrinter = flatPrint sHaskLikeAtom