123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200 |
- {-# LANGUAGE FlexibleInstances #-}
- {-# LANGUAGE OverloadedStrings #-}
- module Data.SCargot.Language.HaskLike
- ( -- $info
- HaskLikeAtom(..)
- , haskLikeParser
- , haskLikePrinter
- , locatedHaskLikeParser
- , locatedHaskLikePrinter
- -- * Individual Parsers
- , parseHaskellString
- , parseHaskellFloat
- , parseHaskellInt
- ) where
- #if !MIN_VERSION_base(4,8,0)
- import Control.Applicative ((<$>), (<$))
- #endif
- import Data.Maybe (catMaybes)
- import Data.String (IsString(..))
- import Data.Text (Text, pack)
- import Text.Parsec
- import Text.Parsec.Text (Parser)
- import Prelude hiding (concatMap)
- import Data.SCargot.Common
- import Data.SCargot.Repr.Basic (SExpr)
- import Data.SCargot (SExprParser, SExprPrinter, mkParser, flatPrint)
- {- $info
- This module is intended for simple, ad-hoc configuration or data
- formats that might not need their on rich structure but might benefit
- from a few various kinds of literals. The 'haskLikeParser' understands
- identifiers as defined by R5RS, as well as string, integer, and
- floating-point literals as defined by the Haskell 2010 spec. It does
- __not__ natively understand other data types, such as booleans,
- vectors, bitstrings.
- -}
- -- | An atom type that understands Haskell-like values as well as
- -- Scheme-like identifiers.
- data HaskLikeAtom
- = HSIdent Text -- ^ An identifier, parsed according to the R5RS Scheme
- -- standard
- | HSString Text -- ^ A string, parsed according to the syntax for string
- -- literals in the Haskell report
- | HSInt Integer -- ^ An arbitrary-sized integer value, parsed according to
- -- the syntax for integer literals in the Haskell report
- | HSFloat Double -- ^ A double-precision floating-point value, parsed
- -- according to the syntax for floats in the Haskell
- -- report
- deriving (Eq, Show)
- instance IsString HaskLikeAtom where
- fromString = HSIdent . fromString
- instance IsString (Located HaskLikeAtom) where
- fromString = (At dLocation) . HSIdent . fromString
- -- | Parse a Haskell string literal as defined by the Haskell 2010
- -- language specification.
- parseHaskellString :: Parser Text
- parseHaskellString = pack . catMaybes <$> between (char '"') (char '"') (many (val <|> esc))
- where val = Just <$> satisfy (\ c -> c /= '"' && c /= '\\' && c > '\026')
- esc = do _ <- char '\\'
- Nothing <$ (gap <|> char '&') <|>
- Just <$> code
- gap = many1 space >> char '\\'
- code = eEsc <|> eNum <|> eCtrl <|> eAscii
- eCtrl = char '^' >> unCtrl <$> upper
- eNum = (toEnum . fromInteger) <$>
- (decNumber <|> (char 'o' >> octNumber)
- <|> (char 'x' >> hexNumber))
- eEsc = choice [ char a >> return b | (a, b) <- escMap ]
- eAscii = choice [ try (string a >> return b)
- | (a, b) <- asciiMap ]
- unCtrl c = toEnum (fromEnum c - fromEnum 'A' + 1)
- escMap :: [(Char, Char)]
- escMap = zip "abfntv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"
- asciiMap :: [(String, Char)]
- asciiMap = zip
- ["BS","HT","LF","VT","FF","CR","SO","SI","EM"
- ,"FS","GS","RS","US","SP","NUL","SOH","STX","ETX"
- ,"EOT","ENQ","ACK","BEL","DLE","DC1","DC2","DC3"
- ,"DC4","NAK","SYN","ETB","CAN","SUB","ESC","DEL"]
- ("\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP\NUL\SOH" ++
- "\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK" ++
- "\SYN\ETB\CAN\SUB\ESC\DEL")
- -- | Parse a Haskell floating-point number as defined by the Haskell
- -- 2010 language specification.
- parseHaskellFloat :: Parser Double
- parseHaskellFloat = do
- n <- decNumber
- withDot n <|> noDot n
- where withDot n = do
- _ <- char '.'
- m <- decNumber
- e <- option 1.0 expn
- return ((fromIntegral n + asDec m 0) * e)
- noDot n = do
- e <- expn
- return (fromIntegral n * e)
- expn = do
- _ <- oneOf "eE"
- s <- power
- x <- decNumber
- return (10 ** s (fromIntegral x))
- asDec 0 k = k
- asDec n k =
- asDec (n `div` 10) ((fromIntegral (n `rem` 10) + k) * 0.1)
- power :: Num a => Parser (a -> a)
- power = negate <$ char '-' <|> id <$ char '+' <|> return id
- -- | Parse a Haskell integer literal as defined by the Haskell 2010
- -- language specification.
- parseHaskellInt :: Parser Integer
- parseHaskellInt = do
- s <- power
- n <- pZeroNum <|> decNumber
- return (fromIntegral (s n))
- pZeroNum :: Parser Integer
- pZeroNum = char '0' >>
- ( (oneOf "xX" >> hexNumber)
- <|> (oneOf "oO" >> octNumber)
- <|> decNumber
- <|> return 0
- )
- pHaskLikeAtom :: Parser HaskLikeAtom
- pHaskLikeAtom
- = HSFloat <$> (try parseHaskellFloat <?> "float")
- <|> HSInt <$> (try parseHaskellInt <?> "integer")
- <|> HSString <$> (parseHaskellString <?> "string literal")
- <|> HSIdent <$> (parseR5RSIdent <?> "token")
- sHaskLikeAtom :: HaskLikeAtom -> Text
- sHaskLikeAtom (HSIdent t) = t
- sHaskLikeAtom (HSString s) = pack (show s)
- sHaskLikeAtom (HSInt i) = pack (show i)
- sHaskLikeAtom (HSFloat f) = pack (show f)
- -- | This `SExprParser` understands s-expressions that contain
- -- Scheme-like tokens, as well as string literals, integer
- -- literals, and floating-point literals. Each of these values
- -- is parsed according to the lexical rules in the Haskell
- -- report, so the same set of string escapes, numeric bases,
- -- and floating-point options are available. This spec does
- -- not parse comments and does not understand any reader
- -- macros.
- --
- -- >>> decode haskLikeParser "(0x01 \"\\x65lephant\")"
- -- Right [SCons (SAtom (HSInt 1)) (SCons (SAtom (HSString "elephant")) SNil)]
- haskLikeParser :: SExprParser HaskLikeAtom (SExpr HaskLikeAtom)
- haskLikeParser = mkParser pHaskLikeAtom
- -- | A 'haskLikeParser' which produces 'Located' values
- --
- -- >>> decode locatedHaskLikeParser $ pack "(0x01 \"\\x65lephant\")"
- -- Right [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 6)) (HSInt 1))) (SCons (SAtom (At (Span (line 1, column 7) (line 1, column 20)) (HSString "elephant"))) SNil)]
- --
- -- >>> decode locatedHaskLikeParser $ pack "(1 elephant)"
- -- Right [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 3)) (HSInt 1))) (SCons (SAtom (At (Span (line 1, column 4) (line 1, column 12)) (HSIdent "elephant"))) SNil)]
- locatedHaskLikeParser :: SExprParser (Located HaskLikeAtom) (SExpr (Located HaskLikeAtom))
- locatedHaskLikeParser = mkParser $ located pHaskLikeAtom
- -- | This 'SExprPrinter' emits s-expressions that contain Scheme-like
- -- tokens as well as string literals, integer literals, and floating-point
- -- literals, which will be emitted as the literals produced by Haskell's
- -- 'show' function. This printer will produce a flat s-expression with
- -- no indentation of any kind.
- --
- -- >>> encode haskLikePrinter [L [A (HSInt 1), A (HSString "elephant")]]
- -- "(1 \"elephant\")"
- haskLikePrinter :: SExprPrinter HaskLikeAtom (SExpr HaskLikeAtom)
- haskLikePrinter = flatPrint sHaskLikeAtom
- -- | Ignore location tags when packing values into text
- sLocatedHasklikeAtom :: Located HaskLikeAtom -> Text
- sLocatedHasklikeAtom (At _loc e) = sHaskLikeAtom e
- -- | A 'SExprPrinter' for 'Located' values. Works exactly like 'haskLikePrinter'
- -- It ignores the location tags when printing the result.
- --
- -- >>> let (Right dec) = decode locatedHaskLikeParser $ pack "(1 elephant)"
- -- [SCons (SAtom (At (Span (line 1, column 2) (line 1, column 3)) (HSInt 1))) (SCons (SAtom (At (Span (line 1, column 4) (line 1, column 12)) (HSIdent "elephant"))) SNil)]
- --
- -- >>> encode locatedHaskLikePrinter dec
- -- "(1 elephant)"
- locatedHaskLikePrinter :: SExprPrinter (Located HaskLikeAtom) (SExpr (Located HaskLikeAtom))
- locatedHaskLikePrinter = flatPrint sLocatedHasklikeAtom
|