Common.hs 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. module Data.SCargot.Common ( number
  2. , decNumber
  3. , hexNumber
  4. , octNumber
  5. , sign
  6. -- * Lisp Identifier Syntaxes
  7. , parseR5RSIdent
  8. , parseR6RSIdent
  9. , parseR7RSIdent
  10. ) where
  11. import Data.Char
  12. import Data.Text (Text)
  13. import qualified Data.Text as T
  14. import Text.Parsec
  15. import Text.Parsec.Char (satisfy)
  16. import Text.Parsec.Text (Parser)
  17. parseR5RSIdent :: Parser Text
  18. parseR5RSIdent =
  19. T.pack <$> ((:) <$> initial <*> many subsequent <|> peculiar)
  20. where initial = letter <|> oneOf "!$%&*/:<=>?^_~"
  21. subsequent = initial <|> digit <|> oneOf "+-.@"
  22. peculiar = string "+" <|> string "-" <|> string "..."
  23. hasCategory :: Char -> [GeneralCategory] -> Bool
  24. hasCategory c cs = generalCategory c `elem` cs
  25. parseR6RSIdent :: Parser Text
  26. parseR6RSIdent =
  27. T.pack <$> ((:) <$> initial <*> many subsequent <|> peculiar)
  28. where initial = constituent <|> oneOf "!$%&*/:<=>?^_~" <|> inlineHex
  29. constituent = letter
  30. <|> uniClass (\ c -> isLetter c ||
  31. isSymbol c ||
  32. hasCategory c
  33. [ NonSpacingMark
  34. , LetterNumber
  35. , OtherNumber
  36. , DashPunctuation
  37. , ConnectorPunctuation
  38. , OtherPunctuation
  39. , PrivateUse
  40. ])
  41. inlineHex = (chr . fromIntegral) <$> (string "\\x" *> hexNumber <* char ';')
  42. subsequent = initial <|> digit <|> oneOf "+-.@"
  43. <|> uniClass (\ c -> hasCategory c
  44. [ DecimalNumber
  45. , SpacingCombiningMark
  46. , EnclosingMark
  47. ])
  48. peculiar = string "+" <|> string "-" <|> string "..." <|>
  49. ((++) <$> string "->" <*> many subsequent)
  50. uniClass :: (Char -> Bool) -> Parser Char
  51. uniClass sp = satisfy (\ c -> c > '\x7f' && sp c)
  52. parseR7RSIdent :: Parser Text
  53. parseR7RSIdent = T.pack <$>
  54. ( (:) <$> initial <*> many subsequent
  55. <|> char '|' *> many1 symbolElement <* char '|'
  56. <|> peculiar
  57. )
  58. where initial = letter <|> specInit
  59. specInit = oneOf "!$%&*/:<=>?^_~"
  60. subsequent = initial <|> digit <|> specSubsequent
  61. specSubsequent = expSign <|> oneOf ".@"
  62. expSign = oneOf "+-"
  63. symbolElement = undefined
  64. peculiar = undefined
  65. -- | A helper function for defining parsers for arbitrary-base integers.
  66. -- The first argument will be the base, and the second will be the
  67. -- parser for the individual digits.
  68. number :: Integer -> Parser Char -> Parser Integer
  69. number base digits = foldl go 0 <$> many1 digits
  70. where go x d = base * x + toInteger (value d)
  71. value c
  72. | c == 'a' || c == 'A' = 0xa
  73. | c == 'b' || c == 'B' = 0xb
  74. | c == 'c' || c == 'C' = 0xc
  75. | c == 'd' || c == 'D' = 0xd
  76. | c == 'e' || c == 'E' = 0xe
  77. | c == 'f' || c == 'F' = 0xf
  78. | c >= '0' && c <= '9' = fromEnum c - fromEnum '0'
  79. | otherwise = error ("Unknown letter in number: " ++ show c)
  80. -- | A parser for bare binary numbers
  81. binNumber :: Parser Integer
  82. binNumber = number 2 (char '0' <|> char '1')
  83. -- | A parser for bare octal numbers
  84. octNumber :: Parser Integer
  85. octNumber = number 8 digit
  86. -- | A parser for bare decimal numbers
  87. decNumber :: Parser Integer
  88. decNumber = number 10 digit
  89. -- | A parser for bare hexadecimal numbers
  90. hexNumber :: Parser Integer
  91. hexNumber = number 16 hexDigit
  92. -- | A parser for numeric signs, represented as a function from numbers
  93. -- to numbers. It will parse '+' as the identity function, '-', as
  94. -- 'negate', or consume no input and return the identity function.
  95. -- This can be combined with other numeric literals to implement
  96. -- signedness:
  97. --
  98. -- > myNum = go <$> sign <*> decNumber
  99. -- > where go s n = s n
  100. sign :: Num a => Parser (a -> a)
  101. sign = (pure id <* char '+')
  102. <|> (pure negate <* char '-')
  103. <|> pure id