|
@@ -1,8 +1,13 @@
|
|
|
module Data.SCargot.Common ( -- $intro
|
|
|
- -- * Lisp Identifier Syntaxes
|
|
|
+ -- * Identifier Syntaxes
|
|
|
parseR5RSIdent
|
|
|
, parseR6RSIdent
|
|
|
, parseR7RSIdent
|
|
|
+ , parseXIDIdentStrict
|
|
|
+ , parseXIDIdentGeneral
|
|
|
+ , parseHaskellIdent
|
|
|
+ , parseHaskellVariable
|
|
|
+ , parseHaskellConstructor
|
|
|
-- * Numeric Literal Parsers
|
|
|
, signed
|
|
|
, prefixedNumber
|
|
@@ -116,6 +121,93 @@ parseR7RSIdent = T.pack <$>
|
|
|
cons2 a b cs = a : b : cs
|
|
|
cons3 a b c ds = a : b : c : ds
|
|
|
|
|
|
+-- | Parse a Haskell-style variable name.
+--
+-- The first character must satisfy 'isLower'. It may be followed by
+-- any number of further characters, each of which either satisfies
+-- 'isLower', 'isUpper' or 'isDigit', or is a single quote or an
+-- underscore. The matched characters are returned packed as 'Text'.
+--
+-- A leading underscore is /not/ accepted by this parser.
+parseHaskellVariable :: Parser Text
+parseHaskellVariable = do
+  firstChar <- lowerCase
+  restChars <- many identChar
+  return (T.pack (firstChar : restChars))
+  where
+    lowerCase = satisfy isLower
+    upperCase = satisfy isUpper
+    digitChar = satisfy isDigit
+    -- Any character allowed after the first one; the alternatives
+    -- are tried from left to right.
+    identChar =
+      lowerCase <|> upperCase <|> digitChar <|> char '\'' <|> char '_'
|
|
|
+
|
|
|
+-- | Parse a Haskell-style constructor name.
+--
+-- The first character must satisfy 'isUpper'. It may be followed by
+-- any number of further characters, each of which either satisfies
+-- 'isLower', 'isUpper' or 'isDigit', or is a single quote or an
+-- underscore. The matched characters are returned packed as 'Text'.
+parseHaskellConstructor :: Parser Text
+parseHaskellConstructor = T.pack <$> nameChars
+  where
+    -- The leading character followed by zero or more trailing ones.
+    nameChars = (:) <$> leading <*> many trailing
+    leading = satisfy isUpper
+    trailing =
+      satisfy isLower
+        <|> satisfy isUpper
+        <|> satisfy isDigit
+        <|> char '\''
+        <|> char '_'
|
|
|
+
|
|
|
+-- | Parse a Haskell-style identifier, covering both variable and
+-- constructor names.
+--
+-- The first character must satisfy 'isUpper' or 'isLower'. It may be
+-- followed by any number of further characters, each of which either
+-- satisfies 'isLower', 'isUpper' or 'isDigit', or is a single quote
+-- or an underscore. The matched characters are returned packed as
+-- 'Text'.
+parseHaskellIdent :: Parser Text
+parseHaskellIdent = do
+  headChar <- upperCase <|> lowerCase
+  tailChars <- many bodyChar
+  return (T.pack (headChar : tailChars))
+  where
+    lowerCase = satisfy isLower
+    upperCase = satisfy isUpper
+    digitChar = satisfy isDigit
+    -- Any character allowed after the first one; the alternatives
+    -- are tried from left to right.
+    bodyChar =
+      lowerCase <|> upperCase <|> digitChar <|> char '\'' <|> char '_'
|
|
|
+
|
|
|
+-- Build a single-character parser that accepts a character exactly
+-- when 'hasCategory' holds for that character and the supplied list
+-- (presumably: when the character's Unicode general category is one
+-- of those listed -- confirm against 'hasCategory').
+hasCat :: [GeneralCategory] -> Parser Char
+hasCat cats = satisfy inCats
+  where
+    inCats c = hasCategory c cats
|
|
|
+
|
|
|
+-- Unicode general categories accepted for the first character of an
+-- XID-style identifier: every letter category (Lu, Ll, Lt, Lm, Lo)
+-- followed by letter-like numbers (Nl).
+xidStart :: [GeneralCategory]
+xidStart = letterCats ++ [LetterNumber]
+  where
+    letterCats =
+      [ UppercaseLetter
+      , LowercaseLetter
+      , TitlecaseLetter
+      , ModifierLetter
+      , OtherLetter
+      ]
|
|
|
+
|
|
|
+-- Unicode general categories accepted for the second and later
+-- characters of an XID-style identifier: everything in 'xidStart',
+-- followed by combining marks (Mn, Mc), decimal digits (Nd) and
+-- connector punctuation (Pc).
+xidContinue :: [GeneralCategory]
+xidContinue = xidStart ++ continueOnly
+  where
+    continueOnly =
+      [ NonSpacingMark
+      , SpacingCombiningMark
+      , DecimalNumber
+      , ConnectorPunctuation
+      ]
|
|
|
+
|
|
|
+-- | Parse a Unicode identifier of the form
+-- @<XID_Start> <XID_Continue>*@, the shape used for identifiers in
+-- most C-like languages.
+--
+-- The two character classes are taken from the general-category
+-- lists 'xidStart' and 'xidContinue'. Because @XID_Start@ does not
+-- contain the underscore, @__foo@ is rejected; use
+-- 'parseXIDIdentGeneral' when a leading underscore must be allowed.
+parseXIDIdentStrict :: Parser Text
+parseXIDIdentStrict = do
+  startChar <- hasCat xidStart
+  continueChars <- many (hasCat xidContinue)
+  return (T.pack (startChar : continueChars))
|
|
|
+
|
|
|
+-- | Parse a Unicode identifier of the form
+-- @(<XID_Start> | '_') <XID_Continue>*@.
+--
+-- This differs from 'parseXIDIdentStrict' only in that an underscore
+-- is also accepted as the first character. That is closer to the
+-- identifier rules of languages such as C and Java, at the cost of
+-- deviating somewhat from the
+-- <http://unicode.org/reports/tr31/ Unicode Identifier and Pattern Syntax standard>.
+parseXIDIdentGeneral :: Parser Text
+parseXIDIdentGeneral = T.pack <$> ((:) <$> leading <*> trailing)
+  where
+    -- An XID_Start character is tried first, then a literal underscore.
+    leading = hasCat xidStart <|> char '_'
+    trailing = many (hasCat xidContinue)
|
|
|
+
|
|
|
-- | A helper function for defining parsers for arbitrary-base integers.
|
|
|
-- The first argument will be the base, and the second will be the
|
|
|
-- parser for the individual digits.
|