Browse Source

Removing R7RS from s-cargot proper, pushing Scheme identifier functions to new Common file

Getty Ritter 9 years ago
parent
commit
2b126f22b6

+ 13 - 12
Data/SCargot/Basic.hs

@@ -1,13 +1,9 @@
 {-# LANGUAGE OverloadedStrings #-}
 
 module Data.SCargot.Basic
-  ( basicSpec
-  , asRich
-  , asWellFormed
-  , addReader
-  , setComment
-  , withLispComments
-  , withQuote
+  ( -- * Spec
+    -- $descr
+    basicSpec
   ) where
 
 import           Control.Applicative ((<$>))
@@ -18,11 +14,6 @@ import           Data.Text (Text, pack)
 import           Data.SCargot.Repr.Basic (SExpr)
 import           Data.SCargot.General ( SExprSpec
                                       , mkSpec
-                                      , asRich
-                                      , asWellFormed
-                                      , addReader
-                                      , setComment
-                                      , withQuote
                                       )
 import           Data.SCargot.Comments (withLispComments)
 
@@ -32,12 +23,19 @@ isAtomChar c = isAlphaNum c
   || c == '+' || c == '<' || c == '>'
   || c == '=' || c == '!' || c == '?'
 
+-- $descr
+-- The 'basicSpec' describes S-expressions whose atoms are simply
+-- text strings that contain alphanumeric characters and a small
+-- set of punctuation. It does no parsing of numbers or other data
+-- types, and will accept tokens that typical Lisp implementations
+-- would find nonsensical (like @77foo@).
+--
+-- Atoms recognized by the 'basicSpec' are any string matching the
+-- regular expression @[A-Za-z0-9+*<>/=!?-]+@.
+
 -- | A 'SExprSpec' that understands atoms to be sequences of
 --   alphanumeric characters as well as the punctuation
 --   characters @[-*/+<>=!?]@, and does no processing of them.
 basicSpec :: SExprSpec Text (SExpr Text)
 basicSpec = mkSpec pToken id
   where pToken = pack <$> many1 (satisfy isAtomChar)

+ 12 - 5
Data/SCargot/Comments.hs

@@ -1,20 +1,21 @@
 {-# LANGUAGE OverloadedStrings #-}
 
 module Data.SCargot.Comments
-  ( -- * Comment Syntax
-    -- $intro
-    -- * Lisp Comments
+  ( -- $intro
+
+    -- * Lisp-Style Syntax
+    -- $lisp
     withLispComments
     -- * Other Existing Comment Syntaxes
     -- ** Scripting Language Syntax
     -- $script
   , withOctothorpeComments
-    -- ** C-Like Syntax
+    -- ** C-Style Syntax
     -- $clike
   , withCLikeLineComments
   , withCLikeBlockComments
   , withCLikeComments
-    -- ** Haskell Syntax
+    -- ** Haskell-Style Syntax
     -- $haskell
   , withHaskellLineComments
   , withHaskellBlockComments
@@ -146,6 +147,12 @@ comment syntaxes:
 
 -}
 
+{- $lisp
+> (one   ; a comment
+>   two  ; another one
+>   three)
+-}
+
 {- $script
 > (one   # a comment
 >   two  # another one

+ 114 - 0
Data/SCargot/Common.hs

@@ -0,0 +1,114 @@
+module Data.SCargot.Common ( number
+                           , decNumber
+                           , hexNumber
+                           , octNumber
+                           , sign
+                           -- * Lisp Identifier Syntaxes
+                           , parseR5RSIdent
+                           , parseR6RSIdent
+                           , parseR7RSIdent
+                           ) where
+
+import           Data.Char
+import           Data.Text (Text)
+import qualified Data.Text as T
+import           Text.Parsec
+import           Text.Parsec.Char (satisfy)
+import           Text.Parsec.Text (Parser)
+
+parseR5RSIdent :: Parser Text
+parseR5RSIdent =
+  T.pack <$> ((:) <$> initial <*> many subsequent <|> peculiar)
+  where initial    = letter <|> oneOf "!$%&*/:<=>?^_~"
+        subsequent = initial <|> digit <|> oneOf "+-.@"
+        peculiar   = string "+" <|> string "-" <|> string "..."
+
+hasCategory :: Char -> [GeneralCategory] -> Bool
+hasCategory c cs = generalCategory c `elem` cs
+
+parseR6RSIdent :: Parser Text
+parseR6RSIdent =
+  T.pack <$> ((:) <$> initial <*> many subsequent <|> peculiar)
+  where initial = constituent <|> oneOf "!$%&*/:<=>?^_~" <|> inlineHex
+        constituent = letter
+                   <|> uniClass (\ c -> isLetter c ||
+                                        isSymbol c ||
+                                        hasCategory c
+                                          [ NonSpacingMark
+                                          , LetterNumber
+                                          , OtherNumber
+                                          , DashPunctuation
+                                          , ConnectorPunctuation
+                                          , OtherPunctuation
+                                          , PrivateUse
+                                          ])
+        inlineHex   = (chr . fromIntegral) <$> (string "\\x" *> hexNumber <* char ';')
+        subsequent  = initial <|> digit <|> oneOf "+-.@"
+                   <|> uniClass (\ c -> hasCategory c
+                                          [ DecimalNumber
+                                          , SpacingCombiningMark
+                                          , EnclosingMark
+                                          ])
+        peculiar    = string "+" <|> string "-" <|> string "..." <|>
+                      ((++) <$> string "->" <*> many subsequent)
+        uniClass :: (Char -> Bool) -> Parser Char
+        uniClass sp = satisfy (\ c -> c > '\x7f' && sp c)
+
+parseR7RSIdent :: Parser Text
+parseR7RSIdent =  T.pack <$>
+          (  (:) <$> initial <*> many subsequent
+         <|> char '|' *> many1 symbolElement <* char '|'
+         <|> peculiar
+          )
+  where initial = letter <|> specInit
+        specInit = oneOf "!$%&*/:<=>?^_~"
+        subsequent = initial <|> digit <|> specSubsequent
+        specSubsequent = expSign <|> oneOf ".@"
+        expSign = oneOf "+-"
+        symbolElement = undefined
+        peculiar = undefined
+
+-- | A helper function for defining parsers for arbitrary-base integers.
+--   The first argument will be the base, and the second will be the
+--   parser for the individual digits.
+number :: Integer -> Parser Char -> Parser Integer
+number base digits = foldl go 0 <$> many1 digits
+  where go x d = base * x + toInteger (value d)
+        value c
+          | c == 'a' || c == 'A' = 0xa
+          | c == 'b' || c == 'B' = 0xb
+          | c == 'c' || c == 'C' = 0xc
+          | c == 'd' || c == 'D' = 0xd
+          | c == 'e' || c == 'E' = 0xe
+          | c == 'f' || c == 'F' = 0xf
+          | c >= '0' && c <= '9' = fromEnum c - fromEnum '0'
+          | otherwise = error ("Unknown letter in number: " ++ show c)
+
+-- | A parser for bare binary numbers
+binNumber :: Parser Integer
+binNumber = number 2 (char '0' <|> char '1')
+
+-- | A parser for bare octal numbers
+octNumber :: Parser Integer
+octNumber = number 8 digit
+
+-- | A parser for bare decimal numbers
+decNumber :: Parser Integer
+decNumber = number 10 digit
+
+-- | A parser for bare hexadecimal numbers
+hexNumber :: Parser Integer
+hexNumber = number 16 hexDigit
+
+-- | A parser for numeric signs, represented as a function from numbers
+--   to numbers. It will parse '+' as the identity function, '-', as
+--   'negate', or consume no input and return the identity function.
+--   This can be combined with other numeric literals to implement
+--   signedness:
+--
+--   > myNum = go <$> sign <*> decNumber
+--   >   where go s n = s n
+sign :: Num a => Parser (a -> a)
+sign =  (pure id     <* char '+')
+    <|> (pure negate <* char '-')
+    <|> pure id

+ 8 - 1
Data/SCargot/General.hs

@@ -45,7 +45,14 @@ import           Text.Parsec ( (<|>)
 import           Text.Parsec.Char (anyChar, space)
 import           Text.Parsec.Text (Parser)
 
-import           Data.SCargot.Repr
+import           Data.SCargot.Repr ( SExpr(..)
+                                   , RichSExpr
+                                   , WellFormedSExpr
+                                   , fromRich
+                                   , toRich
+                                   , fromWellFormed
+                                   , toWellFormed
+                                   )
 
 type ReaderMacroMap atom = Map Char (Reader atom)
 

+ 16 - 44
Data/SCargot/HaskLike.hs

@@ -14,6 +14,7 @@ import           Text.Parsec.Text (Parser)
 
 import           Prelude hiding (concatMap)
 
+import Data.SCargot.Common
 import Data.SCargot.Repr.Basic (SExpr)
 import Data.SCargot.General (SExprSpec, mkSpec)
 
@@ -22,7 +23,7 @@ import Data.SCargot.General (SExprSpec, mkSpec)
 This module is intended for simple, ad-hoc configuration or data formats
 that might not need their on rich structure but might benefit from a few
 various literal formats. the 'haskLikeSpec' understands identifiers as
-defined by R6RS as well as string, integer, and floating-point literals
+defined by R5RS as well as string, integer, and floating-point literals
 as defined by the Haskell spec, but won't get any Lisp-specific vector
 literals or other structure.
 
@@ -32,7 +33,7 @@ literals or other structure.
 -- | An atom type that understands Haskell-like values as well as
 --   Scheme-like identifiers.
 data HaskLikeAtom
-  = HSIdent  Text  -- ^ An identifier, parsed according to the R6RS Scheme
+  = HSIdent  Text  -- ^ An identifier, parsed according to the R5RS Scheme
                    --   standard
   | HSString Text  -- ^ A string, parsed according to the syntax for string
                    --   literals in the Haskell report
@@ -46,19 +47,6 @@ data HaskLikeAtom
 instance IsString HaskLikeAtom where
   fromString = HSIdent . fromString
 
-pToken :: Parser Text
-pToken =  pack <$> (  (:) <$> initial <*> many subsequent
-                  <|> string "+"
-                  <|> string "-"
-                  <|> string "..."
-                   )
-
-initial :: Parser Char
-initial = letter <|> oneOf "!$%&*/:<=>?^_~"
-
-subsequent :: Parser Char
-subsequent = initial <|> digit <|> oneOf "+-.@"
-
 pString :: Parser Text
 pString = pack . catMaybes <$> between (char '"') (char '"') (many (val <|> esc))
   where val = Just <$> satisfy (\ c -> c /= '"' && c /= '\\' && c > '\026')
@@ -69,8 +57,8 @@ pString = pack . catMaybes <$> between (char '"') (char '"') (many (val <|> esc)
         code = eEsc <|> eNum <|> eCtrl <|> eAscii
         eCtrl  = char '^' >> unCtrl <$> upper
         eNum   = (toEnum . fromInteger) <$>
-                   (decimal <|> (char 'o' >> number 8 octDigit)
-                            <|> (char 'x' >> number 16 hexDigit))
+                   (decNumber <|> (char 'o' >> octNumber)
+                              <|> (char 'x' >> hexNumber))
         eEsc   = choice [ char a >> return b | (a, b) <- escMap ]
         eAscii = choice [ try (string a >> return b)
                         | (a, b) <- asciiMap ]
@@ -89,29 +77,13 @@ asciiMap = zip
    "\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK" ++
    "\SYN\ETB\CAN\SUB\ESC\DEL")
 
-decimal :: Parser Integer
-decimal = number 10 digit
-
-number :: Integer -> Parser Char -> Parser Integer
-number base digits = foldl go 0 <$> many1 digits
-  where go x d = base * x + toInteger (value d)
-        value c
-          | c == 'a' || c == 'A' = 0xa
-          | c == 'b' || c == 'B' = 0xb
-          | c == 'c' || c == 'C' = 0xc
-          | c == 'd' || c == 'D' = 0xd
-          | c == 'e' || c == 'E' = 0xe
-          | c == 'f' || c == 'F' = 0xf
-          | c >= '0' && c <= '9' = fromEnum c - fromEnum '0'
-          | otherwise = error ("Unknown letter in number: " ++ show c)
-
 pFloat :: Parser Double
 pFloat = do
-  n <- decimal
+  n <- decNumber
   withDot n <|> noDot n
   where withDot n = do
           char '.'
-          m <- decimal
+          m <- decNumber
           e <- option 1.0 exponent
           return ((fromIntegral n + asDec m 0) * e)
         noDot n = do
@@ -120,7 +92,7 @@ pFloat = do
         exponent = do
           oneOf "eE"
           s <- power
-          x <- decimal
+          x <- decNumber
           return (10 ** s (fromIntegral x))
         asDec 0 k = k
         asDec n k =
@@ -132,23 +104,23 @@ power = negate <$ char '-' <|> id <$ char '+' <|> return id
 pInt :: Parser Integer
 pInt = do
   s <- power
-  n <- pZeroNum <|> decimal
+  n <- pZeroNum <|> decNumber
   return (fromIntegral (s n))
 
 pZeroNum :: Parser Integer
 pZeroNum = char '0' >>
-  (  (oneOf "xX" >> number 16 hexDigit)
- <|> (oneOf "oO" >> number 8 octDigit)
- <|> decimal
+  (  (oneOf "xX" >> hexNumber)
+ <|> (oneOf "oO" >> octNumber)
+ <|> decNumber
  <|> return 0
   )
 
 pHaskLikeAtom :: Parser HaskLikeAtom
 pHaskLikeAtom
-   =  HSFloat  <$> (try pFloat <?> "float")
-  <|> HSInt    <$> (try pInt   <?> "integer")
-  <|> HSString <$> (pString    <?> "string literal")
-  <|> HSIdent  <$> (pToken     <?> "token")
+   =  HSFloat   <$> (try pFloat     <?> "float")
+  <|> HSInt     <$> (try pInt       <?> "integer")
+  <|> HSString  <$> (pString        <?> "string literal")
+  <|> HSIdent   <$> (parseR5RSIdent <?> "token")
 
 sHaskLikeAtom :: HaskLikeAtom -> Text
 sHaskLikeAtom (HSIdent t)  = t

+ 6 - 3
Data/SCargot/Pretty.hs

@@ -92,10 +92,12 @@ indent :: Int -> Text -> Text
 indent n ts = T.replicate n " " <> ts
 
 -- Indents every line n spaces, and adds a newline to the beginning
+-- used in swung indents
 indentAll :: Int -> [Text] -> Text
 indentAll n = ("\n" <>) . joinLines . map (indent n)
 
 -- Indents every line but the first by some amount
+-- used in aligned indents
 indentSubsequent :: Int -> [Text] -> Text
 indentSubsequent _ [] = ""
 indentSubsequent _ [t] = t
@@ -107,7 +109,7 @@ indentSubsequent n (t:ts) = joinLines (t : go ts)
 -- i swear i'll do better in the future i promise i have to
 -- for my sake and for everyone's
 
+-- | Pretty-print a 'SExpr' according to the options in a
 --   'LayoutOptions' value.
 prettyPrintSExpr :: LayoutOptions a -> SExpr a -> Text
 prettyPrintSExpr LayoutOptions { .. } = pHead 0
@@ -136,7 +138,7 @@ prettyPrintSExpr LayoutOptions { .. } = pHead 0
                       indentSubsequent (ind + headWidth + 1)
                         (map (pHead (ind + headWidth + 1)) lst)
                 body
-                  | length lst == 0                = ""
+                  | length lst == 0              = ""
                   | Just maxAmt <- maxWidth
-                  , (T.length flat + ind) > maxAmt = " " <> indented
-                  | otherwise                      = " " <> flat
+                  , T.length flat + ind > maxAmt = " " <> indented
+                  | otherwise                    = " " <> flat

+ 85 - 15
Data/SCargot/Scheme/R7RS.hs

@@ -1,5 +1,24 @@
+{-# LANGUAGE StandaloneDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE UndecidableInstances #-}
+
 module Data.SCargot.Scheme.R7RS where
 
+import           Data.Char (chr, isAlphaNum)
+import           Data.Text (Text)
+import qualified Data.Text as T
+import           Data.String (IsString(..))
+import           Data.SCargot.Common
+import           Data.SCargot.General
+import           Data.SCargot.Repr.Basic
+import           Data.Word (Word8)
+import           Text.Parsec
+import           Text.Parsec.Text (Parser)
+
+instance IsString (SchemeAtom c) where
+  fromString = ScmIdent . fromString
+
 -- | A Scheme value type. This is strictly larger than just
 --   'atoms', as they may include things like vectors or
 --   labeled data, which must be able to refer to yet other
@@ -16,30 +35,81 @@ data SchemeAtom carrier
   | ScmByteVec [Word8]
   | ScmLabeledDatum Int (carrier (SchemeAtom carrier))
   | ScmLabelReference Int
+
+-- | Scheme has a lot of numbers.
+data SchemeNumber
+  = ScmNumber
+  | ScmComplexNumber  Double Double
+  | ScmRealNumber     Double
+  | ScmRationalNumber Rational
+  | ScmInteger        Integer
     deriving (Eq, Show)
 
-withQuasiQuote :: SExprSpec (SchemeAtom SExpr) (SExpr (SchemeAtom Sexpr))
+deriving instance Show (c (SchemeAtom c)) => Show (SchemeAtom c)
+deriving instance Eq (c (SchemeAtom c)) => Eq (SchemeAtom c)
+
+badSpec :: SExprSpec (SchemeAtom SExpr) (SExpr (SchemeAtom SExpr))
+badSpec = mkSpec (ScmIdent . T.pack <$> many1 (satisfy isAlphaNum)) undefined
+
+withQuasiQuote :: SExprSpec (SchemeAtom SExpr) (SExpr (SchemeAtom SExpr))
                -> SExprSpec (SchemeAtom SExpr) (SExpr (SchemeAtom SExpr))
 withQuasiQuote spec = addReader '`' (fmap (go "quasiquote"))
                     $ addReader ',' unquote
+                    $ addReader '\'' (fmap (go "quote"))
                     $ spec
    where go name s = name ::: s ::: Nil
-         unquote p = char '@' *> fmap (go "unquote-splicing")
-                  <|> fmap (go "unquote")
+         unquote p = char '@' *> fmap (go "unquote-splicing") p
+                  <|> fmap (go "unquote") p
 
-octoReader :: Reader (SExpr (SchemeAtom SExpr))
+octoReader :: Reader (SchemeAtom SExpr)
 octoReader pSexpr =
-      string "true"  *> pure (ScmBool True)
-  <|> string "false" *> pure (ScmBool False)
-  <|> char 't' *> pure (ScmBool True)
-  <|> char 'f' *> pure (ScmBool False)
-  <|> char '\\' *> characterConstant
-  <|> char '(' *> fmap ScmVec (vector pSexpr)
-  <|> string "u8(" *> bytevec
+      string "true"  *> pure (A (ScmBool True))
+  <|> string "false" *> pure (A (ScmBool False))
+  <|> char 't' *> pure (A (ScmBool True))
+  <|> char 'f' *> pure (A (ScmBool False))
+  <|> char '\\' *> fmap (A . ScmChar) characterConstant
+  <|> char '(' *> fmap (A . ScmVec) (vector pSexpr)
+  <|> string "u8(" *> fmap A bytevec
   <|> do n <- read <$> many1 digit
-         (char '#' *> ScmLabelReference n <|>
-          char '=' *> fmap (ScmLabeledDatum n) pSexpr)
+         (char '#' *> pure (A (ScmLabelReference n)) <|>
+          char '=' *> fmap (A . ScmLabeledDatum n) pSexpr)
 
 vector :: Parser (SExpr (SchemeAtom SExpr)) -> Parser [SExpr (SchemeAtom SExpr)]
-vector pSexpr =
-  (char ')' *> pure []) <|> ((:) <$> pSExpr <*> vector pSexpr)
+vector pSExpr =
+  (char ')' *> pure []) <|> ((:) <$> pSExpr <*> vector pSExpr)
+
+bytevec :: Parser (SchemeAtom SExpr)
+bytevec = undefined
+
+characterConstant :: Parser Char
+characterConstant = namedCharacter
+                 <|> (chr . fromInteger <$> (char 'x' *> hexNumber))
+                 <|> anyCharacter
+  where namedCharacter =  string "alarm"     *> pure '\x07'
+                      <|> string "backspace" *> pure '\x08'
+                      <|> string "delete"    *> pure '\x7f'
+                      <|> string "escape"    *> pure '\x1b'
+                      <|> string "newline"   *> pure '\x0a'
+                      <|> string "null"      *> pure '\x00'
+                      <|> string "return"    *> pure '\x0d'
+                      <|> string "space"     *> pure ' '
+                      <|> string "tab"       *> pure '\x09'
+        anyCharacter = anyToken
+
+r7rsNum :: Int -> Parser Int
+r7rsNum radix = prefix <*> complex
+  where prefix = radix <*> exactness <|> exactness <*> radix
+        complex =  real
+               <|> real <* char '@' <*> real
+               <|> real <* char '+' <*> ureal <* char 'i'
+               <|> real <* char '-' <*> ureal <* char 'i'
+               <|> real <* char '+' <* char 'i'
+               <|> real <* char '-' <* char 'i'
+               <|> real <*> infnan <* char 'i'
+               <|> char '+' *> ureal <* char 'i'
+               <|> char '-' *> ureal <* char 'i'
+               <|> infnan <* char 'i'
+               <|> string "+i"
+               <|> string "-i"
+        real = ($) <$> sign <*> ureal
+            <|> infnan

+ 2 - 2
s-cargot.cabal

@@ -21,8 +21,8 @@ library
                        Data.SCargot.Pretty,
                        Data.SCargot.Basic,
                        Data.SCargot.Comments,
-                       Data.SCargot.HaskLike,
-                       Data.SCargot.Tutorial
+                       Data.SCargot.Common,
+                       Data.SCargot.HaskLike
   -- other-modules:
   -- other-extensions:
   build-depends:       base >=4.7 && <5, parsec, text, containers