Pārlūkot izejas kodu

Big final bunch of refactors/documentation bits in preparation for Hackage release

Getty Ritter 8 gadi atpakaļ
vecāks
revīzija
ed1b3db362

+ 92 - 3
Data/SCargot.hs

@@ -1,5 +1,94 @@
 module Data.SCargot
-         ( module Data.SCargot.General
-         ) where
+  ( -- * SCargot Basics
 
-import Data.SCargot.General
+    -- $intro
+
+    -- * Parsing and Printing
+    decode
+  , decodeOne
+  , encode
+  , encodeOne
+    -- * Parser Construction
+    -- ** Specifying a Parser
+  , SExprParser
+  , Reader
+  , Comment
+  , mkParser
+  , setCarrier
+  , addReader
+  , setComment
+  , asRich
+  , asWellFormed
+  , withQuote
+    -- * Printer Construction
+    -- ** Specifying a Pretty-Printer
+  , SExprPrinter
+  , Indent(..)
+  , basicPrint
+  , flatPrint
+  , setFromCarrier
+  , setMaxWidth
+  , removeMaxWidth
+  , setIndentAmount
+  , setIndentStrategy
+  ) where
+
+import Data.SCargot.Parse
+import Data.SCargot.Print
+
+{- $intro
+
+The S-Cargot library is a library for parsing and emitting
+<https://en.wikipedia.org/wiki/S-expression s-expressions>, designed
+to be as flexible as possible. Despite some efforts at
+<http://people.csail.mit.edu/rivest/Sexp.txt standardization>,
+s-expressions are a general approach to describing a data format
+that can very often differ in subtle, incompatible ways: the
+s-expressions understood by Common Lisp are different from the
+s-expressions understood by Scheme, and even the different
+revisions of the Scheme language understand s-expressions in a
+slightly different way. To accommodate this, the S-Cargot library
+provides a toolbox for defining variations on s-expressions,
+complete with the ability to select various comment syntaxes, reader
+macros, and atom types.
+
+If all you want is to read some s-expressions and don't care about
+the edge cases of the format, or all you want is a new configuration
+format, try the "Data.SCargot.Language.Basic" or "Data.SCargot.Language.HaskLike"
+modules, which define an s-expression language whose atoms are
+plain strings and Haskell literals, respectively.
+
+The S-Cargot library works by specifying values which contain all
+the information needed to either parse or print an s-expression.
+The actual s-expression structure is parsed as a structure of
+<https://en.wikipedia.org/wiki/Cons cons cells> as represented
+by the 'SExpr' type, but can alternately be exposed as the
+isomorphic 'RichSExpr' type or the less expressive but
+easier-to-work-with 'WellFormedSExpr' type. Modules devoted
+to each representation type (in "Data.SCargot.Repr.Basic",
+"Data.SCargot.Repr.Rich", and "Data.SCargot.Repr.WellFormed")
+provide helper functions, lenses, and pattern synonyms to make
+creating and processing these values easier.
+
+The details of how to parse a given structure are represented
+by building up a 'SExprParser' value, which is defined in
+"Data.SCargot.Parse" and re-exported here. A minimal
+'SExprParser' defines only how to parse the atoms of the
+language; helper functions can define comment syntaxes,
+reader macros, and transformations over the parsed structure.
+
+The details of how to print a given structure are represented
+by building up a 'SExprPrinter' value, which is defined in
+"Data.SCargot.Print" and re-exported here. A minimal
+'SExprPrinter' defines only how to print the atoms of the
+language; helper functions help with the layout of the
+pretty-printed s-expression in terms of how to indent the
+surrounding expression.
+
+Other helper modules define useful primitives for building up
+s-expression languages: the "Data.SCargot.Common" module provides
+parsers for common literals, while the "Data.SCargot.Comments"
+module provides parsers for comment syntaxes borrowed from
+various other languages.
+
+-}

+ 27 - 26
Data/SCargot/Comments.hs

@@ -4,6 +4,7 @@ module Data.SCargot.Comments
   ( -- $intro
 
     -- * Lisp-Style Syntax
+
     -- $lisp
     withLispComments
     -- * Other Existing Comment Syntaxes
@@ -35,10 +36,10 @@ import           Text.Parsec ( (<|>)
                              , string
                              )
 
-import            Data.SCargot.General ( Comment
-                                       , SExprSpec
-                                       , setComment
-                                       )
+import            Data.SCargot.Parse ( Comment
+                                     , SExprParser
+                                     , setComment
+                                     )
 
 -- | Given a string, produce a comment parser that matches that
 --   initial string and ignores everything until the end of the
@@ -73,77 +74,77 @@ simpleBlockComment begin end =
 -- | Lisp-style line-oriented comments start with @;@ and last
 --   until the end of the line. This is usually the comment
 --   syntax you want.
-withLispComments :: SExprSpec t a -> SExprSpec t a
+withLispComments :: SExprParser t a -> SExprParser t a
 withLispComments = setComment (lineComment ";")
 
 -- | C++-like line-oriented comments start with @//@ and last
 --   until the end of the line.
-withCLikeLineComments :: SExprSpec t a -> SExprSpec t a
+withCLikeLineComments :: SExprParser t a -> SExprParser t a
 withCLikeLineComments = setComment (lineComment "//")
 
 -- | C-like block comments start with @/*@ and end with @*/@.
 --   They do not nest.
-withCLikeBlockComments :: SExprSpec t a -> SExprSpec t a
+withCLikeBlockComments :: SExprParser t a -> SExprParser t a
 withCLikeBlockComments = setComment (simpleBlockComment "/*" "*/")
 
 -- | C-like comments include both line- and block-comments, the
 --   former starting with @//@ and the latter contained within
 --   @//* ... *//@.
-withCLikeComments :: SExprSpec t a -> SExprSpec t a
+withCLikeComments :: SExprParser t a -> SExprParser t a
 withCLikeComments = setComment (lineComment "//" <|>
                                 simpleBlockComment "/*" "*/")
 
 -- | Haskell line-oriented comments start with @--@ and last
 --   until the end of the line.
-withHaskellLineComments :: SExprSpec t a -> SExprSpec t a
+withHaskellLineComments :: SExprParser t a -> SExprParser t a
 withHaskellLineComments = setComment (lineComment "--")
 
 -- | Haskell block comments start with @{-@ and end with @-}@.
 --   They do not nest.
-withHaskellBlockComments :: SExprSpec t a -> SExprSpec t a
+withHaskellBlockComments :: SExprParser t a -> SExprParser t a
 withHaskellBlockComments = setComment (simpleBlockComment "{-" "-}")
 
 -- | Haskell comments include both the line-oriented @--@ comments
 --   and the block-oriented @{- ... -}@ comments
-withHaskellComments :: SExprSpec t a -> SExprSpec t a
+withHaskellComments :: SExprParser t a -> SExprParser t a
 withHaskellComments = setComment (lineComment "--" <|>
                                   simpleBlockComment "{-" "-}")
 
 -- | Many scripting and shell languages use these, which begin with
 --   @#@ and last until the end of the line.
-withOctothorpeComments :: SExprSpec t a -> SExprSpec t a
+withOctothorpeComments :: SExprParser t a -> SExprParser t a
 withOctothorpeComments = setComment (lineComment "#")
 
 
 {- $intro
 
-By default a 'SExprSpec' will not understand any kind of comment
+By default a 'SExprParser' will not understand any kind of comment
 syntax. Most varieties of s-expression will, however, want some kind
 of commenting capability, so the below functions will produce a new
-'SExprSpec' which understands various kinds of comment syntaxes.
+'SExprParser' which understands various kinds of comment syntaxes.
 
 For example:
 
-> mySpec :: SExprSpec Text (SExpr Text)
-> mySpec = asWellFormed $ mkSpec (pack <$> many1 alphaNum) id
+> mySpec :: SExprParser Text (WellFormedSExpr Text)
+> mySpec = asWellFormed $ mkParser (pack <$> many1 alphaNum)
 >
-> myLispySpec :: SExprSpec Text (SExpr Text)
+> myLispySpec :: SExprParser Text (WellFormedSExpr Text)
 > myLispySpec = withLispComments mySpec
 >
-> myCLikeSpec :: SExprSpec Text (SExpr Text)
+> myCLikeSpec :: SExprParser Text (WellFormedSExpr Text)
 > myCLikeSpec = withCLikeComments mySpec
 
 We can then use these to parse s-expressions with different kinds of
 comment syntaxes:
 
-> > decode mySpec "(foo ; a lisp comment\n  bar)\n"
-> Left "(line 1, column 6):\nunexpected \";\"\nexpecting space or atom"
-> > decode myLispySpec "(foo ; a lisp comment\n  bar)\n"
-> Right [WFSList [WFSAtom "foo", WFSAtom "bar"]]
-> > decode mySpec "(foo /* a c-like\n   comment */ bar)\n"
-> Left "(line 1, column 6):\nunexpected \"/\"\nexpecting space or atom"
-> > decode myCLikeSpec "(foo /* a c-like\n   comment */ bar)\n"
-> Right [WFSList [WFSAtom "foo", WFSAtom "bar"]]
+>>> decode mySpec "(foo ; a lisp comment\n  bar)\n"
+Left "(line 1, column 6):\nunexpected \";\"\nexpecting space or atom"
+>>> decode myLispySpec "(foo ; a lisp comment\n  bar)\n"
+Right [WFSList [WFSAtom "foo", WFSAtom "bar"]]
+>>> decode mySpec "(foo /* a c-like\n   comment */ bar)\n"
+Left "(line 1, column 6):\nunexpected \"/\"\nexpecting space or atom"
+>>> decode myCLikeSpec "(foo /* a c-like\n   comment */ bar)\n"
+Right [WFSList [WFSAtom "foo", WFSAtom "bar"]]
 
 -}
 

+ 22 - 2
Data/SCargot/Common.hs

@@ -4,6 +4,9 @@ module Data.SCargot.Common ( -- $intro
                            , parseR6RSIdent
                            , parseR7RSIdent
                              -- * Numeric Literal Parsers
+                           , signed
+                           , prefixedNumber
+                           , signedPrefixedNumber
                            , binNumber
                            , signedBinNumber
                            , octNumber
@@ -14,7 +17,6 @@ module Data.SCargot.Common ( -- $intro
                            , signedDozNumber
                            , hexNumber
                            , signedHexNumber
-                           , signed
                            ) where
 
 import           Data.Char
@@ -42,7 +44,7 @@ hasCategory c cs = generalCategory c `elem` cs
 
 -- | Parse an identifier according to the R6RS Scheme standard. An
 --   R6RS identifier may include inline hexadecimal escape sequences
+--   so that, for example, @foo@ is equivalent to @f\\x6f;o@, and is
 --   more liberal than R5RS as to which Unicode characters it may
 --   accept.
 parseR6RSIdent :: Parser Text
@@ -141,13 +143,30 @@ sign =  (pure id     <* char '+')
 signed :: Num a => Parser a -> Parser a
 signed p = ($) <$> sign <*> p
 
+-- | Parses a number in the same way as 'prefixedNumber', with an optional
+--   leading @+@ or @-@.
+signedPrefixedNumber :: Parser Integer
+signedPrefixedNumber = signed prefixedNumber
+
+-- | Parses a number, determining which numeric base to use by examining
+--   the literal's prefix: @0x@ for a hexadecimal number, @0z@ for a
+--   dozenal number, @0o@ for an octal number, and @0b@ for a binary
+--   number (as well as the upper-case versions of the same). If the
+--   base is omitted entirely, then it is treated as a decimal number.
+--
+--   The leading @0@ is consumed exactly once and the base marker is then
+--   examined on its own, so a failed match on one prefix never swallows
+--   the @0@ that another prefix (or a plain decimal literal such as @0@
+--   or @012@) also starts with.
+prefixedNumber :: Parser Integer
+prefixedNumber = (char '0' *> afterZero) <|> decNumber
+  where
+    -- Having consumed a leading @0@: dispatch on the base marker if one is
+    -- present. Otherwise any remaining digits are decimal (the leading
+    -- zero does not change the value), and a lone @0@ is simply zero.
+    afterZero =  (char 'x' <|> char 'X') *> hexNumber
+             <|> (char 'o' <|> char 'O') *> octNumber
+             <|> (char 'z' <|> char 'Z') *> dozNumber
+             <|> (char 'b' <|> char 'B') *> binNumber
+             <|> decNumber
+             <|> pure 0
+
 -- | A parser for non-signed binary numbers
 binNumber :: Parser Integer
 binNumber = number 2 (char '0' <|> char '1')
 
 -- | A parser for signed binary numbers, with an optional leading @+@ or @-@.
 signedBinNumber :: Parser Integer
-signedBinNumber = ($) <$> sign <*> binNumber
+signedBinNumber = signed binNumber
 
 -- | A parser for non-signed octal numbers
 octNumber :: Parser Integer

+ 22 - 7
Data/SCargot/Basic.hs

@@ -1,9 +1,10 @@
 {-# LANGUAGE OverloadedStrings #-}
 
-module Data.SCargot.Basic
+module Data.SCargot.Language.Basic
   ( -- * Spec
     -- $descr
-    basicSpec
+    basicParser
+  , basicPrinter
   ) where
 
 import           Control.Applicative ((<$>))
@@ -12,9 +13,11 @@ import           Text.Parsec (many1, satisfy)
 import           Data.Text (Text, pack)
 
 import           Data.SCargot.Repr.Basic (SExpr)
-import           Data.SCargot.General ( SExprSpec
-                                      , mkSpec
-                                      )
+import           Data.SCargot ( SExprParser
+                              , SExprPrinter
+                              , mkParser
+                              , flatPrint
+                              )
 import           Data.SCargot.Comments (withLispComments)
 
 isAtomChar :: Char -> Bool
@@ -33,9 +36,20 @@ isAtomChar c = isAlphaNum c
 -- Atoms recognized by the 'basicParser' are any string matching the
 -- regular expression @[A-Za-z0-9+*<>/=!?-]+@.
 
+-- | A 'SExprParser' that understands atoms to be sequences of
 --   alphanumeric characters as well as the punctuation
 --   characters @[-*/+<>=!?]@, and does no processing of them.
-basicSpec :: SExprSpec Text (SExpr Text)
-basicSpec = mkSpec pToken id
+--
+-- >>> decode basicParser "(1 elephant)"
+-- Right [SCons (SAtom "1") (SCons (SAtom "elephant") SNil)]
+basicParser :: SExprParser Text (SExpr Text)
+basicParser = mkParser pToken
   where pToken = pack <$> many1 (satisfy isAtomChar)
+
+-- | A 'SExprPrinter' that prints textual atoms directly (without quoting
+--   or any other processing) onto a single line.
+--
+-- >>> encode basicPrinter [L [A "1", A "elephant"]]
+-- "(1 elephant)"
+basicPrinter :: SExprPrinter Text (SExpr Text)
+basicPrinter = flatPrint id

+ 27 - 10
Data/SCargot/HaskLike.hs

@@ -1,9 +1,11 @@
 {-# LANGUAGE OverloadedStrings #-}
 
-module Data.SCargot.HaskLike ( -- $info
-                               haskLikeSpec
-                             , HaskLikeAtom(..)
-                             ) where
+module Data.SCargot.Language.HaskLike
+  ( -- $info
+    HaskLikeAtom(..)
+  , haskLikeParser
+  , haskLikePrinter
+  ) where
 
 import           Control.Applicative ((<$>), (<*>), (<$))
 import           Data.Maybe (catMaybes)
@@ -16,16 +18,16 @@ import           Prelude hiding (concatMap)
 
 import Data.SCargot.Common
 import Data.SCargot.Repr.Basic (SExpr)
-import Data.SCargot.General (SExprSpec, mkSpec)
+import Data.SCargot (SExprParser, SExprPrinter, mkParser, flatPrint)
 
 {- $info
 
 This module is intended for simple, ad-hoc configuration or data formats
 that might not need their own rich structure but might benefit from a few
-various kinds of literals. The 'haskLikeSpec' understands identifiers as
+various kinds of literals. The 'haskLikeParser' understands identifiers as
 defined by R5RS, as well as string, integer, and floating-point literals
-as defined by the Haskell spec. It does _not_ natively understand other
-data types, such as booleans, vectors, bitstrings, or the like.
+as defined by the Haskell spec. It does __not__ natively understand other
+data types, such as booleans, vectors, or bitstrings.
 
 -}
 
@@ -128,7 +130,7 @@ sHaskLikeAtom (HSString s) = pack (show s)
 sHaskLikeAtom (HSInt i)    = pack (show i)
 sHaskLikeAtom (HSFloat f)  = pack (show f)
 
+-- | This 'SExprParser' understands s-expressions that contain
 --   Scheme-like tokens, as well as string literals, integer
 --   literals, and floating-point literals. Each of these values
 --   is parsed according to the lexical rules in the Haskell
@@ -136,5 +138,19 @@ sHaskLikeAtom (HSFloat f)  = pack (show f)
 --   and floating-point options are available. This spec does
 --   not parse comments and does not understand any reader
 --   macros.
-haskLikeSpec :: SExprSpec HaskLikeAtom (SExpr HaskLikeAtom)
-haskLikeSpec = mkSpec pHaskLikeAtom sHaskLikeAtom
+--
+-- >>> decode haskLikeParser "(0x01 \"\\x65lephant\")"
+-- Right [SCons (SAtom (HSInt 1)) (SCons (SAtom (HSString "elephant")) SNil)]
+haskLikeParser :: SExprParser HaskLikeAtom (SExpr HaskLikeAtom)
+haskLikeParser = mkParser pHaskLikeAtom
+
+-- | This 'SExprPrinter' emits s-expressions that contain Scheme-like
+--   tokens as well as string literals, integer literals, and floating-point
+--   literals, which will be emitted as the literals produced by Haskell's
+--   'show' function. This printer will produce a flat s-expression with
+--   no indentation of any kind.
+--
+-- >>> encode haskLikePrinter [L [A (HSInt 1), A (HSString "elephant")]]
+-- "(1 \"elephant\")"
+haskLikePrinter :: SExprPrinter HaskLikeAtom (SExpr HaskLikeAtom)
+haskLikePrinter = flatPrint sHaskLikeAtom

+ 81 - 45
Data/SCargot/General.hs

@@ -1,26 +1,22 @@
 {-# LANGUAGE ViewPatterns #-}
 {-# LANGUAGE OverloadedStrings #-}
 
-module Data.SCargot.General
-  ( -- * SExprSpec
-    SExprSpec
-  , mkSpec
-  , convertSpec
+module Data.SCargot.Parse
+  ( -- * Parsing
+    decode
+  , decodeOne
+    -- * Parsing Control
+  , SExprParser
+  , Reader
+  , Comment
+  , mkParser
+  , setCarrier
   , addReader
   , setComment
-    -- * Specific SExprSpec Conversions
+    -- * Specific SExprParser Conversions
   , asRich
   , asWellFormed
   , withQuote
-    -- * Using a SExprSpec
-  , decode
-  , decodeOne
-  , encode
-  , encodeOne
-    -- * Useful Type Aliases
-  , Reader
-  , Comment
-  , Serializer
   ) where
 
 import           Control.Applicative ((<*), (*>), (<*>), (<$>), pure)
@@ -63,87 +59,69 @@ type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))
 
 -- | A 'Comment' represents any kind of skippable comment. This
 --   parser __must__ be able to fail if a comment is not being
+--   recognized, and it __must__ not consume any input in case
+--   of failure.
 type Comment = Parser ()
 
-type Serializer atom = atom -> Text
-
-data SExprSpec atom carrier = SExprSpec
+-- | A 'SExprParser' describes a parser for a particular value
+--   that has been serialized as an s-expression. The @atom@ parameter
+--   corresponds to a Haskell type used to represent the atoms,
+--   and the @carrier@ parameter corresponds to the parsed S-Expression
+--   structure.
+data SExprParser atom carrier = SExprParser
   { sesPAtom   :: Parser atom
-  , sesSAtom   :: Serializer atom
   , readerMap  :: ReaderMacroMap atom
   , comment    :: Maybe Comment
   , postparse  :: SExpr atom -> Either String carrier
-  , preserial  :: carrier -> SExpr atom
   }
 
+-- | Create a basic 'SExprParser' when given a parser
+--   for an atom type.
 --
-mkSpec :: Parser atom -> Serializer atom -> SExprSpec atom (SExpr atom)
-mkSpec p s = SExprSpec
-  { sesPAtom   = p <?> "atom"
-  , sesSAtom   = s
+--   >>> import Text.Parsec (alphaNum, many1)
+--   >>> let parser = mkParser (many1 alphaNum)
+--   >>> decode parser "(ele phant)"
+--   Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]
+mkParser :: Parser atom -> SExprParser atom (SExpr atom)
+mkParser parser = SExprParser
+  { sesPAtom   = parser
   , readerMap  = M.empty
   , comment    = Nothing
   , postparse  = return
-  , preserial  = id
   }
 
+-- | Modify the carrier type for a 'SExprParser'. This is
 --   used internally to convert between various 'SExpr' representations,
 --   but could also be used externally to add an extra conversion layer
---
+--   onto a 'SExprParser'.
 --
-convertSpec :: (b -> Either String c) -> (c -> b)
-               -> SExprSpec a b -> SExprSpec a c
-convertSpec f g spec = spec
-  { postparse = postparse spec >=> f
-  , preserial = preserial spec . g
-  }
+-- >>> import Text.Parsec (alphaNum, many1)
+-- >>> import Data.SCargot.Repr (toRich)
+-- >>> let parser = setCarrier (return . toRich) (mkParser (many1 alphaNum))
+-- >>> decode parser "(ele phant)"
+-- Right [RSList [RSAtom "ele",RSAtom "phant"]]
+setCarrier :: (b -> Either String c) -> SExprParser a b -> SExprParser a c
+setCarrier f spec = spec { postparse = postparse spec >=> f }
 
 -- | Convert the final output representation from the 'SExpr' type
 --   to the 'RichSExpr' type.
-asRich :: SExprSpec a (SExpr b) -> SExprSpec a (RichSExpr b)
-asRich = convertSpec (return . toRich) fromRich
+--
+-- >>> import Text.Parsec (alphaNum, many1)
+-- >>> let parser = asRich (mkParser (many1 alphaNum))
+-- >>> decode parser "(ele phant)"
+-- Right [RSList [RSAtom "ele",RSAtom "phant"]]
+asRich :: SExprParser a (SExpr b) -> SExprParser a (RichSExpr b)
+asRich = setCarrier (return . toRich)
 
 -- | Convert the final output representation from the 'SExpr' type
 --   to the 'WellFormedSExpr' type.
-asWellFormed :: SExprSpec a (SExpr b) -> SExprSpec a (WellFormedSExpr b)
-asWellFormed = convertSpec toWellFormed fromWellFormed
+--
+-- >>> import Text.Parsec (alphaNum, many1)
+-- >>> let parser = asWellFormed (mkParser (many1 alphaNum))
+-- >>> decode parser "(ele phant)"
+-- Right [WFSList [WFSAtom "ele",WFSAtom "phant"]]
+asWellFormed :: SExprParser a (SExpr b) -> SExprParser a (WellFormedSExpr b)
+asWellFormed = setCarrier toWellFormed
 
 -- | Add the ability to execute some particular reader macro, as
 --   defined by its initial character and the 'Parser' which returns
@@ -152,16 +130,13 @@ asWellFormed = convertSpec toWellFormed fromWellFormed
 --   parsing after the reader character has been removed from the
 --   stream.
 --
---
-addReader :: Char -> Reader a -> SExprSpec a c -> SExprSpec a c
+-- >>> import Text.Parsec ((<|>), alphaNum, char, many1)
+-- >>> let vecReader p = (char ']' *> pure SNil) <|> (SCons <$> p <*> vecReader p)
+-- >>> let parser = addReader '[' vecReader (mkParser (many1 alphaNum))
+-- >>> decode parser "(an [ele phant])"
+-- Right [SCons (SAtom "an") (SCons (SCons (SAtom "ele") (SCons (SAtom "phant") SNil)) SNil)]
+
+addReader :: Char -> Reader a -> SExprParser a c -> SExprParser a c
 addReader c reader spec = spec
   { readerMap = M.insert c reader (readerMap spec) }
 
@@ -171,23 +146,25 @@ addReader c reader spec = spec
 --   cause an infinite loop), and also that it __not consume any input__
 --   (which may require it to be wrapped in 'try'.)
 --
---
+-- >>> import Text.Parsec (alphaNum, anyChar, manyTill, many1, newline, string)
+-- >>> let comment = string "//" *> manyTill anyChar newline *> pure ()
+-- >>> let parser = setComment comment (mkParser (many1 alphaNum))
+-- >>> decode parser "(ele //a comment\n  phant)"
+-- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]
 
-setComment :: Comment -> SExprSpec a c -> SExprSpec a c
+setComment :: Comment -> SExprParser a c -> SExprParser a c
 setComment c spec = spec { comment = Just (c <?> "comment") }
 
-withQuote :: IsString t => SExprSpec t (SExpr t) -> SExprSpec t (SExpr t)
+-- | Add the ability to understand a quoted S-Expression.
+--   Many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This
+--   assumes that the underlying atom type implements the 'IsString'
+--   class, and will create the @quote@ atom using @fromString "quote"@.
+--
+-- >>> import Text.Parsec (alphaNum, many1)
+-- >>> let parser = withQuote (mkParser (many1 alphaNum))
+-- >>> decode parser "'elephant"
+-- Right [SCons (SAtom "quote") (SCons (SAtom "elephant") SNil)]
+withQuote :: IsString t => SExprParser t (SExpr t) -> SExprParser t (SExpr t)
 withQuote = addReader '\'' (fmap go)
   where go s  = SCons "quote" (SCons s SNil)
 
@@ -250,17 +227,17 @@ doParse p t = case runParser p () "" t of
 --   the S-expression (ignoring comments or whitespace) then this
 --   will fail: for those cases, use 'decode', which returns a list of
 --   all the S-expressions found at the top level.
-decodeOne :: SExprSpec atom carrier -> Text -> Either String carrier
+decodeOne :: SExprParser atom carrier -> Text -> Either String carrier
 decodeOne spec = doParse (parser <* eof) >=> (postparse spec)
   where parser = parseGenericSExpr
                    (sesPAtom spec)
                    (readerMap spec)
                    (buildSkip (comment spec))
 
+-- | Decode several S-expressions according to a given 'SExprParser'. This
 --   will return a list of every S-expression that appears at the top-level
 --   of the document.
-decode :: SExprSpec atom carrier -> Text -> Either String [carrier]
+decode :: SExprParser atom carrier -> Text -> Either String [carrier]
 decode spec =
   doParse (many1 parser <* eof) >=> mapM (postparse spec)
     where parser = parseGenericSExpr
@@ -268,6 +245,7 @@ decode spec =
                      (readerMap spec)
                      (buildSkip (comment spec))
 
+{-
 -- | Encode (without newlines) a single S-expression.
 encodeSExpr :: SExpr atom -> (atom -> Text) -> Text
 encodeSExpr SNil _         = "()"
@@ -279,8 +257,9 @@ encodeSExpr (SCons x xs) t = go xs (encodeSExpr x t)
 
 -- | Emit an S-Expression in a machine-readable way. This does no
 --   pretty-printing or indentation, and produces no comments.
-encodeOne :: SExprSpec atom carrier -> carrier -> Text
+encodeOne :: SExprParser atom carrier -> carrier -> Text
 encodeOne spec c = encodeSExpr (preserial spec c) (sesSAtom spec)
 
-encode :: SExprSpec atom carrier -> [carrier] -> Text
+encode :: SExprParser atom carrier -> [carrier] -> Text
 encode spec cs = T.concat (map (encodeOne spec) cs)
+-}

+ 89 - 12
Data/SCargot/Pretty.hs

@@ -2,12 +2,18 @@
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 
-module Data.SCargot.Pretty
+module Data.SCargot.Print
          ( -- * Pretty-Printing
-           prettyPrintSExpr
+           encodeOne
+         , encode
            -- * Pretty-Printing Control
-         , LayoutOptions(..)
+         , SExprPrinter
          , Indent(..)
+         , setFromCarrier
+         , setMaxWidth
+         , removeMaxWidth
+         , setIndentAmount
+         , setIndentStrategy
            -- * Default Printing Strategies
          , basicPrint
          , flatPrint
@@ -46,12 +52,16 @@ data Indent
           --   >      quux)
     deriving (Eq, Show)
 
-data LayoutOptions a = LayoutOptions
-  { atomPrinter  :: a -> Text
+-- | A 'SExprPrinter' value describes how to print a given value as an
+--   s-expression. The @carrier@ type parameter indicates the value
+--   that will be printed, and the @atom@ parameter indicates the type
+--   that will represent tokens in an s-expression structure.
+data SExprPrinter atom carrier = SExprPrinter
+  { atomPrinter  :: atom -> Text
       -- ^ How to serialize a given atom to 'Text'.
-  , swingIndent  :: SExpr a -> Indent
+  , fromCarrier  :: carrier -> SExpr atom
+      -- ^ How to turn a carrier type back into a 'SExpr'.
+  , swingIndent  :: SExpr atom -> Indent
       -- ^ How to indent subsequent expressions, as determined by
       --   the head of the list.
   , indentAmount :: Int
@@ -64,9 +74,10 @@ data LayoutOptions a = LayoutOptions
 
 -- | A default 'SExprPrinter' struct that will always print a 'SExpr'
 --   as a single line.
-flatPrint :: (a -> Text) -> LayoutOptions a
-flatPrint printer = LayoutOptions
+flatPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom)
+flatPrint printer = SExprPrinter
   { atomPrinter  = printer
+  , fromCarrier  = id
   , swingIndent  = const Swing
   , indentAmount = 2
   , maxWidth     = Nothing
@@ -75,14 +86,67 @@ flatPrint printer = LayoutOptions
 -- | A default 'SExprPrinter' struct that will always swing subsequent
 --   expressions onto later lines if they're too long, indenting them
 --   by two spaces.
-basicPrint :: (a -> Text) -> LayoutOptions a
-basicPrint printer = LayoutOptions
+basicPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom)
+basicPrint printer = SExprPrinter
   { atomPrinter  = printer
+  , fromCarrier  = id
   , swingIndent  = const Swing
   , indentAmount = 2
   , maxWidth     = Just 80
   }
 
+-- | Modify the carrier type of a 'SExprPrinter' by describing how
+--   to convert the new type back to the previous type. For example,
+--   to pretty-print a well-formed s-expression, we can modify the
+--   'SExprPrinter' value as follows:
+--
+-- >>> let printer = setFromCarrier fromWellFormed (basicPrint id)
+-- >>> encodeOne printer (WFSList [WFSAtom "ele", WFSAtom "phant"])
+-- "(ele phant)"
+setFromCarrier :: (c -> b) -> SExprPrinter a b -> SExprPrinter a c
+setFromCarrier fc pr = pr { fromCarrier = fromCarrier pr . fc }
+
+-- | Dictate a maximum width for pretty-printed s-expressions.
+--
+-- >>> let printer = setMaxWidth 8 (basicPrint id)
+-- >>> encodeOne printer (L [A "one", A "two", A "three"])
+-- "(one \n  two\n  three)"
+setMaxWidth :: Int -> SExprPrinter atom carrier -> SExprPrinter atom carrier
+setMaxWidth n pr = pr { maxWidth = Just n }
+
+-- | Allow the serialized s-expression to be arbitrarily wide. This
+--   makes all pretty-printing happen on a single line.
+--
+-- >>> let printer = removeMaxWidth (basicPrint id)
+-- >>> encodeOne printer (L [A "one", A "two", A "three"])
+-- "(one two three)"
+removeMaxWidth :: SExprPrinter atom carrier -> SExprPrinter atom carrier
+removeMaxWidth pr = pr { maxWidth = Nothing }
+
+-- | Set the number of spaces that a subsequent line will be indented
+--   after a swing indentation.
+--
+-- >>> let printer = setMaxWidth 12 (basicPrint id)
+-- >>> encodeOne printer (L [A "elephant", A "pachyderm"])
+-- "(elephant \n  pachyderm)"
+-- >>> encodeOne (setIndentAmount 4 printer) (L [A "elephant", A "pachyderm"])
+-- "(elephant \n    pachyderm)"
+setIndentAmount :: Int -> SExprPrinter atom carrier -> SExprPrinter atom carrier
+setIndentAmount n pr = pr { indentAmount = n }
+
+-- | Dictate how to indent subsequent lines based on the leading
+--   subexpression in an s-expression. For details on how this works,
+--   consult the documentation of the 'Indent' type.
+--
+-- >>> let indent (A "def") = SwingAfter 1; indent _ = Swing
+-- >>> let printer = setIndentStrategy indent (setMaxWidth 8 (basicPrint id))
+-- >>> encodeOne printer (L [ A "def", L [ A "func", A "arg" ], A "body" ])
+-- "(def (func arg)\n  body)"
+-- >>> encodeOne printer (L [ A "elephant", A "among", A "pachyderms" ])
+-- "(elephant \n  among\n  pachyderms)"
+setIndentStrategy :: (SExpr atom -> Indent) -> SExprPrinter atom carrier -> SExprPrinter atom carrier
+setIndentStrategy st pr = pr { swingIndent = st }
+
 -- Sort of like 'unlines' but without the trailing newline
 joinLines :: [Text] -> Text
 joinLines = T.intercalate "\n"
@@ -111,8 +175,8 @@ indentSubsequent n (t:ts) = joinLines (t : go ts)
 
 -- | Pretty-print a 'SExpr' according to the options in a
 --   'SExprPrinter' value.
-prettyPrintSExpr :: LayoutOptions a -> SExpr a -> Text
-prettyPrintSExpr LayoutOptions { .. } = pHead 0
+prettyPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> Text
+prettyPrintSExpr SExprPrinter { .. } = pHead 0
   where pHead _   SNil         = "()"
         pHead _   (SAtom a)    = atomPrinter a
         pHead ind (SCons x xs) = gather ind x xs id
@@ -142,3 +206,14 @@ prettyPrintSExpr LayoutOptions { .. } = pHead 0
                   | Just maxAmt <- maxWidth
                   , T.length flat + ind > maxAmt = " " <> indented
                   | otherwise                    = " " <> flat
+
+-- | Turn a single s-expression into a string according to a given
+--   'SExprPrinter'.
+encodeOne :: SExprPrinter atom carrier -> carrier -> Text
+encodeOne s@(SExprPrinter { .. }) =
+  prettyPrintSExpr (s { fromCarrier = id }) . fromCarrier
+
+-- | Turn a list of s-expressions into a single string according to
+--   a given 'SExprPrinter'.
+encode :: SExprPrinter atom carrier -> [carrier] -> Text
+encode spec = T.intercalate "\n\n" . map (encodeOne spec)

+ 42 - 1
Data/SCargot/Repr.hs

@@ -5,7 +5,8 @@
 {-# LANGUAGE TypeFamilies #-}
 
 module Data.SCargot.Repr
-       ( -- * Elementary SExpr representation
+       ( -- $reprs
+         -- * Elementary SExpr representation
          SExpr(..)
          -- * Rich SExpr representation
        , RichSExpr(..)
@@ -51,8 +52,8 @@ instance IsList (SExpr atom) where
 --   represent a well-formed cons list, and 'RSDotted'
 --   to represent an improper list of the form
 --   @(a b c . d)@. This representation is based on
+--   the structure of the parsed S-Expression, and not on
+--   how it was originally represented: thus, @(a . (b))@ is going to
 --   be represented as @RSList[RSAtom a, RSAtom b]@
 --   despite having been originally represented as a
 --   dotted list.
@@ -138,3 +139,41 @@ fromWellFormed :: WellFormedSExpr atom -> SExpr atom
 fromWellFormed (WFSAtom a)  = SAtom a
 fromWellFormed (WFSList xs) =
   foldr SCons SNil (map fromWellFormed xs)
+
+{- $reprs
+
+This module contains several different representations for
+s-expressions. The s-cargot library uses the
+'SExpr' type as its underlying representation type, which is a binary
+tree representation with an arbitrary type for its leaves.
+
+Because this type is not always convenient to manipulate in Haskell
+code, this module defines two alternate representations
+which turn a sequence of nested right-branching cons pairs
+into Haskell lists: that is to say, they transform between
+
+@
+SCons a (SCons b (SCons c SNil))  \<=\>  RSList [a, b, c]
+@
+
+These two types differ in how they handle non-well-formed
+lists, i.e. lists that end with an atom. The 'RichSExpr'
+format handles this with a special constructor for lists
+that end in an atom:
+
+@
+SCons a (SCons b (SAtom c))  \<=\>  RSDotted [a, b] c
+@
+
+On the other hand, the 'WellFormedSExpr' type elects
+not to handle this case. This is unusual for Lisp source code,
+but is a reasonable choice for configuration or data
+storage formats that use s-expressions, where
+non-well-formed lists would be an unnecessary
+complication.
+
+To make working with these types less verbose, there are other
+modules that export pattern aliases and helper functions: these
+can be found at "Data.SCargot.Repr.Basic",
+"Data.SCargot.Repr.Rich", and "Data.SCargot.Repr.WellFormed".
+-}

+ 9 - 4
s-cargot.cabal

@@ -1,6 +1,7 @@
 name:                s-cargot
 version:             0.1.0.0
 synopsis:            A flexible, extensible s-expression library.
+homepage:            https://github.com/aisamanra/s-cargot
 description:         S-Cargot is a library for working with s-expressions in
                      a modular and extensible way, opting for genericity and
                      flexibility instead of speed. Instead of understanding
@@ -18,18 +19,22 @@ category:            Data
 build-type:          Simple
 cabal-version:       >=1.10
 
+source-repository head
+   type: git
+   location: git://github.com/aisamanra/s-cargot.git
+
 library
   exposed-modules:     Data.SCargot,
                        Data.SCargot.Repr,
                        Data.SCargot.Repr.Basic,
                        Data.SCargot.Repr.Rich,
                        Data.SCargot.Repr.WellFormed,
-                       Data.SCargot.General,
-                       Data.SCargot.Pretty,
-                       Data.SCargot.Basic,
+                       Data.SCargot.Parse,
+                       Data.SCargot.Print,
                        Data.SCargot.Comments,
                        Data.SCargot.Common,
-                       Data.SCargot.HaskLike
+                       Data.SCargot.Language.Basic,
+                       Data.SCargot.Language.HaskLike
   build-depends:       base >=4.7 && <5,
                        parsec,
                        text,