123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465 |
- {-# LANGUAGE RecordWildCards #-}
- {-# LANGUAGE OverloadedStrings #-}
- {-# LANGUAGE ScopedTypeVariables #-}
- module Data.SCargot.Print
- ( -- * Pretty-Printing
- encodeOne
- , encode
- , encodeOneLazy
- , encodeLazy
- -- * Pretty-Printing Control
- , SExprPrinter
- , Indent(..)
- , setFromCarrier
- , setMaxWidth
- , removeMaxWidth
- , setIndentAmount
- , setIndentStrategy
- -- * Default Printing Strategies
- , basicPrint
- , flatPrint
- , unconstrainedPrint
- ) where
- import qualified Data.Foldable as F
- import Data.Monoid ((<>))
- import qualified Data.Sequence as Seq
- import Data.Text (Text)
- import qualified Data.Text as T
- import qualified Data.Text.Lazy as TL
- import qualified Data.Text.Lazy.Builder as B
- import qualified Data.Traversable as T
- import Data.SCargot.Repr
- -- | The 'Indent' type is used to determine how to indent subsequent
- -- s-expressions in a list, after printing the head of the list.
- data Indent
- = Swing -- ^ A 'Swing' indent will indent subsequent exprs some fixed
- -- amount more than the current line.
- --
- -- > (foo
- -- > bar
- -- > baz
- -- > quux)
- | SwingAfter Int -- ^ A 'SwingAfter' @n@ indent will try to print the
- -- first @n@ expressions after the head on the same
- -- line as the head, and all after will be swung.
- -- 'SwingAfter' @0@ is equivalent to 'Swing'.
- --
- -- > (foo bar
- -- > baz
- -- > quux)
- | Align -- ^ An 'Align' indent will print the first expression after
- -- the head on the same line, and subsequent expressions will
- -- be aligned with that one.
- --
- -- > (foo bar
- -- > baz
- -- > quux)
- deriving (Eq, Show)
- -- | A 'SExprPrinter' value describes how to print a given value as an
- -- s-expression. The @carrier@ type parameter indicates the value
- -- that will be printed, and the @atom@ parameter indicates the type
- -- that will represent tokens in an s-expression structure.
- data SExprPrinter atom carrier = SExprPrinter
- { atomPrinter :: atom -> Text
- -- ^ How to serialize a given atom to 'Text'.
- , fromCarrier :: carrier -> SExpr atom
- -- ^ How to turn a carrier type back into a 'Sexpr'.
- , swingIndent :: SExpr atom -> Indent
- -- ^ How to indent subsequent expressions, as determined by
- -- the head of the list.
- , indentAmount :: Int
- -- ^ How much to indent after a swung indentation.
- , maxWidth :: Maybe Int
- -- ^ The maximum width (if any) If this is 'None' then the
- -- resulting s-expression might be printed on one line (if
- -- 'indentPrint' is 'False') and might be pretty-printed in
- -- the most naive way possible (if 'indentPrint' is 'True').
- , indentPrint :: Bool
- -- ^ Whether to indent or not. This has been retrofitted onto
- }
- -- | A default 'SExprPrinter' struct that will always print a 'SExpr'
- -- as a single line.
- flatPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom)
- flatPrint printer = SExprPrinter
- { atomPrinter = printer
- , fromCarrier = id
- , swingIndent = const Swing
- , indentAmount = 2
- , maxWidth = Nothing
- , indentPrint = False
- }
- -- | A default 'SExprPrinter' struct that will always swing subsequent
- -- expressions onto later lines if they're too long, indenting them
- -- by two spaces, and uses a soft maximum width of 80 characters
- basicPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom)
- basicPrint printer = SExprPrinter
- { atomPrinter = printer
- , fromCarrier = id
- , swingIndent = const Swing
- , indentAmount = 2
- , maxWidth = Just 80
- , indentPrint = True
- }
- -- | A default 'SExprPrinter' struct that will always swing subsequent
- -- expressions onto later lines if they're too long, indenting them by
- -- two spaces, but makes no effort to keep the pretty-printed sources
- -- inside a maximum width. In the case that we want indented printing
- -- but don't care about a "maximum" width, we can print more
- -- efficiently than in other situations.
- unconstrainedPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom)
- unconstrainedPrint printer = SExprPrinter
- { atomPrinter = printer
- , fromCarrier = id
- , swingIndent = const Swing
- , indentAmount = 2
- , maxWidth = Nothing
- , indentPrint = True
- }
- data Size = Size
- { sizeSum :: !Int
- , sizeMax :: !Int
- } deriving (Show)
- -- | This is an intermediate representation which is like (but not
- -- identical to) a RichSExpr representation. In particular, it has a
- -- special case for empty lists, and it also keeps a single piece of
- -- indent information around for each list
- data Intermediate
- = IAtom Text
- -- ^ An atom, already serialized
- | IList Indent Size Intermediate (Seq.Seq Intermediate) (Maybe Text)
- -- ^ A (possibly-improper) list, with the intended indentation
- -- strategy, the head of the list, the main set of elements, and the
- -- final improper element (if it exists)
- | IEmpty
- -- ^ An empty list
- sizeOf :: Intermediate -> Size
- sizeOf IEmpty = Size 2 2
- sizeOf (IAtom t) = Size len len where len = T.length t
- sizeOf (IList _ s _ _ _) = s
- concatSize :: Size -> Size -> Size
- concatSize l r = Size
- { sizeSum = sizeSum l + 1 + sizeSum r
- , sizeMax = sizeMax l `max` sizeMax r
- }
- toIntermediate :: SExprPrinter a (SExpr a) -> SExpr a -> Intermediate
- toIntermediate
- SExprPrinter { atomPrinter = printAtom
- , swingIndent = swing
- } = headOf
- where
- headOf (SAtom a) = IAtom (printAtom a)
- headOf SNil = IEmpty
- headOf (SCons x xs) =
- gather (swing x) hd Seq.empty xs (sizeOf hd) where hd = headOf x
- gather sw hd rs SNil sz =
- IList sw sz hd rs Nothing
- gather sw hd rs (SAtom a) sz =
- IList sw (sz `concatSize` aSize) hd rs (Just aStr)
- where aSize = Size (T.length aStr) (T.length aStr)
- aStr = printAtom a
- gather sw hd rs (SCons x xs) sz =
- gather sw hd (rs Seq.|> x') xs (sz `concatSize` sizeOf x')
- where x' = headOf x
- unboundIndentPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> TL.Text
- unboundIndentPrintSExpr spec = finalize . go . toIntermediate spec
- where
- finalize = B.toLazyText . joinLinesS
- go :: Intermediate -> Seq.Seq B.Builder
- go (IAtom t) = Seq.singleton (B.fromText t)
- go IEmpty = Seq.singleton (B.fromString "()")
- -- this case should never be called with an empty argument to
- -- @values@, as that should have been translated to @IEmpty@
- -- instead.
- go (IList iv _ initial values rest)
- -- if we're looking at an s-expression that has no nested
- -- s-expressions, then we might as well consider it flat and let
- -- it take the whole line
- | Just strings <- T.traverse ppBasic (initial Seq.<| values) =
- Seq.singleton (B.fromString "(" <> buildUnwords strings <> pTail rest)
- -- it's not "flat", so we might want to swing after the first thing
- | Swing <- iv =
- -- if this match fails, then it means we've failed to
- -- convert to an Intermediate correctly!
- let butLast = insertParen (go initial) <> fmap doIndent (F.foldMap go values)
- in handleTail rest butLast
- -- ...or after several things
- | SwingAfter n <- iv =
- let (hs, xs) = Seq.splitAt n (initial Seq.<| values)
- hd = B.fromString "(" <> buildUnwords (F.foldMap go hs)
- butLast = hd Seq.<| fmap doIndent (F.foldMap go xs)
- in handleTail rest butLast
- -- the 'align' choice is clunkier because we need to know how
- -- deep to indent, so we have to force the first builder to grab its size
- | otherwise =
- let -- so we grab that and figure out its length plus two (for
- -- the leading paren and the following space). This uses a
- -- max because it's possible the first thing is itself a
- -- multi-line s-expression (in which case it seems like
- -- using the Align strategy is a terrible idea, but who am
- -- I to quarrel with the wild fruits upon the Tree of
- -- Life?)
- len = 2 + F.maximum (fmap (TL.length . B.toLazyText) (go initial))
- in case Seq.viewl values of
- -- if there's nothing after the head of the expression, then
- -- we simply close it
- Seq.EmptyL -> insertParen (insertCloseParen (go initial))
- -- otherwise, we put the first two things on the same line
- -- with spaces and everything else gets indended the
- -- forementioned length
- y Seq.:< ys ->
- let hd = B.fromString "(" <> buildUnwords (F.foldMap go (Seq.fromList [initial, y]))
- butLast = hd Seq.<| fmap (doIndentOf (fromIntegral len)) (F.foldMap go ys)
- in handleTail rest butLast
- doIndent :: B.Builder -> B.Builder
- doIndent = doIndentOf (indentAmount spec)
- doIndentOf :: Int -> B.Builder -> B.Builder
- doIndentOf n b = B.fromText (T.replicate n " ") <> b
- insertParen :: Seq.Seq B.Builder -> Seq.Seq B.Builder
- insertParen s = case Seq.viewl s of
- Seq.EmptyL -> s
- x Seq.:< xs -> (B.fromString "(" <> x) Seq.<| xs
- handleTail :: Maybe Text -> Seq.Seq B.Builder -> Seq.Seq B.Builder
- handleTail Nothing = insertCloseParen
- handleTail (Just t) =
- (Seq.|> (B.fromString " . " <> B.fromText t <> B.fromString ")"))
- insertCloseParen :: Seq.Seq B.Builder -> Seq.Seq B.Builder
- insertCloseParen s = case Seq.viewr s of
- Seq.EmptyR -> Seq.singleton (B.fromString ")")
- xs Seq.:> x -> xs Seq.|> (x <> B.fromString ")")
- buildUnwords sq =
- case Seq.viewl sq of
- Seq.EmptyL -> mempty
- t Seq.:< ts -> t <> F.foldMap (\ x -> B.fromString " " <> x) ts
- pTail Nothing = B.fromString ")"
- pTail (Just t) = B.fromString " . " <> B.fromText t <> B.fromString ")"
- ppBasic (IAtom t) = Just (B.fromText t)
- ppBasic (IEmpty) = Just (B.fromString "()")
- ppBasic _ = Nothing
- -- | Modify the carrier type of a 'SExprPrinter' by describing how
- -- to convert the new type back to the previous type. For example,
- -- to pretty-print a well-formed s-expression, we can modify the
- -- 'SExprPrinter' value as follows:
- --
- -- >>> let printer = setFromCarrier fromWellFormed (basicPrint id)
- -- >>> encodeOne printer (WFSList [WFSAtom "ele", WFSAtom "phant"])
- -- "(ele phant)"
- setFromCarrier :: (c -> b) -> SExprPrinter a b -> SExprPrinter a c
- setFromCarrier fc pr = pr { fromCarrier = fromCarrier pr . fc }
- -- | Dictate a maximum width for pretty-printed s-expressions.
- --
- -- >>> let printer = setMaxWidth 8 (basicPrint id)
- -- >>> encodeOne printer (L [A "one", A "two", A "three"])
- -- "(one \n two\n three)"
- setMaxWidth :: Int -> SExprPrinter atom carrier -> SExprPrinter atom carrier
- setMaxWidth n pr = pr { maxWidth = Just n }
- -- | Allow the serialized s-expression to be arbitrarily wide. This
- -- makes all pretty-printing happen on a single line.
- --
- -- >>> let printer = removeMaxWidth (basicPrint id)
- -- >>> encodeOne printer (L [A "one", A "two", A "three"])
- -- "(one two three)"
- removeMaxWidth :: SExprPrinter atom carrier -> SExprPrinter atom carrier
- removeMaxWidth pr = pr { maxWidth = Nothing }
- -- | Set the number of spaces that a subsequent line will be indented
- -- after a swing indentation.
- --
- -- >>> let printer = setMaxWidth 12 (basicPrint id)
- -- >>> encodeOne printer (L [A "elephant", A "pachyderm"])
- -- "(elephant \n pachyderm)"
- -- >>> encodeOne (setIndentAmount 4) (L [A "elephant", A "pachyderm"])
- -- "(elephant \n pachyderm)"
- setIndentAmount :: Int -> SExprPrinter atom carrier -> SExprPrinter atom carrier
- setIndentAmount n pr = pr { indentAmount = n }
- -- | Dictate how to indent subsequent lines based on the leading
- -- subexpression in an s-expression. For details on how this works,
- -- consult the documentation of the 'Indent' type.
- --
- -- >>> let indent (A "def") = SwingAfter 1; indent _ = Swing
- -- >>> let printer = setIndentStrategy indent (setMaxWidth 8 (basicPrint id))
- -- >>> encodeOne printer (L [ A "def", L [ A "func", A "arg" ], A "body" ])
- -- "(def (func arg)\n body)"
- -- >>> encodeOne printer (L [ A "elephant", A "among", A "pachyderms" ])
- -- "(elephant \n among\n pachyderms)"
- setIndentStrategy :: (SExpr atom -> Indent) -> SExprPrinter atom carrier -> SExprPrinter atom carrier
- setIndentStrategy st pr = pr { swingIndent = st }
- -- Indents a line by n spaces
- indent :: Int -> B.Builder -> B.Builder
- indent n ts = B.fromText (T.replicate n " ") <> ts
- -- Sort of like 'unlines' but without the trailing newline
- joinLinesS :: Seq.Seq B.Builder -> B.Builder
- joinLinesS s = case Seq.viewl s of
- Seq.EmptyL -> ""
- t Seq.:< ts
- | F.null ts -> t
- | otherwise -> t <> B.fromString "\n" <> joinLinesS ts
- -- Sort of like 'unlines' but without the trailing newline
- unwordsS :: Seq.Seq B.Builder -> B.Builder
- unwordsS s = case Seq.viewl s of
- Seq.EmptyL -> ""
- t Seq.:< ts
- | F.null ts -> t
- | otherwise -> t <> " " <> joinLinesS ts
- -- Indents every line n spaces, and adds a newline to the beginning
- -- used in swung indents
- indentAllS :: Int -> Seq.Seq B.Builder -> B.Builder
- indentAllS n = ("\n" <>) . joinLinesS . fmap (indent n)
- -- Indents every line but the first by some amount
- -- used in aligned indents
- indentSubsequentS :: Int -> Seq.Seq B.Builder -> B.Builder
- indentSubsequentS n s = case Seq.viewl s of
- Seq.EmptyL -> ""
- t Seq.:< ts
- | F.null ts -> t
- | otherwise -> joinLinesS (t Seq.<| fmap (indent n) ts)
- -- oh god this code is so disgusting
- -- i'm sorry to everyone i let down by writing this
- -- i swear i'll do better in the future i promise i have to
- -- for my sake and for everyone's
- -- | Pretty-print a 'SExpr' according to the options in a
- -- 'LayoutOptions' value.
- prettyPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> TL.Text
- prettyPrintSExpr pr@SExprPrinter { .. } expr = case maxWidth of
- Nothing
- | indentPrint -> unboundIndentPrintSExpr pr (fromCarrier expr)
- | otherwise -> flatPrintSExpr (fmap atomPrinter (fromCarrier expr))
- Just w -> indentPrintSExpr' w pr expr
- indentPrintSExpr' :: Int -> SExprPrinter a (SExpr a) -> SExpr a -> TL.Text
- indentPrintSExpr' maxAmt pr@SExprPrinter { .. } = B.toLazyText . pp 0 . toIntermediate pr
- where
- pp _ IEmpty = B.fromString "()"
- pp _ (IAtom t) = B.fromText t
- pp ind (IList i sz h values end) =
- -- we always are going to have a head, a (possibly empty) body,
- -- and a (possibly empty) tail in our list formats
- B.fromString "(" <> hd <> body <> tl <> B.fromString ")"
- where
- -- the tail is either nothing, or the final dotted pair
- tl = case end of
- Nothing -> mempty
- Just x -> B.fromString " . " <> B.fromText x
- -- the head is the pretty-printed head, with an ambient
- -- indentation of +1 to account for the left paren
- hd = pp (ind+1) h
- headWidth = sizeSum (sizeOf h)
- indented =
- case i of
- SwingAfter n ->
- let (l, ls) = Seq.splitAt n values
- t = unwordsS (fmap (pp (ind+1)) l)
- ts = indentAllS (ind + indentAmount)
- (fmap (pp (ind + indentAmount)) ls)
- in t <> ts
- Swing ->
- indentAllS (ind + indentAmount)
- (fmap (pp (ind + indentAmount)) values)
- Align ->
- indentSubsequentS (ind + headWidth + 1)
- (fmap (pp (ind + headWidth + 1)) values)
- body
- -- if there's nothing here, then we don't have anything to
- -- indent
- | length values == 0 = mempty
- -- if we can't fit the whole next s-expression on the same
- -- line, then we use the indented form
- | sizeSum sz + ind > maxAmt = B.fromString " " <> indented
- | otherwise =
- -- otherwise we print the whole thing on one line!
- B.fromString " " <> unwordsS (fmap (pp (ind + 1)) values)
- -- if we don't indent anything, then we can ignore a bunch of the
- -- details above
- flatPrintSExpr :: SExpr Text -> TL.Text
- flatPrintSExpr = B.toLazyText . pHead
- where
- pHead (SCons x xs) =
- B.fromString "(" <> pHead x <> pTail xs
- pHead (SAtom t) =
- B.fromText t
- pHead SNil =
- B.fromString "()"
- pTail (SCons x xs) =
- B.fromString " " <> pHead x <> pTail xs
- pTail (SAtom t) =
- B.fromString " . " <> B.fromText t <> B.fromString ")"
- pTail SNil =
- B.fromString ")"
- -- | Turn a single s-expression into a string according to a given
- -- 'SExprPrinter'.
- encodeOne :: SExprPrinter atom carrier -> carrier -> Text
- encodeOne s@(SExprPrinter { .. }) =
- TL.toStrict . prettyPrintSExpr (s { fromCarrier = id }) . fromCarrier
- -- | Turn a list of s-expressions into a single string according to
- -- a given 'SExprPrinter'.
- encode :: SExprPrinter atom carrier -> [carrier] -> Text
- encode spec =
- T.intercalate "\n\n" . map (encodeOne spec)
- -- | Turn a single s-expression into a lazy 'Text' according to a given
- -- 'SExprPrinter'.
- encodeOneLazy :: SExprPrinter atom carrier -> carrier -> TL.Text
- encodeOneLazy s@(SExprPrinter { .. }) =
- prettyPrintSExpr (s { fromCarrier = id }) . fromCarrier
- -- | Turn a list of s-expressions into a lazy 'Text' according to
- -- a given 'SExprPrinter'.
- encodeLazy :: SExprPrinter atom carrier -> [carrier] -> TL.Text
- encodeLazy spec = TL.intercalate "\n\n" . map (encodeOneLazy spec)
|