Print.hs 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. {-# LANGUAGE RecordWildCards #-}
  2. {-# LANGUAGE OverloadedStrings #-}
  3. {-# LANGUAGE ScopedTypeVariables #-}
  4. module Data.SCargot.Print
  5. ( -- * Pretty-Printing
  6. encodeOne
  7. , encode
  8. , encodeOneLazy
  9. , encodeLazy
  10. -- * Pretty-Printing Control
  11. , SExprPrinter
  12. , Indent(..)
  13. , setFromCarrier
  14. , setMaxWidth
  15. , removeMaxWidth
  16. , setIndentAmount
  17. , setIndentStrategy
  18. -- * Default Printing Strategies
  19. , basicPrint
  20. , flatPrint
  21. , unconstrainedPrint
  22. ) where
  23. import qualified Data.Foldable as F
  24. import Data.Monoid ((<>))
  25. import qualified Data.Sequence as Seq
  26. import Data.Text (Text)
  27. import qualified Data.Text as T
  28. import qualified Data.Text.Lazy as TL
  29. import qualified Data.Text.Lazy.Builder as B
  30. import qualified Data.Traversable as T
  31. import Data.SCargot.Repr
  32. -- | The 'Indent' type is used to determine how to indent subsequent
  33. -- s-expressions in a list, after printing the head of the list.
  34. data Indent
  35. = Swing -- ^ A 'Swing' indent will indent subsequent exprs some fixed
  36. -- amount more than the current line.
  37. --
  38. -- > (foo
  39. -- > bar
  40. -- > baz
  41. -- > quux)
  42. | SwingAfter Int -- ^ A 'SwingAfter' @n@ indent will try to print the
  43. -- first @n@ expressions after the head on the same
  44. -- line as the head, and all after will be swung.
  45. -- 'SwingAfter' @0@ is equivalent to 'Swing'.
  46. --
  47. -- > (foo bar
  48. -- > baz
  49. -- > quux)
  50. | Align -- ^ An 'Align' indent will print the first expression after
  51. -- the head on the same line, and subsequent expressions will
  52. -- be aligned with that one.
  53. --
  54. -- > (foo bar
  55. -- > baz
  56. -- > quux)
  57. deriving (Eq, Show)
  58. -- | A 'SExprPrinter' value describes how to print a given value as an
  59. -- s-expression. The @carrier@ type parameter indicates the value
  60. -- that will be printed, and the @atom@ parameter indicates the type
  61. -- that will represent tokens in an s-expression structure.
  62. data SExprPrinter atom carrier = SExprPrinter
  63. { atomPrinter :: atom -> Text
  64. -- ^ How to serialize a given atom to 'Text'.
  65. , fromCarrier :: carrier -> SExpr atom
  66. -- ^ How to turn a carrier type back into a 'Sexpr'.
  67. , swingIndent :: SExpr atom -> Indent
  68. -- ^ How to indent subsequent expressions, as determined by
  69. -- the head of the list.
  70. , indentAmount :: Int
  71. -- ^ How much to indent after a swung indentation.
  72. , maxWidth :: Maybe Int
  73. -- ^ The maximum width (if any) If this is 'None' then the
  74. -- resulting s-expression might be printed on one line (if
  75. -- 'indentPrint' is 'False') and might be pretty-printed in
  76. -- the most naive way possible (if 'indentPrint' is 'True').
  77. , indentPrint :: Bool
  78. -- ^ Whether to indent or not. This has been retrofitted onto
  79. }
  80. -- | A default 'SExprPrinter' struct that will always print a 'SExpr'
  81. -- as a single line.
  82. flatPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom)
  83. flatPrint printer = SExprPrinter
  84. { atomPrinter = printer
  85. , fromCarrier = id
  86. , swingIndent = const Swing
  87. , indentAmount = 2
  88. , maxWidth = Nothing
  89. , indentPrint = False
  90. }
  91. -- | A default 'SExprPrinter' struct that will always swing subsequent
  92. -- expressions onto later lines if they're too long, indenting them
  93. -- by two spaces, and uses a soft maximum width of 80 characters
  94. basicPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom)
  95. basicPrint printer = SExprPrinter
  96. { atomPrinter = printer
  97. , fromCarrier = id
  98. , swingIndent = const Swing
  99. , indentAmount = 2
  100. , maxWidth = Just 80
  101. , indentPrint = True
  102. }
  103. -- | A default 'SExprPrinter' struct that will always swing subsequent
  104. -- expressions onto later lines if they're too long, indenting them by
  105. -- two spaces, but makes no effort to keep the pretty-printed sources
  106. -- inside a maximum width. In the case that we want indented printing
  107. -- but don't care about a "maximum" width, we can print more
  108. -- efficiently than in other situations.
  109. unconstrainedPrint :: (atom -> Text) -> SExprPrinter atom (SExpr atom)
  110. unconstrainedPrint printer = SExprPrinter
  111. { atomPrinter = printer
  112. , fromCarrier = id
  113. , swingIndent = const Swing
  114. , indentAmount = 2
  115. , maxWidth = Nothing
  116. , indentPrint = True
  117. }
  118. data Size = Size
  119. { sizeSum :: !Int
  120. , sizeMax :: !Int
  121. } deriving (Show)
  122. -- | This is an intermediate representation which is like (but not
  123. -- identical to) a RichSExpr representation. In particular, it has a
  124. -- special case for empty lists, and it also keeps a single piece of
  125. -- indent information around for each list
  126. data Intermediate
  127. = IAtom Text
  128. -- ^ An atom, already serialized
  129. | IList Indent Size Intermediate (Seq.Seq Intermediate) (Maybe Text)
  130. -- ^ A (possibly-improper) list, with the intended indentation
  131. -- strategy, the head of the list, the main set of elements, and the
  132. -- final improper element (if it exists)
  133. | IEmpty
  134. -- ^ An empty list
  135. deriving Show
  136. sizeOf :: Intermediate -> Size
  137. sizeOf IEmpty = Size 2 2
  138. sizeOf (IAtom t) = Size len len where len = T.length t
  139. sizeOf (IList _ (Size n m) _ _ _) = Size (n + 2) (m + 2)
  140. concatSize :: Size -> Size -> Size
  141. concatSize l r = Size
  142. { sizeSum = sizeSum l + 1 + sizeSum r
  143. , sizeMax = sizeMax l `max` sizeMax r
  144. }
  145. toIntermediate :: SExprPrinter a (SExpr a) -> SExpr a -> Intermediate
  146. toIntermediate
  147. SExprPrinter { atomPrinter = printAtom
  148. , swingIndent = swing
  149. } = headOf
  150. where
  151. headOf (SAtom a) = IAtom (printAtom a)
  152. headOf SNil = IEmpty
  153. headOf (SCons x xs) =
  154. gather (swing x) hd Seq.empty xs (sizeOf hd) where hd = headOf x
  155. gather sw hd rs SNil sz =
  156. IList sw sz hd rs Nothing
  157. gather sw hd rs (SAtom a) sz =
  158. IList sw (sz `concatSize` aSize) hd rs (Just aStr)
  159. where aSize = Size (T.length aStr) (T.length aStr)
  160. aStr = printAtom a
  161. gather sw hd rs (SCons x xs) sz =
  162. gather sw hd (rs Seq.|> x') xs (sz `concatSize` sizeOf x')
  163. where x' = headOf x
  164. unboundIndentPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> TL.Text
  165. unboundIndentPrintSExpr spec = finalize . go . toIntermediate spec
  166. where
  167. finalize = B.toLazyText . joinLinesS
  168. go :: Intermediate -> Seq.Seq B.Builder
  169. go (IAtom t) = Seq.singleton (B.fromText t)
  170. go IEmpty = Seq.singleton (B.fromString "()")
  171. -- this case should never be called with an empty argument to
  172. -- @values@, as that should have been translated to @IEmpty@
  173. -- instead.
  174. go (IList iv _ initial values rest)
  175. -- if we're looking at an s-expression that has no nested
  176. -- s-expressions, then we might as well consider it flat and let
  177. -- it take the whole line
  178. | Just strings <- T.traverse ppBasic (initial Seq.<| values) =
  179. Seq.singleton (B.singleton '(' <> buildUnwords strings <> pTail rest)
  180. -- it's not "flat", so we might want to swing after the first thing
  181. | Swing <- iv =
  182. -- if this match fails, then it means we've failed to
  183. -- convert to an Intermediate correctly!
  184. let butLast = insertParen (go initial) <> fmap doIndent (F.foldMap go values)
  185. in handleTail rest butLast
  186. -- ...or after several things
  187. | SwingAfter n <- iv =
  188. let (hs, xs) = Seq.splitAt n (initial Seq.<| values)
  189. hd = B.singleton '(' <> buildUnwords (F.foldMap go hs)
  190. butLast = hd Seq.<| fmap doIndent (F.foldMap go xs)
  191. in handleTail rest butLast
  192. -- the 'align' choice is clunkier because we need to know how
  193. -- deep to indent, so we have to force the first builder to grab its size
  194. | otherwise =
  195. let -- so we grab that and figure out its length plus two (for
  196. -- the leading paren and the following space). This uses a
  197. -- max because it's possible the first thing is itself a
  198. -- multi-line s-expression (in which case it seems like
  199. -- using the Align strategy is a terrible idea, but who am
  200. -- I to quarrel with the wild fruits upon the Tree of
  201. -- Life?)
  202. len = 2 + F.maximum (fmap (TL.length . B.toLazyText) (go initial))
  203. in case Seq.viewl values of
  204. -- if there's nothing after the head of the expression, then
  205. -- we simply close it
  206. Seq.EmptyL -> insertParen (insertCloseParen (go initial))
  207. -- otherwise, we put the first two things on the same line
  208. -- with spaces and everything else gets indended the
  209. -- forementioned length
  210. y Seq.:< ys ->
  211. let hd = B.singleton '(' <> buildUnwords (F.foldMap go (Seq.fromList [initial, y]))
  212. butLast = hd Seq.<| fmap (doIndentOf (fromIntegral len)) (F.foldMap go ys)
  213. in handleTail rest butLast
  214. doIndent :: B.Builder -> B.Builder
  215. doIndent = doIndentOf (indentAmount spec)
  216. doIndentOf :: Int -> B.Builder -> B.Builder
  217. doIndentOf n b = B.fromText (T.replicate n " ") <> b
  218. insertParen :: Seq.Seq B.Builder -> Seq.Seq B.Builder
  219. insertParen s = case Seq.viewl s of
  220. Seq.EmptyL -> s
  221. x Seq.:< xs -> (B.singleton '(' <> x) Seq.<| xs
  222. handleTail :: Maybe Text -> Seq.Seq B.Builder -> Seq.Seq B.Builder
  223. handleTail Nothing = insertCloseParen
  224. handleTail (Just t) =
  225. (Seq.|> (B.fromString " . " <> B.fromText t <> B.singleton ')'))
  226. insertCloseParen :: Seq.Seq B.Builder -> Seq.Seq B.Builder
  227. insertCloseParen s = case Seq.viewr s of
  228. Seq.EmptyR -> Seq.singleton (B.singleton ')')
  229. xs Seq.:> x -> xs Seq.|> (x <> B.singleton ')')
  230. buildUnwords sq =
  231. case Seq.viewl sq of
  232. Seq.EmptyL -> mempty
  233. t Seq.:< ts -> t <> F.foldMap (\ x -> B.singleton ' ' <> x) ts
  234. pTail Nothing = B.singleton ')'
  235. pTail (Just t) = B.fromString " . " <> B.fromText t <> B.singleton ')'
  236. ppBasic (IAtom t) = Just (B.fromText t)
  237. ppBasic (IEmpty) = Just (B.fromString "()")
  238. ppBasic _ = Nothing
  239. -- | Modify the carrier type of a 'SExprPrinter' by describing how
  240. -- to convert the new type back to the previous type. For example,
  241. -- to pretty-print a well-formed s-expression, we can modify the
  242. -- 'SExprPrinter' value as follows:
  243. --
  244. -- >>> let printer = setFromCarrier fromWellFormed (basicPrint id)
  245. -- >>> encodeOne printer (WFSList [WFSAtom "ele", WFSAtom "phant"])
  246. -- "(ele phant)"
  247. setFromCarrier :: (c -> b) -> SExprPrinter a b -> SExprPrinter a c
  248. setFromCarrier fc pr = pr { fromCarrier = fromCarrier pr . fc }
  249. -- | Dictate a maximum width for pretty-printed s-expressions.
  250. --
  251. -- >>> let printer = setMaxWidth 8 (basicPrint id)
  252. -- >>> encodeOne printer (L [A "one", A "two", A "three"])
  253. -- "(one \n two\n three)"
  254. setMaxWidth :: Int -> SExprPrinter atom carrier -> SExprPrinter atom carrier
  255. setMaxWidth n pr = pr { maxWidth = Just n }
  256. -- | Allow the serialized s-expression to be arbitrarily wide. This
  257. -- makes all pretty-printing happen on a single line.
  258. --
  259. -- >>> let printer = removeMaxWidth (basicPrint id)
  260. -- >>> encodeOne printer (L [A "one", A "two", A "three"])
  261. -- "(one two three)"
  262. removeMaxWidth :: SExprPrinter atom carrier -> SExprPrinter atom carrier
  263. removeMaxWidth pr = pr { maxWidth = Nothing }
  264. -- | Set the number of spaces that a subsequent line will be indented
  265. -- after a swing indentation.
  266. --
  267. -- >>> let printer = setMaxWidth 12 (basicPrint id)
  268. -- >>> encodeOne printer (L [A "elephant", A "pachyderm"])
  269. -- "(elephant \n pachyderm)"
  270. -- >>> encodeOne (setIndentAmount 4) (L [A "elephant", A "pachyderm"])
  271. -- "(elephant \n pachyderm)"
  272. setIndentAmount :: Int -> SExprPrinter atom carrier -> SExprPrinter atom carrier
  273. setIndentAmount n pr = pr { indentAmount = n }
  274. -- | Dictate how to indent subsequent lines based on the leading
  275. -- subexpression in an s-expression. For details on how this works,
  276. -- consult the documentation of the 'Indent' type.
  277. --
  278. -- >>> let indent (A "def") = SwingAfter 1; indent _ = Swing
  279. -- >>> let printer = setIndentStrategy indent (setMaxWidth 8 (basicPrint id))
  280. -- >>> encodeOne printer (L [ A "def", L [ A "func", A "arg" ], A "body" ])
  281. -- "(def (func arg)\n body)"
  282. -- >>> encodeOne printer (L [ A "elephant", A "among", A "pachyderms" ])
  283. -- "(elephant \n among\n pachyderms)"
  284. setIndentStrategy :: (SExpr atom -> Indent) -> SExprPrinter atom carrier -> SExprPrinter atom carrier
  285. setIndentStrategy st pr = pr { swingIndent = st }
  286. spaceDot :: B.Builder
  287. spaceDot = B.singleton ' ' <> B.singleton '.' <> B.singleton ' '
  288. -- Indents a line by n spaces
  289. indent :: Int -> B.Builder -> B.Builder
  290. indent n ts =
  291. mconcat [ B.singleton ' ' | _ <- [1..n]] <> ts
  292. -- Sort of like 'unlines' but without the trailing newline
  293. joinLinesS :: Seq.Seq B.Builder -> B.Builder
  294. joinLinesS s = case Seq.viewl s of
  295. Seq.EmptyL -> ""
  296. t Seq.:< ts
  297. | F.null ts -> t
  298. | otherwise -> t <> B.fromString "\n" <> joinLinesS ts
  299. -- Sort of like 'unlines' but without the trailing newline
  300. unwordsS :: Seq.Seq B.Builder -> B.Builder
  301. unwordsS s = case Seq.viewl s of
  302. Seq.EmptyL -> ""
  303. t Seq.:< ts
  304. | F.null ts -> t
  305. | otherwise -> t <> " " <> unwordsS ts
  306. -- Indents every line n spaces, and adds a newline to the beginning
  307. -- used in swung indents
  308. indentAllS :: Int -> Seq.Seq B.Builder -> B.Builder
  309. indentAllS n = ("\n" <>) . joinLinesS . fmap (indent n)
  310. -- Indents every line but the first by some amount
  311. -- used in aligned indents
  312. indentSubsequentS :: Int -> Seq.Seq B.Builder -> B.Builder
  313. indentSubsequentS n s = case Seq.viewl s of
  314. Seq.EmptyL -> ""
  315. t Seq.:< ts
  316. | F.null ts -> t
  317. | otherwise -> joinLinesS (t Seq.<| fmap (indent n) ts)
  318. -- oh god this code is so disgusting
  319. -- i'm sorry to everyone i let down by writing this
  320. -- i swear i'll do better in the future i promise i have to
  321. -- for my sake and for everyone's
  322. -- | Pretty-print a 'SExpr' according to the options in a
  323. -- 'LayoutOptions' value.
  324. prettyPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> TL.Text
  325. prettyPrintSExpr pr@SExprPrinter { .. } expr = case maxWidth of
  326. Nothing
  327. | indentPrint -> unboundIndentPrintSExpr pr (fromCarrier expr)
  328. | otherwise -> flatPrintSExpr (fmap atomPrinter (fromCarrier expr))
  329. Just w -> indentPrintSExpr' w pr expr
  330. indentPrintSExpr' :: Int -> SExprPrinter a (SExpr a) -> SExpr a -> TL.Text
  331. indentPrintSExpr' maxAmt pr@SExprPrinter { .. } = B.toLazyText . pp 0 . toIntermediate pr
  332. where
  333. pp _ IEmpty = B.fromString "()"
  334. pp _ (IAtom t) = B.fromText t
  335. pp ind (IList i sz h values end) =
  336. -- we always are going to have a head, a (possibly empty) body,
  337. -- and a (possibly empty) tail in our list formats
  338. B.singleton '(' <> hd <> body <> tl <> B.singleton ')'
  339. where
  340. -- the tail is either nothing, or the final dotted pair
  341. tl = case end of
  342. Nothing -> mempty
  343. Just x -> B.fromString " . " <> B.fromText x
  344. -- the head is the pretty-printed head, with an ambient
  345. -- indentation of +1 to account for the left paren
  346. hd = pp (ind+1) h
  347. headWidth = sizeSum (sizeOf h)
  348. indented =
  349. case i of
  350. SwingAfter n ->
  351. let (l, ls) = Seq.splitAt n values
  352. t = unwordsS (fmap (pp (ind+1)) l)
  353. ts = indentAllS (ind + indentAmount)
  354. (fmap (pp (ind + indentAmount)) ls)
  355. in t <> ts
  356. Swing ->
  357. indentAllS (ind + indentAmount)
  358. (fmap (pp (ind + indentAmount)) values)
  359. Align ->
  360. indentSubsequentS (ind + headWidth + 1)
  361. (fmap (pp (ind + headWidth + 1)) values)
  362. body
  363. -- if there's nothing here, then we don't have anything to
  364. -- indent
  365. | length values == 0 = mempty
  366. -- if we can't fit the whole next s-expression on the same
  367. -- line, then we use the indented form
  368. | sizeSum sz + ind > maxAmt = B.singleton ' ' <> indented
  369. | otherwise =
  370. -- otherwise we print the whole thing on one line!
  371. B.singleton ' ' <> unwordsS (fmap (pp (ind + 1)) values)
  372. -- if we don't indent anything, then we can ignore a bunch of the
  373. -- details above
  374. flatPrintSExpr :: SExpr Text -> TL.Text
  375. flatPrintSExpr = B.toLazyText . pHead
  376. where
  377. pHead (SCons x xs) =
  378. B.singleton '(' <> pHead x <> pTail xs
  379. pHead (SAtom t) =
  380. B.fromText t
  381. pHead SNil =
  382. B.singleton '(' <> B.singleton ')'
  383. pTail (SCons x xs) =
  384. B.singleton ' ' <> pHead x <> pTail xs
  385. pTail (SAtom t) =
  386. spaceDot <>
  387. B.fromText t <>
  388. B.singleton ')'
  389. pTail SNil =
  390. B.singleton ')'
  391. -- | Turn a single s-expression into a string according to a given
  392. -- 'SExprPrinter'.
  393. encodeOne :: SExprPrinter atom carrier -> carrier -> Text
  394. encodeOne s@(SExprPrinter { .. }) =
  395. TL.toStrict . prettyPrintSExpr (s { fromCarrier = id }) . fromCarrier
  396. -- | Turn a list of s-expressions into a single string according to
  397. -- a given 'SExprPrinter'.
  398. encode :: SExprPrinter atom carrier -> [carrier] -> Text
  399. encode spec =
  400. T.intercalate "\n\n" . map (encodeOne spec)
  401. -- | Turn a single s-expression into a lazy 'Text' according to a given
  402. -- 'SExprPrinter'.
  403. encodeOneLazy :: SExprPrinter atom carrier -> carrier -> TL.Text
  404. encodeOneLazy s@(SExprPrinter { .. }) =
  405. prettyPrintSExpr (s { fromCarrier = id }) . fromCarrier
  406. -- | Turn a list of s-expressions into a lazy 'Text' according to
  407. -- a given 'SExprPrinter'.
  408. encodeLazy :: SExprPrinter atom carrier -> [carrier] -> TL.Text
  409. encodeLazy spec = TL.intercalate "\n\n" . map (encodeOneLazy spec)