Browse Source

New pretty-printer with greatly improved performance.

The implementation is not pretty, and there is much more tedious
explicitness than graceful eloquence in this code.  The problem domain
is difficult to solve, but the general approach here is:

  1. Pre-calculate *unconstrained* widths of each element (possibly as a
     composition of its sub-elements.

  2. For each multi-part sub-expression, consider the following (in
     order of preference):

        a. All elements on one line.
        b. If first element is an `(SCons (SAtom _) r)` then try
           to place the atom and the first element of `r` on the
           same line, lining up all subsequent elements of `r`
           beneath the first one (essentially the `Align`
           indentation).
        c. All elements are on their own line.
Kevin Quick 6 years ago
parent
commit
ed9cd8dc37
1 changed files with 203 additions and 41 deletions
  1. 203 41
      Data/SCargot/Print.hs

+ 203 - 41
Data/SCargot/Print.hs

@@ -19,6 +19,7 @@ module Data.SCargot.Print
          , flatPrint
          ) where
 
+import           Control.Applicative
 import           Data.Monoid ((<>))
 import           Data.Text (Text)
 import qualified Data.Text as T
@@ -180,10 +181,10 @@ indentSubsequent n (t:ts) = joinLines (t : go ts)
 prettyPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> Text
 prettyPrintSExpr pr@SExprPrinter { .. } expr = case maxWidth of
   Nothing -> flatPrintSExpr (fmap atomPrinter (fromCarrier expr))
-  Just _  -> indentPrintSExpr pr expr
+  Just w  -> indentPrintSExpr2 pr w expr
 
-indentPrintSExpr :: SExprPrinter a (SExpr a) -> SExpr a -> Text
-indentPrintSExpr SExprPrinter { .. } = pHead 0
+indentPrintSExpr :: SExprPrinter a (SExpr a) -> Int -> SExpr a -> Text
+indentPrintSExpr SExprPrinter { .. } _ = pHead 0
   where
     pHead _   SNil         = "()"
     pHead _   (SAtom a)    = atomPrinter a
@@ -218,44 +219,205 @@ indentPrintSExpr SExprPrinter { .. } = pHead 0
               , T.length flat + ind > maxAmt = " " <> indented
               | otherwise = " " <> flat
 
-  -- where
-  --   -- this is the base-case that knows how to print empty lists and
-  --   -- atoms
-  --   pHead _   SNil         = B.fromString "()"
-  --   pHead _   (SAtom a)    = B.fromText a
-  --   pHead ind (SCons x xs) = gather ind x xs id 0
-
-  --   -- otherwise, we trawl through the list grabbing every element...
-  --   gather ind h (SCons x xs) k r = gather ind h xs (k . (x:)) (r + T.length x)
-  --   gather ind h end          k r = B.fromString "(" <> hd <> body <> tl <> B.fromString ")"
-  --     where
-  --       tl   = case end of
-  --                SNil      -> mempty
-  --                SAtom a   -> B.fromString " . " <> B.fromText a
-  --                SCons _ _ -> error "[unreachable]"
-  --       hd   = indentSubsequent ind [pHead (ind+1) h]
-  --       lst  = k []
-  --       flat = T.unwords (map (pHead (ind+1)) lst)
-  --       headWidth = T.length hd + 1
-  --       indented =
-  --         case swingIndent h of
-  --           SwingAfter n ->
-  --             let (l, ls) = splitAt n lst
-  --                 t  = T.unwords (map (pHead (ind+1)) l)
-  --                 ts = indentAll (ind + indentAmount)
-  --                                (map (pHead (ind + indentAmount)) ls)
-  --             in t <> ts
-  --           Swing ->
-  --             indentAll (ind + indentAmount)
-  --               (map (pHead (ind + indentAmount)) lst)
-  --           Align ->
-  --             indentSubsequent (ind + headWidth + 1)
-  --               (map (pHead (ind + headWidth + 1)) lst)
-  --       body
-  --         | length lst == 0              = B.fromString ""
-  --         | Just maxAmt <- maxWidth
-  --         , T.length flat + ind > maxAmt = B.fromString " " <> indented
-  --         | otherwise                    = B.fromString " " <> flat
+-- | Pretty-printing for S-Expressions.  The general strategy is that
+-- an SCons tail should either all fit on the current line, or else
+-- each tail item should be placed on its own line with indenting.
+-- Note that a line must print something, so while subsequent elements
+-- will be placed on following lines, it is possible that the first
+-- thing on a line (plus its indentation) may exceed the maxwidth.
+
+type IndentSpec = Int
+type Indenting = Maybe IndentSpec
+
+data PPS = PPS { indentWc :: IndentSpec
+               , remWidth :: Int
+               , numClose :: Int
+               }
+         deriving Show
+
+data SElem = SText Int T.Text
+           | SPair Int SElem SElem
+           | SDecl Int SElem [SElem]
+           | SJoin Int [SElem]
+             deriving (Show, Eq)
+
+sElemSize :: SElem -> Int
+sElemSize (SText n _) = n
+sElemSize (SPair n _ _) = n
+sElemSize (SDecl n _ _) = n
+sElemSize (SJoin n _) = n
+
+indentPrintSExpr2 :: SExprPrinter a (SExpr a) -> Int -> SExpr a -> Text
+indentPrintSExpr2 SExprPrinter { .. } maxW sexpr =
+    let atomTextTree = selems sexpr
+        pretty = fmap addIndent $ fst $ pHead (PPS 0 maxW 0) atomTextTree
+        -- prettyWithDebug = pretty <> ["", (T.pack $ show atomTextTree)]
+    in T.unlines pretty
+  where
+    -- selems converts the (SExpr a) into an SElem, converting
+    -- individual atoms to their text format but not concerned with
+    -- other text formatting.  The resulting SElem tree will be
+    -- iterated over to determine the wrapping strategy to apply.
+    selems SNil = SText 2 "()"
+    selems (SAtom a) = let p = atomPrinter a in SText (T.length p) p
+    selems (SCons l r) =
+        let l' = selems l
+            lsz = sElemSize l'
+            r' = selems r
+            rsz = sElemSize r'
+            bsz = lsz + rsz
+        in case r of
+             SNil -> SJoin lsz [l']
+             SAtom _ -> SPair bsz l' r'
+             _ -> case l of
+                    SAtom _ -> case r' of
+                                 SJoin _ rl' -> SDecl bsz l' rl'
+                                 SDecl _ d dl -> SDecl bsz l' (d:dl)
+                                 _ -> SDecl bsz l' [r']
+                    _ -> SJoin bsz $ prefixOnto l' r'
+
+    prefixOnto e (SJoin _ l) = e:l
+    prefixOnto e (SDecl _ l r) = e:l:r
+    prefixOnto e r = [e,r]
+
+    addIndent (Nothing, t) = t
+    addIndent (Just n, t) = indent n t
+
+    nextIndent = incrIndent indentAmount
+    incrIndent v n = n + v
+
+    pHead :: PPS -> SElem -> ( [(Indenting, Text)], PPS )
+    pHead pps (SText _ t) = ( [(Nothing, t)]
+                            , pps { remWidth = remWidth pps - T.length t})
+    pHead pps (SPair _ e1 e2) =
+        let (t1,pps1) = pHead pps e1
+            (t2,pps2) = pTail ppsNextLine e2
+            (t3,pps3) = pTail ppsSameLine e2  -- same line
+            ppsNextLine = pps { remWidth = remWidth pps - T.length sep }
+            ppsSameLine = pps1 { remWidth = remWidth pps1 - T.length sep }
+            sep = " . "
+            t1h = head t1
+            wrapJoin i l rs = wrapT i (snd l <> sep) rs
+            sameLine l r p = (wrapJoin (indentWc pps) l r, p)
+            separateLines l r p = (wrapTWith False "(" "" (indentWc pps) "" l ++
+                                   wrapTWith True sep ")" (indentWc pps) "" r, p)
+        in if length t1 > 1 || remWidth pps3 < numClose pps + 5
+           then separateLines t1 t2 pps2
+           else sameLine t1h t3 pps3
+    -- An SJoin is a sequence of elements at the same rank.  They are
+    -- either all placed on a single line, or one on each line.
+    pHead pps (SJoin _ []) = ( [], pps )
+    pHead pps (SJoin els others) =
+        let (t1,_) = pHead pps $ head others
+            (t3,pps3) = foldl pTail' ([], pps) others
+            pTail' :: ([(Indenting, Text)], PPS)
+                   -> SElem
+                   -> ([(Indenting, Text)], PPS)
+            pTail' (rl,pp) ne = let (rt,pr) = pTail pp ne
+                                    hrl = head rl
+                                    hrt = head rt
+                                in if length rt == 1
+                                   then case length rl of
+                                          0 -> (rt, pr)
+                                          1 -> ((fst hrl, snd hrl <> " " <> snd hrt):[], pr)
+                                          _ -> (rl <> rt, pr)
+                                   else (rl <> rt, pr)
+            sameLine parts pEnd = (wrapT (indentWc ppsSame) "" parts, pEnd)
+            ppsNext = pps { indentWc = nextIndent (indentWc pps)
+                          , remWidth = remWidth pps - indentAmount
+                          }
+            ppsSame = pps { indentWc = nextIndent (indentWc pps)
+                          , remWidth = remWidth pps - indentAmount
+                          }
+            ppsMulti = pps { indentWc = nextIndent (indentWc pps)
+                           , remWidth = remWidth pps - indentAmount
+                           }
+            pps3' = pps3
+            separateLines elems pEnd =
+                let lr = concatMap (fst . pTail pEnd) elems
+                in (wrapTWith False "(" ")" (indentWc ppsNext) "" lr, pEnd)
+        in if els > remWidth pps3 || length t1 > 1 || remWidth pps3 < numClose pps + 5
+           then separateLines others ppsMulti
+           else sameLine t3 pps3'
+    --  For an SDecl, always put the first element on the line.  If
+    --  *all* other elements fit on the same line, do that, otherwise
+    --  all other elements should appear on subsequent lines with
+    --  indentation.  This will produce left-biased wrapping: wrapping
+    --  will occur near the root of the SExp tree more than at the
+    --  leaves.
+    pHead pps (SDecl els first others) =
+        let (t1,pps1) = pHead pp2 first
+            (to1,_) = pTail pps1 (head others)
+            firstPlusFits = sElemSize first + sElemSize (head others) < (remWidth pps - 4)
+            allFits = els < (remWidth pps - length others - 3)
+            tryFirstArgSameLine = case swingIndent (SCons SNil (SCons SNil SNil)) of
+                                    Align -> True
+                                    _ -> False
+            pp2 = pps { indentWc = nextIndent (indentWc pps)
+                      , remWidth = remWidth pps - 1 - indentAmount
+                      , numClose = numClose pps + 1
+                      }
+            pp2next = pp2
+            pp2solo = pp2
+            t1h = head t1
+            pps1' = pps1 { indentWc = incrIndent (T.length (snd t1h) + 1)
+                                                 (indentWc pps1)
+                         , remWidth = remWidth pps1 - T.length (snd t1h) - 1
+                         }
+            tothers = concatMap (fst . pTail pp2next) others -- multiline
+            tothers' = concatMap (fst . pTail pps1') $ tail others -- multiline from 2nd
+            (others', ppone) = foldl foldPTail ([],pps1) others -- oneline
+            (others'', ppone') = foldl foldPTail ([],pps1') $ tail others -- multiline from 2nd
+            foldPTail (tf,ppf) o = let (ot,opp) = pTail ppf o
+                                       tf1 = head tf
+                                       tr = if length ot == 1
+                                            then case length tf of
+                                                   0 -> ot
+                                                   1 -> [(fst tf1, snd tf1 <> " " <> snd (head ot))]
+                                                   _ -> tf ++ ot
+                                            else tf ++ ot
+                                   in (tr, opp)
+            separateLines l r p =
+                let wr = if null r then []
+                         else wrapTWith True "" ")" (indentWc p) "" r
+                    cl = if null r then ")" else ""
+                in (wrapTWith False "(" cl (indentWc pps) "" l <> wr, pp2)
+            maybeSameLine l (r1,p1) (rn,p2) =
+                if length r1 <= 1 && remWidth p1 > numClose p1
+                then (wrapT (indentWc pps) (snd l <> " ") r1, p1)
+                else separateLines [l] rn p2
+        in if allFits && length t1 < 2
+           then maybeSameLine t1h (others',ppone) (tothers,pp2solo)
+           else if (tryFirstArgSameLine && firstPlusFits &&
+                    length t1 < 2 &&
+                    length to1 < 2 &&
+                    not (null to1) && not (null others))
+                then maybeSameLine (fst t1h,
+                                    snd t1h <> " " <> snd (head to1)) (others'',ppone') (tothers',pps1')
+                else separateLines t1 tothers pp2
+
+
+    pTail = pHead
+
+
+wrapTWith :: Bool -> T.Text -> T.Text -> IndentSpec
+          -> T.Text
+          -> [(Indenting, T.Text)]
+          -> [(Indenting, T.Text)]
+wrapTWith isContinued st en ind hstart ts =
+    let th = head ts
+        tt = last ts
+        tb = init $ tail ts
+        tp l = (fst l <|> Just ind, snd l)
+        fi = if isContinued then Just ind else Nothing
+    in if length ts > 1
+       then (((fi, st <> hstart <> snd th) : map tp tb) ++
+             [ tp $ (fst tt, snd tt <> en) ])
+       else [(fi, st <> hstart <> snd th <> en)]
+
+wrapT :: IndentSpec -> T.Text -> [(Indenting, T.Text)] -> [(Indenting, T.Text)]
+wrapT = wrapTWith False "(" ")"
+
 
 -- if we don't indent anything, then we can ignore a bunch of the
 -- details above