{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE OverloadedStrings #-}

module Data.SCargot.General
  ( -- * SExprSpec
    SExprSpec
  , mkSpec
  , convertSpec
  , addReader
  , setComment
    -- * Specific SExprSpec Conversions
  , asRich
  , asWellFormed
  , withSemicolonComments
  , withQuote
    -- * Using a SExprSpec
  , decode
  , decodeOne
  , encode
    -- * Useful Type Aliases
  , Reader
  , Comment
  , Serializer
  ) where

import           Control.Applicative ((<*), (*>), (<|>), (<*>), (<$>), pure)
import           Control.Monad ((>=>))
import           Data.Attoparsec.Text
import           Data.Char (isAlpha, isDigit, isAlphaNum)
import           Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import           Data.Monoid ((<>))
import           Data.Text (Text, pack, unpack)
import qualified Data.Text as T

import           Prelude hiding (takeWhile)

import           Data.SCargot.Repr

type ReaderMacroMap atom = Map Char (Reader atom)

-- | A 'Reader' represents a reader macro: it takes a parser for
--   the S-Expression type and performs as much or as little
--   parsing as it would like, and then returns an S-expression.
type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))

-- | A 'Comment' represents any kind of skippable comment. This
--   parser __must__ be able to fail if a comment is not being
--   recognized, and it __must__ not consume any input.
type Comment = Parser ()

-- | A 'Serializer' is any function which can serialize an Atom
--   to 'Text'.
type Serializer atom = atom -> Text

-- | A 'SExprSpec' describes a parser and emitter for a particular
--   variant of S-Expressions. The @atom@ type corresponds to a
--   Haskell type used to represent the atoms, and the @carrier@
--   type corresponds to the parsed S-Expression structure. The
--   'SExprSpec' type is deliberately opaque so that it must be
--   constructed and modified with other helper functions.
data SExprSpec atom carrier = SExprSpec
  { sesPAtom   :: Parser atom
  , sesSAtom   :: Serializer atom
  , readerMap  :: ReaderMacroMap atom
  , comment    :: Maybe Comment
  , postparse  :: SExpr atom -> Either String carrier
  , preserial  :: carrier -> SExpr atom
  }

-- | Create a basic 'SExprSpec' when given a parser and serializer
--   for an atom type. A small minimal 'SExprSpec' that recognizes
--   any alphanumeric sequence as a valid atom looks like:
--
--   > simpleSpec :: SExprSpec Text (SExpr Text)
--   > simpleSpec = mkSpec (takeWhile1 isAlphaNum) id
mkSpec :: Parser atom -> Serializer atom -> SExprSpec atom (SExpr atom)
mkSpec p s = SExprSpec
  { sesPAtom   = p
  , sesSAtom   = s
  , readerMap  = M.empty
  , comment    = Nothing
  , postparse  = return
  , preserial  = id
  }

-- | Modify the carrier type for a 'SExprSpec'. This is
--   used internally to convert between various 'SExpr' representations,
--   but could also be used externally to add an extra conversion layer
--   onto a 'SExprSpec'.
--
--   The following defines an S-expression spec that recognizes the
--   language of binary addition trees. It does so by first transforming
--   the internal S-expression representation using 'asWellFormed', and
--   then providing a conversion between the 'WellFormedSExpr' type and
--   an @Expr@ AST. Notice that the below parser uses 'String' as its
--   underlying atom type.
--
--   > data Expr = Add Expr Expr | Num Int deriving (Eq, Show)
--   >
--   > toExpr :: WellFormedSExpr String -> Either String Expr
--   > toExpr (WFSList [WFSAtom "+", l, r]) = Add <$> toExpr l <*> toExpr r
--   > toExpr (WFSAtom c) | all isDigit c   = pure (Num (read c))
--   > toExpr c                             = Left ("Invalid expr: " ++ show c)
--   >
--   > fromExpr :: Expr -> WellFormedSExpr String
--   > fromExpr (Add l r) = WFSList [WFSAtom "+", fromExpr l, fromExpr r]
--   > fromExpr (Num n)   = WFSAtom (show n)
--   >
--   > mySpec :: SExprSpec String Expr
--   > mySpec = convertSpec toExpr fromExpr $ asWellFormed $ mkSpec parser pack
--   >   where parser = unpack <$> takeWhile1 isValidChar
--   >         isValidChar c = isDigit c || c == '+'
convertSpec :: (b -> Either String c) -> (c -> b)
               -> SExprSpec a b -> SExprSpec a c
convertSpec f g spec = spec
  { postparse = postparse spec >=> f
  , preserial = preserial spec . g
  }

-- | Convert the final output representation from the 'SExpr' type
--   to the 'RichSExpr' type.
asRich :: SExprSpec a (SExpr b) -> SExprSpec a (RichSExpr b)
asRich = convertSpec (return . toRich) fromRich

-- | Convert the final output representation from the 'SExpr' type
--   to the 'WellFormedSExpr' type.
asWellFormed :: SExprSpec a (SExpr b) -> SExprSpec a (WellFormedSExpr b)
asWellFormed = convertSpec toWellFormed fromWellFormed

-- | Add the ability to execute some particular reader macro, as
--   defined by its initial character and the 'Parser' which returns
--   the parsed S-Expression. The 'Reader' is passed a 'Parser' which
--   can be recursively called to parse more S-Expressions, and begins
--   parsing after the reader character has been removed from the
--   stream.
--
--   The following defines an S-expression variant that treats
--   @'expr@ as being sugar for @(quote expr)@:
--
--   > mySpec :: SExprSpec Text (SExpr Text)
--   > mySpec = addReader '\'' reader $ mkSpec (takeWhile1 isAlphaNum) id
--   >   where reader p = quote <$> p
--   >         quote e  = SCons (SAtom "quote") (SCons e SNil)
addReader :: Char -> Reader a -> SExprSpec a c -> SExprSpec a c
addReader c reader spec = spec
  { readerMap = M.insert c reader (readerMap spec) }

-- | Add the ability to ignore some kind of comment. This gets
--   factored into whitespace parsing, and it's very important that
--   the parser supplied __be able to fail__ (as otherwise it will
--   cause an infinite loop), and also that it __not consume any input__
--   (which may require it to be wrapped in 'try'.)
--
--   The following code defines an S-expression variant that skips
--   C++-style comments, i.e. those which begin with @//@ and last
--   until the end of a line:
--
--   > t :: SExprSpec Text (SExpr Text)
--   > t = setComment comm $ mkSpec (takeWhile1 isAlphaNum) id
--   >   where comm = try (string "//" *> takeWhile (/= '\n') *> pure ())

setComment :: Comment -> SExprSpec a c -> SExprSpec a c
setComment c spec = spec { comment = Just c }

-- | Add the ability to skip line comments beginning with a semicolon.
withSemicolonComments :: SExprSpec a c -> SExprSpec a c
withSemicolonComments = setComment (char ';' >> takeWhile (/='\n') >> return ())

-- | Add the ability to understand a quoted S-Expression. In general,
--   many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This is
--   a convenience function which allows you to easily add quoted
--   expressions to a 'SExprSpec', provided that you supply which
--   atom you want substituted in for the symbol @quote@.
withQuote :: a -> SExprSpec a (SExpr a) -> SExprSpec a (SExpr a)
withQuote q = addReader '\'' (fmap go)
  where go s  = SCons (SAtom q) (SCons s SNil)

parseGenericSExpr ::
  Parser atom  -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
parseGenericSExpr atom reader skip = do
  let sExpr = parseGenericSExpr atom reader skip
  skip
  c <- peekChar
  r <- case c of
    Nothing -> fail "Unexpected end of input"
    Just '(' -> char '(' >> skip >> parseList sExpr skip
    Just (flip M.lookup reader -> Just r) -> anyChar >> r sExpr
    _ -> SAtom `fmap` atom
  skip
  return r

parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom)
parseList sExpr skip = do
  i <- peekChar
  case i of
    Nothing  -> fail "Unexpected end of input"
    Just ')' -> char ')' >> return SNil
    _        -> do
      car <- sExpr
      skip
      c <- peekChar
      case c of
        Just '.' -> do
          char '.'
          cdr <- sExpr
          skip
          char ')'
          skip
          return (SCons car cdr)
        Just ')' -> do
          char ')'
          skip
          return (SCons car SNil)
        _ -> do
          cdr <- parseList sExpr skip
          return (SCons car cdr)

-- | Given a CommentMap, create the corresponding parser to
--   skip those comments (if they exist).
buildSkip :: Maybe (Parser ()) -> Parser ()
buildSkip Nothing  = skipSpace
buildSkip (Just c) = alternate
  where alternate = skipSpace >> ((c >> alternate) <|> return ())

-- | Decode a single S-expression. If any trailing input is left after
--   the S-expression (ignoring comments or whitespace) then this
--   will fail: for those cases, use 'decode', which returns a list of
--   all the S-expressions found at the top level.
decodeOne :: SExprSpec atom carrier -> Text -> Either String carrier
decodeOne spec = parseOnly (parser <* endOfInput) >=> (postparse spec)
  where parser = parseGenericSExpr
                   (sesPAtom spec)
                   (readerMap spec)
                   (buildSkip (comment spec))

-- | Decode several S-expressions according to a given 'SExprSpec'. This
--   will return a list of every S-expression that appears at the top-level
--   of the document.
decode :: SExprSpec atom carrier -> Text -> Either String [carrier]
decode spec =
  parseOnly (many1 parser <* endOfInput) >=> mapM (postparse spec)
    where parser = parseGenericSExpr
                     (sesPAtom spec)
                     (readerMap spec)
                     (buildSkip (comment spec))

-- | Encode (without newlines) a single S-expression.
encodeSExpr :: SExpr atom -> (atom -> Text) -> Text
encodeSExpr SNil _         = "()"
encodeSExpr (SAtom s) t    = t s
encodeSExpr (SCons x xs) t = go xs (encodeSExpr x t)
  where go (SAtom s) rs = "(" <> rs <> " . " <> t s <> ")"
        go SNil rs      = "(" <> rs <> ")"
        go (SCons x xs) rs = go xs (rs <> " " <> encodeSExpr x t)

-- | Emit an S-Expression in a machine-readable way. This does no
--   pretty-printing or indentation, and produces no comments.
encodeOne :: SExprSpec atom carrier -> carrier -> Text
encodeOne spec c = encodeSExpr (preserial spec c) (sesSAtom spec)

encode :: SExprSpec atom carrier -> [carrier] -> Text
encode spec cs = T.concat (map (encodeOne spec) cs)