Browse Source

Change Adnot language slightly + add FromAdnot parsing

Getty Ritter 6 years ago
parent
commit
f144540f9a
6 changed files with 258 additions and 64 deletions
  1. 2 0
      Data/Adnot.hs
  2. 206 2
      Data/Adnot/Class.hs
  3. 17 10
      Data/Adnot/Emit.hs
  4. 5 4
      Data/Adnot/Parse.hs
  5. 8 3
      Data/Adnot/Type.hs
  6. 20 45
      README.md

+ 2 - 0
Data/Adnot.hs

@@ -3,8 +3,10 @@ module Data.Adnot ( Value(..)
                   , Product
                   , decodeValue
                   , encodeValue
+                  , module Data.Adnot.Class
                   ) where
 
+import Data.Adnot.Class
 import Data.Adnot.Emit
 import Data.Adnot.Parse
 import Data.Adnot.Type

+ 206 - 2
Data/Adnot/Class.hs

@@ -12,8 +12,10 @@ module Data.Adnot.Class where
 
 import           Data.Adnot.Type
 import           Data.Adnot.Emit
+import           Data.Adnot.Parse
 import           Data.Int
 import           Data.Word
+import qualified Data.ByteString as BS
 import qualified Data.ByteString.Lazy as BSL
 import qualified Data.Foldable as F
 import qualified Data.List.NonEmpty as NE
@@ -168,6 +170,12 @@ instance ToAdnot a => ToAdnot (NE.NonEmpty a) where
 instance ToAdnot a => ToAdnot (MS.Map T.Text a) where
   toAdnot ls = Product (fmap toAdnot ls)
 
+product :: [(T.Text, Value)] -> Value
+product = Product . MS.fromList
+
+(.=) :: ToAdnot t => T.Text -> t -> (T.Text, Value)
+key .= val = (key, toAdnot val)
+
 -- * Tuples
 instance ToAdnot () where
   toAdnot () = List []
@@ -192,18 +200,214 @@ instance (ToAdnot a, ToAdnot b) => ToAdnot (Either a b) where
   toAdnot (Right y) = Sum "Right" [toAdnot y]
 
 instance ToAdnot Bool where
-  toAdnot True  = Symbol "True"
-  toAdnot False = Symbol "False"
+  toAdnot True  = String "True"
+  toAdnot False = String "False"
 
 -- * Parsing
 
 type ParseError = String
 type Parser a = Either ParseError a
 
+niceType :: Value -> String
+niceType Sum {}     = "sum"
+niceType Product {} = "product"
+niceType List {}    = "list"
+niceType Integer {} = "integer"
+niceType Double {}  = "double"
+niceType String {}  = "string"
+
 withSum :: String -> (T.Text -> Array -> Parser a) -> Value -> Parser a
 withSum n k val = case val of
   Sum t as -> k t as
   _        -> Left ("Expected sum in " ++ n)
 
+withProd :: String -> (Product -> Parser a) -> Value -> Parser a
+withProd n k val = case val of
+  Product as -> k as
+  _          -> Left ("Expected product in " ++ n)
+
+withList :: String -> (Array -> Parser a) -> Value -> Parser a
+withList n k val = case val of
+  List as -> k as
+  _       -> Left ("Expected list in " ++ n)
+
+withString :: String -> (T.Text -> Parser a) -> Value -> Parser a
+withString n k val = case val of
+  String t -> case k t of
+    Right x -> Right x
+    Left msg -> Left (msg ++ " in " ++ n)
+  _        -> Left ("Expected string in " ++ n)
+
+decode :: FromAdnot a => BS.ByteString -> Maybe a
+decode x = case decodeValue x of
+  Left _ -> Nothing
+  Right y -> case parseAdnot y of
+    Left _ -> Nothing
+    Right z -> Just z
+
+decodeEither :: FromAdnot a => BS.ByteString -> Either String a
+decodeEither x = do
+  y <- decodeValue x
+  parseAdnot y
+
 class FromAdnot a where
   parseAdnot :: Value -> Parser a
+
+instance FromAdnot Value where
+  parseAdnot v = return v
+
+instance FromAdnot Int where
+  parseAdnot (Integer n) = return (fromIntegral n)
+  parseAdnot n = Left ("Expected integer, found " ++ niceType n)
+
+instance FromAdnot Integer where
+  parseAdnot (Integer n) = return n
+  parseAdnot n = Left ("Expected integer, found " ++ niceType n)
+
+instance FromAdnot Int8 where
+  parseAdnot (Integer n) = return (fromIntegral n)
+  parseAdnot n = Left ("Expected integer, found " ++ niceType n)
+
+instance FromAdnot Int16 where
+  parseAdnot (Integer n) = return (fromIntegral n)
+  parseAdnot n = Left ("Expected integer, found " ++ niceType n)
+
+instance FromAdnot Int32 where
+  parseAdnot (Integer n) = return (fromIntegral n)
+  parseAdnot n = Left ("Expected integer, found " ++ niceType n)
+
+instance FromAdnot Int64 where
+  parseAdnot (Integer n) = return (fromIntegral n)
+  parseAdnot n = Left ("Expected integer, found " ++ niceType n)
+
+instance FromAdnot Word8 where
+  parseAdnot (Integer n) = return (fromIntegral n)
+  parseAdnot n = Left ("Expected integer, found " ++ niceType n)
+
+instance FromAdnot Word16 where
+  parseAdnot (Integer n) = return (fromIntegral n)
+  parseAdnot n = Left ("Expected integer, found " ++ niceType n)
+
+instance FromAdnot Word32 where
+  parseAdnot (Integer n) = return (fromIntegral n)
+  parseAdnot n = Left ("Expected integer, found " ++ niceType n)
+
+instance FromAdnot Word64 where
+  parseAdnot (Integer n) = return (fromIntegral n)
+  parseAdnot n = Left ("Expected integer, found " ++ niceType n)
+
+-- Rational/Floating types
+
+instance FromAdnot Double where
+  parseAdnot (Double d) = return d
+  parseAdnot n = Left ("Expected double, found " ++ niceType n)
+
+instance FromAdnot Float where
+  parseAdnot (Double d) = return (fromRational (toRational d))
+  parseAdnot n = Left ("Expected double, found " ++ niceType n)
+
+-- String types
+
+instance FromAdnot T.Text where
+  parseAdnot (String t) = return t
+  parseAdnot n = Left ("Expected string, found " ++ niceType n)
+
+instance FromAdnot TL.Text where
+  parseAdnot (String t) = return (TL.fromStrict t)
+  parseAdnot n = Left ("Expected string, found " ++ niceType n)
+
+instance {-# INCOHERENT #-} FromAdnot String where
+  parseAdnot (String t) = return (T.unpack t)
+  parseAdnot n = Left ("Expected string, found " ++ niceType n)
+
+-- sequence types
+
+instance FromAdnot t => FromAdnot [t] where
+  parseAdnot (List ts) =
+    fmap (V.toList) (traverse parseAdnot ts)
+  parseAdnot n = Left ("Expected list, found " ++ niceType n)
+
+instance FromAdnot t => FromAdnot (V.Vector t) where
+  parseAdnot (List ts) = traverse parseAdnot ts
+  parseAdnot n = Left ("Expected list, found " ++ niceType n)
+
+instance FromAdnot t => FromAdnot (Seq.Seq t) where
+  parseAdnot (List ts) =
+    fmap (Seq.fromList . V.toList) (traverse parseAdnot ts)
+  parseAdnot n = Left ("Expected list, found " ++ niceType n)
+
+instance FromAdnot t => FromAdnot (NE.NonEmpty t) where
+  parseAdnot (List ts) =
+    fmap (NE.fromList . V.toList) (traverse parseAdnot ts)
+  parseAdnot n = Left ("Expected list, found " ++ niceType n)
+
+-- tuples
+
+instance FromAdnot () where
+  parseAdnot (List []) = return ()
+  parseAdnot n = Left ("Expected list of length 0, found " ++ niceType n)
+
+instance (FromAdnot a, FromAdnot b) => FromAdnot (a, b) where
+  parseAdnot (List [a, b]) = (,) <$> parseAdnot a <*> parseAdnot b
+  parseAdnot n = Left ("Expected list of length 2, found " ++ niceType n)
+
+instance (FromAdnot a, FromAdnot b, FromAdnot c) => FromAdnot (a, b, c) where
+  parseAdnot (List [a, b, c]) =
+    (,,) <$> parseAdnot a
+         <*> parseAdnot b
+         <*> parseAdnot c
+  parseAdnot n = Left ("Expected list of length 2, found " ++ niceType n)
+
+instance (FromAdnot a, FromAdnot b, FromAdnot c, FromAdnot d)
+         => FromAdnot (a, b, c, d) where
+  parseAdnot (List [a, b, c, d]) =
+    (,,,) <$> parseAdnot a
+          <*> parseAdnot b
+          <*> parseAdnot c
+          <*> parseAdnot d
+  parseAdnot n = Left ("Expected list of length 2, found " ++ niceType n)
+
+instance FromAdnot a => FromAdnot (Maybe a) where
+  parseAdnot (Sum "Nothing" []) = return Nothing
+  parseAdnot (Sum "Just" [x]) = Just <$> parseAdnot x
+  parseAdnot (Sum "Nothing" xs) =
+    Left ("Expected 0 arguments to Maybe, but found " ++ show (F.length xs))
+  parseAdnot (Sum "Just" xs) =
+    Left ("Expected 1 argument to Just, but found " ++ show (F.length xs))
+  parseAdnot (Sum t _) =
+    Left ("Expected tag \"Nothing\" or \"Just\", but found " ++ show t)
+  parseAdnot n =
+    Left ("Expected tagged value, but found " ++ niceType n)
+
+instance (FromAdnot a, FromAdnot b) => FromAdnot (Either a b) where
+  parseAdnot (Sum "Left" [l]) = Left <$> parseAdnot l
+  parseAdnot (Sum "Right" [r]) = Right <$> parseAdnot r
+  parseAdnot (Sum "Left" xs) =
+    Left ("Expected 1 arguments to Maybe, but found " ++ show (F.length xs))
+  parseAdnot (Sum "Right" xs) =
+    Left ("Expected 1 argument to Just, but found " ++ show (F.length xs))
+  parseAdnot (Sum t _) =
+    Left ("Expected tag \"Left\" or \"Right\", but found " ++ show t)
+  parseAdnot n =
+    Left ("Expected tagged value, but found " ++ niceType n)
+
+instance FromAdnot Bool where
+  parseAdnot (String "True") = return True
+  parseAdnot (String "False") = return False
+  parseAdnot (String t) =
+    Left ("Expected \"True\" or \"False\", but found " ++ show t)
+  parseAdnot n =
+    Left ("Expected string, but found " ++ niceType n)
+
+-- mapping types
+
+instance FromAdnot t => FromAdnot (MS.Map T.Text t) where
+  parseAdnot (Product as) =
+    traverse parseAdnot as
+  parseAdnot n = Left ("Expected product, found " ++ niceType n)
+
+
+(.:) :: FromAdnot a => Product -> T.Text -> Parser a
+p .: name
+  | Just x <- MS.lookup name p = parseAdnot x
+  | otherwise = Left ("Unable to look up " ++ show name ++ " in product")

+ 17 - 10
Data/Adnot/Emit.hs

@@ -9,7 +9,8 @@ import           Data.List (intersperse)
 import qualified Data.Map.Strict as M
 import           Data.Monoid ((<>))
 import           Data.Text (Text)
-import           Data.Text.Encoding (encodeUtf8)
+import qualified Data.Text as T
+import           Data.Text.Encoding (encodeUtf8Builder)
 import qualified Data.Vector as V
 
 import Data.Adnot.Type
@@ -19,18 +20,27 @@ encodeValue = toLazyByteString . buildValue
 
 buildValue :: Value -> Builder
 buildValue (Sum n vs)
-  | V.null vs = char7 '(' <> ident n <> char7 ')'
+  | V.null vs = char7 '(' <> buildString n <> char7 ')'
   | otherwise =
-    char7 '(' <> ident n <> char7 ' ' <> spaceSepArr vs <> char7 ')'
+    char7 '(' <> buildString n <> char7 ' ' <> spaceSepArr vs <> char7 ')'
 buildValue (Product ps) =
   char7 '{' <> buildPairs ps <> char7 '}'
 buildValue (List vs) =
   char7 '[' <> spaceSepArr vs <> char7 ']'
 buildValue (Integer i) = integerDec i
 buildValue (Double d) = doubleDec d
-buildValue (Symbol t) = ident t
-buildValue (String t) =
-  char7 '"' <> byteString (encodeUtf8 t) <> char7 '"'
+buildValue (String t) = buildString t
+
+buildString t
+  | isValidSymbol t = encodeUtf8Builder t
+  | otherwise       = char7 '"' <> escape t <> char7 '"'
+
+escape :: T.Text -> Builder
+escape = T.foldr go mempty
+  where go '"'  r = byteString "\\\"" <> r
+        go '\n' r = byteString "\\n" <> r
+        go '\\' r = byteString "\\\\" <> r
+        go c    r = char7 c <> r
 
 spaceSep :: [Builder] -> Builder
 spaceSep = mconcat . intersperse (char7 ' ')
@@ -38,9 +48,6 @@ spaceSep = mconcat . intersperse (char7 ' ')
 spaceSepArr :: Array -> Builder
 spaceSepArr = spaceSep . map buildValue . V.toList
 
-ident :: Text -> Builder
-ident = byteString . encodeUtf8
-
 buildPairs :: Product -> Builder
 buildPairs ps = spaceSep [ go k v | (k, v) <- M.toList ps ]
-  where go k v = ident k <> char7 ' ' <> buildValue v
+  where go k v = buildString k <> char7 ' ' <> buildValue v

+ 5 - 4
Data/Adnot/Parse.hs

@@ -14,16 +14,17 @@ import           Data.Adnot.Type
 decodeValue :: ByteString -> Either String Value
 decodeValue = parseOnly pVal
   where pVal = ws *> (pSum <|> pProd <|> pList <|> pLit)
-        pSum = Sum <$> (char '(' *> ws *> pIdent)
-                   <*> (pValueList <* char ')')
+        pSum = Sum <$> (char '(' *> ws *> (pIdent <|> pString))
+                   <*> (pValueList <* (ws *> char ')'))
         pProd =  Product . M.fromList
              <$> (char '{' *> pProdBody <* ws <* char '}')
         pProdBody = many' pPair
-        pPair = (,) <$> (ws *> pIdent) <*> pVal
+        pPair = (,) <$> (ws *> (pIdent <|> pString)) <*> pVal
         pList = List <$> (char '[' *> pValueList <* ws <* char ']')
-        pLit  =  Symbol  <$> pIdent
+        pLit  =  String  <$> pIdent
              <|> String  <$> pString
              <|> Integer <$> decimal
+        pStr = String <$> (pIdent <|> pString)
         pValueList = V.fromList <$> many' pVal
         pIdent = T.pack <$>
                  ((:) <$> (letter_ascii <|> char '_')

+ 8 - 3
Data/Adnot/Type.hs

@@ -1,14 +1,16 @@
 {-# LANGUAGE DeriveDataTypeable #-}
 {-# LANGUAGE BangPatterns #-}
 
-module Data.Adnot.Type (Value(..), Array, Product) where
+module Data.Adnot.Type (Value(..), Array, Product, isValidSymbol) where
 
 import           Control.DeepSeq (NFData(..))
+import qualified Data.Char as C
 import           Data.Data (Data)
 import           Data.Typeable (Typeable)
 import           Data.Map.Strict (Map)
 import qualified Data.Map as M
 import           Data.Text (Text)
+import qualified Data.Text as T
 import           Data.Vector (Vector)
 import           GHC.Exts (IsString(..))
 
@@ -19,7 +21,6 @@ data Value
   | List !Array
   | Integer !Integer
   | Double !Double
-  | Symbol !Text
   | String !Text
     deriving (Eq, Show, Read, Typeable, Data)
 
@@ -29,7 +30,6 @@ instance NFData Value where
   rnf (List as) = rnf as
   rnf (Integer i) = rnf i
   rnf (Double d) = rnf d
-  rnf (Symbol t) = rnf t
   rnf (String t) = rnf t
 
 instance IsString Value where
@@ -37,3 +37,8 @@ instance IsString Value where
 
 type Array = Vector Value
 type Product = Map Text Value
+
+isValidSymbol :: Text -> Bool
+isValidSymbol t = case T.uncons t of
+  Nothing -> False
+  Just (x, xs) -> C.isAlpha x && T.all C.isAlphaNum xs

+ 20 - 45
README.md

@@ -1,80 +1,55 @@
 # Adnot
 
-The *Adnot* format is a simple data and configuration format intended
-to have a slightly enriched data model when compared to JSON or
-s-expressions but still retain the comparative simplicity of those
-formats. Unlike JSON, Adnot chooses to avoid redundant structural
-information like punctuation; unlike s-expressions, Adnot values
-natively express a wider range of basic data types.
-
-*Adnot* is not intended to be a data interchange format, but rather to
-be a richer and more convenient syntax for certain kinds of data
-description that might otherwise be done in more unwieldy formats like
-YAML. As a first approximation, Adnot may be treated as a more human-
-and version-control-friendly version of JSON whose data model is
-intended to resemble the data model of statically typed functional
-programming languages.
-
-A given Adnot value is either one of four basic types—an integer, a
-double, a string, or an identifier—or one of three composite types: a
-sequence of values, a mapping of symbols to values, or a tagged
-sequence of values which begins with a symbol:
+**WARNING**: this repo contains unrepentant bikeshedding and wheel-reinvention. You almost definitely shouldn't use it, and it's probably best to disregard the entire thing!
+
+The *Adnot* format is a simple data and configuration format intended to have a slightly enriched data model when compared to JSON or s-expressions but still retain the comparative simplicity of those formats. Unlike JSON, Adnot chooses to avoid redundant structural information like punctuation; unlike s-expressions, Adnot values natively express a wider range of basic data types.
+
+*Adnot* is not intended to be a data interchange format, but rather to be a richer and more convenient syntax for certain kinds of data description that might otherwise be done in more unwieldy, complicated formats like YAML. As a first approximation, Adnot may be treated as a more human- and version-control-friendly version of JSON whose data model is intended to resemble the data model of statically typed functional programming languages.
+
+A given Adnot value is either one of three basic types—an integer, a double, a string—or one of three composite types: a sequence of values, a mapping of symbols to values, or a tagged sequence of values which begins with a symbol:
 
 ```
-expr ::= "{" (symbol expr) * "}"
-       | "(" symbol expr* ")"
+expr ::= "{" (string expr) * "}"
+       | "(" string expr* ")"
        | "[" expr* "]"
        | string
-       | symbol
        | integer
        | double
 ```
 
-Strings are understood in the same way as JSON strings, with the same
-encoding and the same set of escapes. Symbols are unquoted strings
-that start with a Unicode character with the `XID_Start` and continue
-with the `XID_Continue` characters, and thus should resemble the
-identifier syntax for a large number of C-like languages.
+Strings can be expressed in two different ways: one is quoted strings, which are formatted like JSON strings with the same encoding and the same set of escape sequences; the other is as bare words, in which strings that begin with a character of unicode class `XID_Start` and consist subsequently of zero or more `XID_Continue` characters can be written without quotation marks.
 
-The three kinds of composite types are meant to resemble records, sum
-or variant types, and lists, respectively. Zero or more
-symbol-expression pairs inside curly brackets form a _map_:
+The three kinds of composite types are meant to resemble records, sum or variant types, and lists, respectively. Zero or more symbol-expression pairs inside curly brackets form a _mapping_:
 
 ```
-# a basic map
+# a basic mapping
 {
   x 2
   y 3
-  z 4
+  "and z" 4
 }
 ```
 
-Pairs do not include colons and are not separated by commas. A map
-_must_ contain an even number of sub-expressions, and every odd
-subexpression _must_ be a symbol. (This restriction might be lifted in
-the future?) Whitespace is ignored except as a separator between
-tokens, so the above map is identical to
+Pairs do not include colons and are not separated by commas. A mapping _must_ contain an even number of sub-expressions, and every odd subexpression _must_ be a string. Whitespace is ignored except as a separator between tokens, so the above map is identical to
 
 ```
-{x 2 y 3 z 4}
+{x 2 y 3 "and z" 4}
 ```
 
-A _list_ is represented by square brackets with zero or more
-possibly-heterogeneous expressions:
+A _list_ is represented by square brackets with zero or more possibly-heterogeneous expressions:
 
 ```
 # a basic list
 [ 2 "foo" bar ]
 ```
 
-A _tagged expression_ is represented by parentheses with a single
-symbol followed by zero or more possibly-heterogeneous expressions:
+A _tagged expression_ is represented by parentheses with a single string followed by zero or more possibly-heterogeneous expressions:
 
 ```
 # a basic tagged expression
 (some_tag blah 7.8 "??")
 ```
 
-These are how tagged data-types are traditionally represented: because
-the thing inside the parens _must_ be a symbol, it can correspond to a
-data type in an ML-like language.
+These are how tagged data-types are represented: because the thing inside the parens _must_ be a string, it can correspond to a data type in an ML-like language.
+
+Adnot values can contain comments, which are line-oriented and begin with a `#` character.