Browse Source

Merge branch 'master' of rosencrantz:/srv/git/adnot

Getty Ritter 5 years ago
parent
commit
f6024b376b
6 changed files with 104 additions and 19 deletions
  1. 1 0
      Data/Adnot.hs
  2. 18 2
      Data/Adnot/Class.hs
  3. 17 10
      Data/Adnot/Emit.hs
  4. 5 4
      Data/Adnot/Parse.hs
  5. 8 3
      Data/Adnot/Type.hs
  6. 55 0
      README.md

+ 1 - 0
Data/Adnot.hs

@@ -6,6 +6,7 @@ module Data.Adnot ( Value(..)
                   , module Data.Adnot.Class
                   ) where
 
+import Data.Adnot.Class
 import Data.Adnot.Emit
 import Data.Adnot.Parse
 import Data.Adnot.Type

+ 18 - 2
Data/Adnot/Class.hs

@@ -15,6 +15,7 @@ import           Control.Monad ((>=>))
 import           Data.Adnot.Parse
 import           Data.Adnot.Type
 import           Data.Adnot.Emit
+import           Data.Adnot.Parse
 import           Data.Int
 import           Data.Word
 import qualified Data.ByteString as BS
@@ -172,7 +173,13 @@ instance ToAdnot a => ToAdnot (NE.NonEmpty a) where
 instance ToAdnot a => ToAdnot (MS.Map T.Text a) where
   toAdnot ls = Product (fmap toAdnot ls)
 
+product :: [(T.Text, Value)] -> Value
+product = Product . MS.fromList
+
+(.=) :: ToAdnot t => T.Text -> t -> (T.Text, Value)
+key .= val = (key, toAdnot val)
+
+-- * Tuples
 instance ToAdnot () where
   toAdnot () = List []
 
@@ -196,8 +203,8 @@ instance (ToAdnot a, ToAdnot b) => ToAdnot (Either a b) where
   toAdnot (Right y) = Sum "Right" [toAdnot y]
 
 instance ToAdnot Bool where
-  toAdnot True  = Symbol "True"
-  toAdnot False = Symbol "False"
+  toAdnot True  = String "True"
+  toAdnot False = String "False"
 
 -- Parsing
 
@@ -207,6 +214,14 @@ decode = decodeValue >=> parseAdnot
 type ParseError = String
 type Parser a = Either ParseError a
 
+niceType :: Value -> String
+niceType Sum {}     = "sum"
+niceType Product {} = "product"
+niceType List {}    = "list"
+niceType Integer {} = "integer"
+niceType Double {}  = "double"
+niceType String {}  = "string"
+
 withSum :: String -> (T.Text -> Array -> Parser a) -> Value -> Parser a
 withSum n k val = case val of
   Sum t as -> k t as

+ 17 - 10
Data/Adnot/Emit.hs

@@ -9,7 +9,8 @@ import           Data.List (intersperse)
 import qualified Data.Map.Strict as M
 import           Data.Monoid ((<>))
 import           Data.Text (Text)
-import           Data.Text.Encoding (encodeUtf8)
+import qualified Data.Text as T
+import           Data.Text.Encoding (encodeUtf8Builder)
 import qualified Data.Vector as V
 
 import Data.Adnot.Type
@@ -19,18 +20,27 @@ encodeValue = toLazyByteString . buildValue
 
 buildValue :: Value -> Builder
 buildValue (Sum n vs)
-  | V.null vs = char7 '(' <> ident n <> char7 ')'
+  | V.null vs = char7 '(' <> buildString n <> char7 ')'
   | otherwise =
-    char7 '(' <> ident n <> char7 ' ' <> spaceSepArr vs <> char7 ')'
+    char7 '(' <> buildString n <> char7 ' ' <> spaceSepArr vs <> char7 ')'
 buildValue (Product ps) =
   char7 '{' <> buildPairs ps <> char7 '}'
 buildValue (List vs) =
   char7 '[' <> spaceSepArr vs <> char7 ']'
 buildValue (Integer i) = integerDec i
 buildValue (Double d) = doubleDec d
-buildValue (Symbol t) = ident t
-buildValue (String t) =
-  char7 '"' <> byteString (encodeUtf8 t) <> char7 '"'
+buildValue (String t) = buildString t
+
+buildString t
+  | isValidSymbol t = encodeUtf8Builder t
+  | otherwise       = char7 '"' <> escape t <> char7 '"'
+
+escape :: T.Text -> Builder
+escape = T.foldr go mempty
+  where go '"'  r = byteString "\\\"" <> r
+        go '\n' r = byteString "\\n" <> r
+        go '\\' r = byteString "\\\\" <> r
+        go c    r = char7 c <> r
 
 spaceSep :: [Builder] -> Builder
 spaceSep = mconcat . intersperse (char7 ' ')
@@ -38,9 +48,6 @@ spaceSep = mconcat . intersperse (char7 ' ')
 spaceSepArr :: Array -> Builder
 spaceSepArr = spaceSep . map buildValue . V.toList
 
-ident :: Text -> Builder
-ident = byteString . encodeUtf8
-
 buildPairs :: Product -> Builder
 buildPairs ps = spaceSep [ go k v | (k, v) <- M.toList ps ]
-  where go k v = ident k <> char7 ' ' <> buildValue v
+  where go k v = buildString k <> char7 ' ' <> buildValue v

+ 5 - 4
Data/Adnot/Parse.hs

@@ -16,17 +16,18 @@ import           Data.Adnot.Type
 decodeValue :: ByteString -> Either String Value
 decodeValue = parseOnly pVal
   where pVal = ws *> (pSum <|> pProd <|> pList <|> pLit)
-        pSum = Sum <$> (char '(' *> ws *> pIdent)
-                   <*> (pValueList <* ws <* char ')')
+        pSum = Sum <$> (char '(' *> ws *> (pIdent <|> pString))
+                   <*> (pValueList <* (ws *> char ')'))
         pProd =  Product . M.fromList
              <$> (char '{' *> pProdBody <* ws <* char '}')
         pProdBody = many' pPair
-        pPair = (,) <$> (ws *> pIdent) <*> pVal
+        pPair = (,) <$> (ws *> (pIdent <|> pString)) <*> pVal
         pList = List <$> (char '[' *> pValueList <* ws <* char ']')
-        pLit  =  Symbol  <$> pIdent
+        pLit  =  String  <$> pIdent
              <|> String  <$> pString
              <|> Double  <$> double
              <|> Integer <$> decimal
+        pStr = String <$> (pIdent <|> pString)
         pValueList = V.fromList <$> many' pVal
         pIdent = T.pack <$>
                  ((:) <$> (letter_ascii <|> char '_')

+ 8 - 3
Data/Adnot/Type.hs

@@ -1,14 +1,16 @@
 {-# LANGUAGE DeriveDataTypeable #-}
 {-# LANGUAGE BangPatterns #-}
 
-module Data.Adnot.Type (Value(..), Array, Product) where
+module Data.Adnot.Type (Value(..), Array, Product, isValidSymbol) where
 
 import           Control.DeepSeq (NFData(..))
+import qualified Data.Char as C
 import           Data.Data (Data)
 import           Data.Typeable (Typeable)
 import           Data.Map.Strict (Map)
 import qualified Data.Map as M
 import           Data.Text (Text)
+import qualified Data.Text as T
 import           Data.Vector (Vector)
 import           GHC.Exts (IsString(..))
 
@@ -19,7 +21,6 @@ data Value
   | List !Array
   | Integer !Integer
   | Double !Double
-  | Symbol !Text
   | String !Text
     deriving (Eq, Show, Read, Typeable, Data)
 
@@ -29,7 +30,6 @@ instance NFData Value where
   rnf (List as) = rnf as
   rnf (Integer i) = rnf i
   rnf (Double d) = rnf d
-  rnf (Symbol t) = rnf t
   rnf (String t) = rnf t
 
 instance IsString Value where
@@ -37,3 +37,8 @@ instance IsString Value where
 
 type Array = Vector Value
 type Product = Map Text Value
+
+isValidSymbol :: Text -> Bool
+isValidSymbol t = case T.uncons t of
+  Nothing -> False
+  Just (x, xs) -> C.isAlpha x && T.all C.isAlphaNum xs

+ 55 - 0
README.md

@@ -0,0 +1,55 @@
+# Adnot
+
+**WARNING**: this repo contains unrepentant bikeshedding and wheel-reinvention. You almost definitely shouldn't use it, and it's probably best to disregard the entire thing!
+
+The *Adnot* format is a simple data and configuration format intended to have a slightly enriched data model when compared to JSON or s-expressions but still retain the comparative simplicity of those formats. Unlike JSON, Adnot chooses to avoid redundant structural information like punctuation; unlike s-expressions, Adnot values natively express a wider range of basic data types.
+
+*Adnot* is not intended to be a data interchange format, but rather to be a richer and more convenient syntax for certain kinds of data description that might otherwise be done in more unwieldy, complicated formats like YAML. As a first approximation, Adnot may be treated as a more human- and version-control-friendly version of JSON whose data model is intended to resemble the data model of statically typed functional programming languages.
+
+A given Adnot value is either one of three basic types—an integer, a double, a string—or one of three composite types: a sequence of values, a mapping of symbols to values, or a tagged sequence of values which begins with a symbol:
+
+```
+expr ::= "{" (string expr) * "}"
+       | "(" string expr* ")"
+       | "[" expr* "]"
+       | string
+       | integer
+       | double
+```
+
+Strings can be expressed in two different ways: one is quoted strings, which are formatted like JSON strings with the same encoding and the same set of escape sequences; the other is as bare words, in which strings that begin with a character of unicode class `XID_Start` and consist subsequently of zero or more `XID_Continue` characters can be written without quotation marks.
+
+The three kinds of composite types are meant to resemble records, sum or variant types, and lists, respectively. Zero or more symbol-expression pairs inside curly brackets form a _mapping_:
+
+```
+# a basic mapping
+{
+  x 2
+  y 3
+  "and z" 4
+}
+```
+
+Pairs do not include colons and are not separated by commas. A mapping _must_ contain an even number of sub-expressions, and every odd subexpression _must_ be a string. Whitespace is ignored except as a separator between tokens, so the above map is identical to
+
+```
+{x 2 y 3 "and z" 4}
+```
+
+A _list_ is represented by square brackets with zero or more possibly-heterogeneous expressions:
+
+```
+# a basic list
+[ 2 "foo" bar ]
+```
+
+A _tagged expression_ is represented by parentheses with a single string followed by zero or more possibly-heterogeneous expressions:
+
+```
+# a basic tagged expression
+(some_tag blah 7.8 "??")
+```
+
+These are how tagged data-types are represented: because the thing inside the parens _must_ be a string, it can correspond to a data type in an ML-like language.
+
+Adnot values can contain comments, which are line-oriented and begin with a `#` character.