Browse Source

Basic, hacky impl of limited parser

Getty Ritter 6 years ago
commit
4a0ba3ec95
7 changed files with 303 additions and 0 deletions
  1. 4 0
      .gitignore
  2. 22 0
      Cargo.toml
  3. 17 0
      samples/entries.rec
  4. 13 0
      samples/has_default.rec
  5. 154 0
      src/lib.rs
  6. 13 0
      src/tools/pretty.rs
  7. 80 0
      src/tools/tojson.rs

+ 4 - 0
.gitignore

@@ -0,0 +1,4 @@
+/target/
+**/*.rs.bk
+Cargo.lock
+*~

+ 22 - 0
Cargo.toml

@@ -0,0 +1,22 @@
+[package]
+name = "rrecutils"
+version = "0.1.0"
+authors = ["Getty Ritter <gettylefou@gmail.com>"]
+
+[lib]
+name = "rrecutils"
+path = "src/lib.rs"
+
+[dependencies]
+regex = "0.2"
+serde = "*"
+serde_json = "*"
+clap = "2.27.1"
+
+[[bin]]
+name = "rr-pretty"
+path = "src/tools/pretty.rs"
+
+[[bin]]
+name = "rr-to-json"
+path = "src/tools/tojson.rs"

+ 17 - 0
samples/entries.rec

@@ -0,0 +1,17 @@
+%rec: Article
+
+Id: 1
+Title: Article 1
+
+Id: 2
+Title: Article 2
+
+%rec: Stock
+
+Id: 1
+Type: sell
+Date: 20 April 2011
+
+Id: 2
+Type: stock
+Date: 21 April 2011

+ 13 - 0
samples/has_default.rec

@@ -0,0 +1,13 @@
+Id: 1
+Title: Blah
+
+Id: 2
+Title: Bleh
+
+%rec: Movement
+
+Date: 13-Aug-2012
+Concept: 20
+
+Date: 24-Sept-2012
+Concept: 12

+ 154 - 0
src/lib.rs

@@ -0,0 +1,154 @@
+/// Mutable state carried from line to line while `Recfile::parse` runs.
+struct ParsingContext {
+    // Initialised to `false` by the parser and not read anywhere in the
+    // visible code.
+    continuation_line: bool,
+    // Value of the most recent `%rec` field seen so far; cloned into every
+    // record that is started afterwards.
+    current_record_type: Option<String>,
+}
+
+/// A single record: an ordered list of fields plus the record type that was
+/// in effect when the record was started.
+#[derive(Eq, PartialEq, Debug)]
+pub struct Record {
+    /// Type named by the latest `%rec` field seen before this record began,
+    /// or `None` if no such field had been seen yet.
+    pub rec_type: Option<String>,
+    /// `(name, value)` pairs in the order they appeared in the input.
+    pub fields: Vec<(String, String)>,
+}
+
+/// The parsed contents of a recfile.
+#[derive(Eq, PartialEq, Debug)]
+pub struct Recfile {
+    /// Every non-empty record, in the order it appeared in the input.
+    pub records: Vec<Record>,
+}
+
+
+impl Recfile {
+    /// Parses a recfile from a buffered reader.
+    ///
+    /// Leading spaces are stripped from every line, which is then handled
+    /// according to how it starts:
+    ///
+    /// * `#`: comment, ignored.
+    /// * empty: ends the current record (if it has any fields).
+    /// * `+`: continuation; the rest of the line, minus one optional leading
+    ///   space, is appended to the previous field's value after a newline.
+    /// * anything containing `:`: a `name: value` field. A `%rec` field is
+    ///   stored like any other field, and additionally sets the record type
+    ///   applied to records started *after* the current one.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Err` with a human-readable message when
+    ///
+    /// * reading a line fails (I/O error or invalid UTF-8),
+    /// * a continuation line appears before any field of the current record,
+    /// * a line matches none of the forms above.
+    pub fn parse<I>(i: I) -> Result<Recfile, String>
+        where I: std::io::BufRead
+    {
+        let mut current = Record {
+            fields: vec![],
+            rec_type: None,
+        };
+        let mut buf = vec![];
+        let mut ctx = ParsingContext {
+            continuation_line: false,
+            current_record_type: None,
+        };
+
+        for ln in i.lines() {
+            // Report read failures instead of silently treating them as the
+            // end of the input, which would return a truncated file as `Ok`.
+            let ln = ln.map_err(|err| format!("Unable to read input: {}", err))?;
+            let ln = ln.trim_left_matches(' ');
+
+            if ln.starts_with('#') {
+                // skip comment lines
+            } else if ln.is_empty() {
+                // A blank line closes the record in progress; consecutive
+                // blank lines do not produce empty records.
+                if !current.fields.is_empty() {
+                    buf.push(current);
+                    current = Record {
+                        rec_type: ctx.current_record_type.clone(),
+                        fields: vec![],
+                    };
+                }
+            } else if ln.starts_with('+') {
+                if let Some(val) = current.fields.last_mut() {
+                    val.1.push_str("\n");
+                    // Only a single space after the `+` is part of the
+                    // marker; any further whitespace belongs to the value.
+                    val.1.push_str(
+                        if ln[1..].starts_with(' ') {
+                            &ln[2..]
+                        } else {
+                            &ln[1..]
+                        });
+                } else {
+                    return Err(format!(
+                        "Found continuation line in nonsensical place: {}",
+                        ln));
+                }
+            } else if let Some(pos) = ln.find(':') {
+                // `:` is a single byte, so `pos + 1` is a valid boundary.
+                let key = &ln[..pos];
+                let val = ln[pos + 1..].trim_left();
+                if key == "%rec" {
+                    ctx.current_record_type = Some(val.to_owned());
+                }
+                current.fields.push((key.to_owned(), val.to_owned()));
+            } else {
+                return Err(format!("Invalid line: {:?}", ln));
+            }
+        }
+
+        // The input may end without a trailing blank line.
+        if !current.fields.is_empty() {
+            buf.push(current);
+        }
+
+        Ok(Recfile { records: buf })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{Recfile, Record};
+
+    /// Parses `input` and asserts that it yields exactly the records in
+    /// `expected`, each given as a list of `(name, value)` pairs.
+    ///
+    /// None of the inputs used here contain a `%rec` field, so every
+    /// expected record has no record type.
+    fn test_parse(input: &[u8], expected: Vec<Vec<(&str, &str)>>) {
+        let file = Recfile {
+            records: expected.iter().map( |fields| {
+                Record {
+                    rec_type: None,
+                    fields: fields.iter().map( |&(k, v)| {
+                        (k.to_owned(), v.to_owned())
+                    }).collect(),
+                }
+            }).collect(),
+        };
+        assert_eq!(Recfile::parse(input), Ok(file));
+    }
+
+    #[test]
+    fn empty_file() {
+        test_parse(b"\n", vec![]);
+    }
+
+    #[test]
+    fn only_comments() {
+        test_parse(b"# an empty file\n", vec![]);
+    }
+
+    #[test]
+    fn one_section() {
+        test_parse(b"hello: yes\n", vec![ vec![ ("hello", "yes") ] ]);
+    }
+
+    #[test]
+    fn two_sections() {
+        test_parse(
+            b"hello: yes\n\ngoodbye: no\n",
+            vec![
+                vec![ ("hello", "yes") ],
+                vec![ ("goodbye", "no") ],
+            ],
+        );
+    }
+
+    #[test]
+    fn continuation_with_space() {
+        test_parse(
+            b"hello: yes\n+ but also no\n",
+            vec![
+                vec![ ("hello", "yes\nbut also no") ],
+            ],
+        );
+    }
+
+    #[test]
+    fn continuation_without_space() {
+        test_parse(
+            b"hello: yes\n+but also no\n",
+            vec![
+                vec![ ("hello", "yes\nbut also no") ],
+            ],
+        );
+    }
+
+    // Only the first space after `+` is consumed; the second is content.
+    #[test]
+    fn continuation_with_two_spaces() {
+        test_parse(
+            b"hello: yes\n+  but also no\n",
+            vec![
+                vec![ ("hello", "yes\n but also no") ],
+            ],
+        );
+    }
+}

+ 13 - 0
src/tools/pretty.rs

@@ -0,0 +1,13 @@
+extern crate clap;
+extern crate rrecutils;
+
+/// Entry point for `rr-pretty`: parses a recfile from standard input and
+/// prints the `Debug` rendering of the parse result.
+fn main() {
+    // The parsed arguments are never consulted. The call is presumably kept
+    // so that clap processes its built-in flags (TODO confirm).
+    let _ = clap::App::new("rr-pretty")
+        .version("0.0")
+        .author("Getty Ritter <rrecutils@infinitenegativeutility.com>")
+        .about("Display the Rust AST for a Recutils file")
+        .get_matches();
+
+    let stdin = std::io::stdin();
+    let parsed = rrecutils::Recfile::parse(stdin.lock());
+    println!("{:#?}", parsed);
+}

+ 80 - 0
src/tools/tojson.rs

@@ -0,0 +1,80 @@
+extern crate clap;
+extern crate rrecutils;
+extern crate serde_json;
+
+use std::{fmt,fs,io};
+
+use serde_json::Value;
+use serde_json::map::Map;
+
+/// Converts one record into a JSON object whose members are the record's
+/// fields, with every value stored as a JSON string.
+///
+/// Fields are added in order, so when a name repeats within a record the
+/// value of its last occurrence is the one kept.
+fn record_to_json(rec: &rrecutils::Record) -> Value {
+    let object: Map<String, Value> = rec.fields
+        .iter()
+        .map(|&(ref name, ref text)| (name.clone(), Value::String(text.clone())))
+        .collect();
+    Value::Object(object)
+}
+
+/// Returns the success value of `value`, or reports the error and terminates
+/// the process.
+///
+/// On `Err`, the error's `Debug` rendering is written to standard error and
+/// the process exits with status 99; the function does not return.
+fn unwrap_err<L, R: fmt::Debug>(value: Result<L, R>) -> L {
+    match value {
+        Ok(v) => v,
+        Err(err) => {
+            // Diagnostics go to stderr: stdout is the default destination
+            // for the JSON output and must stay free of error text.
+            eprintln!("{:?}", err);
+            std::process::exit(99)
+        }
+    }
+}
+
+/// Entry point for `rr-to-json`: reads a recfile and writes its records as
+/// a JSON array of objects.
+///
+/// Input comes from `--input FILE` and output goes to `--output FILE`; both
+/// default to `-`, meaning standard input / standard output. `--pretty`
+/// selects indented JSON. Any failure is reported through `unwrap_err`,
+/// which terminates the process.
+fn main() {
+    let matches = clap::App::new("rr-to-json")
+        .version("0.0")
+        .author("Getty Ritter <rrecutils@infinitenegativeutility.com>")
+        .about("Convert a Recutils file to JSON")
+        .arg(clap::Arg::with_name("pretty")
+             .short("p")
+             .long("pretty")
+             .help("Pretty-print the resulting JSON"))
+        .arg(clap::Arg::with_name("input")
+             .short("i")
+             .long("input")
+             .value_name("FILE")
+             .help("The input recfile (or - for stdin)"))
+        .arg(clap::Arg::with_name("output")
+             .short("o")
+             .long("output")
+             .value_name("FILE")
+             .help("The desired output location (or - for stdout)"))
+        .get_matches();
+
+    // Declared outside the `match` so the lock borrowed from it can live
+    // inside the boxed reader.
+    let stdin = io::stdin();
+
+    let input: Box<io::BufRead> =
+        match matches.value_of("input").unwrap_or("-") {
+            "-" => Box::new(stdin.lock()),
+            path =>
+                Box::new(io::BufReader::new(unwrap_err(fs::File::open(path)))),
+        };
+
+    let json = Value::Array(unwrap_err(rrecutils::Recfile::parse(input))
+                            .records
+                            .iter()
+                            .map(|x| record_to_json(x))
+                            .collect());
+
+    let mut output: Box<io::Write> =
+        match matches.value_of("output").unwrap_or("-") {
+            "-" => Box::new(io::stdout()),
+            // `File::create` opens for writing (creating or truncating the
+            // file); `File::open` would give a read-only handle and every
+            // write to it would fail.
+            path => Box::new(unwrap_err(fs::File::create(path))),
+        };
+
+    let serialized = if matches.is_present("pretty") {
+        unwrap_err(serde_json::to_string_pretty(&json))
+    } else {
+        json.to_string()
+    };
+
+    unwrap_err(writeln!(output, "{}", serialized));
+}