Browse Source

switch to logos lexer

Getty Ritter 2 years ago
parent
commit
03097170aa
4 changed files with 63 additions and 42 deletions
  1. 36 37
      src/grammar.lalrpop
  2. 18 2
      src/lexer.rs
  3. 6 2
      src/main.rs
  4. 3 1
      tools/regenerate.rs

+ 36 - 37
src/grammar.lalrpop

@@ -1,35 +1,38 @@
 use std::str::FromStr;
+use crate::ast::*;
+use crate::lexer::*;
 
-grammar;
+grammar<'input>;
 
-match {
-    "<",
-    ">",
-    "(",
-    ")",
-    "{",
-    "}",
-    "|",
-    ":",
-    ",",
-    ";",
-    ".",
-    "..",
-    ":=",
-    "::=",
-    "puts",
-    "case",
-    "let",
-    "in",
-    r"\p{Ll}(\pL|[0-9_-])*",
-    r"\p{Lu}(\pL|[0-9_-])*",
-    r"[0-9]+",
-    r"'([^'\\]|\\.)*'",
-    r"\s*" => {},
-    r"\(\*([^*]|\*[^)])*\*\)" => {},
-}
+extern {
+    type Location = usize;
+    type Error = LexerError;
 
-use crate::ast::*;
+    enum Token<'input> {
+        "<" => Token::LAngle,
+        ">" => Token::RAngle,
+        "(" => Token::LPar,
+        ")" => Token::RPar,
+        "{" => Token::LCurl,
+        "}" => Token::RCurl,
+        "|" => Token::Pipe,
+        ":" => Token::Colon,
+        "," => Token::Comma,
+        ";" => Token::Semi,
+        "." => Token::Dot,
+        ".." => Token::DotDot,
+        ":=" => Token::Assn,
+        "::=" => Token::LitAssn,
+        "puts" => Token::Puts,
+        "case" => Token::Case,
+        "let" => Token::Let,
+        "in" => Token::In,
+        "var" => Token::Var(<&'input str>),
+        "atom" => Token::Atom(<&'input str>),
+        "num" => Token::Num(<i64>),
+        "str" => Token::Str(<String>)
+    }
+}
 
 pub Stmts: Vec<Stmt> = {
     <mut stmts:(<Stmt> ";")*> <stmt:Stmt?> => match stmt {
@@ -48,7 +51,7 @@ pub Stmt: Stmt = {
 };
 
 pub Name: String = {
-    r"\p{Ll}(\pL|[0-9_-])*" => <>.to_owned(),
+    "var" => <>.to_owned(),
 };
 
 pub Expr: Expr = {
@@ -59,7 +62,7 @@ pub Expr: Expr = {
 };
 
 pub Choice: Choice = {
-    <weight:Num> ":" <value:Term> => Choice {
+    <weight:"num"> ":" <value:Term> => Choice {
         weight: Some(weight),
         value
     },
@@ -95,12 +98,8 @@ pub Leaf: Expr = {
     "(" <e:Expr> ")" => e,
 };
 
-pub Num: i64 = {
-    r"[0-9]+" => i64::from_str(<>).unwrap(),
-};
-
 pub Literal: Literal = {
-    <Num> => Literal::Num(<>),
-    r"'([^'\\]|\\.)*'" => Literal::from_str_literal(<>),
-    r"\p{Lu}(\pL|[0-9_-])*" => Literal::Atom(<>.to_owned()),
+    "num" => Literal::Num(<>),
+    "str" => Literal::Str(<>),
+    "atom" => Literal::Atom(<>.to_owned()),
 };

+ 18 - 2
src/lexer.rs

@@ -25,8 +25,8 @@ fn parse_str<'a>(lex: &mut Lexer<'a, Token<'a>>) -> Option<String> {
     Some(buf)
 }
 
-#[derive(Logos, Debug, PartialEq)]
-enum Token<'a> {
+#[derive(Logos, Debug, PartialEq, Clone)]
+pub enum Token<'a> {
     #[token("<")]
     LAngle,
     #[token(">")]
@@ -97,6 +97,22 @@ enum Token<'a> {
     Error
 }
 
+#[derive(Debug)]
+pub struct LexerError;
+
+pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;
+
+pub fn tokens(
+    source: &str,
+) -> impl Iterator<Item = Spanned<Token<'_>, usize, LexerError>> {
+    Token::lexer(source)
+        .spanned()
+        .map(move |(token, range)| match token {
+            Token::Error => Err(LexerError),
+            token => Ok((range.start, token, range.end)),
+        })
+}
+
 #[cfg(test)]
 mod test {
     use logos::Logos;

+ 6 - 2
src/main.rs

@@ -1,10 +1,13 @@
 use matzo::grammar::StmtsParser;
 use matzo::interp::State;
+use matzo::lexer::tokens;
+use logos::Logos;
 
 use std::io::Write;
 
 fn run(src: &str) {
-    let stmts = StmtsParser::new().parse(&src).unwrap();
+    let lexed = tokens(&src);
+    let stmts = StmtsParser::new().parse(lexed).unwrap();
     let mut state = State::new();
     for stmt in stmts {
         if let Err(err) = state.execute(&stmt) {
@@ -25,8 +28,9 @@ fn run_repl() -> std::io::Result<()> {
         stdout.flush()?;
         buf.clear();
         stdin.read_line(&mut buf)?;
+        let lexed = tokens(&buf);
 
-        let stmts = match parser.parse(&buf) {
+        let stmts = match parser.parse(lexed) {
             Ok(stmts) => stmts,
             Err(err) => {
                 eprintln!("{:?}", err);

+ 3 - 1
tools/regenerate.rs

@@ -1,3 +1,4 @@
+use matzo::lexer;
 use matzo::grammar;
 
 use std::io::Write;
@@ -16,7 +17,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
             };
 
             let src = std::fs::read_to_string(&exp)?;
-            if let Ok(ast) = grammar::StmtsParser::new().parse(&src) {
+            let tokens = lexer::tokens(&src);
+            if let Ok(ast) = grammar::StmtsParser::new().parse(tokens) {
                 let mut f = std::fs::File::create(exp_filename("parsed"))?;
                 writeln!(f, "{:#?}", ast)?;
             }