use logos::{Lexer, Logos}; fn parse_num<'a>(lex: &mut Lexer<'a, Token<'a>>) -> Option { let slice = lex.slice(); Some(slice.parse().ok()?) } fn parse_str<'a>(lex: &mut Lexer<'a, Token<'a>>) -> Option { let mut buf = String::new(); let s = lex.slice(); let mut src = s[1..s.len() - 1].chars().into_iter(); while let Some(c) = src.next() { if c == '\\' { match src.next() { Some('n') => buf.push('\n'), Some('t') => buf.push('\t'), Some('r') => buf.push('\r'), Some(c) => buf.push(c), None => return None, } } else { buf.push(c); } } Some(buf) } #[derive(Logos, Debug, PartialEq)] enum Token<'a> { #[token("<")] LAngle, #[token(">")] RAngle, #[token("(")] LPar, #[token(")")] RPar, #[token("{")] LCurl, #[token("}")] RCurl, #[token("|")] Pipe, #[token(":")] Colon, #[token(",")] Comma, #[token(";")] Semi, #[token(".")] Dot, #[token("..")] DotDot, #[token(":=")] Assn, #[token("::=")] LitAssn, #[token("puts")] Puts, #[token("case")] Case, #[token("let")] Let, #[token("in")] In, #[regex(r"\p{Ll}(\pL|[0-9_-])*")] Var(&'a str), #[regex(r"\p{Lu}(\pL|[0-9_-])*")] Atom(&'a str), #[regex(r"[0-9]+", parse_num)] Num(i64), #[regex(r"'([^'\\]|\\.)*'", parse_str)] Str(String), #[error] #[regex(r"[ \t\n\f]+", logos::skip)] #[regex(r"\(\*([^*]|\*[^)])*\*\)", logos::skip)] Error } #[cfg(test)] mod test { use logos::Logos; use super::Token; #[test] fn simple_lexer_test() { let mut lex = Token::lexer("x := Foo (* ignore *) | 'bar';"); assert_eq!(lex.next(), Some(Token::Var("x"))); assert_eq!(lex.next(), Some(Token::Assn)); assert_eq!(lex.next(), Some(Token::Atom("Foo"))); assert_eq!(lex.next(), Some(Token::Pipe)); assert_eq!(lex.next(), Some(Token::Str("bar".to_owned()))); assert_eq!(lex.next(), Some(Token::Semi)); assert_eq!(lex.next(), None) } }