lexer.rs 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. use logos::{Lexer, Logos};
  2. fn parse_num<'a>(lex: &mut Lexer<'a, Token<'a>>) -> Option<i64> {
  3. let slice = lex.slice();
  4. Some(slice.parse().ok()?)
  5. }
  6. fn parse_str<'a>(lex: &mut Lexer<'a, Token<'a>>) -> Option<String> {
  7. let mut buf = String::new();
  8. let s = lex.slice();
  9. let mut src = s[1..s.len() - 1].chars().into_iter();
  10. while let Some(c) = src.next() {
  11. if c == '\\' {
  12. match src.next() {
  13. Some('n') => buf.push('\n'),
  14. Some('t') => buf.push('\t'),
  15. Some('r') => buf.push('\r'),
  16. Some(c) => buf.push(c),
  17. None => return None,
  18. }
  19. } else {
  20. buf.push(c);
  21. }
  22. }
  23. Some(buf)
  24. }
  25. #[derive(Logos, Debug, PartialEq)]
  26. enum Token<'a> {
  27. #[token("<")]
  28. LAngle,
  29. #[token(">")]
  30. RAngle,
  31. #[token("(")]
  32. LPar,
  33. #[token(")")]
  34. RPar,
  35. #[token("{")]
  36. LCurl,
  37. #[token("}")]
  38. RCurl,
  39. #[token("|")]
  40. Pipe,
  41. #[token(":")]
  42. Colon,
  43. #[token(",")]
  44. Comma,
  45. #[token(";")]
  46. Semi,
  47. #[token(".")]
  48. Dot,
  49. #[token("..")]
  50. DotDot,
  51. #[token(":=")]
  52. Assn,
  53. #[token("::=")]
  54. LitAssn,
  55. #[token("puts")]
  56. Puts,
  57. #[token("case")]
  58. Case,
  59. #[token("let")]
  60. Let,
  61. #[token("in")]
  62. In,
  63. #[regex(r"\p{Ll}(\pL|[0-9_-])*")]
  64. Var(&'a str),
  65. #[regex(r"\p{Lu}(\pL|[0-9_-])*")]
  66. Atom(&'a str),
  67. #[regex(r"[0-9]+", parse_num)]
  68. Num(i64),
  69. #[regex(r"'([^'\\]|\\.)*'", parse_str)]
  70. Str(String),
  71. #[error]
  72. #[regex(r"[ \t\n\f]+", logos::skip)]
  73. #[regex(r"\(\*([^*]|\*[^)])*\*\)", logos::skip)]
  74. Error
  75. }
  #[cfg(test)]
  mod test {
      use super::Token;
      use logos::Logos;

      /// Lexes one small statement and checks the complete token stream: the
      /// whitespace and the `(* ignore *)` comment must produce no tokens, and
      /// the quoted literal must arrive with its quotes removed.
      #[test]
      fn simple_lexer_test() {
          let produced: Vec<Token> = Token::lexer("x := Foo (* ignore *) | 'bar';").collect();
          let expected = vec![
              Token::Var("x"),
              Token::Assn,
              Token::Atom("Foo"),
              Token::Pipe,
              Token::Str(String::from("bar")),
              Token::Semi,
          ];
          assert_eq!(produced, expected);
      }
  }