//! lexer.rs — Logos-based lexer: token definitions, escape decoding, and a
//! spanned-token iterator suitable for parser consumption.
  1. use logos::{Lexer, Logos};
  2. fn parse_num<'a>(lex: &mut Lexer<'a, Token<'a>>) -> Option<i64> {
  3. let slice = lex.slice();
  4. slice.parse().ok()
  5. }
  6. fn parse_str<'a>(lex: &mut Lexer<'a, Token<'a>>) -> Option<String> {
  7. let mut buf = String::new();
  8. let s = lex.slice();
  9. let mut src = s[1..s.len() - 1].chars();
  10. while let Some(c) = src.next() {
  11. if c == '\\' {
  12. match src.next() {
  13. Some('n') => buf.push('\n'),
  14. Some('t') => buf.push('\t'),
  15. Some('r') => buf.push('\r'),
  16. Some(c) => buf.push(c),
  17. None => return None,
  18. }
  19. } else {
  20. buf.push(c);
  21. }
  22. }
  23. Some(buf)
  24. }
/// The token set of the language, generated by the `Logos` derive.
///
/// Borrowed variants (`Var`, `Atom`) reference the source string via `'a`;
/// `Str` owns its decoded contents because escape sequences are rewritten.
#[derive(Logos, Debug, PartialEq, Clone)]
pub enum Token<'a> {
    // --- Punctuation / delimiters ---
    #[token("<")]
    LAngle,
    #[token(">")]
    RAngle,
    #[token("(")]
    LPar,
    #[token(")")]
    RPar,
    #[token("{")]
    LCurl,
    #[token("}")]
    RCurl,
    #[token("|")]
    Pipe,
    #[token(":")]
    Colon,
    #[token(",")]
    Comma,
    #[token(";")]
    Semi,
    #[token(".")]
    Dot,
    #[token("..")]
    DotDot,
    // --- Multi-character operators ---
    #[token("=>")]
    Arrow,
    #[token(":=")]
    Assn,
    #[token("::=")]
    LitAssn,
    // --- Keywords ---
    #[token("puts")]
    Puts,
    #[token("case")]
    Case,
    #[token("let")]
    Let,
    #[token("in")]
    In,
    #[token("fix")]
    Fix,
    // Identifiers starting with a lowercase letter (then letters, digits, _ or -).
    #[regex(r"\p{Ll}(\pL|[0-9_-])*")]
    Var(&'a str),
    // Identifiers starting with an uppercase letter — used as atoms/constructors.
    #[regex(r"\p{Lu}(\pL|[0-9_-])*")]
    Atom(&'a str),
    // Decimal integer literal; `parse_num` converts the slice to i64.
    #[regex(r"[0-9]+", parse_num)]
    Num(i64),
    // Single- or double-quoted string; `parse_str` strips quotes and decodes escapes.
    #[regex(r"'([^'\\]|\\.)*'", parse_str)]
    #[regex("\"([^\"\\\\]|\\\\.)*\"", parse_str)]
    Str(String),
    // Error variant; whitespace and `(* ... *)` comments are skipped here too.
    // NOTE(review): the comment regex does not support nested `(* *)` comments.
    #[error]
    #[regex(r"[ \t\n\f]+", logos::skip)]
    #[regex(r"\(\*([^*]|\*[^)])*\*\)", logos::skip)]
    Error,
}
/// Opaque error produced when the lexer encounters an invalid token
/// (i.e. when [`Token::Error`] is matched). Carries no position data;
/// the span is available from the iterator in [`tokens`] before mapping.
#[derive(Debug)]
pub struct LexerError;
/// `(start, token, end)` triple — the spanned-token shape expected by
/// LALRPOP-style parsers — or a lexer error.
pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;
  84. pub fn tokens(source: &str) -> impl Iterator<Item = Spanned<Token<'_>, usize, LexerError>> {
  85. Token::lexer(source)
  86. .spanned()
  87. .map(move |(token, range)| match token {
  88. Token::Error => Err(LexerError),
  89. token => Ok((range.start, token, range.end)),
  90. })
  91. }
  92. #[cfg(test)]
  93. mod test {
  94. use logos::Logos;
  95. use super::Token;
  96. #[test]
  97. fn simple_lexer_test() {
  98. let mut lex = Token::lexer("x := Foo (* ignore *) | \"bar\";");
  99. assert_eq!(lex.next(), Some(Token::Var("x")));
  100. assert_eq!(lex.next(), Some(Token::Assn));
  101. assert_eq!(lex.next(), Some(Token::Atom("Foo")));
  102. assert_eq!(lex.next(), Some(Token::Pipe));
  103. assert_eq!(lex.next(), Some(Token::Str("bar".to_owned())));
  104. assert_eq!(lex.next(), Some(Token::Semi));
  105. assert_eq!(lex.next(), None)
  106. }
  107. }