// lexer.rs — tokenizer for the language, built on the `logos` crate.
  1. use logos::{Lexer, Logos};
  2. fn parse_num<'a>(lex: &mut Lexer<'a, Token<'a>>) -> Option<i64> {
  3. let slice = lex.slice();
  4. slice.parse().ok()
  5. }
  6. fn parse_str<'a>(lex: &mut Lexer<'a, Token<'a>>) -> Option<String> {
  7. let mut buf = String::new();
  8. let s = lex.slice();
  9. let mut src = s[1..s.len() - 1].chars();
  10. while let Some(c) = src.next() {
  11. if c == '\\' {
  12. match src.next() {
  13. Some('n') => buf.push('\n'),
  14. Some('t') => buf.push('\t'),
  15. Some('r') => buf.push('\r'),
  16. Some(c) => buf.push(c),
  17. None => return None,
  18. }
  19. } else {
  20. buf.push(c);
  21. }
  22. }
  23. Some(buf)
  24. }
/// All lexical tokens of the language, generated by the `logos` derive.
/// `'a` borrows identifier text directly from the source string.
#[derive(Logos, Debug, PartialEq, Clone)]
pub enum Token<'a> {
    // --- Single-character punctuation ---
    #[token("<")]
    LAngle,
    #[token(">")]
    RAngle,
    #[token("(")]
    LPar,
    #[token(")")]
    RPar,
    #[token("{")]
    LCurl,
    #[token("}")]
    RCurl,
    #[token("|")]
    Pipe,
    #[token(":")]
    Colon,
    #[token(",")]
    Comma,
    #[token(";")]
    Semi,
    #[token(".")]
    Dot,
    #[token("_")]
    Underscore,

    // --- Multi-character operators (logos prefers the longest match,
    //     so `::=` wins over `:` + `:=`, and `..` over `.` `.`) ---
    #[token("..")]
    DotDot,
    #[token("=>")]
    Arrow,
    #[token(":=")]
    Assn,
    #[token("::=")]
    LitAssn,

    // --- Keywords ---
    #[token("puts")]
    Puts,
    #[token("case")]
    Case,
    #[token("let")]
    Let,
    #[token("in")]
    In,
    #[token("fix")]
    Fix,

    // Identifiers: a variable starts with a lowercase letter, an atom
    // with an uppercase letter; both continue with letters, digits,
    // `_`, or `-`.
    #[regex(r"\p{Ll}(\pL|[0-9_-])*")]
    Var(&'a str),
    #[regex(r"\p{Lu}(\pL|[0-9_-])*")]
    Atom(&'a str),

    // Unsigned decimal integer literal, parsed by the callback.
    #[regex(r"[0-9]+", parse_num)]
    Num(i64),

    // String literals in either single or double quotes, with
    // backslash escapes; the callback strips quotes and unescapes.
    #[regex(r"'([^'\\]|\\.)*'", parse_str)]
    #[regex("\"([^\"\\\\]|\\\\.)*\"", parse_str)]
    Str(String),

    // Catch-all error variant; whitespace and `(* ... *)` comments are
    // skipped here rather than emitted.
    // NOTE(review): the comment regex cannot match a comment whose body
    // ends in an extra `*` (e.g. `(* x **)`) — confirm whether that
    // input should lex.
    #[error]
    #[regex(r"[ \t\n\f]+", logos::skip)]
    #[regex(r"\(\*([^*]|\*[^)])*\*\)", logos::skip)]
    Error,
}
  83. #[derive(Debug)]
  84. pub struct LexerError;
  85. impl std::fmt::Display for LexerError {
  86. fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
  87. write!(f, "LexerError")
  88. }
  89. }
/// A located lexer result: `Ok((start_offset, token, end_offset))` on
/// success, or `Err` when the lexer failed at that position.
// NOTE(review): this triple shape matches the external-lexer interface
// LALRPOP expects — confirm against the parser that consumes it.
pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;
  91. pub fn tokens(source: &str) -> impl Iterator<Item = Spanned<Token<'_>, usize, LexerError>> {
  92. Token::lexer(source)
  93. .spanned()
  94. .map(move |(token, range)| match token {
  95. Token::Error => Err(LexerError),
  96. token => Ok((range.start, token, range.end)),
  97. })
  98. }
  99. #[cfg(test)]
  100. mod test {
  101. use logos::Logos;
  102. use super::Token;
  103. #[test]
  104. fn simple_lexer_test() {
  105. let mut lex = Token::lexer("x := Foo (* ignore *) | \"bar\";");
  106. assert_eq!(lex.next(), Some(Token::Var("x")));
  107. assert_eq!(lex.next(), Some(Token::Assn));
  108. assert_eq!(lex.next(), Some(Token::Atom("Foo")));
  109. assert_eq!(lex.next(), Some(Token::Pipe));
  110. assert_eq!(lex.next(), Some(Token::Str("bar".to_owned())));
  111. assert_eq!(lex.next(), Some(Token::Semi));
  112. assert_eq!(lex.next(), None)
  113. }
  114. }