lexer.rs

use logos::{Lexer, Logos};

/// A location in a source file
#[derive(Debug, Clone, Copy)]
pub struct Span {
    pub start: u32,
    pub end: u32,
}

impl Span {
    pub fn empty() -> Span {
        Span {
            start: u32::MAX,
            end: u32::MAX,
        }
    }

    pub fn exists(&self) -> bool {
        self.start != u32::MAX && self.end != u32::MAX
    }
}
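
/// A value of type `T` paired with the source `Span` it came from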
#[derive(Debug, Clone, Copy)]
pub struct Located<T> {
    pub span: Span,
    pub item: T,
}

impl<T> Located<T> {
    pub fn new(item: T, span: Span) -> Located<T> {
        Located { span, item }
    }
}

impl<T: Clone> Located<T> {
    pub fn map<R>(&self, func: impl FnOnce(T) -> R) -> Located<R> {
        Located {
            span: self.span,
            item: func(self.item.clone()),
        }
    }
}
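
/// Logos callback for numeric literals: parse the matched slice as an `i64`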
fn parse_num<'a>(lex: &mut Lexer<'a, Token<'a>>) -> Option<i64> {
    let slice = lex.slice();
    slice.parse().ok()
}
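
/// Logos callback for string literals: strip the surrounding quotes and
/// decode the escape sequences `\n`, `\t`, and `\r` (any other escaped
/// character is passed through unchanged)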
fn parse_str<'a>(lex: &mut Lexer<'a, Token<'a>>) -> Option<String> {
    let mut buf = String::new();
    let s = lex.slice();
    let mut src = s[1..s.len() - 1].chars();
    while let Some(c) = src.next() {
        if c == '\\' {
            match src.next() {
                Some('n') => buf.push('\n'),
                Some('t') => buf.push('\t'),
                Some('r') => buf.push('\r'),
                Some(c) => buf.push(c),
                None => return None,
            }
        } else {
            buf.push(c);
        }
    }
    Some(buf)
}
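
/// The tokens of the language; whitespace and `(* ... *)` comments are
/// skipped during lexing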
#[derive(Logos, Debug, PartialEq, Clone)]
pub enum Token<'a> {
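    // Punctuation and operators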
    #[token("<")]
    LAngle,
    #[token(">")]
    RAngle,
    #[token("(")]
    LPar,
    #[token(")")]
    RPar,
    #[token("{")]
    LCurl,
    #[token("}")]
    RCurl,
    #[token("|")]
    Pipe,
    #[token(":")]
    Colon,
    #[token(",")]
    Comma,
    #[token(";")]
    Semi,
    #[token(".")]
    Dot,
    #[token("_")]
    Underscore,
    #[token("..")]
    DotDot,
    #[token("=>")]
    Arrow,
    #[token(":=")]
    Assn,
    #[token("::=")]
    LitAssn,
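
    // Keywords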
    #[token("puts")]
    Puts,
    #[token("case")]
    Case,
    #[token("let")]
    Let,
    #[token("in")]
    In,
    #[token("fix")]
    Fix,
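
    // Identifiers and literals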
    #[regex(r"\p{Ll}(\pL|[0-9_-])*")]
    Var(&'a str),
    #[regex(r"\p{Lu}(\pL|[0-9_-])*")]
    Atom(&'a str),
    #[regex(r"[0-9]+", parse_num)]
    Num(i64),
    #[regex(r"'([^'\\]|\\.)*'", parse_str)]
    #[regex("\"([^\"\\\\]|\\\\.)*\"", parse_str)]
    Str(String),
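
    // Catch-all error variant; the skip rules for whitespace and
    // (* ... *) comments are attached here as well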
    #[error]
    #[regex(r"[ \t\n\f]+", logos::skip)]
    #[regex(r"\(\*([^*]|\*[^)])*\*\)", logos::skip)]
    Error,
}
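
/// Error produced when the lexer encounters input it cannot tokenize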
#[derive(Debug)]
pub struct LexerError;

impl std::fmt::Display for LexerError {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(f, "LexerError")
    }
}
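
/// A token together with its start and end offsets in the source, or a lexing error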
pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;
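
/// Tokenize `source`, converting any `Token::Error` into a `LexerError`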
pub fn tokens(source: &str) -> impl Iterator<Item = Spanned<Token<'_>, usize, LexerError>> {
    Token::lexer(source)
        .spanned()
        .map(move |(token, range)| match token {
            Token::Error => Err(LexerError),
            token => Ok((range.start, token, range.end)),
        })
}

#[cfg(test)]
mod test {
    use logos::Logos;
    use super::Token;

    #[test]
    fn simple_lexer_test() {
        let mut lex = Token::lexer("x := Foo (* ignore *) | \"bar\";");
        assert_eq!(lex.next(), Some(Token::Var("x")));
        assert_eq!(lex.next(), Some(Token::Assn));
        assert_eq!(lex.next(), Some(Token::Atom("Foo")));
        assert_eq!(lex.next(), Some(Token::Pipe));
        assert_eq!(lex.next(), Some(Token::Str("bar".to_owned())));
        assert_eq!(lex.next(), Some(Token::Semi));
        assert_eq!(lex.next(), None);
    }
}