// lexer.hh: declares ruby_parser::ruby_version and ruby_parser::lexer.
#ifndef RUBY_PARSER_LEXER_HH
#define RUBY_PARSER_LEXER_HH
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <optional>
#include <queue>
#include <set>
#include <stack>
#include <string>
#include <vector>

#include "context.hh"
#include "diagnostic.hh"
#include "literal.hh"
#include "pool.hh"
#include "state_stack.hh"
#include "token.hh"
namespace ruby_parser {
  /// Ruby language versions that a lexer can be constructed for
  /// (see the `version` argument of the lexer constructor).
  ///
  /// The enumerators carry no explicit values, so they take consecutive
  /// underlying values starting at 0 in declaration order (RUBY_18 == 0 ...
  /// RUBY_25 == 7). Relational comparisons between enumerators therefore
  /// follow declaration order; append new versions at the end to keep that
  /// property.
  enum class ruby_version {
    RUBY_18,
    RUBY_19,
    RUBY_20,
    RUBY_21,
    RUBY_22,
    RUBY_23,
    RUBY_24,
    RUBY_25,
  };
  27. class lexer {
  28. public:
  29. using environment = std::set<std::string>;
  30. struct token_table_entry {
  31. const char *token;
  32. token_type type;
  33. };
  34. enum class num_xfrm_type {
  35. NONE,
  36. RATIONAL,
  37. IMAGINARY,
  38. RATIONAL_IMAGINARY,
  39. FLOAT,
  40. FLOAT_IMAGINARY,
  41. };
  42. private:
  43. diagnostics_t &diagnostics;
  44. pool<token, 64> mempool;
  45. ruby_version version;
  46. const std::string source_buffer;
  47. std::stack<environment> static_env;
  48. std::stack<literal> literal_stack;
  49. std::queue<token_t> token_queue;
  50. int cs;
  51. const char *_p;
  52. const char *_pe;
  53. const char *ts;
  54. const char *te;
  55. int act;
  56. // State before =begin / =end block comment
  57. int cs_before_block_comment;
  58. std::vector<int> stack;
  59. int top;
  60. const char *eq_begin_s; // location of last encountered =begin
  61. const char *sharp_s; // location of last encountered #
  62. const char *newline_s; // location of last encountered newline
  63. // Ruby 1.9 ->() lambdas emit a distinct token if do/{ is
  64. // encountered after a matching closing parenthesis.
  65. size_t paren_nest;
  66. std::stack<size_t> lambda_stack;
  67. // If the lexer is in `command state' (aka expr_value)
  68. // at the entry to #advance, it will transition to expr_cmdarg
  69. // instead of expr_arg at certain points.
  70. bool command_start;
  71. int num_base; // last numeric base
  72. const char *num_digits_s; // starting position of numeric digits
  73. const char *num_suffix_s; // starting position of numeric suffix
  74. num_xfrm_type num_xfrm; // numeric suffix-induced transformation
  75. const char *escape_s; // starting position of current sequence
  76. std::unique_ptr<std::string> escape; // last escaped sequence, as string
  77. const char *herebody_s; // starting position of current heredoc line
  78. // After encountering the closing line of <<~SQUIGGLY_HEREDOC,
  79. // we store the indentation level and give it out to the parser
  80. // on request. It is not possible to infer indentation level just
  81. // from the AST because escape sequences such as `\ ` or `\t` are
  82. // expanded inside the lexer, but count as non-whitespace for
  83. // indentation purposes.
  84. optional_size dedentLevel_;
  85. void check_stack_capacity();
  86. int stack_pop();
  87. int arg_or_cmdarg(int cmd_state);
  88. void emit_comment(const char *s, const char *e);
  89. char unescape(uint32_t cp);
  90. std::string tok();
  91. std::string tok(const char *start);
  92. std::string tok(const char *start, const char *end);
  93. void emit(token_type type);
  94. void emit(token_type type, const std::string &str);
  95. void emit(token_type type, const std::string &str, const char *start, const char *end);
  96. void emit_do(bool do_block = false);
  97. void emit_table(const token_table_entry *table);
  98. void emit_num(const std::string &num);
  99. std::string convert_base(const std::string &num, int num_base);
  100. diagnostic::range range(const char *start, const char *end);
  101. void diagnostic_(dlevel level, dclass type, const std::string &data = "");
  102. void diagnostic_(dlevel level, dclass type, diagnostic::range &&range, const std::string &data = "");
  103. template <typename... Args> int push_literal(Args &&... args);
  104. int next_state_for_literal(literal &lit);
  105. literal &literal_();
  106. int pop_literal();
  107. token_t advance_();
  108. // literal needs to call emit:
  109. friend class literal;
  110. public:
  111. state_stack cond;
  112. state_stack cmdarg;
  113. size_t last_token_s;
  114. size_t last_token_e;
  115. bool in_kwarg; // true at the end of "def foo a:"
  116. Context context;
  117. lexer(diagnostics_t &diag, ruby_version version, const std::string &source_buffer_);
  118. token_t advance();
  119. void set_state_expr_beg();
  120. void set_state_expr_end();
  121. void set_state_expr_endarg();
  122. void set_state_expr_fname();
  123. void set_state_expr_value();
  124. void extend_static();
  125. void extend_dynamic();
  126. void unextend();
  127. void declare(const std::string &name);
  128. bool is_declared(const std::string &identifier) const;
  129. optional_size dedentLevel();
  130. };
} // namespace ruby_parser
#include "driver.hh"
#endif