Browse Source

Import parser and build from Sorbet

Getty Ritter 4 years ago
commit
f14ea043ee

+ 13 - 0
WORKSPACE

@@ -0,0 +1,13 @@
+workspace(name="diamantaire")  # Names this Bazel workspace "diamantaire".
+
+load("//third_party:externals.bzl", "register_diamantaire_dependencies")
+register_diamantaire_dependencies()  # Declares the external archives; the "@rules_*" loads below resolve against them.
+
+load("@rules_ragel//ragel:ragel.bzl", "ragel_register_toolchains")
+ragel_register_toolchains()  # Toolchain behind the ragel() rule used in //third_party/parser.
+
+load("@rules_m4//m4:m4.bzl", "m4_register_toolchains")
+m4_register_toolchains()  # NOTE(review): presumably needed because Bison depends on m4 -- confirm against rules_bison docs.
+
+load("@rules_bison//bison:bison.bzl", "bison_register_toolchains")
+bison_register_toolchains()  # Toolchain behind the bison() rule used in //third_party/parser.

+ 7 - 0
main/BUILD

@@ -0,0 +1,7 @@
+cc_binary(  # The diamantaire executable, built from main.cc.
+    name = "diamantaire",
+    srcs = ["main.cc"],
+    deps = [
+        "//third_party/parser",  # Shorthand for //third_party/parser:parser, the imported parser library.
+    ]
+)

+ 6 - 0
main/main.cc

@@ -0,0 +1,6 @@
+#include <iostream>
+
+// Placeholder entry point: writes "yo" followed by a newline to stdout,
+// flushes the stream, and reports success. The command-line arguments are
+// accepted but not inspected.
+int main(int argc, char* argv[]) {
+  std::cout << "yo" << '\n' << std::flush;
+  return 0;
+}

+ 0 - 0
third_party/BUILD


+ 28 - 0
third_party/externals.bzl

@@ -0,0 +1,28 @@
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file")
+
+def register_diamantaire_dependencies():  # Declares every external archive the workspace needs; called from WORKSPACE.
+    http_archive(  # Ragel build rules, pinned to a single commit archive.
+        name = "rules_ragel",
+        url = "https://github.com/jmillikin/rules_ragel/archive/f99f17fcad2e155646745f4827ac636a3b5d4d15.zip",
+        sha256 = "f957682c6350b2e4484c433c7f45d427a86de5c8751a0d2a9836f36995fe0320",
+        strip_prefix = "rules_ragel-f99f17fcad2e155646745f4827ac636a3b5d4d15",  # The archive unpacks into a commit-named directory.
+    )
+
+    http_archive(  # Bison build rules, v0.2 release tarball.
+        name = "rules_bison",
+        url = "https://github.com/jmillikin/rules_bison/releases/download/v0.2/rules_bison-v0.2.tar.xz",
+        sha256 = "6ee9b396f450ca9753c3283944f9a6015b61227f8386893fb59d593455141481",
+    )
+
+    http_archive(  # m4 build rules, v0.2 release tarball.
+        name = "rules_m4",
+        url = "https://github.com/jmillikin/rules_m4/releases/download/v0.2/rules_m4-v0.2.tar.xz",
+        sha256 = "c67fa9891bb19e9e6c1050003ba648d35383b8cb3c9572f397ad24040fb7f0eb",
+    )
+
+    http_archive(  # Abseil C++ library, pinned to a single commit archive; the parser links absl/strings.
+        name = "com_google_absl",
+        url = "https://github.com/abseil/abseil-cpp/archive/d9aa92d7fb324314f9df487ac23d32a25650b742.zip",
+        sha256 = "caf4c323eb6211397df96dd5ff96e46c7e5dd77c74d3daed2181f87868159eca",
+        strip_prefix = "abseil-cpp-d9aa92d7fb324314f9df487ac23d32a25650b742",
+    )

+ 7 - 0
third_party/parser/.dockerignore

@@ -0,0 +1,7 @@
+.cargo-in-container
+.git
+*.o
+librubyparser.a
+target
+library
+tests/ruby

+ 4 - 0
third_party/parser/.gitignore

@@ -0,0 +1,4 @@
+/.cargo-in-container
+/target
+*.o
+Cargo.lock

+ 65 - 0
third_party/parser/BUILD

@@ -0,0 +1,65 @@
+licenses(["notice"])
+
+load("@rules_ragel//ragel:ragel.bzl", "ragel")
+load("@rules_bison//bison:bison.bzl", "bison")
+
+ragel(  # Generates the C++ lexer from the Ragel source cc/lexer.rl.
+    name = "ragel_lexer",
+    src = "cc/lexer.rl",
+    language = "c++",
+)
+
+bison(  # Generates the C++ parser from the typedruby grammar; cc/driver.cc includes its typedruby_bison.h output.
+    name = "typedruby_bison",
+    src = "cc/grammars/typedruby.ypp",
+    bison_options = [
+        "-Wno-empty-rule",  # Silences Bison's "empty-rule" warning category.
+        "-Wno-precedence",  # Silences Bison's "precedence" warning category.
+    ],
+)
+
+cc_binary(  # Build-time code generator; run by the gen_diagnostics_dclass genrule.
+    name = "generate_diagnostics",
+    srcs = [
+        "codegen/generate_diagnostics.cc",
+    ],
+    linkstatic = select({  # Only the default branch exists, so this always evaluates to 1.
+        "//conditions:default": 1,
+    }),
+    visibility = ["//visibility:public"],
+)
+
+genrule(  # Produces include/ruby_parser/diagnostic_class.hh at build time.
+    name = "gen_diagnostics_dclass",
+    outs = [
+        "include/ruby_parser/diagnostic_class.hh",
+    ],
+    cmd = "$(location :generate_diagnostics) dclass > $@",  # Runs the generator with the "dclass" argument and captures stdout as the header.
+    tools = [
+        ":generate_diagnostics",
+    ],
+)
+
+cc_library(  # The Ruby parser library: hand-written sources plus the generated lexer, parser and diagnostics header.
+    name = "parser",
+    srcs = glob(["cc/*.cc"]) + [
+        ":gen_diagnostics_dclass",  # Generated diagnostic_class.hh.
+        ":ragel_lexer",  # Generated lexer from cc/lexer.rl.
+        ":typedruby_bison",  # Generated parser from cc/grammars/typedruby.ypp.
+    ],
+    hdrs = glob(["include/**/*.hh"]),
+    copts = [
+        "-Wno-unused-const-variable",
+    ],
+    includes = [
+        "include",  # Makes <ruby_parser/...> includes resolve.
+        "include/ruby_parser",
+    ],
+    linkstatic = select({  # Only the default branch exists, so this always evaluates to 1.
+        "//conditions:default": 1,
+    }),
+    visibility = ["//visibility:public"],
+    deps = [
+        "@com_google_absl//absl/strings",
+    ],
+)

+ 5 - 0
third_party/parser/README.md

@@ -0,0 +1,5 @@
+# Our very own Ruby parser
+
+## Development
+
+There's a simple `Dockerfile` if you don't want to install Rust or run Linux. Run `script/bootstrap` to build an image and `script/build-env` to run a command in the build environment. Maybe try `script/build-env cargo test`.

+ 101 - 0
third_party/parser/cc/capi.cc

@@ -0,0 +1,101 @@
+#include <ruby_parser/capi.hh>
+#include <cstdio>
+
+// Creates a heap-allocated typedruby25 driver for the `source_length` bytes
+// of source text starting at `source_ptr`. The bytes are first copied into a
+// std::string; `builder` is dereferenced and handed to the driver by
+// reference, so it must not be null. The result is released with
+// rbdriver_typedruby25_free().
+ruby_parser::typedruby25*
+rbdriver_typedruby25_new(const char* source_ptr, size_t source_length, const ruby_parser::builder* builder)
+{
+	const std::string source_text(source_ptr, source_length);
+	const ruby_parser::builder& callbacks = *builder;
+	return new ruby_parser::typedruby25(source_text, callbacks);
+}
+
+void
+rbdriver_typedruby25_free(ruby_parser::typedruby25* driver) // Destroys a driver obtained from rbdriver_typedruby25_new().
+{
+	delete driver; // deleting a null pointer is a no-op
+}
+
+const void*
+rbdriver_parse(ruby_parser::base_driver* driver, ruby_parser::SelfPtr self) // Forwards to driver->parse(self); the result is returned as an opaque pointer.
+{
+	return driver->parse(self);
+}
+
+bool
+rbdriver_in_definition(const ruby_parser::base_driver *driver) // True while the driver's def_level counter is above zero.
+{
+	return driver->def_level > 0;
+}
+
+// Asks the driver's lexer (driver->lex.is_declared) whether the identifier
+// made of the `length` characters starting at `name` is declared.
+bool
+rbdriver_env_is_declared(const ruby_parser::base_driver *driver, const char* name, size_t length)
+{
+	std::string identifier(name, length);
+	return driver->lex.is_declared(identifier);
+}
+
+// Declares the identifier made of the `length` characters starting at `name`
+// through the driver's lexer (driver->lex.declare).
+void
+rbdriver_env_declare(ruby_parser::base_driver *driver, const char* name, size_t length)
+{
+	std::string identifier(name, length);
+	driver->lex.declare(identifier);
+}
+
+size_t
+rbtoken_get_start(const ruby_parser::token* tok) // Returns tok->start().
+{
+	return tok->start();
+}
+
+size_t
+rbtoken_get_end(const ruby_parser::token* tok) // Returns tok->end().
+{
+	return tok->end();
+}
+
+// Exposes the token's text without copying it: stores a pointer to the first
+// character in *out_ptr and returns the number of characters.
+// NOTE(review): the pointer is only usable after this call if token::string()
+// returns a reference to storage owned by the token -- confirm in token.hh.
+size_t
+rbtoken_get_string(const ruby_parser::token* tok, const char** out_ptr)
+{
+	const auto& text = tok->string();
+	*out_ptr = text.data();
+	return text.size();
+}
+
+size_t
+rblist_get_length(const ruby_parser::node_list* list) // Returns list->size().
+{
+	return list->size();
+}
+
+const void*
+rblist_index(ruby_parser::node_list* list, size_t index) // Returns list->at(index) as an opaque pointer; range handling is whatever node_list::at does.
+{
+	return list->at(index);
+}
+
+size_t
+rbdriver_diag_get_length(const ruby_parser::base_driver* driver) // Number of entries currently in driver->diagnostics.
+{
+	return driver->diagnostics.size();
+}
+
+// Copies diagnostic number `index` from driver->diagnostics into the
+// caller-provided C struct `diag` (level, type, data, beginPos, endPos).
+// NOTE(review): diag->data borrows the diagnostic's own character buffer via
+// data().c_str(); it is only usable afterwards if data() returns a reference
+// to storage owned by the diagnostic -- confirm in the diagnostic header.
+void
+rbdriver_diag_get(const ruby_parser::base_driver* driver, size_t index, struct cdiagnostic *diag)
+{
+	const auto& entry = driver->diagnostics.at(index);
+
+	diag->level = entry.level();
+	diag->type = entry.error_class();
+	diag->data = entry.data().c_str();
+
+	const auto& range = entry.location();
+	diag->beginPos = range.beginPos;
+	diag->endPos = range.endPos;
+}
+
+// Reports a diagnostic described by the C struct `diag` to the driver through
+// external_diagnostic(). A null diag->data is passed on as an empty string.
+void
+rbdriver_diag_report(ruby_parser::base_driver* driver, const struct cdiagnostic *diag)
+{
+	driver->external_diagnostic(diag->level, diag->type, diag->beginPos, diag->endPos,
+		diag->data ? std::string(diag->data) : std::string());
+}

+ 67 - 0
third_party/parser/cc/context.cc

@@ -0,0 +1,67 @@
+#include <ruby_parser/context.hh>
+
+using namespace ruby_parser;
+using State = Context::State;
+
+// Returns the index of the first stack entry (counting from index 0, the
+// oldest push) that equals `state`, or std::nullopt when nothing matches.
+std::optional<int> Context::firstIndexOfState(State state) {
+    // Count with the container's own unsigned size type so the loop bound is
+    // never a signed/unsigned comparison; convert only the value handed back.
+    for (decltype(stack.size()) i = 0; i < stack.size(); ++i) {
+        if (stack[i] == state) {
+            return static_cast<int>(i);
+        }
+    }
+
+    return std::nullopt;
+}
+
+// Returns the index of the last stack entry (the one closest to the most
+// recent push) that equals `state`, or std::nullopt when nothing matches.
+std::optional<int> Context::lastIndexOfState(State state) {
+    // Walk down from size() to 1 in the unsigned size type. An empty stack
+    // simply skips the loop, with no reliance on `size() - 1` wrapping around
+    // and being narrowed to -1.
+    for (auto remaining = stack.size(); remaining > 0; --remaining) {
+        const auto index = remaining - 1;
+        if (stack[index] == state) {
+            return static_cast<int>(index);
+        }
+    }
+
+    return std::nullopt;
+}
+
+// True when at least one stack entry equals `state`.
+bool Context::contains(State state) {
+    const auto position = firstIndexOfState(state);
+    return position.has_value();
+}
+
+void Context::push(State state) { // Appends `state` as the newest (top) entry.
+    stack.push_back(state);
+}
+
+void Context::pop() { // Removes the newest entry; there is no emptiness check here, so callers must pair it with a push.
+    stack.pop_back();
+}
+
+void Context::reset() { // Drops every entry, leaving the stack empty.
+    stack.clear();
+}
+
+// True only when the newest stack entry is State::CLASS; an empty stack
+// yields false.
+bool Context::inClass() {
+    if (stack.empty()) {
+        return false;
+    }
+
+    return stack[stack.size() - 1] == State::CLASS;
+}
+
+// True when a State::DEF or State::DEFS entry appears anywhere in the stack,
+// not just on top.
+bool Context::indirectlyInDef() {
+    if (contains(State::DEF)) {
+        return true;
+    }
+
+    return contains(State::DEFS);
+}
+
+// Decides whether a class definition is permitted in the current context:
+//   - allowed when the stack holds neither DEF nor DEFS;
+//   - otherwise allowed only if an SCLASS entry sits above (was pushed after)
+//     the most recent DEF/DEFS entry.
+// The misspelled name is kept as-is because callers refer to it.
+bool Context::classDefintinionAllowed() {
+    const auto lastDef = lastIndexOfState(State::DEF);
+    const auto lastDefs = lastIndexOfState(State::DEFS);
+
+    // An empty optional orders below every engaged one, so std::max picks the
+    // higher index when both exist and the engaged one when only one does.
+    const auto innermostDef = std::max(lastDef, lastDefs);
+    if (!innermostDef.has_value()) {
+        return true;
+    }
+
+    const auto lastSclass = lastIndexOfState(State::SCLASS);
+    return lastSclass.has_value() && *lastSclass > *innermostDef;
+}
+
+bool Context::moduleDefintinionAllowed() { // Same rule as for class definitions.
+    return classDefintinionAllowed();
+}
+
+bool Context::dynamicConstDefintinionAllowed() { // Same rule as for class definitions.
+    return classDefintinionAllowed();
+}

+ 28 - 0
third_party/parser/cc/driver.cc

@@ -0,0 +1,28 @@
+#include <ruby_parser/driver.hh>
+#include <ruby_parser/lexer.hh>
+
+// Autogenerated code
+#include "third_party/parser/typedruby_bison.h"
+
+namespace ruby_parser {
+
+// Sets up the state shared by all drivers: the builder callbacks, a lexer over
+// `source` for the given Ruby `version`, no pending error, a def nesting level
+// of zero and no AST yet.
+// NOTE(review): `diagnostics` is handed to the lexer here without appearing in
+// this initializer list -- confirm it is declared before `lex` in the class.
+base_driver::base_driver(ruby_version version, const std::string& source, const struct builder& builder)
+	: build(builder)
+	, lex(diagnostics, version, source)
+	, pending_error(false)
+	, def_level(0)
+	, ast(nullptr)
+{
+}
+
+// Driver for the typedruby25 grammar; fixes the lexer version to RUBY_25.
+typedruby25::typedruby25(const std::string& source, const struct builder& builder)
+	: base_driver(ruby_version::RUBY_25, source, builder)
+{
+}
+
+// Runs the generated Bison parser over this driver and returns `ast`.
+// The parser's own return value is not inspected; `ast` keeps its initial
+// nullptr unless a grammar action assigned it.
+ForeignPtr typedruby25::parse(SelfPtr self) {
+	bison::typedruby25::parser grammar(*this, self);
+	grammar.parse();
+	return ast;
+}
+
+} // namespace ruby_parser

+ 3009 - 0
third_party/parser/cc/grammars/typedruby.ypp

@@ -0,0 +1,3009 @@
+%require "3.0"
+%skeleton "lalr1.cc"
+%token-table
+
+%code requires { // Prerequisites for the semantic value type; Bison copies this into the generated header too.
+  #include <ruby_parser/builder.hh>
+  #include <ruby_parser/node.hh>
+  #include <ruby_parser/token.hh>
+  #include <ruby_parser/lexer.hh>
+  #include <ruby_parser/driver.hh>
+  #include <ruby_parser/state_stack.hh>
+  #include <iterator>
+  #include <utility>
+  #include <cstdlib>
+
+using namespace ruby_parser; // NOTE(review): namespace-scope using-directive inside `%code requires`, so it reaches every includer of the generated header.
+using namespace std::string_literals; // Enables the ""s literal suffix; same header-scope caveat as above.
+
+#ifndef YY_NULLPTR // Fallback definition in case nothing has defined YY_NULLPTR yet.
+#define YY_NULLPTR nullptr
+#endif
+}
+
+%param { ruby_parser::typedruby25& driver }
+%parse-param { ruby_parser::SelfPtr self }
+%define api.namespace {ruby_parser::bison::typedruby25}
+%define api.prefix {typedruby25}
+%define api.value.type { union parser_value }
+%define api.token.constructor false
+%define parse.assert
+%define parse.error verbose
+
+// Mirrored in include/ruby_parser/token.hh.
+// If any of these token values are changed here, the header must be updated
+// as well.
+%token <token>
+  kCLASS              1001
+  kMODULE             1002
+  kDEF                1003
+  kUNDEF              1004
+  kBEGIN              1005
+  kRESCUE             1006
+  kENSURE             1007
+  kEND                1008
+  kIF                 1009
+  kUNLESS             1010
+  kTHEN               1011
+  kELSIF              1012
+  kELSE               1013
+  kCASE               1014
+  kWHEN               1015
+  kWHILE              1016
+  kUNTIL              1017
+  kFOR                1018
+  kBREAK              1019
+  kNEXT               1020
+  kREDO               1021
+  kRETRY              1022
+  kIN                 1023
+  kDO                 1024
+  kDO_COND            1025
+  kDO_BLOCK           1026
+  kDO_LAMBDA          1027
+  kRETURN             1028
+  kYIELD              1029
+  kSUPER              1030
+  kSELF               1031
+  kNIL                1032
+  kTRUE               1033
+  kFALSE              1034
+  kAND                1035
+  kOR                 1036
+  kNOT                1037
+  kIF_MOD             1038
+  kUNLESS_MOD         1039
+  kWHILE_MOD          1040
+  kUNTIL_MOD          1041
+  kRESCUE_MOD         1042
+  kALIAS              1043
+  kDEFINED            1044
+  klBEGIN             1045
+  klEND               1046
+  k__LINE__           1047
+  k__FILE__           1048
+  k__ENCODING__       1049
+  tIDENTIFIER         1050
+  tFID                1051
+  tGVAR               1052
+  tIVAR               1053
+  tCONSTANT           1054
+  tLABEL              1055
+  tCVAR               1056
+  tNTH_REF            1057
+  tBACK_REF           1058
+  tSTRING_CONTENT     1059
+  tINTEGER            1060
+  tFLOAT              1061
+  tUPLUS              1062
+  tUMINUS             1063
+  tUNARY_NUM          1064
+  tPOW                1065
+  tCMP                1066
+  tEQ                 1067
+  tEQQ                1068
+  tNEQ                1069
+  tEQL                1070
+  tGEQ                1071
+  tLEQ                1072
+  tANDOP              1073
+  tOROP               1074
+  tMATCH              1075
+  tNMATCH             1076
+  tDOT                1077
+  tDOT2               1078
+  tDOT3               1079
+  tAREF               1080
+  tASET               1081
+  tLSHFT              1082
+  tRSHFT              1083
+  tCOLON2             1084
+  tCOLON3             1085
+  tOP_ASGN            1086
+  tASSOC              1087
+  tLPAREN             1088
+  tLPAREN2            1089
+  tRPAREN             1090
+  tLPAREN_ARG         1091
+  tLBRACK             1092
+  tLBRACK2            1093
+  tRBRACK             1094
+  tLBRACE             1095
+  tLBRACE_ARG         1096
+  tSTAR               1097
+  tSTAR2              1098
+  tAMPER              1099
+  tAMPER2             1100
+  tTILDE              1101
+  tPERCENT            1102
+  tDIVIDE             1103
+  tDSTAR              1104
+  tPLUS               1105
+  tMINUS              1106
+  tLT                 1107
+  tGT                 1108
+  tPIPE               1109
+  tBANG               1110
+  tCARET              1111
+  tLCURLY             1112
+  tRCURLY             1113
+  tBACK_REF2          1114
+  tSYMBEG             1115
+  tSTRING_BEG         1116
+  tXSTRING_BEG        1117
+  tREGEXP_BEG         1118
+  tREGEXP_OPT         1119
+  tWORDS_BEG          1120
+  tQWORDS_BEG         1121
+  tSYMBOLS_BEG        1122
+  tQSYMBOLS_BEG       1123
+  tSTRING_DBEG        1124
+  tSTRING_DVAR        1125
+  tSTRING_END         1126
+  tSTRING_DEND        1127
+  tSTRING             1128
+  tSYMBOL             1129
+  tNL                 1130
+  tEH                 1131
+  tCOLON              1132
+  tCOMMA              1133
+  tSPACE              1134
+  tSEMI               1135
+  tLAMBDA             1136
+  tLAMBEG             1137
+  tCHARACTER          1138
+  tRATIONAL           1139
+  tIMAGINARY          1140
+  tLABEL_END          1141
+  tANDDOT             1142
+  tRATIONAL_IMAGINARY 1143
+  tFLOAT_IMAGINARY    1144
+
+%type <node>
+  arg
+  arg_rhs
+  arg_value
+  assoc
+  backref
+  block_arg
+  block_call
+  block_command
+  block_param_def
+  bodystmt
+  bvar
+  command
+  command_asgn
+  command_call
+  command_rhs
+  compstmt
+  cpath
+  dsym
+  expr
+  expr_value
+  f_arg_item
+  f_arglist
+  f_block_kw
+  f_block_opt
+  f_kw
+  f_larglist
+  f_marg
+  f_opt
+  fitem
+  for_var
+  fsym
+  keyword_variable
+  lhs
+  literal
+  method_call
+  mlhs
+  mlhs_inner
+  mlhs_item
+  mlhs_node
+  mrhs_arg
+  none
+  numeric
+  opt_block_param
+  primary
+  primary_value
+  qsymbols
+  qwords
+  regexp
+  simple_numeric
+  singleton
+  stmt
+  stmt_or_begin
+  string1
+  string_content
+  string_dvar
+  strings
+  symbol
+  symbols
+  top_compstmt
+  top_stmt
+  user_variable
+  var_lhs
+  var_ref
+  words
+  xstring
+  rel_expr
+
+%type <list>
+  aref_args
+  args
+  args_tail
+  assoc_list
+  assocs
+  block_args_tail
+  block_param
+  bv_decls
+  call_args
+  command_args
+  exc_list
+  f_arg
+  f_args
+  f_block_arg
+  f_block_kwarg
+  f_block_optarg
+  f_kwarg
+  f_kwrest
+  f_marg_list
+  f_margs
+  f_optarg
+  f_rest_arg
+  list_none
+  mlhs_basic
+  mlhs_head
+  mlhs_post
+  mrhs
+  opt_args_tail
+  opt_block_arg
+  opt_block_args_tail
+  opt_bv_decl
+  opt_call_args
+  opt_f_block_arg
+  opt_rescue
+  qsym_list
+  qword_list
+  regexp_contents
+  stmts
+  string
+  string_contents
+  symbol_list
+  top_stmts
+  undef_list
+  word
+  word_list
+  xstring_contents
+
+%type <token>
+  blkarg_mark
+  call_op
+  cname
+  do
+  dot_or_colon
+  f_arg_asgn
+  f_bad_arg
+  f_label
+  f_norm_arg
+  fcall
+  fname
+  kwrest_mark
+  op
+  operation
+  operation2
+  operation3
+  rbracket
+  restarg_mark
+  reswords
+  rparen
+  term
+  then
+  relop
+  k_return
+
+%type <delimited_list>
+  opt_paren_args
+  paren_args
+
+%type <delimited_block>
+  brace_block
+  brace_body
+  cmd_brace_block
+  do_block
+  do_body
+  lambda
+  lambda_body
+  begin_block
+
+%type <with_token>
+  exc_var
+  if_tail
+  opt_else
+  opt_ensure
+  superclass
+  expr_value_do
+
+%type <case_body>
+  case_body
+  cases
+
+%nonassoc tLOWEST
+%nonassoc tLBRACE_ARG
+%nonassoc kIF_MOD kUNLESS_MOD kWHILE_MOD kUNTIL_MOD
+%left     kOR kAND
+%right    kNOT
+%nonassoc kDEFINED
+%right    tEQL tOP_ASGN
+%left     kRESCUE_MOD
+%right    tEH tCOLON
+%nonassoc tDOT2 tDOT3
+%left     tOROP
+%left     tANDOP
+%nonassoc tCMP tEQ tEQQ tNEQ tMATCH tNMATCH
+%left     tGT tGEQ tLT tLEQ
+%left     tPIPE tCARET
+%left     tAMPER2
+%left     tLSHFT tRSHFT
+%left     tPLUS tMINUS
+%left     tSTAR2 tDIVIDE tPERCENT
+%right    tUNARY_NUM tUMINUS
+%right    tPOW
+%right    tBANG tTILDE tUPLUS
+
+%code requires { // Declares the semantic value type named by `%define api.value.type`.
+
+union parser_value { // One member per <tag> used in the %token/%type declarations above.
+  ruby_parser::token *token; // <token>: every terminal, plus the nonterminals typed <token>
+  ruby_parser::delimited_node_list *delimited_list; // <delimited_list>: paren_args, opt_paren_args
+  ruby_parser::delimited_block *delimited_block; // <delimited_block>: brace/do blocks, lambdas, begin_block
+  ruby_parser::node_with_token *with_token; // <with_token>: a node paired with a token (opt_else, opt_ensure, ...)
+  ruby_parser::case_body *case_body; // <case_body>: case_body, cases
+  ruby_parser::ForeignPtr node; // <node>: a single AST node produced by the builder
+  ruby_parser::node_list *list; // <list>: a list of nodes
+  ruby_parser::state_stack *stack; // not named by any %type above; presumably for mid-rule actions -- confirm
+  size_t size; // not named by any %type above; presumably for mid-rule actions -- confirm
+  bool boolean; // not named by any %type above; presumably for mid-rule actions -- confirm
+};
+
+}
+
+%code {
+namespace ruby_parser {
+namespace bison {
+namespace typedruby25 {
+
+// Aborts the current semantic action with YYERROR when driver.pending_error
+// is set, clearing the flag first. The macro deliberately has no trailing
+// semicolon: the `;` written after `DIAGCHECK()` at each use completes the
+// statement, so the expansion is exactly one statement and stays safe inside
+// an unbraced if/else.
+#define DIAGCHECK() do { \
+	if (driver.pending_error) { \
+		driver.pending_error = false; \
+		YYERROR; \
+	} \
+} while (false)
+
+// Bison's syntax-error hook. Records an UnexpectedToken error diagnostic that
+// spans the lexer's last token. The attached text is the grammar's name for
+// the most recently lexed token; Bison's own `msg` is used only when that
+// name is null.
+void parser::error(const std::string &msg) {
+	const int token_type = static_cast<int>(driver.last_token->type());
+	const char* token_str_name = yytname_[yytranslate_(token_type)];
+
+	std::string error_message = token_str_name != nullptr ? std::string(token_str_name) : msg;
+
+	driver.diagnostics.emplace_back(
+		dlevel::ERROR, dclass::UnexpectedToken,
+		diagnostic::range(driver.lex.last_token_s, driver.lex.last_token_e),
+		error_message);
+}
+
+// Lexer hook called by the generated parser: pulls the next token from the
+// driver's lexer, remembers it in driver.last_token (read by parser::error),
+// stores it as the semantic value and returns its type as the token code.
+int yylex(parser::semantic_type *lval, ruby_parser::typedruby25 &driver) {
+	auto token = driver.lex.advance();
+	driver.last_token = token;
+	int token_type = static_cast<int>(token->type());
+	assert(token_type >= 0);
+	lval->token = token;
+	return token_type;
+}
+
+}}} // namespace
+} // %code
+
+%%
+         program: top_compstmt // First rule, hence the start symbol: the whole input is one top-level statement sequence.
+                    {
+                      driver.ast = $1; // typedruby25::parse() returns this value
+                    }
+
+    top_compstmt: top_stmts opt_terms // Top-level statement list plus optional trailing terminators.
+                    {
+                      $$ = driver.build.compstmt(self, $1); // the terminators contribute nothing to the node
+                    }
+
+       top_stmts: // nothing
+                    {
+                      $$ = driver.alloc.node_list(); // empty input: empty list
+                    }
+                | top_stmt
+                    {
+                      $$ = driver.alloc.node_list($1); // first statement starts the list
+                    }
+                | top_stmts terms top_stmt
+                    {
+                      $1->emplace_back($3); // append in place and reuse the same list object
+                      $$ = $1;
+                    }
+                | error top_stmt // Bison error-recovery rule: resume at the next top_stmt after a syntax error.
+                    {
+                      $$ = driver.alloc.node_list($2); // the list restarts with the statement after the error
+                    }
+
+        top_stmt: stmt // default action: $$ = $1
+                | klBEGIN begin_block // klBEGIN followed by a braced block, accepted at top level
+                    {
+                      auto &delimited_block = $2;
+                      $$ = driver.build.preexe(self, $1, delimited_block->body, delimited_block->end);
+                    }
+
+     begin_block: tLCURLY top_compstmt tRCURLY // Braced body that follows klBEGIN.
+                    {
+                      $$ = driver.alloc.delimited_block($1, nullptr, $2, $3); // the second argument is passed as nullptr here
+                    }
+
+        bodystmt: compstmt opt_rescue opt_else opt_ensure // Statement body with optional rescue / else / ensure parts.
+                    {
+                      auto &rescueBodies = $2;
+                      auto &else_ = $3;
+                      auto &ensure = $4;
+
+                      if (rescueBodies->size() == 0 && else_ != nullptr) { // an else part without any rescue part
+                        driver.diagnostics.emplace_back(
+			                    dlevel::ERROR,
+                          dclass::UselessElse,
+                          else_->tok
+                        );
+                      } // no YYERROR: the diagnostic is recorded and the node is still built
+
+                      $$ = driver.build.beginBody(
+                        self,
+                        $1,
+                        rescueBodies,
+                        else_ ? else_->tok : nullptr, // absent optional parts are passed as nullptr
+                        else_ ? else_->nod : nullptr,
+                        ensure ? ensure->tok : nullptr,
+                        ensure ? ensure->nod : nullptr
+                      );
+                    }
+
+        compstmt: stmts opt_terms // Nested statement list plus optional trailing terminators.
+                    {
+                      $$ = driver.build.compstmt(self, $1);
+                    }
+
+           stmts: // nothing
+                    {
+                      $$ = driver.alloc.node_list(); // empty body: empty list
+                    }
+                | stmt_or_begin
+                    {
+                      $$ = driver.alloc.node_list($1); // first statement starts the list
+                    }
+                | stmts terms stmt_or_begin
+                    {
+                      $1->emplace_back($3); // append in place and reuse the same list object
+                      $$ = $1;
+                    }
+                | error // Bison error-recovery rule: a syntax error here yields an empty list.
+                    {
+                      $$ = driver.alloc.node_list();
+                    }
+
+   stmt_or_begin: stmt // default action: $$ = $1
+                | klBEGIN begin_block // parsed only so it can be rejected with a specific diagnostic
+                    {
+                      driver.diagnostics.emplace_back(
+                        dlevel::ERROR,
+			                  dclass::BeginInMethod,
+                        $1
+                      );
+                      YYERROR; // raise a syntax error after recording the diagnostic
+                    }
+
+            stmt: kALIAS fitem
+                    {
+                      driver.lex.set_state_expr_fname(); // mid-rule action: switch lexer state before the second name is scanned
+                    }
+                    fitem
+                    {
+                      $$ = driver.build.alias(self, $1, $2, $4); // $4, not $3: the mid-rule action occupies slot $3
+                    }
+                | kALIAS tGVAR tGVAR
+                    {
+                      $$ = driver.build.alias(self, $1, driver.build.gvar(self, $2), driver.build.gvar(self, $3));
+                    }
+                | kALIAS tGVAR tBACK_REF
+                    {
+                      $$ = driver.build.alias(self, $1, driver.build.gvar(self, $2), driver.build.backRef(self, $3));
+                    }
+                | kALIAS tGVAR tNTH_REF // parsed only so it can be rejected with a specific diagnostic
+                    {
+                      driver.diagnostics.emplace_back(dlevel::ERROR, dclass::NthRefAlias, $3);
+                      YYERROR;
+                    }
+                | kUNDEF undef_list
+                    {
+                      $$ = driver.build.undefMethod(self, $1, $2);
+                    }
+                | stmt kIF_MOD expr_value
+                    {
+                      $$ = driver.build.conditionMod(self, $1, nullptr, $3); // statement goes in the first body slot
+                    }
+                | stmt kUNLESS_MOD expr_value
+                    {
+                      $$ = driver.build.conditionMod(self, nullptr, $1, $3); // statement goes in the second body slot (mirror of kIF_MOD)
+                    }
+                | stmt kWHILE_MOD expr_value
+                    {
+                      $$ = driver.build.loop_while_mod(self, $1, $3);
+                    }
+                | stmt kUNTIL_MOD expr_value
+                    {
+                      $$ = driver.build.loopUntil_mod(self, $1, $3);
+                    }
+                | stmt kRESCUE_MOD stmt
+                    {
+                      ruby_parser::node_list rescue_body( // local (stack) list holding the single rescue body
+						driver.build.rescue_body(self, $2, nullptr, nullptr, nullptr, nullptr, $3));
+                      $$ = driver.build.beginBody(self, $1, &rescue_body, nullptr, nullptr, nullptr, nullptr); // NOTE(review): passes the address of a local -- beginBody must not retain it; confirm
+                    }
+                | klEND tLCURLY compstmt tRCURLY
+                    {
+                      $$ = driver.build.postexe(self, $1, $3, $4);
+                    }
+                | command_asgn // default action: $$ = $1
+                | mlhs tEQL command_call
+                    {
+                      $$ = driver.build.multi_assign(self, $1, $3);
+                    }
+                | lhs tEQL mrhs
+                    {
+                      $$ = driver.build.assign(self, $1, $2, driver.build.array(self, nullptr, $3, nullptr)); // right-hand list wrapped in an array node built without delimiter tokens
+                    }
+                | mlhs tEQL mrhs_arg
+                    {
+                      $$ = driver.build.multi_assign(self, $1, $3);
+                    }
+                | expr // default action: $$ = $1
+
+    command_asgn: lhs tEQL command_rhs // plain assignment; the only alternative here without DIAGCHECK()
+                    {
+                      $$ = driver.build.assign(self, $1, $2, $3);
+                    }
+                | var_lhs tOP_ASGN command_rhs
+                    {
+                      $$ = driver.build.op_assign(self, $1, $2, $3);
+                      DIAGCHECK(); // turns driver.pending_error into a parse error
+                    }
+                | primary_value tLBRACK2 opt_call_args rbracket tOP_ASGN command_rhs
+                    {
+                      $$ = driver.build.op_assign(self, driver.build.index(self, $1, $2, $3, $4), $5, $6); // target is an index node
+                      DIAGCHECK();
+                    }
+                | primary_value call_op tIDENTIFIER tOP_ASGN command_rhs
+                    {
+                      $$ = driver.build.op_assign(self, driver.build.call_method(self, $1, $2, $3, nullptr, nullptr, nullptr), $4, $5); // target is a call_method node built with its last three arguments null
+                      DIAGCHECK();
+                    }
+                | primary_value call_op tCONSTANT tOP_ASGN command_rhs
+                    {
+                      $$ = driver.build.op_assign(self, driver.build.call_method(self, $1, $2, $3, nullptr, nullptr, nullptr), $4, $5);
+                      DIAGCHECK();
+                    }
+                | primary_value tCOLON2 tCONSTANT tOP_ASGN command_rhs
+                    {
+                      auto const_node = driver.build.constOpAssignable(self, driver.build.constFetch(self, $1, $2, $3)); // constant target goes through constOpAssignable first
+                      $$ = driver.build.op_assign(self, const_node, $4, $5);
+                      DIAGCHECK();
+                    }
+                | primary_value tCOLON2 tIDENTIFIER tOP_ASGN command_rhs
+                    {
+                      $$ = driver.build.op_assign(self, driver.build.call_method(self, $1, $2, $3, nullptr, nullptr, nullptr), $4, $5);
+                      DIAGCHECK();
+                    }
+                | backref tOP_ASGN command_rhs
+                    {
+                      $$ = driver.build.op_assign(self, $1, $2, $3);
+                      DIAGCHECK();
+                    }
+
+     command_rhs: command_call %prec tOP_ASGN // default action: $$ = $1; %prec gives this rule tOP_ASGN's precedence
+                | command_call kRESCUE_MOD stmt
+                    {
+                      node_list rescue_body( // local (stack) list holding the single rescue body
+			                  driver.build.rescue_body(self, $2, nullptr, nullptr, nullptr, nullptr, $3)
+                      );
+                      $$ = driver.build.beginBody(self, $1, &rescue_body, nullptr, nullptr, nullptr, nullptr); // NOTE(review): passes the address of a local -- beginBody must not retain it; confirm
+                    }
+                | command_asgn // default action: $$ = $1 (allows chained assignments)
+
+            expr: command_call // default action: $$ = $1
+                | expr kAND expr
+                    {
+                      $$ = driver.build.logicalAnd(self, $1, $2, $3);
+                    }
+                | expr kOR expr
+                    {
+                      $$ = driver.build.logicalOr(self, $1, $2, $3);
+                    }
+                | kNOT opt_nl expr
+                    {
+                      $$ = driver.build.not_op(self, $1, nullptr, $3, nullptr); // third and fifth arguments are passed as nullptr
+                    }
+                | tBANG command_call
+                    {
+                      $$ = driver.build.not_op(self, $1, nullptr, $2, nullptr); // same builder call as the kNOT alternative
+                    }
+                | arg // default action: $$ = $1
+
+      expr_value: expr // alias for expr; default action: $$ = $1
+
+   expr_value_do:   { // mid-rule action, runs before expr_value is parsed
+                      driver.lex.cond.push(true);
+                    }
+                  expr_value do
+                    {
+                      driver.lex.cond.pop(); // undo the push made by the mid-rule action
+
+                      $$ = driver.alloc.node_with_token($3, $2); // $1 is the mid-rule action, so $2 = expr_value and $3 = do
+                    }
+
+    command_call: command // default action: $$ = $1
+                | block_command // default action: $$ = $1
+
+   block_command: block_call // default action: $$ = $1
+                | block_call dot_or_colon operation2 command_args // method call whose receiver is a block call
+                    {
+                      $$ = driver.build.call_method(self, $1, $2, $3, nullptr, $4, nullptr);
+                    }
+
+ cmd_brace_block: tLBRACE_ARG
+                    {
+                      driver.lex.context.push(Context::State::BLOCK); // mid-rule action: enter BLOCK context before the body is parsed
+                    }
+                  brace_body tRCURLY
+                    {
+                      auto &block = $3; // $2 is the mid-rule action, so the body is $3 and the closing brace $4
+                      block->begin = $1;
+                      block->end = $4;
+                      $$ = block;
+                      driver.lex.context.pop(); // leave the BLOCK context pushed above
+                    }
+
+           fcall: operation // alias for operation; default action: $$ = $1
+
+         command: fcall command_args %prec tLOWEST // call without receiver: receiver and operator slots are nullptr
+                    {
+                      $$ = driver.build.call_method(self, nullptr, nullptr, $1, nullptr, $2, nullptr);
+                    }
+                | fcall command_args cmd_brace_block // same call, followed by a brace block
+                    {
+                      auto method_call = driver.build.call_method(self, nullptr, nullptr, $1, nullptr, $2, nullptr);
+                      auto &delimited_block = $3;
+                      $$ = driver.build.block(self, method_call,
+                                      delimited_block->begin,
+                                      delimited_block->args,
+                                      delimited_block->body,
+                                      delimited_block->end);
+                      DIAGCHECK(); // turns driver.pending_error into a parse error
+                    }
+                | primary_value call_op operation2 command_args %prec tLOWEST // call with receiver $1 and operator $2
+                    {
+                      $$ = driver.build.call_method(self, $1, $2, $3, nullptr, $4, nullptr);
+                    }
+                | primary_value call_op operation2 command_args cmd_brace_block
+                    {
+                      auto method_call = driver.build.call_method(self, $1, $2, $3, nullptr, $4, nullptr);
+                      auto &delimited_block = $5;
+                      $$ = driver.build.block(self, method_call,
+                                      delimited_block->begin,
+                                      delimited_block->args,
+                                      delimited_block->body,
+                                      delimited_block->end);
+                      DIAGCHECK();
+                    }
+                | primary_value tCOLON2 operation2 command_args %prec tLOWEST // same as the call_op form, with tCOLON2 as the operator
+                    {
+                      $$ = driver.build.call_method(self, $1, $2, $3, nullptr, $4, nullptr);
+                    }
+                | primary_value tCOLON2 operation2 command_args cmd_brace_block
+                    {
+                      auto method_call = driver.build.call_method(self, $1, $2, $3, nullptr, $4, nullptr);
+                      auto &delimited_block = $5;
+                      $$ = driver.build.block(self, method_call,
+                                      delimited_block->begin,
+                                      delimited_block->args,
+                                      delimited_block->body,
+                                      delimited_block->end);
+                      DIAGCHECK();
+                    }
+                | kSUPER command_args // keyword forms below pass nullptr around the argument list
+                    {
+                      $$ = driver.build.keywordSuper(self, $1, nullptr, $2, nullptr);
+                    }
+                | kYIELD command_args
+                    {
+                      $$ = driver.build.keywordYield(self, $1, nullptr, $2, nullptr);
+                      DIAGCHECK(); // the only keyword form here followed by DIAGCHECK()
+                    }
+                | k_return call_args
+                    {
+                      $$ = driver.build.keywordReturn(self, $1, nullptr, $2, nullptr);
+                    }
+                | kBREAK call_args
+                    {
+                      $$ = driver.build.keywordBreak(self, $1, nullptr, $2, nullptr);
+                    }
+                | kNEXT call_args
+                    {
+                      $$ = driver.build.keywordNext(self, $1, nullptr, $2, nullptr);
+                    }
+
+            mlhs: mlhs_basic // bare target list: multi_lhs built with null delimiter arguments
+                    {
+                      $$ = driver.build.multi_lhs(self, nullptr, $1, nullptr);
+                    }
+                | tLPAREN mlhs_inner rparen // parenthesized form at the outermost level is built with begin(), unlike mlhs_inner
+                    {
+                      $$ = driver.build.begin(self, $1, $2, $3);
+                    }
+
+      mlhs_inner: mlhs_basic // same wrapping as the first alternative of mlhs: multi_lhs with nullptr tokens
+                    {
+                      $$ = driver.build.multi_lhs(self, nullptr, $1, nullptr);
+                    }
+                | tLPAREN mlhs_inner rparen // nested parentheses: built with multi_lhs1 here, whereas mlhs uses begin()
+                    {
+                      $$ = driver.build.multi_lhs1(self, $1, $2, $3);
+                    }
+
+      mlhs_basic: mlhs_head
+                | mlhs_head mlhs_item
+                    {
+                      // Head list plus one final item that has no trailing comma.
+                      $1->emplace_back($2);
+                      $$ = $1;
+                    }
+                | mlhs_head tSTAR mlhs_node
+                    {
+                      // Head list followed by a splat of the given node.
+                      $1->emplace_back(driver.build.splat_mlhs(self, $2, $3));
+                      $$ = $1;
+                    }
+                | mlhs_head tSTAR mlhs_node tCOMMA mlhs_post
+                    {
+                      // Head list, a splat of the given node, then every item of the post list.
+                      $1->emplace_back(driver.build.splat_mlhs(self, $2, $3));
+                      $1->concat($5);
+                      $$ = $1;
+                    }
+                | mlhs_head tSTAR
+                    {
+                      // Head list followed by a splat built with a nullptr node.
+                      $1->emplace_back(driver.build.splat_mlhs(self, $2, nullptr));
+                      $$ = $1;
+                    }
+                | mlhs_head tSTAR tCOMMA mlhs_post
+                    {
+                      // Head list, a splat built with a nullptr node, then the post list.
+                      $1->emplace_back(driver.build.splat_mlhs(self, $2, nullptr));
+                      $1->concat($4);
+                      $$ = $1;
+                    }
+                | tSTAR mlhs_node
+                    {
+                      // A fresh one-element list holding only the splat.
+                      $$ = driver.alloc.node_list(driver.build.splat_mlhs(self, $1, $2));
+                    }
+                | tSTAR mlhs_node tCOMMA mlhs_post
+                    {
+                      // A fresh list starting with the splat, extended by the post list.
+                      node_list *result = driver.alloc.node_list(driver.build.splat_mlhs(self, $1, $2));
+                      result->concat($4);
+                      $$ = result;
+                    }
+                | tSTAR
+                    {
+                      // A fresh one-element list holding a splat built with a nullptr node.
+                      $$ = driver.alloc.node_list(driver.build.splat_mlhs(self, $1, nullptr));
+                    }
+                | tSTAR tCOMMA mlhs_post
+                    {
+                      // A splat built with a nullptr node, extended by the post list.
+                      node_list *result = driver.alloc.node_list(driver.build.splat_mlhs(self, $1, nullptr));
+                      result->concat($3);
+                      $$ = result;
+                    }
+
+       mlhs_item: mlhs_node // no action: bison's default action passes the mlhs_node value through
+                | tLPAREN mlhs_inner rparen // parenthesized nested target, built with begin() from the paren tokens and inner value
+                    {
+                      $$ = driver.build.begin(self, $1, $2, $3);
+                    }
+
+       mlhs_head: mlhs_item tCOMMA
+                    {
+                      // First comma-terminated item: start a new list with it.
+                      $$ = driver.alloc.node_list($1);
+                    }
+                | mlhs_head mlhs_item tCOMMA
+                    {
+                      // Each further comma-terminated item is appended to the existing list.
+                      $1->emplace_back($2);
+                      $$ = $1;
+                    }
+
+       mlhs_post: mlhs_item
+                    {
+                      // First item: start a new list with it.
+                      $$ = driver.alloc.node_list($1);
+                    }
+                | mlhs_post tCOMMA mlhs_item
+                    {
+                      // Left-recursive growth: append the item that follows the comma.
+                      $1->emplace_back($3);
+                      $$ = $1;
+                    }
+
+       mlhs_node: user_variable // variable target: converted with assignable()
+                    {
+                      $$ = driver.build.assignable(self, $1);
+                      DIAGCHECK(); // macro defined outside this chunk; presumably stops the parse if the builder recorded a diagnostic - TODO confirm
+                    }
+                | keyword_variable // same treatment as user_variable
+                    {
+                      $$ = driver.build.assignable(self, $1);
+                      DIAGCHECK();
+                    }
+                | primary_value tLBRACK2 opt_call_args rbracket // indexed target: receiver, both bracket tokens and the index args go to indexAsgn()
+                    {
+                      $$ = driver.build.indexAsgn(self, $1, $2, $3, $4);
+                    }
+                | primary_value call_op tIDENTIFIER // attribute target via call_op
+                    {
+                      $$ = driver.build.attrAsgn(self, $1, $2, $3);
+                    }
+                | primary_value tCOLON2 tIDENTIFIER // attribute target via tCOLON2
+                    {
+                      $$ = driver.build.attrAsgn(self, $1, $2, $3);
+                    }
+                | primary_value call_op tCONSTANT // constant-named attribute via call_op is still built with attrAsgn()
+                    {
+                      $$ = driver.build.attrAsgn(self, $1, $2, $3);
+                    }
+                | primary_value tCOLON2 tCONSTANT // scoped constant: constFetch() result wrapped by assignable()
+                    {
+                      $$ = driver.build.assignable(self, driver.build.constFetch(self, $1, $2, $3));
+                      DIAGCHECK();
+                    }
+                | tCOLON3 tCONSTANT // constant introduced by tCOLON3: constGlobal() result wrapped by assignable()
+                    {
+                      $$ = driver.build.assignable(self, driver.build.constGlobal(self, $1, $2));
+                      DIAGCHECK();
+                    }
+                | backref // passed to assignable() like the variable alternatives; NOTE(review): presumably the builder reports an error for this case - confirm
+                    {
+                      $$ = driver.build.assignable(self, $1);
+                      DIAGCHECK();
+                    }
+
+             lhs: user_variable // alternatives and actions here are the same as those of mlhs_node; variable target goes through assignable()
+                    {
+                      $$ = driver.build.assignable(self, $1);
+                      DIAGCHECK(); // macro defined outside this chunk; presumably stops the parse if the builder recorded a diagnostic - TODO confirm
+                    }
+                | keyword_variable // same treatment as user_variable
+                    {
+                      $$ = driver.build.assignable(self, $1);
+                      DIAGCHECK();
+                    }
+                | primary_value tLBRACK2 opt_call_args rbracket // indexed target: receiver, both bracket tokens and the index args go to indexAsgn()
+                    {
+                      $$ = driver.build.indexAsgn(self, $1, $2, $3, $4);
+                    }
+                | primary_value call_op tIDENTIFIER // attribute target via call_op
+                    {
+                      $$ = driver.build.attrAsgn(self, $1, $2, $3);
+                    }
+                | primary_value tCOLON2 tIDENTIFIER // attribute target via tCOLON2
+                    {
+                      $$ = driver.build.attrAsgn(self, $1, $2, $3);
+                    }
+                | primary_value call_op tCONSTANT // constant-named attribute via call_op is still built with attrAsgn()
+                    {
+                      $$ = driver.build.attrAsgn(self, $1, $2, $3);
+                    }
+                | primary_value tCOLON2 tCONSTANT // scoped constant: constFetch() result wrapped by assignable()
+                    {
+                      $$ = driver.build.assignable(self, driver.build.constFetch(self, $1, $2, $3));
+                      DIAGCHECK();
+                    }
+                | tCOLON3 tCONSTANT // constant introduced by tCOLON3: constGlobal() result wrapped by assignable()
+                    {
+                      $$ = driver.build.assignable(self, driver.build.constGlobal(self, $1, $2));
+                      DIAGCHECK();
+                    }
+                | backref // passed to assignable() like the variable alternatives; NOTE(review): presumably the builder reports an error for this case - confirm
+                    {
+                      $$ = driver.build.assignable(self, $1);
+                      DIAGCHECK();
+                    }
+
+           cname: tIDENTIFIER // always rejected: the alternative exists only to report a targeted error
+                    {
+                      driver.diagnostics.emplace_back(dlevel::ERROR, dclass::ModuleNameConst, $1); // ERROR-level ModuleNameConst diagnostic attached to the identifier token
+                      YYERROR; // bison macro: raise a syntax error from inside the action
+                    }
+                | tCONSTANT // no action: the token is passed through by bison's default action
+
+           cpath: tCOLON3 cname // name prefixed by tCOLON3: built with constGlobal()
+                    {
+                      $$ = driver.build.constGlobal(self, $1, $2);
+                    }
+                | cname // bare name: built with const_()
+                    {
+                      $$ = driver.build.const_(self, $1);
+                    }
+                | primary_value tCOLON2 cname // name scoped under an expression: built with constFetch()
+                    {
+                      $$ = driver.build.constFetch(self, $1, $2, $3);
+                    }
+
+           fname: tIDENTIFIER | tCONSTANT | tFID // no actions anywhere in this rule: each alternative's value is passed through unchanged
+                | op // any token accepted by the op rule
+                | reswords // any keyword token accepted by the reswords rule
+
+            fsym: fname // a bare fname is converted into a node with symbol()
+                    {
+                      $$ = driver.build.symbol(self, $1);
+                    }
+                | symbol // no action: the symbol value is passed through by bison's default action
+
+           fitem: fsym // no actions: either alternative's value is passed through by bison's default action
+                | dsym
+
+      undef_list: fitem
+                    {
+                      // First entry: start a new list with it.
+                      $$ = driver.alloc.node_list($1);
+                    }
+                | undef_list tCOMMA
+                    {
+                      // Mid-rule action: switch the lexer state before the next fitem is scanned.
+                      driver.lex.set_state_expr_fname();
+                    }
+                    fitem
+                    {
+                      // The mid-rule action above occupies the third slot, so fitem is the fourth.
+                      $1->emplace_back($4);
+                      $$ = $1;
+                    }
+
+              op:   tPIPE    | tCARET  | tAMPER2  | tCMP  | tEQ     | tEQQ // operator tokens accepted by fname; no actions, each token is passed through
+                |   tMATCH   | tNMATCH | tGT      | tGEQ  | tLT     | tLEQ
+                |   tNEQ     | tLSHFT  | tRSHFT   | tPLUS | tMINUS  | tSTAR2
+                |   tSTAR    | tDIVIDE | tPERCENT | tPOW  | tBANG   | tTILDE
+                |   tUPLUS   | tUMINUS | tAREF    | tASET | tDSTAR  | tBACK_REF2
+
+        reswords: k__LINE__ | k__FILE__ | k__ENCODING__ | klBEGIN | klEND // keyword tokens accepted by fname; no actions, each token is passed through
+                | kALIAS    | kAND      | kBEGIN        | kBREAK  | kCASE
+                | kCLASS    | kDEF      | kDEFINED      | kDO     | kELSE
+                | kELSIF    | kEND      | kENSURE       | kFALSE  | kFOR
+                | kIN       | kMODULE   | kNEXT         | kNIL    | kNOT
+                | kOR       | kREDO     | kRESCUE       | kRETRY  | kRETURN
+                | kSELF     | kSUPER    | kTHEN         | kTRUE   | kUNDEF
+                | kWHEN     | kYIELD    | kIF           | kUNLESS | kWHILE
+                | kUNTIL
+
+             arg: lhs tEQL arg_rhs // plain assignment: target, operator token and right-hand side go to assign()
+                    {
+                      $$ = driver.build.assign(self, $1, $2, $3);
+                    }
+                | var_lhs tOP_ASGN arg_rhs // operator-assignment on a var_lhs target
+                    {
+                      $$ = driver.build.op_assign(self, $1, $2, $3);
+                      DIAGCHECK(); // macro defined outside this chunk; presumably stops the parse if the builder recorded a diagnostic - TODO confirm
+                    }
+                | primary_value tLBRACK2 opt_call_args rbracket tOP_ASGN arg_rhs // indexed target: an index() node is built first, then op_assign()
+                    {
+                      $$ = driver.build.op_assign(self, driver.build.index(self, $1, $2, $3, $4), $5, $6);
+                      DIAGCHECK();
+                    }
+                | primary_value call_op tIDENTIFIER tOP_ASGN arg_rhs // attribute target: a call_method() node with nullptr for its last three arguments
+                    {
+                      $$ = driver.build.op_assign(self, driver.build.call_method(self, $1, $2, $3, nullptr, nullptr, nullptr), $4, $5);
+                      DIAGCHECK();
+                    }
+                | primary_value call_op tCONSTANT tOP_ASGN arg_rhs // same construction as the tIDENTIFIER alternative above
+                    {
+                      $$ = driver.build.op_assign(self, driver.build.call_method(self, $1, $2, $3, nullptr, nullptr, nullptr), $4, $5);
+                      DIAGCHECK();
+                    }
+                | primary_value tCOLON2 tIDENTIFIER tOP_ASGN arg_rhs // tCOLON2 plus identifier is also built as a call_method() target
+                    {
+                      $$ = driver.build.op_assign(self, driver.build.call_method(self, $1, $2, $3, nullptr, nullptr, nullptr), $4, $5);
+                      DIAGCHECK();
+                    }
+                | primary_value tCOLON2 tCONSTANT tOP_ASGN arg_rhs // scoped constant target: constFetch() wrapped by constOpAssignable()
+                    {
+                      auto const_ = driver.build.constOpAssignable(self, driver.build.constFetch(self, $1, $2, $3));
+                      $$ = driver.build.op_assign(self, const_, $4, $5);
+                      DIAGCHECK();
+                    }
+                | tCOLON3 tCONSTANT tOP_ASGN arg_rhs // tCOLON3 constant target: constGlobal() wrapped by constOpAssignable()
+                    {
+                      auto const_ = driver.build.constOpAssignable(self, driver.build.constGlobal(self, $1, $2));
+                      $$ = driver.build.op_assign(self, const_, $3, $4);
+                      DIAGCHECK();
+                    }
+                | backref tOP_ASGN arg_rhs // backref target is handed to op_assign() directly
+                    {
+                      $$ = driver.build.op_assign(self, $1, $2, $3);
+                      DIAGCHECK();
+                    }
+                | arg tDOT2 arg // tDOT2 range with both operands
+                    {
+                      $$ = driver.build.range_inclusive(self, $1, $2, $3);
+                    }
+                | arg tDOT3 arg // tDOT3 range with both operands
+                    {
+                      $$ = driver.build.range_exclusive(self, $1, $2, $3);
+                    }
+                | arg tDOT2 // tDOT2 range without a right operand: nullptr is passed in its place
+                    {
+                      $$ = driver.build.range_inclusive(self, $1, $2, nullptr);
+                    }
+                | arg tDOT3 // tDOT3 range without a right operand: nullptr is passed in its place
+                    {
+                      $$ = driver.build.range_exclusive(self, $1, $2, nullptr);
+                    }
+                | arg tPLUS arg // binary operators: left operand, operator token and right operand go to binaryOp()
+                    {
+                      $$ = driver.build.binaryOp(self, $1, $2, $3);
+                    }
+                | arg tMINUS arg
+                    {
+                      $$ = driver.build.binaryOp(self, $1, $2, $3);
+                    }
+                | arg tSTAR2 arg
+                    {
+                      $$ = driver.build.binaryOp(self, $1, $2, $3);
+                    }
+                | arg tDIVIDE arg
+                    {
+                      $$ = driver.build.binaryOp(self, $1, $2, $3);
+                    }
+                | arg tPERCENT arg
+                    {
+                      $$ = driver.build.binaryOp(self, $1, $2, $3);
+                    }
+                | arg tPOW arg
+                    {
+                      $$ = driver.build.binaryOp(self, $1, $2, $3);
+                    }
+                | tUNARY_NUM simple_numeric tPOW arg // the power binaryOp() is built first and the unary operator is applied to its result
+                    {
+                      $$ = driver.build.unary_op(self, $1, driver.build.binaryOp(self, $2, $3, $4));
+                    }
+                | tUPLUS arg // prefix operators: operator token and operand go to unary_op()
+                    {
+                      $$ = driver.build.unary_op(self, $1, $2);
+                    }
+                | tUMINUS arg
+                    {
+                      $$ = driver.build.unary_op(self, $1, $2);
+                    }
+                | arg tPIPE arg
+                    {
+                      $$ = driver.build.binaryOp(self, $1, $2, $3);
+                    }
+                | arg tCARET arg
+                    {
+                      $$ = driver.build.binaryOp(self, $1, $2, $3);
+                    }
+                | arg tAMPER2 arg
+                    {
+                      $$ = driver.build.binaryOp(self, $1, $2, $3);
+                    }
+                | arg tCMP arg
+                    {
+                      $$ = driver.build.binaryOp(self, $1, $2, $3);
+                    }
+                | rel_expr %prec tCMP // no action: rel_expr value passed through; the reduction is given the precedence of tCMP
+                | arg tEQ arg
+                    {
+                      $$ = driver.build.binaryOp(self, $1, $2, $3);
+                    }
+                | arg tEQQ arg
+                    {
+                      $$ = driver.build.binaryOp(self, $1, $2, $3);
+                    }
+                | arg tNEQ arg
+                    {
+                      $$ = driver.build.binaryOp(self, $1, $2, $3);
+                    }
+                | arg tMATCH arg // tMATCH has its own builder (match_op) and is followed by DIAGCHECK
+                    {
+                      $$ = driver.build.match_op(self, $1, $2, $3);
+                      DIAGCHECK();
+                    }
+                | arg tNMATCH arg // unlike tMATCH, tNMATCH is built as an ordinary binaryOp()
+                    {
+                      $$ = driver.build.binaryOp(self, $1, $2, $3);
+                    }
+                | tBANG arg // not_op() with nullptr for its second and fourth token arguments
+                    {
+                      $$ = driver.build.not_op(self, $1, nullptr, $2, nullptr);
+                    }
+                | tTILDE arg
+                    {
+                      $$ = driver.build.unary_op(self, $1, $2);
+                    }
+                | arg tLSHFT arg
+                    {
+                      $$ = driver.build.binaryOp(self, $1, $2, $3);
+                    }
+                | arg tRSHFT arg
+                    {
+                      $$ = driver.build.binaryOp(self, $1, $2, $3);
+                    }
+                | arg tANDOP arg // tANDOP and tOROP use dedicated builders rather than binaryOp()
+                    {
+                      $$ = driver.build.logicalAnd(self, $1, $2, $3);
+                    }
+                | arg tOROP arg
+                    {
+                      $$ = driver.build.logicalOr(self, $1, $2, $3);
+                    }
+                | kDEFINED opt_nl arg // the opt_nl value is not used by the action
+                    {
+                      $$ = driver.build.keywordDefined(self, $1, $3);
+                    }
+                | arg tEH arg opt_nl tCOLON arg // ternary: the opt_nl value in the fourth position is not passed to the builder
+                    {
+                      $$ = driver.build.ternary(self, $1, $2, $3, $5, $6);
+                    }
+                | primary // no action: the primary value is passed through by bison's default action
+
+           relop: tGT | tLT | tGEQ | tLEQ // the four operator tokens used by rel_expr; no actions, each token is passed through
+
+        rel_expr: arg relop arg %prec tGT // one relop application, built with binaryOp(); the rule is given the precedence of tGT
+                    {
+                      $$ = driver.build.binaryOp(self, $1, $2, $3);
+                    }
+                | rel_expr relop arg %prec tGT // left-recursive chain: the previous rel_expr becomes the left operand
+                    {
+                      $$ = driver.build.binaryOp(self, $1, $2, $3);
+                    }
+
+       arg_value: arg // alias for arg with no action: the value is passed through by bison's default action
+
+       aref_args: list_none
+                | args trailer
+                | args tCOMMA assocs trailer
+                    {
+                      // The assocs are wrapped by associate() with nullptr tokens and
+                      // appended as a single extra element of the args list.
+                      $1->emplace_back(driver.build.associate(self, nullptr, $3, nullptr));
+                      $$ = $1;
+                    }
+                | assocs trailer
+                    {
+                      // Only assocs: a new one-element list holding the associate() node.
+                      $$ = driver.alloc.node_list(driver.build.associate(self, nullptr, $1, nullptr));
+                    }
+
+         arg_rhs: arg %prec tOP_ASGN // no action: arg value passed through; the reduction is given the precedence of tOP_ASGN
+                | arg kRESCUE_MOD arg // wraps the left arg in a beginBody() whose rescue list holds one rescue_body() built from the right arg
+                    {
+                      node_list rescue_body(driver.build.rescue_body(self, $2, nullptr, nullptr, nullptr, nullptr, $3)); // stack-local one-element list, destroyed when the action ends
+                      $$ = driver.build.beginBody(self, $1, &rescue_body, nullptr, nullptr, nullptr, nullptr); // receives the address of the local list; assumes beginBody does not keep that pointer - TODO confirm
+                    }
+
+      paren_args: tLPAREN2 opt_call_args rparen // both paren tokens and the argument list are bundled into a delimited_node_list
+                    {
+                      $$ = driver.alloc.delimited_node_list($1, $2, $3);
+                    }
+
+  opt_paren_args: // nothing: the empty production yields a delimited list with nullptr tokens and a fresh empty node_list
+                    {
+                      $$ = driver.alloc.delimited_node_list(nullptr, driver.alloc.node_list(), nullptr);
+                    }
+                | paren_args // no action: the paren_args value is passed through by bison's default action
+
+   opt_call_args: // empty production
+                    {
+                      // No arguments at all: hand back a fresh empty list.
+                      $$ = driver.alloc.node_list();
+                    }
+                | call_args
+                | args tCOMMA
+                | args tCOMMA assocs tCOMMA
+                    {
+                      // The assocs are wrapped by associate() with nullptr tokens and
+                      // appended as a single extra element of the args list.
+                      $1->emplace_back(driver.build.associate(self, nullptr, $3, nullptr));
+                      $$ = $1;
+                    }
+                | assocs tCOMMA
+                    {
+                      // Only assocs: a new one-element list holding the associate() node.
+                      $$ = driver.alloc.node_list(driver.build.associate(self, nullptr, $1, nullptr));
+                    }
+
+       call_args: command
+                    {
+                      // A lone command becomes a one-element argument list.
+                      $$ = driver.alloc.node_list($1);
+                    }
+                | args opt_block_arg
+                    {
+                      // Append whatever opt_block_arg produced to the args list.
+                      $1->concat($2);
+                      $$ = $1;
+                    }
+                | assocs opt_block_arg
+                    {
+                      // Start from a one-element list holding the associate() node,
+                      // then append whatever opt_block_arg produced.
+                      node_list *hash_args = driver.alloc.node_list(driver.build.associate(self, nullptr, $1, nullptr));
+                      hash_args->concat($2);
+                      $$ = hash_args;
+                    }
+                | args tCOMMA assocs opt_block_arg
+                    {
+                      // args first, then the associate() node, then the opt_block_arg list.
+                      $1->emplace_back(driver.build.associate(self, nullptr, $3, nullptr));
+                      $1->concat($4);
+                      $$ = $1;
+                    }
+                | block_arg
+                    {
+                      // A lone block argument becomes a one-element argument list.
+                      $$ = driver.alloc.node_list($1);
+                    }
+
+    command_args:   { // mid-rule action executed before call_args is parsed: pushes true onto the lexer's cmdarg stack
+                      ruby_parser::token_type last_token = driver.last_token->type();
+                      bool lookahead = (last_token == ruby_parser::token_type::tLBRACK) || (last_token == ruby_parser::token_type::tLPAREN_ARG); // NOTE(review): presumably these tokens mean a lookahead token was already lexed and pushed its own cmdarg entry - confirm against the lexer
+
+                      if (lookahead) {
+                        auto top = driver.lex.cmdarg.pop(); // take the current top off temporarily
+                        driver.lex.cmdarg.push(true); // insert the new entry beneath it
+                        driver.lex.cmdarg.push(top); // put the saved top back
+                      } else {
+                        driver.lex.cmdarg.push(true);
+                      }
+                    }
+                  call_args
+                    { // final action: removes the entry pushed by the mid-rule action above
+                      ruby_parser::token_type last_token = driver.last_token->type();
+                      bool lookahead = (last_token == ruby_parser::token_type::tLBRACE_ARG); // NOTE(review): same presumption as above, here for tLBRACE_ARG - confirm
+
+                      if (lookahead) {
+                        auto top = driver.lex.cmdarg.pop(); // save the current top
+                        driver.lex.cmdarg.pop(); // drop the entry beneath it
+                        driver.lex.cmdarg.push(top); // restore the saved top
+                      } else {
+                        driver.lex.cmdarg.pop();
+                      }
+
+                      $$ = $2; // the mid-rule action occupies the first slot, so call_args is the second
+                    }
+
+       block_arg: tAMPER arg_value // the tAMPER token and the argument value are combined by blockPass()
+                    {
+                      $$ = driver.build.blockPass(self, $1, $2);
+                    }
+
+   opt_block_arg: tCOMMA block_arg // present: a one-element list holding the block_arg; the comma token is not used
+                    {
+                      $$ = driver.alloc.node_list($2);
+                    }
+                | // nothing: the empty production yields a fresh empty list, so users of this rule can always concat the result
+                    {
+                      $$ = driver.alloc.node_list();
+                    }
+
+            args: arg_value
+                    {
+                      // First argument: start a new list with it.
+                      $$ = driver.alloc.node_list($1);
+                    }
+                | tSTAR arg_value
+                    {
+                      // First argument is a splat: start a new list with the splat() node.
+                      $$ = driver.alloc.node_list(driver.build.splat(self, $1, $2));
+                    }
+                | args tCOMMA arg_value
+                    {
+                      // Left-recursive growth: append the value that follows the comma.
+                      $1->emplace_back($3);
+                      $$ = $1;
+                    }
+                | args tCOMMA tSTAR arg_value
+                    {
+                      // Left-recursive growth with a splat: append the splat() node.
+                      $1->emplace_back(driver.build.splat(self, $3, $4));
+                      $$ = $1;
+                    }
+
+        mrhs_arg: mrhs // a multi-value list is turned into an array() node with nullptr for both token arguments
+                    {
+                      $$ = driver.build.array(self, nullptr, $1, nullptr);
+                    }
+                | arg_value // no action: a single value is passed through by bison's default action
+
+            mrhs: args tCOMMA arg_value
+                    {
+                      // Extend the args list with the value that follows the comma.
+                      $1->emplace_back($3);
+                      $$ = $1;
+                    }
+                | args tCOMMA tSTAR arg_value
+                    {
+                      // Extend the args list with a splat() node built from the star token and value.
+                      $1->emplace_back(driver.build.splat(self, $3, $4));
+                      $$ = $1;
+                    }
+                | tSTAR arg_value
+                    {
+                      // A lone splat: a new one-element list holding the splat() node.
+                      $$ = driver.alloc.node_list(driver.build.splat(self, $1, $2));
+                    }
+
+         primary: literal
+                | strings
+                | xstring
+                | regexp
+                | words
+                | qwords
+                | symbols
+                | qsymbols
+                | var_ref
+                | backref
+                | tFID
+                    {
+                      $$ = driver.build.call_method(self, nullptr, nullptr, $1, nullptr, nullptr, nullptr);
+                    }
+                | kBEGIN
+                    {
+                      $<stack>$ = driver.copy_stack();
+                      driver.lex.cmdarg.clear();
+                    }
+                    bodystmt kEND
+                    {
+                      driver.replace_stack($<stack>2);
+                      $$ = driver.build.beginKeyword(self, $1, $3, $4);
+                    }
+                | tLPAREN_ARG stmt
+                    {
+                      driver.lex.set_state_expr_endarg();
+                    }
+                    rparen
+                    {
+                      $$ = driver.build.begin(self, $1, $2, $4);
+                    }
+                | tLPAREN_ARG
+                    {
+                      driver.lex.set_state_expr_endarg();
+                    }
+                    opt_nl tRPAREN
+                    {
+                      $$ = driver.build.begin(self, $1, nullptr, $4);
+                    }
+                | tLPAREN compstmt tRPAREN
+                    {
+                      $$ = driver.build.begin(self, $1, $2, $3);
+                    }
+                | primary_value tCOLON2 tCONSTANT
+                    {
+                      $$ = driver.build.constFetch(self, $1, $2, $3);
+                    }
+                | tCOLON3 tCONSTANT
+                    {
+                      $$ = driver.build.constGlobal(self, $1, $2);
+                    }
+                | tLBRACK aref_args tRBRACK
+                    {
+                      $$ = driver.build.array(self, $1, $2, $3);
+                    }
+                | tLBRACE assoc_list tRCURLY
+                    {
+                      $$ = driver.build.associate(self, $1, $2, $3);
+                    }
+                | k_return
+                    {
+                      $$ = driver.build.keywordReturn(self, $1, nullptr, nullptr, nullptr);
+                    }
+                | kYIELD tLPAREN2 call_args rparen
+                    {
+                      $$ = driver.build.keywordYield(self, $1, $2, $3, $4);
+                      DIAGCHECK();
+                    }
+                | kYIELD tLPAREN2 rparen
+                    {
+                      node_list tmp;
+                      $$ = driver.build.keywordYield(self, $1, $2, &tmp, $3);
+                      DIAGCHECK();
+                    }
+                | kYIELD
+                    {
+                      $$ = driver.build.keywordYield(self, $1, nullptr, nullptr, nullptr);
+                      DIAGCHECK();
+                    }
+                | kDEFINED opt_nl tLPAREN2 expr rparen
+                    {
+                      $$ = driver.build.keywordDefined(self, $1, $4);
+                    }
+                | kNOT tLPAREN2 expr rparen
+                    {
+                      $$ = driver.build.not_op(self, $1, $2, $3, $4);
+                    }
+                | kNOT tLPAREN2 rparen
+                    {
+                      $$ = driver.build.not_op(self, $1, $2, nullptr, $3);
+                    }
+                | fcall brace_block
+                    {
+                      auto method_call = driver.build.call_method(self, nullptr, nullptr, $1, nullptr, nullptr, nullptr);
+                      auto &delimited_block = $2;
+
+                      $$ = driver.build.block(self, method_call,
+                        delimited_block->begin,
+                        delimited_block->args,
+                        delimited_block->body,
+                        delimited_block->end);
+                      DIAGCHECK();
+                    }
+                | method_call
+                | method_call brace_block
+                    {
+                      auto &delimited_block = $2;
+                      $$ = driver.build.block(self, $1,
+                        delimited_block->begin,
+                        delimited_block->args,
+                        delimited_block->body,
+                        delimited_block->end);
+                      DIAGCHECK();
+                    }
+                | tLAMBDA lambda
+                    {
+                      auto lambda_call = driver.build.callLambda(self, $1);
+                      auto &lambda = $2;
+                      $$ = driver.build.block(self, lambda_call,
+                        lambda->begin,
+                        lambda->args,
+                        lambda->body,
+                        lambda->end);
+                      DIAGCHECK();
+                    }
+                | kIF expr_value then compstmt if_tail kEND
+                    {
+                      auto &else_ = $5;
+                      $$ = driver.build.condition(self, $1, $2, $3, $4,
+                        else_ ? else_->tok : nullptr,
+                        else_ ? else_->nod : nullptr, $6);
+                    }
+                | kUNLESS expr_value then compstmt opt_else kEND
+                    {
+                      auto &else_ = $5;
+                      $$ = driver.build.condition(self, $1, $2, $3,
+                        else_ ? else_->nod : nullptr,
+			                  else_ ? else_->tok : nullptr, $4, $6);
+                    }
+                | kWHILE expr_value_do compstmt kEND
+                    {
+                      $$ = driver.build.loop_while(self, $1, $2->nod, $2->tok, $3, $4);
+                    }
+                | kUNTIL expr_value_do compstmt kEND
+                    {
+                      $$ = driver.build.loopUntil(self, $1, $2->nod, $2->tok, $3, $4);
+                    }
+                | kCASE expr_value opt_terms case_body kEND
+                    {
+                      auto &case_body = $4;
+                      auto &else_ = case_body->els;
+                      $$ = driver.build.case_(self, $1, $2,
+                        &case_body->whens,
+                        else_ ? else_->tok : nullptr,
+			                  else_ ? else_->nod : nullptr, $5);
+                    }
+                | kCASE            opt_terms case_body kEND
+                    {
+                      auto &case_body = $3;
+                      auto &else_ = case_body->els;
+                      $$ = driver.build.case_(self, $1, nullptr,
+                        &case_body->whens,
+                        else_ ? else_->tok : nullptr,
+			                  else_ ? else_->nod : nullptr, $4);
+                    }
+                | kFOR for_var kIN expr_value_do compstmt kEND
+                    {
+                      $$ = driver.build.for_(self, $1, $2, $3, $4->nod, $4->tok, $5, $6);
+                    }
+                | kCLASS cpath superclass
+                    {
+                      driver.lex.extend_static();
+                      driver.lex.cmdarg.push(false);
+                      driver.lex.cond.push(false);
+                      driver.lex.context.push(Context::State::CLASS);
+                    }
+                    bodystmt kEND
+                    {
+                      if (!driver.lex.context.classDefintinionAllowed()) {
+                        driver.diagnostics.emplace_back(dlevel::ERROR, dclass::ClassInDef, $1);
+                        YYERROR;
+                      }
+
+                      auto class_tok = $1;
+                      auto end_tok = $6;
+
+                      auto &superclass_ = $3;
+                      auto lt_t       = superclass_ ? superclass_->tok : nullptr;
+                      auto superclass = superclass_ ? superclass_->nod : nullptr;
+
+                      $$ = driver.build.def_class(self, class_tok, $2, lt_t, superclass, $5, end_tok);
+
+                      driver.lex.cmdarg.pop();
+                      driver.lex.cond.pop();
+                      driver.lex.unextend();
+                      driver.lex.context.pop();
+                    }
+                | kCLASS tLSHFT expr term
+                    {
+                      driver.lex.extend_static();
+                      driver.lex.cmdarg.push(false);
+                      driver.lex.cond.push(false);
+                      driver.lex.context.push(Context::State::SCLASS);
+                    }
+                    bodystmt kEND
+                    {
+                      $$ = driver.build.def_sclass(self, $1, $2, $3, $6, $7);
+
+                      driver.lex.cmdarg.pop();
+                      driver.lex.cond.pop();
+                      driver.lex.unextend();
+                      driver.lex.context.pop();
+                    }
+                | kMODULE cpath
+                    {
+                      driver.lex.extend_static();
+                      driver.lex.cmdarg.push(false);
+                    }
+                    bodystmt kEND
+                    {
+                      if (!driver.lex.context.moduleDefintinionAllowed()) {
+                        driver.diagnostics.emplace_back(dlevel::ERROR, dclass::ModuleInDef, $1);
+                        YYERROR;
+                      }
+
+                      auto module_tok = $1;
+                      auto end_tok = $5;
+
+                      $$ = driver.build.defModule(self, module_tok, $2, $4, end_tok);
+
+                      driver.lex.cmdarg.pop();
+                      driver.lex.unextend();
+                    }
+                | kDEF fname
+                    {
+                      driver.lex.extend_static();
+                      driver.lex.cmdarg.push(false);
+                      driver.lex.cond.push(false);
+                      driver.lex.context.push(Context::State::DEF);
+                    }
+                    f_arglist bodystmt kEND
+                    {
+                      $$ = driver.build.defMethod(self, $1, $2, $4, $5, $6);
+
+                      driver.lex.cmdarg.pop();
+                      driver.lex.cond.pop();
+                      driver.lex.unextend();
+                      driver.lex.context.pop();
+                    }
+                | kDEF singleton dot_or_colon
+                    {
+                      driver.lex.set_state_expr_fname();
+                    }
+                    fname
+                    {
+                      driver.lex.extend_static();
+                      driver.lex.cmdarg.push(false);
+                      driver.lex.cond.push(false);
+                      driver.lex.context.push(Context::State::DEFS);
+                    }
+                    f_arglist bodystmt kEND
+                    {
+                      $$ = driver.build.defSingleton(self, $1, $2, $3, $5, $7, $8, $9);
+                      DIAGCHECK();
+
+                      driver.lex.cmdarg.pop();
+                      driver.lex.cond.pop();
+                      driver.lex.unextend();
+                      driver.lex.context.pop();
+                    }
+                | kBREAK
+                    {
+                      $$ = driver.build.keywordBreak(self, $1, nullptr, nullptr, nullptr);
+                    }
+                | kNEXT
+                    {
+                      $$ = driver.build.keywordNext(self, $1, nullptr, nullptr, nullptr);
+                    }
+                | kREDO
+                    {
+                      $$ = driver.build.keywordRedo(self, $1);
+                    }
+                | kRETRY
+                    {
+                      $$ = driver.build.keywordRetry(self, $1);
+                    }
+
+   primary_value: primary // single production with no action, so bison's default `$$ = $1` passes the value of `primary` through
+
+// k_return: wraps the kRETURN token. An InvalidReturn error diagnostic is
+// recorded and the parse is aborted (YYERROR) when
+// driver.lex.context.inClass() reports true.
+        k_return: kRETURN
+                    {
+                      if (driver.lex.context.inClass()) {
+                        driver.diagnostics.emplace_back(dlevel::ERROR, dclass::InvalidReturn, $1);
+                        YYERROR;
+                      }
+
+                      // Hand the keyword token to the enclosing rule explicitly. Bison only
+                      // documents the default `$$ = $1` for rules WITHOUT an action; relying
+                      // on the skeleton pre-loading $$ here is fragile.
+                      $$ = $1;
+                    }
+
+            then: term // no action: value is that of `term` (bison default `$$ = $1`)
+                | kTHEN // no action: value is the kTHEN token
+                | term kTHEN // both present
+                    {
+                      $$ = $2; // the kTHEN token, not the leading term, becomes the value
+                    }
+
+              do: term // `do` accepts either a term ...
+                | kDO_COND // ... or a kDO_COND token; neither alternative has an action
+
+// if_tail: either opt_else passed through unchanged, or a kELSIF clause that
+// wraps a nested condition node (built from its own if_tail) together with
+// the kELSIF token.
+         if_tail: opt_else
+                | kELSIF expr_value then compstmt if_tail
+                    {
+                      auto elsif_tok = $1;
+                      auto &rest = $5;
+
+                      // `rest` may be null; in that case both the token and the node are null.
+                      auto rest_tok = rest ? rest->tok : nullptr;
+                      auto rest_node = rest ? rest->nod : nullptr;
+
+                      auto nested = driver.build.condition(self,
+                          elsif_tok, $2, $3, $4,
+                          rest_tok, rest_node, nullptr);
+                      $$ = driver.alloc.node_with_token(elsif_tok, nested);
+                    }
+
+// opt_else: null when the `none` alternative matches, otherwise the kELSE
+// token paired with the compstmt that follows it.
+        opt_else: none
+                    {
+                      $$ = nullptr;
+                    }
+                | kELSE compstmt
+                    {
+                      auto &else_tok = $1;
+                      auto &else_body = $2;
+                      $$ = driver.alloc.node_with_token(else_tok, else_body);
+                    }
+
+         for_var: lhs // for_var accepts either lhs ...
+                | mlhs // ... or mlhs; no action on either alternative
+
+// f_marg: one element of a parenthesised argument pattern -- a plain
+// f_norm_arg (built with build.arg) or a nested parenthesised f_margs
+// (built with build.multi_lhs).
+          f_marg: f_norm_arg
+                    {
+                      auto &name = $1;
+                      $$ = driver.build.arg(self, name);
+                    }
+                | tLPAREN f_margs rparen
+                    {
+                      auto &open = $1;
+                      auto &inner = $2;
+                      auto &close = $3;
+                      $$ = driver.build.multi_lhs(self, open, inner, close);
+                    }
+
+// f_marg_list: comma-separated f_marg items accumulated into one node list.
+     f_marg_list: f_marg
+                    {
+                      auto &first = $1;
+                      $$ = driver.alloc.node_list(first);
+                    }
+                | f_marg_list tCOMMA f_marg
+                    {
+                      // Append to the list built so far and reuse it as the result.
+                      $1->emplace_back($3);
+                      $$ = $1;
+                    }
+
+// f_margs: contents of a parenthesised argument pattern. Covers every
+// combination of leading f_marg_list, a tSTAR rest argument (named via
+// f_norm_arg or anonymous, i.e. built with a null name) and a trailing
+// f_marg_list.
+         f_margs: f_marg_list
+                | f_marg_list tCOMMA tSTAR f_norm_arg
+                    {
+                      auto &items = $1;
+                      items->emplace_back(driver.build.restarg(self, $3, $4));
+                      $$ = items;
+                    }
+                | f_marg_list tCOMMA tSTAR f_norm_arg tCOMMA f_marg_list
+                    {
+                      auto &items = $1;
+                      items->emplace_back(driver.build.restarg(self, $3, $4));
+                      items->concat($6);
+                      $$ = items;
+                    }
+                | f_marg_list tCOMMA tSTAR
+                    {
+                      auto &items = $1;
+                      items->emplace_back(driver.build.restarg(self, $3, nullptr));
+                      $$ = items;
+                    }
+                | f_marg_list tCOMMA tSTAR            tCOMMA f_marg_list
+                    {
+                      auto &items = $1;
+                      items->emplace_back(driver.build.restarg(self, $3, nullptr));
+                      items->concat($5);
+                      $$ = items;
+                    }
+                |                    tSTAR f_norm_arg
+                    {
+                      $$ = driver.alloc.node_list(driver.build.restarg(self, $1, $2));
+                    }
+                |                    tSTAR f_norm_arg tCOMMA f_marg_list
+                    {
+                      // The rest argument goes in front of the trailing list.
+                      auto &trailing = $4;
+                      trailing->push_front(driver.build.restarg(self, $1, $2));
+                      $$ = trailing;
+                    }
+                |                    tSTAR
+                    {
+                      $$ = driver.alloc.node_list(driver.build.restarg(self, $1, nullptr));
+                    }
+                |                    tSTAR tCOMMA f_marg_list
+                    {
+                      auto &trailing = $3;
+                      trailing->push_front(driver.build.restarg(self, $1, nullptr));
+                      $$ = trailing;
+                    }
+
+// block_args_tail: the trailing part of a block parameter list. Each
+// alternative concatenates its component lists, in source order, onto the
+// first one and yields that list.
+ block_args_tail: f_block_kwarg tCOMMA f_kwrest opt_f_block_arg
+                    {
+                      auto &tail = $1;
+                      tail->concat($3);
+                      tail->concat($4);
+                      $$ = tail;
+                    }
+                | f_block_kwarg opt_f_block_arg
+                    {
+                      auto &tail = $1;
+                      tail->concat($2);
+                      $$ = tail;
+                    }
+                | f_kwrest opt_f_block_arg
+                    {
+                      auto &tail = $1;
+                      tail->concat($2);
+                      $$ = tail;
+                    }
+                | f_block_arg
+                    {
+                      $$ = $1;
+                    }
+
+// opt_block_args_tail: a block_args_tail introduced by a comma, or a fresh
+// empty node list when nothing follows.
+opt_block_args_tail:
+                  tCOMMA block_args_tail
+                    {
+                      auto &tail = $2;
+                      $$ = tail;
+                    }
+                | // nothing
+                    {
+                      $$ = driver.alloc.node_list();
+                    }
+
+// block_param: the parameter list of a block. Every alternative with an
+// action concatenates the list-valued components, in source order, onto the
+// first component and yields it. The one exception is the
+// `f_arg opt_block_args_tail` alternative, which yields a single procarg0
+// node when there is exactly one argument and an empty tail.
+     block_param: f_arg tCOMMA f_block_optarg tCOMMA f_rest_arg              opt_block_args_tail
+                    {
+                      auto &params = $1;
+                      params->concat($3);
+                      params->concat($5);
+                      params->concat($6);
+                      $$ = params;
+                    }
+                | f_arg tCOMMA f_block_optarg tCOMMA f_rest_arg tCOMMA f_arg opt_block_args_tail
+                    {
+                      auto &params = $1;
+                      params->concat($3);
+                      params->concat($5);
+                      params->concat($7);
+                      params->concat($8);
+                      $$ = params;
+                    }
+                | f_arg tCOMMA f_block_optarg                                opt_block_args_tail
+                    {
+                      auto &params = $1;
+                      params->concat($3);
+                      params->concat($4);
+                      $$ = params;
+                    }
+                | f_arg tCOMMA f_block_optarg tCOMMA                   f_arg opt_block_args_tail
+                    {
+                      auto &params = $1;
+                      params->concat($3);
+                      params->concat($5);
+                      params->concat($6);
+                      $$ = params;
+                    }
+                | f_arg tCOMMA                       f_rest_arg              opt_block_args_tail
+                    {
+                      auto &params = $1;
+                      params->concat($3);
+                      params->concat($4);
+                      $$ = params;
+                    }
+                  // Trailing comma, no action: the f_arg list is the value as-is.
+                | f_arg tCOMMA
+                | f_arg tCOMMA                       f_rest_arg tCOMMA f_arg opt_block_args_tail
+                    {
+                      auto &params = $1;
+                      params->concat($3);
+                      params->concat($5);
+                      params->concat($6);
+                      $$ = params;
+                    }
+                | f_arg                                                      opt_block_args_tail
+                    {
+                      auto &params = $1;
+                      auto &tail = $2;
+
+                      if (tail->size() != 0 || params->size() != 1) {
+                        // General case: plain concatenation.
+                        params->concat(tail);
+                        $$ = params;
+                      } else {
+                        // Exactly one argument and nothing after it.
+                        $$ = driver.alloc.node_list(driver.build.procarg0(self, params->at(0)));
+                      }
+                    }
+                | f_block_optarg tCOMMA              f_rest_arg              opt_block_args_tail
+                    {
+                      auto &params = $1;
+                      params->concat($3);
+                      params->concat($4);
+                      $$ = params;
+                    }
+                | f_block_optarg tCOMMA              f_rest_arg tCOMMA f_arg opt_block_args_tail
+                    {
+                      auto &params = $1;
+                      params->concat($3);
+                      params->concat($5);
+                      params->concat($6);
+                      $$ = params;
+                    }
+                | f_block_optarg                                             opt_block_args_tail
+                    {
+                      auto &params = $1;
+                      params->concat($2);
+                      $$ = params;
+                    }
+                | f_block_optarg tCOMMA                                f_arg opt_block_args_tail
+                    {
+                      auto &params = $1;
+                      params->concat($3);
+                      params->concat($4);
+                      $$ = params;
+                    }
+                |                                    f_rest_arg              opt_block_args_tail
+                    {
+                      auto &params = $1;
+                      params->concat($2);
+                      $$ = params;
+                    }
+                |                                    f_rest_arg tCOMMA f_arg opt_block_args_tail
+                    {
+                      auto &params = $1;
+                      params->concat($3);
+                      params->concat($4);
+                      $$ = params;
+                    }
+                  // No action: the tail list alone is the value.
+                |                                                                block_args_tail
+
+// opt_block_param: the optional parameter section of a block. With no
+// parameters an args node is built from null delimiters and a null list;
+// otherwise the value of block_param_def is forwarded and the lexer is moved
+// to its expr_value state.
+ opt_block_param: // nothing
+                    {
+                      $$ = driver.build.args(self, nullptr, nullptr, nullptr, true);
+                      DIAGCHECK();
+                    }
+                | block_param_def
+                    {
+                      // Forward the args node explicitly: bison's default `$$ = $1` is
+                      // only documented for rules that have no action at all.
+                      $$ = $1;
+                      driver.lex.set_state_expr_value();
+                    }
+
+// block_param_def: a block parameter list delimited by pipes. A tOROP token
+// stands in for both delimiters of an empty list. Each alternative builds an
+// args node and then runs DIAGCHECK().
+ block_param_def: tPIPE opt_bv_decl tPIPE
+                    {
+                      auto &open = $1;
+                      auto &shadow = $2;
+                      auto &close = $3;
+                      $$ = driver.build.args(self, open, shadow, close, true);
+                      DIAGCHECK();
+                    }
+                | tOROP
+                    {
+                      // The same token is used as both the opening and closing delimiter.
+                      auto &both = $1;
+                      $$ = driver.build.args(self, both, nullptr, both, true);
+                      DIAGCHECK();
+                    }
+                | tPIPE block_param opt_bv_decl tPIPE
+                    {
+                      auto &all = $2;
+                      all->concat($3);
+                      $$ = driver.build.args(self, $1, all, $4, true);
+                      DIAGCHECK();
+                    }
+
+// opt_bv_decl: an optional `; bv_decls` section. Yields an empty node list
+// when the section is absent.
+     opt_bv_decl: opt_nl
+                    {
+                      $$ = driver.alloc.node_list();
+                    }
+                | opt_nl tSEMI bv_decls opt_nl
+                    {
+                      auto &decls = $3;
+                      $$ = decls;
+                    }
+
+// bv_decls: comma-separated bvar items collected into one node list.
+        bv_decls: bvar
+                    {
+                      auto &first = $1;
+                      $$ = driver.alloc.node_list(first);
+                    }
+                | bv_decls tCOMMA bvar
+                    {
+                      $1->emplace_back($3);
+                      $$ = $1;
+                    }
+
+// bvar: a single identifier, declared with the lexer and turned into a
+// shadowarg node. The f_bad_arg alternative yields null.
+            bvar: tIDENTIFIER
+                    {
+                      auto name_tok = $1;
+                      driver.lex.declare(name_tok->string());
+                      $$ = driver.build.shadowarg(self, name_tok);
+                    }
+                | f_bad_arg
+                    {
+                      $$ = nullptr;
+                    }
+
+// lambda: f_larglist followed by lambda_body. Mid-rule actions call
+// extend_dynamic() before the argument list and push onto cmdarg before the
+// body; the final action undoes both and attaches the args to the body's
+// delimited block.
+          lambda:   {
+                      driver.lex.extend_dynamic();
+                    }
+                  f_larglist
+                    {
+                      driver.lex.cmdarg.push(false);
+                    }
+                  lambda_body
+                    {
+                      driver.lex.cmdarg.pop();
+
+                      // $2 is f_larglist and $4 is lambda_body: the mid-rule actions
+                      // occupy positions $1 and $3.
+                      auto &body = $4;
+                      body->args = $2;
+                      $$ = body;
+
+                      driver.lex.unextend();
+                    }
+
+// f_larglist: lambda arguments, either parenthesised (with an optional
+// bv_decl section appended) or bare. Both alternatives build an args node
+// and then run DIAGCHECK().
+      f_larglist: tLPAREN2 f_args opt_bv_decl tRPAREN
+                    {
+                      auto &params = $2;
+                      params->concat($3);
+                      $$ = driver.build.args(self, $1, params, $4, true);
+                      DIAGCHECK();
+                    }
+                | f_args
+                    {
+                      // No delimiters were written, so both delimiter tokens are null.
+                      auto &params = $1;
+                      $$ = driver.build.args(self, nullptr, params, nullptr, true);
+                      DIAGCHECK();
+                    }
+
+// lambda_body: the body of a lambda, delimited either by tLAMBEG ... tRCURLY
+// or by kDO_LAMBDA ... kEND. A LAMBDA context is pushed while the body is
+// parsed and popped afterwards; the args slot of the block is left null.
+     lambda_body: tLAMBEG
+                    {
+                      driver.lex.context.push(Context::State::LAMBDA);
+                    }
+                  compstmt tRCURLY
+                    {
+                      auto &open = $1;
+                      auto &body = $3;
+                      auto &close = $4;
+                      $$ = driver.alloc.delimited_block(open, nullptr, body, close);
+                      driver.lex.context.pop();
+                    }
+                | kDO_LAMBDA
+                    {
+                      driver.lex.context.push(Context::State::LAMBDA);
+                    }
+                  bodystmt kEND
+                    {
+                      auto &open = $1;
+                      auto &body = $3;
+                      auto &close = $4;
+                      $$ = driver.alloc.delimited_block(open, nullptr, body, close);
+                      driver.lex.context.pop();
+                    }
+
+// do_block: kDO_BLOCK do_body kEND. A BLOCK context is pushed for the body;
+// the delimiter tokens are filled into the block produced by do_body.
+        do_block: kDO_BLOCK
+                    {
+                      driver.lex.context.push(Context::State::BLOCK);
+                    }
+                  do_body kEND
+                    {
+                      auto &blk = $3;
+                      blk->begin = $1;
+                      blk->end = $4;
+                      $$ = blk;
+                      driver.lex.context.pop();
+                    }
+
+// block_call: a command with a do-block attached, optionally followed by
+// chained method calls (which may carry their own brace or do block).
+      block_call: command do_block
+                    {
+                      auto &blk = $2;
+                      $$ = driver.build.block(self, $1,
+                          blk->begin,
+                          blk->args,
+                          blk->body,
+                          blk->end
+                        );
+                      DIAGCHECK();
+                    }
+                | block_call dot_or_colon operation2 opt_paren_args
+                    {
+                      auto &parens = $4;
+                      $$ = driver.build.call_method(self, $1, $2, $3,
+                                  parens->begin,
+                                  parens->inner,
+                                  parens->end);
+                    }
+                | block_call dot_or_colon operation2 opt_paren_args brace_block
+                    {
+                      // Build the call first, then wrap it with the brace block.
+                      auto &parens = $4;
+                      auto call = driver.build.call_method(self, $1, $2, $3,
+                          parens->begin,
+                          parens->inner,
+                          parens->end);
+                      auto &blk = $5;
+                      $$ = driver.build.block(self, call,
+                          blk->begin,
+                          blk->args,
+                          blk->body,
+                          blk->end);
+                      DIAGCHECK();
+                    }
+                | block_call dot_or_colon operation2 command_args do_block
+                    {
+                      // command_args carry no delimiters, so both delimiter tokens are null.
+                      auto call = driver.build.call_method(self, $1, $2, $3, nullptr, $4, nullptr);
+                      auto &blk = $5;
+                      $$ = driver.build.block(self, call, blk->begin, blk->args, blk->body, blk->end);
+                      DIAGCHECK();
+                    }
+
+// method_call: the parenthesised / receiver-qualified call forms, plus the
+// kSUPER forms and index access. Alternatives taking paren_args unpack the
+// delimited list into its begin token, inner list and end token.
+     method_call: fcall paren_args
+                    {
+                      // No receiver and no dot token.
+                      auto &parens = $2;
+                      $$ = driver.build.call_method(self, nullptr, nullptr, $1,
+                        parens->begin,
+                        parens->inner,
+                        parens->end);
+                    }
+                | primary_value call_op operation2 opt_paren_args
+                    {
+                      auto &parens = $4;
+                      $$ = driver.build.call_method(self, $1, $2, $3,
+                          parens->begin,
+                          parens->inner,
+                          parens->end);
+                    }
+                | primary_value tCOLON2 operation2 paren_args
+                    {
+                      auto &parens = $4;
+                      $$ = driver.build.call_method(self, $1, $2, $3,
+                          parens->begin,
+                          parens->inner,
+                          parens->end);
+                    }
+                | primary_value tCOLON2 operation3
+                    {
+                      // No argument list at all.
+                      $$ = driver.build.call_method(self, $1, $2, $3, nullptr, nullptr, nullptr);
+                    }
+                | primary_value call_op paren_args
+                    {
+                      // No method-name token: it is passed as null.
+                      auto &parens = $3;
+                      $$ = driver.build.call_method(self, $1, $2, nullptr,
+                          parens->begin,
+                          parens->inner,
+                          parens->end);
+                    }
+                | primary_value tCOLON2 paren_args
+                    {
+                      auto &parens = $3;
+                      $$ = driver.build.call_method(self, $1, $2, nullptr,
+                          parens->begin,
+                          parens->inner,
+                          parens->end);
+                    }
+                | kSUPER paren_args
+                    {
+                      auto &parens = $2;
+                      $$ = driver.build.keywordSuper(self, $1,
+                          parens->begin,
+                          parens->inner,
+                          parens->end);
+                    }
+                | kSUPER
+                    {
+                      auto &super_tok = $1;
+                      $$ = driver.build.keywordZsuper(self, super_tok);
+                    }
+                | primary_value tLBRACK2 opt_call_args rbracket
+                    {
+                      auto &receiver = $1;
+                      auto &open = $2;
+                      auto &indexes = $3;
+                      auto &close = $4;
+                      $$ = driver.build.index(self, receiver, open, indexes, close);
+                    }
+
+// brace_block: a block written as tLCURLY ... tRCURLY or kDO ... kEND. A
+// BLOCK context is pushed while the body is parsed; the delimiter tokens are
+// then stored on the block produced by the body rule.
+     brace_block: tLCURLY
+                    {
+                      driver.lex.context.push(Context::State::BLOCK);
+                    }
+                  brace_body tRCURLY
+                    {
+                      auto &blk = $3;
+                      blk->begin = $1;
+                      blk->end = $4;
+                      $$ = blk;
+                      driver.lex.context.pop();
+                    }
+                | kDO
+                    {
+                      driver.lex.context.push(Context::State::BLOCK);
+                    }
+                  do_body kEND
+                    {
+                      auto &blk = $3;
+                      blk->begin = $1;
+                      blk->end = $4;
+                      $$ = blk;
+                      driver.lex.context.pop();
+                    }
+
+// brace_body: parameters and body of a brace block, parsed between
+// extend_dynamic() and unextend(). The delimiters are left null here.
+      brace_body:   {
+                      driver.lex.extend_dynamic();
+                    }
+                    opt_block_param bodystmt
+                    {
+                      // $1 is the mid-rule action, so the params are $2 and the body is $3.
+                      auto &params = $2;
+                      auto &body = $3;
+                      $$ = driver.alloc.delimited_block(nullptr, params, body, nullptr);
+
+                      driver.lex.unextend();
+                    }
+
+// do_body: parameters and body of a do-block. Two mid-rule actions call
+// extend_dynamic() and push onto cmdarg before parsing; the final action
+// builds the block with null delimiters and undoes both.
+         do_body:   {
+                      driver.lex.extend_dynamic();
+                    }
+                    {
+                      driver.lex.cmdarg.push(false);
+                    }
+                    opt_block_param bodystmt
+                    {
+                      // The two mid-rule actions occupy $1 and $2.
+                      auto &params = $3;
+                      auto &body = $4;
+                      $$ = driver.alloc.delimited_block(nullptr, params, body, nullptr);
+                      driver.lex.unextend();
+
+                      driver.lex.cmdarg.pop();
+                    }
+
+// case_body: one kWHEN clause followed by the remaining `cases`. The new
+// when-node is put at the front of the whens already collected.
+       case_body: kWHEN args then compstmt cases
+                    {
+                      auto &rest = $5;
+                      rest->whens.push_front(driver.build.when(self, $1, $2, $3, $4));
+                      $$ = rest;
+                    }
+
+// cases: the end of a when-chain (a case_body allocated from opt_else) or
+// another case_body, which is passed through without an action.
+           cases: opt_else
+                    {
+                      auto &else_part = $1;
+                      $$ = driver.alloc.case_body(else_part);
+                    }
+                | case_body
+
+// opt_rescue: zero or more kRESCUE clauses. Each clause is built into a
+// rescue_body node and put at the front of the list produced by the clauses
+// after it; with no clause the result is an empty node list.
+      opt_rescue: kRESCUE exc_list exc_var then compstmt opt_rescue
+                    {
+                      auto &binding = $3;
+                      auto &classes = $2;
+
+                      // The exception list is wrapped in an array node only when present.
+                      auto class_array = classes
+                        ? driver.build.array(self, nullptr, classes, nullptr)
+                        : nullptr;
+                      auto &later = $6;
+
+                      later->push_front(driver.build.rescue_body(self, $1,
+                          class_array,
+                          binding ? binding->tok : nullptr,
+                          binding ? binding->nod : nullptr,
+                          $4, $5));
+
+                      $$ = later;
+                    }
+                |
+                    {
+                      $$ = driver.alloc.node_list();
+                    }
+
+// exc_list: a single arg_value wrapped in a new node list, or mrhs /
+// list_none passed through without an action.
+        exc_list: arg_value
+                    {
+                      auto &only = $1;
+                      $$ = driver.alloc.node_list(only);
+                    }
+                | mrhs
+                | list_none
+
+// exc_var: optional `tASSOC lhs`; yields the token paired with the lhs node,
+// or null when absent.
+         exc_var: tASSOC lhs
+                    {
+                      auto &assoc_tok = $1;
+                      auto &target = $2;
+                      $$ = driver.alloc.node_with_token(assoc_tok, target);
+                    }
+                | // nothing
+                    {
+                      $$ = nullptr;
+                    }
+
+// opt_ensure: optional `kENSURE compstmt`; yields the token paired with the
+// body node, or null when absent.
+      opt_ensure: kENSURE compstmt
+                    {
+                      auto &ensure_tok = $1;
+                      auto &body = $2;
+                      $$ = driver.alloc.node_with_token(ensure_tok, body);
+                    }
+                | // nothing
+                    {
+                      $$ = nullptr;
+                    }
+
+         literal: numeric // literal is one of numeric, symbol or dsym; no alternative has an action
+                | symbol
+                | dsym
+
+// strings: composes the list produced by `string` into one node, with null
+// begin and end tokens.
+         strings: string
+                    {
+                      auto &parts = $1;
+                      $$ = driver.build.string_compose(self, nullptr, parts, nullptr);
+                    }
+
+// string: one or more consecutive string1 items gathered into a node list.
+          string: string1
+                    {
+                      auto &first = $1;
+                      $$ = driver.alloc.node_list(first);
+                    }
+                | string string1
+                    {
+                      $1->emplace_back($2);
+                      $$ = $1;
+                    }
+
+// string1: a single string item. Delimited strings and tSTRING tokens are
+// passed through dedentString using the lexer's dedent level (0 when the
+// lexer reports none); tCHARACTER is built directly.
+         string1: tSTRING_BEG string_contents tSTRING_END
+                    {
+                      auto composed = driver.build.string_compose(self, $1, $2, $3);
+                      auto dedent = driver.lex.dedentLevel().value_or(0);
+                      $$ = driver.build.dedentString(self, composed, dedent);
+                    }
+                | tSTRING
+                    {
+                      auto plain = driver.build.string(self, $1);
+                      auto dedent = driver.lex.dedentLevel().value_or(0);
+                      $$ = driver.build.dedentString(self, plain, dedent);
+                    }
+                | tCHARACTER
+                    {
+                      auto &char_tok = $1;
+                      $$ = driver.build.character(self, char_tok);
+                    }
+
+// xstring: tXSTRING_BEG ... tSTRING_END composed with xstring_compose and
+// then passed through dedentString (dedent level 0 when the lexer reports none).
+         xstring: tXSTRING_BEG xstring_contents tSTRING_END
+                    {
+                      auto composed = driver.build.xstring_compose(self, $1, $2, $3);
+                      auto dedent = driver.lex.dedentLevel().value_or(0);
+                      $$ = driver.build.dedentString(self, composed, dedent);
+                    }
+
+// regexp: builds the options node from the tREGEXP_OPT token first, then
+// composes the regexp from its delimiters, contents and options.
+          regexp: tREGEXP_BEG regexp_contents tSTRING_END tREGEXP_OPT
+                    {
+                      auto flags = driver.build.regexp_options(self, $4);
+                      $$ = driver.build.regexp_compose(self, $1, $2, $3, flags);
+                      DIAGCHECK();
+                    }
+
+// words: tWORDS_BEG word_list tSTRING_END composed via words_compose.
+           words: tWORDS_BEG word_list tSTRING_END
+                    {
+                      auto &open = $1;
+                      auto &items = $2;
+                      auto &close = $3;
+                      $$ = driver.build.words_compose(self, open, items, close);
+                    }
+
+// word_list: zero or more `word tSPACE` items; each word's parts are turned
+// into one node with build.word and appended.
+       word_list: // nothing
+                    {
+                      $$ = driver.alloc.node_list();
+                    }
+                | word_list word tSPACE
+                    {
+                      auto &items = $1;
+                      items->emplace_back(driver.build.word(self, $2));
+                      $$ = items;
+                    }
+
+// word: one or more adjacent string_content pieces gathered into a node list.
+            word: string_content
+                    {
+                      auto &first = $1;
+                      $$ = driver.alloc.node_list(first);
+                    }
+                | word string_content
+                    {
+                      $1->emplace_back($2);
+                      $$ = $1;
+                    }
+
+// symbols: tSYMBOLS_BEG symbol_list tSTRING_END composed via symbols_compose.
+         symbols: tSYMBOLS_BEG symbol_list tSTRING_END
+                    {
+                      auto &open = $1;
+                      auto &items = $2;
+                      auto &close = $3;
+                      $$ = driver.build.symbols_compose(self, open, items, close);
+                    }
+
+// symbol_list: zero or more `word tSPACE` items; each word is built with
+// build.word and appended to the list.
+     symbol_list: // nothing
+                    {
+                      $$ = driver.alloc.node_list();
+                    }
+                | symbol_list word tSPACE
+                    {
+                      auto &items = $1;
+                      items->emplace_back(driver.build.word(self, $2));
+                      $$ = items;
+                    }
+
+// qwords: tQWORDS_BEG qword_list tSTRING_END composed via words_compose.
+          qwords: tQWORDS_BEG qword_list tSTRING_END
+                    {
+                      auto &open = $1;
+                      auto &items = $2;
+                      auto &close = $3;
+                      $$ = driver.build.words_compose(self, open, items, close);
+                    }
+
+// qsymbols: tQSYMBOLS_BEG qsym_list tSTRING_END composed via symbols_compose.
+        qsymbols: tQSYMBOLS_BEG qsym_list tSTRING_END
+                    {
+                      auto &open = $1;
+                      auto &items = $2;
+                      auto &close = $3;
+                      $$ = driver.build.symbols_compose(self, open, items, close);
+                    }
+
+// qword_list: zero or more `tSTRING_CONTENT tSPACE` items, each appended as
+// a string_internal node.
+      qword_list: // nothing
+                    {
+                      $$ = driver.alloc.node_list();
+                    }
+                | qword_list tSTRING_CONTENT tSPACE
+                    {
+                      auto &items = $1;
+                      items->emplace_back(driver.build.string_internal(self, $2));
+                      $$ = items;
+                    }
+
+// qsym_list: zero or more `tSTRING_CONTENT tSPACE` items, each appended as
+// a symbol_internal node.
+       qsym_list: // nothing
+                    {
+                      $$ = driver.alloc.node_list();
+                    }
+                | qsym_list tSTRING_CONTENT tSPACE
+                    {
+                      auto &items = $1;
+                      items->emplace_back(driver.build.symbol_internal(self, $2));
+                      $$ = items;
+                    }
+
+// string_contents: zero or more string_content pieces in a node list.
+ string_contents: // nothing
+                    {
+                      $$ = driver.alloc.node_list();
+                    }
+                | string_contents string_content
+                    {
+                      $1->emplace_back($2);
+                      $$ = $1;
+                    }
+
+// xstring_contents: zero or more string_content pieces in a node list.
+xstring_contents: // nothing
+                    {
+                      $$ = driver.alloc.node_list();
+                    }
+                | xstring_contents string_content
+                    {
+                      $1->emplace_back($2);
+                      $$ = $1;
+                    }
+
+// regexp_contents: zero or more string_content pieces in a node list.
+ regexp_contents: // nothing
+                    {
+                      $$ = driver.alloc.node_list();
+                    }
+                | regexp_contents string_content
+                    {
+                      $1->emplace_back($2);
+                      $$ = $1;
+                    }
+
+// string_content: one piece of a string -- literal content, a tSTRING_DVAR
+// variable, or a tSTRING_DBEG ... tSTRING_DEND section containing a
+// compstmt. For the last form, cond and cmdarg are pushed for the duration
+// of the embedded code and popped afterwards.
+  string_content: tSTRING_CONTENT
+                    {
+                      auto &text_tok = $1;
+                      $$ = driver.build.string_internal(self, text_tok);
+                    }
+                | tSTRING_DVAR string_dvar
+                    {
+                      auto &variable = $2;
+                      $$ = variable;
+                    }
+                | tSTRING_DBEG
+                    {
+                      driver.lex.cond.push(false);
+                      driver.lex.cmdarg.push(false);
+                    }
+                    compstmt tSTRING_DEND
+                    {
+                      driver.lex.cond.pop();
+                      driver.lex.cmdarg.pop();
+
+                      // $2 is the mid-rule action; the body is $3 and the closer is $4.
+                      auto &open = $1;
+                      auto &body = $3;
+                      auto &close = $4;
+                      $$ = driver.build.begin(self, open, body, close);
+                    }
+
+// string_dvar: the variable forms accepted after tSTRING_DVAR -- tGVAR,
+// tIVAR, tCVAR (each built with the matching builder call) or a backref,
+// which is passed through without an action.
+     string_dvar: tGVAR
+                    {
+                      auto &var_tok = $1;
+                      $$ = driver.build.gvar(self, var_tok);
+                    }
+                | tIVAR
+                    {
+                      auto &var_tok = $1;
+                      $$ = driver.build.ivar(self, var_tok);
+                    }
+                | tCVAR
+                    {
+                      auto &var_tok = $1;
+                      $$ = driver.build.cvar(self, var_tok);
+                    }
+                | backref
+
+
+// symbol: a tSYMBOL token; the lexer is switched to its expr_end state
+// before the symbol node is built.
+          symbol: tSYMBOL
+                    {
+                      driver.lex.set_state_expr_end();
+
+                      auto &sym_tok = $1;
+                      $$ = driver.build.symbol(self, sym_tok);
+                    }
+
+// dsym: tSYMBEG xstring_contents tSTRING_END; the lexer is switched to its
+// expr_end state before the pieces are composed with symbol_compose.
+            dsym: tSYMBEG xstring_contents tSTRING_END
+                    {
+                      driver.lex.set_state_expr_end();
+
+                      auto &open = $1;
+                      auto &parts = $2;
+                      auto &close = $3;
+                      $$ = driver.build.symbol_compose(self, open, parts, close);
+                    }
+
+// numeric: a simple_numeric, optionally preceded by a tUNARY_NUM token that
+// is applied with unary_op.
+         numeric: simple_numeric
+                    {
+                      auto &value = $1;
+                      $$ = value;
+                    }
+                | tUNARY_NUM simple_numeric %prec tLOWEST
+                    {
+                      auto &sign_tok = $1;
+                      auto &value = $2;
+                      $$ = driver.build.unary_op(self, sign_tok, value);
+                    }
+
+// simple_numeric: one numeric literal token. Every alternative first
+// switches the lexer to its expr_end state and then builds the node with
+// the builder call matching the token kind.
+  simple_numeric: tINTEGER
+                    {
+                      driver.lex.set_state_expr_end();
+                      auto &num_tok = $1;
+                      $$ = driver.build.integer(self, num_tok);
+                    }
+                | tFLOAT
+                    {
+                      driver.lex.set_state_expr_end();
+                      auto &num_tok = $1;
+                      $$ = driver.build.float_(self, num_tok);
+                    }
+                | tRATIONAL
+                    {
+                      driver.lex.set_state_expr_end();
+                      auto &num_tok = $1;
+                      $$ = driver.build.rational(self, num_tok);
+                    }
+                | tIMAGINARY
+                    {
+                      driver.lex.set_state_expr_end();
+                      auto &num_tok = $1;
+                      $$ = driver.build.complex(self, num_tok);
+                    }
+                | tRATIONAL_IMAGINARY
+                    {
+                      driver.lex.set_state_expr_end();
+                      auto &num_tok = $1;
+                      $$ = driver.build.rational_complex(self, num_tok);
+                    }
+                | tFLOAT_IMAGINARY
+                    {
+                      driver.lex.set_state_expr_end();
+                      auto &num_tok = $1;
+                      $$ = driver.build.floatComplex(self, num_tok);
+                    }
+
+   user_variable: tIDENTIFIER // one builder call per name token kind; wrapped later by var_ref / var_lhs
+                    {
+                      $$ = driver.build.ident(self, $1);
+                    }
+                | tIVAR
+                    {
+                      $$ = driver.build.ivar(self, $1);
+                    }
+                | tGVAR
+                    {
+                      $$ = driver.build.gvar(self, $1);
+                    }
+                | tCONSTANT
+                    {
+                      $$ = driver.build.const_(self, $1);
+                    }
+                | tCVAR
+                    {
+                      $$ = driver.build.cvar(self, $1);
+                    }
+
+keyword_variable: kNIL // one builder call per keyword token; wrapped later by var_ref / var_lhs
+                    {
+                      $$ = driver.build.nil(self, $1);
+                    }
+                | kSELF
+                    {
+                      $$ = driver.build.self_(self, $1);
+                    }
+                | kTRUE
+                    {
+                      $$ = driver.build.true_(self, $1);
+                    }
+                | kFALSE
+                    {
+                      $$ = driver.build.false_(self, $1);
+                    }
+                | k__FILE__
+                    {
+                      $$ = driver.build.fileLiteral(self, $1);
+                    }
+                | k__LINE__
+                    {
+                      $$ = driver.build.line_literal(self, $1);
+                    }
+                | k__ENCODING__
+                    {
+                      $$ = driver.build.encodingLiteral(self, $1);
+                    }
+
+         var_ref: user_variable // both alternatives pass the variable node through build.accessible
+                    {
+                      $$ = driver.build.accessible(self, $1);
+                    }
+                | keyword_variable
+                    {
+                      $$ = driver.build.accessible(self, $1);
+                    }
+
+         var_lhs: user_variable // both alternatives pass the variable node through build.assignable
+                    {
+                      $$ = driver.build.assignable(self, $1);
+                      DIAGCHECK(); // macro defined elsewhere; presumably stops the parse if assignable() recorded an error -- confirm
+                    }
+                | keyword_variable
+                    {
+                      $$ = driver.build.assignable(self, $1);
+                      DIAGCHECK(); // same check as in the alternative above
+                    }
+
+         backref: tNTH_REF // tNTH_REF token -> nth_ref node
+                    {
+                      $$ = driver.build.nth_ref(self, $1);
+                    }
+                | tBACK_REF // tBACK_REF token -> backRef node
+                    {
+                      $$ = driver.build.backRef(self, $1);
+                    }
+
+      superclass: tLT // optional: tLT, an expression, then a terminator
+                    {
+                      driver.lex.set_state_expr_value(); // mid-rule action: runs right after tLT, before expr_value is parsed
+                    }
+                    expr_value term
+                    {
+                      $$ = driver.alloc.node_with_token($1, $3); // pairs the tLT token with the expression; the mid-rule action is $2
+                    }
+                | // nothing
+                    {
+                      $$ = nullptr; // no superclass given
+                    }
+
+       f_arglist: tLPAREN2 f_args rparen // parenthesised form: both delimiter tokens are handed to build.args
+                    {
+                      driver.lex.set_state_expr_value();
+                      $$ = driver.build.args(self, $1, $2, $3, true);
+                    }
+                |   { // bare form: this mid-rule action is symbol $1 of the alternative
+                      $<boolean>$ = driver.lex.in_kwarg; // remember the previous flag as the action's own value
+                      driver.lex.in_kwarg = true;
+                    }
+                  f_args term
+                    {
+                      driver.lex.in_kwarg = $<boolean>1; // restore the flag saved by the mid-rule action
+                      $$ = driver.build.args(self, nullptr, $2, nullptr, true); // no delimiter tokens in this form
+                      DIAGCHECK(); // macro defined elsewhere; presumably stops the parse if args() recorded an error -- confirm
+                    }
+
+       // Trailing parameter groups. Each alternative folds the later lists
+       // into the first list of the production and yields that list.
+       args_tail: f_kwarg tCOMMA f_kwrest opt_f_block_arg
+                    {
+                      $1->concat($3);
+                      $1->concat($4);
+                      $$ = $1;
+                    }
+                | f_kwarg opt_f_block_arg
+                    {
+                      $1->concat($2);
+                      $$ = $1;
+                    }
+                | f_kwrest opt_f_block_arg
+                    {
+                      $1->concat($2);
+                      $$ = $1;
+                    }
+                | f_block_arg
+                    {
+                      // A lone block argument is already a list.
+                      $$ = $1;
+                    }
+
+   opt_args_tail: tCOMMA args_tail // the comma is dropped; the tail list is passed through
+                    {
+                      $$ = $2;
+                    }
+                | // nothing
+                    {
+                      $$ = driver.alloc.node_list(); // fresh empty list, so callers can concat() unconditionally
+                    }
+
+          // Full parameter list. Every non-trivial alternative appends the
+          // later groups, in source order, onto the first list of the
+          // production ($1) and yields that list; tCOMMA symbols are skipped.
+          f_args: f_arg tCOMMA f_optarg tCOMMA f_rest_arg              opt_args_tail
+                    {
+                      $1->concat($3);
+                      $1->concat($5);
+                      $1->concat($6);
+                      $$ = $1;
+                    }
+                | f_arg tCOMMA f_optarg tCOMMA f_rest_arg tCOMMA f_arg opt_args_tail
+                    {
+                      $1->concat($3);
+                      $1->concat($5);
+                      $1->concat($7);
+                      $1->concat($8);
+                      $$ = $1;
+                    }
+                | f_arg tCOMMA f_optarg                                opt_args_tail
+                    {
+                      $1->concat($3);
+                      $1->concat($4);
+                      $$ = $1;
+                    }
+                | f_arg tCOMMA f_optarg tCOMMA                   f_arg opt_args_tail
+                    {
+                      $1->concat($3);
+                      $1->concat($5);
+                      $1->concat($6);
+                      $$ = $1;
+                    }
+                | f_arg tCOMMA                 f_rest_arg              opt_args_tail
+                    {
+                      $1->concat($3);
+                      $1->concat($4);
+                      $$ = $1;
+                    }
+                | f_arg tCOMMA                 f_rest_arg tCOMMA f_arg opt_args_tail
+                    {
+                      $1->concat($3);
+                      $1->concat($5);
+                      $1->concat($6);
+                      $$ = $1;
+                    }
+                | f_arg                                                opt_args_tail
+                    {
+                      $1->concat($2);
+                      $$ = $1;
+                    }
+                |              f_optarg tCOMMA f_rest_arg              opt_args_tail
+                    {
+                      $1->concat($3);
+                      $1->concat($4);
+                      $$ = $1;
+                    }
+                |              f_optarg tCOMMA f_rest_arg tCOMMA f_arg opt_args_tail
+                    {
+                      $1->concat($3);
+                      $1->concat($5);
+                      $1->concat($6);
+                      $$ = $1;
+                    }
+                |              f_optarg                                opt_args_tail
+                    {
+                      $1->concat($2);
+                      $$ = $1;
+                    }
+                |              f_optarg tCOMMA                   f_arg opt_args_tail
+                    {
+                      $1->concat($3);
+                      $1->concat($4);
+                      $$ = $1;
+                    }
+                |                              f_rest_arg              opt_args_tail
+                    {
+                      $1->concat($2);
+                      $$ = $1;
+                    }
+                |                              f_rest_arg tCOMMA f_arg opt_args_tail
+                    {
+                      $1->concat($3);
+                      $1->concat($4);
+                      $$ = $1;
+                    }
+                |                                                          args_tail
+                    {
+                      // Only a tail: it is already the complete list.
+                      $$ = $1;
+                    }
+                | // nothing
+                    {
+                      // No parameters at all: yield a fresh empty list.
+                      $$ = driver.alloc.node_list();
+                    }
+
+       f_bad_arg: tCONSTANT // every alternative records an ERROR diagnostic on the token and then raises YYERROR
+                    {
+                      driver.diagnostics.emplace_back(dlevel::ERROR, dclass::ArgumentConst, $1);
+                      YYERROR;
+                    }
+                | tIVAR
+                    {
+                      driver.diagnostics.emplace_back(dlevel::ERROR, dclass::ArgumentIvar, $1);
+                      YYERROR;
+                    }
+                | tGVAR
+                    {
+                      driver.diagnostics.emplace_back(dlevel::ERROR, dclass::ArgumentGvar, $1);
+                      YYERROR;
+                    }
+                | tCVAR
+                    {
+                      driver.diagnostics.emplace_back(dlevel::ERROR, dclass::ArgumentCvar, $1);
+                      YYERROR;
+                    }
+
+      f_norm_arg: f_bad_arg
+                | tIDENTIFIER
+                    {
+                      // Declare the parameter name to the lexer, then pass
+                      // the identifier token up unchanged.
+                      driver.lex.declare($1->string());
+                      $$ = $1;
+                    }
+
+      f_arg_asgn: f_norm_arg // plain pass-through of the f_norm_arg value
+                    {
+                      $$ = $1;
+                    }
+
+      f_arg_item: f_arg_asgn // single named parameter -> arg node
+                    {
+                      $$ = driver.build.arg(self, $1);
+                    }
+                | tLPAREN f_margs rparen // parenthesised f_margs -> multi_lhs node; both parenthesis tokens are kept
+                    {
+                      $$ = driver.build.multi_lhs(self, $1, $2, $3);
+                    }
+
+           // Comma-separated f_arg_item list, built left-recursively.
+           f_arg: f_arg_item
+                    {
+                      $$ = driver.alloc.node_list($1);
+                    }
+                | f_arg tCOMMA f_arg_item
+                    {
+                      $1->emplace_back($3);
+                      $$ = $1;
+                    }
+
+         f_label: tLABEL
+                    {
+                      // A label the driver accepts as a keyword-argument name
+                      // is declared to the lexer and passed up; anything else
+                      // is reported and aborts this production.
+                      if (driver.valid_kwarg_name($1)) {
+                        driver.lex.declare($1->string());
+                        $$ = $1;
+                      } else {
+                        driver.diagnostics.emplace_back(dlevel::ERROR, dclass::ArgumentConst, $1);
+                        YYERROR;
+                      }
+                    }
+
+            f_kw: f_label arg_value // label followed by a value -> kwoptarg node
+                    {
+                      $$ = driver.build.kwoptarg(self, $1, $2);
+                    }
+                | f_label // label alone -> kwarg node
+                    {
+                      $$ = driver.build.kwarg(self, $1);
+                    }
+
+      f_block_kw: f_label primary_value // same as f_kw, except the value is a primary_value rather than an arg_value
+                    {
+                      $$ = driver.build.kwoptarg(self, $1, $2);
+                    }
+                | f_label // label alone -> kwarg node
+                    {
+                      $$ = driver.build.kwarg(self, $1);
+                    }
+
+   // Comma-separated f_block_kw list, built left-recursively.
+   f_block_kwarg: f_block_kw
+                    {
+                      $$ = driver.alloc.node_list($1);
+                    }
+                | f_block_kwarg tCOMMA f_block_kw
+                    {
+                      $1->emplace_back($3);
+                      $$ = $1;
+                    }
+
+         // Comma-separated f_kw list, built left-recursively.
+         f_kwarg: f_kw
+                    {
+                      $$ = driver.alloc.node_list($1);
+                    }
+                | f_kwarg tCOMMA f_kw
+                    {
+                      $1->emplace_back($3);
+                      $$ = $1;
+                    }
+
+     // Either token may introduce a keyword-rest parameter.
+     kwrest_mark: tPOW | tDSTAR
+
+        // Keyword-rest parameter, named or anonymous, wrapped in a
+        // one-element list.
+        f_kwrest: kwrest_mark tIDENTIFIER
+                    {
+                      // Named form: the name is declared to the lexer first.
+                      driver.lex.declare($2->string());
+
+                      auto node = driver.build.kwrestarg(self, $1, $2);
+                      $$ = driver.alloc.node_list(node);
+                    }
+                | kwrest_mark
+                    {
+                      // Anonymous form: no identifier token to attach.
+                      auto node = driver.build.kwrestarg(self, $1, nullptr);
+                      $$ = driver.alloc.node_list(node);
+                    }
+
+           f_opt: f_arg_asgn tEQL arg_value // name, tEQL token and default value are all handed to build.optarg
+                    {
+                      $$ = driver.build.optarg(self, $1, $2, $3);
+                    }
+
+     f_block_opt: f_arg_asgn tEQL primary_value // same as f_opt, except the default is a primary_value rather than an arg_value
+                    {
+                      $$ = driver.build.optarg(self, $1, $2, $3);
+                    }
+
+  // Comma-separated f_block_opt list, built left-recursively.
+  f_block_optarg: f_block_opt
+                    {
+                      $$ = driver.alloc.node_list($1);
+                    }
+                | f_block_optarg tCOMMA f_block_opt
+                    {
+                      $1->emplace_back($3);
+                      $$ = $1;
+                    }
+
+        // Comma-separated f_opt list, built left-recursively.
+        f_optarg: f_opt
+                    {
+                      $$ = driver.alloc.node_list($1);
+                    }
+                | f_optarg tCOMMA f_opt
+                    {
+                      $1->emplace_back($3);
+                      $$ = $1;
+                    }
+
+    // Either token may introduce a rest parameter.
+    restarg_mark: tSTAR2 | tSTAR
+
+      // Rest parameter, named or anonymous, wrapped in a one-element list.
+      f_rest_arg: restarg_mark tIDENTIFIER
+                    {
+                      // Named form: the name is declared to the lexer first.
+                      driver.lex.declare($2->string());
+
+                      auto node = driver.build.restarg(self, $1, $2);
+                      $$ = driver.alloc.node_list(node);
+                    }
+                | restarg_mark
+                    {
+                      // Anonymous form: no identifier token to attach.
+                      auto node = driver.build.restarg(self, $1, nullptr);
+                      $$ = driver.alloc.node_list(node);
+                    }
+
+     // Either token may introduce a block parameter.
+     blkarg_mark: tAMPER2 | tAMPER
+
+     // Block parameter (always named), wrapped in a one-element list.
+     f_block_arg: blkarg_mark tIDENTIFIER
+                    {
+                      // The name is declared to the lexer before the node is built.
+                      driver.lex.declare($2->string());
+
+                      auto node = driver.build.blockarg(self, $1, $2);
+                      $$ = driver.alloc.node_list(node);
+                    }
+
+ opt_f_block_arg: tCOMMA f_block_arg // the comma is dropped; the one-element list is passed through
+                    {
+                      $$ = $2;
+                    }
+                | // empty alternative: no block argument
+                    {
+                      $$ = driver.alloc.node_list(); // fresh empty list, so callers can concat() unconditionally
+                    }
+
+       singleton: var_ref // no action: bison's default $$ = $1 applies
+                | tLPAREN2 expr rparen // parenthesised expression: the parentheses are dropped
+                    {
+                      $$ = $2;
+                    }
+
+      assoc_list: // nothing
+                    {
+                      $$ = driver.alloc.node_list(); // empty alternative yields a fresh empty list
+                    }
+                | assocs trailer // no action: bison's default $$ = $1 keeps the assocs list; trailer is discarded
+
+          // Comma-separated assoc list, built left-recursively.
+          assocs: assoc
+                    {
+                      $$ = driver.alloc.node_list($1);
+                    }
+                | assocs tCOMMA assoc
+                    {
+                      $1->emplace_back($3);
+                      $$ = $1;
+                    }
+
+           assoc: arg_value tASSOC arg_value // key, tASSOC token and value -> pair node
+                    {
+                      $$ = driver.build.pair(self, $1, $2, $3);
+                    }
+                | tLABEL arg_value // label token used as the key -> pair_keyword node
+                    {
+                      $$ = driver.build.pair_keyword(self, $1, $2);
+                    }
+                | tSTRING_BEG string_contents tLABEL_END arg_value // string-delimited key ending in tLABEL_END -> pair_quoted node
+                    {
+                      $$ = driver.build.pair_quoted(self, $1, $2, $3, $4);
+                    }
+                | tDSTAR arg_value // tDSTAR followed by a value -> kwsplat node
+                    {
+                      $$ = driver.build.kwsplat(self, $1, $2);
+                    }
+
+       operation: tIDENTIFIER | tCONSTANT | tFID // action-less token sets: bison's default $$ = $1 passes the token through
+      operation2: tIDENTIFIER | tCONSTANT | tFID | op // operation plus op
+      operation3: tIDENTIFIER | tFID | op // operation2 without tCONSTANT
+    dot_or_colon: call_op | tCOLON2 // either a call_op token or tCOLON2
+         call_op: tDOT
+                    {
+                      // TODO(review): the earlier form kept in this comment was
+                      //   $$ = put(p, [:dot, $1[1]]
+                      // whitequark/parser doesn't check
+                      // raw source of the token,
+                      // so :dot/:anddot works as a flag. Here the token itself is passed up.
+                      $$ = $1;
+                    }
+                | tANDDOT
+                    {
+                      // TODO(review): earlier form was $$ = [:anddot, $1[1]];
+                      // as for tDOT above, the token itself is passed up instead.
+                      $$ = $1;
+                    }
+       opt_terms:  | terms // empty, or one or more terminators
+          opt_nl:  | tNL // empty, or a single tNL
+          rparen: opt_nl tRPAREN // closing parenthesis, optionally preceded by a tNL
+                    {
+                      $$ = $2; // only the tRPAREN token is kept
+                    }
+        rbracket: opt_nl tRBRACK // closing bracket, optionally preceded by a tNL
+                    {
+                      $$ = $2; // only the tRBRACK token is kept
+                    }
+         trailer:  | tNL | tCOMMA // empty, a tNL, or a tCOMMA
+
+            term: tSEMI // statement terminator: tSEMI or tNL
+                  {
+                    yyerrok; // bison macro: leave error-recovery mode once a tSEMI has been reduced
+                  }
+                | tNL
+
+           terms: term // one terminator ...
+                | terms tSEMI // ... followed by any number of extra tSEMI tokens
+
+            none: // nothing
+                  {
+                    $$ = nullptr; // empty production: yields a null value
+                  }
+
+       list_none: // nothing
+                  {
+                    $$ = nullptr; // same empty production under a second name; presumably typed as a list elsewhere -- confirm against the %type declarations
+                  }
+
+%%

+ 2753 - 0
third_party/parser/cc/lexer.rl

@@ -0,0 +1,2753 @@
+/*
+Copyright (c) 2013-2016 whitequark  <whitequark@whitequark.org>
+
+Parts of the source are derived from ruby_parser:
+Copyright (c) Ryan Davis, seattle.rb
+
+This lexer is a rewrite of the original in Ragel/C:
+Copyright (c) Charlie Somerville, GitHub
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+%%machine lex; # % fix highlighting
+
+/*
+#
+# === BEFORE YOU START ===
+#
+# Read the Ruby Hacking Guide chapter 11, available in English at
+# http://whitequark.org/blog/2013/04/01/ruby-hacking-guide-ch-11-finite-state-lexer/
+#
+# Remember two things about Ragel scanners:
+#
+#   1) Longest match wins.
+#
+#   2) If two matches have the same length, the first
+#      in source code wins.
+#
+# General rules of making Ragel and Bison happy:
+#
+#  * `p` (position) and `@te` contain the index of the character
+#    they're pointing to ("current"), plus one. `@ts` contains the index
+#    of the corresponding character. The code for extracting matched token is:
+#
+#       @source_buffer.slice(@ts...@te)
+#
+#  * If your input is `foooooooobar` and the rule is:
+#
+#       'f' 'o'+
+#
+#    the result will be:
+#
+#       foooooooobar
+#       ^ ts=0   ^ p=te=9
+#
+#  * A Ragel lexer action should not emit more than one token, unless
+#    you know what you are doing.
+#
+#  * All Ragel commands (fnext, fgoto, ...) end with a semicolon.
+#
+#  * If an action emits the token and transitions to another state, use
+#    these Ragel commands:
+#
+#       emit($whatever)
+#       fnext $next_state; fbreak;
+#
+#    If you perform `fgoto` in an action which neither emits a token nor
+#    rewinds the stream pointer, the parser's side-effectful,
+#    context-sensitive lookahead actions will break in a way that is hard
+#    to detect and hard to debug.
+#
+#  * If an action does not emit a token:
+#
+#       fgoto $next_state;
+#
+#  * If an action features lookbehind, i.e. matches characters with the
+#    intent of passing them to another action:
+#
+#       p = @ts - 1
+#       fgoto $next_state;
+#
+#    or, if the lookbehind consists of a single character:
+#
+#       fhold; fgoto $next_state;
+#
+#  * Ragel merges actions. So, if you have `e_lparen = '(' %act` and
+#    `c_lparen = '('` and a lexer action `e_lparen | c_lparen`, the result
+#    _will_ invoke the action `act`.
+#
+#    e_something stands for "something with **e**mbedded action".
+#
+#  * EOF is explicit and is matched by `c_eof`. If you want to introspect
+#    the state of the lexer, add this rule to the state:
+#
+#       c_eof => do_eof;
+#
+#  * If you proceed past EOF, the lexer will complain:
+#
+#       NoMethodError: undefined method `ord' for nil:NilClass
+#
+*/
+
+#include <ruby_parser/driver.hh>
+#include <cassert>
+#include "absl/strings/numbers.h"
+
+%% write data nofinal;
+
+using namespace ruby_parser;
+using namespace std::string_literals;
+
+%% prepush { check_stack_capacity(); }
+
+lexer::lexer(diagnostics_t &diag, ruby_version version, const std::string& source_buffer_) // sets up a lexer over a private, NUL-padded copy of the source text
+  : diagnostics(diag)
+  , version(version)
+  , source_buffer(source_buffer_ + std::string("\0\0", 2)) // copy of the input with two NUL bytes appended
+  , cs(lex_en_line_begin) // scanner starts in the line_begin state
+  , _p(source_buffer.data()) // cursor at the first byte of the copy; NOTE(review): requires source_buffer to be declared before _p/_pe in the class -- confirm
+  , _pe(source_buffer.data() + source_buffer.size()) // one past the last byte, appended NULs included
+  , ts(nullptr)
+  , te(nullptr)
+  , act(0)
+  , top(0) // scanner call stack starts empty
+  , eq_begin_s(nullptr)
+  , sharp_s(nullptr)
+  , newline_s(nullptr)
+  , paren_nest(0)
+  , command_start(true)
+  , num_base(0)
+  , num_digits_s(nullptr)
+  , num_suffix_s(nullptr)
+  , num_xfrm(num_xfrm_type::NONE)
+  , escape_s(nullptr)
+  , herebody_s(nullptr)
+  , in_kwarg(false)
+{
+  // ensure the stack is non-empty so we can just double in
+  // check_stack_capacity:
+  stack.resize(16);
+
+  static_env.push(environment()); // start with one default-constructed environment on the stack
+
+  cs_before_block_comment = lex_en_line_begin;
+}
+
+// Doubles the scanner call stack when it is exactly full (top == size).
+// The constructor sizes the stack to 16, so doubling always adds room.
+void lexer::check_stack_capacity() {
+  const auto depth = static_cast<size_t>(top);
+  if (depth != stack.size()) {
+    return;
+  }
+  stack.resize(depth * 2);
+}
+
+// Pops and returns the most recently pushed scanner state.
+// No underflow check is made: the caller must know the stack is non-empty.
+int lexer::stack_pop() {
+  top -= 1;
+  return stack[top];
+}
+
+// Picks the scanner state to enter next: expr_cmdarg when `cmd_state` is
+// non-zero, expr_arg otherwise.
+int lexer::arg_or_cmdarg(int cmd_state) {
+  if (!cmd_state) {
+    return lex_en_expr_arg;
+  }
+  return lex_en_expr_cmdarg;
+}
+
+// Placeholder: the [s, e) range it is given is currently ignored.
+void lexer::emit_comment(const char* s, const char* e) {
+  // Silence unused-parameter warnings until this is implemented.
+  static_cast<void>(s);
+  static_cast<void>(e);
+}
+
+// Text of the current match, [ts, te).
+std::string lexer::tok() {
+  return tok(ts, te);
+}
+
+// Text from `start` up to the end of the current match, [start, te).
+std::string lexer::tok(const char* start) {
+  return tok(start, te);
+}
+
+// Copy of the bytes in [start, end); `start` must not be past `end`.
+std::string lexer::tok(const char* start, const char* end) {
+  assert(start <= end);
+  return std::string(start, end);
+}
+
+// Maps the letter that follows a backslash to the character it denotes.
+// Returns '\0' for anything that is not one of the listed escapes.
+char lexer::unescape(uint32_t codepoint) {
+  struct escape_entry {
+    char letter;
+    char value;
+  };
+  static const escape_entry escapes[] = {
+    { 'a', '\a' }, { 'b', '\b' }, { 'e', 0x1b }, { 'f', '\f' },
+    { 'n', '\n' }, { 'r', '\r' }, { 's', ' ' },  { 't', '\t' },
+    { 'v', '\v' }, { '\\', '\\' },
+  };
+
+  for (const escape_entry &entry : escapes) {
+    if (codepoint == static_cast<uint32_t>(entry.letter)) {
+      return entry.value;
+    }
+  }
+  return '\0';
+}
+
+static const lexer::token_table_entry PUNCTUATION[] = { // punctuation spelling -> token type; compare PUNCTUATION_BEGIN, which maps some of the same spellings to different types
+  { "=", token_type::tEQL },
+  { "&", token_type::tAMPER2 },
+  { "|", token_type::tPIPE },
+  { "!", token_type::tBANG },
+  { "^", token_type::tCARET },
+  { "+", token_type::tPLUS },
+  { "-", token_type::tMINUS },
+  { "*", token_type::tSTAR2 },
+  { "/", token_type::tDIVIDE },
+  { "%", token_type::tPERCENT },
+  { "~", token_type::tTILDE },
+  { ",", token_type::tCOMMA },
+  { ";", token_type::tSEMI },
+  { ".", token_type::tDOT },
+  { "..", token_type::tDOT2 },
+  { "...", token_type::tDOT3 },
+  { "[", token_type::tLBRACK2 },
+  { "]", token_type::tRBRACK },
+  { "(", token_type::tLPAREN2 },
+  { ")", token_type::tRPAREN },
+  { "?", token_type::tEH },
+  { ":", token_type::tCOLON },
+  { "&&", token_type::tANDOP },
+  { "||", token_type::tOROP },
+  { "-@", token_type::tUMINUS },
+  { "+@", token_type::tUPLUS },
+  { "~@", token_type::tTILDE }, // same token type as "~"
+  { "**", token_type::tPOW },
+  { "->", token_type::tLAMBDA },
+  { "=~", token_type::tMATCH },
+  { "!~", token_type::tNMATCH },
+  { "==", token_type::tEQ },
+  { "!=", token_type::tNEQ },
+  { ">", token_type::tGT },
+  { ">>", token_type::tRSHFT },
+  { ">=", token_type::tGEQ },
+  { "<", token_type::tLT },
+  { "<<", token_type::tLSHFT },
+  { "<=", token_type::tLEQ },
+  { "=>", token_type::tASSOC },
+  { "::", token_type::tCOLON2 },
+  { "===", token_type::tEQQ },
+  { "<=>", token_type::tCMP },
+  { "[]", token_type::tAREF },
+  { "[]=", token_type::tASET },
+  { "{", token_type::tLCURLY },
+  { "}", token_type::tRCURLY },
+  { "`", token_type::tBACK_REF2 },
+  { "!@", token_type::tBANG }, // same token type as "!"
+  { "&.", token_type::tANDDOT },
+  { NULL, token_type::error }, // sentinel: a null token string ends the table (lexer::emit_table loops until it sees one)
+};
+
+static const lexer::token_table_entry PUNCTUATION_BEGIN[] = { // alternate token types for a few spellings that also appear in PUNCTUATION; presumably used at the start of an expression -- confirm at the use sites
+  { "&", token_type::tAMPER },
+  { "*", token_type::tSTAR },
+  { "**", token_type::tDSTAR },
+  { "+", token_type::tUPLUS },
+  { "-", token_type::tUMINUS },
+  { "::", token_type::tCOLON3 },
+  { "(", token_type::tLPAREN },
+  { "{", token_type::tLBRACE },
+  { "[", token_type::tLBRACK },
+  { NULL, token_type::error }, // sentinel: a null token string ends the table
+};
+
+static const lexer::token_table_entry KEYWORDS[] = { // keyword spelling -> token type; differs from KEYWORDS_BEGIN only in the first five entries
+  { "if", token_type::kIF_MOD }, // the *_MOD token types are used in this table ...
+  { "unless", token_type::kUNLESS_MOD },
+  { "while", token_type::kWHILE_MOD },
+  { "until", token_type::kUNTIL_MOD },
+  { "rescue", token_type::kRESCUE_MOD }, // ... down to here
+  { "defined?", token_type::kDEFINED },
+  { "BEGIN", token_type::klBEGIN },
+  { "END", token_type::klEND },
+  { "class", token_type::kCLASS },
+  { "module", token_type::kMODULE },
+  { "def", token_type::kDEF },
+  { "undef", token_type::kUNDEF },
+  { "begin", token_type::kBEGIN },
+  { "end", token_type::kEND },
+  { "then", token_type::kTHEN },
+  { "elsif", token_type::kELSIF },
+  { "else", token_type::kELSE },
+  { "ensure", token_type::kENSURE },
+  { "case", token_type::kCASE },
+  { "when", token_type::kWHEN },
+  { "for", token_type::kFOR },
+  { "break", token_type::kBREAK },
+  { "next", token_type::kNEXT },
+  { "redo", token_type::kREDO },
+  { "retry", token_type::kRETRY },
+  { "in", token_type::kIN },
+  { "do", token_type::kDO },
+  { "return", token_type::kRETURN },
+  { "yield", token_type::kYIELD },
+  { "super", token_type::kSUPER },
+  { "self", token_type::kSELF },
+  { "nil", token_type::kNIL },
+  { "true", token_type::kTRUE },
+  { "false", token_type::kFALSE },
+  { "and", token_type::kAND },
+  { "or", token_type::kOR },
+  { "not", token_type::kNOT },
+  { "alias", token_type::kALIAS },
+  { "__FILE__", token_type::k__FILE__ },
+  { "__LINE__", token_type::k__LINE__ },
+  { "__ENCODING__", token_type::k__ENCODING__ },
+  { NULL, token_type::error }, // sentinel: a null token string ends the table
+};
+
+static const lexer::token_table_entry KEYWORDS_BEGIN[] = { // same as KEYWORDS except that the first five entries use the plain (non-_MOD) token types
+  { "if", token_type::kIF }, // plain token types here, where KEYWORDS has the *_MOD ones ...
+  { "unless", token_type::kUNLESS },
+  { "while", token_type::kWHILE },
+  { "until", token_type::kUNTIL },
+  { "rescue", token_type::kRESCUE }, // ... down to here; everything below matches KEYWORDS
+  { "defined?", token_type::kDEFINED },
+  { "BEGIN", token_type::klBEGIN },
+  { "END", token_type::klEND },
+  { "class", token_type::kCLASS },
+  { "module", token_type::kMODULE },
+  { "def", token_type::kDEF },
+  { "undef", token_type::kUNDEF },
+  { "begin", token_type::kBEGIN },
+  { "end", token_type::kEND },
+  { "then", token_type::kTHEN },
+  { "elsif", token_type::kELSIF },
+  { "else", token_type::kELSE },
+  { "ensure", token_type::kENSURE },
+  { "case", token_type::kCASE },
+  { "when", token_type::kWHEN },
+  { "for", token_type::kFOR },
+  { "break", token_type::kBREAK },
+  { "next", token_type::kNEXT },
+  { "redo", token_type::kREDO },
+  { "retry", token_type::kRETRY },
+  { "in", token_type::kIN },
+  { "do", token_type::kDO },
+  { "return", token_type::kRETURN },
+  { "yield", token_type::kYIELD },
+  { "super", token_type::kSUPER },
+  { "self", token_type::kSELF },
+  { "nil", token_type::kNIL },
+  { "true", token_type::kTRUE },
+  { "false", token_type::kFALSE },
+  { "and", token_type::kAND },
+  { "or", token_type::kOR },
+  { "not", token_type::kNOT },
+  { "alias", token_type::kALIAS },
+  { "__FILE__", token_type::k__FILE__ },
+  { "__LINE__", token_type::k__LINE__ },
+  { "__ENCODING__", token_type::k__ENCODING__ },
+  { NULL, token_type::error }, // sentinel: a null token string ends the table
+};
+
+// Appends the UTF-8 encoding of code point `uc` to `dst`.
+//
+// Returns the number of bytes appended (1 to 4). Returns 0, leaving `dst`
+// untouched, when `uc` is negative or greater than 0x10FFFF.
+static size_t utf8_encode_char(int32_t uc, std::string &dst) {
+  if (uc < 0x00) {
+    return 0;
+  } else if (uc < 0x80) {
+    dst.push_back(static_cast<uint8_t>(uc));
+    return 1;
+  } else if (uc < 0x800) {
+    dst.push_back(static_cast<uint8_t>(0xC0 + (uc >> 6)));
+    dst.push_back(static_cast<uint8_t>(0x80 + (uc & 0x3F)));
+    return 2;
+  } else if (uc < 0x10000) {
+    dst.push_back(static_cast<uint8_t>(0xE0 + (uc >> 12)));
+    dst.push_back(static_cast<uint8_t>(0x80 + ((uc >> 6) & 0x3F)));
+    dst.push_back(static_cast<uint8_t>(0x80 + (uc & 0x3F)));
+    return 3;
+  } else if (uc < 0x110000) {
+    dst.push_back(static_cast<uint8_t>(0xF0 + (uc >> 18)));
+    dst.push_back(static_cast<uint8_t>(0x80 + ((uc >> 12) & 0x3F)));
+    dst.push_back(static_cast<uint8_t>(0x80 + ((uc >> 6) & 0x3F)));
+    dst.push_back(static_cast<uint8_t>(0x80 + (uc & 0x3F)));
+    return 4;
+  } else return 0;
+}
+
+// Parses `str` as hexadecimal code points separated by spaces and/or tabs
+// and appends each one, UTF-8 encoded, to `output`.
+//
+// Returns true when every token was encoded (an empty or all-blank `str`
+// counts as success). Returns false at the first token that contains a
+// non-hex character or names a code point above 0x10FFFF; bytes for the
+// tokens before it stay in `output`.
+//
+// Never throws: digits are accumulated by hand, so trailing blanks or
+// malformed input cannot raise the exceptions std::stoi would.
+static bool split_codepoints(const std::string &str, std::string &output) {
+  const auto is_blank = [](char c) { return c == ' ' || c == '\t'; };
+  const char *ptr = str.c_str();
+
+  while (*ptr) {
+    while (is_blank(*ptr))
+      ptr++;
+
+    // Only blanks were left (e.g. "41 "): there is no further token.
+    if (!*ptr)
+      break;
+
+    int32_t codepoint = 0;
+    for (; *ptr && !is_blank(*ptr); ptr++) {
+      const char c = *ptr;
+      int32_t digit;
+      if (c >= '0' && c <= '9') {
+        digit = c - '0';
+      } else if (c >= 'a' && c <= 'f') {
+        digit = c - 'a' + 10;
+      } else if (c >= 'A' && c <= 'F') {
+        digit = c - 'A' + 10;
+      } else {
+        return false;
+      }
+
+      codepoint = codepoint * 16 + digit;
+      // Bail out as soon as the value leaves the Unicode range; this also
+      // keeps the accumulator far away from int32_t overflow.
+      if (codepoint > 0x10FFFF)
+        return false;
+    }
+
+    if (utf8_encode_char(codepoint, output) == 0)
+      return false;
+  }
+  return true;
+}
+
+// Returns a copy of `str` with every non-overlapping occurrence of `search`
+// replaced by `replace`, scanning left to right.
+//
+// An empty `search` matches at every position without consuming input, so
+// the scan could never advance; in that case `str` is returned unchanged.
+static std::string gsub(const std::string&& str, const std::string&& search, const std::string&& replace) {
+  if (search.empty()) {
+    return str;
+  }
+
+  std::string result;
+  std::string::size_type from = 0;
+
+  for (;;) {
+    const auto index = str.find(search, from);
+
+    if (index == std::string::npos) {
+      // No further match: copy the remaining tail and finish.
+      result.append(str, from, std::string::npos);
+      return result;
+    }
+
+    // Copy the text before the match, then the replacement, and resume
+    // scanning just past the matched text.
+    result.append(str, from, index - from);
+    result += replace;
+    from = index + search.size();
+  }
+}
+
+// True for the three bytes treated as end of input: NUL, 0x04 and 0x1a.
+static bool eof_codepoint(char c) {
+  switch (c) {
+    case 0:
+    case 0x04:
+    case 0x1a:
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Produces the next token.
+//
+// Tokens already waiting in token_queue are returned first. Otherwise the
+// Ragel-generated scanner is run from the saved position; if it queued
+// anything, the first queued token is returned. If the scanner produced no
+// token, an error token is returned when it stopped in lex_error, and an eof
+// token otherwise.
+token_t lexer::advance_() {
+  if (!token_queue.empty()) {
+    token_t token = token_queue.front();
+    token_queue.pop();
+    return token;
+  }
+
+  // Snapshot and clear the command_start flag; actions read the snapshot
+  // through cmd_state (e.g. local_ident passes it to arg_or_cmdarg).
+  int cmd_state = command_start;
+  command_start = false;
+
+  // Local copies of the scan position and buffer end for the generated code.
+  const char* p = _p;
+  const char* pe = _pe;
+  const char* eof = _pe;
+
+  // Scratch pointers assigned and read by the scanner actions.
+  const char* tm = NULL;
+  const char* heredoc_e = NULL;
+  const char* new_herebody_s = NULL;
+
+  const char* ident_ts = NULL;
+  const char* ident_te = NULL;
+  std::string ident_tok;
+
+  // Ragel directive: the generated state machine code is emitted here.
+  %% write exec;
+
+  // Remember where scanning stopped so the next call resumes from there.
+  _p = p;
+
+  if (!token_queue.empty()) {
+    token_t token = token_queue.front();
+    token_queue.pop();
+    return token;
+  }
+
+  if (cs == lex_error) {
+    // NOTE(review): the token range starts at p while its text is the byte at
+    // p - 1; confirm which position is intended.
+    size_t start = (size_t)(p - source_buffer.data());
+    return mempool.alloc(token_type::error, start, start + 1, std::string(p - 1, 1));
+  }
+
+  // Nothing queued and no error: report end of input as an empty token at
+  // the end of the buffer.
+  return mempool.alloc(token_type::eof, source_buffer.size(), source_buffer.size(), "");
+}
+
+// Queues a token of `type` whose text is tok(), the current match.
+void lexer::emit(token_type type) {
+  emit(type, tok());
+}
+
+// Queues a token of `type` with text `str`, positioned at the current match
+// boundaries ts..te.
+void lexer::emit(token_type type, const std::string& str) {
+  emit(type, str, ts, te);
+}
+
+// Queues a token of `type` with text `str`. `start` and `end` are pointers
+// into source_buffer; they are converted to offsets from its beginning before
+// the token is allocated from mempool.
+void lexer::emit(token_type type, const std::string& str, const char* start, const char* end) {
+  const char* const base = source_buffer.data();
+  const auto offset_start = static_cast<size_t>(start - base);
+  const auto offset_end = static_cast<size_t>(end - base);
+
+  token_queue.push(mempool.alloc(type, offset_start, offset_end, str));
+}
+
+// Queues the token for the keyword `do`. The flavour depends on context:
+// kDO_COND when `cond` is active, kDO_BLOCK when `cmdarg` is active or the
+// caller passes `do_block`, and plain kDO otherwise.
+void lexer::emit_do(bool do_block) {
+  token_type kind = token_type::kDO;
+
+  if (cond.active()) {
+    kind = token_type::kDO_COND;
+  } else if (cmdarg.active() || do_block) {
+    kind = token_type::kDO_BLOCK;
+  }
+
+  emit(kind, "do");
+}
+
+// Looks the current match (tok()) up in `table`, an array terminated by an
+// entry whose `token` is null, and queues a token of the matching entry's
+// type. Nothing is queued when no entry matches.
+void lexer::emit_table(const token_table_entry* table) {
+  const auto text = tok();
+
+  const token_table_entry* entry = table;
+  while (entry->token && !(text == entry->token)) {
+    ++entry;
+  }
+
+  if (entry->token) {
+    emit(entry->type, text);
+  }
+
+  // whitequark emits a `nil` token when nothing matches, but if we do,
+  // `yylex` hits an assert, so the token is simply dropped.
+}
+
+// Queues a numeric token with text `num`. The token type is selected by the
+// current value of num_xfrm, which the numeric suffix rules set.
+void lexer::emit_num(const std::string& num) {
+  token_type kind;
+
+  switch (num_xfrm) {
+    case num_xfrm_type::NONE:
+      kind = token_type::tINTEGER;
+      break;
+    case num_xfrm_type::RATIONAL:
+      kind = token_type::tRATIONAL;
+      break;
+    case num_xfrm_type::IMAGINARY:
+      kind = token_type::tIMAGINARY;
+      break;
+    case num_xfrm_type::RATIONAL_IMAGINARY:
+      kind = token_type::tRATIONAL_IMAGINARY;
+      break;
+    case num_xfrm_type::FLOAT:
+      kind = token_type::tFLOAT;
+      break;
+    case num_xfrm_type::FLOAT_IMAGINARY:
+      kind = token_type::tFLOAT_IMAGINARY;
+      break;
+    default:
+      // Any other value queues nothing.
+      return;
+  }
+
+  emit(kind, num);
+}
+
+// Rewrites the integer literal digits `num`, written in base `num_base`, as a
+// decimal string. Base-10 input is returned unchanged; input that cannot be
+// parsed becomes "0".
+std::string lexer::convert_base(const std::string& num, int num_base) {
+  if (num_base == 10) {
+    return num;
+  }
+
+  long int value;
+  // This doesn't match Ruby's parsing but it is better than not handling it
+  const bool parsed = absl::numbers_internal::safe_strtoi_base(num, &value, num_base);
+  if (!parsed) {
+    // dmitry: apparently we assume that outer functions reported all the errors!!!
+    value = 0;
+  }
+
+  return std::to_string(value);
+}
+
+// Builds a diagnostic range from two pointers into source_buffer by turning
+// each into an offset from the start of the buffer.
+diagnostic::range lexer::range(const char *start, const char *end) {
+  const char *const base = source_buffer.data();
+  const auto token_start = static_cast<size_t>(start - base);
+  const auto token_end = static_cast<size_t>(end - base);
+
+  return diagnostic::range(token_start, token_end);
+}
+
+// Records a diagnostic of the given level and class, located at the current
+// match (ts..te), with `data` as its payload.
+void lexer::diagnostic_(dlevel level, dclass type, const std::string &data) {
+  diagnostics.emplace_back(level, type, range(ts, te), data);
+}
+
+// Records a diagnostic located at an explicit `range`.
+// Note: inside this overload the parameter `range` hides the member function
+// of the same name.
+void lexer::diagnostic_(dlevel level, dclass type, diagnostic::range &&range, const std::string &data) {
+  diagnostics.emplace_back(level, type, range, data);
+}
+
+//
+// === LITERAL STACK ===
+//
+
+// Constructs a new literal on top of literal_stack, passing this lexer
+// followed by `args` to its constructor, and returns the scanner state that
+// should lex the new literal's body.
+template<typename... Args>
+int lexer::push_literal(Args&&... args) {
+  literal_stack.emplace(*this, std::forward<Args>(args)...);
+  return next_state_for_literal(literal_stack.top());
+}
+
+// Picks the scanner entry point for the body of `lit` from three properties:
+// whether it is a word list, whether it is backslash-delimited, and whether
+// it interpolates.
+//
+// The three predicates are each queried once up front; this assumes they are
+// side-effect-free accessors.
+int lexer::next_state_for_literal(literal &lit) {
+  const bool is_words = lit.words();
+  const bool is_backslash = lit.backslash_delimited();
+  const bool is_interp = lit.interpolate();
+
+  if (is_words) {
+    if (is_backslash) {
+      if (is_interp) {
+        return lex_en_interp_backslash_delimited_words;
+      }
+      return lex_en_plain_backslash_delimited_words;
+    }
+
+    if (is_interp) {
+      return lex_en_interp_words;
+    }
+    return lex_en_plain_words;
+  }
+
+  if (is_backslash) {
+    if (is_interp) {
+      return lex_en_interp_backslash_delimited;
+    }
+    return lex_en_plain_backslash_delimited;
+  }
+
+  if (is_interp) {
+    return lex_en_interp_string;
+  }
+  return lex_en_plain_string;
+}
+
+// Returns the literal on top of literal_stack. There is no emptiness check
+// here, so callers must make sure the stack is not empty.
+literal& lexer::literal_() {
+  return literal_stack.top();
+}
+
+// Removes the literal on top of literal_stack and returns the scanner state to
+// continue in: regexp_modifiers after a regexp, expr_end after anything else.
+// The popped literal's dedent level is copied into dedentLevel_ first.
+int lexer::pop_literal() {
+  auto& closing = literal_stack.top();
+  const bool was_regexp = closing.regexp();
+  dedentLevel_ = closing.dedentLevel();
+
+  // `closing` must not be used after this point.
+  literal_stack.pop();
+
+  if (was_regexp) {
+    return lex_en_regexp_modifiers;
+  }
+  return lex_en_expr_end;
+}
+
+// The setters below overwrite the scanner's current state `cs` with one of
+// the Ragel entry points.
+
+// Switches the scanner to expr_beg.
+void lexer::set_state_expr_beg() {
+  cs = lex_en_expr_beg;
+}
+
+// Switches the scanner to expr_end.
+void lexer::set_state_expr_end() {
+  cs = lex_en_expr_end;
+}
+
+// Switches the scanner to expr_endarg.
+void lexer::set_state_expr_endarg() {
+  cs = lex_en_expr_endarg;
+}
+
+// Switches the scanner to expr_fname.
+void lexer::set_state_expr_fname() {
+  cs = lex_en_expr_fname;
+}
+
+// Switches the scanner to expr_value.
+void lexer::set_state_expr_value() {
+  cs = lex_en_expr_value;
+}
+
+%%{
+  # access @;
+  # getkey (@source_pts[p] || 0);
+
+  # === CHARACTER CLASSES ===
+  #
+  # Pay close attention to the differences between c_any and any.
+  # c_any does not include EOF and so will cause incorrect behavior
+  # for machine subtraction (any-except rules) and default transitions
+  # for scanners.
+
+  action do_nl {
+    // Record position of a newline for precise location reporting on tNL
+    // tokens.
+    //
+    // This action is embedded directly into c_nl, as it is idempotent and
+    // there are no cases when we need to skip it.
+    newline_s = p;
+  }
+
+  c_nl       = '\n' $ do_nl;
+  c_space    = [ \t\r\f\v];
+  c_space_nl = c_space | c_nl;
+
+  c_eof      = 0x04 | 0x1a | 0 | zlen; # ^D, ^Z, \0, EOF
+  c_eol      = c_nl | c_eof;
+  c_any      = any - c_eof;
+
+  c_nl_zlen  = c_nl | zlen;
+  c_line     = any - c_nl_zlen;
+
+  c_unicode  = c_any - 0x00..0x7f;
+  c_upper    = [A-Z];
+  c_lower    = [a-z_]  | c_unicode;
+  c_alpha    = c_lower | c_upper;
+  c_alnum    = c_alpha | [0-9];
+
+  action do_eof {
+    // Sit at EOF indefinitely. #advance would return $eof each time.
+    // This allows to feed the lexer more data if needed; this is only used
+    // in tests.
+    //
+    // Note that this action is not embedded into e_eof like e_heredoc_nl and e_bs
+    // below. This is due to the fact that scanner state at EOF is observed
+    // by tests, and encapsulating it in a rule would break the introspection.
+    fhold; fbreak;
+  }
+
+  #
+  # === TOKEN DEFINITIONS ===
+  #
+
+  # All operators are punctuation. There is more to punctuation
+  # than just operators. Operators can be overridden by user;
+  # punctuation can not.
+
+  # A list of operators which are valid in the function name context, but
+  # have different semantics in others.
+  operator_fname      = '[]' | '[]=' | '`'  | '-@' | '+@' | '~@'  | '!@' ;
+
+  # A list of operators which can occur within an assignment shortcut (+ → +=).
+  operator_arithmetic = '&'  | '|'   | '&&' | '||' | '^'  | '+'   | '-'  |
+                        '*'  | '/'   | '**' | '~'  | '<<' | '>>'  | '%'  ;
+
+  # A list of all user-definable operators not covered by groups above.
+  operator_rest       = '=~' | '!~' | '==' | '!=' | '!'   | '===' |
+                        '<'  | '<=' | '>'  | '>=' | '<=>' | '=>'  ;
+
+  # Note that `{` and `}` need to be referred to as e_lbrace and e_rbrace,
+  # as they are ambiguous with interpolation `#{}` and should be counted.
+  # These braces are not present in punctuation lists.
+
+  # A list of punctuation which has different meaning when used at the
+  # beginning of expression.
+  punctuation_begin   = '-'  | '+'  | '::' | '('  | '['  |
+                        '*'  | '**' | '&'  ;
+
+  # A list of all punctuation except punctuation_begin.
+  punctuation_end     = ','  | '='  | '->' | '('  | '['  | ']'   |
+                        '::' | '?'  | ':'  | '.'  | '..' | '...' ;
+
+  # A list of keywords which have different meaning at the beginning of expression.
+  keyword_modifier    = 'if'     | 'unless' | 'while'  | 'until' | 'rescue' ;
+
+  # A list of keywords which accept an argument-like expression, i.e. have the
+  # same post-processing as method calls or commands. Example: `yield 1`,
+  # `yield (1)`, `yield(1)`, are interpreted as if `yield` was a function.
+  keyword_with_arg    = 'yield'  | 'super'  | 'not'    | 'defined?' ;
+
+  # A list of keywords which accept a literal function name as an argument.
+  keyword_with_fname  = 'def'    | 'undef'  | 'alias'  ;
+
+  # A list of keywords which accept an expression after them.
+  keyword_with_value  = 'else'   | 'case'   | 'ensure' | 'module' | 'elsif' | 'then'  |
+                        'for'    | 'in'     | 'do'     | 'when'   | 'begin' | 'class' |
+                        'and'    | 'or'     ;
+
+  # A list of keywords which accept a value, and treat the keywords from
+  # `keyword_modifier` list as modifiers.
+  keyword_with_mid    = 'rescue' | 'return' | 'break'  | 'next'   ;
+
+  # A list of keywords which do not accept an expression after them.
+  keyword_with_end    = 'end'    | 'self'   | 'true'   | 'false'  | 'retry'    |
+                        'redo'   | 'nil'    | 'BEGIN'  | 'END'    | '__FILE__' |
+                        '__LINE__' | '__ENCODING__';
+
+  # All keywords.
+  keyword             = keyword_with_value | keyword_with_mid |
+                        keyword_with_end   | keyword_with_arg |
+                        keyword_with_fname | keyword_modifier ;
+
+  constant       = c_upper c_alnum*;
+  bareword       = c_alpha c_alnum*;
+
+  call_or_var    = c_lower c_alnum*;
+  class_var      = '@@' bareword;
+  instance_var   = '@' bareword;
+  global_var     = '$'
+      ( bareword | digit+
+      | [`'+~*$&?!@/\\;,.=:<>"] # `
+      | '-' c_alnum
+      )
+  ;
+
+  # Ruby accepts (and fails on) variables with leading digit
+  # in literal context, but not in unquoted symbol body.
+  class_var_v    = '@@' c_alnum+;
+  instance_var_v = '@' c_alnum+;
+
+  label          = bareword [?!]? ':';
+
+  #
+  # === NUMERIC PARSING ===
+  #
+
+  int_hex  = ( xdigit+ '_' )* xdigit* '_'? ;
+  int_dec  = ( digit+ '_' )* digit* '_'? ;
+  int_bin  = ( [01]+ '_' )* [01]* '_'? ;
+
+  flo_int  = [1-9] [0-9]* ( '_' digit+ )* | '0';
+  flo_frac = '.' ( digit+ '_' )* digit+;
+  flo_pow  = [eE] [+\-]? ( digit+ '_' )* digit+;
+
+  int_suffix =
+    ''   % { num_xfrm = num_xfrm_type::NONE; }
+  | 'r'  % { num_xfrm = num_xfrm_type::RATIONAL; }
+  | 'i'  % { num_xfrm = num_xfrm_type::IMAGINARY; }
+  | 'ri' % { num_xfrm = num_xfrm_type::RATIONAL_IMAGINARY; };
+
+  flo_pow_suffix =
+    ''   % { num_xfrm = num_xfrm_type::FLOAT; }
+  | 'i'  % { num_xfrm = num_xfrm_type::FLOAT_IMAGINARY; };
+
+  flo_suffix =
+    flo_pow_suffix
+  | 'r'  % { num_xfrm = num_xfrm_type::RATIONAL; }
+  | 'ri' % { num_xfrm = num_xfrm_type::RATIONAL_IMAGINARY; };
+
+  #
+  # === ESCAPE SEQUENCE PARSING ===
+  #
+
+  # Escape parsing code is a Ragel pattern, not a scanner, and therefore
+  # it shouldn't directly raise errors or perform other actions with side effects.
+  # In reality this would probably just mess up error reporting in pathological
+  # cases, though.
+
+  # The amount of code required to parse \M\C stuff correctly is ridiculous.
+
+  escaped_nl = "\\" c_nl;
+
+  # Leaving action for a closed `\u{...}` escape: converts the hex code points
+  # between the braces to UTF-8 and stores the bytes in `escape`. When a code
+  # point cannot be encoded, an error diagnostic is recorded instead and
+  # `escape` is left as it was.
+  action unicode_points {
+    // escape_s was set by e_bs to the character after the backslash (the
+    // `u`), so +2 skips `u{`; p - 1 leaves out the closing `}`.
+    auto codepoint_str = tok(escape_s + 2, p - 1);
+    std::string result;
+
+    if (split_codepoints(codepoint_str, result)) {
+      escape = std::make_unique<std::string>(result);
+    } else {
+      auto codepoint_s = escape_s + 2;
+      diagnostic_(dlevel::ERROR, dclass::UnicodePointTooLarge,
+        range(codepoint_s, codepoint_s + codepoint_str.size()));
+    }
+  }
+
+  # Leaving action for a single-character escape: stores in `escape` the value
+  # unescape() gives for the character just consumed, or that character itself
+  # when unescape() returns 0.
+  action unescape_char {
+    // p[-1] is the character that followed the backslash.
+    char esc = unescape(p[-1]);
+    if (esc) {
+      escape = std::make_unique<std::string>(&esc, 1);
+    } else {
+      // presumably unescape() returns 0 for characters without a special
+      // meaning; they then stand for themselves.
+      escape = std::make_unique<std::string>(p - 1, 1);
+    }
+  }
+
+  action invalid_complex_escape {
+    diagnostic_(dlevel::FATAL, dclass::InvalidEscape);
+  }
+
+  # Applies the control (\C- / \c) transform to the first byte of `escape`:
+  # masking with 0x9f clears bits 0x40 and 0x20. Requires `escape` to be set
+  # and non-empty.
+  action slash_c_char {
+    // TODO multibyte
+    char c = escape->at(0) & 0x9f;
+    escape = std::make_unique<std::string>(&c, 1);
+  }
+
+  # Applies the meta (\M-) transform to the first byte of `escape`: sets the
+  # high bit (0x80). Requires `escape` to be set and non-empty.
+  action slash_m_char {
+    // TODO multibyte
+    char c = escape->at(0) | 0x80;
+    escape = std::make_unique<std::string>(&c, 1);
+  }
+
+  maybe_escaped_char = (
+        '\\' c_any      %unescape_char
+    | ( c_any - [\\] )  % { escape = std::make_unique<std::string>(p - 1, 1); /* TODO multibyte */ }
+  );
+
+  maybe_escaped_ctrl_char = ( # why?!
+        '\\' c_any      %unescape_char %slash_c_char
+    |   '?'             % { escape = std::make_unique<std::string>("\x7f"); }
+    | ( c_any - [\\?] ) % { escape = std::make_unique<std::string>(p - 1, 1); /* TODO multibyte */ } %slash_c_char
+  );
+
+  escape = (
+      # \377
+      [0-7]{1,3}
+      % {
+	auto esc = tok(escape_s, p);
+	char c = std::stoi(esc, nullptr, 8);
+	escape = std::make_unique<std::string>(&c, 1);
+      }
+      # \xff
+    | 'x' xdigit{1,2}
+        % {
+	  auto esc = tok(escape_s + 1, p);
+	  char c = std::stoi(esc, nullptr, 16);
+	  escape = std::make_unique<std::string>(&c, 1);
+      }
+      # \u263a
+    | 'u' xdigit{4}
+      % {
+	std::string result;
+	split_codepoints(tok(escape_s + 1, p), result);
+	escape = std::make_unique<std::string>(result);
+      }
+      # %q[\x]
+    | 'x' ( c_any - xdigit )
+      % {
+        diagnostic_(dlevel::FATAL, dclass::InvalidHexEscape, range(escape_s - 1, p + 2));
+      }
+
+      # %q[\u123] %q[\u{12]
+    | 'u' ( c_any{0,4}  -
+            xdigit{4}   -            # \u1234 is valid
+            ( '{' xdigit{1,3}        # \u{1 \u{12 \u{123 are valid
+            | '{' xdigit [ \t}] any? # \u{1. \u{1} are valid
+            | '{' xdigit{2} [ \t}]   # \u{12. \u{12} are valid
+            )
+          )
+      % {
+        diagnostic_(dlevel::FATAL, dclass::InvalidUnicodeEscape, range(escape_s - 1, p));
+      }
+
+      # \u{123 456}
+    | 'u{' ( xdigit{1,6} [ \t] )*
+      ( xdigit{1,6} '}'
+        %unicode_points
+      | ( xdigit* ( c_any - xdigit - '}' )+ '}'
+        | ( c_any - '}' )* c_eof
+        | xdigit{7,}
+        ) % {
+          diagnostic_(dlevel::FATAL, dclass::UnterminatedUnicode, range(p - 1, p));
+        }
+      )
+
+      # \C-\a \cx
+    | ( 'C-' | 'c' ) escaped_nl?
+      maybe_escaped_ctrl_char
+
+      # \M-a
+    | 'M-' escaped_nl?
+      maybe_escaped_char
+      %slash_m_char
+
+      # \C-\M-f \M-\cf \c\M-f
+    | ( ( 'C-'   | 'c' ) escaped_nl?   '\\M-'
+      |   'M-\\'         escaped_nl? ( 'C-'   | 'c' ) ) escaped_nl?
+      maybe_escaped_ctrl_char
+      %slash_m_char
+
+    | 'C' c_any %invalid_complex_escape
+    | 'M' c_any %invalid_complex_escape
+    | ( 'M-\\C' | 'C-\\M' ) c_any %invalid_complex_escape
+
+    | ( c_any - [0-7xuCMc] ) %unescape_char
+
+    | c_eof % {
+      diagnostic_(dlevel::FATAL, dclass::EscapeEof, range(p - 1, p));
+    }
+  );
+
+  # Use rules in form of `e_bs escape' when you need to parse a sequence.
+  e_bs = '\\' % {
+    escape_s = p;
+    escape   = nullptr;
+  };
+
+  #
+  # === STRING AND HEREDOC PARSING ===
+  #
+
+  # Heredoc parsing is quite a complex topic. First, consider that heredocs
+  # can be arbitrarily nested. For example:
+  #
+  #     puts <<CODE
+  #     the result is: #{<<RESULT.inspect
+  #       i am a heredoc
+  #     RESULT
+  #     }
+  #     CODE
+  #
+  # which, incidentally, evaluates to:
+  #
+  #     the result is: "  i am a heredoc\n"
+  #
+  # To parse them, lexer refers to two kinds (remember, nested heredocs)
+  # of positions in the input stream, namely heredoc_e
+  # (HEREDOC declaration End) and @herebody_s (HEREdoc BODY line Start).
+  #
+  # heredoc_e is simply contained inside the corresponding Literal, and
+  # when the heredoc is closed, the lexing is restarted from that position.
+  #
+  # @herebody_s is quite more complex. First, @herebody_s changes after each
+  # heredoc line is lexed. This way, at '\n' tok(@herebody_s, @te) always
+  # contains the current line, and also when a heredoc is started, @herebody_s
+  # contains the position from which the heredoc will be lexed.
+  #
+  # Second, as (insanity) there are nested heredocs, we need to maintain a
+  # stack of these positions. Each time #push_literal is called, it saves current
+  # @herebody_s to literal.saved_herebody_s, and after an interpolation (possibly
+  # containing other heredocs) is closed, the previous value is restored.
+
+  e_heredoc_nl = c_nl % {
+    // After every heredoc was parsed, herebody_s contains the
+    // position of next token after all heredocs.
+    if (herebody_s) {
+      p = herebody_s;
+      herebody_s = NULL;
+    }
+  };
+
+  # Handles an ordinary piece of literal content: either the matched text
+  # closes the current (non-heredoc) literal, or it is appended to it.
+  action extend_string {
+    auto str = tok();
+    std::string lookahead;
+
+    // tLABEL_END is only possible in non-cond context on >= 2.2
+    if (version >= ruby_version::RUBY_22 && !cond.active()) {
+      // Peek at up to two characters after the match, clamped to the end of
+      // the input.
+      const char* lookahead_s = te;
+      const char* lookahead_e = te + 2;
+
+      if (lookahead_e > eof) {
+        lookahead_e = eof;
+      }
+
+      lookahead = std::string(lookahead_s, (size_t)(lookahead_e - lookahead_s));
+    }
+
+    auto& current_literal = literal_();
+
+    if (!current_literal.heredoc() && current_literal.nest_and_try_closing(str, ts, te, lookahead)) {
+      // The literal was closed. The last queued token tells whether it was
+      // closed as a label.
+      if (token_queue.back()->type() == token_type::tLABEL_END) {
+        // Step over one more character; presumably the `:` that was seen
+        // through the lookahead.
+        p += 1;
+        pop_literal();
+        fnext expr_labelarg;
+      } else {
+        fnext *pop_literal();
+      }
+      fbreak;
+    } else {
+      current_literal.extend_string(str, ts, te);
+    }
+  }
+
+  # Handles a backslash sequence inside a literal. Depending on the literal
+  # kind, the sequence is appended verbatim, reduced to the escaped character,
+  # or replaced by the value decoded into `escape`.
+  action extend_string_escaped {
+    auto& current_literal = literal_();
+
+    // escape_s points at the character right after the backslash.
+    // TODO multibyte
+    auto escaped_char = *escape_s;
+
+    if (current_literal.munge_escape(escaped_char)) {
+      // If this particular literal uses this character as an opening
+      // or closing delimiter, it is an escape sequence for that
+      // particular character. Write it without the backslash.
+
+      // The characters compared below are the regexp metacharacters
+      // \ $ ( ) * + . < > ? [ ] ^ { | }
+      if (current_literal.regexp()
+          && (escaped_char == '\\' ||
+              escaped_char == '$'  ||
+              escaped_char == '('  ||
+              escaped_char == ')'  ||
+              escaped_char == '*'  ||
+              escaped_char == '+'  ||
+              escaped_char == '.'  ||
+              escaped_char == '<'  ||
+              escaped_char == '>'  ||
+              escaped_char == '?'  ||
+              escaped_char == '['  ||
+              escaped_char == ']'  ||
+              escaped_char == '^'  ||
+              escaped_char == '{'  ||
+              escaped_char == '|'  ||
+              escaped_char == '}')) {
+        // Regular expressions should include escaped delimiters in their
+        // escaped form, except when the escaped character is
+        // a closing delimiter but not a regexp metacharacter.
+        //
+        // The backslash itself cannot be used as a closing delimiter
+        // at the same time as an escape symbol, but it is always munged,
+        // so this branch also executes for the non-closing-delimiter case
+        // for the backslash.
+        auto str = tok();
+        current_literal.extend_string(str, ts, te);
+      } else {
+        auto str = std::string(&escaped_char, 1);
+        current_literal.extend_string(str, ts, te);
+      }
+    } else {
+      // It does not. So this is an actual escape sequence, yay!
+      if (current_literal.regexp()) {
+        // Regular expressions should include escape sequences in their
+        // escaped form. On the other hand, escaped newlines are removed.
+        std::string str = gsub(tok(), "\\\n", "");
+        current_literal.extend_string(str, ts, te);
+      } else {
+        // Use the decoded value when an escape action produced one; otherwise
+        // fall back to the raw matched text.
+        auto str = escape ? *escape : tok();
+        current_literal.extend_string(str, ts, te);
+      }
+    }
+  }
+
+  # Extend a string with a newline or a EOF character.
+  # As heredoc closing line can immediately precede EOF, this action
+  # has to handle such case specially.
+  action extend_string_eol {
+    auto& current_literal = literal_();
+
+    // The match reaches the end of the buffer: the literal was never closed.
+    // The diagnostic points at the first character of the literal.
+    if (te == pe) {
+      diagnostic_(dlevel::FATAL, dclass::EscapeEof, range(current_literal.str_s, current_literal.str_s + 1));
+    }
+
+    if (current_literal.heredoc()) {
+      // The heredoc line that just ended, without its line terminator.
+      auto line = tok(herebody_s, ts);
+
+      // Drop trailing carriage returns (CRLF line endings).
+      while (!line.empty() && line.back() == '\r') {
+        line.pop_back();
+      }
+
+      if (version <= ruby_version::RUBY_20) {
+        // See ruby:c48b4209c
+        // Cut the line at its last remaining carriage return.
+        auto riter = line.rfind('\r');
+
+        if (riter != std::string::npos) {
+          line.erase(riter);
+        }
+      }
+
+      // Try ending the heredoc with the complete most recently
+      // scanned line. @herebody_s always refers to the start of such line.
+      if (current_literal.nest_and_try_closing(line, herebody_s, ts)) {
+        herebody_s = te;
+
+        // Continue regular lexing after the heredoc reference (<<END).
+        p = current_literal.heredoc_e - 1;
+        fnext *pop_literal(); fbreak;
+      } else {
+        // Calculate indentation level for <<~HEREDOCs.
+        current_literal.infer_indent_level(line);
+
+        // Ditto.
+        herebody_s = te;
+      }
+    } else {
+      // Try ending the literal with a newline.
+      auto str = tok();
+      if (current_literal.nest_and_try_closing(str, ts, te)) {
+        fnext *pop_literal(); fbreak;
+      }
+
+      if (herebody_s) {
+        // This is a regular literal intertwined with a heredoc. Like:
+        //
+        //     p <<-foo+"1
+        //     bar
+        //     foo
+        //     2"
+        //
+        // which, incidentally, evaluates to "bar\n1\n2".
+        p = herebody_s - 1;
+        herebody_s = nullptr;
+      }
+    }
+
+    // In word lists a line break separates words, unless the scanner sits on
+    // an end-of-input byte.
+    if (current_literal.words() && !eof_codepoint(*p)) {
+      current_literal.extend_space(ts, te);
+    } else {
+      // A literal newline is appended if the heredoc was _not_ closed
+      // this time (see fbreak above). See also Literal#nest_and_try_closing
+      // for rationale of calling #flush_string here.
+      std::string str = tok();
+      current_literal.extend_string(str, ts, te);
+      current_literal.flush_string();
+    }
+  }
+
+  action extend_string_space {
+    literal_().extend_space(ts, te);
+  }
+
+  #
+  # === INTERPOLATION PARSING ===
+  #
+
+  # Interpolations with immediate variable names simply call into
+  # the corresponding machine.
+
+  interp_var = '#' ( global_var | class_var_v | instance_var_v );
+
+  # Handles `#` followed by a global, class or instance variable inside an
+  # interpolating literal.
+  action extend_interp_var {
+    auto& current_literal = literal_();
+    // Flush the text collected so far before the interpolation token.
+    current_literal.flush_string();
+    current_literal.extend_content();
+
+    // The token covers only the `#` character and has empty text.
+    emit(token_type::tSTRING_DVAR, "", ts, ts + 1);
+
+    // Rewind to the `#` so that expr_variable starts scanning at the
+    // character after it, i.e. at the variable itself.
+    p = ts;
+    fcall expr_variable;
+  }
+
+  # Interpolations with code blocks must match nested curly braces, as
+  # interpolation ending is ambiguous with a block ending. So, every
+  # opening and closing brace should be matched with e_[lr]brace rules,
+  # which automatically perform the counting.
+  #
+  # Note that interpolations can themselves be nested, so brace balance
+  # is tied to the innermost literal.
+  #
+  # Also note that literals themselves should not use e_[lr]brace rules
+  # when matching their opening and closing delimiters, as the amount of
+  # braces inside the characters of a string literal is independent.
+
+  interp_code = '#{';
+
+  e_lbrace = '{' % {
+    cond.push(false); cmdarg.push(false);
+
+    if (!literal_stack.empty()) {
+      literal_().start_interp_brace();
+    }
+  };
+
+  # A closing brace. While a literal is open it may end an interpolation, in
+  # which case lexing resumes inside that literal.
+  e_rbrace = '}' % {
+    if (!literal_stack.empty()) {
+      auto& current_literal = literal_();
+
+      if (current_literal.end_interp_brace_and_try_closing()) {
+        // Ruby 1.8 and 1.9 get tRCURLY; later versions get tSTRING_DEND.
+        if (version == ruby_version::RUBY_18 || version == ruby_version::RUBY_19) {
+          emit(token_type::tRCURLY, "}", p - 1, p);
+        } else {
+          emit(token_type::tSTRING_DEND, "}", p - 1, p);
+        }
+
+        // Restore the heredoc body position saved when the interpolation
+        // was opened.
+        if (current_literal.saved_herebody_s) {
+          herebody_s = current_literal.saved_herebody_s;
+        }
+
+        fhold;
+        fnext *next_state_for_literal(current_literal);
+        fbreak;
+      }
+    }
+  };
+
+  # Handles `#{` inside an interpolating literal: flushes pending text, queues
+  # tSTRING_DBEG and switches to expr_value for the embedded code.
+  action extend_interp_code {
+    auto& current_literal = literal_();
+    current_literal.flush_string();
+    current_literal.extend_content();
+
+    emit(token_type::tSTRING_DBEG, "#{");
+
+    if (current_literal.heredoc()) {
+      // Park the heredoc body position on the literal; e_rbrace restores it
+      // when the interpolation is closed.
+      current_literal.saved_herebody_s = herebody_s;
+      herebody_s = nullptr;
+    }
+
+    // Count the opening brace so the matching `}` can be recognised.
+    current_literal.start_interp_brace();
+    command_start = true;
+    fnext expr_value;
+    fbreak;
+  }
+
+  # Actual string parsers are simply combined from the primitives defined
+  # above.
+
+  interp_words := |*
+      interp_code => extend_interp_code;
+      interp_var  => extend_interp_var;
+      e_bs escape => extend_string_escaped;
+      c_space+    => extend_string_space;
+      c_eol       => extend_string_eol;
+      c_any       => extend_string;
+  *|;
+
+  interp_string := |*
+      interp_code => extend_interp_code;
+      interp_var  => extend_interp_var;
+      e_bs escape => extend_string_escaped;
+      c_eol       => extend_string_eol;
+      c_any       => extend_string;
+  *|;
+
+  plain_words := |*
+      e_bs c_any  => extend_string_escaped;
+      c_space+    => extend_string_space;
+      c_eol       => extend_string_eol;
+      c_any       => extend_string;
+  *|;
+
+  plain_string := |*
+      '\\' c_nl   => extend_string_eol;
+      e_bs c_any  => extend_string_escaped;
+      c_eol       => extend_string_eol;
+      c_any       => extend_string;
+  *|;
+
+  interp_backslash_delimited := |*
+      interp_code => extend_interp_code;
+      interp_var  => extend_interp_var;
+      c_eol       => extend_string_eol;
+      c_any       => extend_string;
+  *|;
+
+  plain_backslash_delimited := |*
+      c_eol       => extend_string_eol;
+      c_any       => extend_string;
+  *|;
+
+  interp_backslash_delimited_words := |*
+      interp_code => extend_interp_code;
+      interp_var  => extend_interp_var;
+      c_space+    => extend_string_space;
+      c_eol       => extend_string_eol;
+      c_any       => extend_string;
+  *|;
+
+  plain_backslash_delimited_words := |*
+      c_space+    => extend_string_space;
+      c_eol       => extend_string_eol;
+      c_any       => extend_string;
+  *|;
+
+  # Entered right after a regexp literal is closed (see pop_literal). Always
+  # queues exactly one tREGEXP_OPT token, possibly with empty text, and then
+  # continues in expr_end.
+  regexp_modifiers := |*
+      [A-Za-z]+
+      => {
+        auto options = tok();
+        std::string unknown_options;
+
+        // Collect every letter that is not one of the accepted flags
+        // (i m x o u e s n).
+        for (auto i = options.cbegin(); i != options.cend(); ++i) {
+          switch (char opt = *i) {
+            case 'i':
+            case 'm':
+            case 'x':
+            case 'o':
+            case 'u':
+            case 'e':
+            case 's':
+            case 'n':
+              continue;
+            default:
+              unknown_options += opt;
+              break;
+          }
+        }
+
+        if (!unknown_options.empty()) {
+          diagnostic_(dlevel::ERROR, dclass::RegexpOptions, unknown_options);
+        }
+
+        // The token carries all letters, including the unknown ones.
+        emit(token_type::tREGEXP_OPT, options);
+        fnext expr_end;
+        fbreak;
+      };
+
+      any
+      => {
+        // No option letters follow: queue an empty options token and hand the
+        // consumed character back to expr_end.
+        emit(token_type::tREGEXP_OPT, tok(ts, te - 1), ts, te - 1);
+        fhold;
+        fgoto expr_end;
+      };
+  *|;
+
+  #
+  # === WHITESPACE HANDLING ===
+  #
+
+  # Various contexts in Ruby allow various kinds of whitespace
+  # to be used. They are grouped to clarify the lexing machines
+  # and ease collection of comments.
+
+  # A line of code with inline #comment at end is always equivalent
+  # to a line of code ending with just a newline, so an inline
+  # comment is deemed equivalent to non-newline whitespace
+  # (c_space character class).
+
+  w_space =
+      c_space+
+    | '\\' e_heredoc_nl
+    ;
+
+  w_comment =
+      '#'     %{ sharp_s = p - 1; }
+      # The (p == pe) condition compensates for added "\0" and
+      # the way Ragel handles EOF.
+      c_line* %{ emit_comment(sharp_s, p == pe ? p - 2 : p); }
+    ;
+
+  w_space_comment =
+      w_space
+    | w_comment
+    ;
+
+  # A newline in non-literal context always interoperates with
+  # here document logic and can always be escaped by a backslash,
+  # still interoperating with here document logic in the same way,
+  # yet being invisible to anything else.
+  #
+  # To demonstrate:
+  #
+  #     foo = <<FOO \
+  #     bar
+  #     FOO
+  #      + 2
+  #
+  # is equivalent to `foo = "bar\n" + 2`.
+
+  w_newline =
+      e_heredoc_nl;
+
+  w_any =
+      w_space
+    | w_comment
+    | w_newline
+    ;
+
+
+  #
+  # === EXPRESSION PARSING ===
+  #
+
+  # These rules implement a form of manually defined lookahead.
+  # The default longest-match scanning does not work here due
+  # to sheer ambiguity.
+
+  ambiguous_fid_suffix =         # actual    parsed
+      [?!]    %{ tm = p; }     | # a?        a?
+      [?!]'=' %{ tm = p - 2; }   # a!=b      a != b
+  ;
+
+  ambiguous_ident_suffix =       # actual    parsed
+      ambiguous_fid_suffix     |
+      '='     %{ tm = p; }     | # a=        a=
+      '=='    %{ tm = p - 2; } | # a==b      a == b
+      '=~'    %{ tm = p - 2; } | # a=~b      a =~ b
+      '=>'    %{ tm = p - 2; } | # a=>b      a => b
+      '==='   %{ tm = p - 3; }   # a===b     a === b
+  ;
+
+  ambiguous_symbol_suffix =      # actual    parsed
+      ambiguous_ident_suffix |
+      '==>'   %{ tm = p - 2; }   # :a==>b    :a= => b
+  ;
+
+  # Ambiguous with 1.9 hash labels.
+  ambiguous_const_suffix =       # actual    parsed
+      '::'    %{ tm = p - 2; }   # A::B      A :: B
+  ;
+
+  # Resolving kDO/kDO_COND/kDO_BLOCK ambiguity requires embedding
+  # @cond/@cmdarg-related code to e_lbrack, e_lparen and e_lbrace.
+
+  e_lbrack = '[' % {
+    cond.push(false); cmdarg.push(false);
+  };
+
+  # Ruby 1.9 lambdas require parentheses counting in order to
+  # emit correct opening kDO/tLBRACE.
+
+  e_lparen = '(' % {
+    cond.push(false); cmdarg.push(false);
+
+    paren_nest += 1;
+  };
+
+  e_rparen = ')' % {
+    paren_nest -= 1;
+  };
+
+  # Ruby is context-sensitive wrt/ local identifiers.
+  # Queues tIDENTIFIER for the matched text and selects the next state
+  # depending on whether is_declared() reports the identifier as known.
+  action local_ident {
+    auto ident = tok();
+
+    emit(token_type::tIDENTIFIER, ident);
+
+    if (is_declared(ident)) {
+      fnext expr_endfn; fbreak;
+    } else {
+      // cmd_state is the command_start value captured when advance_ began.
+      fnext *arg_or_cmdarg(cmd_state); fbreak;
+    }
+  }
+
+  # Variable lexing code is accessed from both expressions and
+  # string interpolation related code.
+  #
+  # Entered with fcall; every rule queues one variable token and returns to
+  # the calling state through stack_pop().
+  expr_variable := |*
+      global_var
+      => {
+        // ts[0] is `$`; the character after it decides the token type.
+        if (ts[1] >= '1' && ts[1] <= '9') {
+          // $1, $2, ...: the token text is the digits without the `$`.
+          emit(token_type::tNTH_REF, tok(ts + 1));
+        } else if (ts[1] == '&' || ts[1] == '`' || ts[1] == '\'' || ts[1] == '+') {
+          emit(token_type::tBACK_REF);
+        } else {
+          emit(token_type::tGVAR);
+        }
+
+        fnext *stack_pop(); fbreak;
+      };
+
+      class_var_v
+      => {
+        // A digit right after `@@` is reported, but tCVAR is still queued.
+        if (ts[2] >= '0' && ts[2] <= '9') {
+          diagnostic_(dlevel::ERROR, dclass::CvarName, tok(ts, te));
+        }
+
+        emit(token_type::tCVAR);
+        fnext *stack_pop(); fbreak;
+      };
+
+      instance_var_v
+      => {
+        // A digit right after `@` is reported, but tIVAR is still queued.
+        if (ts[1] >= '0' && ts[1] <= '9') {
+          diagnostic_(dlevel::ERROR, dclass::IvarName, tok(ts, te));
+        }
+
+        emit(token_type::tIVAR);
+        fnext *stack_pop(); fbreak;
+      };
+  *|;
+
+  # Literal function name in definition (e.g. `def class`).
+  # Keywords are returned as their respective tokens; this is used
+  # to support singleton def `def self.foo`. Global variables are
+  # returned as `tGVAR`; this is used in global variable alias
+  # statements `alias $a $b`. Symbols are returned verbatim; this
+  # is used in `alias :a :"b#{foo}"` and `undef :a`.
+  #
+  # Transitions to `expr_endfn` afterwards.
+  #
+  expr_fname := |*
+      keyword
+      => { emit_table(KEYWORDS_BEGIN);
+           fnext expr_endfn; fbreak; };
+
+      constant
+      => { emit(token_type::tCONSTANT);
+           fnext expr_endfn; fbreak; };
+
+      bareword [?=!]?
+      => { emit(token_type::tIDENTIFIER);
+           fnext expr_endfn; fbreak; };
+
+      # Rewind to the start of the match and let expr_variable lex it.
+      # NOTE(review): `fnext expr_end` before `fcall` presumably selects the
+      # state resumed once expr_variable finishes -- confirm against stack_pop().
+      global_var
+      => { p = ts - 1;
+           fnext expr_end; fcall expr_variable; };
+
+      # If the handling was to be delegated to expr_end,
+      # these cases would transition to something else than
+      # expr_endfn, which is incorrect.
+      operator_fname      |
+      operator_arithmetic |
+      operator_rest
+      => { emit_table(PUNCTUATION);
+           fnext expr_endfn; fbreak; };
+
+      # Give back both characters of '::' and re-lex them in expr_end.
+      '::'
+      => { fhold; fhold; fgoto expr_end; };
+
+      # Give back the ':' and re-lex it in expr_beg.
+      ':'
+      => { fhold; fgoto expr_beg; };
+
+      '%s' c_any
+      => {
+        if (version == ruby_version::RUBY_23) {
+          // ts + 2 is the delimiter character that follows "%s".
+          fgoto *push_literal(literal_type::LOWERS_SYMBOL, std::string(ts + 2, 1), ts);
+        } else {
+          // Any other version: rewind and re-lex the whole match in expr_end.
+          p = ts - 1;
+          fgoto expr_end;
+        }
+      };
+
+      w_any;
+
+      # Anything else: give the character back and continue in expr_end.
+      c_any
+      => { fhold; fgoto expr_end; };
+
+      c_eof => do_eof;
+  *|;
+
+  # After literal function name in definition. Behaves like `expr_end`,
+  # but allows a tLABEL.
+  #
+  # Transitions to `expr_end` afterwards.
+  #
+  expr_endfn := |*
+      # The match is the label plus one lookahead character that is not ':'.
+      # The token value drops the last two matched characters, the token range
+      # drops only the lookahead, and fhold gives the lookahead back.
+      label ( any - ':' )
+      => { emit(token_type::tLABEL, tok(ts, te - 2), ts, te - 1);
+           fhold; fnext expr_labelarg; fbreak; };
+
+      w_space_comment;
+
+      # Anything else: give the character back and continue in expr_end.
+      c_any
+      => { fhold; fgoto expr_end; };
+
+      c_eof => do_eof;
+  *|;
+
+  # Literal function name in method call (e.g. `a.class`).
+  #
+  # Transitions to `expr_arg` afterwards.
+  #
+  # KEEP IN SYNC WITH expr_dot_after_newline!
+  #
+  expr_dot := |*
+      constant
+      => { emit(token_type::tCONSTANT);
+           fnext *arg_or_cmdarg(cmd_state); fbreak; };
+
+      call_or_var
+      => { emit(token_type::tIDENTIFIER);
+           fnext *arg_or_cmdarg(cmd_state); fbreak; };
+
+      # The token spans ts..tm only; scanning resumes at tm.
+      bareword ambiguous_fid_suffix
+      => { emit(token_type::tFID, tok(ts, tm), ts, tm);
+           fnext *arg_or_cmdarg(cmd_state); p = tm - 1; fbreak; };
+
+      # See the comment in `expr_fname`.
+      operator_fname      |
+      operator_arithmetic |
+      operator_rest
+      => { emit_table(PUNCTUATION);
+           fnext expr_arg; fbreak; };
+
+      # This breaks compatibility with Ruby for better partial parses (useful
+      # for LSP especially). See comment for expr_dot_after_newline below.
+      w_newline
+      => { fhold; fgoto expr_dot_after_newline; };
+
+      w_any;
+
+      # Anything else: give the character back and continue in expr_end.
+      c_any
+      => { fhold; fgoto expr_end; };
+
+      c_eof => do_eof;
+  *|;
+
+  # KEEP IN SYNC WITH expr_dot!
+  #
+  # This state breaks from valid Ruby syntax, but in a way that enables Sorbet
+  # to recover better from parse errors. Recovering from parse errors is
+  # important because it lets us service LSP queries faster.
+  #
+  # Specifically, this state makes it so that any keyword seen after w_newline
+  # is emitted as a keyword (like kEND) instead of a tIDENTIFIER. Examples:
+  #
+  #   # Valid Ruby, valid in Sorbet (no newline between '.' and 'end')
+  #   def foo
+  #     x.end
+  #   end
+  #
+  #   # Parse error in Ruby and Sorbet, but Sorbet at least sees the method def
+  #   # with an empty body (Ruby wouldn't even see an empty method def)
+  #   def foo
+  #     x.
+  #   end
+  #
+  #   # Valid Ruby, not valid in Sorbet (newline between '.' and 'end')
+  #   def foo
+  #     x.
+  #       end
+  #   end
+  #
+  expr_dot_after_newline := |*
+      constant
+      => { emit(token_type::tCONSTANT);
+           fnext *arg_or_cmdarg(cmd_state); fbreak; };
+
+      # This is different from expr_dot. Here, keywords are NOT identifiers.
+      keyword
+      => { emit_table(KEYWORDS);
+           fnext expr_end; fbreak; };
+
+      call_or_var
+      => { emit(token_type::tIDENTIFIER);
+           fnext *arg_or_cmdarg(cmd_state); fbreak; };
+
+      # The token spans ts..tm only; scanning resumes at tm.
+      bareword ambiguous_fid_suffix
+      => { emit(token_type::tFID, tok(ts, tm), ts, tm);
+           fnext *arg_or_cmdarg(cmd_state); p = tm - 1; fbreak; };
+
+      # See the comment in `expr_fname`.
+      operator_fname      |
+      operator_arithmetic |
+      operator_rest
+      => { emit_table(PUNCTUATION);
+           fnext expr_arg; fbreak; };
+
+      # Unlike expr_dot there is no dedicated w_newline rule in this state.
+      w_any;
+
+      # Anything else: give the character back and continue in expr_end.
+      c_any
+      => { fhold; fgoto expr_end; };
+
+      c_eof => do_eof;
+  *|;
+
+  # The previous token emitted was a `tIDENTIFIER` or `tFID`; no space
+  # is consumed; the current expression is a command or method call.
+  #
+  expr_arg := |*
+      #
+      # COMMAND MODE SPECIFIC TOKENS
+      #
+
+      # cmd (1 + 2)
+      # See below the rationale about expr_endarg.
+      w_space+ e_lparen
+      => {
+        // In both branches the token range te - 1 .. te covers only the
+        // parenthesis, not the whitespace matched before it.
+        if (version == ruby_version::RUBY_18) {
+          emit(token_type::tLPAREN2, "(", te - 1, te);
+          fnext expr_value; fbreak;
+        } else {
+          emit(token_type::tLPAREN_ARG, "(", te - 1, te);
+          fnext expr_beg; fbreak;
+        }
+      };
+
+      # meth(1 + 2)
+      # Regular method call.
+      e_lparen
+      => { emit(token_type::tLPAREN2, "(");
+           fnext expr_beg; fbreak; };
+
+      # meth [...]
+      # Array argument. Compare with indexing `meth[...]`.
+      w_space+ e_lbrack
+      => { emit(token_type::tLBRACK, "[", te - 1, te);
+           fnext expr_beg; fbreak; };
+
+      # cmd {}
+      # Command: method call without parentheses.
+      w_space* e_lbrace
+      => {
+        // A lambda recorded at the current parenthesis depth takes this
+        // brace as tLAMBEG; otherwise it is emitted as tLCURLY.
+        if (!lambda_stack.empty() && lambda_stack.top() == paren_nest) {
+          lambda_stack.pop();
+          emit(token_type::tLAMBEG, "{", te - 1, te);
+        } else {
+          emit(token_type::tLCURLY, "{", te - 1, te);
+        }
+        command_start = true;
+        fnext expr_value; fbreak;
+      };
+
+      #
+      # AMBIGUOUS TOKENS RESOLVED VIA EXPR_BEG
+      #
+
+      # a??
+      # Ternary operator
+      '?' c_space_nl
+      => {
+        // Unlike expr_beg as invoked in the next rule, do not warn
+        p = ts - 1;
+        fgoto expr_end;
+      };
+
+      # a ?b, a? ?
+      # Character literal or ternary operator
+      w_space* '?'
+      => { fhold; fgoto expr_beg; };
+
+      # a %{1}, a %[1] (but not "a %=1=" or "a % foo")
+      # a /foo/ (but not "a / foo" or "a /=foo")
+      # a <<HEREDOC
+      # tm is set when the leading whitespace ends, so it points at the
+      # first character of the ambiguous token.
+      w_space+ %{ tm = p; }
+      ( [%/] ( c_any - c_space_nl - '=' ) # /
+      | '<<'
+      )
+      => {
+        if (*tm == '/') {
+          // Ambiguous regexp literal.
+          diagnostic_(dlevel::WARNING, dclass::AmbiguousLiteral, range(tm, tm + 1));
+        }
+
+        // Resume at tm so that expr_beg lexes the token itself.
+        p = tm - 1;
+        fgoto expr_beg;
+      };
+
+      # x *1
+      # Ambiguous splat, kwsplat or block-pass.
+      w_space+ %{ tm = p; } ( '+' | '-' | '*' | '&' | '**' )
+      => {
+        diagnostic_(dlevel::WARNING, dclass::AmbiguousPrefix, range(tm, te), tok(tm, te));
+
+        // Resume at tm so that expr_beg lexes the operator itself.
+        p = tm - 1;
+        fgoto expr_beg;
+      };
+
+      # x ::Foo
+      # Ambiguous toplevel constant access.
+      w_space+ '::'
+      => { fhold; fhold; fgoto expr_beg; };
+
+      # x:b
+      # Symbol.
+      w_space* ':'
+      => { fhold; fgoto expr_beg; };
+
+      w_space+ label
+      => { p = ts - 1; fgoto expr_beg; };
+
+      #
+      # AMBIGUOUS TOKENS RESOLVED VIA EXPR_END
+      #
+
+      # a ? b
+      # Ternary operator.
+      w_space+ %{ tm = p; } '?' c_space_nl
+      => { p = tm - 1; fgoto expr_end; };
+
+      # x + 1: Binary operator or operator-assignment.
+      w_space* operator_arithmetic
+                  ( '=' | c_space_nl )?    |
+      # x rescue y: Modifier keyword.
+      w_space* keyword_modifier            |
+      # a &. b: Safe navigation operator.
+      w_space* '&.'                        |
+      # Miscellanea.
+      w_space* punctuation_end
+      => {
+        // Nothing is emitted here: rewind to the start of the match and
+        // let expr_end lex it.
+        p = ts - 1;
+        fgoto expr_end;
+      };
+
+      w_space;
+
+      w_comment
+      => { fgoto expr_end; };
+
+      w_newline
+      => { fhold; fgoto expr_end; };
+
+      # Anything else: give the character back and continue in expr_beg.
+      c_any
+      => { fhold; fgoto expr_beg; };
+
+      c_eof => do_eof;
+  *|;
+
+  # The previous token was an identifier which was seen while in the
+  # command mode (that is, the state at the beginning of #advance was
+  # expr_value). This state is very similar to expr_arg, but disambiguates
+  # two very rare and specific conditions:
+  #   * In 1.8 mode, "foo (lambda do end)".
+  #   * In 1.9+ mode, "f x: -> do foo do end end".
+  expr_cmdarg := |*
+      w_space+ e_lparen
+      => {
+        // The token is tLPAREN_ARG for every version; only the follow-up
+        // state depends on the version.
+        emit(token_type::tLPAREN_ARG, "(", te - 1, te);
+
+        if (version == ruby_version::RUBY_18) {
+          fnext expr_value; fbreak;
+        } else {
+          fnext expr_beg; fbreak;
+        }
+      };
+
+      w_space* 'do'
+      => {
+        // te - 2 .. te covers just the two characters of `do`.
+        if (cond.active()) {
+          emit(token_type::kDO_COND, "do", te - 2, te);
+        } else {
+          emit(token_type::kDO, "do", te - 2, te);
+        }
+        fnext expr_value; fbreak;
+      };
+
+      # Everything else is not handled here: rewind to the start of the
+      # match and re-lex it in expr_arg.
+      c_any             |
+      # Disambiguate with the `do' rule above.
+      w_space* bareword |
+      w_space* label
+      => { p = ts - 1;
+           fgoto expr_arg; };
+
+      c_eof => do_eof;
+  *|;
+
+  # The rationale for this state is pretty complex. Normally, if an argument
+  # is passed to a command and then there is a block (tLCURLY...tRCURLY),
+  # the block is attached to the innermost argument (`f` in `m f {}`), or it
+  # is a parse error (`m 1 {}`). But there is a special case for passing a single
+  # primary expression grouped with parentheses: if you write `m (1) {}` or
+  # (2.0 only) `m () {}`, then the block is attached to `m`.
+  #
+  # Thus, we recognize the opening `(` of a command (remember, a command is
+  # a method call without parens) as a tLPAREN_ARG; then, in parser, we recognize
+  # `tLPAREN_ARG expr rparen` as a `primary_expr` and before rparen, set the
+  # lexer's state to `expr_endarg`, which makes it emit the possibly following
+  # `{` as `tLBRACE_ARG`.
+  #
+  # The default post-`expr_endarg` state is `expr_end`, so this state also handles
+  # `do` (as `kDO_BLOCK` in `expr_beg`).
+  expr_endarg := |*
+      e_lbrace
+      => {
+        // A lambda recorded at the current parenthesis depth takes this
+        // brace as tLAMBEG; otherwise it is emitted as tLBRACE_ARG.
+        if (!lambda_stack.empty() && lambda_stack.top() == paren_nest) {
+          lambda_stack.pop();
+          emit(token_type::tLAMBEG, "{");
+        } else {
+          emit(token_type::tLBRACE_ARG, "{");
+        }
+        command_start = true;
+        fnext expr_value; fbreak;
+      };
+
+      'do'
+      => { emit_do(true);
+           fnext expr_value; fbreak; };
+
+      w_space_comment;
+
+      # Anything else: give the character back and continue in expr_end.
+      c_any
+      => { fhold; fgoto expr_end; };
+
+      c_eof => do_eof;
+  *|;
+
+  # The rationale for this state is that several keywords accept value
+  # (i.e. should transition to `expr_beg`), do not accept it like a command
+  # (i.e. not an `expr_arg`), and must behave like a statement, that is,
+  # accept a modifier if/while/etc.
+  #
+  expr_mid := |*
+      keyword_modifier
+      => { emit_table(KEYWORDS);
+           fnext expr_beg; fbreak; };
+
+      # Any other bareword is re-lexed from its start in expr_beg.
+      bareword
+      => { p = ts - 1; fgoto expr_beg; };
+
+      w_space_comment;
+
+      # The newline is given back so that expr_end handles it.
+      w_newline
+      => { fhold; fgoto expr_end; };
+
+      c_any
+      => { fhold; fgoto expr_beg; };
+
+      c_eof => do_eof;
+  *|;
+
+  # Beginning of an expression.
+  #
+  # Don't fallthrough to this state from `c_any`; make sure to handle
+  # `c_space* c_nl` and let `expr_end` handle the newline.
+  # Otherwise code like `f\ndef x` gets glued together and the parser
+  # explodes.
+  #
+  expr_beg := |*
+      # +5, -5, - 5
+      [+\-] w_any* [0-9]
+      => {
+        // Only the sign character is emitted; fhold gives the digit back
+        // so that expr_end lexes the number.
+        emit(token_type::tUNARY_NUM, tok(ts, ts + 1), ts, ts + 1);
+        fhold; fnext expr_end; fbreak;
+      };
+
+      # splat *a
+      '*'
+      => { emit(token_type::tSTAR, "*");
+           fbreak; };
+
+      #
+      # STRING AND REGEXP LITERALS
+      #
+
+      # /regexp/oui
+      # /=/ (disambiguation with /=)
+      '/' c_any
+      => {
+        fhold; fgoto *push_literal(literal_type::SLASH_REGEXP, std::string(ts + 0, 1), ts);
+      };
+
+      # %<string>
+      '%' ( any - [A-Za-z] )
+      => {
+        // The character right after '%' is the delimiter.
+        fgoto *push_literal(literal_type::PERCENT_STRING, std::string(ts + 1, 1), ts);
+      };
+
+      # %w(we are the people)
+      '%' [A-Za-z]+ c_any
+      => {
+        literal_type type;
+
+        // True when exactly one letter sits between '%' and the delimiter.
+        bool single_char_type = (ts + 3 == te);
+
+        if (single_char_type && ts[1] == 'q') {
+          type = literal_type::LOWERQ_STRING;
+        } else if (single_char_type && ts[1] == 'Q') {
+          type = literal_type::UPPERQ_STRING;
+        } else if (single_char_type && ts[1] == 'w') {
+          type = literal_type::LOWERW_WORDS;
+        } else if (single_char_type && ts[1] == 'W') {
+          type = literal_type::UPPERW_WORDS;
+        } else if (single_char_type && ts[1] == 'i') {
+          type = literal_type::LOWERI_SYMBOLS;
+        } else if (single_char_type && ts[1] == 'I') {
+          type = literal_type::UPPERI_SYMBOLS;
+        } else if (single_char_type && ts[1] == 's') {
+          type = literal_type::LOWERS_SYMBOL;
+        } else if (single_char_type && ts[1] == 'r') {
+          type = literal_type::PERCENT_REGEXP;
+        } else if (single_char_type && ts[1] == 'x') {
+          type = literal_type::LOWERX_XSTRING;
+        } else {
+          // Unknown or multi-letter type: report it, then still push a
+          // plain percent-string literal.
+          type = literal_type::PERCENT_STRING;
+          diagnostic_(dlevel::ERROR, dclass::UnexpectedPercentStr, range(ts, te - 1), tok(ts, te-1));
+        }
+
+        // The last matched character is the delimiter.
+        fgoto *push_literal(type, std::string(te - 1, 1), ts);
+      };
+
+      '%' c_eof
+      => {
+        diagnostic_(dlevel::FATAL, dclass::StringEof, range(ts, ts + 1));
+      };
+
+      # Heredoc start.
+      # <<END  | <<'END'  | <<"END"  | <<`END`  |
+      # <<-END | <<-'END' | <<-"END" | <<-`END` |
+      # <<~END | <<~'END' | <<~"END" | <<~`END`
+      '<<' [~\-]?
+        ( '"' ( c_line - '"' )* '"'
+        | "'" ( c_line - "'" )* "'"
+        | "`" ( c_line - "`" )* "`"
+        | bareword ) % { heredoc_e      = p; }
+        c_line* c_nl % { new_herebody_s = p; }
+      => {
+        bool indent;
+        bool dedent_body;
+
+        // delim_s starts right after "<<"; delim_e is the end of the
+        // (possibly quoted) identifier recorded by the leaving action above.
+        const char* delim_s = ts + 2;
+        const char* delim_e = heredoc_e;
+
+        // Optional '-' or '~' modifier.
+        if (*delim_s == '-') {
+          indent = true;
+          dedent_body = false;
+          delim_s++;
+        } else if (*delim_s == '~') {
+          indent = true;
+          dedent_body = true;
+          delim_s++;
+        } else {
+          indent = false;
+          dedent_body = false;
+        }
+
+        literal_type type;
+
+        // The quote style selects the literal type; for quoted forms both
+        // ends are trimmed so that delim_s..delim_e is the bare identifier.
+        if (*delim_s == '"') {
+          type = literal_type::DQUOTE_HEREDOC;
+          delim_s++;
+          delim_e--;
+        } else if (*delim_s == '\'') {
+          type = literal_type::SQUOTE_HEREDOC;
+          delim_s++;
+          delim_e--;
+        } else if (*delim_s == '`') {
+          type = literal_type::BACKTICK_HEREDOC;
+          delim_s++;
+          delim_e--;
+        } else {
+          type = literal_type::DQUOTE_HEREDOC;
+        }
+
+        if (dedent_body && (version == ruby_version::RUBY_18 ||
+                            version == ruby_version::RUBY_19 ||
+                            version == ruby_version::RUBY_20 ||
+                            version == ruby_version::RUBY_21 ||
+                            version == ruby_version::RUBY_22)) {
+          // For these versions `<<~` is not lexed as a heredoc: emit the
+          // "<<" as tLSHFT and resume scanning right after it.
+          emit(token_type::tLSHFT, "<<", ts, ts + 2);
+          p = ts + 1;
+          fnext expr_beg; fbreak;
+        } else {
+          fnext *push_literal(type, std::string(delim_s, (size_t)(delim_e - delim_s)), ts, heredoc_e, indent, dedent_body);
+
+          // Only set herebody_s when it is not already set.
+          // NOTE(review): presumably it is already set when an earlier
+          // heredoc started on the same line -- confirm.
+          if (!herebody_s) {
+            herebody_s = new_herebody_s;
+          }
+
+          // Continue scanning at the heredoc body.
+          p = herebody_s - 1;
+        }
+      };
+
+      #
+      # SYMBOL LITERALS
+      #
+
+      # :&&, :||
+      ':' ('&&' | '||') => {
+        // Give back the two operator characters, emit the ':' alone as
+        // tSYMBEG, and let expr_fname lex the rest.
+        fhold; fhold;
+        emit(token_type::tSYMBEG, tok(ts, ts + 1), ts, ts + 1);
+        fgoto expr_fname;
+      };
+
+      # :"bar", :'baz'
+      ':' ['"] # '
+      => {
+        literal_type type;
+
+        if (ts[1] == '\'') {
+          type = literal_type::SQUOTE_SYMBOL;
+        } else { // '"'
+          type = literal_type::DQUOTE_SYMBOL;
+        }
+
+        fgoto *push_literal(type, std::string(ts + 1, 1), ts);
+      };
+
+      # :!@ is :!
+      # :~@ is :~
+      ':' [!~] '@'
+      => {
+        // The token value is just the operator character; the range
+        // covers the whole match.
+        emit(token_type::tSYMBEG, tok(ts + 1, ts + 2), ts, te);
+        fnext expr_end; fbreak;
+      };
+
+      ':' bareword ambiguous_symbol_suffix
+      => {
+        // The symbol ends at tm; scanning resumes there.
+        emit(token_type::tSYMBOL, tok(ts + 1, tm), ts, tm);
+        p = tm - 1;
+        fnext expr_end; fbreak;
+      };
+
+      ':' ( bareword | global_var | class_var | instance_var |
+            operator_fname | operator_arithmetic | operator_rest )
+      => {
+        emit(token_type::tSYMBOL, tok(ts + 1), ts, te);
+        fnext expr_end; fbreak;
+      };
+
+      #
+      # AMBIGUOUS TERNARY OPERATOR
+      #
+
+      # Character constant, like ?a, ?\n, ?\u1000, and so on
+      # Don't accept \u escape with multiple codepoints, like \u{1 2 3}
+      '?' ( e_bs ( escape - ( '\u{' (xdigit+ [ \t]+)+ xdigit+ '}' ))
+          | (c_any - c_space_nl - e_bs) % { escape = nullptr; }
+          )
+      => {
+        if (version == ruby_version::RUBY_18) {
+          // Emitted as the decimal value of the byte after '?'.
+          emit(token_type::tINTEGER, std::to_string(static_cast<unsigned char>(ts[1])));
+        } else {
+          // escape is reset to nullptr by the non-backslash branch above;
+          // in that case the raw text after '?' is used.
+          emit(token_type::tCHARACTER, escape ? *escape : tok(ts + 1));
+        }
+
+        fnext expr_end; fbreak;
+      };
+
+      '?' c_space_nl
+      => {
+        // Maps the whitespace character after '?' to the escape spelling
+        // passed to the warning; the {0, 0} entry terminates the table.
+        static const struct escape_map_ent { char c; const char* s; } escape_map[] {
+          { ' ',  "\\s" },
+          { '\r', "\\r" },
+          { '\n', "\\n" },
+          { '\t', "\\t" },
+          { '\v', "\\v" },
+          { '\f', "\\f" },
+          { 0, 0 },
+        };
+
+        for (const struct escape_map_ent* ent = escape_map; ent->c; ++ent) {
+          if (ts[1] == ent->c) {
+            diagnostic_(dlevel::WARNING, dclass::InvalidEscapeUse, ent->s);
+            break;
+          }
+        }
+
+        // Rewind and let expr_end lex the '?'.
+        p = ts - 1;
+        fgoto expr_end;
+      };
+
+      '?' c_eof
+      => {
+        diagnostic_(dlevel::FATAL, dclass::IncompleteEscape, range(ts, ts + 1));
+      };
+
+      # f ?aa : b: Disambiguate with a character literal.
+      '?' [A-Za-z_] bareword
+      => {
+        p = ts - 1;
+        fgoto expr_end;
+      };
+
+      #
+      # KEYWORDS AND PUNCTUATION
+      #
+
+      # a({b=>c})
+      e_lbrace
+      => {
+        // A lambda recorded at the current parenthesis depth takes this
+        // brace as tLAMBEG; otherwise it is emitted as tLBRACE.
+        if (!lambda_stack.empty() && lambda_stack.top() == paren_nest) {
+          lambda_stack.pop();
+          command_start = true;
+          emit(token_type::tLAMBEG, "{");
+        } else {
+          emit(token_type::tLBRACE, "{");
+        }
+        fbreak;
+      };
+
+      # a([1, 2])
+      e_lbrack
+      => { emit(token_type::tLBRACK, "[");
+           fbreak; };
+
+      # a()
+      e_lparen
+      => { emit(token_type::tLPAREN, "(");
+           fbreak; };
+
+      # a(+b)
+      punctuation_begin
+      => { emit_table(PUNCTUATION_BEGIN);
+           fbreak; };
+
+      # rescue Exception => e: Block rescue.
+      # Special because it should transition to expr_mid.
+      # tm marks the end of the word `rescue`; an optional '=>' is matched
+      # but handed back by resuming at tm.
+      'rescue' %{ tm = p; } '=>'?
+      => { emit(token_type::kRESCUE, "rescue", ts, tm);
+           p = tm - 1;
+           fnext expr_mid; fbreak; };
+
+      # if a: Statement if.
+      keyword_modifier
+      => { emit_table(KEYWORDS_BEGIN);
+           command_start = true;
+           fnext expr_value; fbreak; };
+
+      #
+      # RUBY 1.9 HASH LABELS
+      #
+
+      label ( any - ':' )
+      => {
+        // Give back the lookahead character that follows the ':'.
+        fhold;
+
+        if (version == ruby_version::RUBY_18) {
+          // This version has no labels: emit the name as a constant or
+          // an identifier depending on the case of its first letter.
+          auto ident = tok(ts, te - 2);
+
+          if (*ts >= 'A' && *ts <= 'Z') {
+            emit(token_type::tCONSTANT, ident, ts, te - 2);
+          } else {
+            emit(token_type::tIDENTIFIER, ident, ts, te - 2);
+          }
+          fhold; // continue as a symbol
+
+          if (is_declared(ident)) {
+            fnext expr_end;
+          } else {
+            fnext *arg_or_cmdarg(cmd_state);
+          }
+        } else {
+          // The value drops the last two matched characters; the range
+          // drops only the lookahead character.
+          emit(token_type::tLABEL, tok(ts, te - 2), ts, te - 1);
+          fnext expr_labelarg;
+        }
+
+        fbreak;
+      };
+
+      #
+      # CONTEXT-DEPENDENT VARIABLE LOOKUP OR COMMAND INVOCATION
+      #
+
+      # foo= bar:  Disambiguate with bareword rule below.
+      bareword ambiguous_ident_suffix |
+      # def foo:   Disambiguate with bareword rule below.
+      keyword
+      => { p = ts - 1;
+           fgoto expr_end; };
+
+      # a = 42;     a [42]: Indexing.
+      # def a; end; a [42]: Array argument.
+      call_or_var
+      => local_ident;
+
+      # An identifier followed by whitespace and '('. The identifier text and
+      # bounds are captured when it ends; only the identifier is emitted, and
+      # scanning resumes right after it in expr_cmdarg.
+      (call_or_var - keyword)
+        % { ident_tok = tok(ts, te); ident_ts = ts; ident_te = te; }
+      w_space+ '('
+      => {
+        emit(token_type::tIDENTIFIER, ident_tok, ident_ts, ident_te);
+        p = ident_te - 1;
+
+        fnext expr_cmdarg;
+        fbreak;
+      };
+
+      #
+      # WHITESPACE
+      #
+
+      w_any;
+
+      # Block comment opener: remember the current state in
+      # cs_before_block_comment (line_comment jumps back to it after `=end`),
+      # then rewind and let line_begin handle the `=begin` line.
+      e_heredoc_nl '=begin' ( c_space | c_nl_zlen )
+      => { p = ts - 1;
+           cs_before_block_comment = cs;
+           fgoto line_begin; };
+
+      #
+      # DEFAULT TRANSITION
+      #
+
+      # The following rules match most binary and all unary operators.
+      # Rules for binary operators provide better error reporting.
+      operator_arithmetic '='    |
+      operator_rest              |
+      punctuation_end            |
+      c_any
+      => { p = ts - 1; fgoto expr_end; };
+
+      c_eof => do_eof;
+  *|;
+
+  # Special newline handling for "def a b:"
+  #
+  expr_labelarg := |*
+    w_space_comment;
+
+    w_newline
+    => {
+      // With in_kwarg set, the newline is given back so that expr_end
+      // handles it; otherwise it is consumed and lexing continues in
+      // line_begin.
+      if (in_kwarg) {
+        fhold; fgoto expr_end;
+      } else {
+        fgoto line_begin;
+      }
+    };
+
+    # Anything else: give the character back and continue in expr_beg.
+    c_any
+    => { fhold; fgoto expr_beg; };
+
+    c_eof => do_eof;
+  *|;
+
+  # Like expr_beg, but no 1.9 label or 2.2 quoted label possible.
+  #
+  expr_value := |*
+      # a:b: a(:b), a::B, A::B
+      # Nothing is emitted: the match is re-lexed from its start in expr_end.
+      label (any - ':')
+      => { p = ts - 1;
+           fgoto expr_end; };
+
+      # "bar", 'baz'
+      ['"] # '
+      => {
+        literal_type type;
+
+        if (ts[0] == '\'') {
+          type = literal_type::SQUOTE_STRING;
+        } else { // '"'
+          type = literal_type::DQUOTE_STRING;
+        }
+
+        // The matched quote character itself is passed as the delimiter.
+        fgoto *push_literal(type, tok(), ts);
+      };
+
+      w_space_comment;
+
+      w_newline
+      => { fgoto line_begin; };
+
+      # Anything else: give the character back and continue in expr_beg.
+      c_any
+      => { fhold; fgoto expr_beg; };
+
+      c_eof => do_eof;
+  *|;
+
+  expr_end := |*
+      #
+      # STABBY LAMBDA
+      #
+
+      '->'
+      => {
+        emit(token_type::tLAMBDA, "->", ts, ts + 2);
+
+        // Record the parenthesis depth at which this lambda started; the
+        // brace/do rules compare it with paren_nest to find the body opener.
+        lambda_stack.push(paren_nest);
+        fnext expr_endfn; fbreak;
+      };
+
+      e_lbrace | 'do'
+      => {
+        if (!lambda_stack.empty() && lambda_stack.top() == paren_nest) {
+          // This opener belongs to the innermost pending lambda.
+          lambda_stack.pop();
+
+          if (ts[0] == '{') {
+            emit(token_type::tLAMBEG, "{");
+          } else { // 'do'
+            emit(token_type::kDO_LAMBDA, "do");
+          }
+        } else {
+          if (ts[0] == '{') {
+            emit(token_type::tLCURLY, "{");
+          } else { // 'do'
+            emit_do();
+          }
+        }
+        command_start = true;
+
+        fnext expr_value; fbreak;
+      };
+
+      #
+      # KEYWORDS
+      #
+
+      keyword_with_fname
+      => { emit_table(KEYWORDS);
+           fnext expr_fname; fbreak; };
+
+      # Two tokens are emitted from a single match: kCLASS for the first five
+      # characters and tLSHFT for the last two.
+      'class' w_any* '<<'
+      => { emit(token_type::kCLASS, "class", ts, ts + 5);
+           emit(token_type::tLSHFT, "<<",    te - 2, te);
+           fnext expr_value; fbreak; };
+
+      # a if b:c: Syntax error.
+      keyword_modifier
+      => { emit_table(KEYWORDS);
+           fnext expr_beg; fbreak; };
+
+      # elsif b:c: elsif b(:c)
+      keyword_with_value
+      => { emit_table(KEYWORDS);
+           command_start = true;
+           fnext expr_value; fbreak; };
+
+      keyword_with_mid
+      => { emit_table(KEYWORDS);
+           fnext expr_mid; fbreak; };
+
+      keyword_with_arg
+      => {
+        emit_table(KEYWORDS);
+
+        // In 1.8 mode the three-letter keyword `not` goes to expr_beg;
+        // everything else matched here goes to expr_arg.
+        if (version == ruby_version::RUBY_18 && ts + 3 == te && ts[0] == 'n' && ts[1] == 'o' && ts[2] == 't') {
+          fnext expr_beg; fbreak;
+        } else {
+          fnext expr_arg; fbreak;
+        }
+      };
+
+      '__ENCODING__'
+      => {
+        if (version == ruby_version::RUBY_18) {
+          // In 1.8 mode this is emitted as a plain identifier.
+          auto ident = tok();
+
+          emit(token_type::tIDENTIFIER, ident);
+
+          if (!is_declared(ident)) {
+            fnext *arg_or_cmdarg(cmd_state);
+          }
+        } else {
+          emit(token_type::k__ENCODING__, "__ENCODING__");
+        }
+        fbreak;
+      };
+
+      keyword_with_end
+      => { emit_table(KEYWORDS);
+           fbreak; };
+
+      #
+      # NUMERIC LITERALS
+      #
+
+      # The embedded actions record the base (num_base), where the digits
+      # start (num_digits_s) and where the suffix starts (num_suffix_s).
+      ( '0' [Xx] %{ num_base = 16; num_digits_s = p; } int_hex
+      | '0' [Dd] %{ num_base = 10; num_digits_s = p; } int_dec
+      | '0' [Oo] %{ num_base = 8;  num_digits_s = p; } int_dec
+      | '0' [Bb] %{ num_base = 2;  num_digits_s = p; } int_bin
+      | [1-9] digit* '_'? %{ num_base = 10; num_digits_s = ts; } int_dec
+      | '0'   digit* '_'? %{ num_base = 8;  num_digits_s = ts; } int_dec
+      ) %{ num_suffix_s = p; } int_suffix
+      => {
+        auto digits = tok(num_digits_s, num_suffix_s);
+
+        if (num_suffix_s[-1] == '_') {
+          // The last character before the suffix is an underscore.
+          diagnostic_(dlevel::ERROR, dclass::TrailingInNumber, range(te - 1, te), "_");
+        } else if (num_digits_s == num_suffix_s && num_base == 8 && version == ruby_version::RUBY_18) {
+          // 1.8 did not raise an error on 0o.
+        } else if (num_digits_s == num_suffix_s) {
+          // A base prefix with no digits after it.
+          diagnostic_(dlevel::ERROR, dclass::EmptyNumeric);
+        } else if (num_base == 8) {
+          // Every 8 or 9 inside an octal literal is reported individually.
+          for (const char* digit_p = num_digits_s; digit_p < num_suffix_s; digit_p++) {
+            if (*digit_p == '8' || *digit_p == '9') {
+              diagnostic_(dlevel::ERROR, dclass::InvalidOctal,
+                range(digit_p, digit_p + 1));
+            }
+          }
+        }
+
+        if (version == ruby_version::RUBY_18 || version == ruby_version::RUBY_19 || version == ruby_version::RUBY_20) {
+          // For these versions the token ends where the suffix would
+          // start, and scanning resumes there.
+          emit(token_type::tINTEGER, convert_base(digits, num_base), ts, num_suffix_s);
+          p = num_suffix_s - 1;
+        } else {
+          emit_num(convert_base(digits, num_base));
+        }
+        fbreak;
+      };
+
+      flo_frac flo_pow?
+      => {
+        diagnostic_(dlevel::ERROR, dclass::NoDotDigitLiteral);
+      };
+
+      flo_int [eE]
+      => {
+        if (version == ruby_version::RUBY_18 || version == ruby_version::RUBY_19 || version == ruby_version::RUBY_20) {
+          diagnostic_(dlevel::ERROR, dclass::TrailingInNumber, range(te - 1, te), tok(te-1, te));
+        } else {
+          // Emit the integer without the trailing e/E and give that
+          // character back to the scanner.
+          emit(token_type::tINTEGER, tok(ts, te - 1), ts, te - 1);
+          fhold; fbreak;
+        }
+      };
+
+      flo_int flo_frac [eE]
+      => {
+        if (version == ruby_version::RUBY_18 || version == ruby_version::RUBY_19 || version == ruby_version::RUBY_20) {
+          diagnostic_(dlevel::ERROR, dclass::TrailingInNumber, range(te - 1, te), tok(te - 1, te));
+        } else {
+          // Emit the float without the trailing e/E and give that
+          // character back to the scanner.
+          emit(token_type::tFLOAT, tok(ts, te - 1), ts, te - 1);
+          fhold; fbreak;
+        }
+      };
+
+      flo_int
+      ( flo_frac? flo_pow %{ num_suffix_s = p; } flo_pow_suffix
+      | flo_frac          %{ num_suffix_s = p; } flo_suffix
+      )
+      => {
+        auto digits = tok(ts, num_suffix_s);
+
+        if (version == ruby_version::RUBY_18 || version == ruby_version::RUBY_19 || version == ruby_version::RUBY_20) {
+          // For these versions the token ends where the suffix would
+          // start, and scanning resumes there.
+          emit(token_type::tFLOAT, digits, ts, num_suffix_s);
+          p = num_suffix_s - 1;
+        } else {
+          emit_num(digits);
+        }
+        fbreak;
+      };
+
+      #
+      # STRING AND XSTRING LITERALS
+      #
+
+      # `echo foo`, "bar", 'baz'
+      '`' | ['"] # '
+      => {
+        literal_type type;
+
+        if (ts[0] == '`') {
+          type = literal_type::BACKTICK_XSTRING;
+        } else if (ts[0] == '\'') {
+          type = literal_type::SQUOTE_STRING;
+        } else { // '"'
+          type = literal_type::DQUOTE_STRING;
+        }
+
+        // NOTE(review): the trailing `true` appears to correspond to the
+        // literal constructor's label_allowed parameter -- confirm against
+        // push_literal's signature.
+        fgoto *push_literal(type, std::string(te - 1, 1), ts, nullptr, false, false, true);
+      };
+
+      #
+      # CONSTANTS AND VARIABLES
+      #
+
+      constant
+      => { emit(token_type::tCONSTANT);
+           fnext *arg_or_cmdarg(cmd_state); fbreak; };
+
+      # The constant ends at tm; scanning resumes there.
+      constant ambiguous_const_suffix
+      => { emit(token_type::tCONSTANT, tok(ts, tm), ts, tm);
+           p = tm - 1; fbreak; };
+
+      # Rewind and let expr_variable lex the variable.
+      global_var | class_var_v | instance_var_v
+      => { p = ts - 1; fcall expr_variable; };
+
+      #
+      # METHOD CALLS
+      #
+
+      '.' | '&.' | '::'
+      => { emit_table(PUNCTUATION);
+           fnext expr_dot; fbreak; };
+
+      call_or_var
+      => local_ident;
+
+      bareword ambiguous_fid_suffix
+      => {
+        if (tm == te) {
+          // Suffix was consumed, e.g. foo!
+          emit(token_type::tFID);
+        } else {
+          // Suffix was not consumed, e.g. foo!=
+          emit(token_type::tIDENTIFIER, tok(ts, tm), ts, tm);
+          p = tm - 1;
+        }
+        fnext expr_arg; fbreak;
+      };
+
+      #
+      # OPERATORS
+      #
+
+      '*' | '=>'
+      => {
+        emit_table(PUNCTUATION);
+        fgoto expr_value;
+      };
+
+      # When '|', '~', '!', '=>' are used as operators
+      # they do not accept any symbols (or quoted labels) after.
+      # Other binary operators accept it.
+      ( operator_arithmetic | operator_rest ) - ( '|' | '~' | '!' | '*' )
+      => {
+        emit_table(PUNCTUATION);
+        fnext expr_value; fbreak;
+      };
+
+      ( e_lparen | '|' | '~' | '!' )
+      => { emit_table(PUNCTUATION);
+           fnext expr_beg; fbreak; };
+
+      e_rbrace | e_rparen | ']'
+      => {
+        emit_table(PUNCTUATION);
+
+        // Drop the cond/cmdarg entries for the bracket being closed (the
+        // visible opener actions e_lbrack and e_lparen push them).
+        cond.pop();
+        cmdarg.pop();
+
+        if (ts[0] == '}' || ts[0] == ']') {
+          fnext expr_end;
+        } else { // ')'
+          // this was commented out in the original lexer.rl:
+          // fnext expr_endfn; ?
+        }
+
+        fbreak;
+      };
+
+      operator_arithmetic '='
+      => { emit(token_type::tOP_ASGN, tok(ts, te - 1));
+           fnext expr_beg; fbreak; };
+
+      '?'
+      => { emit(token_type::tEH, "?");
+           fnext expr_value; fbreak; };
+
+      e_lbrack
+      => { emit(token_type::tLBRACK2, "[");
+           fnext expr_beg; fbreak; };
+
+      punctuation_end
+      => { emit_table(PUNCTUATION);
+           fnext expr_beg; fbreak; };
+
+      #
+      # WHITESPACE
+      #
+
+      w_space_comment;
+
+      # No tNL is emitted here; leading_dot decides whether the next line
+      # continues the expression with a leading '.' or '&.'.
+      w_newline
+      => { fgoto leading_dot; };
+
+      ';'
+      => { emit(token_type::tSEMI, ";");
+           command_start = true;
+           fnext expr_value; fbreak; };
+
+      '\\' c_line {
+        diagnostic_(dlevel::ERROR, dclass::BareBackslash, range(ts, ts + 1));
+        fhold;
+      };
+
+      c_any
+      => {
+        diagnostic_(dlevel::ERROR, dclass::Unexpected, tok());
+      };
+
+      c_eof => do_eof;
+  *|;
+
+  # Entered from expr_end after a newline. Decides whether the following line
+  # continues the expression with a leading '.' or '&.'.
+  leading_dot := |*
+      # Insane leading dots:
+      # a #comment
+      #  .b: a.b
+      # tm marks the dot; scanning resumes there in expr_end and no tNL is
+      # emitted.
+      c_space* %{ tm = p; } ('.' | '&.')
+      => { p = tm - 1; fgoto expr_end; };
+
+      # Anything else: emit the tNL at newline_s with an empty value, give
+      # the character back and continue in line_begin.
+      any
+      => { emit(token_type::tNL, std::string(), newline_s, newline_s + 1);
+           fhold; fnext line_begin; fbreak; };
+  *|;
+
+  #
+  # === EMBEDDED DOCUMENT (aka BLOCK COMMENT) PARSING ===
+  #
+
+  # Body of a block comment; entered from line_begin once `=begin` was seen.
+  line_comment := |*
+      # End of the block comment: emit everything from eq_begin_s up to the
+      # end of this line as one comment and return to the state saved in
+      # cs_before_block_comment.
+      '=end' c_line* c_nl_zlen
+      => {
+        emit_comment(eq_begin_s, te);
+        fgoto *cs_before_block_comment;
+      };
+
+      # Any other complete line inside the comment is skipped.
+      c_line* c_nl;
+
+      # A final line with no terminating newline: the comment was never
+      # closed. The reported range covers the `=begin` marker.
+      c_line* zlen
+      => {
+        diagnostic_(dlevel::FATAL, dclass::EmbeddedDocument,
+          range(eq_begin_s, eq_begin_s + "=begin"s.size()));
+      };
+  *|;
+
+  # Start-of-line state: handles the line-anchored `=begin` and `__END__`
+  # markers, and otherwise hands over to expr_value.
+  line_begin := |*
+      w_any;
+
+      # Remember where the block comment starts, then lex its body.
+      '=begin' ( c_space | c_nl_zlen )
+      => { eq_begin_s = ts;
+           fgoto line_comment; };
+
+      # NOTE(review): moving p to pe - 3 presumably skips the remaining input
+      # so that lexing ends at `__END__` -- confirm against the buffer layout.
+      '__END__' ( c_eol - zlen )
+      => { p = pe - 3; };
+
+      # Anything else: set cmd_state, give the character back and continue
+      # in expr_value.
+      c_any
+      => { cmd_state = true; fhold; fgoto expr_value; };
+
+      c_eof => do_eof;
+  *|;
+
+}%%
+
+// Produces the next token via advance_() and, before returning it, records
+// its start and end in last_token_s / last_token_e.
+token_t lexer::advance() {
+  // Named `token` rather than `tok` so it does not shadow the tok() helper
+  // used throughout the lexer actions.
+  auto token = advance_();
+
+  last_token_s = token->start();
+  last_token_e = token->end();
+
+  return token;
+}
+
+// Opens a new scope that starts out empty: nothing from the enclosing
+// environment is carried over into it.
+void lexer::extend_static() {
+  static_env.push(environment());
+}
+
+// Opens a new scope that starts out as a copy of the current top of
+// static_env. When there is no current scope, an empty environment is pushed
+// instead.
+void lexer::extend_dynamic() {
+  if (!static_env.empty()) {
+    // Duplicate the innermost environment on top of the stack.
+    static_env.push(static_env.top());
+    return;
+  }
+
+  static_env.emplace();
+}
+
+// Closes the innermost scope by popping the top of static_env. There is no
+// emptiness check here, so it must only be called while a scope is open.
+void lexer::unextend() {
+  static_env.pop();
+}
+
+// Records `name` in the innermost scope (the top of static_env). There is no
+// emptiness check, so a scope must already be open.
+void lexer::declare(const std::string& name) {
+  environment& scope = static_env.top();
+  scope.insert(name);
+}
+
+// Returns true when `identifier` is present in the innermost scope (the top
+// of static_env). Only that top environment is consulted.
+bool lexer::is_declared(const std::string& identifier) const {
+  const environment& scope = static_env.top();
+  const auto hit = scope.find(identifier);
+
+  return hit != scope.end();
+}
+
+// Returns the stored dedent level and clears it in the same call.
+optional_size lexer::dedentLevel() {
+  auto level = dedentLevel_;
+
+  // Reset the member so that a later call cannot observe a stale value.
+  dedentLevel_ = std::nullopt;
+
+  return level;
+}

+ 309 - 0
third_party/parser/cc/literal.cc

@@ -0,0 +1,309 @@
+#include <ruby_parser/literal.hh>
+#include <cassert>
+#include <cstdlib>
+
+using namespace ruby_parser;
+
+// Sets up the state for one string-like literal.
+//
+// `delimiter` is the opening delimiter. The four bracket characters are
+// paired with their closing counterpart; every other delimiter closes with
+// itself and leaves start_delim empty. Unless the literal is monolithic, the
+// start token is emitted right away.
+literal::literal(lexer& lexer, literal_type type, std::string delimiter, const char* str_s, const char* heredoc_e, bool indent, bool dedent_body, bool label_allowed)
+  : _lexer(lexer)
+  , _nesting(1)
+  , _type(type)
+  , indent(indent)
+  , dedent_body(dedent_body)
+  , label_allowed(label_allowed)
+  , _interp_braces(0)
+  , space_emitted(true)
+  , str_s(str_s)
+  , saved_herebody_s(nullptr)
+  , heredoc_e(heredoc_e)
+{
+  static const char* const bracket_pairs[][2] = {
+    {"(", ")"},
+    {"[", "]"},
+    {"{", "}"},
+    {"<", ">"},
+  };
+
+  // Non-bracket case first; overwritten below when a bracket matches.
+  start_delim = "";
+  end_delim = delimiter;
+
+  for (const auto& pair : bracket_pairs) {
+    if (delimiter == pair[0]) {
+      start_delim = pair[0];
+      end_delim = pair[1];
+      break;
+    }
+  }
+
+  // Monolithic strings are glued into a single token, e.g.
+  // tSTRING_BEG tSTRING_CONTENT tSTRING_END -> tSTRING.
+  monolithic = (type == literal_type::SQUOTE_STRING || type == literal_type::DQUOTE_STRING);
+
+  clear_buffer();
+
+  if (monolithic) {
+    return;
+  }
+
+  emit_start_token();
+}
+
+// True for the four word-list / symbol-list literal kinds.
+bool literal::words() const {
+  switch (_type) {
+    case literal_type::UPPERW_WORDS:
+    case literal_type::LOWERW_WORDS:
+    case literal_type::UPPERI_SYMBOLS:
+    case literal_type::LOWERI_SYMBOLS:
+      return true;
+
+    default:
+      return false;
+  }
+}
+
+// True when the closing delimiter is a single backslash.
+bool literal::backslash_delimited() const {
+  return end_delim == "\\";
+}
+
+// True for the literal kinds listed below; every other kind yields false.
+bool literal::interpolate() const {
+  switch (_type) {
+    case literal_type::DQUOTE_STRING:
+    case literal_type::DQUOTE_HEREDOC:
+    case literal_type::PERCENT_STRING:
+    case literal_type::UPPERQ_STRING:
+    case literal_type::UPPERW_WORDS:
+    case literal_type::UPPERI_SYMBOLS:
+    case literal_type::DQUOTE_SYMBOL:
+    case literal_type::SLASH_REGEXP:
+    case literal_type::PERCENT_REGEXP:
+    case literal_type::LOWERX_XSTRING:
+    case literal_type::BACKTICK_XSTRING:
+    case literal_type::BACKTICK_HEREDOC:
+      return true;
+
+    default:
+      return false;
+  }
+}
+
+// True for both regexp literal kinds (slash form and percent form).
+bool literal::regexp() const {
+  const bool slash_form = _type == literal_type::SLASH_REGEXP;
+  const bool percent_form = _type == literal_type::PERCENT_REGEXP;
+
+  return slash_form || percent_form;
+}
+
+// A literal counts as a heredoc exactly when heredoc_e was set to a non-null
+// pointer.
+bool literal::heredoc() const {
+  return heredoc_e != nullptr;
+}
+
+// Maps the literal's kind to the token type that opens it.
+//
+// Every literal_type listed below returns from its case. Any other value is a
+// programming error: it trips the assert in debug builds and aborts in builds
+// compiled with NDEBUG. Previously such builds fell off the end of this
+// non-void function, which is undefined behaviour.
+token_type literal::start_token_type() const {
+  switch (_type) {
+    case literal_type::SQUOTE_STRING:
+    case literal_type::SQUOTE_HEREDOC:
+    case literal_type::LOWERQ_STRING:
+    case literal_type::DQUOTE_STRING:
+    case literal_type::DQUOTE_HEREDOC:
+    case literal_type::PERCENT_STRING:
+    case literal_type::UPPERQ_STRING:
+      return token_type::tSTRING_BEG;
+
+    case literal_type::LOWERW_WORDS:
+      return token_type::tQWORDS_BEG;
+
+    case literal_type::UPPERW_WORDS:
+      return token_type::tWORDS_BEG;
+
+    case literal_type::LOWERI_SYMBOLS:
+      return token_type::tQSYMBOLS_BEG;
+
+    case literal_type::UPPERI_SYMBOLS:
+      return token_type::tSYMBOLS_BEG;
+
+    case literal_type::SQUOTE_SYMBOL:
+    case literal_type::LOWERS_SYMBOL:
+    case literal_type::DQUOTE_SYMBOL:
+      return token_type::tSYMBEG;
+
+    case literal_type::SLASH_REGEXP:
+    case literal_type::PERCENT_REGEXP:
+      return token_type::tREGEXP_BEG;
+
+    case literal_type::LOWERX_XSTRING:
+    case literal_type::BACKTICK_XSTRING:
+    case literal_type::BACKTICK_HEREDOC:
+      return token_type::tXSTRING_BEG;
+
+    default:
+      break;
+  }
+
+  assert(false && "unhandled literal_type");
+  std::abort();
+}
+
+// Returns the stored _dedentLevel without modifying it.
+optional_size literal::dedentLevel() const {
+  return _dedentLevel;
+}
+
+// Returns true when `c` is one of:
+//   - a whitespace character, but only inside a word-list literal;
+//   - a backslash;
+//   - the literal's single-character opening or closing delimiter.
+bool literal::munge_escape(char c) const {
+  if (words()) {
+    switch (c) {
+      case ' ':
+      case '\t':
+      case '\v':
+      case '\r':
+      case '\f':
+      case '\n':
+        return true;
+      default:
+        break;
+    }
+  }
+
+  if (c == '\\') {
+    return true;
+  }
+
+  if (start_delim.size() == 1 && start_delim.at(0) == c) {
+    return true;
+  }
+
+  return end_delim.size() == 1 && end_delim.at(0) == c;
+}
+
+// Measures the leading indentation of `line` and lowers _dedentLevel to it
+// when it is smaller than the current value (or when no value is set yet).
+// Does nothing unless dedent_body is set. A space counts as one column and a
+// tab advances to the next multiple of 8. Lines made up only of spaces and
+// tabs leave _dedentLevel untouched.
+void literal::infer_indent_level(std::string& line) {
+  if (!dedent_body) {
+    return;
+  }
+
+  size_t width = 0;
+
+  for (const char ch : line) {
+    if (ch == ' ') {
+      ++width;
+    } else if (ch == '\t') {
+      width += (8 - width % 8);
+    } else {
+      // First non-blank character: the indentation is now known.
+      if (!_dedentLevel || *_dedentLevel > width) {
+        _dedentLevel = width;
+      }
+      return;
+    }
+  }
+}
+
+// Counts one more open brace (increments _interp_braces).
+void literal::start_interp_brace() {
+  _interp_braces++;
+}
+
+// Counts one closed brace and returns true when the counter has dropped back
+// to zero.
+bool literal::end_interp_brace_and_try_closing() {
+  return --_interp_braces == 0;
+}
+
+// copied from MRI's include/ruby/ruby.h:
+// true for space, \t, \n, \v, \f and \r.
+static bool rb_isspace(char c) {
+  switch (c) {
+    case ' ':
+    case '\t':
+    case '\n':
+    case '\v':
+    case '\f':
+    case '\r':
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Removes, in place, every leading character of `str` for which rb_isspace
+// is true. A string made up only of such characters becomes empty.
+static void lstrip(std::string& str) {
+  size_t skip = 0;
+
+  for (; skip < str.size() && rb_isspace(str.at(skip)); ++skip) {
+  }
+
+  str.erase(0, skip);
+}
+
+// Compares `delimiter` with the closing delimiter. When `indent` is set, the
+// comparison is made against a copy with its leading whitespace stripped; the
+// argument itself is never modified.
+bool literal::is_delimiter(std::string& delimiter) const {
+  if (!indent) {
+    return end_delim == delimiter;
+  }
+
+  std::string candidate = delimiter;
+  lstrip(candidate);
+  return end_delim == candidate;
+}
+
+// True when `lookahead` begins with a single ':' -- that is, ':' is the first
+// character and the second character, if there is one, is not another ':'.
+static bool lookahead_quoted_label(std::string& lookahead) {
+  if (lookahead.empty() || lookahead.at(0) != ':') {
+    return false;
+  }
+
+  return lookahead.size() == 1 || lookahead.at(1) != ':';
+}
+
+// Updates the nesting depth for `delimiter` and, once the depth reaches zero,
+// emits the token(s) that close the literal.
+//
+// Returns true when the literal was closed, false when it is still open.
+// `ts` / `te` delimit the delimiter in the input; `lookahead` is the text
+// that follows it and is used to recognise a quoted label.
+bool literal::nest_and_try_closing(std::string& delimiter, const char* ts, const char* te, std::string lookahead) {
+  if (!start_delim.empty() && start_delim == delimiter) {
+    _nesting++;
+  } else if (is_delimiter(delimiter)) {
+    _nesting--;
+  }
+
+  if (_nesting != 0) {
+    return false;
+  }
+
+  if (words()) {
+    extend_space(ts, ts);
+  }
+
+  const bool quoted_label = label_allowed
+      && lookahead_quoted_label(lookahead)
+      && start_token_type() == token_type::tSTRING_BEG;
+
+  if (quoted_label) {
+    // This is a quoted label.
+    flush_string();
+    emit(token_type::tLABEL_END, end_delim, ts, te + 1);
+  } else if (monolithic) {
+    // Emit the string as a single token.
+    emit(token_type::tSTRING, buffer, str_s, te);
+  } else {
+    // If this is a heredoc, @buffer contains the sentinel now.
+    // Just throw it out. Lexer flushes the heredoc after each
+    // non-heredoc-terminating \n anyway, so no data will be lost.
+    if (!heredoc()) {
+      flush_string();
+    }
+
+    emit(token_type::tSTRING_END, end_delim, ts, te);
+  }
+
+  return true;
+}
+
+// Flushes pending string content, then emits a tSPACE token for ts..te unless
+// one has already been emitted since the last content.
+void literal::extend_space(const char* ts, const char* te) {
+  flush_string();
+
+  if (space_emitted) {
+    return;
+  }
+
+  std::string empty_value;
+  emit(token_type::tSPACE, empty_value, ts, te);
+  space_emitted = true;
+}
+
+// Appends `str` to the pending buffer. buffer_s is set to `ts` only when it
+// is still null; buffer_e always moves to `te`.
+void literal::extend_string(std::string& str, const char* ts, const char* te) {
+  if (buffer_s == nullptr) {
+    buffer_s = ts;
+  }
+  buffer_e = te;
+
+  buffer.append(str);
+}
+
+// Clears space_emitted, so the next extend_space() call emits a tSPACE again.
+void literal::extend_content() {
+  space_emitted = false;
+}
+
+// Emits the pending buffer as tSTRING_CONTENT and resets it. A literal that
+// was still monolithic first emits its start token and stops being
+// monolithic, even when the buffer turns out to be empty.
+void literal::flush_string() {
+  if (monolithic) {
+    emit_start_token();
+    monolithic = false;
+  }
+
+  if (buffer.empty()) {
+    return;
+  }
+
+  emit(token_type::tSTRING_CONTENT, buffer, buffer_s, buffer_e);
+  clear_buffer();
+  extend_content();
+}
+
+// Empties the pending buffer and forgets its source range.
+void literal::clear_buffer() {
+  buffer.clear();
+  buffer_s = nullptr;
+  buffer_e = nullptr;
+}
+
+// Emits the opening token with an empty value. The token ends at heredoc_e
+// when that is set; otherwise it is assumed to be one character long.
+void literal::emit_start_token() {
+  const int str_type_length = 1 /* TODO @str_type.length */;
+
+  const char* str_e = heredoc_e;
+  if (!str_e) {
+    str_e = str_s + str_type_length;
+  }
+
+  std::string empty_value;
+  emit(start_token_type(), empty_value, str_s, str_e);
+}
+
+// Forwards the token unchanged to the owning lexer's emit().
+void literal::emit(token_type tok, std::string& value, const char* s, const char* e) {
+  _lexer.emit(tok, value, s, e);
+}

+ 34 - 0
third_party/parser/cc/state_stack.cc

@@ -0,0 +1,34 @@
+#include <ruby_parser/state_stack.hh>
+
+using namespace ruby_parser;
+
+// Appends `state` as the new top of the stack.
+void state_stack::push(bool state) {
+  stack.emplace_back(state);
+}
+
+// Removes and returns the top entry. An empty stack yields false and is left
+// unchanged.
+bool state_stack::pop() {
+  if (stack.empty()) {
+    return false;
+  }
+
+  const bool top = stack.back();
+  stack.pop_back();
+  return top;
+}
+
+// Replaces the two topmost entries with their logical OR.
+//
+// Both entries must always be removed. The earlier form `push(pop() || pop())`
+// skipped the second pop() whenever the first one returned true (operator ||
+// short-circuits), leaving the stack one entry too deep in that case. The pops
+// are therefore done as separate statements before combining the results.
+// pop() on an empty stack returns false, so the result is still well defined
+// when fewer than two entries are present.
+void state_stack::lexpop() {
+  const bool top = pop();
+  const bool below = pop();
+
+  push(top || below);
+}
+
+// Removes every entry from the stack.
+void state_stack::clear() {
+  stack.clear();
+}
+
+// Returns the top entry without removing it; false when the stack is empty.
+bool state_stack::active() const {
+  return !stack.empty() && stack.back();
+}
+

+ 24 - 0
third_party/parser/cc/token.cc

@@ -0,0 +1,24 @@
+#include <ruby_parser/token.hh>
+#include <map>
+
+using namespace ruby_parser;
+
+// Stores the token's type, its start/end positions and a copy of its text.
+token::token(token_type type, size_t start, size_t end, const std::string& str)
+    : _type(type)
+    , _start(start)
+    , _end(end)
+    , _string(str)
+{
+}
+
+// Returns the token type given at construction.
+token_type token::type() const {
+  return _type;
+}
+
+// Returns the start position given at construction.
+size_t token::start() const {
+  return _start;
+}
+
+// Returns the end position given at construction.
+size_t token::end() const {
+  return _end;
+}
+
+// Returns a reference to the stored text; it is valid only as long as this
+// token object is.
+const std::string& token::string() const {
+  return _string;
+}

+ 180 - 0
third_party/parser/codegen/builder.rb

@@ -0,0 +1,180 @@
+#!/usr/bin/env ruby
+
+require 'optparse'
+
+# Captures everything between `struct builder {` and the first `}` after it.
+RE_BODY = /struct builder \{(.*?)\}/m
+# Matches one function-pointer member; captures its name and its argument list.
+RE_FUNC = /ForeignPtr\(\*(\w+)\)\((.*?)\);/
+
+# When true, generate_rs adds a cookie assertion to every generated callback.
+CHECK_COOKIES = false
+# C parameter type (as spelled in the header) => Rust FFI type. A C type that
+# is missing from this table makes get_definitions abort.
+TYPE_CONVERSION = {
+  "ForeignPtr" => "NodeId",
+  "const token*" => "*const TokenPtr",
+  "const node_list*" => "*mut NodeListPtr",
+  "bool" => "bool",
+  "size_t" => "size_t",
+  "SelfPtr" => "*mut Builder",
+}
+
+class Argument
+  attr_reader :type
+  attr_reader :name
+
+  def initialize(type, name)
+    @type = type
+    @name = name
+  end
+
+  def as_arg
+    "#{name}: #{type}"
+  end
+
+  def as_safe_arg
+    if type == "*mut Builder"
+      return "&mut self"
+    end
+
+    t = case type
+        when "NodeId"
+          "Option<Rc<Node>>"
+        when "*mut NodeListPtr"
+          "Vec<Rc<Node>>"
+        when "*const TokenPtr"
+          "Option<Token>"
+        when "size_t"
+          "usize"
+        else
+          type
+        end
+
+    "#{name}: #{t}"
+  end
+
+  def as_param
+    name
+  end
+
+  def convert
+    case type
+    when "*mut Builder"
+      "let #{name} = &mut *#{name}"
+    when "NodeId"
+      "let #{name} = node_from_c(builder, #{name})"
+    when "*mut NodeListPtr"
+      "let #{name} = node_list_from_c(builder, #{name})"
+    when "*const TokenPtr"
+      "let #{name} = token_from_c(#{name})"
+    when "size_t"
+      "let #{name} = #{name} as usize"
+    end
+  end
+end
+
+class Interface
+  attr_reader :name
+  attr_reader :args
+
+  def initialize(name, args)
+    @name = name
+    @args = args
+  end
+
+  def arg_block
+    args.map {|a| a.as_arg }.join(", ")
+  end
+
+  def signature
+    "pub #{name}: unsafe extern \"C\" fn(#{arg_block}) -> NodeId"
+  end
+
+  def signature_safe
+    _args = args.map {|a| a.as_safe_arg }.join(", ")
+    "fn #{name}(#{_args}) -> Rc<Node>"
+  end
+
+  def definition
+    "unsafe extern \"C\" fn #{name}(#{arg_block}) -> NodeId"
+  end
+
+  def callsite
+    _args = args[1..-1].map {|a| a.as_param }.join(", ")
+    "(*#{args.first.name}).#{name}(#{_args})"
+  end
+
+  def cookie_check
+    "assert_eq!((*#{args.first.name}).cookie, 12345678)"
+  end
+end
+
+def get_definitions(filename)
+  cpp = File.read(filename)
+  builder = RE_BODY.match(cpp)
+
+  abort("failed to match 'struct builder' body in #{filename}") unless builder
+  defs = builder[1].split("\n").map { |d| d.strip }.reject { |d| d.empty? }
+
+  defs.map do |d|
+    match = RE_FUNC.match(d)
+    abort("bad definition: '#{d}'") unless match
+    method, args = match[1], match[2]
+
+    args = args.split(",").map { |a| a.strip }
+    args = args.map do |arg|
+      arg = arg.split(' ')
+      argname = arg.pop
+      ctype = arg.join(' ')
+      rstype = TYPE_CONVERSION[ctype]
+      abort("unknown C type: #{ctype}") unless rstype
+
+      Argument.new(rstype, argname)
+    end
+
+    Interface.new(method, args)
+  end
+end
+
+def generate_rs(apis, out)
+  out.puts "// This file is autogenerated by builder.rb"
+  out.puts "// DO NOT MODIFY"
+  out.puts "#[repr(C)]"
+  out.puts "struct BuilderInterface {"
+
+  apis.each do |api|
+    out.puts "\t#{api.signature},"
+  end
+
+  out.puts "}"
+  out.puts "\n\n"
+
+  apis.each do |api|
+    out.puts "#{api.definition} {"
+    api.args.each do |arg|
+      cv = arg.convert
+      out.puts "\t#{cv};" if cv
+    end
+    out.puts "\t#{api.cookie_check};" if CHECK_COOKIES
+    out.puts "\t#{api.callsite}.to_raw(builder)"
+    out.puts "}"
+  end
+
+  out.puts "\n\n"
+
+  out.puts "static CALLBACKS: BuilderInterface = BuilderInterface {"
+  apis.each do |api|
+    out.puts "\t#{api.name}: #{api.name},"
+  end
+  out.puts "};"
+end
+
+BUILDER_H = File.join(File.dirname(__FILE__), '..', 'include', 'ruby_parser', 'builder.hh')
+OptionParser.new do |opts|
+  opts.banner = "Usage: ruby builder.rb [--rs=FILE]"
+
+  opts.on("--rs [FILE]") do |file|
+    file = file ? File.open(file, "w") : $stdout
+    abort("failed to open '#{file}'") unless file
+
+    apis = get_definitions(BUILDER_H)
+    generate_rs(apis, file)
+  end
+end.parse!
+

+ 118 - 0
third_party/parser/codegen/generate_diagnostics.cc

@@ -0,0 +1,118 @@
+#include <iostream>
+#include <tuple>
+
+using namespace std;
+
+namespace {
+
+// Table of (enumerator name, message text) pairs. The order of the entries is
+// the order in which both generators below emit them, so the enum values and
+// the string table stay aligned as long as both are produced from this table.
+tuple<string, string> MESSAGES[] = {
+    // Lexer errors
+    {"UnicodePointTooLarge", "invalid Unicode codepoint (too large)"},
+    {"InvalidEscape", "invalid escape character syntax"},
+    {"IncompleteEscape", "incomplete character syntax"},
+    {"InvalidHexEscape", "invalid hex escape"},
+    {"InvalidUnicodeEscape", "invalid Unicode escape"},
+    {"UnterminatedUnicode", "unterminated Unicode escape"},
+    {"EscapeEof", "escape sequence meets end of file"},
+    {"StringEof", "unterminated string meets end of file"},
+    {"RegexpOptions", "unknown regexp options: {}"},
+    {"CvarName", "`{}` is not allowed as a class variable name"},
+    {"IvarName", "`{}` is not allowed as an instance variable name"},
+    {"TrailingInNumber", "trailing `{}` in number"},
+    {"EmptyNumeric", "numeric literal without digits"},
+    {"InvalidOctal", "invalid octal digit"},
+    {"NoDotDigitLiteral", "no .<digit> floating literal anymore; put 0 before dot"},
+    {"BareBackslash", "bare backslash only allowed before newline"},
+    {"Unexpected", "unexpected `{}`"},
+    {"EmbeddedDocument", "embedded document meets end of file (and they embark on a romantic journey)"},
+
+    // Lexer warnings
+    {"InvalidEscapeUse", "invalid character syntax; use ?{}"},
+    {"AmbiguousLiteral", "ambiguous first argument; put parentheses or a space even after the operator"},
+    {"AmbiguousPrefix", "`{}` interpreted as argument prefix"},
+
+    // Parser errors
+    {"NthRefAlias", "cannot define an alias for a back-reference variable"},
+    {"BeginInMethod", "BEGIN in method"},
+    {"BackrefAssignment", "cannot assign to a back-reference variable"},
+    {"InvalidAssignment", "cannot assign to a keyword"},
+    {"ModuleNameConst", "class or module name must be a constant literal"},
+    {"UnexpectedToken", "unexpected token {}"},
+    {"ArgumentConst", "formal argument cannot be a constant"},
+    {"ArgumentIvar", "formal argument cannot be an instance variable"},
+    {"ArgumentGvar", "formal argument cannot be a global variable"},
+    {"ArgumentCvar", "formal argument cannot be a class variable"},
+    {"DuplicateArgument", "duplicate argument name {}"},
+    {"EmptySymbol", "empty symbol literal"},
+    {"OddHash", "odd number of entries for a hash"},
+    {"SingletonLiteral", "cannot define a singleton method for a literal"},
+    {"DynamicConst", "dynamic constant assignment"},
+    {"ConstReassignment", "constant re-assignment"},
+    {"ModuleInDef", "module definition in method body"},
+    {"ClassInDef", "class definition in method body"},
+    {"UnexpectedPercentStr", "{}: unknown type of percent-literal"},
+    {"BlockAndBlockarg", "both block argument and literal block are passed"},
+    {"MasgnAsCondition", "multiple assignment in conditional context"},
+    {"BlockGivenToYield", "block given to yield"},
+    {"InvalidRegexp", "{}"},
+    {"InvalidReturn", "Invalid return in class/module body"},
+
+    // Parser warnings
+    {"UselessElse", "else without rescue is useless"},
+
+    // Parser errors that are not Ruby errors
+    {"InvalidEncoding", "literal contains escape sequences incompatible with UTF-8"},
+
+    // Rewriter diagnostics
+    {"InvalidAction", "cannot {}"},
+    {"Clobbered", "clobbered by: {}"},
+
+    // TypedRuby diagnostics
+    {"NotStaticCpathInGeninst", "Type name in generic instance must be a static constant path"},
+};
+
+// Writes a header to stdout that declares `enum class dclass` with one
+// enumerator per MESSAGES entry, in table order.
+void generateDclass() {
+    cout << "// This file is autogenerated by generate_diagnostics.cc\n"
+         << "#ifndef RUBY_PARSER_DIAGNOSTICS\n"
+         << "#define RUBY_PARSER_DIAGNOSTICS\n"
+         << "namespace ruby_parser {\n"
+         << "// DO NOT MODIFY\n"
+         << "enum class dclass {\n";
+
+    for (const auto &entry : MESSAGES) {
+        cout << "    " << get<0>(entry) << ",\n";
+    }
+
+    cout << "};\n"
+         << "}\n"
+         << "#endif\n";
+}
+
+// Writes a `dclassStrings` array to stdout, inside namespace sorbet::parser,
+// holding the message text of every MESSAGES entry in table order. The texts
+// are emitted verbatim between double quotes, without any escaping.
+void generateDclassStrings() {
+    cout << "namespace sorbet {\n"
+         << "namespace parser {\n"
+         << "const char * dclassStrings[] = {\n";
+
+    for (const auto &entry : MESSAGES) {
+        cout << "    \"" << get<1>(entry) << "\",\n";
+    }
+
+    cout << "};\n"
+         << "}\n"
+         << "}\n";
+}
+
+} // namespace
+
+// Writes one of the two generated sources to stdout, selected by argv[1]:
+//   dclass         the `dclass` enum header
+//   dclassStrings  the matching table of message strings
+// Any other invocation prints a usage line to stderr and returns 1.
+int main(int argc, char **argv) {
+    // argv[0] may be missing in unusual launch environments; streaming a null
+    // char* would be undefined behaviour, so fall back to a fixed name.
+    const string program = (argc > 0 && argv[0] != nullptr) ? argv[0] : "generate_diagnostics";
+    const string mode = (argc == 2) ? argv[1] : "";
+
+    if (mode == "dclass") {
+        generateDclass();
+        return 0;
+    }
+
+    if (mode == "dclassStrings") {
+        generateDclassStrings();
+        return 0;
+    }
+
+    // The program name is streamed explicitly. The earlier
+    // `cout << "Usage: {} ...", (string)argv[0];` applied the comma operator,
+    // which printed a literal "{}" and discarded argv[0]. Usage goes to stderr
+    // so that it cannot end up in a redirected generated file.
+    cerr << "Usage: " << program << " (dclass|dclassStrings)\n";
+    return 1;
+}

+ 131 - 0
third_party/parser/include/ruby_parser/builder.hh

@@ -0,0 +1,131 @@
+#ifndef RUBY_PARSER_BUILDER_HH
+#define RUBY_PARSER_BUILDER_HH
+
+#include <vector>
+#include <memory>
+#include <type_traits>
+
+#include "node.hh"
+#include "token.hh"
+#include "driver.hh"
+
+namespace ruby_parser {
+
+// Table of callbacks, one function pointer per kind of node to build. Every
+// callback receives the opaque builder handle (SelfPtr) first and returns an
+// opaque node handle (ForeignPtr).
+//
+// codegen/builder.rb parses the body of this struct line by line: keep exactly
+// one `ForeignPtr(*name)(...);` member per line and do not put comments or
+// other declarations between the braces.
+struct builder {
+	ForeignPtr(*accessible)(SelfPtr builder, ForeignPtr node);
+	ForeignPtr(*alias)(SelfPtr builder, const token* alias, ForeignPtr to, ForeignPtr from);
+	ForeignPtr(*arg)(SelfPtr builder, const token* name);
+	ForeignPtr(*args)(SelfPtr builder, const token* begin, const node_list* args, const token* end, bool check_args);
+	ForeignPtr(*array)(SelfPtr builder, const token* begin, const node_list* elements, const token* end);
+	ForeignPtr(*assign)(SelfPtr builder, ForeignPtr lhs, const token* eql, ForeignPtr rhs);
+	ForeignPtr(*assignable)(SelfPtr builder, ForeignPtr node);
+	ForeignPtr(*associate)(SelfPtr builder, const token* begin, const node_list* pairs, const token* end);
+	ForeignPtr(*attrAsgn)(SelfPtr builder, ForeignPtr receiver, const token* dot, const token* selector);
+	ForeignPtr(*backRef)(SelfPtr builder, const token* tok);
+	ForeignPtr(*begin)(SelfPtr builder, const token* begin, ForeignPtr body, const token* end);
+	ForeignPtr(*beginBody)(SelfPtr builder, ForeignPtr body, const node_list* rescueBodies, const token* elseTok, ForeignPtr else_, const token* ensure_tok, ForeignPtr ensure);
+	ForeignPtr(*beginKeyword)(SelfPtr builder, const token* begin, ForeignPtr body, const token* end);
+	ForeignPtr(*binaryOp)(SelfPtr builder, ForeignPtr receiver, const token* oper, ForeignPtr arg);
+	ForeignPtr(*block)(SelfPtr builder, ForeignPtr methodCall, const token* begin, ForeignPtr args, ForeignPtr body, const token* end);
+	ForeignPtr(*blockPass)(SelfPtr builder, const token* amper, ForeignPtr arg);
+	ForeignPtr(*blockarg)(SelfPtr builder, const token* amper, const token* name);
+	ForeignPtr(*callLambda)(SelfPtr builder, const token* lambda);
+	ForeignPtr(*call_method)(SelfPtr builder, ForeignPtr receiver, const token* dot, const token* selector, const token* lparen, const node_list* args, const token* rparen);
+	ForeignPtr(*case_)(SelfPtr builder, const token* case_, ForeignPtr expr, const node_list* whenBodies, const token* elseTok, ForeignPtr elseBody, const token* end);
+	ForeignPtr(*character)(SelfPtr builder, const token* char_);
+	ForeignPtr(*complex)(SelfPtr builder, const token* tok);
+	ForeignPtr(*compstmt)(SelfPtr builder, const node_list* node);
+	ForeignPtr(*condition)(SelfPtr builder, const token* cond_tok, ForeignPtr cond, const token* then, ForeignPtr ifTrue, const token* else_, ForeignPtr ifFalse, const token* end);
+	ForeignPtr(*conditionMod)(SelfPtr builder, ForeignPtr ifTrue, ForeignPtr ifFalse, ForeignPtr cond);
+	ForeignPtr(*const_)(SelfPtr builder, const token* name);
+	ForeignPtr(*constFetch)(SelfPtr builder, ForeignPtr scope, const token* colon, const token* name);
+	ForeignPtr(*constGlobal)(SelfPtr builder, const token* colon, const token* name);
+	ForeignPtr(*constOpAssignable)(SelfPtr builder, ForeignPtr node);
+	ForeignPtr(*cvar)(SelfPtr builder, const token* tok);
+	ForeignPtr(*dedentString)(SelfPtr builder, ForeignPtr node, size_t dedentLevel);
+	ForeignPtr(*def_class)(SelfPtr builder, const token* class_, ForeignPtr name, const token* lt_, ForeignPtr superclass, ForeignPtr body, const token* end_);
+	ForeignPtr(*defMethod)(SelfPtr builder, const token* def, const token* name, ForeignPtr args, ForeignPtr body, const token* end);
+	ForeignPtr(*defModule)(SelfPtr builder, const token* module, ForeignPtr name, ForeignPtr body, const token* end_);
+	ForeignPtr(*def_sclass)(SelfPtr builder, const token* class_, const token* lshft_, ForeignPtr expr, ForeignPtr body, const token* end_);
+	ForeignPtr(*defSingleton)(SelfPtr builder, const token* def, ForeignPtr definee, const token* dot, const token* name, ForeignPtr args, ForeignPtr body, const token* end);
+	ForeignPtr(*encodingLiteral)(SelfPtr builder, const token* tok);
+	ForeignPtr(*false_)(SelfPtr builder, const token* tok);
+	ForeignPtr(*fileLiteral)(SelfPtr builder, const token* tok);
+	ForeignPtr(*float_)(SelfPtr builder, const token* tok);
+	ForeignPtr(*floatComplex)(SelfPtr builder, const token* tok);
+	ForeignPtr(*for_)(SelfPtr builder, const token* for_, ForeignPtr iterator, const token* in_, ForeignPtr iteratee, const token* do_, ForeignPtr body, const token* end);
+	ForeignPtr(*gvar)(SelfPtr builder, const token* tok);
+	ForeignPtr(*ident)(SelfPtr builder, const token* tok);
+	ForeignPtr(*index)(SelfPtr builder, ForeignPtr receiver, const token* lbrack, const node_list* indexes, const token* rbrack);
+	ForeignPtr(*indexAsgn)(SelfPtr builder, ForeignPtr receiver, const token* lbrack, const node_list* indexes, const token* rbrack);
+	ForeignPtr(*integer)(SelfPtr builder, const token* tok);
+	ForeignPtr(*ivar)(SelfPtr builder, const token* tok);
+	ForeignPtr(*keywordBreak)(SelfPtr builder, const token* keyword, const token* lparen, const node_list* args, const token* rparen);
+	ForeignPtr(*keywordDefined)(SelfPtr builder, const token* keyword, ForeignPtr arg);
+	ForeignPtr(*keywordNext)(SelfPtr builder, const token* keyword, const token* lparen, const node_list* args, const token* rparen);
+	ForeignPtr(*keywordRedo)(SelfPtr builder, const token* keyword);
+	ForeignPtr(*keywordRetry)(SelfPtr builder, const token* keyword);
+	ForeignPtr(*keywordReturn)(SelfPtr builder, const token* keyword, const token* lparen, const node_list* args, const token* rparen);
+	ForeignPtr(*keywordSuper)(SelfPtr builder, const token* keyword, const token* lparen, const node_list* args, const token* rparen);
+	ForeignPtr(*keywordYield)(SelfPtr builder, const token* keyword, const token* lparen, const node_list* args, const token* rparen);
+	ForeignPtr(*keywordZsuper)(SelfPtr builder, const token* keyword);
+	ForeignPtr(*kwarg)(SelfPtr builder, const token* name);
+	ForeignPtr(*kwoptarg)(SelfPtr builder, const token* name, ForeignPtr value);
+	ForeignPtr(*kwrestarg)(SelfPtr builder, const token* dstar, const token* name);
+	ForeignPtr(*kwsplat)(SelfPtr builder, const token* dstar, ForeignPtr arg);
+	ForeignPtr(*line_literal)(SelfPtr builder, const token* tok);
+	ForeignPtr(*logicalAnd)(SelfPtr builder, ForeignPtr lhs, const token* op, ForeignPtr rhs);
+	ForeignPtr(*logicalOr)(SelfPtr builder, ForeignPtr lhs, const token* op, ForeignPtr rhs);
+	ForeignPtr(*loopUntil)(SelfPtr builder, const token* keyword, ForeignPtr cond, const token* do_, ForeignPtr body, const token* end);
+	ForeignPtr(*loopUntil_mod)(SelfPtr builder, ForeignPtr body, ForeignPtr cond);
+	ForeignPtr(*loop_while)(SelfPtr builder, const token* keyword, ForeignPtr cond, const token* do_, ForeignPtr body, const token* end);
+	ForeignPtr(*loop_while_mod)(SelfPtr builder, ForeignPtr body, ForeignPtr cond);
+	ForeignPtr(*match_op)(SelfPtr builder, ForeignPtr receiver, const token* oper, ForeignPtr arg);
+	ForeignPtr(*multi_assign)(SelfPtr builder, ForeignPtr mlhs, ForeignPtr rhs);
+	ForeignPtr(*multi_lhs)(SelfPtr builder, const token* begin, const node_list* items, const token* end);
+	ForeignPtr(*multi_lhs1)(SelfPtr builder, const token* begin, ForeignPtr item, const token* end);
+	ForeignPtr(*nil)(SelfPtr builder, const token* tok);
+	ForeignPtr(*not_op)(SelfPtr builder, const token* not_, const token* begin, ForeignPtr receiver, const token* end);
+	ForeignPtr(*nth_ref)(SelfPtr builder, const token* tok);
+	ForeignPtr(*op_assign)(SelfPtr builder, ForeignPtr lhs, const token* op, ForeignPtr rhs);
+	ForeignPtr(*optarg)(SelfPtr builder, const token* name, const token* eql, ForeignPtr value);
+	ForeignPtr(*pair)(SelfPtr builder, ForeignPtr key, const token* assoc, ForeignPtr value);
+	ForeignPtr(*pair_keyword)(SelfPtr builder, const token* key, ForeignPtr value);
+	ForeignPtr(*pair_quoted)(SelfPtr builder, const token* begin, const node_list* parts, const token* end, ForeignPtr value);
+	ForeignPtr(*postexe)(SelfPtr builder, const token* begin, ForeignPtr node, const token* rbrace);
+	ForeignPtr(*preexe)(SelfPtr builder, const token* begin, ForeignPtr node, const token* rbrace);
+	ForeignPtr(*procarg0)(SelfPtr builder, ForeignPtr arg);
+	ForeignPtr(*range_exclusive)(SelfPtr builder, ForeignPtr lhs, const token* oper, ForeignPtr rhs);
+	ForeignPtr(*range_inclusive)(SelfPtr builder, ForeignPtr lhs, const token* oper, ForeignPtr rhs);
+	ForeignPtr(*rational)(SelfPtr builder, const token* tok);
+	ForeignPtr(*rational_complex)(SelfPtr builder, const token* tok);
+	ForeignPtr(*regexp_compose)(SelfPtr builder, const token* begin, const node_list* parts, const token* end, ForeignPtr options);
+	ForeignPtr(*regexp_options)(SelfPtr builder, const token* regopt);
+	ForeignPtr(*rescue_body)(SelfPtr builder, const token* rescue, ForeignPtr excList, const token* assoc, ForeignPtr excVar, const token* then, ForeignPtr body);
+	ForeignPtr(*restarg)(SelfPtr builder, const token* star, const token* name);
+	ForeignPtr(*self_)(SelfPtr builder, const token* tok);
+	ForeignPtr(*shadowarg)(SelfPtr builder, const token* name);
+	ForeignPtr(*splat)(SelfPtr builder, const token* star, ForeignPtr arg);
+	ForeignPtr(*splat_mlhs)(SelfPtr builder, const token* star, ForeignPtr arg);
+	ForeignPtr(*string)(SelfPtr builder, const token* string_);
+	ForeignPtr(*string_compose)(SelfPtr builder, const token* begin, const node_list* parts, const token* end);
+	ForeignPtr(*string_internal)(SelfPtr builder, const token* string_);
+	ForeignPtr(*symbol)(SelfPtr builder, const token* symbol);
+	ForeignPtr(*symbol_compose)(SelfPtr builder, const token* begin, const node_list* parts, const token* end);
+	ForeignPtr(*symbol_internal)(SelfPtr builder, const token* symbol);
+	ForeignPtr(*symbols_compose)(SelfPtr builder, const token* begin, const node_list* parts, const token* end);
+	ForeignPtr(*ternary)(SelfPtr builder, ForeignPtr cond, const token* question, ForeignPtr ifTrue, const token* colon, ForeignPtr ifFalse);
+	ForeignPtr(*true_)(SelfPtr builder, const token* tok);
+	ForeignPtr(*unary_op)(SelfPtr builder, const token* oper, ForeignPtr receiver);
+	ForeignPtr(*undefMethod)(SelfPtr builder, const token* undef, const node_list* name_list);
+	ForeignPtr(*when)(SelfPtr builder, const token* when, const node_list* patterns, const token* then, ForeignPtr body);
+	ForeignPtr(*word)(SelfPtr builder, const node_list* parts);
+	ForeignPtr(*words_compose)(SelfPtr builder, const token* begin, const node_list* parts, const token* end);
+	ForeignPtr(*xstring_compose)(SelfPtr builder, const token* begin, const node_list* parts, const token* end);
+};
+
+static_assert(std::is_pod<builder>::value, "`builder` must be a POD type");
+
+}
+
+#endif

+ 63 - 0
third_party/parser/include/ruby_parser/capi.hh

@@ -0,0 +1,63 @@
+#ifndef RUBY_PARSER_CAPI_HH
+#define RUBY_PARSER_CAPI_HH
+
+#include "token.hh"
+#include "node.hh"
+#include "builder.hh"
+#include "driver.hh"
+
+// C-linkage entry points. Only declarations are visible in this header; the
+// notes below are derived from the names and signatures and should be
+// confirmed against the definitions.
+extern "C" {
+
+// Plain-struct form of a diagnostic: level, class, data string and a
+// begin/end position pair.
+struct cdiagnostic {
+	ruby_parser::dlevel level;
+	ruby_parser::dclass type;
+	const char *data;
+	size_t beginPos;
+	size_t endPos;
+};
+
+// --- driver creation, destruction and parsing ---
+
+ruby_parser::typedruby25*
+rbdriver_typedruby25_new(const char* source, size_t source_length, const ruby_parser::builder* builder);
+
+void
+rbdriver_typedruby25_free(ruby_parser::typedruby25* parser);
+
+const void*
+rbdriver_parse(ruby_parser::base_driver* parser, ruby_parser::SelfPtr self);
+
+bool
+rbdriver_in_definition(const ruby_parser::base_driver *driver);
+
+// --- declared-name environment; names are passed as pointer + length ---
+
+bool
+rbdriver_env_is_declared(const ruby_parser::base_driver *p, const char* name, size_t length);
+
+void
+rbdriver_env_declare(ruby_parser::base_driver *p, const char* name, size_t length);
+
+// --- token accessors ---
+
+size_t
+rbtoken_get_start(const ruby_parser::token* tok);
+
+size_t
+rbtoken_get_end(const ruby_parser::token* tok);
+
+// NOTE(review): presumably stores the text pointer in *out_ptr and returns
+// its length -- confirm against the definition.
+size_t
+rbtoken_get_string(const ruby_parser::token* tok, const char** out_ptr);
+
+// --- node list accessors ---
+
+size_t
+rblist_get_length(const ruby_parser::node_list* list);
+
+const void*
+rblist_index(ruby_parser::node_list* list, size_t index);
+
+// --- diagnostics ---
+
+size_t
+rbdriver_diag_get_length(const ruby_parser::base_driver* parser);
+
+void
+rbdriver_diag_get(const ruby_parser::base_driver* parser, size_t index, struct cdiagnostic *diag);
+
+void
+rbdriver_diag_report(ruby_parser::base_driver* driver, const struct cdiagnostic *diag);
+
+}
+
+#endif

+ 40 - 0
third_party/parser/include/ruby_parser/context.hh

@@ -0,0 +1,40 @@
+#ifndef RUBY_PARSER_CONTEXT_HH
+#define RUBY_PARSER_CONTEXT_HH
+
+#include <optional>
+#include <set>
+#include <vector>
+
+namespace ruby_parser {
+
+// Stack of State values with query helpers. Only declarations are visible in
+// this header; see the implementation file for the exact rules behind each
+// query.
+class Context {
+public:
+    // Kinds of entry that can be pushed onto the stack.
+    enum class State {
+        CLASS,
+        SCLASS,
+        DEF,
+        DEFS,
+        BLOCK,
+        LAMBDA,
+    };
+
+    void push(State state);
+    void pop();
+    void reset();
+    bool inClass();
+    bool indirectlyInDef();
+    // "Defintinion" (sic) in the next three names is a misspelling of
+    // "Definition". The names are part of the class interface, so renaming
+    // them means updating every caller as well.
+    bool classDefintinionAllowed();
+    bool moduleDefintinionAllowed();
+    bool dynamicConstDefintinionAllowed();
+
+private:
+    std::vector<State> stack;
+
+    // NOTE(review): presumably std::nullopt means `state` is absent from the
+    // stack -- confirm against the implementation.
+    std::optional<int> firstIndexOfState(State state);
+    std::optional<int> lastIndexOfState(State state);
+    bool contains(State state);
+};
+
+} // namespace ruby_parser
+
+#endif

+ 74 - 0
third_party/parser/include/ruby_parser/diagnostic.hh

@@ -0,0 +1,74 @@
+#ifndef RUBY_PARSER_DIAGNOSTIC_HH
+#define RUBY_PARSER_DIAGNOSTIC_HH
+
+#include <cstddef>
+#include <string>
+#include <vector>
+
+#include "token.hh"
+#include "diagnostic_class.hh"
+
+namespace ruby_parser {
+
+// Diagnostic severity. The values are explicit and increase from NOTE to
+// FATAL; this enum is also used as a field of the C-facing cdiagnostic struct.
+enum class dlevel {
+	NOTE    = 1,
+	WARNING = 2,
+	ERROR   = 3,
+	FATAL   = 4,
+};
+
+// One reported message: a severity, a message class, the source range it
+// refers to and a data string (empty by default).
+class diagnostic {
+public:
+	// Pair of positions that delimits the reported span.
+	struct range {
+		const size_t beginPos;
+		const size_t endPos;
+
+		range(size_t beginPos, size_t endPos) : beginPos(beginPos), endPos(endPos) {}
+	};
+
+	diagnostic(dlevel lvl, dclass type, range location, const std::string& data = "")
+		: level_(lvl), type_(type), location_(location), data_(data) {}
+
+	// Takes the range from token->start() / token->end(). The pointer is
+	// dereferenced unconditionally, so it must not be null.
+	diagnostic(dlevel lvl, dclass type, const token *token, const std::string& data = "")
+		: level_(lvl), type_(type), location_(token->start(), token->end()), data_(data) {}
+
+	dlevel level() const { return level_; }
+
+	dclass error_class() const { return type_; }
+
+	const std::string& data() const { return data_; }
+
+	const range& location() const { return location_; }
+
+private:
+	dlevel level_;
+	dclass type_;
+	range location_;
+	std::string data_;
+};
+
+using diagnostics_t = std::vector<diagnostic>;
+
+}
+
+#endif

+ 166 - 0
third_party/parser/include/ruby_parser/driver.hh

@@ -0,0 +1,166 @@
+#ifndef RUBY_PARSER_DRIVER_HH
+#define RUBY_PARSER_DRIVER_HH
+
+#include <memory>
+
+#include "diagnostic.hh"
+#include "lexer.hh"
+#include "node.hh"
+
+namespace ruby_parser {
+
+struct builder; // forward declaration; this header only refers to builder by reference
+
+using ForeignPtr = const void *; // type-erased pointer; used below for node_list elements and base_driver::ast
+using SelfPtr = const void *; // type-erased pointer handed to base_driver::parse() as `self`
+
+// Growable sequence of type-erased node pointers. The list stores the
+// pointers only; it never dereferences or frees what they point to.
+struct node_list {
+    node_list() = default;
+
+    // Starts the list with a single element.
+    node_list(ForeignPtr node) {
+        nodes.push_back(node);
+    }
+
+    // Assigning a bare pointer to a list is almost certainly a mistake,
+    // so both forms are rejected at compile time.
+    node_list &operator=(const ForeignPtr &other) = delete;
+    node_list &operator=(ForeignPtr &&other) = delete;
+
+    // Number of stored pointers.
+    size_t size() const {
+        return nodes.size();
+    }
+
+    // Appends `ptr` at the end.
+    void emplace_back(const ForeignPtr &ptr) {
+        nodes.push_back(ptr);
+    }
+
+    // Inserts `ptr` before the current first element.
+    void push_front(const ForeignPtr &ptr) {
+        nodes.insert(nodes.cbegin(), ptr);
+    }
+
+    // Bounds-checked element access (std::vector::at semantics).
+    ForeignPtr &at(size_t n) {
+        return nodes.at(n);
+    }
+
+    // Appends every element of `other` to this list. `other` must be a
+    // valid pointer to a different list; it is left holding its elements.
+    void concat(node_list *other) {
+        auto first = std::make_move_iterator(other->nodes.begin());
+        auto last = std::make_move_iterator(other->nodes.end());
+        nodes.insert(nodes.end(), first, last);
+    }
+
+protected:
+    std::vector<ForeignPtr> nodes;
+};
+
+// A node_list together with the tokens that open and close it.
+// All three members are plain pointers; nothing is owned or freed here.
+struct delimited_node_list {
+    delimited_node_list() = default;
+
+    delimited_node_list(const token_t &begin, node_list *inner, const token_t &end)
+        : begin{begin},
+          inner{inner},
+          end{end} {}
+
+    token_t begin{nullptr};     // opening delimiter token
+    node_list *inner{nullptr};  // the delimited list
+    token_t end{nullptr};       // closing delimiter token
+};
+
+// Arguments and body of a block together with the tokens that open and
+// close it. Every member is a non-owning pointer that defaults to null.
+struct delimited_block {
+    delimited_block() = default;
+
+    delimited_block(const token_t &begin, ForeignPtr args, ForeignPtr body, const token_t &end)
+        : begin{begin},
+          args{args},
+          body{body},
+          end{end} {}
+
+    token_t begin{nullptr};    // opening delimiter token
+    ForeignPtr args{nullptr};  // type-erased arguments node
+    ForeignPtr body{nullptr};  // type-erased body node
+    token_t end{nullptr};      // closing delimiter token
+};
+
+// Pairs a token with a type-erased node. Both members are non-owning
+// pointers and default to null.
+struct node_with_token {
+    node_with_token() = default;
+
+    node_with_token(const token_t &token_, ForeignPtr node_)
+        : tok{token_},
+          nod{node_} {}
+
+    token_t tok{nullptr};
+    ForeignPtr nod{nullptr};
+};
+
+// Accumulator for the parts of a `case`: the list of `when` nodes plus an
+// optional else branch (null when absent). `els` is not owned.
+struct case_body {
+    case_body() = default;
+
+    // Starts a body that already has its else branch and no `when`s yet.
+    case_body(node_with_token *else_) : els{else_} {}
+
+    node_list whens;
+    node_with_token *els{nullptr};
+};
+
+// Bundle of per-type object pools used for the parser's helper structures.
+// Each factory method forwards its arguments to the constructor of the
+// corresponding type and returns a pointer that stays owned by the pool.
+//
+// The factory methods share their names with the types they create, which
+// is why every type below is spelled with an explicit ruby_parser:: prefix.
+class mempool {
+    // Second template argument: number of objects per slab of that pool.
+    pool<ruby_parser::node_list, 16> _node_list;
+    pool<ruby_parser::delimited_node_list, 32> _delimited_node_list;
+    pool<ruby_parser::delimited_block, 32> _delimited_block;
+    pool<ruby_parser::node_with_token, 32> _node_with_token;
+    pool<ruby_parser::case_body, 32> _case_body;
+    pool<ruby_parser::state_stack, 8> _stacks;
+
+    // base_driver allocates from _stacks directly (see copy_stack()).
+    friend class base_driver;
+
+public:
+    mempool() = default;
+
+    template <typename... Ts>
+    ruby_parser::node_list *node_list(Ts &&... ctor_args) {
+        return _node_list.alloc(std::forward<Ts>(ctor_args)...);
+    }
+
+    template <typename... Ts>
+    ruby_parser::delimited_node_list *delimited_node_list(Ts &&... ctor_args) {
+        return _delimited_node_list.alloc(std::forward<Ts>(ctor_args)...);
+    }
+
+    template <typename... Ts>
+    ruby_parser::delimited_block *delimited_block(Ts &&... ctor_args) {
+        return _delimited_block.alloc(std::forward<Ts>(ctor_args)...);
+    }
+
+    template <typename... Ts>
+    ruby_parser::node_with_token *node_with_token(Ts &&... ctor_args) {
+        return _node_with_token.alloc(std::forward<Ts>(ctor_args)...);
+    }
+
+    template <typename... Ts>
+    ruby_parser::case_body *case_body(Ts &&... ctor_args) {
+        return _case_body.alloc(std::forward<Ts>(ctor_args)...);
+    }
+};
+
+// Abstract parser driver: owns the lexer, the diagnostics list and the
+// helper-object pools, and exposes the state shared with a concrete
+// grammar, which supplies parse().
+class base_driver {
+public:
+    diagnostics_t diagnostics;
+    const builder &build;
+    lexer lex;
+    mempool alloc;
+
+    // None of the four members below has an in-class initializer; they
+    // are presumably set by the out-of-line constructor (not visible here).
+    bool pending_error;
+    size_t def_level;
+    ForeignPtr ast;
+    token_t last_token;
+
+    base_driver(ruby_version version, const std::string &source, const struct builder &builder);
+    virtual ~base_driver() = default;
+
+    // Runs the grammar; `self` is passed through as an opaque pointer.
+    virtual ForeignPtr parse(SelfPtr self) = 0;
+
+    // A keyword-argument name is valid unless its first character is an
+    // ASCII uppercase letter. Uses string::at(0), so an empty token string
+    // throws std::out_of_range.
+    bool valid_kwarg_name(const token *name) {
+        const char first = name->string().at(0);
+        return first < 'A' || first > 'Z';
+    }
+
+    // Returns a pool-allocated snapshot of the lexer's cmdarg stack.
+    ruby_parser::state_stack *copy_stack() {
+        return alloc._stacks.alloc(lex.cmdarg);
+    }
+
+    // Overwrites the lexer's cmdarg stack with `*stack` (must not be null).
+    void replace_stack(ruby_parser::state_stack *stack) {
+        lex.cmdarg = *stack;
+    }
+
+    // Records a diagnostic for the span [begin, end] and flags a pending
+    // error when the level is exactly dlevel::ERROR.
+    // NOTE(review): dlevel::FATAL does not set pending_error - confirm
+    // that this is intended.
+    void external_diagnostic(dlevel lvl, dclass cls, size_t begin, size_t end, const std::string &msg) {
+        const bool is_error = (lvl == dlevel::ERROR);
+        diagnostics.emplace_back(lvl, cls, diagnostic::range(begin, end), msg);
+        if (is_error) {
+            pending_error = true;
+        }
+    }
+};
+
+// Concrete driver; judging by the name it targets the Ruby 2.5 grammar
+// (TODO confirm against the out-of-line constructor).
+//
+// parse() and the destructor are marked `override` so that any drift from
+// the base_driver signatures becomes a compile error instead of silently
+// introducing a new, unrelated virtual function.
+class typedruby25 : public base_driver {
+public:
+    typedruby25(const std::string &source, const struct builder &builder);
+    ForeignPtr parse(SelfPtr self) override;
+    ~typedruby25() override = default;
+};
+
+} // namespace ruby_parser
+
+#endif

+ 164 - 0
third_party/parser/include/ruby_parser/lexer.hh

@@ -0,0 +1,164 @@
+#ifndef RUBY_PARSER_LEXER_HH
+#define RUBY_PARSER_LEXER_HH
+
+#include <map>
+#include <memory>
+#include <optional>
+#include <queue>
+#include <set>
+#include <stack>
+#include <string>
+
+#include "context.hh"
+#include "diagnostic.hh"
+#include "literal.hh"
+#include "pool.hh"
+#include "state_stack.hh"
+#include "token.hh"
+
+namespace ruby_parser {
+enum class ruby_version { // Language version selector passed to the lexer and base_driver constructors.
+    RUBY_18, // presumably Ruby 1.8; the remaining enumerators follow the same naming scheme
+    RUBY_19,
+    RUBY_20,
+    RUBY_21,
+    RUBY_22,
+    RUBY_23,
+    RUBY_24,
+    RUBY_25,
+};
+
+class lexer { // Tokenizer over a source buffer; tokens are handed out one at a time by advance().
+public:
+    using environment = std::set<std::string>; // one scope's set of names; presumably what declare()/is_declared() consult
+    struct token_table_entry { // pairs a spelling with the token_type to emit for it (consumed by emit_table)
+        const char *token;
+        token_type type;
+    };
+
+    enum class num_xfrm_type { // which numeric-suffix transformation applies (see num_xfrm below)
+        NONE,
+        RATIONAL,
+        IMAGINARY,
+        RATIONAL_IMAGINARY,
+        FLOAT,
+        FLOAT_IMAGINARY,
+    };
+
+private:
+    diagnostics_t &diagnostics; // not owned: reference member
+    pool<token, 64> mempool; // pool of token objects, 64 per slab
+
+    ruby_version version;
+    const std::string source_buffer; // const copy of the source; makes lexer non-assignable
+
+    std::stack<environment> static_env;
+    std::stack<literal> literal_stack;
+    std::queue<token_t> token_queue;
+
+    int cs;
+    const char *_p;
+    const char *_pe;
+    const char *ts;
+    const char *te;
+    int act;
+
+    // State before =begin / =end block comment
+    int cs_before_block_comment;
+
+    std::vector<int> stack; // NOTE(review): <vector> is not included directly; it arrives via diagnostic.hh
+    int top;
+
+    const char *eq_begin_s; // location of last encountered =begin
+    const char *sharp_s;    // location of last encountered #
+    const char *newline_s;  // location of last encountered newline
+
+    // Ruby 1.9 ->() lambdas emit a distinct token if do/{ is
+    // encountered after a matching closing parenthesis.
+    size_t paren_nest;
+    std::stack<size_t> lambda_stack;
+
+    // If the lexer is in `command state' (aka expr_value)
+    // at the entry to #advance, it will transition to expr_cmdarg
+    // instead of expr_arg at certain points.
+    bool command_start;
+
+    int num_base;             // last numeric base
+    const char *num_digits_s; // starting position of numeric digits
+    const char *num_suffix_s; // starting position of numeric suffix
+    num_xfrm_type num_xfrm;   // numeric suffix-induced transformation
+
+    const char *escape_s;                // starting position of current sequence
+    std::unique_ptr<std::string> escape; // last escaped sequence, as string
+
+    const char *herebody_s; // starting position of current heredoc line
+
+    // After encountering the closing line of <<~SQUIGGLY_HEREDOC,
+    // we store the indentation level and give it out to the parser
+    // on request. It is not possible to infer indentation level just
+    // from the AST because escape sequences such as `\ ` or `\t` are
+    // expanded inside the lexer, but count as non-whitespace for
+    // indentation purposes.
+    optional_size dedentLevel_;
+
+    void check_stack_capacity();
+    int stack_pop();
+    int arg_or_cmdarg(int cmd_state);
+    void emit_comment(const char *s, const char *e);
+    char unescape(uint32_t cp); // NOTE(review): uint32_t needs <cstdint>, which this header does not include directly
+    std::string tok();
+    std::string tok(const char *start);
+    std::string tok(const char *start, const char *end);
+    void emit(token_type type);
+    void emit(token_type type, const std::string &str);
+    void emit(token_type type, const std::string &str, const char *start, const char *end);
+    void emit_do(bool do_block = false);
+    void emit_table(const token_table_entry *table);
+    void emit_num(const std::string &num);
+    std::string convert_base(const std::string &num, int num_base);
+    diagnostic::range range(const char *start, const char *end);
+    void diagnostic_(dlevel level, dclass type, const std::string &data = "");
+    void diagnostic_(dlevel level, dclass type, diagnostic::range &&range, const std::string &data = "");
+    template <typename... Args> int push_literal(Args &&... args);
+    int next_state_for_literal(literal &lit);
+    literal &literal_();
+    int pop_literal();
+
+    token_t advance_();
+
+    // literal needs to call emit:
+    friend class literal;
+
+public:
+    state_stack cond;
+    state_stack cmdarg; // snapshotted and restored by base_driver::copy_stack()/replace_stack()
+
+    size_t last_token_s;
+    size_t last_token_e;
+
+    bool in_kwarg; // true at the end of "def foo a:"
+    Context context;
+
+    lexer(diagnostics_t &diag, ruby_version version, const std::string &source_buffer_);
+
+    token_t advance();
+
+    void set_state_expr_beg();
+    void set_state_expr_end();
+    void set_state_expr_endarg();
+    void set_state_expr_fname();
+    void set_state_expr_value();
+
+    void extend_static();
+    void extend_dynamic();
+    void unextend();
+    void declare(const std::string &name);
+    bool is_declared(const std::string &identifier) const;
+
+    optional_size dedentLevel(); // accessor for the squiggly-heredoc indentation described at dedentLevel_
+};
+} // namespace ruby_parser
+
+#include "driver.hh"
+
+#endif

+ 104 - 0
third_party/parser/include/ruby_parser/literal.hh

@@ -0,0 +1,104 @@
+#ifndef RUBY_PARSER_LITERAL_HH
+#define RUBY_PARSER_LITERAL_HH
+
+#include <string>
+#include <utility>
+#include <optional>
+
+#include "token.hh"
+
+namespace ruby_parser {
+  enum class literal_type { // Kind tag carried by every `literal` (stored in literal::_type).
+    SQUOTE_STRING,
+    SQUOTE_HEREDOC,
+    LOWERQ_STRING,
+    DQUOTE_STRING,
+    DQUOTE_HEREDOC,
+    PERCENT_STRING,
+    UPPERQ_STRING,
+    LOWERW_WORDS,
+    UPPERW_WORDS,
+    LOWERI_SYMBOLS,
+    UPPERI_SYMBOLS,
+    SQUOTE_SYMBOL,
+    LOWERS_SYMBOL,
+    DQUOTE_SYMBOL,
+    SLASH_REGEXP,
+    PERCENT_REGEXP,
+    LOWERX_XSTRING,
+    BACKTICK_XSTRING,
+    BACKTICK_HEREDOC,
+  };
+
+  using optional_size = std::optional<size_t>; // "a size, or nothing"; used for dedent levels in literal and lexer
+
+  class lexer; // forward declaration: literal only holds a lexer&; lexer.hh is included at the end of this header
+
+  class literal { // State of one string-like literal; keeps a reference back to the lexer it belongs to.
+    lexer& _lexer; // not owned; the referenced lexer must outlive this literal
+    size_t _nesting;
+    literal_type _type;
+    std::string start_delim;
+    std::string end_delim;
+    bool indent;
+    bool dedent_body;
+    bool label_allowed;
+    optional_size _dedentLevel;
+    size_t _interp_braces;
+    bool space_emitted;
+    bool monolithic;
+    std::string buffer;
+    const char* buffer_s;
+    const char* buffer_e;
+
+  public:
+    // lexer needs access to these:
+    const char* str_s;
+    const char* saved_herebody_s;
+    const char* heredoc_e;
+
+    literal(lexer& lexer, literal_type type, std::string delimiter, const char* str_s, const char* heredoc_e = nullptr, bool indent = false, bool dedent_body = false, bool label_allowed = false);
+
+    // delete copy constructor to prevent accidental copies. we never
+    // legitimately need to copy literal.
+    literal(const literal&) = delete;
+
+    bool words() const;
+    bool backslash_delimited() const;
+    bool interpolate() const;
+    bool regexp() const;
+    bool heredoc() const;
+
+    token_type start_token_type() const;
+
+    optional_size dedentLevel() const;
+
+    bool munge_escape(char c) const;
+
+    void infer_indent_level(std::string& line); // takes a non-const reference; whether `line` is modified is not visible here
+
+    void start_interp_brace();
+    bool end_interp_brace_and_try_closing();
+
+    bool nest_and_try_closing(std::string& delimiter, const char* ts, const char* te, std::string lookahead = "");
+
+    void extend_space(const char* ts, const char* te);
+    void extend_string(std::string& str, const char* ts, const char* te);
+    void extend_content();
+
+    void flush_string();
+
+  private:
+    bool is_delimiter(std::string& delimiter) const;
+    void clear_buffer();
+    void emit_start_token();
+    void emit(token_type tok, std::string& value, const char* s, const char* e); // presumably forwards to lexer::emit (lexer befriends literal for that purpose)
+  };
+}
+
+// there is a circular dependency between lexer and literal.
+// lexer was forward-declared above, but now we need to include it
+// properly.
+#include "lexer.hh"
+
+#endif

+ 125 - 0
third_party/parser/include/ruby_parser/node.hh

@@ -0,0 +1,125 @@
+#ifndef RUBY_PARSER_NODE_HH
+#define RUBY_PARSER_NODE_HH
+
+#include <memory>
+#include <vector>
+
+#include "token.hh"
+
+namespace ruby_parser {
+  enum class node_type { // AST node kinds: regular kinds are numbered 1..103, internal pseudo-nodes are negative.
+    ENCODING_LITERAL      = 1,
+    FILE_LITERAL          = 2,
+    LINE_LITERAL          = 3,
+    ALIAS                 = 4,
+    AND_ASGN              = 5,
+    ARG                   = 6,
+    ARG_EXPR              = 7,
+    ARGS                  = 8,
+    ARRAY                 = 9,
+    BACK_REF              = 10,
+    BEGIN                 = 11,
+    BLOCK                 = 12,
+    BLOCK_PASS            = 13,
+    BLOCKARG              = 14,
+    BLOCKARG_EXPR         = 15,
+    CASE                  = 16,
+    CBASE                 = 17,
+    CLASS                 = 18,
+    CONST                 = 19,
+    CVAR                  = 20,
+    DEF                   = 21,
+    DEFS                  = 22,
+    DSTR                  = 23,
+    DSYM                  = 24,
+    ENSURE                = 25,
+    EXRANGE               = 26,
+    FALSE                 = 27,
+    FOR                   = 28,
+    GVAR                  = 29,
+    HASH                  = 30,
+    IDENT                 = 31,
+    IF                    = 32,
+    INT                   = 33,
+    INRANGE               = 34,
+    IVAR                  = 35,
+    KWARG                 = 36,
+    KWBEGIN               = 37,
+    KWOPTARG              = 38,
+    KWRESTARG             = 39,
+    KWSPLAT               = 40,
+    LAMBDA                = 41,
+    MASGN                 = 42,
+    MATCH_CURRENT_LINE    = 43,
+    MATCH_WITH_LVASGN     = 44,
+    MLHS                  = 45,
+    MODULE                = 46,
+    NIL                   = 47,
+    NOT                   = 48,
+    NTH_REF               = 49,
+    OBJC_KWARG            = 50,
+    OBJC_RESTARG          = 51,
+    OP_ASGN               = 52,
+    OPTARG                = 53,
+    OR_ASGN               = 54,
+    PAIR                  = 55,
+    POSTEXE               = 56,
+    PREEXE                = 57,
+    PROTOTYPE             = 58,
+    REGEXP                = 59,
+    REGOPT                = 60,
+    RESBODY               = 61,
+    RESCUE                = 62,
+    RESTARG               = 63,
+    RESTARG_EXPR          = 64,
+    SCLASS                = 65,
+    SELF                  = 66,
+    SEND                  = 67,
+    SHADOWARG             = 68,
+    SPLAT                 = 69,
+    STR                   = 70,
+    SYM                   = 71,
+    TR_ARRAY              = 72,
+    TR_CAST               = 73,
+    TR_CPATH              = 74,
+    TR_GENARGS            = 75,
+    TR_GENDECL            = 76,
+    TR_GENINST            = 77,
+    TR_HASH               = 78,
+    TR_IVARDECL           = 79,
+    TR_NIL                = 80,
+    TR_NILLABLE           = 81,
+    TR_OR                 = 82,
+    TR_PROC               = 83,
+    TR_SPECIAL            = 84,
+    TR_TUPLE              = 85,
+    TRUE                  = 86,
+    TYPED_ARG             = 87,
+    UNDEF                 = 88,
+    WHEN                  = 89,
+    XSTR                  = 90,
+    WHILE                 = 91,
+    UNTIL                 = 92,
+    AND                   = 93,
+    OR                    = 94,
+    SUPER                 = 95,
+    YIELD                 = 96,
+    RETURN                = 97,
+    BREAK                 = 98,
+    NEXT                  = 99,
+    DEFINED               = 100,
+    REDO                  = 101,
+    RETRY                 = 102,
+    ZSUPER                = 103,
+
+    // Internal pseudo-nodes; they use negative values so they can never
+    // collide with the regular kinds above.
+    // TODO: move these out of node.hh into something parser-specific.
+    NODE_LIST             = -1,
+    NODE_DELIMITED_LIST   = -2,
+    NODE_DELIMITED_BLOCK  = -3,
+    NODE_WITH_TOKEN       = -4,
+  };
+}
+
+#endif

+ 67 - 0
third_party/parser/include/ruby_parser/pool.hh

@@ -0,0 +1,67 @@
+#ifndef RUBY_PARSER_POOL_HH
+#define RUBY_PARSER_POOL_HH
+
+#include <cassert>
+#include <cstddef>
+#include <new>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+// Slab-based object pool.
+//
+// Objects are constructed in place inside fixed-size slabs holding N
+// objects each. When the current slab is full a fresh one is started and
+// the full one is parked in _history. Slabs are heap-allocated and never
+// moved, so pointers returned by alloc() stay valid until the pool is
+// destroyed, at which point every object's destructor runs. Individual
+// objects cannot be released early.
+//
+// The pool owns its slabs through raw pointers, so it is deliberately
+// non-copyable: a copy would make two pools delete the same slabs.
+template<typename T, std::size_t N>
+class pool {
+	static_assert(N > 0, "pool slabs must hold at least one object");
+
+public:
+	pool() : _slab(new slab) {}
+
+	// Copying would double-delete every slab; forbid it. Declaring these
+	// also suppresses the implicit move operations.
+	pool(const pool &) = delete;
+	pool &operator=(const pool &) = delete;
+
+	// Constructs a T from `args` inside the pool and returns a pointer to
+	// it. The object stays owned by the pool.
+	template <typename... Args>
+	T *alloc(Args&&... args) {
+		if (_slab->is_full()) {
+			push_slab();
+		}
+		return _slab->alloc(std::forward<Args>(args)...);
+	}
+
+	// Destroys every object ever allocated and releases all slabs.
+	~pool() {
+		delete _slab;
+		for (slab *old : _history) {
+			delete old;
+		}
+	}
+
+protected:
+	// Fixed-capacity block of raw, suitably aligned storage for N objects.
+	class slab {
+		typename std::aligned_storage<sizeof(T), alignof(T)>::type data[N];
+		std::size_t _size = 0; // number of live objects in `data`
+
+	public:
+		inline bool is_full() const {
+			return _size >= N;
+		}
+
+		// Placement-constructs a T in the next free cell. _size is bumped
+		// only after the constructor succeeded, so a throwing constructor
+		// leaves the slab unchanged.
+		template<typename ...Args>
+		T *alloc(Args&&... args)
+		{
+			assert(!is_full());
+			T *p = reinterpret_cast<T*>(data+_size);
+			new(p) T(std::forward<Args>(args)...);
+			++_size;
+			return p;
+		}
+
+		// Runs the destructor of every object constructed in this slab.
+		~slab() {
+			for (std::size_t pos = 0; pos < _size; ++pos) {
+				reinterpret_cast<T*>(data+pos)->~T();
+			}
+		}
+	};
+
+	using slab_t = slab*;
+
+	std::vector<slab *> _history; // full slabs, kept alive until destruction
+	slab *_slab;                  // slab currently being filled; never null
+
+	// Retires the current slab into _history and starts a new one.
+	void push_slab() {
+		slab *newb = new slab;
+		_history.emplace_back(_slab);
+		_slab = newb;
+	}
+};
+
+#endif

+ 21 - 0
third_party/parser/include/ruby_parser/state_stack.hh

@@ -0,0 +1,21 @@
+#ifndef RUBY_PARSER_STATE_STACK_HH
+#define RUBY_PARSER_STATE_STACK_HH
+
+#include <vector>
+#include <memory>
+
+namespace ruby_parser {
+  class state_stack { // Stack of boolean flags; the lexer keeps two of these (cond and cmdarg).
+    std::vector<bool> stack; // note: std::vector<bool> is the packed-bit specialization
+
+  public:
+    void push(bool state);
+    bool pop();
+    void lexpop();
+    void clear();
+    bool active() const;
+  };
+}
+
+#endif
+

+ 185 - 0
third_party/parser/include/ruby_parser/token.hh

@@ -0,0 +1,185 @@
+#ifndef RUBY_PARSER_TOKEN_HH
+#define RUBY_PARSER_TOKEN_HH
+
+#include <cstddef>
+#include <memory>
+#include <string>
+
+// X-macro listing every token as XX(name, value). These values are mirrored in
+// src/grammars/*.y; any change here *must* be applied to the grammars as well.
+#define RUBY_PARSER_TOKEN_TYPES(XX) \
+    XX(eof, 0)                      \
+    XX(error, -1)                   \
+    XX(kCLASS, 1001)                \
+    XX(kMODULE, 1002)               \
+    XX(kDEF, 1003)                  \
+    XX(kUNDEF, 1004)                \
+    XX(kBEGIN, 1005)                \
+    XX(kRESCUE, 1006)               \
+    XX(kENSURE, 1007)               \
+    XX(kEND, 1008)                  \
+    XX(kIF, 1009)                   \
+    XX(kUNLESS, 1010)               \
+    XX(kTHEN, 1011)                 \
+    XX(kELSIF, 1012)                \
+    XX(kELSE, 1013)                 \
+    XX(kCASE, 1014)                 \
+    XX(kWHEN, 1015)                 \
+    XX(kWHILE, 1016)                \
+    XX(kUNTIL, 1017)                \
+    XX(kFOR, 1018)                  \
+    XX(kBREAK, 1019)                \
+    XX(kNEXT, 1020)                 \
+    XX(kREDO, 1021)                 \
+    XX(kRETRY, 1022)                \
+    XX(kIN, 1023)                   \
+    XX(kDO, 1024)                   \
+    XX(kDO_COND, 1025)              \
+    XX(kDO_BLOCK, 1026)             \
+    XX(kDO_LAMBDA, 1027)            \
+    XX(kRETURN, 1028)               \
+    XX(kYIELD, 1029)                \
+    XX(kSUPER, 1030)                \
+    XX(kSELF, 1031)                 \
+    XX(kNIL, 1032)                  \
+    XX(kTRUE, 1033)                 \
+    XX(kFALSE, 1034)                \
+    XX(kAND, 1035)                  \
+    XX(kOR, 1036)                   \
+    XX(kNOT, 1037)                  \
+    XX(kIF_MOD, 1038)               \
+    XX(kUNLESS_MOD, 1039)           \
+    XX(kWHILE_MOD, 1040)            \
+    XX(kUNTIL_MOD, 1041)            \
+    XX(kRESCUE_MOD, 1042)           \
+    XX(kALIAS, 1043)                \
+    XX(kDEFINED, 1044)              \
+    XX(klBEGIN, 1045)               \
+    XX(klEND, 1046)                 \
+    XX(k__LINE__, 1047)             \
+    XX(k__FILE__, 1048)             \
+    XX(k__ENCODING__, 1049)         \
+    XX(tIDENTIFIER, 1050)           \
+    XX(tFID, 1051)                  \
+    XX(tGVAR, 1052)                 \
+    XX(tIVAR, 1053)                 \
+    XX(tCONSTANT, 1054)             \
+    XX(tLABEL, 1055)                \
+    XX(tCVAR, 1056)                 \
+    XX(tNTH_REF, 1057)              \
+    XX(tBACK_REF, 1058)             \
+    XX(tSTRING_CONTENT, 1059)       \
+    XX(tINTEGER, 1060)              \
+    XX(tFLOAT, 1061)                \
+    XX(tUPLUS, 1062)                \
+    XX(tUMINUS, 1063)               \
+    XX(tUNARY_NUM, 1064)            \
+    XX(tPOW, 1065)                  \
+    XX(tCMP, 1066)                  \
+    XX(tEQ, 1067)                   \
+    XX(tEQQ, 1068)                  \
+    XX(tNEQ, 1069)                  \
+    XX(tEQL, 1070)                  \
+    XX(tGEQ, 1071)                  \
+    XX(tLEQ, 1072)                  \
+    XX(tANDOP, 1073)                \
+    XX(tOROP, 1074)                 \
+    XX(tMATCH, 1075)                \
+    XX(tNMATCH, 1076)               \
+    XX(tDOT, 1077)                  \
+    XX(tDOT2, 1078)                 \
+    XX(tDOT3, 1079)                 \
+    XX(tAREF, 1080)                 \
+    XX(tASET, 1081)                 \
+    XX(tLSHFT, 1082)                \
+    XX(tRSHFT, 1083)                \
+    XX(tCOLON2, 1084)               \
+    XX(tCOLON3, 1085)               \
+    XX(tOP_ASGN, 1086)              \
+    XX(tASSOC, 1087)                \
+    XX(tLPAREN, 1088)               \
+    XX(tLPAREN2, 1089)              \
+    XX(tRPAREN, 1090)               \
+    XX(tLPAREN_ARG, 1091)           \
+    XX(tLBRACK, 1092)               \
+    XX(tLBRACK2, 1093)              \
+    XX(tRBRACK, 1094)               \
+    XX(tLBRACE, 1095)               \
+    XX(tLBRACE_ARG, 1096)           \
+    XX(tSTAR, 1097)                 \
+    XX(tSTAR2, 1098)                \
+    XX(tAMPER, 1099)                \
+    XX(tAMPER2, 1100)               \
+    XX(tTILDE, 1101)                \
+    XX(tPERCENT, 1102)              \
+    XX(tDIVIDE, 1103)               \
+    XX(tDSTAR, 1104)                \
+    XX(tPLUS, 1105)                 \
+    XX(tMINUS, 1106)                \
+    XX(tLT, 1107)                   \
+    XX(tGT, 1108)                   \
+    XX(tPIPE, 1109)                 \
+    XX(tBANG, 1110)                 \
+    XX(tCARET, 1111)                \
+    XX(tLCURLY, 1112)               \
+    XX(tRCURLY, 1113)               \
+    XX(tBACK_REF2, 1114)            \
+    XX(tSYMBEG, 1115)               \
+    XX(tSTRING_BEG, 1116)           \
+    XX(tXSTRING_BEG, 1117)          \
+    XX(tREGEXP_BEG, 1118)           \
+    XX(tREGEXP_OPT, 1119)           \
+    XX(tWORDS_BEG, 1120)            \
+    XX(tQWORDS_BEG, 1121)           \
+    XX(tSYMBOLS_BEG, 1122)          \
+    XX(tQSYMBOLS_BEG, 1123)         \
+    XX(tSTRING_DBEG, 1124)          \
+    XX(tSTRING_DVAR, 1125)          \
+    XX(tSTRING_END, 1126)           \
+    XX(tSTRING_DEND, 1127)          \
+    XX(tSTRING, 1128)               \
+    XX(tSYMBOL, 1129)               \
+    XX(tNL, 1130)                   \
+    XX(tEH, 1131)                   \
+    XX(tCOLON, 1132)                \
+    XX(tCOMMA, 1133)                \
+    XX(tSPACE, 1134)                \
+    XX(tSEMI, 1135)                 \
+    XX(tLAMBDA, 1136)               \
+    XX(tLAMBEG, 1137)               \
+    XX(tCHARACTER, 1138)            \
+    XX(tRATIONAL, 1139)             \
+    XX(tIMAGINARY, 1140)            \
+    XX(tLABEL_END, 1141)            \
+    XX(tANDDOT, 1142)               \
+    XX(tRATIONAL_IMAGINARY, 1143)   \
+    XX(tFLOAT_IMAGINARY, 1144)
+
+namespace ruby_parser {
+enum class token_type : int { // enumerators are generated from RUBY_PARSER_TOKEN_TYPES; none are generated when YYBISON is defined
+#ifndef YYBISON
+#define XX(name, value) name = value,
+    RUBY_PARSER_TOKEN_TYPES(XX)
+#undef XX
+#endif
+};
+
+class token { // One lexed token: its type, its start/end positions and its text; read-only after construction.
+    token_type _type;
+    size_t _start;
+    size_t _end;
+    std::string _string;
+
+public:
+    token(token_type type, size_t start, size_t end, const std::string &str);
+
+    token_type type() const;
+    size_t start() const; // used together with end() as a diagnostic's range (see diagnostic's token constructor)
+    size_t end() const;
+    const std::string &string() const;
+};
+
+using token_t = token *; // raw token pointer; may be null (driver.hh default-initializes its token_t members to nullptr)
+} // namespace ruby_parser
+
+#endif