getty
/
diamantaire


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
#include <ruby_parser/literal.hh>
#include <cassert>
#include <cstdlib>

using namespace ruby_parser;

// Builds the lexer-side state for one string-like literal.
//
// `delimiter` is the opening delimiter text. For the four bracket openers
// ( [ { < the matching closer becomes end_delim and the opener is remembered
// in start_delim so nesting can be tracked; for any other delimiter
// start_delim is empty and the delimiter itself closes the literal.
//
// SQUOTE_STRING and DQUOTE_STRING start out "monolithic"; every other type
// emits its start token right away.
literal::literal(lexer& lexer, literal_type type, std::string delimiter, const char* str_s, const char* heredoc_e, bool indent, bool dedent_body, bool label_allowed)
  : _lexer(lexer)
  , _nesting(1)
  , _type(type)
  , indent(indent)
  , dedent_body(dedent_body)
  , label_allowed(label_allowed)
  , _interp_braces(0)
  , space_emitted(true)
  , str_s(str_s)
  , saved_herebody_s(nullptr)
  , heredoc_e(heredoc_e)
{
  // Opening bracket -> closing bracket.
  static const char* const bracket_pairs[][2] = {
    { "(", ")" },
    { "[", "]" },
    { "{", "}" },
    { "<", ">" },
  };

  // Default: a non-bracket delimiter closes itself and cannot nest.
  start_delim = "";
  end_delim = delimiter;

  for (const auto& pair : bracket_pairs) {
    if (delimiter == pair[0]) {
      start_delim = pair[0];
      end_delim = pair[1];
      break;
    }
  }

  // Monolithic strings are glued into a single token, e.g.
  // tSTRING_BEG tSTRING_CONTENT tSTRING_END -> tSTRING.
  const bool plain_squote = (type == literal_type::SQUOTE_STRING);
  const bool plain_dquote = (type == literal_type::DQUOTE_STRING);
  monolithic = plain_squote || plain_dquote;

  clear_buffer();

  if (monolithic) {
    return;
  }

  emit_start_token();
}

// True for the four list-style literal types: UPPERW_WORDS, LOWERW_WORDS,
// UPPERI_SYMBOLS and LOWERI_SYMBOLS.
bool literal::words() const {
  switch (_type) {
    case literal_type::UPPERW_WORDS:
    case literal_type::LOWERW_WORDS:
    case literal_type::UPPERI_SYMBOLS:
    case literal_type::LOWERI_SYMBOLS:
      return true;

    default:
      return false;
  }
}

// True when the closing delimiter is exactly a single backslash character.
bool literal::backslash_delimited() const {
  return end_delim.compare("\\") == 0;
}

// True when _type is one of the literal types listed below; every other
// type yields false.
bool literal::interpolate() const {
  switch (_type) {
    case literal_type::DQUOTE_STRING:
    case literal_type::DQUOTE_HEREDOC:
    case literal_type::PERCENT_STRING:
    case literal_type::UPPERQ_STRING:
    case literal_type::UPPERW_WORDS:
    case literal_type::UPPERI_SYMBOLS:
    case literal_type::DQUOTE_SYMBOL:
    case literal_type::SLASH_REGEXP:
    case literal_type::PERCENT_REGEXP:
    case literal_type::LOWERX_XSTRING:
    case literal_type::BACKTICK_XSTRING:
    case literal_type::BACKTICK_HEREDOC:
      return true;

    default:
      return false;
  }
}

// True when the literal type is SLASH_REGEXP or PERCENT_REGEXP.
bool literal::regexp() const {
  const bool slash_form = (_type == literal_type::SLASH_REGEXP);
  const bool percent_form = (_type == literal_type::PERCENT_REGEXP);

  return slash_form || percent_form;
}

// A literal counts as a heredoc exactly when heredoc_e was supplied
// (is non-null).
bool literal::heredoc() const {
  if (heredoc_e == nullptr) {
    return false;
  }

  return true;
}

// Maps the literal's type to the token that opens it:
//   - plain, heredoc, %q, %Q and % strings -> tSTRING_BEG
//   - LOWERW_WORDS / UPPERW_WORDS          -> tQWORDS_BEG / tWORDS_BEG
//   - LOWERI_SYMBOLS / UPPERI_SYMBOLS      -> tQSYMBOLS_BEG / tSYMBOLS_BEG
//   - the three symbol types               -> tSYMBEG
//   - the two regexp types                 -> tREGEXP_BEG
//   - the three xstring types              -> tXSTRING_BEG
//
// An unhandled literal_type is a programming error. It trips the assert in
// debug builds and aborts in every build, so control can never flow off the
// end of this non-void function (which would be undefined behaviour when
// NDEBUG compiles the assert away).
token_type literal::start_token_type() const {
  switch (_type) {
    case literal_type::SQUOTE_STRING:
    case literal_type::SQUOTE_HEREDOC:
    case literal_type::LOWERQ_STRING:
    case literal_type::DQUOTE_STRING:
    case literal_type::DQUOTE_HEREDOC:
    case literal_type::PERCENT_STRING:
    case literal_type::UPPERQ_STRING:
      return token_type::tSTRING_BEG;

    case literal_type::LOWERW_WORDS:
      return token_type::tQWORDS_BEG;

    case literal_type::UPPERW_WORDS:
      return token_type::tWORDS_BEG;

    case literal_type::LOWERI_SYMBOLS:
      return token_type::tQSYMBOLS_BEG;

    case literal_type::UPPERI_SYMBOLS:
      return token_type::tSYMBOLS_BEG;

    case literal_type::SQUOTE_SYMBOL:
    case literal_type::LOWERS_SYMBOL:
    case literal_type::DQUOTE_SYMBOL:
      return token_type::tSYMBEG;

    case literal_type::SLASH_REGEXP:
    case literal_type::PERCENT_REGEXP:
      return token_type::tREGEXP_BEG;

    case literal_type::LOWERX_XSTRING:
    case literal_type::BACKTICK_XSTRING:
    case literal_type::BACKTICK_HEREDOC:
      return token_type::tXSTRING_BEG;

    default:
      break;
  }

  // Only reachable with a literal_type value that has no case above.
  assert(false && "unhandled literal_type in literal::start_token_type()");
  std::abort();
}

// Returns the current value of _dedentLevel (the value maintained by
// infer_indent_level()).
optional_size literal::dedentLevel() const {
  return _dedentLevel;
}

// Returns true when `c` is one of the following:
//   - for list-style literals (see words()), any of the whitespace
//     characters ' ', \t, \v, \r, \f, \n;
//   - a backslash;
//   - the literal's own single-character start or end delimiter.
bool literal::munge_escape(char c) const {
  if (words()) {
    switch (c) {
      case ' ':
      case '\t':
      case '\v':
      case '\r':
      case '\f':
      case '\n':
        return true;

      default:
        break;
    }
  }

  if (c == '\\') {
    return true;
  }

  const bool is_opener = start_delim.size() == 1 && start_delim.at(0) == c;
  const bool is_closer = end_delim.size() == 1 && end_delim.at(0) == c;

  return is_opener || is_closer;
}

// Measures the leading indentation of `line` and lowers _dedentLevel to it
// if it is smaller than the current value (or if no value is set yet).
//
// A space counts as one column; a tab advances to the next multiple of 8.
// Lines made up only of spaces and tabs (including empty lines) do not
// affect the level. Does nothing unless dedent_body is set.
void literal::infer_indent_level(std::string& line) {
  if (!dedent_body) {
    return;
  }

  size_t width = 0;

  for (const char ch : line) {
    if (ch == ' ') {
      ++width;
    } else if (ch == '\t') {
      width += (8 - width % 8);
    } else {
      // First non-blank character: the indentation of this line is known.
      if (!_dedentLevel || *_dedentLevel > width) {
        _dedentLevel = width;
      }
      return;
    }
  }
}

// Records one more open brace by incrementing _interp_braces.
void literal::start_interp_brace() {
  ++_interp_braces;
}

// Records one closed brace by decrementing _interp_braces, and reports
// whether the count has dropped back to zero.
bool literal::end_interp_brace_and_try_closing() {
  return --_interp_braces == 0;
}

// Same set of characters as rb_isspace() in MRI's include/ruby/ruby.h:
// space plus the control characters from '\t' through '\r'.
static bool rb_isspace(char c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
      return true;

    default:
      return false;
  }
}

// Removes, in place, every leading character of `str` for which
// rb_isspace() is true.
static void lstrip(std::string& str) {
  size_t leading = 0;

  for (; leading < str.size() && rb_isspace(str[leading]); ++leading) {
    // Just counting the whitespace prefix.
  }

  str.erase(0, leading);
}

// Reports whether `delimiter` matches this literal's closing delimiter.
// When `indent` is set, leading whitespace is stripped from a copy of
// `delimiter` before comparing; the argument itself is never modified.
bool literal::is_delimiter(std::string& delimiter) const {
  if (!indent) {
    return end_delim == delimiter;
  }

  std::string candidate(delimiter);
  lstrip(candidate);

  return end_delim == candidate;
}

// True when `lookahead` begins with a single ':' — that is, its first
// character is ':' and it is not immediately followed by a second ':'.
static bool lookahead_quoted_label(std::string& lookahead) {
  if (lookahead.empty() || lookahead[0] != ':') {
    return false;
  }

  // Either the colon is all we have, or the next character must not be
  // another colon.
  return lookahead.size() == 1 || lookahead[1] != ':';
}

// Updates the nesting depth for `delimiter` and, if that closes the
// literal, emits the closing token(s).
//
// A delimiter equal to a non-empty start_delim increases the depth; one
// accepted by is_delimiter() decreases it. Once the depth reaches zero:
//   - list-style literals first go through extend_space(ts, ts);
//   - if labels are allowed, `lookahead` looks like a quoted label and the
//     start token is tSTRING_BEG, the string is flushed and tLABEL_END is
//     emitted over [ts, te + 1);
//   - otherwise a still-monolithic string is emitted as one tSTRING over
//     [str_s, te);
//   - otherwise tSTRING_END is emitted over [ts, te).
//
// Returns true if the literal was closed, false if it is still open.
bool literal::nest_and_try_closing(std::string& delimiter, const char* ts, const char* te, std::string lookahead) {
  const bool opens_nested_pair = !start_delim.empty() && start_delim == delimiter;

  if (opens_nested_pair) {
    ++_nesting;
  } else if (is_delimiter(delimiter)) {
    --_nesting;
  }

  if (_nesting != 0) {
    return false;
  }

  if (words()) {
    extend_space(ts, ts);
  }

  const bool quoted_label = label_allowed
                         && lookahead_quoted_label(lookahead)
                         && start_token_type() == token_type::tSTRING_BEG;

  if (quoted_label) {
    // This is a quoted label.
    flush_string();
    emit(token_type::tLABEL_END, end_delim, ts, te + 1);
  } else if (monolithic) {
    // Emit the string as a single token.
    emit(token_type::tSTRING, buffer, str_s, te);
  } else {
    // For a heredoc the buffer holds the sentinel at this point, so it is
    // discarded rather than flushed. The lexer flushes the heredoc after
    // each non-terminating \n anyway, so no data is lost.
    if (!heredoc()) {
      flush_string();
    }

    emit(token_type::tSTRING_END, end_delim, ts, te);
  }

  return true;
}

// Flushes any buffered string content, then emits a tSPACE token with an
// empty value over [ts, te) unless one has already been emitted since the
// last content (tracked by space_emitted).
void literal::extend_space(const char* ts, const char* te) {
  flush_string();

  if (space_emitted) {
    return;
  }

  std::string empty_value;
  emit(token_type::tSPACE, empty_value, ts, te);
  space_emitted = true;
}

// Appends `str` to the pending buffer. buffer_s keeps the `ts` of the first
// chunk added since the buffer was last cleared; buffer_e always tracks the
// `te` of the most recent chunk.
void literal::extend_string(std::string& str, const char* ts, const char* te) {
  if (buffer_s == nullptr) {
    buffer_s = ts;
  }
  buffer_e = te;

  buffer.append(str);
}

// Clears space_emitted, so that the next extend_space() call emits a
// tSPACE token again.
void literal::extend_content() {
  space_emitted = false;
}

// Emits whatever is pending for this literal.
//
// A monolithic literal is first demoted: its start token is emitted and it
// stops being monolithic. Then, if the buffer is non-empty, it is emitted as
// tSTRING_CONTENT over [buffer_s, buffer_e), the buffer is cleared and
// extend_content() is called.
void literal::flush_string() {
  if (monolithic) {
    emit_start_token();
    monolithic = false;
  }

  if (buffer.empty()) {
    return;
  }

  emit(token_type::tSTRING_CONTENT, buffer, buffer_s, buffer_e);
  clear_buffer();
  extend_content();
}

// Resets the pending content: empties the buffer and forgets its start and
// end pointers.
void literal::clear_buffer() {
  buffer.clear();
  buffer_s = buffer_e = nullptr;
}

void literal::emit_start_token() {
  auto str_type_length = 1 /* TODO @str_type.length */;
  auto str_e = heredoc_e ? heredoc_e : str_s + str_type_length;
  std::string nothing;
  emit(start_token_type(), nothing, str_s, str_e);
}

// Forwards a token to the lexer this literal was created with. `tok` is the
// token type and `value` its string payload; `s` and `e` are passed through
// unchanged (presumably the start and end of the token's source range —
// confirm against lexer::emit).
void literal::emit(token_type tok, std::string& value, const char* s, const char* e) {
  _lexer.emit(tok, value, s, e);
}