【MySQL源码】sql_parse.cc 中的 parse_sql 函数

2022年 7月 1日 108点热度 0人点赞 0条评论

调用方式

dispatch_command 调用 dispatch_sql_command, dispatch_sql_command 这个函数会调用 parse_sql 这个函数.

使用提供的 parser state 和 object creation context 将 SQL 语句转换为抽象语法树 (AST), 为后面的 resolve (语义解析) 阶段做准备. 它是对 THD::sql_parser() 的包装, 一般应通过它来构建抽象语法树.

这个函数也可用于生成查询摘要, 调用方式如下:

    THD *thd = ...;
    const char *query_text = ...;
    uint query_length = ...;
    Object_creation_ctx *ctx = ...;
    bool rc;

    Parser_state parser_state;
    if (parser_state.init(thd, query_text, query_length))
    {
      ... handle error
    }

    parser_state.m_input.m_has_digest= true;
    parser_state.m_input.m_compute_digest= true;

    rc= parse_sql(thd, &parser_state, ctx);
    if (! rc)
    {
      unsigned char md5[MD5_HASH_SIZE];
      char digest_text[1024];
      bool truncated;
      const sql_digest_storage *digest= & thd->m_digest->m_digest_storage;

      compute_digest_md5(digest, & md5[0]);
      compute_digest_text(digest, & digest_text[0], sizeof(digest_text), &truncated);
    }

parse_sql 函数

下面是函数正文:

/**
  Run the SQL parser on the statement described by parser_state.

  Wrapper around THD::sql_parser(): it installs the parser state and
  (optionally) an object creation context on the THD, sets up digest
  collection, parses under the parser Diagnostics Area with a memory cap on
  thd->mem_root, then merges any parse conditions into the statement
  Diagnostics Area and undoes the temporary THD changes.

  @param thd           Thread context; thd->m_parser_state must be nullptr.
  @param parser_state  Parser state; stored in thd->m_parser_state for the
                       duration of the call.
  @param creation_ctx  Object creation context to apply while parsing, or
                       nullptr to leave the environment untouched.

  @retval false  THD::sql_parser() returned false and thd->is_fatal_error()
                 is false.
  @retval true   THD::sql_parser() returned true, or a fatal error is set.
*/
bool parse_sql(THD *thd, Parser_state *parser_state,
               Object_creation_ctx *creation_ctx) {
  DBUG_TRACE;
  bool ret_value;
  // THD keeps a pointer to the active Parser_state; none may be installed
  // when we get here.
  assert(thd->m_parser_state == nullptr);
  // TODO fix to allow parsing gcol exprs after main query.
  //  assert(thd->lex->m_sql_cmd == NULL);

  /*
    Backup creation context: apply the supplied context (if any) and keep the
    backup so that restore_env() can be given it at the end of the function.
  */

  Object_creation_ctx *backup_ctx = nullptr;

  if (creation_ctx) backup_ctx = creation_ctx->set_n_backup(thd);

  /* Set parser state. */

  thd->m_parser_state = parser_state;

  parser_state->m_digest_psi = nullptr;
  parser_state->m_lip.m_digest = nullptr;

  /*
    Partial parsers (GRAMMAR_SELECTOR_*) are not supposed to compute digests.
  */
  assert(! parser_state->m_lip.is_partial_parser() ||
         ! parser_state->m_input.m_has_digest);

  /*
    Only consider statements that are supposed to have a digest,
    like top level queries (i.e. the caller set m_input.m_has_digest).
  */
  if (parser_state->m_input.m_has_digest) {
    /*
      For these statements, see if the digest computation is required.
    */
    if (thd->m_digest != nullptr) {
      /* Start Digest */
      parser_state->m_digest_psi = MYSQL_DIGEST_START(thd->m_statement_psi);

      if (parser_state->m_input.m_compute_digest ||
          (parser_state->m_digest_psi != nullptr)) {
        /*
          If either:
          - the caller wants to compute a digest
          - the performance schema wants to compute a digest
          set the digest listener in the lexer.
        */
        parser_state->m_lip.m_digest = thd->m_digest;
        parser_state->m_lip.m_digest->m_digest_storage.m_charset_number =
            thd->charset()->number;
      }
    }
  }

  /* Parse the query. */

  /*
    Use a temporary DA (Diagnostics_area) while parsing. We don't know until
    after parsing whether the current command is a diagnostic statement, in
    which case we'll need to have the previous DA around to answer questions
    about it.
  */
  Diagnostics_area *parser_da = thd->get_parser_da();
  Diagnostics_area *da = thd->get_stmt_da();

  Parser_oom_handler poomh;
  // Note that we may be called recursively here, on INFORMATION_SCHEMA queries.

  // Cap thd->mem_root at parser_max_mem_size while the parser runs.
  thd->mem_root->set_max_capacity(thd->variables.parser_max_mem_size);
  thd->mem_root->set_error_for_capacity_exceeded(true);
  thd->push_internal_handler(&poomh);

  thd->push_diagnostics_area(parser_da, false);

  // The actual parsing happens in THD::sql_parser().
  bool mysql_parse_status = thd->sql_parser();

  // Undo the handler and memory-cap setup done before parsing.
  thd->pop_internal_handler();
  thd->mem_root->set_max_capacity(0);
  thd->mem_root->set_error_for_capacity_exceeded(false);
  /*
    Unwind diagnostics area.

    If any issues occurred during parsing, they will become
    the sole conditions for the current statement.

    Otherwise, if we have a diagnostic statement on our hands,
    we'll preserve the previous diagnostics area here so we
    can answer questions about it. This specifically means
    that repeatedly asking about a DA won't clear it.

    Otherwise, it's a regular command with no issues during
    parsing, so we'll just clear the DA in preparation for
    the processing of this command.
  */

  if (parser_da->current_statement_cond_count() != 0) {
    /*
      Error/warning during parsing: top DA should contain parse error(s)!
      Any pre-existing conditions will be replaced. The exception is
      diagnostics statements, in which case we wish to keep the errors so
      they can be sent to the client.
    */
    if (thd->lex->sql_command != SQLCOM_SHOW_WARNS &&
        thd->lex->sql_command != SQLCOM_GET_DIAGNOSTICS)
      da->reset_condition_info(thd);

    /*
      We need to put any errors in the DA as well as the condition list.
    */
    if (parser_da->is_error() && ! da->is_error()) {
      da->set_error_status(parser_da->mysql_errno(), parser_da->message_text(),
                           parser_da->returned_sqlstate());
    }

    da->copy_sql_conditions_from_da(thd, parser_da);

    parser_da->reset_diagnostics_area();
    parser_da->reset_condition_info(thd);

    /*
      Do not clear the condition list when starting execution as it
      now contains not the results of the previous executions, but
      a non-zero number of errors/warnings thrown during parsing!
    */
    thd->lex->keep_diagnostics = DA_KEEP_PARSE_ERROR;
  }

  thd->pop_diagnostics_area();

  /*
    Check that if THD::sql_parser() failed either thd->is_error() is set, or
    an internal error handler is set.

    The assert will not catch a situation where parsing fails without an
    error reported if an error handler exists. The problem is that the
    error handler might have intercepted the error, so thd->is_error() is
    not set. However, there is no way to be 100% sure here (the error
    handler might be for other errors than parsing one).
  */

  assert(! mysql_parse_status || (mysql_parse_status && thd->is_error()) ||
         (mysql_parse_status && thd->get_internal_handler()));

  /* Reset parser state. */

  thd->m_parser_state = nullptr;

  /* Restore creation context. */

  if (creation_ctx) creation_ctx->restore_env(thd, backup_ctx);

  /* That's it. */

  ret_value = mysql_parse_status || thd->is_fatal_error();

  if ((ret_value == 0) && (parser_state->m_digest_psi != nullptr)) {
    /*
      On parsing success, record the digest in the performance schema.
    */
    assert(thd->m_digest != nullptr);
    MYSQL_DIGEST_END(parser_state->m_digest_psi, &thd->m_digest->m_digest_storage);
  }

  return ret_value;
}

阅读感想

读完这段感觉非常懵, 看不出解析的结果是什么样的: 这个函数只做了解析前后的一些处理, 真正的解析是在 THD::sql_parser() 中完成的.

待了解的类或者结构:

  • Parser_state
  • digest
  • Object_creation_ctx
  • Diagnostics_area
  • mem_root
  • lex

一个个来吧.

Parser_state

Parser_state 的创建

dispatch_command 在调用 dispatch_sql_command 之前的创建方式如下:

      // The parser state is a local (stack) object.
      Parser_state parser_state;
      // init() returns true on error; bail out of the enclosing construct.
      if (parser_state.init(thd, thd->query().str, thd->query().length))
        break;

      parser_state.m_input.m_has_digest = true;

      // We produce a digest unless it has been explicitly turned off by
      // setting the maximum digest length to zero.
      if (get_max_digest_length() != 0)
        parser_state.m_input.m_compute_digest = true;

      // Initially, prepare and optimize the statement for the primary
      // storage engine. If an eligible secondary storage engine is found,
      // the statement may be reprepared for the secondary storage engine
      // later.
      const auto saved_secondary_engine = thd->secondary_engine_optimization();
      thd->set_secondary_engine_optimization(
               Secondary_engine_optimization::PRIMARY_TENTATIVELY);

      // NOTE(review): presumably hands the query's bound parameter values
      // (com_query.parameters / parameter_count) over to the THD -- confirm
      // against copy_bind_parameter_values().
      copy_bind_parameter_values(thd, com_data->com_query.parameters,
                                 com_data->com_query.parameter_count);

      dispatch_sql_command(thd, &parser_state);

Parser_state 的源码

Parser_state 这个类的位置是在: /mnt/data/mysql-server/sql/sql_lex.h

/**
  Internal state of the parser.
  The complete state consists of:
   - input parameters that control the parser behavior,
   - state data used during lexical parsing,
   - state data used during syntactic parsing.
*/
class Parser_state {
 protected:
  /**
    Constructor for special parsers of partial SQL statements (DD).

    @param grammar_selector_token   See Lex_input_stream::grammar_selector_token
  */
  explicit Parser_state(int grammar_selector_token)
      : m_input(), m_lip(grammar_selector_token), m_yacc(), m_comment(false) {}

 public:
  /// Default constructor: passes ~0U to the Lex_input_stream constructor as
  /// the grammar selector.
  Parser_state() : m_input(), m_lip(~0U), m_yacc(), m_comment(false) {}

  /**
     Object initializer. Must be called before usage.

     @retval false OK
     @retval true  Error
  */
  bool init(THD *thd, const char *buff, size_t length) {
    return m_lip.init(thd, buff, length);
  }

  /// Reset the lexer input (m_lip) with the given arguments and reset the
  /// Yacc state (m_yacc).
  void reset(const char *found_semicolon, size_t length) {
    m_lip.reset(found_semicolon, length);
    m_yacc.reset();
  }

  /// Signal that the current query has a comment
  void add_comment() { m_comment = true; }
  /// Check whether the current query has a comment
  bool has_comment() const { return m_comment; }

 public:
  /// Input flags: whether the statement has a digest, and whether the
  /// caller asks for the digest to be computed.
  Parser_input m_input;

  Lex_input_stream m_lip;
  Yacc_state m_yacc;
  /**
    Current performance digest instrumentation.

    Defaults to nullptr so the pointer is never indeterminate, even when it
    is read before a parse has assigned it.
  */
  PSI_digest_locker *m_digest_psi = nullptr;

 private:
  bool m_comment;  ///< True if the current query contains comments
};

Parser_input

Parser_input 这个结构体还挺简单的, 就封装了两个状态.

/**
  Input parameters to the parser.

  Both flags start out false; callers switch them on before parsing.
*/
struct Parser_input {
  /**
    True if the text parsed corresponds to an actual query,
    and not another text artifact.
    This flag is used to disable digests for nested parsing of:
    - view definitions
    - table trigger definitions
    - table partition definitions
    - event scheduler event definitions
  */
  bool m_has_digest{false};

  /**
    True if the caller needs to compute a digest.
    This flag is used to request an explicit digest computation,
    independent of the performance schema configuration.
  */
  bool m_compute_digest{false};

  Parser_input() = default;
};

Lex_input_stream

给 lex 投喂字符的输入流的封装, 内部维护了两个缓冲区:

  • 原始字符的输入流
  • 预处理字符的输入流 (去掉了注释)
/**
  This class represents the character input stream consumed during lexical
  analysis.

  In addition to consuming the input stream, this class performs some comment
  pre processing, by filtering out out-of-bound special text from the query
  input stream.

  Two buffers, with pointers inside each, are maintained in parallel.
  The 'raw' buffer is the original query text, which may contain out-of-bound
  comments.
  The 'cpp' (for comments pre processor) is the pre-processed buffer that
  contains only the query text that should be seen once out-of-bound data is
  removed.
*/

class Lex_input_stream {
 public:
  /**
    Constructor

    @param grammar_selector_token_arg   See grammar_selector_token.
  */

  explicit Lex_input_stream(uint grammar_selector_token_arg)
      : grammar_selector_token(grammar_selector_token_arg) {}

  /**
     Object initializer. Must be called before usage.

     @retval false OK
     @retval true  Error
  */
  bool init(THD *thd, const char *buff, size_t length);

  void reset(const char *buff, size_t length);

  /**
    Set the echo mode.

    When echo is true, characters parsed from the raw input stream are
    preserved. When false, characters parsed are silently ignored.
    @param echo the echo mode.
  */
  void set_echo(bool echo) { m_echo = echo; }

  /** Save the current echo mode and comment state. */
  void save_in_comment_state() {
    m_echo_saved = m_echo;
    in_comment_saved = in_comment;
  }

  /** Restore the echo mode and comment state saved by save_in_comment_state(). */
  void restore_in_comment_state() {
    m_echo = m_echo_saved;
    in_comment = in_comment_saved;
  }

  /**
    Skip binary from the input stream.
    @param n number of bytes to accept.
  */
  void skip_binary(int n) {
    assert(m_ptr + n <= m_end_of_query);
    if (m_echo) {
      memcpy(m_cpp_ptr, m_ptr, n);
      m_cpp_ptr += n;
    }
    m_ptr += n;
  }

  /**
    Get a character, and advance in the stream.
    @return the next character to parse.
  */
  unsigned char yyGet() {
    assert(m_ptr <= m_end_of_query);
    char c = *m_ptr++;
    if (m_echo) *m_cpp_ptr++ = c;
    return c;
  }

  /**
    Get the last character accepted.
    @return the last character accepted.
  */
  unsigned char yyGetLast() const { return m_ptr[-1]; }

  /**
    Look at the next character to parse, but do not accept it.
  */
  unsigned char yyPeek() const {
    assert(m_ptr <= m_end_of_query);
    return m_ptr[0];
  }

  /**
    Look ahead at some character to parse.
    @param n offset of the character to look up
  */
  unsigned char yyPeekn(int n) const {
    assert(m_ptr + n <= m_end_of_query);
    return m_ptr[n];
  }

  /**
    Cancel the effect of the last yyGet() or yySkip().
    Note that the echo mode should not change between calls to yyGet / yySkip
    and yyUnget. The caller is responsible for ensuring that.
  */
  void yyUnget() {
    m_ptr--;
    if (m_echo) m_cpp_ptr--;
  }

  /**
    Accept a character, by advancing the input stream.
  */
  void yySkip() {
    assert(m_ptr <= m_end_of_query);
    if (m_echo)
      *m_cpp_ptr++ = *m_ptr++;
    else
      m_ptr++;
  }

  /**
    Accept multiple characters at once.
    @param n the number of characters to accept.
  */
  void yySkipn(int n) {
    assert(m_ptr + n <= m_end_of_query);
    if (m_echo) {
      memcpy(m_cpp_ptr, m_ptr, n);
      m_cpp_ptr += n;
    }
    m_ptr += n;
  }

  /**
    Puts a character back into the stream, canceling
    the effect of the last yyGet() or yySkip().
    Note that the echo mode should not change between calls
    to unput, get, or skip from the stream.

    The character is written into the raw buffer at the new position.
    @return the new raw stream position.
  */
  char *yyUnput(char ch) {
    *--m_ptr = ch;
    if (m_echo) m_cpp_ptr--;
    return m_ptr;
  }

  /**
    Inject a character into the pre-processed stream.

    Note, this function is used to inject a space instead of multi-character
    C-comment. Thus there is no boundary checks here (basically, we replace
    N-chars by 1-char here).

    @return the pre-processed stream position after the injected character.
  */
  char *cpp_inject(char ch) {
    *m_cpp_ptr = ch;
    return ++m_cpp_ptr;
  }

  /**
    End of file indicator for the query text to parse.
    @return true if there are no more characters to parse
  */
  bool eof() const { return (m_ptr >= m_end_of_query); }

  /**
    End of file indicator for the query text to parse.
    @param n number of characters expected
    @return true if m_ptr + n reaches or passes the end of the query, i.e.
            at most n characters remain.
            NOTE(review): this also returns true when exactly n characters
            remain, not only when fewer than n do -- confirm that callers
            rely on this.
  */
  bool eof(int n) const { return ((m_ptr + n) >= m_end_of_query); }

  /** Get the raw query buffer. */
  const char *get_buf() const { return m_buf; }

  /** Get the pre-processed query buffer. */
  const char *get_cpp_buf() const { return m_cpp_buf; }

  /** Get the end of the raw query buffer. */
  const char *get_end_of_query() const { return m_end_of_query; }

  /** Mark the stream position as the start of a new token. */
  void start_token() {
    m_tok_start = m_ptr;
    m_tok_end = m_ptr;

    m_cpp_tok_start = m_cpp_ptr;
    m_cpp_tok_end = m_cpp_ptr;
  }

  /**
    Adjust the starting position of the current token.
    This is used to compensate for starting whitespace.
  */
  void restart_token() {
    m_tok_start = m_ptr;
    m_cpp_tok_start = m_cpp_ptr;
  }

  /** Get the token start position, in the raw buffer. */
  const char *get_tok_start() const { return m_tok_start; }

  /** Get the token start position, in the pre-processed buffer. */
  const char *get_cpp_tok_start() const { return m_cpp_tok_start; }

  /** Get the token end position, in the raw buffer. */
  const char *get_tok_end() const { return m_tok_end; }

  /** Get the token end position, in the pre-processed buffer. */
  const char *get_cpp_tok_end() const { return m_cpp_tok_end; }

  /** Get the current stream pointer, in the raw buffer. */
  const char *get_ptr() const { return m_ptr; }

  /** Get the current stream pointer, in the pre-processed buffer. */
  const char *get_cpp_ptr() const { return m_cpp_ptr; }

  /** Get the length of the current token, in the raw buffer. */
  uint yyLength() const {
    /*
      The assumption is that the lexical analyser is always 1 character ahead,
      which the -1 account for.
    */
    assert(m_ptr > m_tok_start);
    return (uint)((m_ptr - m_tok_start) - 1);
  }

  /** Get the utf8-body string. */
  const char *get_body_utf8_str() const { return m_body_utf8; }

  /** Get the utf8-body length. */
  uint get_body_utf8_length() const {
    return (uint)(m_body_utf8_ptr - m_body_utf8);
  }

  void body_utf8_start(THD *thd, const char *begin_ptr);
  void body_utf8_append(const char *ptr);
  void body_utf8_append(const char *ptr, const char *end_ptr);
  void body_utf8_append_literal(THD *thd, const LEX_STRING *txt,
                                const CHARSET_INFO *txt_cs,
                                const char *end_ptr);

  uint get_lineno(const char *raw_ptr) const;

  /** Current thread. */
  THD *m_thd;

  /** Current line number. */
  uint yylineno;

  /** Length of the last token parsed. */
  uint yytoklen;

  /** Interface with bison, value of the last token parsed. */
  Lexer_yystype *yylval;

  /**
    LALR(2) resolution, look ahead token.
    Value of the next token to return, if any,
    or -1, if no token was parsed in advance.
    Note: 0 is a legal token, and represents YYEOF.
  */
  int lookahead_token;

  /** LALR(2) resolution, value of the look ahead token.*/
  Lexer_yystype *lookahead_yylval;

  /// Skip adding of the current token's digest since it is already added
  ///
  /// Usually we calculate a digest token by token at the top-level function
  /// of the lexer: MYSQLlex(). However, some complex ("hintable") tokens break
  /// that data flow: for example, the `SELECT /*+ HINT(t) */` is the single
  /// token from the main parser's point of view, and we add the "SELECT"
  /// keyword to the digest buffer right after the lex_one_token() call,
  /// but the "/*+ HINT(t) */" is a sequence of separate tokens from the hint
  /// parser's point of view, and we add those tokens to the digest buffer
  /// *inside* the lex_one_token() call. Thus, the usual data flow adds
  /// tokens from the "/*+ HINT(t) */" string first, and only then it appends
  /// the "SELECT" keyword token to that stream: "/*+ HINT(t) */ SELECT".
  /// This is not acceptable, since we use the digest buffer to restore
  /// query strings in their normalized forms, so the order of added tokens is
  /// important. Thus, we add tokens of "hintable" keywords to a digest buffer
  /// right in the hint parser and skip adding of them at the caller with the
  /// help of skip_digest flag.
  bool skip_digest;

  void add_digest_token(uint token, Lexer_yystype *yylval);

  void reduce_digest_token(uint token_left, uint token_right);

  /**
    True if this scanner tokenizes a partial query (partition expression,
    generated column expression etc.)

    @return true if parsing a partial query, otherwise false.
  */
  bool is_partial_parser() const { return grammar_selector_token >= 0; }

  /**
    Outputs warnings on deprecated charsets in complete SQL statements

    @param [in] cs    The character set/collation to check for a deprecation.
    @param [in] alias The name/alias of @p cs.
  */
  void warn_on_deprecated_charset(const CHARSET_INFO *cs,
                                  const char *alias) const {
    if (! is_partial_parser()) {
      ::warn_on_deprecated_charset(m_thd, cs, alias);
    }
  }

  /**
    Outputs warnings on deprecated collations in complete SQL statements

    @param [in] collation     The collation to check for a deprecation.
  */
  void warn_on_deprecated_collation(const CHARSET_INFO *collation) const {
    if (! is_partial_parser()) {
      ::warn_on_deprecated_collation(m_thd, collation);
    }
  }

  const CHARSET_INFO *query_charset;

 private:
  /** Pointer to the current position in the raw input stream. */
  char *m_ptr;

  /** Starting position of the last token parsed, in the raw buffer. */
  const char *m_tok_start;

  /** Ending position of the previous token parsed, in the raw buffer. */
  const char *m_tok_end;

  /** End of the query text in the input stream, in the raw buffer. */
  const char *m_end_of_query;

  /** Beginning of the query text in the input stream, in the raw buffer. */
  const char *m_buf;

  /** Length of the raw buffer. */
  size_t m_buf_length;

  /** Echo the parsed stream to the pre-processed buffer. */
  bool m_echo;
  bool m_echo_saved;

  /** Pre-processed buffer. */
  char *m_cpp_buf;

  /** Pointer to the current position in the pre-processed input stream. */
  char *m_cpp_ptr;

  /**
    Starting position of the last token parsed,
    in the pre-processed buffer.
  */
  const char *m_cpp_tok_start;

  /**
    Ending position of the previous token parsed,
    in the pre-processed buffer.
  */
  const char *m_cpp_tok_end;

  /** UTF8-body buffer created during parsing. */
  char *m_body_utf8;

  /** Pointer to the current position in the UTF8-body buffer. */
  char *m_body_utf8_ptr;

  /**
    Position in the pre-processed buffer. The query from m_cpp_buf to
    m_cpp_utf8_processed_ptr is converted to UTF8-body.
  */
  const char *m_cpp_utf8_processed_ptr;

 public:
  /** Current state of the lexical analyser. */
  enum my_lex_states next_state;

  /**
    Position of ';' in the stream, to delimit multiple queries.
    This delimiter is in the raw buffer.
  */
  const char *found_semicolon;

  /** Token character bitmaps, to detect 7bit strings. */
  uchar tok_bitmap;

  /** SQL_MODE = IGNORE_SPACE. */
  bool ignore_space;

  /**
    true if we're parsing a prepared statement: in this mode
    we should allow placeholders.
  */
  bool stmt_prepare_mode;
  /**
    true if we should allow multi-statements.
  */
  bool multi_statements;

  /** State of the lexical analyser for comments. */
  enum_comment_state in_comment;
  enum_comment_state in_comment_saved;

  /**
    Starting position of the TEXT_STRING or IDENT in the pre-processed
    buffer.

    NOTE: this member must be used within MYSQLlex() function only.
  */
  const char *m_cpp_text_start;

  /**
    Ending position of the TEXT_STRING or IDENT in the pre-processed
    buffer.

    NOTE: this member must be used within MYSQLlex() function only.
    */
  const char *m_cpp_text_end;

  /**
    Character set specified by the character-set-introducer.

    NOTE: this member must be used within MYSQLlex() function only.
  */
  const CHARSET_INFO *m_underscore_cs;

  /**
    Current statement digest instrumentation.
  */
  sql_digest_state *m_digest;

  /**
    The synthetic 1st token to prepend token stream with.

    This token value tricks parser to simulate multiple %start-ing points.
    Currently the grammar is aware of 4 such synthetic tokens:
    1. GRAMMAR_SELECTOR_PART for partitioning stuff from DD,
    2. GRAMMAR_SELECTOR_GCOL for generated column stuff from DD,
    3. GRAMMAR_SELECTOR_EXPR for generic single expressions from DD/.frm.
    4. GRAMMAR_SELECTOR_CTE for generic subquery expressions from CTEs.

    The value is -1 when parsing with the main grammar (no grammar selector
    available).

    @note yylex() is expected to return the value of type int:
          0 is for EOF and everything else for real token numbers.
          Bison, in its turn, generates positive token numbers.
          So, the negative grammar_selector_token means "not a token".
          In other words, -1 is "empty value".
  */
  const int grammar_selector_token;

  /** True if no character of the current token has the 0x80 bit set. */
  bool text_string_is_7bit() const { return ! (tok_bitmap & 0x80); }
};

Object_creation_ctx

Object_creation_ctx 用于创建数据库对象 (视图, 存储的例程, 事件, 触发器) 上下文的接口. 创建上下文是一组属性, 应该在创建时固定, 然后在每次解析或执行对象时使用.

/**
  Interface for the creation context of database objects (views, stored
  routines, events, triggers). A creation context is a set of attributes
  that should be fixed at creation time and then used each time the object
  is parsed or executed.
*/
class Object_creation_ctx {
 public:
  /// Back up the object creation context; the returned pointer is what
  /// callers later hand to restore_env().
  /// NOTE(review): presumably also applies this context to thd via
  /// change_env() -- confirm in the implementation.
  Object_creation_ctx *set_n_backup(THD *thd);

  /// Restore the object creation context from a backup returned by
  /// set_n_backup().
  void restore_env(THD *thd, Object_creation_ctx *backup_ctx);

 protected:
  Object_creation_ctx() = default;
  // Hooks to be implemented by concrete creation contexts.
  virtual Object_creation_ctx *create_backup_ctx(THD *thd) const = 0;
  virtual void delete_backup_ctx() = 0;

  virtual void change_env(THD *thd) const = 0;

 public:
  virtual ~Object_creation_ctx() = default;
};

Diagnostics_area

Diagnostics_area: 存储当前执行语句的状态. 在语句开头被清空, 之后可以保存 OK, ERROR 或 EOF 三种状态之一. 每条语句中状态不能被设置两次.

/**
  Stores the status of the currently executed statement together with the
  list of SQL-conditions (errors, warnings and notes) raised.
*/
class Diagnostics_area {
  /** The type of the counted and doubly linked list of conditions. */
  // Sql_condition_list is this I_P_List instantiation: elements are linked
  // through Sql_condition::m_next_condition / m_prev_condition.
  typedef I_P_List<
      Sql_condition,
      I_P_List_adapter<Sql_condition, &Sql_condition::m_next_condition,
                       &Sql_condition::m_prev_condition>,
      I_P_List_counter, I_P_List_fast_push_back<Sql_condition>>
      Sql_condition_list;

 public:
  /** Const iterator used to iterate through the condition list. */
  // This is a type alias, not a data member.
  typedef Sql_condition_list::Const_Iterator Sql_condition_iterator;

  enum enum_diagnostics_status {
    /** The area is cleared at start of a statement. */
    DA_EMPTY = 0,
    /** Set whenever one calls my_ok(). */
    DA_OK,
    /** Set whenever one calls my_eof(). */
    DA_EOF,
    /** Set whenever one calls my_error() or my_message(). */
    DA_ERROR,
    /** Set in case of a custom response, such as one from COM_STMT_PREPARE. */
    DA_DISABLED
  };

  // Constructor
  Diagnostics_area(bool allow_unlimited_conditions);
  // Destructor
  ~Diagnostics_area();

  void set_overwrite_status(bool can_overwrite_status) {
    m_can_overwrite_status = can_overwrite_status;
  }

  bool is_sent() const { return m_is_sent; }

  void set_is_sent(bool is_sent) { m_is_sent = is_sent; }

  /**
    Set OK status -- ends commands that do not return a
    result set, e.g. INSERT/UPDATE/DELETE.

    @param affected_rows  The number of rows affected by the last statement.
                          @sa Diagnostics_area::m_affected_rows.
    @param last_insert_id The value to be returned by LAST_INSERT_ID().
                          @sa Diagnostics_area::m_last_insert_id.
    @param message_text   The OK-message text.
  */
  void set_ok_status(ulonglong affected_rows, ulonglong last_insert_id,
                     const char *message_text);

  /**
    Set EOF status.

    @param thd  Thread context.
  */
  void set_eof_status(THD *thd);

  /**
    Set ERROR status in the Diagnostics Area.
    This function should be used to report fatal errors (such as
    out-of-memory errors) when no further processing is possible.

    @param thd              Thread handle
    @param mysql_errno      SQL-condition error number
  */
  void set_error_status(THD *thd, uint mysql_errno);

  /**
    Set ERROR status in the Diagnostics Area.

    @param mysql_errno        SQL-condition error number
    @param message_text       SQL-condition message
    @param returned_sqlstate  SQL-condition state
  */
  void set_error_status(uint mysql_errno, const char *message_text,
                        const char *returned_sqlstate);

  /**
    Mark the Diagnostics Area as 'DISABLED'.

    This is used in rare cases when the COM_ command at hand sends a response
    in a custom format. One example is COM_STMT_PREPARE.
  */
  void disable_status() {
    assert(m_status == DA_EMPTY);
    m_status = DA_DISABLED;
  }

  /**
    Clear this Diagnostics Area.

    Normally called at the end of a statement.
  */
  void reset_diagnostics_area();

  bool is_set() const { return m_status != DA_EMPTY; }

  bool is_error() const { return m_status == DA_ERROR; }

  bool is_eof() const { return m_status == DA_EOF; }

  bool is_ok() const { return m_status == DA_OK; }

  bool is_disabled() const { return m_status == DA_DISABLED; }

  enum_diagnostics_status status() const { return m_status; }

  // The accessors below assert that the area is in a status for which the
  // requested attribute is meaningful.
  const char *message_text() const {
    assert(m_status == DA_ERROR || m_status == DA_OK);
    return m_message_text;
  }

  uint mysql_errno() const {
    assert(m_status == DA_ERROR);
    return m_mysql_errno;
  }

  const char *returned_sqlstate() const {
    assert(m_status == DA_ERROR);
    return m_returned_sqlstate;
  }

  ulonglong affected_rows() const {
    assert(m_status == DA_OK);
    return m_affected_rows;
  }

  ulonglong last_insert_id() const {
    assert(m_status == DA_OK);
    return m_last_insert_id;
  }

  uint last_statement_cond_count() const {
    assert(m_status == DA_OK || m_status == DA_EOF);
    return m_last_statement_cond_count;
  }

  /** Return the number of conditions raised by the current statement. */
  ulong current_statement_cond_count() const {
    return m_current_statement_cond_count;
  }

  /**
    Reset between two COM_ commands. Conditions are preserved
    between commands, but m_current_statement_cond_count indicates
    the number of conditions of this particular statement only.
  */
  void reset_statement_cond_count() { m_current_statement_cond_count = 0; }

  /**
    Checks if the condition list contains SQL-condition with the given message.

    @param message_text    Message text
    @param message_length  Length of message_text

    @return true if the condition list contains an SQL-condition with the given
    message text.
  */
  bool has_sql_condition(const char *message_text, size_t message_length) const;

  /**
    Checks if the condition list contains SQL-condition with the given error
    code.

    @param sql_errno    Error code

    @return true if the condition list contains an SQL-condition with the given
    error code.
  */
  bool has_sql_condition(uint sql_errno) const;

  /**
    Reset the current condition information stored in the Diagnostics Area.
    Clear all conditions, the number of conditions, reset current row counter
    to point to the first row.
  */
  void reset_condition_info(THD *thd);

  /** Return the current counter value. */
  ulong current_row_for_condition() const {
    return m_current_row_for_condition;
  }

  /** Increment the current row counter to point at the next row. */
  void inc_current_row_for_condition() { m_current_row_for_condition++; }

  /** Set the current row counter to point to the given row number. */
  void set_current_row_for_condition(ulong rowno) {
    m_current_row_for_condition = rowno;
  }

  /** Reset the current row counter. Start counting from 1. */
  void reset_current_row_for_condition() { m_current_row_for_condition = 1; }

  /**
    The number of errors, or number of rows returned by SHOW ERRORS,
    also the value of session variable @@error_count.
  */
  ulong error_count(THD *thd) const;

  /**
    Used for @@warning_count system variable, which prints
    the number of rows returned by SHOW WARNINGS.
 */
  ulong warn_count(THD *thd) const;

  /**
    The number of conditions (errors, warnings and notes) in the list.
  */
  uint cond_count() const { return m_conditions_list.elements(); }

  /** Return a const iterator over the condition list. */
  Sql_condition_iterator sql_conditions() const { return m_conditions_list; }

  const char *get_first_condition_message();

  /** Make sure there is room for the given number of conditions. */
  void reserve_number_of_conditions(THD *thd, uint count);

  /**
    Add a new SQL-condition to the current list and increment the respective
    counters.

    @param thd                Thread context.
    @param mysql_errno        SQL-condition error number.
    @param returned_sqlstate  SQL-condition state.
    @param severity           SQL-condition severity.
    @param message_text       SQL-condition message.

    @return a pointer to the added SQL-condition.
  */
  Sql_condition *push_warning(THD *thd, uint mysql_errno,
                              const char *returned_sqlstate,
                              Sql_condition::enum_severity_level severity,
                              const char *message_text);

  /**
    Mark current SQL-conditions so that we can later know which
    SQL-conditions have been added.
  */
  void mark_preexisting_sql_conditions();

  /**
    Copy SQL-conditions that have been added since
    mark_preexisting_sql_conditions() was called.

    @param thd    Thread context.
    @param src_da Diagnostics Area to copy from.
  */
  void copy_new_sql_conditions(THD *thd, const Diagnostics_area *src_da);

  /**
    Copy all SQL-conditions from src_da to this DA.

    @param thd    Thread context.
    @param src_da Diagnostics Area to copy from.
  */
  void copy_sql_conditions_from_da(THD *thd, const Diagnostics_area *src_da);

  /**
    Copy Sql_conditions that are not SL_ERROR from the source
    Diagnostics Area to the current Diagnostics Area.

    @param thd    Thread context.
    @param src_da Diagnostics Area to copy from.
  */
  void copy_non_errors_from_da(THD *thd, const Diagnostics_area *src_da);

  /**
    @return SQL-condition, which corresponds to the error state in
    Diagnostics Area.
  */
  Sql_condition *error_condition() const;

 private:
  /**
    Add a new SQL-condition to the current list and increment the respective
    counters.

    @param thd            Thread context.
    @param sql_condition  SQL-condition to copy values from.

    @return a pointer to the added SQL-condition.
  */
  Sql_condition *push_warning(THD *thd, const Sql_condition *sql_condition);

  /**
    Push the given Diagnostics Area on top of the stack.
    "This" will then become the stacked Diagnostics Area.
    Conditions present in the new stacked Diagnostics Area
    will be copied to the new top Diagnostics Area.

    @note This function will not set THD::m_stmt_da.
          Use THD::push_diagnostics_area() instead.

    @param thd  Thread context
    @param da   Diagnostics Area to become the top of
                the Diagnostics Area stack.
    @param copy_conditions
                Copy the conditions from the new second Diagnostics Area
                to the new first Diagnostics Area, as per SQL standard.
  */
  void push_diagnostics_area(THD *thd, Diagnostics_area *da,
                             bool copy_conditions);

  /**
    Pop "this" off the Diagnostics Area stack.

    @note This function will not set THD::m_stmt_da.
          Use THD::pop_diagnostics_area() instead.

    @returns The new top of the Diagnostics Area stack.
  */
  Diagnostics_area *pop_diagnostics_area();

  /**
    Returns the Diagnostics Area below the current diagnostics
    area on the stack.
  */
  const Diagnostics_area *stacked_da() const { return m_stacked_da; }

 private:
  /** Pointer to the Diagnostics Area below on the stack. */
  Diagnostics_area *m_stacked_da;

  /** A memory root to allocate conditions */
  MEM_ROOT m_condition_root{PSI_INSTRUMENT_ME, WARN_ALLOC_BLOCK_SIZE};

  /** List of conditions of all severities. */
  Sql_condition_list m_conditions_list;

  /** List of conditions present in DA at handler activation. */
  List<const Sql_condition> m_preexisting_sql_conditions;

  /** True if status information is sent to the client. */
  bool m_is_sent;

  /** Set to make set_error_status after set_{ok,eof}_status possible. */
  bool m_can_overwrite_status;

  /** Indicates if push_warning() allows unlimited number of conditions. */
  bool m_allow_unlimited_conditions;

  enum_diagnostics_status m_status;

 private:
  /*
    This section contains basic attributes of Sql_condition to store
    information about error (SQL-condition of error severity) or OK-message.
    The attributes are inlined here (instead of using Sql_condition) to be able
    to store the information in case of out-of-memory error.
  */

  /**
    Message buffer. It is used only when DA is in OK or ERROR status.
    If DA status is ERROR, it's the MESSAGE_TEXT attribute of SQL-condition.
    If DA status is OK, it's the OK-message to be sent.
  */
  char m_message_text[MYSQL_ERRMSG_SIZE];

  /**
    SQL RETURNED_SQLSTATE condition item.
    This member is always NUL terminated.
  */
  char m_returned_sqlstate[SQLSTATE_LENGTH + 1];

  /**
    SQL error number. One of ER_ codes from share/errmsg.txt.
    Set by set_error_status.
  */
  uint m_mysql_errno;

  /**
    The number of rows affected by the last statement. This is
    semantically close to thd->row_count_func, but has a different
    life cycle. thd->row_count_func stores the value returned by
    function ROW_COUNT() and is cleared only by statements that
    update its value, such as INSERT, UPDATE, DELETE and few others.
    This member is cleared at the beginning of the next statement.

    We could possibly merge the two, but life cycle of thd->row_count_func
    can not be changed.
  */
  ulonglong m_affected_rows;

  /**
    Similarly to the previous member, this is a replacement of
    thd->first_successful_insert_id_in_prev_stmt, which is used
    to implement LAST_INSERT_ID().
  */
  ulonglong m_last_insert_id;

  /**
    Number of conditions of this last statement. May differ from
    the number of conditions returned by SHOW WARNINGS e.g. in case
    the statement doesn't clear the conditions, and doesn't generate
    them.
  */
  uint m_last_statement_cond_count;

  /**
    The number of conditions of the current statement. m_conditions_list
    life cycle differs from statement life cycle -- it may span
    multiple statements. In that case we get
    m_current_statement_cond_count 0, whereas m_conditions_list is not empty.
  */
  uint m_current_statement_cond_count;

  /** A break down of the number of conditions per severity (level). */
  uint m_current_statement_cond_count_by_qb[(uint)Sql_condition::SEVERITY_END];

  /**
    Row counter, to print in errors and warnings. Not increased in
    create_sort_index(); may differ from examined_row_count.
  */
  ulong m_current_row_for_condition;

  /** Save @@error_count before pre-clearing the DA. */
  ulong m_saved_error_count;

  /** Save @@warning_count before pre-clearing the DA. */
  ulong m_saved_warn_count;

  // THD is a friend: the private push_diagnostics_area() /
  // pop_diagnostics_area() above are documented as being reached through
  // their THD:: counterparts.
  friend class THD;
};

struct LEX

/**
  The LEX object currently serves three different purposes:

  - It contains some universal properties of an SQL command, such as
    sql_command, presence of IGNORE in data change statement syntax, and list
    of tables (query_tables).

  - It contains some execution state variables, like m_exec_started
    (set to true when execution is started), plugins (list of plugins used
    by statement), insert_update_values_map (a map of objects used by certain
    INSERT statements), etc.

  - It contains a number of members that should be local to subclasses of
    Sql_cmd, like purge_value_list (for the PURGE command), kill_value_list
    (for the KILL command).

  The LEX object is strictly a part of class Sql_cmd, for those SQL commands
  that are represented by an Sql_cmd class. For the remaining SQL commands,
  it is a standalone object linked to the current THD.

  The lifecycle of a LEX object is as follows:

  - The LEX object is constructed either on the execution mem_root
    (for regular statements), on a Prepared_statement mem_root (for
    prepared statements), on an SP mem_root (for stored procedure instructions),
    or created on the current mem_root for short-lived uses.

  - Call lex_start() to initialize a LEX object before use.
    This initializes the execution state part of the object.
    It also calls LEX::reset() to ensure that all members are properly inited.

  - Parse and resolve the statement, using the LEX as a work area.

  - Execute an SQL command: call set_exec_started() when starting to execute
    (actually when starting to optimize).
    Typically call is_exec_started() to distinguish between preparation
    and optimization/execution stages of SQL command execution.

  - Call clear_execution() when execution is finished. This will clear all
    execution state associated with the SQL command, it also includes calling
    LEX::reset_exec_started().

  @todo - Create subclasses of Sql_cmd to contain data that are local
          to specific commands.

  @todo - Create a Statement context object that will hold the execution state
          part of struct LEX.

  @todo - Ensure that a LEX struct is never reused, thus making e.g
          LEX::reset() redundant.
*/

struct LEX : public Query_tables_list {
  friend bool lex_start(THD *thd);

  Query_expression *unit;  ///< Outer-most query expression
  /// @todo: query_block can be replaced with unit->first-select()
  Query_block *query_block;            ///< First query block
  Query_block *all_query_blocks_list;  ///< List of all query blocks
 private:
  /* current Query_block in parsing */
  Query_block *m_current_query_block;

 public:
  /// @return the query block currently being parsed
  inline Query_block *current_query_block() const {
    return m_current_query_block;
  }

  /*
    We want to keep current_thd out of header files, so the debug assert
    is moved to the .cc file.
  */
  void assert_ok_set_current_query_block();
  /// Set the query block currently being parsed. In debug builds the
  /// out-of-line assert_ok_set_current_query_block() check runs first.
  inline void set_current_query_block(Query_block *select) {
#ifndef NDEBUG
    assert_ok_set_current_query_block();
#endif
    m_current_query_block = select;
  }
  /// @return true if this is an EXPLAIN statement
  bool is_explain() const { return explain_format != nullptr; }
  bool is_explain_analyze = false;
  /**
    Whether the currently-running query should be (attempted) executed in
    the hypergraph optimizer. This will not change after the query is
    done parsing, so you can use it in any query phase to e.g. figure out
    whether to inhibit some transformation that the hypergraph optimizer
    does not properly understand yet.
   */
  bool using_hypergraph_optimizer = false;
  LEX_STRING name;
  char *help_arg;
  char *to_log; /* For PURGE MASTER LOGS TO */
  const char *x509_subject, *x509_issuer, *ssl_cipher;
  // Wildcard from SHOW ... LIKE <wildcard> statements.
  String *wild;
  Query_result *result;
  LEX_STRING binlog_stmt_arg = {
      nullptr, 0};  ///< Argument of the BINLOG event statement.
  LEX_STRING ident;
  LEX_USER *grant_user;
  LEX_ALTER alter_password;
  enum_alter_user_attribute alter_user_attribute;
  LEX_STRING alter_user_comment_text;
  LEX_GRANT_AS grant_as;
  THD *thd;

  /* Optimizer hints */
  Opt_hints_global *opt_hints_global;

  /* maintain a list of used plugins for this LEX */
  typedef Prealloced_array<plugin_ref, INITIAL_LEX_PLUGIN_LIST_SIZE>
      Plugins_array;
  Plugins_array plugins;

  /// Table being inserted into (may be a view)
  TABLE_LIST *insert_table;
  /// Leaf table being inserted into (always a base table)
  TABLE_LIST *insert_table_leaf;

  /** SELECT of CREATE VIEW statement */
  LEX_STRING create_view_query_block;

  /* Partition info structure filled in by PARTITION BY parse part */
  partition_info *part_info;

  /*
    The definer of the object being created (view, trigger, stored routine).
    I.e. the value of DEFINER clause.
  */
  LEX_USER *definer;

  List<LEX_USER> users_list;
  List<LEX_COLUMN> columns;
  List<LEX_CSTRING> dynamic_privileges;
  List<LEX_USER> *default_roles;

  ulonglong bulk_insert_row_cnt;

  // PURGE statement-specific fields:
  List<Item> purge_value_list;

  // KILL statement-specific fields:
  List<Item> kill_value_list;

  // other stuff:
  List<set_var_base> var_list;
  List<Item_func_set_user_var> set_var_list;  // in-query assignment list
  /**
    List of placeholders ('?') for parameters of a prepared statement. Because
    we append to this list during parsing, it is naturally sorted by
    position of the '?' in the query string. The code which fills placeholders
    with user-supplied values, and the code which writes a query for
    statement-based logging, rely on this order.
    This list contains only real placeholders, not the clones which originate
    in a re-parsed CTE definition.
  */
  List<Item_param> param_list;

  bool locate_var_assignment(const Name_string &name);

  /// Record the pair (f1, f2) in the values map, allocating the map on the
  /// heap the first time it is needed.
  void insert_values_map(Item_field *f1, Field *f2) {
    if (!insert_update_values_map)
      insert_update_values_map = new std::map<Item_field *, Field *>;
    insert_update_values_map->insert(std::make_pair(f1, f2));
  }
  /// Empty and free the values map (if any) and reset the pointer to nullptr.
  void destroy_values_map() {
    if (insert_update_values_map) {
      insert_update_values_map->clear();
      delete insert_update_values_map;
      insert_update_values_map = nullptr;
    }
  }
  /// Remove all entries from the values map but keep the map object allocated.
  void clear_values_map() {
    if (insert_update_values_map) {
      insert_update_values_map->clear();
    }
  }
  /// @return true if the values map has been allocated
  bool has_values_map() const { return insert_update_values_map != nullptr; }
  /// @return iterator to the first entry of the values map.
  /// The map pointer is dereferenced unconditionally, so the map must have
  /// been allocated (see has_values_map()).
  std::map<Item_field *, Field *>::iterator begin_values_map() {
    assert(insert_update_values_map != nullptr);
    return insert_update_values_map->begin();
  }
  /// @return past-the-end iterator of the values map.
  /// The map pointer is dereferenced unconditionally, so the map must have
  /// been allocated (see has_values_map()).
  std::map<Item_field *, Field *>::iterator end_values_map() {
    assert(insert_update_values_map != nullptr);
    return insert_update_values_map->end();
  }

 private:
  /*
    With Visual Studio, an std::map will always allocate two small objects
    on the heap. Sometimes we put LEX objects in a MEM_ROOT, and never run
    the LEX DTOR. To avoid memory leaks, put this std::map on the heap,
    and call clear_values_map() at the end of each statement.
   */
  std::map<Item_field *, Field *> *insert_update_values_map;

 public:
  /*
    A stack of name resolution contexts for the query. This stack is used
    at parse time to set local name resolution contexts for various parts
    of a query. For example, in a JOIN ... ON (some_condition) clause the
    Items in 'some_condition' must be resolved only against the operands
    of the join, and not against the whole clause. Similarly, Items in
    subqueries should be resolved against the subqueries (and outer queries).
    The stack is used in the following way: when the parser detects that
    all Items in some clause need a local context, it creates a new context
    and pushes it on the stack. All newly created Items always store the
    top-most context in the stack. Once the parser leaves the clause that
    required a local context, the parser pops the top-most context.
  */
  List<Name_resolution_context> context_stack;

  Item_sum *in_sum_func;
  udf_func udf;
  HA_CHECK_OPT check_opt;  // check/repair options
  HA_CREATE_INFO *create_info;
  KEY_CREATE_INFO key_create_info;
  LEX_MASTER_INFO mi;  // used by CHANGE MASTER
  LEX_SLAVE_CONNECTION slave_connection;
  Server_options server_options;
  USER_RESOURCES mqh;
  LEX_RESET_SLAVE reset_slave_info;
  ulong type;
  /**
    This field is used as a work field during resolving to validate
    the use of aggregate functions. For example in a query
    SELECT ... FROM ...WHERE MIN(i) == 1 GROUP BY ... HAVING MIN(i) > 2
    MIN(i) in the WHERE clause is not allowed since only non-aggregated data
    is present, whereas MIN(i) in the HAVING clause is allowed because HAVING
    operates on the output of a grouping operation.
    Each query block is assigned a nesting level. This field is a bit field
    that contains the value one in the position of that nesting level if
    aggregate functions are allowed for that query block.
  */
  nesting_map allow_sum_func;
  /**
    Windowing functions are not allowed in HAVING - in contrast to group
    aggregates - then we need to be stricter than allow_sum_func.
    One bit per query block, as allow_sum_func.
  */
  nesting_map m_deny_window_func;

  /// If true: during prepare, we did a subquery transformation (IN-to-EXISTS,
  /// SOME/ANY) that doesn't currently work for subquery to a derived table
  /// transformation.
  bool m_subquery_to_derived_is_impossible;

  Sql_cmd *m_sql_cmd;

  /*
    Usually `expr` rule of yacc is quite reused but some commands better
    not support subqueries which comes standard with this rule, like
    KILL, HA_READ, CREATE/ALTER EVENT etc. Set this to `false` to get
    syntax error back.
  */
  bool expr_allows_subselect;
  /**
    If currently re-parsing a CTE's definition, this is the offset in bytes
    of that definition in the original statement which had the WITH
    clause. Otherwise this is 0.
  */
  uint reparse_common_table_expr_at;
  /**
    If currently re-parsing a condition which is pushed down to a derived
    table, this will be set to true.
  */
  bool reparse_derived_table_condition{false};
  /**
    If currently re-parsing a condition that is being pushed down to a
    derived table, this has the positions of all the parameters that are
    part of that condition in the original statement. Otherwise it is empty.
  */
  std::vector<uint> reparse_derived_table_params_at;

  enum SSL_type ssl_type; /* defined in violite.h */
  enum enum_duplicates duplicates;
  enum enum_tx_isolation tx_isolation;
  enum enum_var_type option_type;
  enum_view_create_mode create_view_mode;

  /// QUERY ID for SHOW PROFILE
  my_thread_id show_profile_query_id;
  uint profile_options;
  uint grant, grant_tot_col;
  /**
   Set to true when GRANT ... GRANT OPTION ... TO ...
   is used (vs. GRANT ... WITH GRANT OPTION).
   The flag is used by @ref mysql_grant to grant GRANT OPTION (@ref GRANT_ACL)
   to all dynamic privileges.
  */
  bool grant_privilege;
  uint slave_thd_opt, start_transaction_opt;
  int select_number;  ///< Number of query block (by EXPLAIN)
  uint8 create_view_algorithm;
  uint8 create_view_check;
  /**
    @todo ensure that correct CONTEXT_ANALYSIS_ONLY is set for all preparation
          code, so we can fully rely on this field.
  */
  uint8 context_analysis_only;
  bool drop_if_exists;
  bool drop_temporary;
  bool autocommit;
  bool verbose, no_write_to_binlog;
  // For show commands to show hidden columns and indexes.
  bool m_extended_show;

  enum enum_yes_no_unknown tx_chain, tx_release;

  /**
    Whether this query will return the same answer every time, given unchanged
    data. Used to be for the query cache, but is now used to find out if an
    expression is usable for partitioning.
  */
  bool safe_to_cache_query;

 private:
  /// True if statement references UDF functions
  bool m_has_udf{false};
  bool ignore;

 public:
  /// @return the current value of the private 'ignore' flag
  bool is_ignore() const { return ignore; }
  /// Set the private 'ignore' flag.
  void set_ignore(bool ignore_param) { ignore = ignore_param; }
  /// Mark the statement as referencing UDF functions.
  void set_has_udf() { m_has_udf = true; }
  /// @return true if the statement has been marked as referencing UDFs
  bool has_udf() const { return m_has_udf; }
  st_parsing_options parsing_options;
  Alter_info *alter_info;
  /* Prepared statements SQL syntax:*/
  LEX_CSTRING prepared_stmt_name; /* Statement name (in all queries) */
  /*
    Prepared statement query text or name of variable that holds the
    prepared statement (in PREPARE ... queries)
  */
  LEX_STRING prepared_stmt_code;
  /* If true, prepared_stmt_code is a name of variable that holds the query */
  bool prepared_stmt_code_is_varref;
  /* Names of user variables holding parameters (in EXECUTE) */
  List<LEX_STRING> prepared_stmt_params;
  sp_head *sphead;
  sp_name *spname;
  bool sp_lex_in_use; /* Keep track on lex usage in SPs for error handling */
  bool all_privileges;
  bool contains_plaintext_password;
  enum_keep_diagnostics keep_diagnostics;
  uint32 next_binlog_file_nr;

 private:
  bool m_broken;  ///< see mark_broken()
  /**
    Set to true when execution has started (after parsing, tables opened and
    query preparation is complete. Used to track arena state for SPs).
  */
  bool m_exec_started;
  /**
    Set to true when execution is completed, ie optimization has been done
    and execution is successful or ended in error.
  */
  bool m_exec_completed;
  /**
    Current SP parsing context.
    @see also sp_head::m_root_parsing_ctx.
  */
  sp_pcontext *sp_current_parsing_ctx;

  /**
    Statement context for Query_block::make_active_options.
  */
  ulonglong m_statement_options{0};

 public:
  /**
    Gets the options that have been set for this statement. The options are
    propagated to the Query_block objects and should usually be read with
    #Query_block::active_options().

    @return a bit set of options set for this statement
  */
  ulonglong statement_options() const { return m_statement_options; }
  /**
    Add options to values of m_statement_options. options is an ORed
    bit set of options defined in query_options.h

    @param options Add this set of options to the set already in
                   m_statement_options
  */
  void add_statement_options(ulonglong options) {
    m_statement_options |= options;
  }
  /// @return true if this LEX has been marked broken, see mark_broken()
  bool is_broken() const { return m_broken; }
  /**
     Certain permanent transformations (like in2exists), if they fail, may
     leave the LEX in an inconsistent state. They should call the
     following function, so that this LEX is not reused by another execution.

     @param broken  true to (conditionally) set the broken flag, false to
                    clear it unconditionally

     @todo If lex_start () were a member function of LEX, the "broken"
     argument could always be "true" and thus could be removed.
  */
  void mark_broken(bool broken = true) {
    if (broken) {
      /*
        "OPEN <cursor>" cannot be re-prepared if the cursor uses no tables
        ("SELECT FROM DUAL"). Indeed in that case cursor_query is left empty
        in constructions of sp_instr_cpush, and thus
        sp_lex_instr::parse_expr() cannot re-prepare. So we mark the statement
        as broken only if tables are used.
      */
      if (is_metadata_used()) m_broken = true;
    } else
      m_broken = false;
  }

  bool check_preparation_invalid(THD *thd);

  /// Forward cleanup to the outer-most query expression. When 'full' is
  /// true, the cached IS table and tablespace statistics are invalidated too.
  void cleanup(THD *thd, bool full) {
    unit->cleanup(thd, full);
    if (full) {
      m_IS_table_stats.invalidate_cache();
      m_IS_tablespace_stats.invalidate_cache();
    }
  }

  /// @return true if set_exec_started() was called since the last reset
  bool is_exec_started() const { return m_exec_started; }
  /// Flag that execution of the statement has started.
  void set_exec_started() { m_exec_started = true; }
  /// Clear both the "execution started" and "execution completed" flags.
  void reset_exec_started() {
    m_exec_started = false;
    m_exec_completed = false;
  }
  /**
    Check whether the statement has been executed (regardless of completion -
    successful or in error).
    Check this instead of Query_expression::is_executed() to determine
    the state of a complete statement.
  */
  bool is_exec_completed() const { return m_exec_completed; }
  /// Flag that execution of the statement has completed.
  void set_exec_completed() { m_exec_completed = true; }
  /// @return the current SP parsing context
  sp_pcontext *get_sp_current_parsing_ctx() const {
    return sp_current_parsing_ctx;
  }

  /// Replace the current SP parsing context.
  void set_sp_current_parsing_ctx(sp_pcontext *ctx) {
    sp_current_parsing_ctx = ctx;
  }

  /// Check if the current statement uses meta-data (uses a table or a stored
  /// routine).
  bool is_metadata_used() const {
    return query_tables != nullptr || has_udf() ||
           (sroutines != nullptr && !sroutines->empty());
  }

 public:
  st_sp_chistics sp_chistics;

  Event_parse_data *event_parse_data;

  bool only_view; /* used for SHOW CREATE TABLE/VIEW */
  /*
    view created to be run from definer (standard behaviour)
  */
  uint8 create_view_suid;

  /**
    Intended to point to the next word after DEFINER-clause in the
    following statements:

      - CREATE TRIGGER (points to "TRIGGER");
      - CREATE PROCEDURE (points to "PROCEDURE");
      - CREATE FUNCTION (points to "FUNCTION" or "AGGREGATE");
      - CREATE EVENT (points to "EVENT")

    This pointer is required to add possibly omitted DEFINER-clause to the
    DDL-statement before dumping it to the binlog.
  */
  const char *stmt_definition_begin;
  const char *stmt_definition_end;

  /**
    During name resolution search only in the table list given by
    Name_resolution_context::first_name_resolution_table and
    Name_resolution_context::last_name_resolution_table
    (see Item_field::fix_fields()).
  */
  bool use_only_table_context;

  bool is_lex_started; /* If lex_start() did run. For debugging. */
  /// Set to true while resolving values in ON DUPLICATE KEY UPDATE clause
  bool in_update_value_clause;

  class Explain_format *explain_format;

  // Maximum execution time for a statement.
  ulong max_execution_time;

  /*
    To flag the current statement as dependent for binary logging
    on explicit_defaults_for_timestamp
  */
  bool binlog_need_explicit_defaults_ts;

  /**
    Used to inform the parser whether it should contextualize the parse
    tree. When we get a pure parser this will not be needed.
  */
  bool will_contextualize;

  LEX();

  virtual ~LEX();

  /// Destroy contained objects, but not the LEX object itself.
  /// Nothing is done (the values map included) when 'unit' is already
  /// nullptr.
  void destroy() {
    if (unit == nullptr) return;
    unit->destroy();
    unit = nullptr;
    query_block = nullptr;
    all_query_blocks_list = nullptr;
    m_current_query_block = nullptr;
    destroy_values_map();
  }

  /// Reset query context to initial state
  void reset();

  /// Create an empty query block within this LEX object.
  Query_block *new_empty_query_block();

  /// Create query expression object that contains one query block.
  Query_block *new_query(Query_block *curr_query_block);

  /// Create query block and attach it to the current query expression.
  Query_block *new_union_query(Query_block *curr_query_block, bool distinct);

  /// Create top-level query expression and query block.
  bool new_top_level_query();

  /// Create query expression and query block in existing memory objects.
  void new_static_query(Query_expression *sel_query_expression,
                        Query_block *select);

  /// Create query expression under current_query_block and a query block under
  /// the new query expression. The new query expression is linked in under
  /// current_query_block. The new query block is linked in under the new
  /// query expression.
  ///
  /// @param thd            current session context
  /// @param current_query_block the root under which we create the new
  /// expression
  ///                       and block
  /// @param where_clause   any where clause for the block
  /// @param having_clause  any having clause for the block
  /// @param ctx            the parsing context
  ///
  /// @returns              the new query expression, or nullptr on error.
  Query_expression *create_query_expr_and_block(
      THD *thd, Query_block *current_query_block, Item *where_clause,
      Item *having_clause, enum_parsing_context ctx);

  /// @return true if context_analysis_only has the PREPARE or the VIEW
  /// context-analysis bit set
  inline bool is_ps_or_view_context_analysis() const {
    return (context_analysis_only &
            (CONTEXT_ANALYSIS_ONLY_PREPARE | CONTEXT_ANALYSIS_ONLY_VIEW)) != 0;
  }

  /// @return true if context_analysis_only has the VIEW context-analysis
  /// bit set
  inline bool is_view_context_analysis() const {
    return (context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW) != 0;
  }

  void clear_execution();

  /**
    Set the current query as uncacheable.

    @param curr_query_block Current select query block
    @param cause       Why this query is uncacheable.

    @details
    All query blocks representing subqueries, from the current one up to
    the outer-most one, but excluding the main query block, are also set
    as uncacheable.
  */
  void set_uncacheable(Query_block *curr_query_block, uint8 cause) {
    safe_to_cache_query = false;

    // NOTE(review): the guard tests m_current_query_block while the walk
    // below starts from curr_query_block - confirm that callers never pass
    // a null curr_query_block.
    if (m_current_query_block == nullptr) return;
    Query_block *sl;
    Query_expression *un;
    // Walk outwards until the enclosing query expression is the top-level
    // 'unit'; blocks directly under 'unit' are therefore not flagged.
    for (sl = curr_query_block, un = sl->master_query_expression(); un != unit;
         sl = sl->outer_query_block(), un = sl->master_query_expression()) {
      sl->uncacheable |= cause;
      un->uncacheable |= cause;
    }
  }
  void set_trg_event_type_for_tables();

  TABLE_LIST *unlink_first_table(bool *link_to_local);
  void link_first_table_back(TABLE_LIST *first, bool link_to_local);
  void first_lists_tables_same();

  /// Forward to unit->restore_cmd_properties().
  void restore_cmd_properties() { unit->restore_cmd_properties(); }

  /// Call restore_properties() on every leaf table of insert_table,
  /// following the next_leaf chain.
  void restore_properties_for_insert() {
    for (TABLE_LIST *tr = insert_table->first_leaf_table(); tr != nullptr;
         tr = tr->next_leaf)
      tr->restore_properties();
  }

  /// Forward to unit->save_cmd_properties() and return its result.
  bool save_cmd_properties(THD *thd) { return unit->save_cmd_properties(thd); }

  bool can_use_merged();
  bool can_not_use_merged();
  bool need_correct_ident();
  /*
    Is this update command where 'WITH CHECK OPTION' clause is important

    SYNOPSIS
      LEX::which_check_option_applicable()

    RETURN
      true   have to take 'WITH CHECK OPTION' clause into account
      false  'WITH CHECK OPTION' clause do not need
  */
  inline bool which_check_option_applicable() const {
    switch (sql_command) {
      case SQLCOM_UPDATE:
      case SQLCOM_UPDATE_MULTI:
      case SQLCOM_INSERT:
      case SQLCOM_INSERT_SELECT:
      case SQLCOM_REPLACE:
      case SQLCOM_REPLACE_SELECT:
      case SQLCOM_LOAD:
        return true;
      default:
        return false;
    }
  }

  void cleanup_after_one_table_open();

  /// Push a name resolution context onto the front of context_stack.
  /// @return the result of List::push_front()
  bool push_context(Name_resolution_context *context) {
    return context_stack.push_front(context);
  }

  /// Pop the top-most name resolution context from context_stack.
  void pop_context() { context_stack.pop(); }

  bool copy_db_to(char const **p_db, size_t *p_db_length) const;

  /// Overload for callers holding a non-const char pointer; forwards to the
  /// const-char overload.
  bool copy_db_to(char **p_db, size_t *p_db_length) const {
    return copy_db_to(const_cast<const char **>(p_db), p_db_length);
  }

  /// @return the name resolution context at the head of context_stack
  Name_resolution_context *current_context() { return context_stack.head(); }

  void reset_n_backup_query_tables_list(Query_tables_list *backup);
  void restore_backup_query_tables_list(Query_tables_list *backup);

  bool table_or_sp_used();

  /**
    @brief check if the statement is a single-level join
    @return result of the check
      @retval true  The statement doesn't contain subqueries, unions and
                    stored procedure calls.
      @retval false There are subqueries, UNIONs or stored procedure calls.
  */
  bool is_single_level_stmt() const {
    /*
      This check exploits the fact that the last added to
      all_query_blocks_list is on its top. So query_block (as the first
      added) will be at the tail of the list.
    */
    if (query_block == all_query_blocks_list &&
        (sroutines == nullptr || sroutines->empty())) {
      assert(!all_query_blocks_list->next_select_in_list());
      return true;
    }
    return false;
  }

  void release_plugins();

  /**
    IS schema queries read some dynamic table statistics from SE.
    These statistics are cached, to avoid opening of table more
    than once while preparing a single output record buffer.
  */
  dd::info_schema::Table_statistics m_IS_table_stats;
  dd::info_schema::Tablespace_statistics m_IS_tablespace_stats;

  bool accept(Select_lex_visitor *visitor);

  bool set_wild(LEX_STRING);
  void clear_privileges();

  bool make_sql_cmd(Parse_tree_root *parse_tree);

 private:
  /**
    Context object used by secondary storage engines to store query
    state during optimization and execution.
  */
  Secondary_engine_execution_context *m_secondary_engine_context{nullptr};

 public:
  /**
    Gets the secondary engine execution context for this statement.
  */
  Secondary_engine_execution_context *secondary_engine_execution_context()
      const {
    return m_secondary_engine_context;
  }

  /**
    Sets the secondary engine execution context for this statement.
    The old context object is destroyed, if there is one. Can be set
    to nullptr to destroy the old context object and clear the
    pointer.

    The supplied context object should be allocated on the execution
    MEM_ROOT, so that its memory doesn't have to be manually freed
    after query execution.
  */
  void set_secondary_engine_execution_context(
      Secondary_engine_execution_context *context);

 private:
  bool m_is_replication_deprecated_syntax_used{false};

 public:
  /// @return true if set_replication_deprecated_syntax_used() was called
  bool is_replication_deprecated_syntax_used() const {
    return m_is_replication_deprecated_syntax_used;
  }

  /// Record that deprecated replication syntax was used.
  void set_replication_deprecated_syntax_used() {
    m_is_replication_deprecated_syntax_used = true;
  }

 private:
  bool m_was_replication_command_executed{false};

 public:
  /// @return true if set_was_replication_command_executed() was called
  bool was_replication_command_executed() const {
    return m_was_replication_command_executed;
  }

  /// Record that a replication command was executed.
  void set_was_replication_command_executed() {
    m_was_replication_command_executed = true;
  }

  bool set_channel_name(LEX_CSTRING name = {});

 private:
  bool rewrite_required{false};

 public:
  /// Set the rewrite_required flag.
  void set_rewrite_required() { rewrite_required = true; }
  /// Clear the rewrite_required flag.
  void reset_rewrite_required() { rewrite_required = false; }
  /// @return the current value of the rewrite_required flag
  bool is_rewrite_required() const { return rewrite_required; }
};

Query_tables_list

/*
  Class representing list of all tables used by statement and other
  information which is necessary for opening and locking its tables,
  like SQL command for this statement.

  Also contains information about stored functions used by statement
  since during its execution we may have to add all tables used by its
  stored functions/triggers to this list in order to pre-open and lock
  them.

  Also used by LEX::reset_n_backup/restore_backup_query_tables_list()
  methods to save and restore this information.
*/

class Query_tables_list {
 public:
  Query_tables_list &operator=(Query_tables_list &&) = default;

  /**
    SQL command for this statement. Part of this class since the
    process of opening and locking tables for the statement needs
    this information to determine correct type of lock for some of
    the tables.
  */
  enum_sql_command sql_command;
  /* Global list of all tables used by this statement */
  TABLE_LIST *query_tables;
  /* Pointer to next_global member of last element in the previous list. */
  TABLE_LIST **query_tables_last;
  /*
    If non-0 then indicates that query requires prelocking and points to
    next_global member of last own element in query table list (i.e. last
    table which was not added to it as part of preparation to prelocking).
    0 - indicates that this query does not need prelocking.
  */
  TABLE_LIST **query_tables_own_last;
  /*
    Set of stored routines called by statement.
    (Note that we use lazy-initialization for this hash).

    See Sroutine_hash_entry for explanation why this hash uses binary
    key comparison.
  */
  enum { START_SROUTINES_HASH_SIZE = 16 };
  std::unique_ptr<malloc_unordered_map<std::string, Sroutine_hash_entry *>>
      sroutines;
  /*
    List linking elements of 'sroutines' set. Allows you to add new elements
    to this set as you iterate through the list of existing elements.
    'sroutines_list_own_last' is pointer to ::next member of last element of
    this list which represents routine which is explicitly used by query.
    'sroutines_list_own_elements' number of explicitly used routines.
    We use these two members for restoring of 'sroutines_list' to the state
    in which it was right after query parsing.
  */
  SQL_I_List<Sroutine_hash_entry> sroutines_list;
  Sroutine_hash_entry **sroutines_list_own_last;
  uint sroutines_list_own_elements;

  /**
    Locking state of tables in this particular statement.

    If we under LOCK TABLES or in prelocked mode we consider tables
    for the statement to be "locked" if there was a call to lock_tables()
    (which called handler::start_stmt()) for tables of this statement
    and there was no matching close_thread_tables() call.

    As result this state may differ significantly from one represented
    by Open_tables_state::lock/locked_tables_mode more, which are always
    "on" under LOCK TABLES or in prelocked mode.
  */
  enum enum_lock_tables_state { LTS_NOT_LOCKED = 0, LTS_LOCKED };
  enum_lock_tables_state lock_tables_state;
  /** @returns true if lock_tables_state is LTS_LOCKED. */
  bool is_query_tables_locked() const {
    return (lock_tables_state == LTS_LOCKED);
  }

  /**
    Number of tables which were open by open_tables() and to be locked
    by lock_tables().
    Note that we set this member only in some cases, when this value
    needs to be passed from open_tables() to lock_tables() which are
    separated by some amount of code.
  */
  uint table_count;

  /*
    These constructor and destructor serve for creation/destruction
    of Query_tables_list instances which are used as backup storage.
  */
  Query_tables_list() = default;
  ~Query_tables_list() = default;

  /* Initializes (or resets) Query_tables_list object for "real" use. */
  void reset_query_tables_list(bool init);
  void destroy_query_tables_list();
  /**
    Takes over the contents of another list by move-assignment.

    @param state  Source object; it is left in a moved-from state.
  */
  void set_query_tables_list(Query_tables_list *state) {
    *this = std::move(*state);
  }

  /*
    Direct addition to the list of query tables.
    If you are using this function, you must ensure that the table
    object, in particular table->db member, is initialized.
  */
  void add_to_query_tables(TABLE_LIST *table) {
    // Link the new element behind the current tail, then advance the tail.
    *(table->prev_global = query_tables_last) = table;
    query_tables_last = &table->next_global;
  }
  /** @returns true if query_tables_own_last is set (prelocking needed). */
  bool requires_prelocking() const { return query_tables_own_last != nullptr; }
  void mark_as_requiring_prelocking(TABLE_LIST **tables_own_last) {
    query_tables_own_last = tables_own_last;
  }
  /* Return pointer to first not-own table in query-tables or 0 */
  TABLE_LIST *first_not_own_table() {
    return (query_tables_own_last ? *query_tables_own_last : nullptr);
  }
  /**
    Cuts the list after the last own table and clears the prelocking marker.
    Does nothing when query_tables_own_last is not set.
  */
  void chop_off_not_own_tables() {
    if (query_tables_own_last) {
      *query_tables_own_last = nullptr;
      query_tables_last = query_tables_own_last;
      query_tables_own_last = nullptr;
    }
  }

  /**
    All types of unsafe statements.

    @note The int values of the enum elements are used to point to
    bits in two bitmaps in two different places:

    - Query_tables_list::binlog_stmt_flags
    - THD::binlog_unsafe_warning_flags

    Hence in practice this is not an enum at all, but a map from
    symbols to bit indexes.

    The ordering of elements in this enum must correspond to the order of
    elements in the array binlog_stmt_unsafe_errcode.
  */
  enum enum_binlog_stmt_unsafe {
    /**
      SELECT..LIMIT is unsafe because the set of rows returned cannot
      be predicted.
    */
    BINLOG_STMT_UNSAFE_LIMIT = 0,
    /**
      Access to log tables is unsafe because slave and master probably
      log different things.
    */
    BINLOG_STMT_UNSAFE_SYSTEM_TABLE,
    /**
      Inserting into an autoincrement column in a stored routine is unsafe.
      Even with just one autoincrement column, if the routine is invoked more
      than once slave is not guaranteed to execute the statement graph same way
      as the master. And since it's impossible to estimate how many times a
      routine can be invoked at the query pre-execution phase (see lock_tables),
      the statement is marked pessimistically unsafe.
    */
    BINLOG_STMT_UNSAFE_AUTOINC_COLUMNS,
    /**
      Using a UDF (user-defined function) is unsafe.
    */
    BINLOG_STMT_UNSAFE_UDF,
    /**
      Using most system variables is unsafe, because slave may run
      with different options than master.
    */
    BINLOG_STMT_UNSAFE_SYSTEM_VARIABLE,
    /**
      Using some functions is unsafe (e.g., UUID).
    */
    BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION,

    /**
      Mixing transactional and non-transactional statements is unsafe if
      non-transactional reads or writes occur after transactional
      reads or writes inside a transaction.
    */
    BINLOG_STMT_UNSAFE_NONTRANS_AFTER_TRANS,

    /**
      Mixing self-logging and non-self-logging engines in a statement
      is unsafe.
    */
    BINLOG_STMT_UNSAFE_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE,

    /**
      Statements that read from both transactional and non-transactional
      tables and write to any of them are unsafe.
    */
    BINLOG_STMT_UNSAFE_MIXED_STATEMENT,

    /**
      INSERT...IGNORE SELECT is unsafe because which rows are ignored depends
      on the order that rows are retrieved by SELECT. This order cannot be
      predicted and may differ on master and the slave.
    */
    BINLOG_STMT_UNSAFE_INSERT_IGNORE_SELECT,

    /**
      INSERT...SELECT...UPDATE is unsafe because which rows are updated depends
      on the order that rows are retrieved by SELECT. This order cannot be
      predicted and may differ on master and the slave.
    */
    BINLOG_STMT_UNSAFE_INSERT_SELECT_UPDATE,

    /**
     Query that writes to a table with auto_inc column after selecting from
     other tables are unsafe as the order in which the rows are retrieved by
     select may differ on master and slave.
    */
    BINLOG_STMT_UNSAFE_WRITE_AUTOINC_SELECT,

    /**
      INSERT...REPLACE SELECT is unsafe because which rows are replaced depends
      on the order that rows are retrieved by SELECT. This order cannot be
      predicted and may differ on master and the slave.
    */
    BINLOG_STMT_UNSAFE_REPLACE_SELECT,

    /**
      CREATE TABLE... IGNORE... SELECT is unsafe because which rows are ignored
      depends on the order that rows are retrieved by SELECT. This order cannot
      be predicted and may differ on master and the slave.
    */
    BINLOG_STMT_UNSAFE_CREATE_IGNORE_SELECT,

    /**
      CREATE TABLE...REPLACE... SELECT is unsafe because which rows are replaced
      depends on the order that rows are retrieved from SELECT. This order
      cannot be predicted and may differ on master and the slave
    */
    BINLOG_STMT_UNSAFE_CREATE_REPLACE_SELECT,

    /**
      CREATE TABLE...SELECT on a table with auto-increment column is unsafe
      because which rows are replaced depends on the order that rows are
      retrieved from SELECT. This order cannot be predicted and may differ on
      master and the slave
    */
    BINLOG_STMT_UNSAFE_CREATE_SELECT_AUTOINC,

    /**
      UPDATE...IGNORE is unsafe because which rows are ignored depends on the
      order that rows are updated. This order cannot be predicted and may differ
      on master and the slave.
    */
    BINLOG_STMT_UNSAFE_UPDATE_IGNORE,

    /**
      INSERT... ON DUPLICATE KEY UPDATE on a table with more than one
      UNIQUE KEYS  is unsafe.
    */
    BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS,

    /**
       INSERT into auto-inc field which is not the first part in composed
       primary key.
    */
    BINLOG_STMT_UNSAFE_AUTOINC_NOT_FIRST,

    /**
       Using a plugin is unsafe.
    */
    BINLOG_STMT_UNSAFE_FULLTEXT_PLUGIN,
    BINLOG_STMT_UNSAFE_SKIP_LOCKED,
    BINLOG_STMT_UNSAFE_NOWAIT,

    /**
      XA transactions and statements.
    */
    BINLOG_STMT_UNSAFE_XA,

    /**
      If a substatement inserts into or updates a table that has a column with
      an unsafe DEFAULT expression, it may not have the same effect on the
      slave.
    */
    BINLOG_STMT_UNSAFE_DEFAULT_EXPRESSION_IN_SUBSTATEMENT,

    /**
      DML or DDL statement that reads a ACL table is unsafe, because the row
      are read without acquiring SE row locks. This would allow ACL tables to
      be updated by concurrent thread. It would not have the same effect on the
      slave.
    */
    BINLOG_STMT_UNSAFE_ACL_TABLE_READ_IN_DML_DDL,

    /* the last element of this enumeration type. */
    BINLOG_STMT_UNSAFE_COUNT
  };
  /**
    This has all flags from 0 (inclusive) to BINLOG_STMT_UNSAFE_COUNT
    (exclusive) set.
  */
  static const int BINLOG_STMT_UNSAFE_ALL_FLAGS =
      ((1 << BINLOG_STMT_UNSAFE_COUNT) - 1);

  /**
    Maps elements of enum_binlog_stmt_unsafe to error codes.
  */
  static const int binlog_stmt_unsafe_errcode[BINLOG_STMT_UNSAFE_COUNT];

  /**
    Determine if this statement is marked as unsafe.

    @retval false if the statement is not marked as unsafe.
    @retval true  if the statement is marked as unsafe.
  */
  inline bool is_stmt_unsafe() const { return get_stmt_unsafe_flags() != 0; }

  /**
    Determine if this statement is marked with one specific kind of
    unsafeness.

    @param unsafe  Bit index to test, a member of enum_binlog_stmt_unsafe.

    @retval true  if the corresponding bit of binlog_stmt_flags is set.
    @retval false otherwise.
  */
  inline bool is_stmt_unsafe(enum_binlog_stmt_unsafe unsafe) const {
    // Unsigned shift, matching how set_stmt_unsafe() sets the bit.
    return (binlog_stmt_flags & (1U << unsafe)) != 0;
  }

  /**
    Flag the current (top-level) statement as unsafe.
    The flag will be reset after the statement has finished.

    @param unsafe_type The type of unsafety: one of the
    BINLOG_STMT_UNSAFE_* enumerators of @c enum_binlog_stmt_unsafe
    (BINLOG_STMT_UNSAFE_COUNT itself is rejected by the assertion).
  */
  inline void set_stmt_unsafe(enum_binlog_stmt_unsafe unsafe_type) {
    DBUG_TRACE;
    assert(unsafe_type >= 0 && unsafe_type < BINLOG_STMT_UNSAFE_COUNT);
    binlog_stmt_flags |= (1U << unsafe_type);
  }

  /**
    Set the bits of binlog_stmt_flags determining the type of
    unsafeness of the current statement.  No existing bits will be
    cleared, but new bits may be set.

    @param flags A binary combination of zero or more bits, (1<<flag)
    where flag is a member of enum_binlog_stmt_unsafe.
  */
  inline void set_stmt_unsafe_flags(uint32 flags) {
    DBUG_TRACE;
    assert((flags & ~BINLOG_STMT_UNSAFE_ALL_FLAGS) == 0);
    binlog_stmt_flags |= flags;
  }

  /**
    Return a binary combination of all unsafe warnings for the
    statement.  If the statement has been marked as unsafe by the
    'flag' member of enum_binlog_stmt_unsafe, then the return value
    from this function has bit (1<<flag) set to 1.
  */
  inline uint32 get_stmt_unsafe_flags() const {
    DBUG_TRACE;
    return binlog_stmt_flags & BINLOG_STMT_UNSAFE_ALL_FLAGS;
  }

  /**
    Determine if this statement is a row injection.

    @retval false if the statement is not a row injection
    @retval true  if the statement is a row injection
  */
  inline bool is_stmt_row_injection() const {
    return binlog_stmt_flags &
           (1U << (BINLOG_STMT_UNSAFE_COUNT + BINLOG_STMT_TYPE_ROW_INJECTION));
  }

  /**
    Flag the statement as a row injection.  A row injection is either
    a BINLOG statement, or a row event in the relay log executed by
    the slave SQL thread.
  */
  inline void set_stmt_row_injection() {
    DBUG_TRACE;
    binlog_stmt_flags |=
        (1U << (BINLOG_STMT_UNSAFE_COUNT + BINLOG_STMT_TYPE_ROW_INJECTION));
  }

  enum enum_stmt_accessed_table {
    /*
       If a transactional table is about to be read. Note that
       a write implies a read.
    */
    STMT_READS_TRANS_TABLE = 0,
    /*
       If a non-transactional table is about to be read. Note that
       a write implies a read.
    */
    STMT_READS_NON_TRANS_TABLE,
    /*
       If a temporary transactional table is about to be read. Note
       that a write implies a read.
    */
    STMT_READS_TEMP_TRANS_TABLE,
    /*
       If a temporary non-transactional table is about to be read. Note
      that a write implies a read.
    */
    STMT_READS_TEMP_NON_TRANS_TABLE,
    /*
       If a transactional table is about to be updated.
    */
    STMT_WRITES_TRANS_TABLE,
    /*
       If a non-transactional table is about to be updated.
    */
    STMT_WRITES_NON_TRANS_TABLE,
    /*
       If a temporary transactional table is about to be updated.
    */
    STMT_WRITES_TEMP_TRANS_TABLE,
    /*
       If a temporary non-transactional table is about to be updated.
    */
    STMT_WRITES_TEMP_NON_TRANS_TABLE,
    /*
      The last element of the enumeration. Please, if necessary add
      anything before this.
    */
    STMT_ACCESS_TABLE_COUNT
  };

#ifndef NDEBUG
  /**
    Debug-only helper returning the enumerator name as a string literal.

    @param accessed_table  A member of enum_stmt_accessed_table.

    @returns the enumerator's name; STMT_ACCESS_TABLE_COUNT or any other
             value trips the assertion.
  */
  static inline const char *stmt_accessed_table_string(
      enum_stmt_accessed_table accessed_table) {
    switch (accessed_table) {
      case STMT_READS_TRANS_TABLE:
        return "STMT_READS_TRANS_TABLE";
      case STMT_READS_NON_TRANS_TABLE:
        return "STMT_READS_NON_TRANS_TABLE";
      case STMT_READS_TEMP_TRANS_TABLE:
        return "STMT_READS_TEMP_TRANS_TABLE";
      case STMT_READS_TEMP_NON_TRANS_TABLE:
        return "STMT_READS_TEMP_NON_TRANS_TABLE";
      case STMT_WRITES_TRANS_TABLE:
        return "STMT_WRITES_TRANS_TABLE";
      case STMT_WRITES_NON_TRANS_TABLE:
        return "STMT_WRITES_NON_TRANS_TABLE";
      case STMT_WRITES_TEMP_TRANS_TABLE:
        return "STMT_WRITES_TEMP_TRANS_TABLE";
      case STMT_WRITES_TEMP_NON_TRANS_TABLE:
        return "STMT_WRITES_TEMP_NON_TRANS_TABLE";
      case STMT_ACCESS_TABLE_COUNT:
      default:
        assert(0);
        break;
    }
    MY_ASSERT_UNREACHABLE();
    return "";
  }
#endif /* !NDEBUG */

#define BINLOG_DIRECT_ON                    \
  0xF0 /* unsafe when                       \
          --binlog-direct-non-trans-updates \
          is ON */

#define BINLOG_DIRECT_OFF                  \
  0xF /* unsafe when                       \
         --binlog-direct-non-trans-updates \
         is OFF */

#define TRX_CACHE_EMPTY 0x33 /* unsafe when trx-cache is empty */

#define TRX_CACHE_NOT_EMPTY 0xCC /* unsafe when trx-cache is not empty */

#define IL_LT_REPEATABLE 0xAA /* unsafe when < ISO_REPEATABLE_READ */

#define IL_GTE_REPEATABLE 0x55 /* unsafe when >= ISO_REPEATABLE_READ */

  /**
    Sets the type of table that is about to be accessed while executing a
    statement.

    @param accessed_table Enumeration type that defines the type of table,
                           e.g. temporary, transactional, non-transactional.
  */
  inline void set_stmt_accessed_table(enum_stmt_accessed_table accessed_table) {
    DBUG_TRACE;

    assert(accessed_table >= 0 && accessed_table < STMT_ACCESS_TABLE_COUNT);
    stmt_accessed_table_flag |= (1U << accessed_table);
  }

  /**
    Checks if a type of table is about to be accessed while executing a
    statement.

    @param accessed_table Enumeration type that defines the type of table,
           e.g. temporary, transactional, non-transactional.

    @retval true  if the type of the table is about to be accessed
    @retval false otherwise
  */
  inline bool stmt_accessed_table(
      enum_stmt_accessed_table accessed_table) const {
    DBUG_TRACE;

    assert(accessed_table >= 0 && accessed_table < STMT_ACCESS_TABLE_COUNT);

    return (stmt_accessed_table_flag & (1U << accessed_table)) != 0;
  }

  /**
    Checks if a mixed statement is unsafe.

    @param in_multi_stmt_transaction_mode defines if there is an on-going
           multi-transactional statement.
    @param binlog_direct defines if --binlog-direct-non-trans-updates is
           active.
    @param trx_cache_is_not_empty defines if the trx-cache is empty or not.
    @param tx_isolation defines the isolation level.

    @retval true if the mixed statement is unsafe
    @retval false otherwise
  */
  inline bool is_mixed_stmt_unsafe(bool in_multi_stmt_transaction_mode,
                                   bool binlog_direct,
                                   bool trx_cache_is_not_empty,
                                   uint tx_isolation) const {
    bool unsafe = false;

    if (in_multi_stmt_transaction_mode) {
      // Each of the three settings selects one bit mask; the intersection is
      // tested against the map entry for the current access pattern.
      uint condition =
          (binlog_direct ? BINLOG_DIRECT_ON : BINLOG_DIRECT_OFF) &
          (trx_cache_is_not_empty ? TRX_CACHE_NOT_EMPTY : TRX_CACHE_EMPTY) &
          (tx_isolation >= ISO_REPEATABLE_READ ? IL_GTE_REPEATABLE
                                               : IL_LT_REPEATABLE);

      unsafe = (binlog_unsafe_map[stmt_accessed_table_flag] & condition) != 0;

#if ! defined(NDEBUG)
      DBUG_PRINT("LEX::is_mixed_stmt_unsafe",
                 ("RESULT %02X %02X %02X\n", condition,
                  binlog_unsafe_map[stmt_accessed_table_flag],
                  (binlog_unsafe_map[stmt_accessed_table_flag] & condition)));

      int type_in = 0;
      for (; type_in < STMT_ACCESS_TABLE_COUNT; type_in++) {
        if (stmt_accessed_table((enum_stmt_accessed_table)type_in))
          DBUG_PRINT("LEX::is_mixed_stmt_unsafe",
                     ("ACCESSED %s ", stmt_accessed_table_string(
                                          (enum_stmt_accessed_table)type_in)));
      }
#endif
    }

    // Independently of the transaction mode: writing a (temporary or
    // regular) non-transactional table while reading a transactional one
    // is unsafe below REPEATABLE READ.
    if ((stmt_accessed_table(STMT_WRITES_NON_TRANS_TABLE) ||
         stmt_accessed_table(STMT_WRITES_TEMP_NON_TRANS_TABLE)) &&
        stmt_accessed_table(STMT_READS_TRANS_TABLE) &&
        tx_isolation < ISO_REPEATABLE_READ)
      unsafe = true;

    return unsafe;
  }

  /**
    true if the parsed tree contains references to stored procedures
    or functions, false otherwise
  */
  bool uses_stored_routines() const { return sroutines_list.elements != 0; }

  void set_using_match() { using_match = true; }
  bool get_using_match() const { return using_match; }

  void set_stmt_unsafe_with_mixed_mode() { stmt_unsafe_with_mixed_mode = true; }
  bool is_stmt_unsafe_with_mixed_mode() const {
    return stmt_unsafe_with_mixed_mode;
  }

 private:
  /**
    Enumeration listing special types of statements.

    Currently, the only possible type is ROW_INJECTION.
  */
  enum enum_binlog_stmt_type {
    /**
      The statement is a row injection (i.e., either a BINLOG
      statement or a row event executed by the slave SQL thread).
    */
    BINLOG_STMT_TYPE_ROW_INJECTION = 0,

    /** The last element of this enumeration type. */
    BINLOG_STMT_TYPE_COUNT
  };

  // Both bit groups share the 32-bit binlog_stmt_flags word; fail the build
  // if a new enumerator would no longer fit.
  static_assert(static_cast<int>(BINLOG_STMT_UNSAFE_COUNT) +
                        static_cast<int>(BINLOG_STMT_TYPE_COUNT) <=
                    32,
                "binlog_stmt_flags is too narrow for all statement flags");

  /**
    Bit field indicating the type of statement.

    There are two groups of bits:

    - The low BINLOG_STMT_UNSAFE_COUNT bits indicate the types of
      unsafeness that the current statement has.

    - The next BINLOG_STMT_TYPE_COUNT bits indicate if the statement
      is of some special type.

    This must be a member of LEX, not of THD: each stored procedure
    needs to remember its unsafeness state between calls and each
    stored procedure has its own LEX object (but no own THD object).
  */
  uint32 binlog_stmt_flags;

  /**
    Bit field that determines the type of tables that are about to be
    be accessed while executing a statement.
  */
  uint32 stmt_accessed_table_flag;

  /**
     It will be set true if 'MATCH () AGAINST' is used in the statement.
  */
  bool using_match;

  /**
    This flag is set to true if statement is unsafe to be binlogged in STATEMENT
    format, when in MIXED mode.
    Currently this flag is set to true if stored program used in statement has
    CREATE/DROP temporary table operation(s) as sub-statement(s).
  */
  bool stmt_unsafe_with_mixed_mode{false};
};

TODO

  • class Alter_info;
  • class Event_parse_data;
  • class Field;
  • class Item_cond;
  • class Item_func_get_system_var;
  • class Item_func_match;
  • class Item_func_set_user_var;
  • class Item_rollup_group_item;
  • class Item_rollup_sum_switcher;
  • class Item_sum;
  • class JOIN;
  • class Opt_hints_global;
  • class Opt_hints_qb;
  • class PT_subquery;
  • class PT_with_clause;
  • class Parse_tree_root;
  • class Protocol;
  • class Query_result;
  • class Query_result_interceptor;
  • class Query_result_union;
  • class Query_block;
  • class Query_expression;
  • class Select_lex_visitor;
  • class Sql_cmd;
  • class THD;
  • class Value_generator;
  • class Window;
  • class partition_info;
  • class sp_head;
  • class sp_name;
  • class sp_pcontext;
  • struct LEX;
  • struct NESTED_JOIN;
  • struct PSI_digest_locker;
  • struct sql_digest_state;
  • union Lexer_yystype;
  • struct Lifted_fields_map;

可以先把 Query_expression 的结构弄清楚.

为 MEM_ROOT 设置最大容量时, 会用到参数 parser_max_mem_size.

THD::sql_parser() 源码

/**
  Run the parser to turn the statement into a parse tree, then turn the
  parse tree into an AST in preparation for resolving.

  @returns false on success, true on error.
*/
bool THD::sql_parser() {
  /*
    The SQL parser function is generated by YACC from sql_yacc.yy.

    On success it returns 0 and THD::is_error() is false.
    Otherwise it returns 1, or THD::is_error() is true.

    The second argument is really an output: it receives the root node of
    the newly built parse tree.
    It is undefined (unchanged) on error. If "root" is NULL on success,
    then the parser has already called lex->make_sql_cmd() internally.
  */
  extern int MYSQLparse(class THD * thd, class Parse_tree_root * *root);

  Parse_tree_root *tree_root = nullptr;
  const bool parse_failed = MYSQLparse(this, &tree_root) || is_error();
  if (parse_failed) {
    /*
      Restore the original LEX if it was replaced when parsing
      a stored procedure. We must ensure that a parsing error
      does not leave any side effects in the THD.
    */
    cleanup_after_parse_error();
    return true;
  }

  // With a non-null root the command object still has to be built;
  // lex->make_sql_cmd() returns true when that fails.
  return tree_root != nullptr && lex->make_sql_cmd(tree_root);
}

make_sql_cmd

/**
  Instantiate a Sql_cmd object from the Parse_tree_root and store it in
  this LEX.

  @param parse_tree The parse tree.

  @returns false on success, true on error.
*/
bool LEX::make_sql_cmd(Parse_tree_root *parse_tree) {
  if (will_contextualize) {
    // Let the parse tree root build the command object.
    m_sql_cmd = parse_tree->make_cmd(thd);
    if (m_sql_cmd == nullptr) return true;

    assert(m_sql_cmd->sql_command_code() == sql_command);
  }

  // Nothing to build, or the command was built successfully.
  return false;
}

Parse_tree_root 的 make_cmd 方法 (以 PT_select_stmt::make_cmd 为例)

/**
  Contextualize the query expression and the optional INTO clause, then
  allocate the matching Sql_cmd object on thd->mem_root.

  @param thd  Session whose LEX is being populated.

  @returns the new command object, or nullptr if contextualization fails or
           ER_MULTIPLE_INTO_CLAUSES is raised.
*/
Sql_cmd *PT_select_stmt::make_cmd(THD *thd) {
  Parse_context pc(thd, thd->lex->current_query_block());

  thd->lex->sql_command = m_sql_command;

  if (m_qe->contextualize(&pc)) return nullptr;

  // A result object set by now means an INTO clause was seen inside the
  // query block.
  const bool inner_into = thd->lex->result != nullptr;

  if (inner_into && m_into != nullptr) {
    my_error(ER_MULTIPLE_INTO_CLAUSES, MYF(0));
    return nullptr;
  }
  if (contextualize_safe(&pc, m_into)) return nullptr;

  // Example: ... INTO ... FOR UPDATE;
  bool deprecated_into = (m_into != nullptr && m_has_trailing_locking_clauses);

  if (!deprecated_into && inner_into && thd->lex->unit->is_union()) {
    // Example: ... UNION ... INTO ...;
    //  - ... UNION SELECT * INTO OUTFILE 'foo' FROM ...;
    //  - ... UNION SELECT ... FROM ... INTO OUTFILE 'foo' FOR UPDATE;
    deprecated_into =
        !m_qe->has_trailing_into_clause() || m_has_trailing_locking_clauses;
  }

  if (deprecated_into) push_warning(thd, ER_WARN_DEPRECATED_INNER_INTO);

  if (thd->lex->sql_command == SQLCOM_SELECT) {
    return new (thd->mem_root) Sql_cmd_select(thd->lex->result);
  }
  // (thd->lex->sql_command == SQLCOM_DO)
  return new (thd->mem_root) Sql_cmd_do(nullptr);
}

yyparse

/*
  Prototype of yyparse: a fully typed prototype when compiling as standard C
  or as C++, otherwise an old-style declaration without a parameter list.
*/
#if defined __STDC__ || defined __cplusplus
int yyparse (class THD *YYTHD, class Parse_tree_root **parse_tree);
#else
int yyparse ();
#endif

本文来自:https://blog.duhbb.com

本文链接地址:【MySQL源码】sql_parse.cc 中的 parse_sql 函数,英雄不问来路,转载请注明出处,谢谢。

有话想说:那就赶紧去给我留言吧。

rainbow

这个人很懒,什么都没留下

文章评论