001package gudusoft.gsqlparser.parser; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.TBaseType; 005import gudusoft.gsqlparser.TCustomLexer; 006import gudusoft.gsqlparser.TCustomParser; 007import gudusoft.gsqlparser.TCustomSqlStatement; 008import gudusoft.gsqlparser.TLexerOceanbasemysql; 009import gudusoft.gsqlparser.TParserOceanbasemysql; 010import gudusoft.gsqlparser.TSourceToken; 011import gudusoft.gsqlparser.TSourceTokenList; 012import gudusoft.gsqlparser.TStatementList; 013import gudusoft.gsqlparser.TSyntaxError; 014import gudusoft.gsqlparser.EFindSqlStateType; 015import gudusoft.gsqlparser.ETokenType; 016import gudusoft.gsqlparser.ETokenStatus; 017import gudusoft.gsqlparser.ESqlStatementType; 018import gudusoft.gsqlparser.EErrorType; 019import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement; 020import gudusoft.gsqlparser.stmt.TUnknownSqlStatement; 021import gudusoft.gsqlparser.sqlcmds.ISqlCmds; 022import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory; 023import gudusoft.gsqlparser.stmt.mysql.TMySQLSource; 024import gudusoft.gsqlparser.compiler.TContext; 025import gudusoft.gsqlparser.sqlenv.TSQLEnv; 026import gudusoft.gsqlparser.compiler.TGlobalScope; 027import gudusoft.gsqlparser.compiler.TFrame; 028 029import java.io.BufferedReader; 030import java.util.ArrayList; 031import java.util.List; 032import java.util.Stack; 033 034/** 035 * OceanBase user tenant — MySQL compatibility mode parser (Phase 2 fork). 036 * 037 * <p>This class is a verbatim fork of {@link MySqlSqlParser} with class 038 * names rewritten to point at the forked grammar artifacts under 039 * {@code gsp_java_parser/.../parser/oceanbasemysql/}. 
The structural 040 * intent is identical to the MySQL parser; what differs is: 041 * 042 * <ul> 043 * <li>It uses {@link TLexerOceanbasemysql} and 044 * {@link TParserOceanbasemysql} (generated from the forked 045 * {@code lzlexoceanbasemysql.l} / {@code lzyaccoceanbasemysql.y} 046 * sources by {@code parsetable}'s generic-vendor fallback path).</li> 047 * <li>It registers as {@link EDbVendor#dbvoceanbase} so downstream 048 * components (sqlcmds, IdentifierProfile, the various TSQLEnv 049 * EnumMaps) consult OceanBase rules rather than MySQL rules.</li> 050 * <li>It is selected by {@link OceanBaseSqlParser} when the active 051 * {@link gudusoft.gsqlparser.EOBTenantMode} is 052 * {@link gudusoft.gsqlparser.EOBTenantMode#MYSQL} or 053 * {@link gudusoft.gsqlparser.EOBTenantMode#SYSTEM}.</li> 054 * </ul> 055 * 056 * <p>Phase 2 ships this fork at zero divergence from the MySQL base 057 * grammar — same shift/reduce conflicts, same token positions, same 058 * statement coverage. OceanBase-specific extensions (hints, partition 059 * extensions, tenant DDL, outlines, tablegroups) arrive incrementally 060 * in Phase 4 via additive edits to the {@code .y} source. Each Phase 4 061 * addition is tracked in {@code oceanbasemysql/FORK_DIVERGENCE.md}. 062 * 063 * <p>Backporting upstream MySQL grammar fixes is documented in the 064 * Phase 0 plan ({@code gsp_java_core/doc/oceanbase/PHASE0_REPORT.md}) 065 * and the per-fork {@code oceanbasemysql/REGEN.md}. 
 *
 * @see MySqlSqlParser the unmodified base parser this class was forked from
 * @see OceanBaseSqlParser the mode-routing adapter that selects this parser
 * @see TLexerOceanbasemysql forked lexer
 * @see TParserOceanbasemysql forked parser
 * @since 4.0.1.4
 */
public class OceanBaseMysqlSqlParser extends AbstractSqlParser {

    // ========== Lexer and Parser Instances ==========
    // Created once in constructor, reused for all parsing operations

    /** The forked OceanBase/MySQL lexer used for tokenization (public for TGSqlParser.getFlexer()) */
    public TLexerOceanbasemysql flexer;

    /** The forked OceanBase/MySQL parser; created once and reused for every parse call. */
    private TParserOceanbasemysql fparser;

    // ========== State Variables ==========
    // NOTE: The following fields moved to AbstractSqlParser (inherited):
    // - sourcetokenlist (TSourceTokenList)
    // - sqlstatements (TStatementList)
    // - parserContext (ParserContext)
    // - sqlcmds (ISqlCmds)
    // - globalContext (TContext)
    // - sqlEnv (TSQLEnv)
    // - frameStack (Stack<TFrame>)
    // - globalFrame (TFrame)
    // - lexer (TCustomLexer)

    // ========== State Variables for Raw Statement Extraction ==========

    // Active statement delimiter; reset to defaultDelimiterStr at the start of
    // every extraction run and replaced when a DELIMITER command is seen.
    private String userDelimiterStr;
    // Single-character delimiter; only ever assigned ';' here — it was read
    // solely by the retired pre-refactor extraction logic.
    private char curdelimiterchar;
    // True while scanning a stored-routine body after BEGIN, i.e. the
    // statement ends at the custom delimiter (or END;) instead of a bare ';'.
    private boolean waitingDelimiter;

    // ========== Constructor ==========

    /**
     * Construct the OceanBase (MySQL compatibility mode) SQL parser.
     * <p>
     * Registers the vendor as {@link EDbVendor#dbvoceanbase} and creates the
     * forked lexer/parser pair once here; both are reused for all parsing
     * operations, following the original TGSqlParser lifecycle.
     * <p>
     * NOTE(review): the previous javadoc claimed the default delimiter is a
     * semicolon (;), but the code assigns '$' — confirm which is intended.
     */
    public OceanBaseMysqlSqlParser() {
        // Phase 2 fork: register as dbvoceanbase even though the lexer/parser
        // are forked from MySQL. AST nodes built via this parser report
        // dbvoceanbase via the NodeFactory back-reference fixup in
        // TGSqlParser.doDelegatedRawParse(); the vendor passed here is the
        // one consulted by sqlcmds, IdentifierProfile, and the various
        // EnumMaps in TSQLEnv.
        super(EDbVendor.dbvoceanbase);

        // Set delimiter character
        this.delimiterChar = '$';
        this.defaultDelimiterStr = "$";

        // Create lexer once - will be reused for all parsing operations
        this.flexer = new TLexerOceanbasemysql();
        this.flexer.delimiterchar = this.delimiterChar;
        this.flexer.defaultDelimiterStr = this.defaultDelimiterStr;

        // CRITICAL: Set lexer for inherited getanewsourcetoken() method
        this.lexer = this.flexer;

        // Create parser once - will be reused for all parsing operations
        this.fparser = new TParserOceanbasemysql(null);
        this.fparser.lexer = this.flexer;

        // NOTE: sourcetokenlist and sqlstatements are initialized in AbstractSqlParser constructor
    }

    // ========== AbstractSqlParser Abstract Methods Implementation ==========

    /**
     * Return the lexer instance.
     * <p>
     * The lexer is created once in the constructor and reused for all
     * parsing operations; this method simply returns the existing instance.
     *
     * @param context parser context (not used, lexer already created)
     * @return the lexer instance created in the constructor
     */
    @Override
    protected TCustomLexer getLexer(ParserContext context) {
        // Return existing lexer instance (created in constructor)
        return this.flexer;
    }

    /**
     * Return the SQL parser instance with an updated token list.
     * <p>
     * The parser is created once in the constructor and reused for all
     * parsing operations; this method points it at the new token list and
     * returns the existing instance.
     *
     * @param context parser context (not used, parser already created)
     * @param tokens source token list to parse
     * @return the parser instance created in the constructor
     */
    @Override
    protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) {
        // Update token list for reused parser instance
        this.fparser.sourcetokenlist = tokens;
        return this.fparser;
    }

    /**
     * Call MySQL-specific tokenization logic (shared by this fork).
     * <p>
     * Delegates to domysqltexttotokenlist which handles MySQL's
     * keyword recognition, delimiter handling, and token generation.
     */
    @Override
    protected void tokenizeVendorSql() {
        domysqltexttotokenlist();
    }

    /**
     * Setup the parser for raw statement extraction.
     * <p>
     * This vendor uses a single parser, so we inject sqlcmds and update
     * the token list for the main parser only.
     */
    @Override
    protected void setupVendorParsersForExtraction() {
        this.fparser.sqlcmds = this.sqlcmds;
        this.fparser.sourcetokenlist = this.sourcetokenlist;
    }

    /**
     * Call MySQL-specific raw statement extraction logic.
     * <p>
     * Delegates to domysqlgetrawsqlstatements which handles MySQL's
     * statement delimiters (semicolon by default, or a custom delimiter
     * installed via the DELIMITER client command).
     */
    @Override
    protected void extractVendorRawStatements(SqlParseResult.Builder builder) {
        domysqlgetrawsqlstatements(builder);
    }

    /**
     * Perform full parsing of statements with syntax checking.
     * <p>
     * This method orchestrates the parsing of all statements.
     *
     * <p><b>Important:</b> This method does NOT extract raw statements - they are
     * passed in as a parameter already extracted by {@link #extractRawStatements}.
     *
     * @param context parser context
     * @param parser main SQL parser (TParserOceanbasemysql)
     * @param secondaryParser not used here (single-parser vendor)
     * @param tokens source token list
     * @param rawStatements raw statements already extracted (never null)
     * @return list of fully parsed statements with AST built
     */
    @Override
    protected TStatementList performParsing(ParserContext context,
                                            TCustomParser parser,
                                            TCustomParser secondaryParser,
                                            TSourceTokenList tokens,
                                            TStatementList rawStatements) {
        // Store references (fparser is already set, don't reassign final variable)
        this.sourcetokenlist = tokens;
        this.parserContext = context;

        // Use the raw statements passed from AbstractSqlParser.parse()
        // (already extracted - DO NOT re-extract to avoid duplication)
        this.sqlstatements = rawStatements;

        // Initialize sqlcmds for the parser
        this.sqlcmds = SqlCmdsFactory.get(vendor);
        this.fparser.sqlcmds = this.sqlcmds;

        // Initialize global context for statement parsing
        initializeGlobalContext();

        // Parse each statement independently; a failure in one statement must
        // not abort the rest of the script.
        for (int i = 0; i < sqlstatements.size(); i++) {
            TCustomSqlStatement stmt = sqlstatements.getRawSql(i);

            try {
                // Set frame stack for the statement (needed for parsing)
                stmt.setFrameStack(frameStack);

                // Parse the statement
                int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree());

                // Handle error recovery for CREATE TABLE statements if enabled
                boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE;
                if (doRecover && ((parseResult != 0) || (stmt.getErrorCount() > 0))) {
                    handleCreateTableErrorRecovery(stmt);
                }

                // Collect syntax errors (re-checked after possible recovery)
                if ((parseResult != 0) || (stmt.getErrorCount() > 0)) {
                    copyErrorsFromStatement(stmt);
                }
            } catch (Exception ex) {
                // Use inherited exception handler
                handleStatementParsingException(stmt, i, ex);
                continue;
            }
        }

        // Clean up frame stack
        if (globalFrame != null) {
            globalFrame.popMeFromStack(frameStack);
        }

        return this.sqlstatements;
    }

    /**
     * Handle error recovery for CREATE TABLE statements.
     * <p>
     * Migrated from TGSqlParser.handleCreateTableErrorRecovery()
     * <p>
     * This method marks unparseable table properties (everything after the
     * closing parenthesis of the column list) as sqlpluscmd tokens and
     * retries parsing, similar to MSSQL error recovery.
     *
     * @param stmt the statement that failed to parse
     */
    private void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) {
        // Only applies to CREATE TABLE, and only when strict parsing is off.
        if ((stmt.sqlstatementtype != ESqlStatementType.sstcreatetable) || TBaseType.c_createTableStrictParsing) {
            return;
        }

        int nested = 0;
        boolean isIgnore = false, isFoundIgnoreToken = false;
        // First token that was downgraded; tracked but never read afterwards.
        TSourceToken firstIgnoreToken = null;

        for (int k = 0; k < stmt.sourcetokenlist.size(); k++) {
            TSourceToken st = stmt.sourcetokenlist.get(k);
            if (isIgnore) {
                // Past the top-level ')': mask every solid token (except the
                // terminating ';') as an opaque sqlpluscmd token.
                if (st.issolidtoken() && (st.tokencode != ';')) {
                    isFoundIgnoreToken = true;
                    if (firstIgnoreToken == null) {
                        firstIgnoreToken = st;
                    }
                }
                if (st.tokencode != ';') {
                    st.tokencode = TBaseType.sqlpluscmd;
                }
                continue;
            }
            if (st.tokencode == (int) ')') {
                nested--;
                if (nested == 0) {
                    // At the closing paren of the column list: keep normal parsing
                    // when this looks like CREATE TABLE ... AS ( SELECT ... ).
                    boolean isSelect = false;
                    TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1);
                    if (st1 != null) {
                        TSourceToken st2 = st.searchToken((int) '(', 2);
                        if (st2 != null) {
                            TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3);
                            isSelect = (st3 != null);
                        }
                    }
                    if (!isSelect) isIgnore = true;
                }
            } else if (st.tokencode == (int) '(') {
                nested++;
            }
        }

        if (isFoundIgnoreToken) {
            // Retry the parse with the trailing table properties masked out.
            stmt.clearError();
            stmt.parsestatement(null, false, this.parserContext.isOnlyNeedRawParseTree());
        }
    }

    // ========== MySQL-Specific Tokenization ==========

    /**
     * Perform MySQL-specific tokenization.
     * <p>
     * Extracted from TGSqlParser.domysqltexttotokenlist() (lines 4759-4822)
     */
    private void domysqltexttotokenlist() {
        TSourceToken asourcetoken, lcprevst;
        int yychar;
        // True after a DELIMITER keyword token until the next newline; the
        // token just before that newline becomes the lexer's temp delimiter.
        boolean startDelimiter = false;

        flexer.tmpDelimiter = "";

        // Prime the loop with the first token.
        asourcetoken = getanewsourcetoken();
        if (asourcetoken == null) return;
        yychar = asourcetoken.tokencode;
        checkMySQLCommentToken(asourcetoken);

        if ((asourcetoken.tokencode == TBaseType.rrw_mysql_delimiter)) {
            startDelimiter = true;
        }

        while (yychar > 0) {
            sourcetokenlist.add(asourcetoken);
            asourcetoken = getanewsourcetoken();
            if (asourcetoken == null) break;
            checkMySQLCommentToken(asourcetoken);

            // End of a DELIMITER line: the last token already added to
            // sourcetokenlist is the new delimiter text.
            if ((asourcetoken.tokencode == TBaseType.lexnewline) && (startDelimiter)) {
                startDelimiter = false;
                flexer.tmpDelimiter = sourcetokenlist.get(sourcetokenlist.size() - 1).getAstext();
            }

            if ((asourcetoken.tokencode == TBaseType.rrw_mysql_delimiter)) {
                startDelimiter = true;
            }

            if (asourcetoken.tokencode == TBaseType.rrw_rollup) {
                // WITH ROLLUP: fold the pair into a single with_rollup token
                // carried by the preceding WITH.
                lcprevst = getprevsolidtoken(asourcetoken);
                if (lcprevst != null) {
                    if (lcprevst.tokencode == TBaseType.rrw_with)
                        lcprevst.tokencode = TBaseType.with_rollup;
                }
            }

            if ((asourcetoken.tokencode == TBaseType.rrw_mysql_d)
                    || (asourcetoken.tokencode == TBaseType.rrw_mysql_t)
                    || (asourcetoken.tokencode == TBaseType.rrw_mysql_ts)) {
                // ODBC escape constant, e.g. { d 'str' }: only keep the keyword
                // meaning when directly preceded by '{'; otherwise treat d/t/ts
                // as an ordinary identifier.
                lcprevst = getprevsolidtoken(asourcetoken);
                if (lcprevst != null) {
                    if (lcprevst.tokencode != '{')
                        asourcetoken.tokencode = TBaseType.ident;
                }
            }

            yychar = asourcetoken.tokencode;
        }
    }

    /**
     * Check if MySQL comment token is valid.
     * <p>
     * MySQL requires a space after -- for double-hyphen comments.
     * This method was present in TGSqlParser but the implementation
     * was commented out, so we keep it as a placeholder.
406 * 407 * @param cmtToken comment token to check 408 */ 409 private void checkMySQLCommentToken(TSourceToken cmtToken) { 410 // Implementation was commented out in original TGSqlParser 411 // Keeping this method as placeholder for future use 412 } 413 414 /** 415 * Get previous non-whitespace token. 416 * 417 * @param ptoken current token 418 * @return previous solid token, or null 419 */ 420 private TSourceToken getprevsolidtoken(TSourceToken ptoken) { 421 TSourceToken ret = null; 422 TSourceTokenList lctokenlist = ptoken.container; 423 424 if (lctokenlist != null) { 425 if ((ptoken.posinlist > 0) && (lctokenlist.size() > ptoken.posinlist - 1)) { 426 if (!( 427 (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttwhitespace) 428 || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttreturn) 429 || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttsimplecomment) 430 || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttbracketedcomment) 431 )) { 432 ret = lctokenlist.get(ptoken.posinlist - 1); 433 } else { 434 ret = lctokenlist.nextsolidtoken(ptoken.posinlist - 1, -1, false); 435 } 436 } 437 } 438 return ret; 439 } 440 441 // ========== MySQL-Specific Raw Statement Extraction ========== 442 443 /** 444 * Extract raw MySQL SQL statements from tokenized source. 
     * <p>
     * Extracted from TGSqlParser.domysqlgetrawsqlstatements() (lines 14979-15344)
     * <p>
     * Runs a small state machine over the token list: stnormal (between
     * statements), stsql (inside an ordinary statement), ststoredprocedure
     * (inside a routine body, delimiter-aware), stsqlplus (source/\. command
     * lines), and sterror (skip to next semicolon after a tokenizer error).
     *
     * @param builder the result builder to populate with raw statements
     */
    private void domysqlgetrawsqlstatements(SqlParseResult.Builder builder) {
        TCustomSqlStatement gcurrentsqlstatement = null;
        EFindSqlStateType gst = EFindSqlStateType.stnormal;

        // Reset delimiter
        userDelimiterStr = defaultDelimiterStr;

        if (TBaseType.assigned(sqlstatements)) sqlstatements.clear();
        if (!TBaseType.assigned(sourcetokenlist)) {
            // No tokens available - populate builder with empty results and return
            builder.sqlStatements(this.sqlstatements);
            builder.errorCode(1);
            builder.errorMessage("No source token list available");
            return;
        }

        for (int i = 0; i < sourcetokenlist.size(); i++) {
            TSourceToken ast = sourcetokenlist.get(i);
            sourcetokenlist.curpos = i;

            // Token transformations during raw statement extraction
            performRawStatementTokenTransformations(ast);

            switch (gst) {
                // After a tokenizer error: swallow tokens until the next ';'.
                case sterror: {
                    if (ast.tokentype == ETokenType.ttsemicolon) {
                        appendToken(gcurrentsqlstatement, ast);
                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                        gcurrentsqlstatement = null;
                        gst = EFindSqlStateType.stnormal;
                    } else {
                        appendToken(gcurrentsqlstatement, ast);
                    }
                    break;
                }

                // Between statements: look for the start of the next one.
                case stnormal: {
                    // Leading comments/whitespace/semicolons do not start a statement.
                    if ((ast.tokencode == TBaseType.cmtdoublehyphen)
                            || (ast.tokencode == TBaseType.cmtslashstar)
                            || (ast.tokencode == TBaseType.lexspace)
                            || (ast.tokencode == TBaseType.lexnewline)
                            || (ast.tokentype == ETokenType.ttsemicolon)) {
                        if (TBaseType.assigned(gcurrentsqlstatement)) {
                            appendToken(gcurrentsqlstatement, ast);
                        }
                        continue;
                    }

                    // A line starting with the active delimiter acts as a semicolon.
                    if (ast.isFirstTokenOfLine() && (ast.toString().equalsIgnoreCase(userDelimiterStr))) {
                        ast.tokencode = ';';// treat it as semicolon
                        continue;
                    }

                    // 'source file' or '\.' client commands: whole-line statements.
                    if ((ast.isFirstTokenOfLine()) && ((ast.tokencode == TBaseType.rrw_mysql_source) || (ast.tokencode == TBaseType.slash_dot))) {
                        gst = EFindSqlStateType.stsqlplus;
                        gcurrentsqlstatement = new TMySQLSource(vendor);
                        appendToken(gcurrentsqlstatement, ast);
                        continue;
                    }

                    // Find a token to start sql or plsql mode
                    gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement);

                    if (TBaseType.assigned(gcurrentsqlstatement)) {
                        // Routine-defining statements get delimiter-aware handling.
                        ESqlStatementType[] ses = {ESqlStatementType.sstmysqlcreateprocedure, ESqlStatementType.sstmysqlcreatefunction,
                                ESqlStatementType.sstcreateprocedure, ESqlStatementType.sstcreatefunction,
                                ESqlStatementType.sstcreatetrigger, ESqlStatementType.sstmysqlcreateevent,
                                ESqlStatementType.sstmysqlalterevent};
                        if (includesqlstatementtype(gcurrentsqlstatement.sqlstatementtype, ses)) {
                            gst = EFindSqlStateType.ststoredprocedure;
                            waitingDelimiter = false;
                            appendToken(gcurrentsqlstatement, ast);
                            curdelimiterchar = ';';
                            // Only initialize userDelimiterStr if not already set by DELIMITER statement
                            if (userDelimiterStr == null || userDelimiterStr.isEmpty()) {
                                userDelimiterStr = ";";
                            }
                        } else {
                            gst = EFindSqlStateType.stsql;
                            appendToken(gcurrentsqlstatement, ast);
                        }
                    }

                    if (!TBaseType.assigned(gcurrentsqlstatement)) {
                        // Error token found: record a warning and collect the rest of
                        // the statement into a TUnknownSqlStatement (sterror state).
                        this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo),
                                "Error when tokenize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist));

                        ast.tokentype = ETokenType.tttokenlizererrortoken;
                        gst = EFindSqlStateType.sterror;

                        gcurrentsqlstatement = new TUnknownSqlStatement(vendor);
                        gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid;
                        appendToken(gcurrentsqlstatement, ast);
                    }
                    break;
                }

                // Inside a 'source'/'\.' command: it runs to end of line.
                case stsqlplus: {
                    if (ast.tokencode == TBaseType.lexnewline) {
                        gst = EFindSqlStateType.stnormal;
                        appendToken(gcurrentsqlstatement, ast); // so add it here
                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                        gcurrentsqlstatement = null;
                    } else {
                        appendToken(gcurrentsqlstatement, ast);
                    }
                    break;
                }

                // Inside an ordinary SQL statement: accumulate until a delimiter.
                case stsql: {
                    if ((ast.tokentype == ETokenType.ttsemicolon) && (gcurrentsqlstatement.sqlstatementtype != ESqlStatementType.sstmysqldelimiter)) {
                        gst = EFindSqlStateType.stnormal;
                        appendToken(gcurrentsqlstatement, ast);
                        gcurrentsqlstatement.semicolonended = ast;
                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                        gcurrentsqlstatement = null;
                        continue;
                    }
                    // Custom delimiter ends the statement just like a semicolon.
                    if (ast.toString().equalsIgnoreCase(userDelimiterStr)) {
                        gst = EFindSqlStateType.stnormal;
                        ast.tokencode = ';';// treat it as semicolon
                        appendToken(gcurrentsqlstatement, ast);
                        gcurrentsqlstatement.semicolonended = ast;
                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                        gcurrentsqlstatement = null;
                        continue;
                    }

                    if (ast.tokencode == TBaseType.cmtdoublehyphen) {
                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { // -- sqlflow-delimiter
                            gst = EFindSqlStateType.stnormal;
                            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                            gcurrentsqlstatement = null;
                            continue;
                        }
                    }

                    appendToken(gcurrentsqlstatement, ast);

                    // End of a DELIMITER statement line: rebuild the delimiter
                    // string from its solid tokens and install it.
                    if ((ast.tokencode == TBaseType.lexnewline)
                            && (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstmysqldelimiter)) {
                        gst = EFindSqlStateType.stnormal;
                        userDelimiterStr = "";
                        for (int k = 0; k < gcurrentsqlstatement.sourcetokenlist.size(); k++) {
                            TSourceToken st = gcurrentsqlstatement.sourcetokenlist.get(k);
                            if ((st.tokencode == TBaseType.rrw_mysql_delimiter)
                                    || (st.tokencode == TBaseType.lexnewline)
                                    || (st.tokencode == TBaseType.lexspace)
                                    || (st.tokencode == TBaseType.rrw_set)) // set delimiter //
                            {
                                continue;
                            }

                            userDelimiterStr += st.toString();
                        }
                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                        gcurrentsqlstatement = null;
                        continue;
                    }

                    break;
                }

                // Inside a routine-defining statement: end at the custom
                // delimiter (or END; when the delimiter is just ';').
                case ststoredprocedure: {

                    if ((gst == EFindSqlStateType.ststoredprocedure) && (ast.tokencode == TBaseType.cmtdoublehyphen)) {
                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { // -- sqlflow-delimiter
                            gst = EFindSqlStateType.stnormal;
                            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                            gcurrentsqlstatement = null;
                            continue;
                        }
                    }

                    // Handle waitingDelimiter logic (when inside BEGIN...END block)
                    // Skip this check if delimiter is ";" since we need to check for END; pattern instead
                    if (waitingDelimiter && !userDelimiterStr.equals(";")) {
                        if (userDelimiterStr.equalsIgnoreCase(ast.toString())) {
                            // Whole delimiter arrived as a single token.
                            gst = EFindSqlStateType.stnormal;
                            gcurrentsqlstatement.semicolonended = ast;
                            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                            gcurrentsqlstatement = null;
                            continue;
                        } else if (userDelimiterStr.startsWith(ast.toString())) {
                            // The delimiter may be split across several adjacent
                            // tokens (e.g. "$$" lexed as '$','$'); reassemble it.
                            String lcstr = ast.toString();
                            for (int k = ast.posinlist + 1; k < ast.container.size(); k++) {
                                TSourceToken st = ast.container.get(k);
                                if ((st.tokencode == TBaseType.rrw_mysql_delimiter) || (st.tokencode == TBaseType.lexnewline) || (st.tokencode == TBaseType.lexspace)) {
                                    break;
                                }
                                lcstr = lcstr + st.toString();
                            }

                            if (userDelimiterStr.equalsIgnoreCase(lcstr)) {
                                // Mark every piece of the delimiter so yacc skips it.
                                int lastDelimiterPos = ast.posinlist;
                                for (int k = ast.posinlist; k < ast.container.size(); k++) {
                                    TSourceToken st = ast.container.get(k);
                                    if ((st.tokencode == TBaseType.rrw_mysql_delimiter) || (st.tokencode == TBaseType.lexnewline) || (st.tokencode == TBaseType.lexspace)) {
                                        break;
                                    }
                                    st.tokenstatus = ETokenStatus.tsignorebyyacc;
                                    lastDelimiterPos = k;
                                }
                                gst = EFindSqlStateType.stnormal;
                                gcurrentsqlstatement.semicolonended = ast;
                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                                gcurrentsqlstatement = null;
                                i = lastDelimiterPos; // advance past all delimiter tokens
                                continue;
                            }
                        }
                    }

                    // Set waitingDelimiter when BEGIN is encountered
                    if (ast.tokencode == TBaseType.rrw_begin) {
                        waitingDelimiter = true;
                    }

                    // Main delimiter handling logic
                    // When not waiting for delimiter (no BEGIN block), complete at semicolon regardless of custom delimiter
                    if (!waitingDelimiter) {
                        appendToken(gcurrentsqlstatement, ast);
                        if (ast.tokentype == ETokenType.ttsemicolon) {
                            gst = EFindSqlStateType.stnormal;
                            gcurrentsqlstatement.semicolonended = ast;
                            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                            gcurrentsqlstatement = null;
                            continue;
                        }
                    } else {
                        // When waitingDelimiter is true AND delimiter is ";", only check for END; pattern
                        if (waitingDelimiter && userDelimiterStr.equals(";")) {
                            // Check for END; pattern
                            if ((ast.tokentype == ETokenType.ttsemicolon)) {
                                TSourceToken lcprevtoken = ast.container.nextsolidtoken(ast, -1, false);
                                if (lcprevtoken != null) {
                                    if (lcprevtoken.tokencode == TBaseType.rrw_end) {
                                        gst = EFindSqlStateType.stnormal;
                                        gcurrentsqlstatement.semicolonended = ast;
                                        appendToken(gcurrentsqlstatement, ast);
                                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                                        gcurrentsqlstatement = null;
                                        continue;
                                    }
                                }
                            }
                            appendToken(gcurrentsqlstatement, ast);
                        } else {
                            // Custom delimiter handling (non-semicolon delimiters)
                            if (ast.toString().equals(userDelimiterStr)) {
                                ast.tokenstatus = ETokenStatus.tsignorebyyacc;
                                appendToken(gcurrentsqlstatement, ast);
                                gst = EFindSqlStateType.stnormal;
                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                                gcurrentsqlstatement = null;
                            } else {
                                appendToken(gcurrentsqlstatement, ast);
                            }
                        }
                    }

                    // NOTE: the pre-refactor single-character delimiter logic that
                    // used curdelimiterchar was replaced by the branches above;
                    // see VCS history for the removed code.
                    break;
                }
            }
        }

        // Last statement (unterminated at end of input is still emitted)
        if (TBaseType.assigned(gcurrentsqlstatement) && ((gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.ststoredprocedure) || (gst == EFindSqlStateType.sterror))) {
            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, true, builder);
        }

        // Populate builder with results
        builder.sqlStatements(this.sqlstatements);
        builder.syntaxErrors(syntaxErrors instanceof ArrayList ?
                (ArrayList<TSyntaxError>) syntaxErrors : new ArrayList<>(syntaxErrors));
        builder.errorCode(syntaxErrors.isEmpty() ? 0 : syntaxErrors.size());
    }

    /**
     * Handle token transformations during raw statement extraction.
     * <p>
     * Disambiguates context-sensitive keywords (DATE/TIME/TIMESTAMP literals,
     * POSITION, ROW, INTERVAL) by peeking at neighboring tokens before the
     * grammar runs.
     *
     * @param ast current token being processed
     */
    private void performRawStatementTokenTransformations(TSourceToken ast) {
        if (ast.tokencode == TBaseType.rrw_date) {
            // DATE( -> date function; DATE 'str' -> date literal keyword.
            TSourceToken st1 = ast.nextSolidToken();
            if (st1 != null) {
                if (st1.tokencode == '(') {
                    ast.tokencode = TBaseType.rrw_mysql_date_function;
                } else if (st1.tokencode == TBaseType.sconst) {
                    ast.tokencode = TBaseType.rrw_mysql_date_const;
                }
            }
        } else if (ast.tokencode == TBaseType.rrw_time) {
            // TIME 'str' -> time literal keyword.
            TSourceToken st1 = ast.nextSolidToken();
            if (st1 != null) {
                if (st1.tokencode == TBaseType.sconst) {
                    ast.tokencode = TBaseType.rrw_mysql_time_const;
                }
            }
        } else if (ast.tokencode == TBaseType.rrw_timestamp) {
            // TIMESTAMP 'str' -> timestamp literal; a double-quoted identifier
            // after TIMESTAMP is reinterpreted as a string constant.
            TSourceToken st1 = ast.nextSolidToken();
            if (st1 != null) {
                if (st1.tokencode == TBaseType.sconst) {
                    ast.tokencode = TBaseType.rrw_mysql_timestamp_constant;
                } else if (st1.tokencode == TBaseType.ident) {
                    if (st1.toString().startsWith("\"")) {
                        ast.tokencode = TBaseType.rrw_mysql_timestamp_constant;
                        st1.tokencode = TBaseType.sconst;
                    }
                }
            }
        } else if (ast.tokencode == TBaseType.rrw_mysql_position) {
            TSourceToken st1 = ast.nextSolidToken();
            if (st1 != null) {
                if (st1.tokencode != '(') {
                    ast.tokencode = TBaseType.ident; // change position keyword to identifier if not followed by ()
                }
            }
        } else if (ast.tokencode == TBaseType.rrw_mysql_row) {
            // ROW stays a keyword only in ROW(...), FOR EACH ROW, or CURRENT ROW;
            // anywhere else it degrades to a plain identifier.
            boolean isIdent = true;
            TSourceToken st1 = ast.nextSolidToken();
            if (st1 != null) {
                if (st1.tokencode == '(') {
                    isIdent = false;
                }
            }
            st1 = ast.prevSolidToken();
            if (st1 != null) {
                if ((st1.tokencode == TBaseType.rrw_mysql_each) || (st1.tokencode == TBaseType.rrw_mysql_current)) {
                    isIdent = false;
                }
            }
            if (isIdent) ast.tokencode = TBaseType.ident;
        } else if (ast.tokencode == TBaseType.rrw_interval) {
            // INTERVAL(x, y, ...) with a comma at paren depth 1 is the INTERVAL()
            // function rather than an interval-unit expression.
            TSourceToken leftParen = ast.searchToken('(', 1);
            if (leftParen != null) {
                int k = leftParen.posinlist + 1;
                int nested = 1;
                boolean commaToken = false;
                while (k < ast.container.size()) {
                    if (ast.container.get(k).tokencode == '(') {
                        nested++;
                    }
                    if (ast.container.get(k).tokencode == ')') {
                        nested--;
                        if (nested == 0) break;
                    }
                    if ((ast.container.get(k).tokencode == ',') && (nested == 1)) {
                        // only calculate the comma in the first level which is belong to interval
                        // don't count comma in the nested () like this: INTERVAL (SELECT IF(1=1,2,3))
                        commaToken = true;
                        break;
                    }
                    k++;
                }
                if (commaToken) {
                    ast.tokencode = TBaseType.rrw_mysql_interval_func;
                }
            }
        }
    }

    /**
     * Helper method to check if a statement type is in an array of types.
     *
     * @param type the type to check
     * @param types array of types to check against
     * @return true if type is in the array
     */
    private boolean includesqlstatementtype(ESqlStatementType type, ESqlStatementType[] types) {
        for (ESqlStatementType t : types) {
            if (type == t) return true;
        }
        return false;
    }

    /**
     * Append a token to a statement's token list and back-link the token to
     * its owning statement. Null-safe on both arguments (no-op if either is null).
     *
     * @param statement target statement, may be null
     * @param token token to append, may be null
     */
    private void appendToken(TCustomSqlStatement statement, TSourceToken token) {
        if (statement == null || token == null) {
            return;
        }
        token.stmt = statement;
        statement.sourcetokenlist.add(token);
    }

    /** Diagnostic representation: class name plus the registered vendor. */
    @Override
    public String toString() {
        return "OceanBaseMysqlSqlParser{vendor=" + vendor + "}";
    }
}