001package gudusoft.gsqlparser.parser; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.TBaseType; 005import gudusoft.gsqlparser.TCustomLexer; 006import gudusoft.gsqlparser.TCustomParser; 007import gudusoft.gsqlparser.TCustomSqlStatement; 008import gudusoft.gsqlparser.TLexerSparksql; 009import gudusoft.gsqlparser.TParserSparksql; 010import gudusoft.gsqlparser.TSourceToken; 011import gudusoft.gsqlparser.TSourceTokenList; 012import gudusoft.gsqlparser.TStatementList; 013import gudusoft.gsqlparser.TSyntaxError; 014import gudusoft.gsqlparser.EFindSqlStateType; 015import gudusoft.gsqlparser.ETokenType; 016import gudusoft.gsqlparser.ETokenStatus; 017import gudusoft.gsqlparser.ESqlStatementType; 018import gudusoft.gsqlparser.EErrorType; 019import gudusoft.gsqlparser.stmt.TUnknownSqlStatement; 020import gudusoft.gsqlparser.stmt.mysql.TMySQLSource; 021import gudusoft.gsqlparser.sqlcmds.ISqlCmds; 022import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory; 023import gudusoft.gsqlparser.compiler.TContext; 024import gudusoft.gsqlparser.sqlenv.TSQLEnv; 025import gudusoft.gsqlparser.compiler.TGlobalScope; 026import gudusoft.gsqlparser.compiler.TFrame; 027import gudusoft.gsqlparser.resolver.TSQLResolver; 028import gudusoft.gsqlparser.TLog; 029import gudusoft.gsqlparser.compiler.TASTEvaluator; 030 031import java.io.BufferedReader; 032import java.util.ArrayList; 033import java.util.List; 034import java.util.Stack; 035 036/** 037 * Apache Spark SQL parser implementation. 
038 * 039 * <p>This parser handles SparkSQL-specific SQL syntax including: 040 * <ul> 041 * <li>SparkSQL DML/DDL operations</li> 042 * <li>Special token handling for DATE, TIME, TIMESTAMP, INTERVAL</li> 043 * <li>MySQL-style SOURCE commands</li> 044 * <li>Stored procedures, functions, and triggers</li> 045 * </ul> 046 * 047 * <p><b>Implementation Status:</b> MIGRATED 048 * <ul> 049 * <li><b>Completed:</b> Full migration from TGSqlParser to AbstractSqlParser</li> 050 * <li><b>Tokenization:</b> dosparksqltexttotokenlist()</li> 051 * <li><b>Raw Extraction:</b> dosparksqlgetrawsqlstatements()</li> 052 * <li><b>Parsing:</b> Fully self-contained using TParserSparksql</li> 053 * </ul> 054 * 055 * @see SqlParser 056 * @see AbstractSqlParser 057 * @see TLexerSparksql 058 * @see TParserSparksql 059 * @since 3.2.0.0 060 */ 061public class SparkSqlParser extends AbstractSqlParser { 062 063 // Vendor-specific parser and lexer 064 // Following the original TGSqlParser pattern (lines 1285-1293) 065 private TLexerSparksql flexer; 066 private TParserSparksql fparser; 067 068 // State management for raw statement extraction 069 // Inherited from AbstractSqlParser: sourcetokenlist, sqlstatements, sqlcmds 070 private TCustomSqlStatement gcurrentsqlstatement; 071 private String userDelimiterStr; 072 private char curdelimiterchar; 073 074 /** 075 * Construct SparkSQL parser. 076 * <p> 077 * Configures the parser for SparkSQL with default delimiter (;). 078 * <p> 079 * Following the original TGSqlParser pattern (lines 1285-1293), 080 * the lexer and parser are created once in the constructor and reused 081 * for all parsing operations to avoid unnecessary object allocation overhead. 
082 */ 083 public SparkSqlParser() { 084 super(EDbVendor.dbvsparksql); 085 this.delimiterChar = ';'; // SparkSQL delimiter 086 this.defaultDelimiterStr = ";"; // Default delimiter 087 088 // Create lexer once - will be reused for all parsing operations 089 // (matches original TGSqlParser constructor pattern at lines 1287-1290) 090 this.flexer = new TLexerSparksql(); 091 this.flexer.delimiterchar = this.delimiterChar; 092 this.flexer.defaultDelimiterStr = this.defaultDelimiterStr; 093 094 // Set parent's lexer reference for shared tokenization logic 095 this.lexer = this.flexer; 096 097 // Create parser once - will be reused for all parsing operations 098 // (matches original TGSqlParser constructor pattern at lines 1291-1292) 099 this.fparser = new TParserSparksql(null); 100 this.fparser.lexer = this.flexer; 101 } 102 103 @Override 104 public EDbVendor getVendor() { 105 return vendor; 106 } 107 108 // ========== Abstract Method Implementations ========== 109 110 @Override 111 protected TCustomLexer getLexer(ParserContext context) { 112 return this.flexer; 113 } 114 115 @Override 116 protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) { 117 return this.fparser; 118 } 119 120 @Override 121 protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) { 122 // SparkSQL doesn't have a secondary parser (unlike Oracle which has PL/SQL parser) 123 return null; 124 } 125 126 // ========== Tokenization Phase (Hook Pattern) ========== 127 128 /** 129 * Hook method for vendor-specific tokenization. 130 * <p> 131 * Delegates to dosparksqltexttotokenlist() which implements SparkSQL-specific 132 * token processing logic. 133 */ 134 @Override 135 protected void tokenizeVendorSql() { 136 dosparksqltexttotokenlist(); 137 } 138 139 /** 140 * SparkSQL-specific tokenization logic. 141 * <p> 142 * Migrated from TGSqlParser.dosparksqltexttotokenlist() (lines 4620-4662). 
143 * <p> 144 * Special handling: 145 * <ul> 146 * <li>MySQL-style comment validation</li> 147 * <li>WITH ROLLUP token adjustment</li> 148 * <li>Delimiter detection</li> 149 * </ul> 150 */ 151 private void dosparksqltexttotokenlist() { 152 TSourceToken asourcetoken, lcprevst; 153 int yychar; 154 boolean startDelimiter = false; 155 156 flexer.tmpDelimiter = ""; 157 158 asourcetoken = getanewsourcetoken(); 159 if (asourcetoken == null) return; 160 yychar = asourcetoken.tokencode; 161 162 while (yychar > 0) { 163 sourcetokenlist.add(asourcetoken); 164 asourcetoken = getanewsourcetoken(); 165 if (asourcetoken == null) break; 166 checkMySQLCommentToken(asourcetoken); 167 168 if ((asourcetoken.tokencode == TBaseType.lexnewline) && (startDelimiter)) { 169 startDelimiter = false; 170 flexer.tmpDelimiter = sourcetokenlist.get(sourcetokenlist.size() - 1).getAstext(); 171 } 172 173 if (asourcetoken.tokencode == TBaseType.rrw_rollup) { 174 // with rollup 175 lcprevst = getprevsolidtoken(asourcetoken); 176 if (lcprevst != null) { 177 if (lcprevst.tokencode == TBaseType.rrw_with) 178 lcprevst.tokencode = TBaseType.with_rollup; 179 } 180 } 181 182 yychar = asourcetoken.tokencode; 183 } 184 } 185 186 /** 187 * Helper method for MySQL-style comment validation. 188 * <p> 189 * Migrated from TGSqlParser.checkMySQLCommentToken() (lines 4604-4619). 190 * Currently a no-op (original implementation is commented out). 191 */ 192 private void checkMySQLCommentToken(TSourceToken cmtToken) { 193 // No-op: original implementation is commented out in TGSqlParser 194 } 195 196 /** 197 * Helper method to get previous solid token (non-whitespace, non-comment). 198 * <p> 199 * Migrated from TGSqlParser.getprevsolidtoken(). 
200 */ 201 private TSourceToken getprevsolidtoken(TSourceToken ptoken) { 202 TSourceToken lcprevtoken = null; 203 int i = ptoken.posinlist; 204 while (i > 0) { 205 i--; 206 lcprevtoken = sourcetokenlist.get(i); 207 if ((lcprevtoken.tokencode == TBaseType.lexspace) 208 || (lcprevtoken.tokencode == TBaseType.lexnewline) 209 || (lcprevtoken.tokencode == TBaseType.cmtdoublehyphen) 210 || (lcprevtoken.tokencode == TBaseType.cmtslashstar)) { 211 continue; 212 } 213 return lcprevtoken; 214 } 215 return null; 216 } 217 218 /** 219 * Helper method to add token to statement. 220 * <p> 221 * Sets token.stmt reference and adds token to statement's token list. 222 */ 223 private void appendToken(TCustomSqlStatement statement, TSourceToken token) { 224 if (statement == null || token == null) { 225 return; 226 } 227 token.stmt = statement; 228 statement.sourcetokenlist.add(token); 229 } 230 231 // ========== Raw Statement Extraction Phase (Hook Pattern) ========== 232 233 /** 234 * Hook method to setup parsers before raw statement extraction. 235 * <p> 236 * Injects sqlcmds and sourcetokenlist into the SparkSQL parser. 237 */ 238 @Override 239 protected void setupVendorParsersForExtraction() { 240 this.fparser.sqlcmds = this.sqlcmds; 241 this.fparser.sourcetokenlist = this.sourcetokenlist; 242 } 243 244 /** 245 * Hook method for vendor-specific raw statement extraction. 246 * <p> 247 * Delegates to dosparksqlgetrawsqlstatements() which implements SparkSQL-specific 248 * statement boundary detection. 249 */ 250 @Override 251 protected void extractVendorRawStatements(SqlParseResult.Builder builder) { 252 dosparksqlgetrawsqlstatements(builder); 253 } 254 255 /** 256 * SparkSQL-specific raw statement extraction logic. 257 * <p> 258 * Migrated from TGSqlParser.dosparksqlgetrawsqlstatements() (lines 14659-14977). 
     * <p>
     * This method:
     * <ul>
     * <li>Adjusts DATE, TIME, TIMESTAMP, INTERVAL token codes based on context</li>
     * <li>Handles statement boundaries (semicolon, custom delimiters)</li>
     * <li>Supports stored procedures with BEGIN/END blocks</li>
     * <li>Handles MySQL SOURCE commands</li>
     * </ul>
     *
     * @param builder the result builder to collect errors
     * @return error count (currently always 0)
     */
    private int dosparksqlgetrawsqlstatements(SqlParseResult.Builder builder) {
        int errorcount = 0;
        gcurrentsqlstatement = null;
        // Extraction state machine: stnormal (between statements), stsql (inside a
        // plain statement), ststoredprocedure (inside proc/func/trigger), stsqlplus
        // (SOURCE-style line command), sterror (skipping to next semicolon).
        EFindSqlStateType gst = EFindSqlStateType.stnormal;
        int i;
        TSourceToken ast;
        boolean waitingDelimiter = false; // true once BEGIN seen inside a stored program

        // Reset delimiter
        userDelimiterStr = defaultDelimiterStr;

        for (i = 0; i < sourcetokenlist.size(); i++) {
            ast = sourcetokenlist.get(i);
            sourcetokenlist.curpos = i;

            // SparkSQL-specific token adjustments (run before the state machine).
            if (ast.tokencode == TBaseType.rrw_date) {
                // DATE followed by '(' is the date() function; by a string constant,
                // a date literal.
                TSourceToken st1 = ast.nextSolidToken();
                if (st1 != null) {
                    if (st1.tokencode == '(') {
                        ast.tokencode = TBaseType.rrw_spark_date_function;
                    } else if (st1.tokencode == TBaseType.sconst) {
                        ast.tokencode = TBaseType.rrw_spark_date_const;
                    }
                }
            } else if (ast.tokencode == TBaseType.rrw_time) {
                // TIME followed by a string constant is a time literal.
                TSourceToken st1 = ast.nextSolidToken();
                if (st1 != null) {
                    if (st1.tokencode == TBaseType.sconst) {
                        ast.tokencode = TBaseType.rrw_spark_time_const;
                    }
                }
            } else if (ast.tokencode == TBaseType.rrw_timestamp) {
                // TIMESTAMP followed by a string constant (or a double-quoted
                // identifier, which is re-coded to a string) is a timestamp literal.
                TSourceToken st1 = ast.nextSolidToken();
                if (st1 != null) {
                    if (st1.tokencode == TBaseType.sconst) {
                        ast.tokencode = TBaseType.rrw_spark_timestamp_constant;
                    } else if (st1.tokencode == TBaseType.ident) {
                        if (st1.toString().startsWith("\"")) {
                            ast.tokencode = TBaseType.rrw_spark_timestamp_constant;
                            st1.tokencode = TBaseType.sconst;
                        }
                    }
                }
            } else if (ast.tokencode == TBaseType.rrw_interval) {
                // INTERVAL(..., ...) with a comma before ')' is the MySQL-style
                // interval() function rather than an interval literal.
                TSourceToken leftParen = ast.searchToken('(', 1);
                if (leftParen != null) {
                    int k = leftParen.posinlist + 1;
                    boolean commaToken = false;
                    while (k < ast.container.size()) {
                        if (ast.container.get(k).tokencode == ')') break;
                        if (ast.container.get(k).tokencode == ',') {
                            commaToken = true;
                            break;
                        }
                        k++;
                    }
                    if (commaToken) {
                        ast.tokencode = TBaseType.rrw_mysql_interval_func;
                    }
                }
            } else if (ast.tokencode == TBaseType.rrw_spark_position) {
                TSourceToken leftParen = ast.searchToken('(', 1);
                if (leftParen != null) {
                    // POSITION is a function
                } else {
                    ast.tokencode = TBaseType.ident; // treat it as identifier
                }
            }

            switch (gst) {
                case sterror: {
                    // Error recovery: append tokens until a semicolon closes the
                    // invalid statement, then return to normal scanning.
                    if (ast.tokentype == ETokenType.ttsemicolon) {
                        appendToken(gcurrentsqlstatement, ast);
                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                        gst = EFindSqlStateType.stnormal;
                    } else {
                        appendToken(gcurrentsqlstatement, ast);
                    }
                    break;
                }
                case stnormal: {
                    // Between statements: trivia and stray semicolons are attached to
                    // the previous statement (if any) and skipped.
                    if ((ast.tokencode == TBaseType.cmtdoublehyphen)
                            || (ast.tokencode == TBaseType.cmtslashstar)
                            || (ast.tokencode == TBaseType.lexspace)
                            || (ast.tokencode == TBaseType.lexnewline)
                            || (ast.tokentype == ETokenType.ttsemicolon)) {
                        if (TBaseType.assigned(gcurrentsqlstatement)) {
                            appendToken(gcurrentsqlstatement, ast);
                        }
                        continue;
                    }

                    // MySQL-style SOURCE (or \.) command must start its own line.
                    if ((ast.isFirstTokenOfLine()) && ((ast.tokencode == TBaseType.rrw_mysql_source) || (ast.tokencode == TBaseType.slash_dot))) {
                        gst = EFindSqlStateType.stsqlplus;
                        gcurrentsqlstatement = new TMySQLSource(vendor);
                        appendToken(gcurrentsqlstatement, ast);
                        continue;
                    }

                    // Find a token to start sql or plsql mode
                    gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement);

                    if (TBaseType.assigned(gcurrentsqlstatement)) {
                        // Stored programs get the ststoredprocedure state so their
                        // internal semicolons don't terminate the statement.
                        ESqlStatementType[] ses = {ESqlStatementType.sstmysqlcreateprocedure, ESqlStatementType.sstmysqlcreatefunction,
                                ESqlStatementType.sstcreateprocedure, ESqlStatementType.sstcreatefunction,
                                ESqlStatementType.sstcreatetrigger};
                        if (includesqlstatementtype(gcurrentsqlstatement.sqlstatementtype, ses)) {
                            gst = EFindSqlStateType.ststoredprocedure;
                            waitingDelimiter = false;
                            appendToken(gcurrentsqlstatement, ast);
                            curdelimiterchar = ';';
                        } else {
                            gst = EFindSqlStateType.stsql;
                            appendToken(gcurrentsqlstatement, ast);
                        }
                    }

                    if (!TBaseType.assigned(gcurrentsqlstatement)) {
                        // Error token found: record a tokenizer warning and wrap the
                        // remainder (up to ';') in a TUnknownSqlStatement.
                        this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo),
                                "Error when tokenlize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist));

                        ast.tokentype = ETokenType.tttokenlizererrortoken;
                        gst = EFindSqlStateType.sterror;

                        gcurrentsqlstatement = new TUnknownSqlStatement(vendor);
                        gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid;
                        appendToken(gcurrentsqlstatement, ast);
                    }
                    break;
                }
                case stsqlplus: {
                    // SOURCE-style commands are terminated by end of line.
                    if (ast.tokencode == TBaseType.lexnewline) {
                        gst = EFindSqlStateType.stnormal;
                        appendToken(gcurrentsqlstatement, ast);
                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                    } else {
                        appendToken(gcurrentsqlstatement, ast);
                    }
                    break;
                }
                case stsql: {
                    // Ordinary statement: a semicolon terminates it (unless this is a
                    // DELIMITER command, which is newline-terminated below).
                    if ((ast.tokentype == ETokenType.ttsemicolon) && (gcurrentsqlstatement.sqlstatementtype != ESqlStatementType.sstmysqldelimiter)) {
                        gst = EFindSqlStateType.stnormal;
                        appendToken(gcurrentsqlstatement, ast);
                        gcurrentsqlstatement.semicolonended = ast;
                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                        continue;
                    }
                    // A user-defined delimiter also terminates the statement; the token
                    // is re-coded to ';' so downstream parsing treats it uniformly.
                    if (ast.toString().equalsIgnoreCase(userDelimiterStr)) {
                        gst = EFindSqlStateType.stnormal;
                        ast.tokencode = ';'; // treat it as semicolon
                        appendToken(gcurrentsqlstatement, ast);
                        gcurrentsqlstatement.semicolonended = ast;
                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                        continue;
                    }
                    appendToken(gcurrentsqlstatement, ast);

                    // DELIMITER command ends at newline; rebuild userDelimiterStr from
                    // the statement's solid tokens (skipping DELIMITER/SET/trivia).
                    if ((ast.tokencode == TBaseType.lexnewline)
                            && (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstmysqldelimiter)) {
                        gst = EFindSqlStateType.stnormal;
                        userDelimiterStr = "";
                        for (int k = 0; k < gcurrentsqlstatement.sourcetokenlist.size(); k++) {
                            TSourceToken st = gcurrentsqlstatement.sourcetokenlist.get(k);
                            if ((st.tokencode == TBaseType.rrw_mysql_delimiter)
                                    || (st.tokencode == TBaseType.lexnewline)
                                    || (st.tokencode == TBaseType.lexspace)
                                    || (st.tokencode == TBaseType.rrw_set)) {
                                continue;
                            }
                            userDelimiterStr += st.toString();
                        }
                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                        continue;
                    }
                    break;
                }
                case ststoredprocedure: {
                    // Single stmt in function/procedure/trigger may use ; as terminate char
                    // Default terminate char is ;, if begin is found
                    // then set terminate char to DelimiterChar
                    if (waitingDelimiter) {
                        if (userDelimiterStr.equalsIgnoreCase(ast.toString())) {
                            // Exact delimiter token ends the stored program.
                            gst = EFindSqlStateType.stnormal;
                            gcurrentsqlstatement.semicolonended = ast;
                            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                            continue;
                        } else if (userDelimiterStr.startsWith(ast.toString())) {
                            // The delimiter may have been lexed into several adjacent
                            // tokens; concatenate until trivia/DELIMITER and compare.
                            String lcstr = ast.toString();
                            for (int k = ast.posinlist + 1; k < ast.container.size(); k++) {
                                TSourceToken st = ast.container.get(k);
                                if ((st.tokencode == TBaseType.rrw_mysql_delimiter) || (st.tokencode == TBaseType.lexnewline) || (st.tokencode == TBaseType.lexspace)) {
                                    break;
                                }
                                lcstr = lcstr + st.toString();
                            }

                            if (userDelimiterStr.equalsIgnoreCase(lcstr)) {
                                for (int k = ast.posinlist; k < ast.container.size(); k++) {
                                    TSourceToken st = ast.container.get(k);
                                    if ((st.tokencode == TBaseType.rrw_mysql_delimiter) || (st.tokencode == TBaseType.lexnewline) || (st.tokencode == TBaseType.lexspace)) {
                                        break;
                                    }
                                    // NOTE(review): this loop fetches st but assigns
                                    // ast.tokenstatus on every iteration; it looks like
                                    // st.tokenstatus was intended so each delimiter
                                    // fragment is ignored by yacc — confirm against the
                                    // original TGSqlParser before changing.
                                    ast.tokenstatus = ETokenStatus.tsignorebyyacc;
                                }
                                gst = EFindSqlStateType.stnormal;
                                gcurrentsqlstatement.semicolonended = ast;
                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                                continue;
                            }
                        }
                    }
                    // BEGIN switches termination from ';' to the user delimiter.
                    if (ast.tokencode == TBaseType.rrw_begin)
                        waitingDelimiter = true;

                    if (userDelimiterStr.equals(";") || (waitingDelimiter == false)) {
                        appendToken(gcurrentsqlstatement, ast);
                        if (ast.tokentype == ETokenType.ttsemicolon) {
                            gst = EFindSqlStateType.stnormal;
                            gcurrentsqlstatement.semicolonended = ast;
                            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                            continue;
                        }
                    } else {
                        if (ast.toString().equals(userDelimiterStr)) {
                            // Delimiter token inside a BEGIN block: hide it from yacc
                            // and close the statement.
                            ast.tokenstatus = ETokenStatus.tsignorebyyacc;
                            appendToken(gcurrentsqlstatement, ast);
                            gst = EFindSqlStateType.stnormal;
                            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                        } else {
                            // A ';' immediately after END also terminates the program
                            // when the delimiter is still ';'.
                            if ((ast.tokentype == ETokenType.ttsemicolon) && (userDelimiterStr.equals(";"))) {
                                TSourceToken lcprevtoken = ast.container.nextsolidtoken(ast, -1, false);
                                if (lcprevtoken != null) {
                                    if (lcprevtoken.tokencode == TBaseType.rrw_end) {
                                        gst = EFindSqlStateType.stnormal;
                                        gcurrentsqlstatement.semicolonended = ast;
                                        appendToken(gcurrentsqlstatement, ast);
                                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
                                        continue;
                                    }
                                }
                            }
                            appendToken(gcurrentsqlstatement, ast);
                        }
                    }
                    break;
                }
            }
        }

        // Last statement: flush a statement left open at end of input (no trailing
        // delimiter), including one still in the error state.
        if (TBaseType.assigned(gcurrentsqlstatement) && ((gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.ststoredprocedure) || (gst == EFindSqlStateType.sterror))) {
            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, true, builder);
        }

        // Populate builder with results (CRITICAL: required for TGSqlParser.dogetrawsqlstatements)
        builder.sqlStatements(this.sqlstatements);
        builder.syntaxErrors(syntaxErrors instanceof ArrayList ?
                (ArrayList<TSyntaxError>) syntaxErrors : new ArrayList<>(syntaxErrors));
        builder.errorCode(syntaxErrors.isEmpty() ? 0 : syntaxErrors.size());
        builder.errorMessage(syntaxErrors.isEmpty() ? "" :
                String.format("Raw extraction completed with %d error(s)", syntaxErrors.size()));

        return errorcount;
    }

    /**
     * Helper method to check if statement type is in the given array.
     */
    private boolean includesqlstatementtype(ESqlStatementType type, ESqlStatementType[] types) {
        for (ESqlStatementType t : types) {
            if (type == t) return true;
        }
        return false;
    }

    // ========== Parsing Phase ==========

    /**
     * Parse each previously-extracted raw statement with the shared SparkSQL parser.
     * <p>
     * Stores the context/token-list/statement references inherited from
     * AbstractSqlParser, injects sqlcmds into the parser, initializes the global
     * context, then parses statements one by one with optional CREATE TABLE
     * error recovery and per-statement error collection.
     *
     * @param context        parser context for this run
     * @param mainParser     unused here; this class always uses {@code fparser}
     * @param secondaryParser unused (SparkSQL has no secondary parser)
     * @param tokens         the full source token list
     * @param rawStatements  raw statements already extracted by the extraction phase
     * @return the (now parsed) statement list
     */
    @Override
    protected TStatementList performParsing(ParserContext context, TCustomParser mainParser,
                                            TCustomParser secondaryParser, TSourceTokenList tokens,
                                            TStatementList rawStatements) {
        // Store references (inherited from AbstractSqlParser)
        this.parserContext = context;
        this.sourcetokenlist = tokens;

        // Use the raw statements passed from AbstractSqlParser.parse()
        // (already extracted - DO NOT re-extract to avoid duplication)
        this.sqlstatements = rawStatements;

        // Initialize sqlcmds for this vendor
        this.sqlcmds = SqlCmdsFactory.get(vendor);

        // CRITICAL: Inject sqlcmds into parser (required for parsing)
        this.fparser.sqlcmds = this.sqlcmds;

        // Initialize global context using inherited method
        initializeGlobalContext();

        // Parse each statement
        for (int i = 0; i < sqlstatements.size(); i++) {
            TCustomSqlStatement stmt = sqlstatements.getRawSql(i);
            try {
                stmt.setFrameStack(frameStack);
                int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree());

                // Vendor-specific post-processing (if needed)
                afterStatementParsed(stmt);

                // Error recovery (only attempted when parsing reported a problem)
                boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE;
                if (doRecover && ((parseResult != 0) || (stmt.getErrorCount() > 0))) {
                    handleCreateTableErrorRecovery(stmt);
                }

                // Collect errors
                if ((parseResult != 0) || (stmt.getErrorCount() > 0)) {
                    copyErrorsFromStatement(stmt);
                }
            } catch (Exception ex) {
                // Use inherited exception handler
                handleStatementParsingException(stmt, i, ex);
                continue;
            }
        }

        // Clean up frame stack
        if (globalFrame != null)
            globalFrame.popMeFromStack(frameStack);

        return sqlstatements;
    }

    /**
     * Hook for vendor-specific post-processing after a statement is parsed.
     * <p>
     * Default implementation is a no-op for SparkSQL.
     *
     * @param stmt the statement that was just parsed
     */
    protected void afterStatementParsed(TCustomSqlStatement stmt) {
        // No special post-processing needed for SparkSQL
    }

    /**
     * Handle error recovery for CREATE TABLE (and non-Couchbase CREATE INDEX)
     * statements when strict parsing is disabled.
     * <p>
     * Migrated from TGSqlParser.handleCreateTableErrorRecovery() (lines 16916-16971).
     * <p>
     * SparkSQL allows table properties that may not be fully parsed. Everything
     * after the closing parenthesis of the column-definition list (unless it is
     * an AS (SELECT ...) clause) is re-coded as a SQL*Plus command token so the
     * parser skips it, then the statement is re-parsed once.
     *
     * @param stmt the statement that failed to parse cleanly
     */
    protected void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) {
        if (((stmt.sqlstatementtype == ESqlStatementType.sstcreatetable) ||
                ((stmt.sqlstatementtype == ESqlStatementType.sstcreateindex) && (vendor != EDbVendor.dbvcouchbase))) &&
                (!TBaseType.c_createTableStrictParsing)) {

            // Find the closing parenthesis of table definition
            int nested = 0;
            boolean isIgnore = false, isFoundIgnoreToken = false;
            // NOTE(review): firstIgnoreToken is assigned below but never read
            // afterwards in this block — possibly leftover from the migration.
            TSourceToken firstIgnoreToken = null;

            for (int k = 0; k < stmt.sourcetokenlist.size(); k++) {
                TSourceToken st = stmt.sourcetokenlist.get(k);

                if (isIgnore) {
                    // Past the definition list: mark all remaining solid,
                    // non-semicolon tokens to be skipped.
                    if (st.issolidtoken() && (st.tokencode != ';')) {
                        isFoundIgnoreToken = true;
                        if (firstIgnoreToken == null) {
                            firstIgnoreToken = st;
                        }
                    }
                    if (st.tokencode != ';') {
                        st.tokencode = TBaseType.sqlpluscmd;
                    }
                    continue;
                }

                if (st.tokencode == (int) ')') {
                    nested--;
                    if (nested == 0) {
                        // Check if next token is "AS ( SELECT" (CREATE TABLE AS SELECT)
                        boolean isSelect = false;
                        TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1);
                        if (st1 != null) {
                            TSourceToken st2 = st.searchToken((int) '(', 2);
                            if (st2 != null) {
                                TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3);
                                isSelect = (st3 != null);
                            }
                        }
                        if (!isSelect) isIgnore = true;
                    }
                }

                if ((st.tokencode == (int) '(') || (st.tokencode == TBaseType.left_parenthesis_2)) {
                    nested++;
                }
            }

            // SparkSQL: No vendor-specific table property validation needed
            // (unlike Oracle which checks searchOracleTablePros)

            // Retry parsing if we found ignoreable properties
            if (isFoundIgnoreToken) {
                stmt.clearError();
                stmt.parsestatement(null, false, this.parserContext.isOnlyNeedRawParseTree());
            }
        }
    }

    // ========== Semantic Analysis ==========

    /**
     * Run the semantic resolver over the parsed statements.
     * <p>
     * Skipped entirely when the resolver is disabled or when any syntax errors
     * were collected during parsing.
     */
    @Override
    protected void performSemanticAnalysis(ParserContext context, TStatementList statements) {
        if (!TBaseType.isEnableResolver()) {
            return;
        }

        if (!getSyntaxErrors().isEmpty()) {
            return;
        }

        // Run semantic resolver
        TSQLResolver resolver = new TSQLResolver(globalContext, statements);
        resolver.resolve();
    }

    // ========== Interpretation ==========

    /**
     * Interpretation hook — currently a no-op for SparkSQL even when the
     * interpreter flag is enabled.
     */
    @Override
    protected void performInterpreter(ParserContext context, TStatementList statements) {
        if (!TBaseType.ENABLE_INTERPRETER) {
            return;
        }

        // SparkSQL interpretation not currently supported
        // (TASTEvaluator requires TAnnotatedTree, not TContext)
    }

    @Override
    public String toString() {
        return "SparkSqlParser{vendor=" + vendor + "}";
    }
}