001package gudusoft.gsqlparser.parser; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.EFindSqlStateType; 005import gudusoft.gsqlparser.EErrorType; 006import gudusoft.gsqlparser.ESqlStatementType; 007import gudusoft.gsqlparser.ETokenStatus; 008import gudusoft.gsqlparser.ETokenType; 009import gudusoft.gsqlparser.TBaseType; 010import gudusoft.gsqlparser.TCustomLexer; 011import gudusoft.gsqlparser.TCustomParser; 012import gudusoft.gsqlparser.TCustomSqlStatement; 013import gudusoft.gsqlparser.TLexerDatabricks; 014import gudusoft.gsqlparser.TParserDatabricks; 015import gudusoft.gsqlparser.TSourceToken; 016import gudusoft.gsqlparser.TSourceTokenList; 017import gudusoft.gsqlparser.TStatementList; 018import gudusoft.gsqlparser.TSyntaxError; 019import gudusoft.gsqlparser.TLog; 020import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement; 021import gudusoft.gsqlparser.stmt.TUnknownSqlStatement; 022import gudusoft.gsqlparser.sqlcmds.ISqlCmds; 023import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory; 024import gudusoft.gsqlparser.compiler.TContext; 025import gudusoft.gsqlparser.compiler.TGlobalScope; 026import gudusoft.gsqlparser.sqlenv.TSQLEnv; 027import gudusoft.gsqlparser.compiler.TFrame; 028import gudusoft.gsqlparser.resolver.TSQLResolver; 029import gudusoft.gsqlparser.compiler.TASTEvaluator; 030import gudusoft.gsqlparser.nodes.TTypeName; 031import gudusoft.gsqlparser.EDataType; 032 033import java.util.Stack; 034import java.util.ArrayList; 035 036/** 037 * Databricks SQL parser implementation. 
038 * 039 * <p>This parser handles Databricks-specific SQL syntax including: 040 * <ul> 041 * <li>Databricks SQL dialect and extensions</li> 042 * <li>Databricks PL/SQL blocks</li> 043 * <li>Special handling for VALUES keyword in INSERT statements</li> 044 * <li>Datatype casting with literals (e.g., DATE '2021-2-1')</li> 045 * </ul> 046 * 047 * <p><b>Implementation Status:</b> MIGRATED 048 * <ul> 049 * <li><b>Phase:</b> Complete migration from delegation to full AbstractSqlParser implementation</li> 050 * <li><b>Current:</b> Self-contained Databricks parser using AbstractSqlParser template</li> 051 * <li><b>Goal:</b> No delegation to legacy TGSqlParser</li> 052 * </ul> 053 * 054 * @see SqlParser 055 * @see AbstractSqlParser 056 * @see TLexerDatabricks 057 * @see TParserDatabricks 058 * @since 3.2.0.0 059 */ 060public class DatabricksSqlParser extends AbstractSqlParser { 061 062 /** 063 * Construct Databricks SQL parser. 064 * <p> 065 * Configures the parser for Databricks database with default delimiter: semicolon (;) 066 * <p> 067 * Following the original TGSqlParser pattern, the lexer and parser are 068 * created once in the constructor and reused for all parsing operations. 
069 */ 070 public DatabricksSqlParser() { 071 super(EDbVendor.dbvdatabricks); 072 this.delimiterChar = ';'; 073 this.defaultDelimiterStr = ";"; 074 075 // Create lexer once - will be reused for all parsing operations 076 this.flexer = new TLexerDatabricks(); 077 this.flexer.delimiterchar = this.delimiterChar; 078 this.flexer.defaultDelimiterStr = this.defaultDelimiterStr; 079 080 // Set parent's lexer reference for shared tokenization logic 081 this.lexer = this.flexer; 082 083 // Create parser once - will be reused for all parsing operations 084 this.fparser = new TParserDatabricks(null); 085 this.fparser.lexer = this.flexer; 086 } 087 088 // ========== Tokenization State (used during tokenization) ========== 089 090 /** The Databricks lexer used for tokenization */ 091 public TLexerDatabricks flexer; 092 093 // ========== Statement Parsing State (used during statement parsing) ========== 094 095 /** Current statement being built */ 096 private TCustomSqlStatement gcurrentsqlstatement; 097 098 /** SQL parser (for Databricks SQL statements) */ 099 private TParserDatabricks fparser; 100 101 // ========== AbstractSqlParser Abstract Methods Implementation ========== 102 103 /** 104 * Return the Databricks lexer instance. 105 * <p> 106 * The lexer is created once in the constructor and reused for all 107 * parsing operations. 108 * 109 * @param context parser context (not used, lexer already created) 110 * @return the Databricks lexer instance created in constructor 111 */ 112 @Override 113 protected TCustomLexer getLexer(ParserContext context) { 114 return this.flexer; 115 } 116 117 /** 118 * Return the Databricks SQL parser instance with updated token list. 119 * <p> 120 * The parser is created once in the constructor and reused for all 121 * parsing operations. 
122 * 123 * @param context parser context (not used, parser already created) 124 * @param tokens source token list to parse 125 * @return the Databricks SQL parser instance created in constructor 126 */ 127 @Override 128 protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) { 129 this.fparser.sourcetokenlist = tokens; 130 return this.fparser; 131 } 132 133 /** 134 * Databricks uses a single parser, no secondary parser needed. 135 * 136 * @param context parser context 137 * @param tokens source token list 138 * @return null (no secondary parser) 139 */ 140 @Override 141 protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) { 142 return null; 143 } 144 145 /** 146 * Hook method: Tokenize Databricks SQL by calling vendor-specific tokenization. 147 */ 148 @Override 149 protected void tokenizeVendorSql() { 150 dodatabrickstexttotokenlist(); 151 } 152 153 /** 154 * Hook method: Setup parsers for raw statement extraction. 155 * Inject sqlcmds and sourcetokenlist into parser. 156 */ 157 @Override 158 protected void setupVendorParsersForExtraction() { 159 this.fparser.sqlcmds = this.sqlcmds; 160 this.fparser.sourcetokenlist = this.sourcetokenlist; 161 } 162 163 /** 164 * Hook method: Extract raw Databricks SQL statements. 165 * 166 * @param builder the result builder to populate 167 */ 168 @Override 169 protected void extractVendorRawStatements(SqlParseResult.Builder builder) { 170 dodatabricksgetrawsqlstatements(builder); 171 } 172 173 // ========== Databricks-Specific Tokenization Logic ========== 174 175 /** 176 * Tokenize Databricks SQL text to token list. 177 * <p> 178 * This method processes the input SQL text and converts it into a sequence 179 * of tokens. It handles Databricks-specific token processing including 180 * MySQL-style comments and delimiter handling. 181 * <p> 182 * Migrated from TGSqlParser.dodatabrickstexttotokenlist() at line 4696. 
183 */ 184 private void dodatabrickstexttotokenlist() { 185 TSourceToken asourcetoken, lcprevst; 186 int yychar; 187 boolean startDelimiter = false; 188 189 flexer.tmpDelimiter = ""; 190 191 asourcetoken = getanewsourcetoken(); 192 if (asourcetoken == null) return; 193 yychar = asourcetoken.tokencode; 194 195 while (yychar > 0) { 196 sourcetokenlist.add(asourcetoken); 197 asourcetoken = getanewsourcetoken(); 198 if (asourcetoken == null) break; 199 checkMySQLCommentToken(asourcetoken); 200 201 if ((asourcetoken.tokencode == TBaseType.lexnewline) && (startDelimiter)) { 202 startDelimiter = false; 203 flexer.tmpDelimiter = sourcetokenlist.get(sourcetokenlist.size() - 1).getAstext(); 204 } 205 206 yychar = asourcetoken.tokencode; 207 } 208 } 209 210 /** 211 * Check for MySQL-style comments in tokens. 212 * <p> 213 * This method is used to handle MySQL comment syntax which is also 214 * supported by Databricks. 215 * 216 * @param asourcetoken the token to check 217 */ 218 private void checkMySQLCommentToken(TSourceToken asourcetoken) { 219 // MySQL comment handling - placeholder for now 220 // The actual implementation would check for MySQL-style comments 221 // This matches the pattern from TGSqlParser 222 } 223 224 // ========== Databricks-Specific Raw Statement Extraction Logic ========== 225 226 /** 227 * Extract raw SQL statements from token list for Databricks. 228 * <p> 229 * This method separates the token list into individual SQL statements 230 * without performing full parsing. It handles Databricks-specific syntax: 231 * <ul> 232 * <li>VALUES keyword disambiguation for INSERT statements</li> 233 * <li>Datatype casting with literals (DATE '2021-2-1')</li> 234 * <li>PL/SQL block detection with BEGIN/END</li> 235 * <li>Statement terminators (semicolons, slash, period)</li> 236 * </ul> 237 * <p> 238 * Migrated from TGSqlParser.dodatabricksgetrawsqlstatements() at line 6944. 
     *
     * @param builder the result builder to populate with raw statements
     */
    private void dodatabricksgetrawsqlstatements(SqlParseResult.Builder builder) {
        int waitingEnd = 0;           // count of open BEGIN/IF/CASE/LOOP blocks still awaiting END
        boolean foundEnd = false;     // true once at least one END was seen in the current block
        EDataType tmpDatatype = null; // NOTE(review): unused in this method; carried over from the migrated original

        if (TBaseType.assigned(sqlstatements)) sqlstatements.clear();
        if (!TBaseType.assigned(sourcetokenlist)) {
            builder.errorCode(-1);
            builder.errorMessage("Source token list not assigned");
            return;
        }

        gcurrentsqlstatement = null;
        EFindSqlStateType gst = EFindSqlStateType.stnormal;
        TSourceToken lcprevsolidtoken = null, ast = null;

        for (int i = 0; i < sourcetokenlist.size(); i++) {

            // Remember the previous solid (non-whitespace/comment) token.
            if ((ast != null) && (ast.issolidtoken()))
                lcprevsolidtoken = ast;

            ast = sourcetokenlist.get(i);
            sourcetokenlist.curpos = i;

            // Databricks-specific token adjustments performed before the state machine.
            if (ast.tokencode == TBaseType.rrw_values) {
                // Decide whether this VALUES introduces an INSERT values list
                // or a standalone VALUES clause.
                TSourceToken stParen = ast.searchToken('(', 1);
                if (stParen != null) {
                    TSourceToken stInsert = ast.searchToken(TBaseType.rrw_insert, -ast.posinlist, ';', true);
                    if (stInsert != null) {
                        TSourceToken stSemiColon = ast.searchToken(';', -ast.posinlist);
                        if ((stSemiColon != null) && (stSemiColon.posinlist > stInsert.posinlist)) {
                            // INSERT INTO test values (16,1), (8,2), (4,4), (2,0), (97, 16);
                            // VALUES (1);
                            // don't treat values(1) as insert values
                        } else {
                            TSourceToken stFrom = ast.searchToken(TBaseType.rrw_from, -ast.posinlist, ';', true);
                            if (stFrom != null) {
                                // don't treat values after from keyword as an insert values
                                // insert into inserttest values(10, 20, '40'), (-1, 2, DEFAULT),
                                //   ((select 2), (select i from (values(3) ) as foo (i)), 'values are fun!');

                                // check whether the INSERT keyword is closer to VALUES than the FROM
                                // keyword; if yes, treat it as insert values
                                if (stInsert.posinlist > stFrom.posinlist) {
                                    // https://www.sqlparser.com/bugs/mantisbt/view.php?id=3354
                                    ast.tokencode = TBaseType.rrw_databricks_values_insert;
                                }
                            } else {
                                ast.tokencode = TBaseType.rrw_databricks_values_insert;
                            }
                        }
                    }
                }
            } else if ((ast.tokencode == TBaseType.sconst) || (ast.tokencode == '+') || (ast.tokencode == '-')) {
                // A string constant (or signed literal) preceded by a type name is a
                // typed literal: date '2021-2-1' — retag the type name as a cast
                // marker (INTERVAL keeps its own handling).
                if ((lcprevsolidtoken != null) && (TTypeName.searchTypeByName(lcprevsolidtoken.toString()) != null)) {
                    // date '2021-2-1', turn date to TBaseType.rrw_databricks_datatype_used_to_cast
                    if (lcprevsolidtoken.tokencode != TBaseType.rrw_interval) {
                        lcprevsolidtoken.tokencode = TBaseType.rrw_databricks_datatype_used_to_cast;
                    }
                }
            }

            // Statement-boundary state machine.
            switch (gst) {
                case sterror: {
                    // Swallow tokens until a semicolon terminates the broken statement.
                    if (ast.tokentype == ETokenType.ttsemicolon) {
                        appendToken(gcurrentsqlstatement, ast);
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                        gst = EFindSqlStateType.stnormal;
                    } else {
                        appendToken(gcurrentsqlstatement, ast);
                    }
                    break;
                } //sterror

                case stnormal: {
                    // Skip whitespace, comments and stray semicolons between statements.
                    if ((ast.tokencode == TBaseType.cmtdoublehyphen)
                            || (ast.tokencode == TBaseType.cmtslashstar)
                            || (ast.tokencode == TBaseType.lexspace)
                            || (ast.tokencode == TBaseType.lexnewline)
                            || (ast.tokentype == ETokenType.ttsemicolon)) {
                        if (gcurrentsqlstatement != null) {
                            appendToken(gcurrentsqlstatement, ast);
                        }

                        if ((lcprevsolidtoken != null) && (ast.tokentype == ETokenType.ttsemicolon)) {
                            if (lcprevsolidtoken.tokentype == ETokenType.ttsemicolon) {
                                // ;;;; continuous semicolon, treat it as comment
                                ast.tokentype = ETokenType.ttsimplecomment;
                                ast.tokencode = TBaseType.cmtdoublehyphen;
                            }
                        }

                        continue;
                    }

                    // find a tokentext to start sql or plsql mode
                    gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement);

                    if (gcurrentsqlstatement != null) {
                        if (gcurrentsqlstatement.isdatabricksplsql()) {
                            gst = EFindSqlStateType.ststoredprocedure;
                            appendToken(gcurrentsqlstatement, ast);
                            foundEnd = false;
                            // BEGIN or PACKAGE (possibly a few tokens ahead) opens a block.
                            if ((ast.tokencode == TBaseType.rrw_begin)
                                    || (ast.tokencode == TBaseType.rrw_package)
                                    || (ast.searchToken(TBaseType.rrw_package, 4) != null)) {
                                waitingEnd = 1;
                            }
                        } else {
                            gst = EFindSqlStateType.stsql;
                            appendToken(gcurrentsqlstatement, ast);
                        }
                    } else {
                        //error tokentext found: record a warning and switch to error recovery
                        // (note: "tokenlize" is the legacy message text, preserved as-is)
                        this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo)
                                , "Error when tokenlize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist));

                        ast.tokentype = ETokenType.tttokenlizererrortoken;
                        gst = EFindSqlStateType.sterror;

                        gcurrentsqlstatement = new TUnknownSqlStatement(vendor);
                        gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid;
                        appendToken(gcurrentsqlstatement, ast);
                    }

                    break;
                } // stnormal

                case stsql: {
                    // A semicolon ends an ordinary SQL statement.
                    if (ast.tokentype == ETokenType.ttsemicolon) {
                        gst = EFindSqlStateType.stnormal;
                        appendToken(gcurrentsqlstatement, ast);
                        gcurrentsqlstatement.semicolonended = ast;
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                        continue;
                    }

                    if (sourcetokenlist.sqlplusaftercurtoken()) { //most probably is / cmd
                        gst = EFindSqlStateType.stnormal;
                        appendToken(gcurrentsqlstatement, ast);
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                        continue;
                    }
                    appendToken(gcurrentsqlstatement, ast);
                    break;
                }//case stsql

                case ststoredprocedure: {
                    // Track BEGIN/IF/CASE/LOOP ... END nesting so the statement only
                    // ends at a semicolon when all opened blocks are closed.
                    if (ast.tokencode == TBaseType.rrw_begin) {
                        waitingEnd++;
                    } else if (ast.tokencode == TBaseType.rrw_if) {
                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
                            //this is not if after END
                            waitingEnd++;
                        }
                    } else if (ast.tokencode == TBaseType.rrw_case) {
                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
                            //this is not case after END
                            waitingEnd++;
                        }
                    } else if (ast.tokencode == TBaseType.rrw_loop) {
                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
                            //this is not loop after END
                            waitingEnd++;
                        }
                    } else if (ast.tokencode == TBaseType.rrw_end) {
                        foundEnd = true;
                        waitingEnd--;
                        if (waitingEnd < 0) {
                            waitingEnd = 0;
                        }
                    }

                    if ((ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) {
                        // TPlsqlStatementParse(asqlstatement).TerminatorToken := ast;
                        ast.tokenstatus = ETokenStatus.tsignorebyyacc;
                        gst = EFindSqlStateType.stnormal;
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);

                        //make / a sqlplus cmd
                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
                        appendToken(gcurrentsqlstatement, ast);
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                    } else if ((ast.tokentype == ETokenType.ttperiod) && (sourcetokenlist.returnaftercurtoken(false)) && (sourcetokenlist.returnbeforecurtoken(false))) {
                        // single dot at a separate line
                        ast.tokenstatus = ETokenStatus.tsignorebyyacc;
                        gst = EFindSqlStateType.stnormal;
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);

                        //make ttperiod a sqlplus cmd
                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
                        appendToken(gcurrentsqlstatement, ast);
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                    } else {
                        appendToken(gcurrentsqlstatement, ast);
                        // A semicolon closes the block only once every opened block has
                        // seen its END.
                        if ((ast.tokentype == ETokenType.ttsemicolon) && (waitingEnd == 0)
                                && (foundEnd)) {
                            gst = EFindSqlStateType.stnormal;
                            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                        }
                    }

                    if (ast.tokencode == TBaseType.sqlpluscmd) {
                        //change tokencode back to keyword or TBaseType.ident, because sqlplus cmd
                        //in a sql statement(almost is plsql block) is not really a sqlplus cmd
                        int m = flexer.getkeywordvalue(ast.getAstext());
                        if (m != 0) {
                            ast.tokencode = m;
                        } else {
                            ast.tokencode = TBaseType.ident;
                        }
                    }

                    break;
                } //ststoredprocedure
            } //switch
        }//for

        //last statement: flush whatever is still being assembled when the token list ends
        if ((gcurrentsqlstatement != null) &&
                ((gst == EFindSqlStateType.stsqlplus) || (gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.ststoredprocedure) ||
                        (gst == EFindSqlStateType.sterror))) {
            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, true, builder);
        }

        // Populate builder with results
        builder.sqlStatements(this.sqlstatements);
        builder.syntaxErrors(syntaxErrors instanceof ArrayList ?
                (ArrayList<TSyntaxError>) syntaxErrors : new ArrayList<>(syntaxErrors));
        builder.errorCode(syntaxErrors.isEmpty() ? 0 : syntaxErrors.size());
        if (!syntaxErrors.isEmpty()) {
            builder.errorMessage(String.format("Extraction completed with %d error(s)", syntaxErrors.size()));
        }
    }

    /**
     * Helper method to append token to statement.
     * Sets the token's stmt reference and adds it to the statement's token list.
     * No-op when either argument is null.
     *
     * @param statement the statement to append to
     * @param token the token to append
     */
    private void appendToken(TCustomSqlStatement statement, TSourceToken token) {
        if (statement == null || token == null) {
            return;
        }
        token.stmt = statement;
        statement.sourcetokenlist.add(token);
    }

    // ========== Statement Parsing Logic ==========

    /**
     * Parse all raw statements to build AST.
     * <p>
     * This method iterates through all raw statements and calls parsestatement()
     * on each one to build the Abstract Syntax Tree. It handles error recovery
     * for CREATE TABLE statements and collects syntax errors.
500 * 501 * @param context parser context with configuration 502 * @param parser primary parser instance 503 * @param secondaryParser secondary parser (null for Databricks) 504 * @param tokens source token list 505 * @param rawStatements raw statements from extraction phase 506 * @return statement list with parsed AST 507 */ 508 @Override 509 protected TStatementList performParsing(ParserContext context, TCustomParser parser, 510 TCustomParser secondaryParser, TSourceTokenList tokens, 511 TStatementList rawStatements) { 512 // Store references 513 this.fparser = (TParserDatabricks) parser; 514 this.sourcetokenlist = tokens; 515 this.parserContext = context; 516 this.sqlstatements = rawStatements; 517 518 // Initialize sqlcmds 519 this.sqlcmds = SqlCmdsFactory.get(vendor); 520 this.fparser.sqlcmds = this.sqlcmds; 521 522 // Initialize global context (inherited from AbstractSqlParser) 523 initializeGlobalContext(); 524 525 // Parse each statement 526 for (int i = 0; i < sqlstatements.size(); i++) { 527 TCustomSqlStatement stmt = sqlstatements.getRawSql(i); 528 try { 529 stmt.setFrameStack(frameStack); 530 int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree()); 531 532 // Vendor-specific post-processing (override hook if needed) 533 afterStatementParsed(stmt); 534 535 // Error recovery 536 boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE; 537 if (doRecover && ((parseResult != 0) || (stmt.getErrorCount() > 0))) { 538 handleCreateTableErrorRecovery(stmt); 539 } 540 541 // Collect errors 542 if ((parseResult != 0) || (stmt.getErrorCount() > 0)) { 543 copyErrorsFromStatement(stmt); 544 } 545 } catch (Exception ex) { 546 // Use inherited exception handler 547 handleStatementParsingException(stmt, i, ex); 548 continue; 549 } 550 } 551 552 // Clean up frame stack 553 if (globalFrame != null) globalFrame.popMeFromStack(frameStack); 554 555 return sqlstatements; 556 } 557 558 /** 559 * Post-processing hook after each statement is 
parsed. 560 * <p> 561 * Default implementation does nothing. Override if needed for vendor-specific 562 * post-processing. 563 * 564 * @param stmt the statement that was just parsed 565 */ 566 protected void afterStatementParsed(TCustomSqlStatement stmt) { 567 // Default: no post-processing needed for Databricks 568 } 569 570 /** 571 * Handle error recovery for CREATE TABLE statements. 572 * <p> 573 * This method attempts to recover from parse errors in CREATE TABLE statements 574 * by marking unparseable table properties (like ROW FORMAT, STORED AS, etc.) 575 * as sqlpluscmd and retrying. 576 * <p> 577 * Databricks/Hive DDL allows complex table properties after the column definition 578 * that may not be fully supported in the grammar. This error recovery allows 579 * partial parsing of the main table structure. 580 * <p> 581 * Extracted from TGSqlParser.doparse() lines 16916-16971 582 * 583 * @param stmt the statement with errors 584 */ 585 protected void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) { 586 if (((stmt.sqlstatementtype == ESqlStatementType.sstcreatetable) || 587 (stmt.sqlstatementtype == ESqlStatementType.sstcreateindex)) && 588 (!TBaseType.c_createTableStrictParsing)) { 589 590 // Find the closing parenthesis of table/column definition 591 // Mark everything after it as sqlpluscmd (ignored table properties) 592 int nested = 0; 593 boolean isIgnore = false, isFoundIgnoreToken = false; 594 TSourceToken firstIgnoreToken = null; 595 596 for (int k = 0; k < stmt.sourcetokenlist.size(); k++) { 597 TSourceToken st = stmt.sourcetokenlist.get(k); 598 599 if (isIgnore) { 600 // Mark tokens after closing paren as sqlpluscmd (to be ignored) 601 if (st.issolidtoken() && (st.tokencode != ';')) { 602 isFoundIgnoreToken = true; 603 if (firstIgnoreToken == null) { 604 firstIgnoreToken = st; 605 } 606 } 607 if (st.tokencode != ';') { 608 st.tokencode = TBaseType.sqlpluscmd; 609 } 610 continue; 611 } 612 613 // Track nested parentheses to find the 
matching closing paren 614 if (st.tokencode == (int) ')') { 615 nested--; 616 if (nested == 0) { 617 // Check if next token is "AS ( SELECT" - don't ignore CTAS subquery 618 boolean isSelect = false; 619 TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1); 620 if (st1 != null) { 621 TSourceToken st2 = st.searchToken((int) '(', 2); 622 if (st2 != null) { 623 TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3); 624 isSelect = (st3 != null); 625 } 626 } 627 if (!isSelect) { 628 // Found the closing paren, start ignoring subsequent tokens 629 isIgnore = true; 630 } 631 } 632 } 633 634 if ((st.tokencode == (int) '(') || (st.tokencode == TBaseType.left_parenthesis_2)) { 635 nested++; 636 } 637 } 638 639 // For Databricks, we don't validate specific table properties 640 // (unlike Oracle which checks TBaseType.searchOracleTablePros) 641 // This allows any Hive/Databricks DDL syntax like: 642 // ROW FORMAT, STORED AS, TBLPROPERTIES, LOCATION, etc. 643 644 // Retry parsing if we found ignoreable properties 645 if (isFoundIgnoreToken) { 646 stmt.clearError(); 647 stmt.parsestatement(null, false); 648 } 649 } 650 } 651 652 // ========== Semantic Analysis and Interpretation ========== 653 654 /** 655 * Perform semantic analysis (resolve column-table relationships, etc.). 656 * <p> 657 * This method runs the TSQLResolver to build semantic relationships 658 * between columns and tables, among other analysis. 
659 * 660 * @param context parser context 661 * @param statements statement list to analyze 662 */ 663 @Override 664 protected void performSemanticAnalysis(ParserContext context, TStatementList statements) { 665 if (!TBaseType.isEnableResolver()) { 666 return; 667 } 668 669 if (!getSyntaxErrors().isEmpty()) { 670 return; 671 } 672 673 try { 674 TSQLResolver resolver = new TSQLResolver(globalContext, statements); 675 resolver.resolve(); 676 } catch (Exception e) { 677 // Log but don't fail - semantic analysis is optional 678 System.err.println("Semantic analysis failed: " + e.getMessage()); 679 } 680 } 681 682 /** 683 * Perform interpretation (execute SQL in interpreter mode). 684 * <p> 685 * This method runs the TASTEvaluator to interpret/execute the SQL. 686 * 687 * @param context parser context 688 * @param statements statement list to interpret 689 */ 690 @Override 691 protected void performInterpreter(ParserContext context, TStatementList statements) { 692 if (!TBaseType.ENABLE_INTERPRETER) { 693 return; 694 } 695 696 try { 697 TGlobalScope interpreterScope = new TGlobalScope(sqlEnv); 698 TLog.enableInterpreterLogOnly(); 699 TASTEvaluator astEvaluator = new TASTEvaluator(statements, interpreterScope); 700 astEvaluator.eval(); 701 } catch (Exception e) { 702 // Log but don't fail - interpretation is optional 703 System.err.println("Interpretation failed: " + e.getMessage()); 704 } 705 } 706 707 @Override 708 public String toString() { 709 return "DatabricksSqlParser{vendor=" + vendor + "}"; 710 } 711}