001package gudusoft.gsqlparser.parser; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.TBaseType; 005import gudusoft.gsqlparser.TCustomLexer; 006import gudusoft.gsqlparser.TCustomParser; 007import gudusoft.gsqlparser.TCustomSqlStatement; 008import gudusoft.gsqlparser.TLexerHive; 009import gudusoft.gsqlparser.TParserHive; 010import gudusoft.gsqlparser.TSourceToken; 011import gudusoft.gsqlparser.TSourceTokenList; 012import gudusoft.gsqlparser.TStatementList; 013import gudusoft.gsqlparser.TSyntaxError; 014import gudusoft.gsqlparser.EFindSqlStateType; 015import gudusoft.gsqlparser.ETokenType; 016import gudusoft.gsqlparser.ETokenStatus; 017import gudusoft.gsqlparser.ESqlStatementType; 018import gudusoft.gsqlparser.EErrorType; 019import gudusoft.gsqlparser.stmt.TUnknownSqlStatement; 020import gudusoft.gsqlparser.sqlcmds.ISqlCmds; 021import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory; 022import gudusoft.gsqlparser.compiler.TContext; 023import gudusoft.gsqlparser.sqlenv.TSQLEnv; 024import gudusoft.gsqlparser.compiler.TGlobalScope; 025import gudusoft.gsqlparser.compiler.TFrame; 026import gudusoft.gsqlparser.resolver.TSQLResolver; 027import gudusoft.gsqlparser.TLog; 028import gudusoft.gsqlparser.compiler.TASTEvaluator; 029 030import java.io.BufferedReader; 031import java.util.ArrayList; 032import java.util.Arrays; 033import java.util.List; 034import java.util.Stack; 035 036/** 037 * Apache Hive SQL parser implementation. 038 * 039 * <p>This parser handles Hive-specific SQL syntax including: 040 * <ul> 041 * <li>Hive DDL statements (CREATE TABLE/DATABASE with Hive-specific options)</li> 042 * <li>Hive DML statements (INSERT OVERWRITE, LOAD DATA, etc.)</li> 043 * <li>HiveQL functions and extensions</li> 044 * <li>Backtick-quoted identifiers including qualified names (`schema.table`)</li> 045 * <li>Hive-specific keywords and data types</li> 046 * </ul> 047 * 048 * <p><b>Design Notes:</b> 049 * <ul> 050 * <li>Extends {@link AbstractSqlParser} using the template method pattern</li> 051 * <li>Uses {@link TLexerHive} for tokenization</li> 052 * <li>Uses {@link TParserHive} for parsing</li> 053 * <li>Delimiter character: ';' for SQL statements</li> 054 * <li>Splits backtick-quoted qualified names (`schema.table`) into individual tokens</li> 055 * </ul> 056 * 057 * <p><b>Usage Example:</b> 058 * <pre> 059 * // Get Hive parser from factory 060 * SqlParser parser = SqlParserFactory.get(EDbVendor.dbvhive); 061 * 062 * // Build context 063 * ParserContext context = new ParserContext.Builder(EDbVendor.dbvhive) 064 * .sqlText("SELECT * FROM `default.employee` WHERE dept = 'IT'") 065 * .build(); 066 * 067 * // Parse 068 * SqlParseResult result = parser.parse(context); 069 * 070 * // Access statements 071 * TStatementList statements = result.getSqlStatements(); 072 * </pre> 073 * 074 * @see SqlParser 075 * @see AbstractSqlParser 076 * @see TLexerHive 077 * @see TParserHive 078 * @since 3.2.0.0 079 */ 080public class HiveSqlParser extends AbstractSqlParser { 081 082 /** 083 * Construct Hive SQL parser. 084 * <p> 085 * Configures the parser for Hive database with default delimiter (;). 086 * <p> 087 * Following the original TGSqlParser pattern, the lexer and parser are 088 * created once in the constructor and reused for all parsing operations. 089 */ 090 public HiveSqlParser() { 091 super(EDbVendor.dbvhive); 092 this.delimiterChar = ';'; 093 this.defaultDelimiterStr = ";"; 094 095 // Create lexer once - will be reused for all parsing operations 096 this.flexer = new TLexerHive(); 097 this.flexer.delimiterchar = this.delimiterChar; 098 this.flexer.defaultDelimiterStr = this.defaultDelimiterStr; 099 100 // Set parent's lexer reference for shared tokenization logic 101 this.lexer = this.flexer; 102 103 // Create parser once - will be reused for all parsing operations 104 this.fparser = new TParserHive(null); 105 this.fparser.lexer = this.flexer; 106 } 107 108 // ========== Parser Components ========== 109 110 /** The Hive lexer used for tokenization */ 111 public TLexerHive flexer; 112 113 /** SQL parser (for Hive statements) */ 114 private TParserHive fparser; 115 116 /** Current statement being built during extraction */ 117 private TCustomSqlStatement gcurrentsqlstatement; 118 119 /** Parser context for current operation */ 120 private ParserContext parserContext; 121 122 // Note: Global context and frame stack fields inherited from AbstractSqlParser: 123 // - protected TContext globalContext 124 // - protected TSQLEnv sqlEnv 125 // - protected Stack<TFrame> frameStack 126 // - protected TFrame globalFrame 127 // - protected TSourceTokenList sourcetokenlist 128 // - protected TStatementList sqlstatements 129 // - protected ISqlCmds sqlcmds 130 // - protected TCustomLexer lexer 131 132 // ========== AbstractSqlParser Abstract Methods Implementation ========== 133 134 /** 135 * Return the Hive lexer instance. 136 */ 137 @Override 138 protected TCustomLexer getLexer(ParserContext context) { 139 return this.flexer; 140 } 141 142 /** 143 * Return the Hive SQL parser instance with updated token list. 144 */ 145 @Override 146 protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) { 147 this.fparser.sourcetokenlist = tokens; 148 return this.fparser; 149 } 150 151 /** 152 * Hive does not use a secondary parser (unlike Oracle with PL/SQL). 153 */ 154 @Override 155 protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) { 156 return null; 157 } 158 159 /** 160 * Call Hive-specific tokenization logic. 161 * <p> 162 * Delegates to dohivetexttotokenlist which handles Hive's 163 * specific keyword recognition, backtick-quoted identifiers, and 164 * qualified name splitting. 165 */ 166 @Override 167 protected void tokenizeVendorSql() { 168 dohivetexttotokenlist(); 169 } 170 171 /** 172 * Setup Hive parser for raw statement extraction. 173 * <p> 174 * Hive uses a single parser, so we inject sqlcmds and update 175 * the token list for the main parser only. 176 */ 177 @Override 178 protected void setupVendorParsersForExtraction() { 179 // Inject sqlcmds into parser (required for make_stmt) 180 this.fparser.sqlcmds = this.sqlcmds; 181 182 // Update token list for parser 183 this.fparser.sourcetokenlist = this.sourcetokenlist; 184 } 185 186 /** 187 * Call Hive-specific raw statement extraction logic. 188 * <p> 189 * Delegates to dohivegetrawsqlstatements which handles Hive's 190 * statement delimiters (semicolons). 191 * <p> 192 * Note: parserContext is already set by AbstractSqlParser before this is called 193 */ 194 @Override 195 protected void extractVendorRawStatements(SqlParseResult.Builder builder) { 196 int errorCount = dohivegetrawsqlstatements(builder); 197 // Error count is tracked internally; errors are already added to syntaxErrors list 198 199 // Set the extracted statements in the builder 200 builder.sqlStatements(this.sqlstatements); 201 } 202 203 // ========== Tokenization Methods ========== 204 205 /** 206 * Tokenize Hive SQL text into a list of tokens. 207 * <p> 208 * This method handles Hive-specific token processing: 209 * <ul> 210 * <li>Splits backtick-quoted qualified names (`schema.table`) into separate tokens</li> 211 * <li>Handles MAP keyword disambiguation</li> 212 * <li>Handles all standard SQL tokens (keywords, identifiers, operators, etc.)</li> 213 * </ul> 214 * <p> 215 * Migrated from TGSqlParser.dohivetexttotokenlist() 216 */ 217 private void dohivetexttotokenlist() { 218 219 TSourceToken asourcetoken, lcprevst; 220 int yychar; 221 222 asourcetoken = getanewsourcetoken(); 223 if (asourcetoken == null) return; 224 yychar = asourcetoken.tokencode; 225 226 while (yychar > 0) { 227 if (asourcetoken != null) { 228 sourcetokenlist.add(asourcetoken); 229 } 230 asourcetoken = getanewsourcetoken(); 231 if (asourcetoken == null) break; 232 if (asourcetoken.tokencode == TBaseType.rrw_map) { 233 TSourceToken token = asourcetoken.searchToken(')', -1); 234 if (token != null) { 235 asourcetoken.tokencode = TBaseType.ident; 236 } 237 } else if (asourcetoken.tokencode == '(') { 238// TSourceToken token = asourcetoken.searchToken(TBaseType.ident,-1); 239// if (token != null){ 240// token.tokencode = TBaseType.HIVE_FUNC_IDENT; 241// } 242 } 243 yychar = asourcetoken.tokencode; 244 245 // `schema.table_name` 246 if ((asourcetoken.tokencode == TBaseType.ident) 247 && (asourcetoken.toString().startsWith("`")) && (asourcetoken.toString().endsWith("`")) 248 && (asourcetoken.toString().indexOf(".") > 0) 249 ) { 250 // Do not split when the backtick-quoted identifier is used as 251 // an alias (follows `)` or `AS`). See MantisBT #3441. 252 TSourceToken prevSolid = null; 253 for (int pi = sourcetokenlist.size() - 1; pi >= 0; pi--) { 254 TSourceToken t = sourcetokenlist.get(pi); 255 if (t.issolidtoken()) { prevSolid = t; break; } 256 } 257 boolean isAlias = (prevSolid != null) 258 && (prevSolid.tokencode == ')'); 259 if (!isAlias) { 260 yychar = splitQualifiedNameInBacktick(asourcetoken); 261 asourcetoken = null; 262 } 263 } 264 265 } 266 267 } 268 269 /** 270 * Turn one token: `schema.table_name` into 3 tokens: `schema` . `table_name` 271 * <p> 272 * This helper method splits backtick-quoted qualified names into individual 273 * identifier and period tokens, preserving line/column information for each part. 274 * <p> 275 * Migrated from TGSqlParser.splitQualifiedNameInBacktick() 276 * 277 * @param asourcetoken the token to split 278 * @return the token code of the last token created 279 */ 280 private int splitQualifiedNameInBacktick(TSourceToken asourcetoken) { 281 int yychar = 0; 282 283 List<String> elephantList = Arrays.asList(TBaseType.getTextWithoutQuoted(asourcetoken.toString()).split("\\.")); 284 int p = 0, offset = 0; 285 for (String s : elephantList) { 286 TSourceToken pst = new TSourceToken("`" + s + "`"); 287 pst.tokencode = asourcetoken.tokencode; 288 pst.tokentype = asourcetoken.tokentype; 289 pst.tokenstatus = asourcetoken.tokenstatus; 290 pst.lineNo = asourcetoken.lineNo; 291 pst.columnNo = asourcetoken.columnNo + offset; 292 if (p == 0) offset++; // this count the first ` token 293 offset = offset + s.length(); 294 pst.container = sourcetokenlist; 295 if (p > 0) { // 第一个token使用被拆分前那个token的位置,从第二个开始的token,需要先把列表的位置指针加 1 296 sourcetokenlist.curpos = sourcetokenlist.curpos + 1; 297 } 298 pst.posinlist = sourcetokenlist.curpos; 299 300 sourcetokenlist.add(pst); 301 yychar = pst.tokencode; 302 303 if (p != elephantList.size() - 1) { 304 //`schema.table_name`, add period token in the middle of the backtick included identifier. 305 TSourceToken periodst = new TSourceToken("."); 306 periodst.tokencode = '.'; 307 periodst.tokentype = ETokenType.ttperiod; 308 periodst.tokenstatus = asourcetoken.tokenstatus; 309 periodst.lineNo = asourcetoken.lineNo; 310 periodst.columnNo = asourcetoken.columnNo + offset; 311 offset++; 312 periodst.container = sourcetokenlist; 313 sourcetokenlist.curpos = sourcetokenlist.curpos + 1; 314 periodst.posinlist = sourcetokenlist.curpos; 315 sourcetokenlist.add(periodst); 316 yychar = periodst.tokencode; 317 } 318 319 p++; 320 } 321 322 return yychar; 323 324 } 325 326 // ========== Raw Statement Extraction ========== 327 328 /** 329 * Extract raw SQL statements from the token list. 330 * <p> 331 * This method separates individual SQL statements without full syntax checking. 332 * It handles Hive-specific syntax including: 333 * <ul> 334 * <li>Token code adjustments (CharSetName, DATE function, SORT keyword)</li> 335 * <li>Semicolon-terminated statements</li> 336 * <li>Continuous semicolon handling (treated as comments)</li> 337 * </ul> 338 * <p> 339 * Migrated from TGSqlParser.dohivegetrawsqlstatements() 340 * 341 * @param builder the result builder to populate 342 * @return number of errors encountered 343 */ 344 private int dohivegetrawsqlstatements(SqlParseResult.Builder builder) { 345 346 if (TBaseType.assigned(sqlstatements)) sqlstatements.clear(); 347 if (!TBaseType.assigned(sourcetokenlist)) return -1; 348 349 gcurrentsqlstatement = null; 350 EFindSqlStateType gst = EFindSqlStateType.stnormal; 351 TSourceToken lcprevsolidtoken = null, ast = null; 352 int parenDepth = 0; 353 int compoundBlockNesting = 0; 354 int caseDepth = 0; 355 356 for (int i = 0; i < sourcetokenlist.size(); i++) { 357 358 if ((ast != null) && (ast.issolidtoken())) 359 lcprevsolidtoken = ast; 360 361 ast = sourcetokenlist.get(i); 362 sourcetokenlist.curpos = i; 363 364 if (ast.tokencode == TBaseType.hive_CharSetName) { 365 TSourceToken st1 = ast.searchToken(TBaseType.hive_CharSetLiteral, 1); 366 if (st1 == null) { 367 ast.tokencode = TBaseType.ident; 368 } 369 } else if (ast.tokencode == TBaseType.rrw_date) { 370 TSourceToken st1 = ast.nextSolidToken(); //ast.searchToken('(',1); 371 if (st1 != null) { 372 if (st1.tokencode == '(') { 373 ast.tokencode = TBaseType.rrw_hive_DATE_FUNCTION; 374 } 375 } 376 } else if (ast.tokencode == TBaseType.rrw_sort) { 377 TSourceToken st1 = ast.searchToken(TBaseType.rrw_by, 1); 378 if (st1 == null) { 379 ast.tokencode = TBaseType.ident; 380 } 381 } 382 383 switch (gst) { 384 case sterror: { 385 if (ast.tokentype == ETokenType.ttsemicolon) { 386 gcurrentsqlstatement.sourcetokenlist.add(ast); 387 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 388 gst = EFindSqlStateType.stnormal; 389 } else { 390 gcurrentsqlstatement.sourcetokenlist.add(ast); 391 } 392 break; 393 } //sterror 394 395 case stnormal: { 396 if ((ast.tokencode == TBaseType.cmtdoublehyphen) 397 || (ast.tokencode == TBaseType.cmtslashstar) 398 || (ast.tokencode == TBaseType.lexspace) 399 || (ast.tokencode == TBaseType.lexnewline) 400 || (ast.tokentype == ETokenType.ttsemicolon)) { 401 if (gcurrentsqlstatement != null) { 402 gcurrentsqlstatement.sourcetokenlist.add(ast); 403 } 404 405 if ((lcprevsolidtoken != null) && (ast.tokentype == ETokenType.ttsemicolon)) { 406 if (lcprevsolidtoken.tokentype == ETokenType.ttsemicolon) { 407 // ;;;; continuous semicolon,treat it as comment 408 ast.tokentype = ETokenType.ttsimplecomment; 409 ast.tokencode = TBaseType.cmtdoublehyphen; 410 } 411 } 412 413 continue; 414 } 415 416 417 gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement); 418 419 if (gcurrentsqlstatement != null) { 420 gst = EFindSqlStateType.stsql; 421 gcurrentsqlstatement.sourcetokenlist.add(ast); 422 } else { 423 //error tokentext found 424 425 this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo) 426 , "Error when tokenlize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist)); 427 428 ast.tokentype = ETokenType.tttokenlizererrortoken; 429 gst = EFindSqlStateType.sterror; 430 431 gcurrentsqlstatement = new TUnknownSqlStatement(vendor); 432 gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid; 433 gcurrentsqlstatement.sourcetokenlist.add(ast); 434 435 } 436 437 break; 438 } // stnormal 439 440 case stsql: { 441 // Track BEGIN/END and CASE/END nesting for compound blocks 442 if (ast.tokencode == TBaseType.rrw_begin && ast.issolidtoken()) { 443 compoundBlockNesting++; 444 } else if (ast.tokencode == TBaseType.rrw_case && ast.issolidtoken() && compoundBlockNesting > 0) { 445 caseDepth++; 446 } else if (ast.tokencode == TBaseType.rrw_end && ast.issolidtoken() && compoundBlockNesting > 0) { 447 if (caseDepth > 0) { 448 // This END closes a CASE expression, not a BEGIN block 449 TSourceToken nextSolid = sourcetokenlist.nextsolidtoken(i, 1, false); 450 boolean isEndCase = nextSolid == null || nextSolid.tokencode != TBaseType.rrw_if 451 && nextSolid.tokencode != TBaseType.rrw_while 452 && nextSolid.tokencode != TBaseType.rrw_loop 453 && nextSolid.tokencode != TBaseType.rrw_repeat 454 && nextSolid.tokencode != TBaseType.rrw_for; 455 if (isEndCase) { 456 caseDepth--; 457 } 458 } else { 459 // Only decrement for END that closes a BEGIN block. 460 // END IF, END WHILE, END LOOP, END FOR, END CASE don't close BEGIN. 461 TSourceToken nextSolid = sourcetokenlist.nextsolidtoken(i, 1, false); 462 boolean isStructEnd = nextSolid != null && ( 463 nextSolid.tokencode == TBaseType.rrw_if || 464 nextSolid.tokencode == TBaseType.rrw_while || 465 nextSolid.tokencode == TBaseType.rrw_loop || 466 nextSolid.tokencode == TBaseType.rrw_repeat || 467 nextSolid.tokencode == TBaseType.rrw_case || 468 nextSolid.tokencode == TBaseType.rrw_for); 469 if (!isStructEnd) { 470 compoundBlockNesting--; 471 } 472 } 473 } 474 475 if (ast.tokentype == ETokenType.ttsemicolon) { 476 if (compoundBlockNesting > 0) { 477 // Inside compound block - don't complete on semicolon 478 gcurrentsqlstatement.sourcetokenlist.add(ast); 479 continue; 480 } 481 gst = EFindSqlStateType.stnormal; 482 parenDepth = 0; 483 compoundBlockNesting = 0; 484 caseDepth = 0; 485 gcurrentsqlstatement.sourcetokenlist.add(ast); 486 gcurrentsqlstatement.semicolonended = ast; 487 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 488 continue; 489 } 490 491 // Inside compound block - don't split on DML keywords 492 if (compoundBlockNesting > 0) { 493 gcurrentsqlstatement.sourcetokenlist.add(ast); 494 break; 495 } 496 497 // Track parenthesis depth for subquery detection 498 if (ast.tokencode == '(') parenDepth++; 499 if (ast.tokencode == ')') parenDepth--; 500 501 // SET as new statement: if we encounter SET and the current statement 502 // is not one where SET is a valid clause keyword (UPDATE, MERGE, ALTER), 503 // complete the current statement and start a new one 504 if (ast.tokencode == TBaseType.rrw_set 505 && gcurrentsqlstatement != null) { 506 ESqlStatementType curType = gcurrentsqlstatement.sqlstatementtype; 507 boolean setIsClause = (curType == ESqlStatementType.sstupdate) 508 || (curType == ESqlStatementType.sstmerge) 509 || (curType == ESqlStatementType.sstaltertable) 510 || (curType == ESqlStatementType.sstalterview) 511 || (curType == ESqlStatementType.sstalterindex) 512 || (curType == ESqlStatementType.sstalterdatabase) 513 || (curType == ESqlStatementType.sstAlterMaterializedView) 514 || (curType == ESqlStatementType.ssthiveExplain) 515 || (curType == ESqlStatementType.ssthiveShow); 516 if (!setIsClause) { 517 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 518 parenDepth = 0; 519 gcurrentsqlstatement = sqlcmds.issql(ast, EFindSqlStateType.stnormal, null); 520 if (gcurrentsqlstatement != null) { 521 gcurrentsqlstatement.sourcetokenlist.add(ast); 522 } 523 break; 524 } 525 } 526 527 // SELECT as new statement: when at parenthesis depth 0, SELECT 528 // starts a new statement only when the current statement is SELECT 529 // and the previous token is not a set operator (UNION/INTERSECT/EXCEPT/ALL) 530 // and the token is not marked as part of a CTE 531 if (ast.tokencode == TBaseType.rrw_select 532 && parenDepth <= 0 533 && gcurrentsqlstatement != null 534 && gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstselect 535 && ast.tokenstatus != ETokenStatus.tsignoredbygetrawstatement) { 536 boolean isSetOp = false; 537 if (lcprevsolidtoken != null) { 538 int prevCode = lcprevsolidtoken.tokencode; 539 isSetOp = (prevCode == TBaseType.rrw_union) 540 || (prevCode == TBaseType.rrw_intersect) 541 || (prevCode == TBaseType.rrw_except) 542 || (prevCode == TBaseType.rrw_all) 543 || (prevCode == TBaseType.rrw_minus); 544 } 545 if (!isSetOp) { 546 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 547 parenDepth = 0; 548 gcurrentsqlstatement = sqlcmds.issql(ast, EFindSqlStateType.stnormal, null); 549 if (gcurrentsqlstatement != null) { 550 gcurrentsqlstatement.sourcetokenlist.add(ast); 551 } 552 break; 553 } 554 } 555 556 gcurrentsqlstatement.sourcetokenlist.add(ast); 557 break; 558 }//case stsql 559 560 } //switch 561 }//for 562 563 //last statement 564 if ((gcurrentsqlstatement != null) && 565 ((gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.sterror))) { 566 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, true, builder); 567 } 568 569 return syntaxErrors.size(); 570 } 571 572 // ========== Statement Parsing ========== 573 574 /** 575 * Parse all raw SQL statements. 576 * <p> 577 * This method performs full syntax analysis of each statement: 578 * <ul> 579 * <li>Initializes global context and SQL environment</li> 580 * <li>Parses each statement using TParserHive</li> 581 * <li>Handles errors with optional error recovery</li> 582 * <li>Collects syntax errors for reporting</li> 583 * </ul> 584 * <p> 585 * Migrated from TGSqlParser.performParsing() 586 * 587 * @param context the parser context 588 * @param parser the main parser (TParserHive) 589 * @param secondaryParser the secondary parser (null for Hive) 590 * @param tokens the source token list 591 * @param rawStatements raw statements already extracted (never null) 592 * @return the parsed statement list 593 */ 594 @Override 595 protected TStatementList performParsing(ParserContext context, TCustomParser parser, TCustomParser secondaryParser, TSourceTokenList tokens, TStatementList rawStatements) { 596 this.parserContext = context; 597 this.fparser = (TParserHive) parser; 598 this.sourcetokenlist = tokens; 599 this.sqlstatements = rawStatements; 600 601 // Initialize sqlcmds for this parsing operation 602 if (this.sqlcmds == null) { 603 this.sqlcmds = SqlCmdsFactory.get(vendor); 604 } 605 606 // CRITICAL: Inject sqlcmds into parser (required for make_stmt to work) 607 this.fparser.sqlcmds = this.sqlcmds; 608 609 // Initialize global context (inherited method from AbstractSqlParser) 610 initializeGlobalContext(); 611 612 // Parse each statement 613 for (int i = 0; i < sqlstatements.size(); i++) { 614 TCustomSqlStatement stmt = sqlstatements.getRawSql(i); 615 616 try { 617 // Set frame stack for nested scope resolution 618 stmt.setFrameStack(frameStack); 619 620 // Parse the statement 621 int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree()); 622 623 // Attempt error recovery using inherited method 624 parseResult = attemptErrorRecovery(stmt, parseResult, context.isOnlyNeedRawParseTree()); 625 626 // Collect errors from statement 627 if ((parseResult != 0) || (stmt.getErrorCount() > 0)) { 628 copyErrorsFromStatement(stmt); 629 } 630 631 } catch (Exception ex) { 632 // Use inherited exception handler from AbstractSqlParser 633 handleStatementParsingException(stmt, i, ex); 634 continue; 635 } 636 } 637 638 // Clean up frame stack 639 if (globalFrame != null) globalFrame.popMeFromStack(frameStack); 640 641 return sqlstatements; 642 } 643 644 // ========== Semantic Analysis ========== 645 646 /** 647 * Perform semantic analysis on parsed statements. 648 * <p> 649 * Runs TSQLResolver to build relationships between tables and columns, 650 * resolve references, and perform type checking. 651 */ 652 @Override 653 protected void performSemanticAnalysis(ParserContext context, TStatementList statements) { 654 if (TBaseType.isEnableResolver() && getSyntaxErrors().isEmpty()) { 655 TSQLResolver resolver = new TSQLResolver(globalContext, statements); 656 resolver.resolve(); 657 } 658 } 659 660 // ========== Interpretation ========== 661 662 /** 663 * Perform interpretation/evaluation on statements. 664 * <p> 665 * Runs TASTEvaluator for compile-time constant expression evaluation. 666 * Hive does not require interpretation currently. 667 */ 668 @Override 669 protected void performInterpreter(ParserContext context, TStatementList statements) { 670 // Hive does not require interpretation currently 671 } 672 673 @Override 674 public String toString() { 675 return "HiveSqlParser{vendor=" + vendor + "}"; 676 } 677}