001package gudusoft.gsqlparser.parser; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.TBaseType; 005import gudusoft.gsqlparser.TCustomLexer; 006import gudusoft.gsqlparser.TCustomParser; 007import gudusoft.gsqlparser.TCustomSqlStatement; 008import gudusoft.gsqlparser.TLexerRedshift; 009import gudusoft.gsqlparser.TParserRedshift; 010import gudusoft.gsqlparser.TSourceToken; 011import gudusoft.gsqlparser.TSourceTokenList; 012import gudusoft.gsqlparser.TStatementList; 013import gudusoft.gsqlparser.TSyntaxError; 014import gudusoft.gsqlparser.EFindSqlStateType; 015import gudusoft.gsqlparser.ETokenType; 016import gudusoft.gsqlparser.ETokenStatus; 017import gudusoft.gsqlparser.ESqlStatementType; 018import gudusoft.gsqlparser.EErrorType; 019import gudusoft.gsqlparser.stmt.TUnknownSqlStatement; 020import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement; 021import gudusoft.gsqlparser.stmt.TCommonBlock; 022import gudusoft.gsqlparser.stmt.TRoutine; 023import gudusoft.gsqlparser.sqlcmds.ISqlCmds; 024import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory; 025import gudusoft.gsqlparser.compiler.TContext; 026import gudusoft.gsqlparser.sqlenv.TSQLEnv; 027import gudusoft.gsqlparser.compiler.TGlobalScope; 028import gudusoft.gsqlparser.compiler.TFrame; 029import gudusoft.gsqlparser.resolver.TSQLResolver; 030import gudusoft.gsqlparser.TLog; 031import gudusoft.gsqlparser.compiler.TASTEvaluator; 032 033import java.io.BufferedReader; 034import java.util.ArrayList; 035import java.util.List; 036import java.util.Stack; 037 038/** 039 * Amazon Redshift SQL parser implementation. 
 *
 * <p>This parser handles Redshift-specific SQL syntax including:
 * <ul>
 * <li>PostgreSQL-based syntax (Redshift is based on PostgreSQL 8.0.2)</li>
 * <li>PL/pgSQL functions and procedures</li>
 * <li>CREATE FUNCTION with LANGUAGE clause</li>
 * <li>Function body delimiters ($$)</li>
 * <li>Redshift-specific types (ARRAY&lt;type&gt;, %ROWTYPE, etc.)</li>
 * <li>Redshift-specific keywords (FILTER, LANGUAGE, etc.)</li>
 * </ul>
 *
 * <p><b>Design Notes:</b>
 * <ul>
 * <li>Extends {@link AbstractSqlParser} using the template method pattern</li>
 * <li>Uses {@link TLexerRedshift} for tokenization</li>
 * <li>Uses {@link TParserRedshift} for parsing</li>
 * <li>Delimiter character: ';' for SQL statements</li>
 * </ul>
 *
 * <p><b>Usage Example:</b>
 * <pre>
 * // Get Redshift parser from factory
 * SqlParser parser = SqlParserFactory.get(EDbVendor.dbvredshift);
 *
 * // Build context
 * ParserContext context = new ParserContext.Builder(EDbVendor.dbvredshift)
 *         .sqlText("SELECT * FROM orders WHERE order_date &gt; CURRENT_DATE - 7")
 *         .build();
 *
 * // Parse
 * SqlParseResult result = parser.parse(context);
 *
 * // Access statements
 * TStatementList statements = result.getSqlStatements();
 * </pre>
 *
 * @see SqlParser
 * @see AbstractSqlParser
 * @see TLexerRedshift
 * @see TParserRedshift
 * @since 3.2.0.0
 */
public class RedshiftSqlParser extends AbstractSqlParser {

    /**
     * Construct a Redshift SQL parser configured with the default statement
     * delimiter (';').
     * <p>
     * Following the original TGSqlParser pattern, the lexer and parser are
     * created once here and reused for all subsequent parsing operations.
     */
    public RedshiftSqlParser() {
        super(EDbVendor.dbvredshift);
        this.delimiterChar = ';';
        this.defaultDelimiterStr = ";";

        // Create the lexer once - it is reused for every parsing operation.
        this.flexer = new TLexerRedshift();
        this.flexer.delimiterchar = this.delimiterChar;
        this.flexer.defaultDelimiterStr = this.defaultDelimiterStr;

        // CRITICAL: the parent class drives its shared tokenization logic
        // through this reference, so it must alias the vendor lexer.
        this.lexer = this.flexer;

        // Create the parser once - its token list is swapped in per parse
        // (see getParser / setupVendorParsersForExtraction below).
        this.fparser = new TParserRedshift(null);
        this.fparser.lexer = this.flexer;
    }

    // ========== Parser Components ==========

    /** The Redshift lexer used for tokenization (also aliased by the inherited {@code lexer} field). */
    public TLexerRedshift flexer;

    /** SQL parser (for Redshift statements); its token list is re-assigned before each parse. */
    private TParserRedshift fparser;

    /** Current statement being accumulated during raw statement extraction. */
    private TCustomSqlStatement gcurrentsqlstatement;

    // Note: Global context and frame stack fields inherited from AbstractSqlParser:
    // - protected TContext globalContext
    // - protected TSQLEnv sqlEnv
    // - protected Stack<TFrame> frameStack
    // - protected TFrame globalFrame

    // ========== AbstractSqlParser Abstract Methods Implementation ==========

    /**
     * Return the Redshift lexer instance created in the constructor.
     *
     * @param context parsing context (unused; the same lexer serves all contexts)
     * @return the shared {@link TLexerRedshift} instance
     */
    @Override
    protected TCustomLexer getLexer(ParserContext context) {
        return this.flexer;
    }

    /**
     * Return the Redshift SQL parser instance after pointing it at the
     * supplied token list.
     *
     * @param context parsing context (unused)
     * @param tokens  token list the parser should consume
     * @return the shared {@link TParserRedshift} instance
     */
    @Override
    protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) {
        this.fparser.sourcetokenlist = tokens;
        return this.fparser;
    }

    /**
     * Redshift doesn't have a secondary parser.
     * <p>
     * Only Oracle uses a secondary parser (PL/SQL parser).
     *
     * @return always {@code null}
     */
    @Override
    protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) {
        return null;
    }

    /**
     * Call Redshift-specific tokenization logic.
     * <p>
     * Delegates to {@link #doredshiftsqltexttotokenlist()} which handles
     * Redshift's keyword recognition, PostgreSQL-style meta commands, and
     * token generation.
     */
    @Override
    protected void tokenizeVendorSql() {
        doredshiftsqltexttotokenlist();
    }

    /**
     * Setup the Redshift parser for raw statement extraction.
     * <p>
     * Redshift uses a single parser, so we inject sqlcmds and update the
     * token list for the main parser only.
     */
    @Override
    protected void setupVendorParsersForExtraction() {
        // Inject sqlcmds into parser (required for make_stmt)
        this.fparser.sqlcmds = this.sqlcmds;

        // Update token list for parser
        this.fparser.sourcetokenlist = this.sourcetokenlist;
    }

    /**
     * Call Redshift-specific raw statement extraction logic.
     * <p>
     * Delegates to {@link #doredshiftgetrawsqlstatements} which handles
     * Redshift's statement delimiters (semicolon, function delimiters $$, etc.),
     * then publishes the extracted statement list on the builder.
     */
    @Override
    protected void extractVendorRawStatements(SqlParseResult.Builder builder) {
        doredshiftgetrawsqlstatements(builder);

        // Set the extracted statements in the builder
        builder.sqlStatements(this.sqlstatements);
    }

    /**
     * Perform full parsing of the raw statements with syntax checking.
     * <p>
     * Orchestration order matters here: sqlcmds must be injected into the
     * parser before any statement is parsed, and the global context must be
     * initialized before frame stacks are handed to statements.
     *
     * @param rawStatements raw statements produced by the extraction phase
     * @return the (now fully parsed) statement list
     */
    @Override
    protected TStatementList performParsing(ParserContext context,
                                            TCustomParser parser,
                                            TCustomParser secondaryParser,
                                            TSourceTokenList tokens,
                                            TStatementList rawStatements) {
        // Store references
        this.fparser = (TParserRedshift) parser;
        this.sourcetokenlist = tokens;
        this.parserContext = context;

        // Use the raw statements passed from AbstractSqlParser.parse()
        this.sqlstatements = rawStatements;

        // Initialize sqlcmds (required for parsing)
        this.sqlcmds = SqlCmdsFactory.get(vendor);

        // CRITICAL: Inject sqlcmds into parser (required for make_stmt)
        this.fparser.sqlcmds = this.sqlcmds;

        // Initialize global context for semantic analysis
        initializeGlobalContext();

        // Parse each statement independently; a failure in one statement must
        // not abort the remaining statements.
        for (int i = 0; i < sqlstatements.size(); i++) {
            TCustomSqlStatement stmt = sqlstatements.getRawSql(i);

            try {
                // Set frame stack for this statement
                stmt.setFrameStack(frameStack);

                // Parse the statement
                int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree());

                // Vendor-specific post-processing (override hook if needed)
                afterStatementParsed(stmt);

                // Error recovery for CREATE TABLE / CREATE INDEX statements
                boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE;
                if (doRecover && ((parseResult != 0) || (stmt.getErrorCount() > 0))) {
                    handleCreateTableErrorRecovery(stmt);
                }

                // Collect errors (recovery may have cleared some)
                if ((parseResult != 0) || (stmt.getErrorCount() > 0)) {
                    copyErrorsFromStatement(stmt);
                }

            } catch (Exception ex) {
                // Use inherited exception handler and keep going
                handleStatementParsingException(stmt, i, ex);
                continue;
            }
        }

        // Clean up frame stack
        if (globalFrame != null) {
            globalFrame.popMeFromStack(frameStack);
        }

        return sqlstatements;
    }

    /**
     * Perform semantic analysis on parsed statements.
     * <p>
     * This step resolves column-to-table relationships and performs type
     * checking. Skipped when the resolver is disabled or any syntax error
     * was recorded.
     */
    @Override
    protected void performSemanticAnalysis(ParserContext context, TStatementList statements) {
        if (TBaseType.isEnableResolver() && getSyntaxErrors().isEmpty()) {
            TSQLResolver resolver = new TSQLResolver(globalContext, statements);
            resolver.resolve();
        }
    }

    /**
     * Perform interpretation on parsed statements.
     * <p>
     * This step evaluates constant expressions and performs other
     * interpretation tasks. Skipped when the interpreter is disabled or any
     * syntax error was recorded.
     */
    @Override
    protected void performInterpreter(ParserContext context, TStatementList statements) {
        if (TBaseType.ENABLE_INTERPRETER && getSyntaxErrors().isEmpty()) {
            TLog.clearLogs();
            TGlobalScope interpreterScope = new TGlobalScope(sqlEnv);
            TLog.enableInterpreterLogOnly();
            TASTEvaluator astEvaluator = new TASTEvaluator(statements, interpreterScope);
            astEvaluator.eval();
        }
    }

    // ========== Redshift-Specific Tokenization ==========

    /**
     * Tokenize Redshift SQL text to token list.
     * <p>
     * Migrated from TGSqlParser.doredshiftsqltexttotokenlist().
299 * <p> 300 * This method handles Redshift-specific token processing: 301 * <ul> 302 * <li>SQL*Plus-like commands detection</li> 303 * <li>Forward slash (/) disambiguation</li> 304 * <li>%ROWTYPE operator detection</li> 305 * <li>Continuation lines (hyphen at end of line)</li> 306 * </ul> 307 */ 308 private void doredshiftsqltexttotokenlist() { 309 boolean insqlpluscmd = false; 310 boolean isvalidplace = true; 311 boolean waitingreturnforfloatdiv = false; 312 boolean waitingreturnforsemicolon = false; 313 boolean continuesqlplusatnewline = false; 314 315 TSourceToken lct = null, prevst = null; 316 317 TSourceToken asourcetoken, lcprevst; 318 int yychar; 319 320 asourcetoken = getanewsourcetoken(); 321 if (asourcetoken == null) return; 322 yychar = asourcetoken.tokencode; 323 324 while (yychar > 0) { 325 sourcetokenlist.add(asourcetoken); 326 switch (yychar) { 327 case TBaseType.cmtdoublehyphen: 328 case TBaseType.cmtslashstar: 329 case TBaseType.lexspace: { 330 if (insqlpluscmd) { 331 asourcetoken.insqlpluscmd = true; 332 } 333 break; 334 } 335 case TBaseType.lexnewline: { 336 if (insqlpluscmd) { 337 insqlpluscmd = false; 338 isvalidplace = true; 339 340 if (continuesqlplusatnewline) { 341 insqlpluscmd = true; 342 isvalidplace = false; 343 asourcetoken.insqlpluscmd = true; 344 } 345 } 346 347 if (waitingreturnforsemicolon) { 348 isvalidplace = true; 349 } 350 if (waitingreturnforfloatdiv) { 351 isvalidplace = true; 352 lct.tokencode = TBaseType.sqlpluscmd; 353 if (lct.tokentype != ETokenType.ttslash) { 354 lct.tokentype = ETokenType.ttsqlpluscmd; 355 } 356 } 357 flexer.insqlpluscmd = insqlpluscmd; 358 break; 359 } //case newline 360 default: { 361 //solid token 362 continuesqlplusatnewline = false; 363 waitingreturnforsemicolon = false; 364 waitingreturnforfloatdiv = false; 365 if (insqlpluscmd) { 366 asourcetoken.insqlpluscmd = true; 367 if (asourcetoken.toString().equalsIgnoreCase("-")) { 368 continuesqlplusatnewline = true; 369 } 370 } else { 371 if 
(asourcetoken.tokentype == ETokenType.ttsemicolon) { 372 waitingreturnforsemicolon = true; 373 } 374 if ((asourcetoken.tokentype == ETokenType.ttslash) 375 && (isvalidplace || (IsValidPlaceForDivToSqlplusCmd(sourcetokenlist, asourcetoken.posinlist)))) { 376 lct = asourcetoken; 377 waitingreturnforfloatdiv = true; 378 } 379 if ((isvalidplace) && isvalidsqlpluscmdInPostgresql(asourcetoken.toString())) { 380 asourcetoken.tokencode = TBaseType.sqlpluscmd; 381 if (asourcetoken.tokentype != ETokenType.ttslash) { 382 asourcetoken.tokentype = ETokenType.ttsqlpluscmd; 383 } 384 insqlpluscmd = true; 385 flexer.insqlpluscmd = insqlpluscmd; 386 } 387 } 388 isvalidplace = false; 389 390 // Redshift-specific: Handle %ROWTYPE operator 391 if (asourcetoken.tokencode == TBaseType.rrw_redshift_rowtype) { 392 TSourceToken stPercent = asourcetoken.searchToken('%', -1); 393 if (stPercent != null) { 394 stPercent.tokencode = TBaseType.rowtype_operator; 395 } 396 } 397 } 398 } 399 400 //flexer.yylexwrap(asourcetoken); 401 asourcetoken = getanewsourcetoken(); 402 if (asourcetoken != null) { 403 yychar = asourcetoken.tokencode; 404 } else { 405 yychar = 0; 406 407 if (waitingreturnforfloatdiv) { 408 // / at the end of line treat as sqlplus command 409 lct.tokencode = TBaseType.sqlpluscmd; 410 if (lct.tokentype != ETokenType.ttslash) { 411 lct.tokentype = ETokenType.ttsqlpluscmd; 412 } 413 } 414 } 415 416 if ((yychar == 0) && (prevst != null)) { 417 // End of input 418 } 419 } // while 420 } 421 422 /** 423 * Check if this is a valid place for a forward slash to be treated as a SQL*Plus command. 424 * <p> 425 * Migrated from TGSqlParser.IsValidPlaceForDivToSqlplusCmd(). 
426 */ 427 private boolean IsValidPlaceForDivToSqlplusCmd(TSourceTokenList tokenlist, int pos) { 428 if (tokenlist == null) return false; 429 if (pos <= 0) return true; 430 431 for (int i = pos - 1; i >= 0; i--) { 432 TSourceToken st = tokenlist.get(i); 433 if (st.tokencode == TBaseType.lexnewline) { 434 return true; 435 } 436 if ((st.tokencode != TBaseType.lexspace) 437 && (st.tokencode != TBaseType.cmtdoublehyphen) 438 && (st.tokencode != TBaseType.cmtslashstar)) { 439 return false; 440 } 441 } 442 return true; 443 } 444 445 /** 446 * Check if this token is a valid PostgreSQL-like command. 447 * <p> 448 * Migrated from TGSqlParser.isvalidsqlpluscmdInPostgresql(). 449 */ 450 private boolean isvalidsqlpluscmdInPostgresql(String str) { 451 if (str == null) return false; 452 if (str.length() == 0) return false; 453 454 String s = str.trim().toLowerCase(); 455 return s.startsWith("\\"); 456 } 457 458 // ========== Redshift-Specific Raw Statement Extraction ========== 459 460 /** 461 * Extract raw SQL statements from token list. 462 * <p> 463 * Migrated from TGSqlParser.doredshiftgetrawsqlstatements(). 
     * <p>
     * This method handles Redshift-specific statement boundaries:
     * <ul>
     * <li>Semicolon (;) for regular SQL statements</li>
     * <li>Function delimiter ($$) for function bodies</li>
     * <li>BEGIN/END blocks for PL/pgSQL</li>
     * <li>DECLARE blocks</li>
     * </ul>
     * <p>
     * Implemented as a state machine over {@code EFindSqlStateType}; the
     * current state is held in {@code gst} and the statement being built in
     * {@code gcurrentsqlstatement}.
     *
     * @param builder result builder; receives the final error count as the
     *                error code, or -1 when the token list is absent
     */
    private void doredshiftgetrawsqlstatements(SqlParseResult.Builder builder) {
        int waitingEnd = 0;
        boolean foundEnd = false, enterDeclare = false;

        if (TBaseType.assigned(sqlstatements)) sqlstatements.clear();
        if (!TBaseType.assigned(sourcetokenlist)) {
            builder.errorCode(-1);
            return;
        }

        gcurrentsqlstatement = null;
        EFindSqlStateType gst = EFindSqlStateType.stnormal;
        TSourceToken lcprevsolidtoken = null, ast = null;

        // Single PL block mode: everything goes into one common block.
        // NOTE(review): the block is created with dbvpostgresql, not dbvredshift -
        // presumably intentional since Redshift PL/pgSQL is PostgreSQL-based; confirm.
        if (parserContext.isSinglePLBlock()) {
            gcurrentsqlstatement = new TCommonBlock(EDbVendor.dbvpostgresql);
        }

        for (int i = 0; i < sourcetokenlist.size(); i++) {
            // Remember the previous solid (non-whitespace/comment) token.
            if ((ast != null) && (ast.issolidtoken()))
                lcprevsolidtoken = ast;

            ast = sourcetokenlist.get(i);
            sourcetokenlist.curpos = i;

            // Redshift-specific token adjustments
            if (ast.tokencode == TBaseType.rrw_redshift_filter) {
                // FILTER not followed by '(' is just an identifier.
                TSourceToken st1 = ast.nextSolidToken();
                if (st1 != null) {
                    if (st1.tokencode != '(') {
                        ast.tokencode = TBaseType.ident;
                    }
                }
            } else if (ast.tokencode == TBaseType.rrw_redshift_array) {
                TSourceToken st1 = ast.searchToken('<', 1);
                if (st1 != null) { // array<varchar(20)> - type constructor form
                    ast.tokencode = TBaseType.rrw_redshift_array_type;
                }
            } else if (ast.tokencode == TBaseType.rrw_values) {
                // Decide whether VALUES(...) belongs to an INSERT or stands alone.
                TSourceToken stParen = ast.searchToken('(', 1);
                if (stParen != null) {
                    TSourceToken stInsert = ast.searchToken(TBaseType.rrw_insert, -ast.posinlist);
                    if (stInsert != null) {
                        TSourceToken stSemiColon = ast.searchToken(';', -ast.posinlist);
                        if ((stSemiColon != null) && (stSemiColon.posinlist > stInsert.posinlist)) {
                            // INSERT INTO test values (16,1), (8,2), (4,4), (2,0), (97, 16);
                            // VALUES (1);
                            // a ';' between INSERT and VALUES means the INSERT already
                            // ended - don't treat values(1) as insert values
                        } else {
                            TSourceToken stFrom = ast.searchToken(TBaseType.rrw_from, -ast.posinlist);
                            if ((stFrom != null) && (stFrom.posinlist > stInsert.posinlist)) {
                                // don't treat values after from keyword as an insert values
                                // insert into inserttest values(10, 20, '40'), (-1, 2, DEFAULT),
                                // ((select 2), (select i from (values(3) ) as foo (i)), 'values are fun!');
                            } else {
                                ast.tokencode = TBaseType.rrw_postgresql_insert_values;
                            }
                        }
                    }
                }
            }

            switch (gst) {
                case sterror: {
                    // Error state: swallow tokens until a ';' closes the bad statement.
                    if (ast.tokentype == ETokenType.ttsemicolon) {
                        gcurrentsqlstatement.sourcetokenlist.add(ast);
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                        gst = EFindSqlStateType.stnormal;
                    } else {
                        gcurrentsqlstatement.sourcetokenlist.add(ast);
                    }
                    break;
                } //sterror

                case stnormal: {
                    // Between statements: skip whitespace/comments/stray semicolons.
                    if ((ast.tokencode == TBaseType.cmtdoublehyphen)
                            || (ast.tokencode == TBaseType.cmtslashstar)
                            || (ast.tokencode == TBaseType.lexspace)
                            || (ast.tokencode == TBaseType.lexnewline)
                            || (ast.tokentype == ETokenType.ttsemicolon)) {
                        if (gcurrentsqlstatement != null) {
                            gcurrentsqlstatement.sourcetokenlist.add(ast);
                        }

                        if ((lcprevsolidtoken != null) && (ast.tokentype == ETokenType.ttsemicolon)) {
                            if (lcprevsolidtoken.tokentype == ETokenType.ttsemicolon) {
                                // ;;;; continuous semicolon, treat it as comment
                                ast.tokentype = ETokenType.ttsimplecomment;
                                ast.tokencode = TBaseType.cmtdoublehyphen;
                            }
                        }

                        continue;
                    }

                    if (ast.tokencode == TBaseType.sqlpluscmd) {
                        gst = EFindSqlStateType.stsqlplus;
                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
                        gcurrentsqlstatement.sourcetokenlist.add(ast);
                        continue;
                    }

                    // find a token to start sql or plsql mode
                    gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement);

                    if (gcurrentsqlstatement != null) {
                        enterDeclare = false;
                        if (gcurrentsqlstatement.ispgplsql()) {
                            gst = EFindSqlStateType.ststoredprocedure;
                            gcurrentsqlstatement.sourcetokenlist.add(ast);
                            foundEnd = false;
                            // BEGIN/PACKAGE (directly or within the next 4 tokens)
                            // opens a block that must be balanced by END.
                            if ((ast.tokencode == TBaseType.rrw_begin)
                                    || (ast.tokencode == TBaseType.rrw_package)
                                    || (ast.searchToken(TBaseType.rrw_package, 4) != null)) {
                                waitingEnd = 1;
                            } else if (ast.tokencode == TBaseType.rrw_declare) {
                                enterDeclare = true;
                            }
                        } else {
                            gst = EFindSqlStateType.stsql;
                            gcurrentsqlstatement.sourcetokenlist.add(ast);
                        }
                    } else {
                        //error token found - record a warning and fall into sterror
                        this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo),
                                "Error when tokenize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist));

                        ast.tokentype = ETokenType.tttokenlizererrortoken;
                        gst = EFindSqlStateType.sterror;

                        gcurrentsqlstatement = new TUnknownSqlStatement(vendor);
                        gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid;
                        gcurrentsqlstatement.sourcetokenlist.add(ast);
                    }

                    break;
                } // stnormal

                case stsqlplus: {
                    if (ast.insqlpluscmd) {
                        gcurrentsqlstatement.sourcetokenlist.add(ast);
                    } else {
                        gst = EFindSqlStateType.stnormal; //this token must be newline,
                        gcurrentsqlstatement.sourcetokenlist.add(ast); // so add it here
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                    }

                    break;
                }//case stsqlplus

                case stsql: {
                    // Regular SQL statement: ';' terminates it.
                    if (ast.tokentype == ETokenType.ttsemicolon) {
                        gst = EFindSqlStateType.stnormal;
                        gcurrentsqlstatement.sourcetokenlist.add(ast);
                        gcurrentsqlstatement.semicolonended = ast;
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                        continue;
                    }

                    if (sourcetokenlist.sqlplusaftercurtoken()) { //most probably is / cmd
                        gst = EFindSqlStateType.stnormal;
                        gcurrentsqlstatement.sourcetokenlist.add(ast);
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                        continue;
                    }

                    if (ast.tokencode == TBaseType.cmtdoublehyphen) {
                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { // -- sqlflow-delimiter
                            gst = EFindSqlStateType.stnormal;
                            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                            continue;
                        }
                    }

                    gcurrentsqlstatement.sourcetokenlist.add(ast);
                    break;
                }//case stsql

                case ststoredprocedure: {
                    // $$ switches into the function body sub-state.
                    if (ast.tokencode == TBaseType.rrw_redshift_function_delimiter) {
                        gcurrentsqlstatement.sourcetokenlist.add(ast);
                        gst = EFindSqlStateType.ststoredprocedurePgStartBody;
                        continue;
                    }

                    if (ast.tokencode == TBaseType.rrw_redshift_language) {
                        // check next token which is the language used by this stored procedure
                        TSourceToken nextSt = ast.nextSolidToken();
                        if (nextSt != null) {
                            if (gcurrentsqlstatement instanceof TRoutine) { // can be TCreateProcedureStmt or TCreateFunctionStmt
                                TRoutine p = (TRoutine) gcurrentsqlstatement;
                                p.setRoutineLanguage(nextSt.toString());
                            }
                        }
                    }

                    // ';' ends the routine only when no BEGIN/IF/CASE/LOOP block
                    // is open and we are not inside a DECLARE section.
                    if ((ast.tokentype == ETokenType.ttsemicolon) && (waitingEnd == 0) && (!enterDeclare)) {
                        gst = EFindSqlStateType.stnormal;
                        gcurrentsqlstatement.sourcetokenlist.add(ast);
                        gcurrentsqlstatement.semicolonended = ast;
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                        continue;
                    }

                    // Track block nesting depth (waitingEnd).
                    if ((ast.tokencode == TBaseType.rrw_begin)) {
                        waitingEnd++;
                        enterDeclare = false;
                    } else if ((ast.tokencode == TBaseType.rrw_declare)) {
                        enterDeclare = true;
                    } else if ((ast.tokencode == TBaseType.rrw_if)) {
                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
                            //this is not if after END
                            waitingEnd++;
                        }
                    } else if ((ast.tokencode == TBaseType.rrw_case)) {
                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
                            //this is not case after END
                            waitingEnd++;
                        }
                    } else if ((ast.tokencode == TBaseType.rrw_loop)) {
                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
                            //this is not loop after END
                            waitingEnd++;
                        }
                    } else if (ast.tokencode == TBaseType.rrw_end) {
                        foundEnd = true;
                        waitingEnd--;
                        if (waitingEnd < 0) {
                            waitingEnd = 0;
                        }
                    }

                    if ((ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) {
                        // TPlsqlStatementParse(asqlstatement).TerminatorToken := ast;
                        ast.tokenstatus = ETokenStatus.tsignorebyyacc;
                        gst = EFindSqlStateType.stnormal;
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);

                        //make / a sqlplus cmd
                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
                        gcurrentsqlstatement.sourcetokenlist.add(ast);
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                    } else if ((ast.tokentype == ETokenType.ttperiod) && (sourcetokenlist.returnaftercurtoken(false)) && (sourcetokenlist.returnbeforecurtoken(false))) {
                        // single dot at a separate line
                        ast.tokenstatus = ETokenStatus.tsignorebyyacc;
                        gst = EFindSqlStateType.stnormal;
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);

                        //make ttperiod a sqlplus cmd
                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
                        gcurrentsqlstatement.sourcetokenlist.add(ast);
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                    } else {
                        gcurrentsqlstatement.sourcetokenlist.add(ast);
                        // ';' after a balanced END terminates the routine.
                        if ((ast.tokentype == ETokenType.ttsemicolon) && (waitingEnd == 0)
                                && (foundEnd)) {
                            gst = EFindSqlStateType.stnormal;
                            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                        }
                    }

                    if (ast.tokencode == TBaseType.sqlpluscmd) {
                        //change tokencode back to keyword or TBaseType.ident, because sqlplus cmd
                        //in a sql statement(almost is plsql block) is not really a sqlplus cmd
                        int m = flexer.getkeywordvalue(ast.getAstext());
                        if (m != 0) {
                            ast.tokencode = m;
                        } else {
                            ast.tokencode = TBaseType.ident;
                        }
                    }

                    if ((gst == EFindSqlStateType.ststoredprocedure) && (ast.tokencode == TBaseType.cmtdoublehyphen)) {
                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { // -- sqlflow-delimiter
                            gst = EFindSqlStateType.stnormal;
                            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                        }
                    }

                    break;
                } //ststoredprocedure

                case ststoredprocedurePgStartBody: {
                    // Check if this is the closing delimiter
                    if (ast.tokencode == TBaseType.rrw_redshift_function_delimiter) {
                        gcurrentsqlstatement.sourcetokenlist.add(ast);
                        gst = EFindSqlStateType.ststoredprocedurePgEndBody;
                        continue;
                    }

                    // Only add function body tokens if language is SQL or PLPGSQL
                    // For other languages (e.g., plpythonu, plperl), skip tokens so the
                    // parser sees two consecutive delimiters and matches the empty body rule
                    boolean shouldAddToken = true; // Default: add tokens (assume SQL/PLPGSQL)

                    // Look ahead to find the LANGUAGE keyword after the closing $$
                    // to determine if we should skip these tokens
                    TSourceToken languageToken = null;
                    for (int j = i + 1; j < sourcetokenlist.size(); j++) {
                        TSourceToken lookahead = sourcetokenlist.get(j);
                        if (lookahead.tokencode == TBaseType.rrw_redshift_function_delimiter) {
                            // Found closing delimiter, now look for LANGUAGE keyword
                            for (int k = j + 1; k < sourcetokenlist.size(); k++) {
                                TSourceToken st = sourcetokenlist.get(k);
                                if (st.tokencode == TBaseType.rrw_redshift_language) {
                                    // Found LANGUAGE, check next solid token for the language name
                                    languageToken = st.nextSolidToken();
                                    break;
                                }
                                if (st.tokentype == ETokenType.ttsemicolon) {
                                    break; // Reached end of statement
                                }
                            }
                            break;
                        }
                    }

                    if (languageToken != null) {
                        String language = languageToken.toString().toLowerCase().trim();
                        // Remove quotes if present
                        if (language.startsWith("'") && language.endsWith("'")) {
                            language = language.substring(1, language.length() - 1);
                        }
                        // Skip tokens for non-SQL/non-PLPGSQL languages
                        if (!language.equals("sql") && !language.equals("plpgsql")) {
                            shouldAddToken = false;
                        }
                    }

                    if (shouldAddToken) {
                        gcurrentsqlstatement.sourcetokenlist.add(ast);
                    }

                    break;
                }

                case ststoredprocedurePgEndBody: {
                    // After the closing $$: collect trailing clauses until ';'.
                    if (ast.tokentype == ETokenType.ttsemicolon) {
                        gst = EFindSqlStateType.stnormal;
                        gcurrentsqlstatement.sourcetokenlist.add(ast);
                        gcurrentsqlstatement.semicolonended = ast;
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                        continue;
                    } else if (ast.tokencode == TBaseType.cmtdoublehyphen) {
                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { // -- sqlflow-delimiter
                            gst = EFindSqlStateType.stnormal;
                            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                            continue;
                        }
                    }

                    gcurrentsqlstatement.sourcetokenlist.add(ast);

                    if (ast.tokencode == TBaseType.rrw_redshift_language) {
                        // check next token which is the language used by this stored procedure
                        TSourceToken nextSt = ast.nextSolidToken();
                        if (nextSt != null) {
                            if (gcurrentsqlstatement instanceof TRoutine) { // can be TCreateProcedureStmt or TCreateFunctionStmt
                                TRoutine p = (TRoutine) gcurrentsqlstatement;
                                p.setRoutineLanguage(nextSt.toString());
                            }
                        }
                    }

                    break;
                }
            } //switch
        }//for

        //last statement - flush whatever is still being accumulated
        if ((gcurrentsqlstatement != null) &&
                ((gst == EFindSqlStateType.stsqlplus) || (gst == EFindSqlStateType.stsql)
                        || (gst == EFindSqlStateType.ststoredprocedure)
                        || (gst == EFindSqlStateType.ststoredprocedurePgEndBody)
                        || (gst == EFindSqlStateType.sterror) || (parserContext.isSinglePLBlock()))) {
            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, true, builder);
        }

        builder.errorCode(syntaxErrors.size());
    }

    /**
     * Handle CREATE TABLE error recovery.
     * <p>
     * Migrated from TGSqlParser.handleCreateTableErrorRecovery().
     * <p>
     * For CREATE TABLE / CREATE INDEX statements (when strict parsing is off),
     * everything after the closing ')' of the column list - except an
     * AS (SELECT ...) clause - is marked so yacc ignores it, errors are
     * cleared, and the statement is re-parsed.
     *
     * @param stmt the statement that failed to parse
     */
    private void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) {
        if (((stmt.sqlstatementtype == ESqlStatementType.sstcreatetable)
                || (stmt.sqlstatementtype == ESqlStatementType.sstcreateindex))
                && (!TBaseType.c_createTableStrictParsing)) {

            int nested = 0; // parenthesis nesting depth of the column list
            boolean isIgnore = false, isFoundIgnoreToken = false;
            TSourceToken firstIgnoreToken = null;

            for (int k = 0; k < stmt.sourcetokenlist.size(); k++) {
                TSourceToken st = stmt.sourcetokenlist.get(k);
                if (isIgnore) {
                    // Past the column list: mark solid tokens (except ';') as ignorable.
                    if (st.issolidtoken() && (st.tokencode != ';')) {
                        isFoundIgnoreToken = true;
                        if (firstIgnoreToken == null) {
                            firstIgnoreToken = st;
                        }
                    }
                    if (st.tokencode != ';') {
                        st.tokencode = TBaseType.sqlpluscmd;
                    }
                    continue;
                }
                if (st.tokencode == (int) ')') {
                    nested--;
                    if (nested == 0) {
                        // Closing paren of the column list; keep an AS ( SELECT ... )
                        // clause, ignore anything else that follows.
                        boolean isSelect = false;
                        TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1);
                        if (st1 != null) {
                            TSourceToken st2 = st.searchToken((int) '(', 2);
                            if (st2 != null) {
                                TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3);
                                isSelect = (st3 != null);
                            }
                        }
                        if (!isSelect) isIgnore = true;
                    }
                } else if (st.tokencode == (int) '(') {
                    nested++;
                }
            }

            if (isFoundIgnoreToken) {
                stmt.clearError();
                // NOTE(review): uses the two-argument parsestatement overload here,
                // unlike the three-argument call in performParsing - presumably the
                // raw-parse-tree flag is irrelevant on re-parse; confirm against
                // TCustomSqlStatement.
                stmt.parsestatement(null, false);
            }
        }
    }

    /** @return short diagnostic description including the vendor. */
    @Override
    public String toString() {
        return "RedshiftSqlParser{vendor=" + vendor + "}";
    }
}