001package gudusoft.gsqlparser.parser; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.TBaseType; 005import gudusoft.gsqlparser.TCustomLexer; 006import gudusoft.gsqlparser.TCustomParser; 007import gudusoft.gsqlparser.TCustomSqlStatement; 008import gudusoft.gsqlparser.TLexerRedshift; 009import gudusoft.gsqlparser.TParserRedshift; 010import gudusoft.gsqlparser.TSourceToken; 011import gudusoft.gsqlparser.TSourceTokenList; 012import gudusoft.gsqlparser.TStatementList; 013import gudusoft.gsqlparser.TSyntaxError; 014import gudusoft.gsqlparser.EFindSqlStateType; 015import gudusoft.gsqlparser.ETokenType; 016import gudusoft.gsqlparser.ETokenStatus; 017import gudusoft.gsqlparser.ESqlStatementType; 018import gudusoft.gsqlparser.EErrorType; 019import gudusoft.gsqlparser.stmt.TUnknownSqlStatement; 020import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement; 021import gudusoft.gsqlparser.stmt.TCommonBlock; 022import gudusoft.gsqlparser.stmt.TRoutine; 023import gudusoft.gsqlparser.sqlcmds.ISqlCmds; 024import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory; 025import gudusoft.gsqlparser.compiler.TContext; 026import gudusoft.gsqlparser.sqlenv.TSQLEnv; 027import gudusoft.gsqlparser.compiler.TGlobalScope; 028import gudusoft.gsqlparser.compiler.TFrame; 029import gudusoft.gsqlparser.resolver.TSQLResolver; 030import gudusoft.gsqlparser.TLog; 031import gudusoft.gsqlparser.compiler.TASTEvaluator; 032 033import java.io.BufferedReader; 034import java.util.ArrayList; 035import java.util.List; 036import java.util.Stack; 037 038/** 039 * Amazon Redshift SQL parser implementation. 040 * 041 * <p>This parser handles Redshift-specific SQL syntax including: 042 * <ul> 043 * <li>PostgreSQL-based syntax (Redshift is based on PostgreSQL 8.0.2)</li> 044 * <li>PL/pgSQL functions and procedures</li> 045 * <li>CREATE FUNCTION with LANGUAGE clause</li> 046 * <li>Function body delimiters ($$)</li> 047 * <li>Redshift-specific types (ARRAY<type>, %ROWTYPE, etc.)</li> 048 * <li>Redshift-specific keywords (FILTER, LANGUAGE, etc.)</li> 049 * </ul> 050 * 051 * <p><b>Design Notes:</b> 052 * <ul> 053 * <li>Extends {@link AbstractSqlParser} using the template method pattern</li> 054 * <li>Uses {@link TLexerRedshift} for tokenization</li> 055 * <li>Uses {@link TParserRedshift} for parsing</li> 056 * <li>Delimiter character: ';' for SQL statements</li> 057 * </ul> 058 * 059 * <p><b>Usage Example:</b> 060 * <pre> 061 * // Get Redshift parser from factory 062 * SqlParser parser = SqlParserFactory.get(EDbVendor.dbvredshift); 063 * 064 * // Build context 065 * ParserContext context = new ParserContext.Builder(EDbVendor.dbvredshift) 066 * .sqlText("SELECT * FROM orders WHERE order_date > CURRENT_DATE - 7") 067 * .build(); 068 * 069 * // Parse 070 * SqlParseResult result = parser.parse(context); 071 * 072 * // Access statements 073 * TStatementList statements = result.getSqlStatements(); 074 * </pre> 075 * 076 * @see SqlParser 077 * @see AbstractSqlParser 078 * @see TLexerRedshift 079 * @see TParserRedshift 080 * @since 3.2.0.0 081 */ 082public class RedshiftSqlParser extends AbstractSqlParser { 083 084 /** 085 * Construct Redshift SQL parser. 086 * <p> 087 * Configures the parser for Redshift database with default delimiter (;). 088 * <p> 089 * Following the original TGSqlParser pattern, the lexer and parser are 090 * created once in the constructor and reused for all parsing operations. 091 */ 092 public RedshiftSqlParser() { 093 super(EDbVendor.dbvredshift); 094 this.delimiterChar = ';'; 095 this.defaultDelimiterStr = ";"; 096 097 // Create lexer once - will be reused for all parsing operations 098 this.flexer = new TLexerRedshift(); 099 this.flexer.delimiterchar = this.delimiterChar; 100 this.flexer.defaultDelimiterStr = this.defaultDelimiterStr; 101 102 // CRITICAL: Set parent's lexer reference for shared tokenization logic 103 this.lexer = this.flexer; 104 105 // Create parser once - will be reused for all parsing operations 106 this.fparser = new TParserRedshift(null); 107 this.fparser.lexer = this.flexer; 108 } 109 110 // ========== Parser Components ========== 111 112 /** The Redshift lexer used for tokenization */ 113 public TLexerRedshift flexer; 114 115 /** SQL parser (for Redshift statements) */ 116 private TParserRedshift fparser; 117 118 /** Current statement being built during extraction */ 119 private TCustomSqlStatement gcurrentsqlstatement; 120 121 // Note: Global context and frame stack fields inherited from AbstractSqlParser: 122 // - protected TContext globalContext 123 // - protected TSQLEnv sqlEnv 124 // - protected Stack<TFrame> frameStack 125 // - protected TFrame globalFrame 126 127 // ========== AbstractSqlParser Abstract Methods Implementation ========== 128 129 /** 130 * Return the Redshift lexer instance. 131 */ 132 @Override 133 protected TCustomLexer getLexer(ParserContext context) { 134 return this.flexer; 135 } 136 137 /** 138 * Return the Redshift SQL parser instance with updated token list. 139 */ 140 @Override 141 protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) { 142 this.fparser.sourcetokenlist = tokens; 143 return this.fparser; 144 } 145 146 /** 147 * Redshift doesn't have a secondary parser. 148 * <p> 149 * Only Oracle uses a secondary parser (PL/SQL parser). 150 */ 151 @Override 152 protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) { 153 return null; 154 } 155 156 /** 157 * Call Redshift-specific tokenization logic. 158 * <p> 159 * Delegates to doredshiftsqltexttotokenlist which handles Redshift's 160 * specific keyword recognition, PostgreSQL commands, and token generation. 161 */ 162 @Override 163 protected void tokenizeVendorSql() { 164 doredshiftsqltexttotokenlist(); 165 } 166 167 /** 168 * Setup Redshift parser for raw statement extraction. 169 * <p> 170 * Redshift uses a single parser, so we inject sqlcmds and update 171 * the token list for the main parser only. 172 */ 173 @Override 174 protected void setupVendorParsersForExtraction() { 175 // Inject sqlcmds into parser (required for make_stmt) 176 this.fparser.sqlcmds = this.sqlcmds; 177 178 // Update token list for parser 179 this.fparser.sourcetokenlist = this.sourcetokenlist; 180 } 181 182 /** 183 * Call Redshift-specific raw statement extraction logic. 184 * <p> 185 * Delegates to doredshiftgetrawsqlstatements which handles Redshift's 186 * statement delimiters (semicolon, function delimiters $$, etc.). 187 */ 188 @Override 189 protected void extractVendorRawStatements(SqlParseResult.Builder builder) { 190 doredshiftgetrawsqlstatements(builder); 191 192 // Set the extracted statements in the builder 193 builder.sqlStatements(this.sqlstatements); 194 } 195 196 /** 197 * Perform full parsing of statements with syntax checking. 198 * <p> 199 * This method orchestrates the parsing of all statements. 200 */ 201 @Override 202 protected TStatementList performParsing(ParserContext context, 203 TCustomParser parser, 204 TCustomParser secondaryParser, 205 TSourceTokenList tokens, 206 TStatementList rawStatements) { 207 // Store references 208 this.fparser = (TParserRedshift) parser; 209 this.sourcetokenlist = tokens; 210 this.parserContext = context; 211 212 // Use the raw statements passed from AbstractSqlParser.parse() 213 this.sqlstatements = rawStatements; 214 215 // Initialize sqlcmds (required for parsing) 216 this.sqlcmds = SqlCmdsFactory.get(vendor); 217 218 // CRITICAL: Inject sqlcmds into parser (required for make_stmt) 219 this.fparser.sqlcmds = this.sqlcmds; 220 221 // Initialize global context for semantic analysis 222 initializeGlobalContext(); 223 224 // Parse each statement 225 for (int i = 0; i < sqlstatements.size(); i++) { 226 TCustomSqlStatement stmt = sqlstatements.getRawSql(i); 227 228 try { 229 // Set frame stack for this statement 230 stmt.setFrameStack(frameStack); 231 232 // Parse the statement 233 int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree()); 234 235 // Vendor-specific post-processing (override hook if needed) 236 afterStatementParsed(stmt); 237 238 // Error recovery 239 boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE; 240 if (doRecover && ((parseResult != 0) || (stmt.getErrorCount() > 0))) { 241 handleCreateTableErrorRecovery(stmt); 242 } 243 244 // Collect errors 245 if ((parseResult != 0) || (stmt.getErrorCount() > 0)) { 246 copyErrorsFromStatement(stmt); 247 } 248 249 } catch (Exception ex) { 250 // Use inherited exception handler 251 handleStatementParsingException(stmt, i, ex); 252 continue; 253 } 254 } 255 256 // Clean up frame stack 257 if (globalFrame != null) { 258 globalFrame.popMeFromStack(frameStack); 259 } 260 261 return sqlstatements; 262 } 263 264 /** 265 * Perform semantic analysis on parsed statements. 266 * <p> 267 * This step resolves column-to-table relationships and performs type checking. 268 */ 269 @Override 270 protected void performSemanticAnalysis(ParserContext context, TStatementList statements) { 271 if (TBaseType.isEnableResolver() && getSyntaxErrors().isEmpty()) { 272 TSQLResolver resolver = new TSQLResolver(globalContext, statements); 273 resolver.resolve(); 274 } 275 } 276 277 /** 278 * Perform interpretation on parsed statements. 279 * <p> 280 * This step evaluates constant expressions and performs other interpretation tasks. 281 */ 282 @Override 283 protected void performInterpreter(ParserContext context, TStatementList statements) { 284 if (TBaseType.ENABLE_INTERPRETER && getSyntaxErrors().isEmpty()) { 285 TLog.clearLogs(); 286 TGlobalScope interpreterScope = new TGlobalScope(sqlEnv); 287 TLog.enableInterpreterLogOnly(); 288 TASTEvaluator astEvaluator = new TASTEvaluator(statements, interpreterScope); 289 astEvaluator.eval(); 290 } 291 } 292 293 // ========== Redshift-Specific Tokenization ========== 294 295 /** 296 * Tokenize Redshift SQL text to token list. 297 * <p> 298 * Migrated from TGSqlParser.doredshiftsqltexttotokenlist(). 299 * <p> 300 * This method handles Redshift-specific token processing: 301 * <ul> 302 * <li>SQL*Plus-like commands detection</li> 303 * <li>Forward slash (/) disambiguation</li> 304 * <li>%ROWTYPE operator detection</li> 305 * <li>Continuation lines (hyphen at end of line)</li> 306 * </ul> 307 */ 308 private void doredshiftsqltexttotokenlist() { 309 boolean insqlpluscmd = false; 310 boolean isvalidplace = true; 311 boolean waitingreturnforfloatdiv = false; 312 boolean waitingreturnforsemicolon = false; 313 boolean continuesqlplusatnewline = false; 314 315 TSourceToken lct = null, prevst = null; 316 317 TSourceToken asourcetoken, lcprevst; 318 int yychar; 319 320 asourcetoken = getanewsourcetoken(); 321 if (asourcetoken == null) return; 322 yychar = asourcetoken.tokencode; 323 324 while (yychar > 0) { 325 sourcetokenlist.add(asourcetoken); 326 switch (yychar) { 327 case TBaseType.cmtdoublehyphen: 328 case TBaseType.cmtslashstar: 329 case TBaseType.lexspace: { 330 if (insqlpluscmd) { 331 asourcetoken.insqlpluscmd = true; 332 } 333 break; 334 } 335 case TBaseType.lexnewline: { 336 if (insqlpluscmd) { 337 insqlpluscmd = false; 338 isvalidplace = true; 339 340 if (continuesqlplusatnewline) { 341 insqlpluscmd = true; 342 isvalidplace = false; 343 asourcetoken.insqlpluscmd = true; 344 } 345 } 346 347 if (waitingreturnforsemicolon) { 348 isvalidplace = true; 349 } 350 if (waitingreturnforfloatdiv) { 351 isvalidplace = true; 352 lct.tokencode = TBaseType.sqlpluscmd; 353 if (lct.tokentype != ETokenType.ttslash) { 354 lct.tokentype = ETokenType.ttsqlpluscmd; 355 } 356 } 357 flexer.insqlpluscmd = insqlpluscmd; 358 break; 359 } //case newline 360 default: { 361 //solid token 362 continuesqlplusatnewline = false; 363 waitingreturnforsemicolon = false; 364 waitingreturnforfloatdiv = false; 365 if (insqlpluscmd) { 366 asourcetoken.insqlpluscmd = true; 367 if (asourcetoken.toString().equalsIgnoreCase("-")) { 368 continuesqlplusatnewline = true; 369 } 370 } else { 371 if (asourcetoken.tokentype == ETokenType.ttsemicolon) { 372 waitingreturnforsemicolon = true; 373 } 374 if ((asourcetoken.tokentype == ETokenType.ttslash) 375 && (isvalidplace || (IsValidPlaceForDivToSqlplusCmd(sourcetokenlist, asourcetoken.posinlist)))) { 376 lct = asourcetoken; 377 waitingreturnforfloatdiv = true; 378 } 379 if ((isvalidplace) && isvalidsqlpluscmdInPostgresql(asourcetoken.toString())) { 380 asourcetoken.tokencode = TBaseType.sqlpluscmd; 381 if (asourcetoken.tokentype != ETokenType.ttslash) { 382 asourcetoken.tokentype = ETokenType.ttsqlpluscmd; 383 } 384 insqlpluscmd = true; 385 flexer.insqlpluscmd = insqlpluscmd; 386 } 387 } 388 isvalidplace = false; 389 390 // Redshift-specific: Handle %ROWTYPE operator 391 if (asourcetoken.tokencode == TBaseType.rrw_redshift_rowtype) { 392 TSourceToken stPercent = asourcetoken.searchToken('%', -1); 393 if (stPercent != null) { 394 stPercent.tokencode = TBaseType.rowtype_operator; 395 } 396 } 397 } 398 } 399 400 //flexer.yylexwrap(asourcetoken); 401 asourcetoken = getanewsourcetoken(); 402 if (asourcetoken != null) { 403 yychar = asourcetoken.tokencode; 404 } else { 405 yychar = 0; 406 407 if (waitingreturnforfloatdiv) { 408 // / at the end of line treat as sqlplus command 409 lct.tokencode = TBaseType.sqlpluscmd; 410 if (lct.tokentype != ETokenType.ttslash) { 411 lct.tokentype = ETokenType.ttsqlpluscmd; 412 } 413 } 414 } 415 416 if ((yychar == 0) && (prevst != null)) { 417 // End of input 418 } 419 } // while 420 } 421 422 /** 423 * Check if this is a valid place for a forward slash to be treated as a SQL*Plus command. 424 * <p> 425 * Migrated from TGSqlParser.IsValidPlaceForDivToSqlplusCmd(). 426 */ 427 private boolean IsValidPlaceForDivToSqlplusCmd(TSourceTokenList tokenlist, int pos) { 428 if (tokenlist == null) return false; 429 if (pos <= 0) return true; 430 431 for (int i = pos - 1; i >= 0; i--) { 432 TSourceToken st = tokenlist.get(i); 433 if (st.tokencode == TBaseType.lexnewline) { 434 return true; 435 } 436 if ((st.tokencode != TBaseType.lexspace) 437 && (st.tokencode != TBaseType.cmtdoublehyphen) 438 && (st.tokencode != TBaseType.cmtslashstar)) { 439 return false; 440 } 441 } 442 return true; 443 } 444 445 /** 446 * Check if this token is a valid PostgreSQL-like command. 447 * <p> 448 * Migrated from TGSqlParser.isvalidsqlpluscmdInPostgresql(). 449 */ 450 private boolean isvalidsqlpluscmdInPostgresql(String str) { 451 if (str == null) return false; 452 if (str.length() == 0) return false; 453 454 String s = str.trim().toLowerCase(); 455 return s.startsWith("\\"); 456 } 457 458 // ========== Redshift-Specific Raw Statement Extraction ========== 459 460 /** 461 * Extract raw SQL statements from token list. 462 * <p> 463 * Migrated from TGSqlParser.doredshiftgetrawsqlstatements(). 464 * <p> 465 * This method handles Redshift-specific statement boundaries: 466 * <ul> 467 * <li>Semicolon (;) for regular SQL statements</li> 468 * <li>Function delimiter ($$) for function bodies</li> 469 * <li>BEGIN/END blocks for PL/pgSQL</li> 470 * <li>DECLARE blocks</li> 471 * </ul> 472 */ 473 private void doredshiftgetrawsqlstatements(SqlParseResult.Builder builder) { 474 int waitingEnd = 0; 475 boolean foundEnd = false, enterDeclare = false; 476 477 if (TBaseType.assigned(sqlstatements)) sqlstatements.clear(); 478 if (!TBaseType.assigned(sourcetokenlist)) { 479 builder.errorCode(-1); 480 return; 481 } 482 483 gcurrentsqlstatement = null; 484 EFindSqlStateType gst = EFindSqlStateType.stnormal; 485 TSourceToken lcprevsolidtoken = null, ast = null; 486 487 if (parserContext.isSinglePLBlock()) { 488 gcurrentsqlstatement = new TCommonBlock(EDbVendor.dbvpostgresql); 489 } 490 491 for (int i = 0; i < sourcetokenlist.size(); i++) { 492 if ((ast != null) && (ast.issolidtoken())) 493 lcprevsolidtoken = ast; 494 495 ast = sourcetokenlist.get(i); 496 sourcetokenlist.curpos = i; 497 498 // Redshift-specific token adjustments 499 if (ast.tokencode == TBaseType.rrw_redshift_filter) { 500 TSourceToken st1 = ast.nextSolidToken(); 501 if (st1 != null) { 502 if (st1.tokencode != '(') { 503 ast.tokencode = TBaseType.ident; 504 } 505 } 506 } else if (ast.tokencode == TBaseType.rrw_redshift_array) { 507 TSourceToken st1 = ast.searchToken('<', 1); 508 if (st1 != null) { // array<varchar(20)> 509 ast.tokencode = TBaseType.rrw_redshift_array_type; 510 } 511 } else if (ast.tokencode == TBaseType.rrw_binary) { 512 // Distinguish BINARY as data type from BINARY as identifier 513 // BINARY is a type when: 514 // - Preceded by: AS (CAST), comma, left paren, column name 515 // - Followed by: VARYING, left paren, comma, right paren, NOT, NULL 516 TSourceToken prevToken = ast.prevSolidToken(); 517 TSourceToken nextToken = ast.nextSolidToken(); 518 519 // Check if preceded by a period -> identifier (e.g., table.binary) 520 if (prevToken != null && prevToken.tokencode == '.') { 521 // Keep as identifier, no change 522 } 523 // Check type contexts by previous token 524 else if (prevToken != null && 525 (prevToken.tokencode == TBaseType.rrw_as || // CAST(x AS BINARY) 526 prevToken.tokencode == ',' || // func(INT, BINARY) 527 prevToken.tokencode == '(' || // (col BINARY), CAST(BINARY ... 528 prevToken.tokentype == ETokenType.ttidentifier)) { // column_name BINARY 529 ast.tokencode = TBaseType.rrw_redshift_binary_as_type; 530 } 531 // Check type contexts by next token 532 else if (nextToken != null && 533 (nextToken.tokencode == TBaseType.rrw_varying || // BINARY VARYING 534 nextToken.tokencode == '(' || // BINARY(10) 535 nextToken.tokencode == ',' || // col BINARY, col2 536 nextToken.tokencode == ')' || // col BINARY) 537 nextToken.tokencode == TBaseType.rrw_not || // BINARY NOT NULL 538 nextToken.tokencode == TBaseType.rrw_null)) { // BINARY NULL 539 ast.tokencode = TBaseType.rrw_redshift_binary_as_type; 540 } 541 } else if (ast.tokencode == TBaseType.rrw_values) { 542 TSourceToken stParen = ast.searchToken('(', 1); 543 if (stParen != null) { 544 TSourceToken stInsert = ast.searchToken(TBaseType.rrw_insert, -ast.posinlist); 545 if (stInsert != null) { 546 TSourceToken stSemiColon = ast.searchToken(';', -ast.posinlist); 547 if ((stSemiColon != null) && (stSemiColon.posinlist > stInsert.posinlist)) { 548 // INSERT INTO test values (16,1), (8,2), (4,4), (2,0), (97, 16); 549 // VALUES (1); 550 // don't treat values(1) as insert values 551 } else { 552 TSourceToken stFrom = ast.searchToken(TBaseType.rrw_from, -ast.posinlist); 553 if ((stFrom != null) && (stFrom.posinlist > stInsert.posinlist)) { 554 // don't treat values after from keyword as an insert values 555 // insert into inserttest values(10, 20, '40'), (-1, 2, DEFAULT), 556 // ((select 2), (select i from (values(3) ) as foo (i)), 'values are fun!'); 557 } else { 558 ast.tokencode = TBaseType.rrw_postgresql_insert_values; 559 } 560 } 561 } 562 } 563 } 564 565 switch (gst) { 566 case sterror: { 567 if (ast.tokentype == ETokenType.ttsemicolon) { 568 gcurrentsqlstatement.sourcetokenlist.add(ast); 569 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 570 gst = EFindSqlStateType.stnormal; 571 } else { 572 gcurrentsqlstatement.sourcetokenlist.add(ast); 573 } 574 break; 575 } //sterror 576 577 case stnormal: { 578 if ((ast.tokencode == TBaseType.cmtdoublehyphen) 579 || (ast.tokencode == TBaseType.cmtslashstar) 580 || (ast.tokencode == TBaseType.lexspace) 581 || (ast.tokencode == TBaseType.lexnewline) 582 || (ast.tokentype == ETokenType.ttsemicolon)) { 583 if (gcurrentsqlstatement != null) { 584 gcurrentsqlstatement.sourcetokenlist.add(ast); 585 } 586 587 if ((lcprevsolidtoken != null) && (ast.tokentype == ETokenType.ttsemicolon)) { 588 if (lcprevsolidtoken.tokentype == ETokenType.ttsemicolon) { 589 // ;;;; continuous semicolon, treat it as comment 590 ast.tokentype = ETokenType.ttsimplecomment; 591 ast.tokencode = TBaseType.cmtdoublehyphen; 592 } 593 } 594 595 continue; 596 } 597 598 if (ast.tokencode == TBaseType.sqlpluscmd) { 599 gst = EFindSqlStateType.stsqlplus; 600 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 601 gcurrentsqlstatement.sourcetokenlist.add(ast); 602 continue; 603 } 604 605 // find a token to start sql or plsql mode 606 gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement); 607 608 if (gcurrentsqlstatement != null) { 609 enterDeclare = false; 610 if (gcurrentsqlstatement.ispgplsql()) { 611 gst = EFindSqlStateType.ststoredprocedure; 612 gcurrentsqlstatement.sourcetokenlist.add(ast); 613 foundEnd = false; 614 if ((ast.tokencode == TBaseType.rrw_begin) 615 || (ast.tokencode == TBaseType.rrw_package) 616 || (ast.searchToken(TBaseType.rrw_package, 4) != null)) { 617 waitingEnd = 1; 618 } else if (ast.tokencode == TBaseType.rrw_declare) { 619 enterDeclare = true; 620 } 621 } else { 622 gst = EFindSqlStateType.stsql; 623 gcurrentsqlstatement.sourcetokenlist.add(ast); 624 } 625 } else { 626 //error token found 627 this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo), 628 "Error when tokenize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist)); 629 630 ast.tokentype = ETokenType.tttokenlizererrortoken; 631 gst = EFindSqlStateType.sterror; 632 633 gcurrentsqlstatement = new TUnknownSqlStatement(vendor); 634 gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid; 635 gcurrentsqlstatement.sourcetokenlist.add(ast); 636 } 637 638 break; 639 } // stnormal 640 641 case stsqlplus: { 642 if (ast.insqlpluscmd) { 643 gcurrentsqlstatement.sourcetokenlist.add(ast); 644 } else { 645 gst = EFindSqlStateType.stnormal; //this token must be newline, 646 gcurrentsqlstatement.sourcetokenlist.add(ast); // so add it here 647 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 648 } 649 650 break; 651 }//case stsqlplus 652 653 case stsql: { 654 if (ast.tokentype == ETokenType.ttsemicolon) { 655 gst = EFindSqlStateType.stnormal; 656 gcurrentsqlstatement.sourcetokenlist.add(ast); 657 gcurrentsqlstatement.semicolonended = ast; 658 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 659 continue; 660 } 661 662 if (sourcetokenlist.sqlplusaftercurtoken()) { //most probably is / cmd 663 gst = EFindSqlStateType.stnormal; 664 gcurrentsqlstatement.sourcetokenlist.add(ast); 665 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 666 continue; 667 } 668 669 if (ast.tokencode == TBaseType.cmtdoublehyphen) { 670 if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { // -- sqlflow-delimiter 671 gst = EFindSqlStateType.stnormal; 672 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 673 continue; 674 } 675 } 676 677 gcurrentsqlstatement.sourcetokenlist.add(ast); 678 break; 679 }//case stsql 680 681 case ststoredprocedure: { 682 if (ast.tokencode == TBaseType.rrw_redshift_function_delimiter) { 683 gcurrentsqlstatement.sourcetokenlist.add(ast); 684 gst = EFindSqlStateType.ststoredprocedurePgStartBody; 685 continue; 686 } 687 688 if (ast.tokencode == TBaseType.rrw_redshift_language) { 689 // check next token which is the language used by this stored procedure 690 TSourceToken nextSt = ast.nextSolidToken(); 691 if (nextSt != null) { 692 if (gcurrentsqlstatement instanceof TRoutine) { // can be TCreateProcedureStmt or TCreateFunctionStmt 693 TRoutine p = (TRoutine) gcurrentsqlstatement; 694 p.setRoutineLanguage(nextSt.toString()); 695 } 696 } 697 } 698 699 if ((ast.tokentype == ETokenType.ttsemicolon) && (waitingEnd == 0) && (!enterDeclare)) { 700 gst = EFindSqlStateType.stnormal; 701 gcurrentsqlstatement.sourcetokenlist.add(ast); 702 gcurrentsqlstatement.semicolonended = ast; 703 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 704 continue; 705 } 706 707 if ((ast.tokencode == TBaseType.rrw_begin)) { 708 waitingEnd++; 709 enterDeclare = false; 710 } else if ((ast.tokencode == TBaseType.rrw_declare)) { 711 enterDeclare = true; 712 } else if ((ast.tokencode == TBaseType.rrw_if)) { 713 if (ast.searchToken(TBaseType.rrw_end, -1) == null) { 714 //this is not if after END 715 waitingEnd++; 716 } 717 } else if ((ast.tokencode == TBaseType.rrw_case)) { 718 if (ast.searchToken(TBaseType.rrw_end, -1) == null) { 719 //this is not case after END 720 waitingEnd++; 721 } 722 } else if ((ast.tokencode == TBaseType.rrw_loop)) { 723 if (ast.searchToken(TBaseType.rrw_end, -1) == null) { 724 //this is not loop after END 725 waitingEnd++; 726 } 727 } else if (ast.tokencode == TBaseType.rrw_end) { 728 foundEnd = true; 729 waitingEnd--; 730 if (waitingEnd < 0) { 731 waitingEnd = 0; 732 } 733 } 734 735 if ((ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) { 736 // TPlsqlStatementParse(asqlstatement).TerminatorToken := ast; 737 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 738 gst = EFindSqlStateType.stnormal; 739 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 740 741 //make / a sqlplus cmd 742 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 743 gcurrentsqlstatement.sourcetokenlist.add(ast); 744 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 745 } else if ((ast.tokentype == ETokenType.ttperiod) && (sourcetokenlist.returnaftercurtoken(false)) && (sourcetokenlist.returnbeforecurtoken(false))) { 746 // single dot at a separate line 747 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 748 gst = EFindSqlStateType.stnormal; 749 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 750 751 //make ttperiod a sqlplus cmd 752 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 753 gcurrentsqlstatement.sourcetokenlist.add(ast); 754 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 755 } else { 756 gcurrentsqlstatement.sourcetokenlist.add(ast); 757 if ((ast.tokentype == ETokenType.ttsemicolon) && (waitingEnd == 0) 758 && (foundEnd)) { 759 gst = EFindSqlStateType.stnormal; 760 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 761 } 762 } 763 764 if (ast.tokencode == TBaseType.sqlpluscmd) { 765 //change tokencode back to keyword or TBaseType.ident, because sqlplus cmd 766 //in a sql statement(almost is plsql block) is not really a sqlplus cmd 767 int m = flexer.getkeywordvalue(ast.getAstext()); 768 if (m != 0) { 769 ast.tokencode = m; 770 } else { 771 ast.tokencode = TBaseType.ident; 772 } 773 } 774 775 if ((gst == EFindSqlStateType.ststoredprocedure) && (ast.tokencode == TBaseType.cmtdoublehyphen)) { 776 if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { // -- sqlflow-delimiter 777 gst = EFindSqlStateType.stnormal; 778 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 779 } 780 } 781 782 break; 783 } //ststoredprocedure 784 785 case ststoredprocedurePgStartBody: { 786 // Check if this is the closing delimiter 787 if (ast.tokencode == TBaseType.rrw_redshift_function_delimiter) { 788 gcurrentsqlstatement.sourcetokenlist.add(ast); 789 gst = EFindSqlStateType.ststoredprocedurePgEndBody; 790 continue; 791 } 792 793 // Only add function body tokens if language is SQL or PLPGSQL 794 // For other languages (e.g., plpythonu, plperl), skip tokens so the 795 // parser sees two consecutive delimiters and matches the empty body rule 796 boolean shouldAddToken = true; // Default: add tokens (assume SQL/PLPGSQL) 797 798 // Look ahead to find the LANGUAGE keyword after the closing $$ 799 // to determine if we should skip these tokens 800 TSourceToken languageToken = null; 801 for (int j = i + 1; j < sourcetokenlist.size(); j++) { 802 TSourceToken lookahead = sourcetokenlist.get(j); 803 if (lookahead.tokencode == TBaseType.rrw_redshift_function_delimiter) { 804 // Found closing delimiter, now look for LANGUAGE keyword 805 for (int k = j + 1; k < sourcetokenlist.size(); k++) { 806 TSourceToken st = sourcetokenlist.get(k); 807 if (st.tokencode == TBaseType.rrw_redshift_language) { 808 // Found LANGUAGE, check next solid token for the language name 809 languageToken = st.nextSolidToken(); 810 break; 811 } 812 if (st.tokentype == ETokenType.ttsemicolon) { 813 break; // Reached end of statement 814 } 815 } 816 break; 817 } 818 } 819 820 if (languageToken != null) { 821 String language = languageToken.toString().toLowerCase().trim(); 822 // Remove quotes if present 823 if (language.startsWith("'") && language.endsWith("'")) { 824 language = language.substring(1, language.length() - 1); 825 } 826 // Skip tokens for non-SQL/non-PLPGSQL languages 827 if (!language.equals("sql") && !language.equals("plpgsql")) { 828 shouldAddToken = false; 829 } 830 } 831 832 if (shouldAddToken) { 833 gcurrentsqlstatement.sourcetokenlist.add(ast); 834 } 835 836 break; 837 } 838 839 case ststoredprocedurePgEndBody: { 840 if (ast.tokentype == ETokenType.ttsemicolon) { 841 gst = EFindSqlStateType.stnormal; 842 gcurrentsqlstatement.sourcetokenlist.add(ast); 843 gcurrentsqlstatement.semicolonended = ast; 844 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 845 continue; 846 } else if (ast.tokencode == TBaseType.cmtdoublehyphen) { 847 if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { // -- sqlflow-delimiter 848 gst = EFindSqlStateType.stnormal; 849 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 850 continue; 851 } 852 } 853 854 gcurrentsqlstatement.sourcetokenlist.add(ast); 855 856 if (ast.tokencode == TBaseType.rrw_redshift_language) { 857 // check next token which is the language used by this stored procedure 858 TSourceToken nextSt = ast.nextSolidToken(); 859 if (nextSt != null) { 860 if (gcurrentsqlstatement instanceof TRoutine) { // can be TCreateProcedureStmt or TCreateFunctionStmt 861 TRoutine p = (TRoutine) gcurrentsqlstatement; 862 p.setRoutineLanguage(nextSt.toString()); 863 } 864 } 865 } 866 867 break; 868 } 869 } //switch 870 }//for 871 872 //last statement 873 if ((gcurrentsqlstatement != null) && 874 ((gst == EFindSqlStateType.stsqlplus) || (gst == EFindSqlStateType.stsql) 875 || (gst == EFindSqlStateType.ststoredprocedure) 876 || (gst == EFindSqlStateType.ststoredprocedurePgEndBody) 877 || (gst == EFindSqlStateType.sterror) || (parserContext.isSinglePLBlock()))) { 878 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, true, builder); 879 } 880 881 builder.errorCode(syntaxErrors.size()); 882 } 883 884 /** 885 * Handle CREATE TABLE error recovery. 886 * <p> 887 * Migrated from TGSqlParser.handleCreateTableErrorRecovery(). 888 */ 889 private void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) { 890 if (((stmt.sqlstatementtype == ESqlStatementType.sstcreatetable) 891 || (stmt.sqlstatementtype == ESqlStatementType.sstcreateindex)) 892 && (!TBaseType.c_createTableStrictParsing)) { 893 894 int nested = 0; 895 boolean isIgnore = false, isFoundIgnoreToken = false; 896 TSourceToken firstIgnoreToken = null; 897 898 for (int k = 0; k < stmt.sourcetokenlist.size(); k++) { 899 TSourceToken st = stmt.sourcetokenlist.get(k); 900 if (isIgnore) { 901 if (st.issolidtoken() && (st.tokencode != ';')) { 902 isFoundIgnoreToken = true; 903 if (firstIgnoreToken == null) { 904 firstIgnoreToken = st; 905 } 906 } 907 if (st.tokencode != ';') { 908 st.tokencode = TBaseType.sqlpluscmd; 909 } 910 continue; 911 } 912 if (st.tokencode == (int) ')') { 913 nested--; 914 if (nested == 0) { 915 boolean isSelect = false; 916 TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1); 917 if (st1 != null) { 918 TSourceToken st2 = st.searchToken((int) '(', 2); 919 if (st2 != null) { 920 TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3); 921 isSelect = (st3 != null); 922 } 923 } 924 if (!isSelect) isIgnore = true; 925 } 926 } else if (st.tokencode == (int) '(') { 927 nested++; 928 } 929 } 930 931 if (isFoundIgnoreToken) { 932 stmt.clearError(); 933 stmt.parsestatement(null, false); 934 } 935 } 936 } 937 938 @Override 939 public String toString() { 940 return "RedshiftSqlParser{vendor=" + vendor + "}"; 941 } 942}