Source code

001package gudusoft.gsqlparser.parser;
002
003import gudusoft.gsqlparser.EDbVendor;
004import gudusoft.gsqlparser.TBaseType;
005import gudusoft.gsqlparser.TCustomLexer;
006import gudusoft.gsqlparser.TCustomParser;
007import gudusoft.gsqlparser.TCustomSqlStatement;
008import gudusoft.gsqlparser.TLexerRedshift;
009import gudusoft.gsqlparser.TParserRedshift;
010import gudusoft.gsqlparser.TSourceToken;
011import gudusoft.gsqlparser.TSourceTokenList;
012import gudusoft.gsqlparser.TStatementList;
013import gudusoft.gsqlparser.TSyntaxError;
014import gudusoft.gsqlparser.EFindSqlStateType;
015import gudusoft.gsqlparser.ETokenType;
016import gudusoft.gsqlparser.ETokenStatus;
017import gudusoft.gsqlparser.ESqlStatementType;
018import gudusoft.gsqlparser.EErrorType;
019import gudusoft.gsqlparser.stmt.TUnknownSqlStatement;
020import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement;
021import gudusoft.gsqlparser.stmt.TCommonBlock;
022import gudusoft.gsqlparser.stmt.TRoutine;
023import gudusoft.gsqlparser.sqlcmds.ISqlCmds;
024import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory;
025import gudusoft.gsqlparser.compiler.TContext;
026import gudusoft.gsqlparser.sqlenv.TSQLEnv;
027import gudusoft.gsqlparser.compiler.TGlobalScope;
028import gudusoft.gsqlparser.compiler.TFrame;
029import gudusoft.gsqlparser.resolver.TSQLResolver;
030import gudusoft.gsqlparser.TLog;
031import gudusoft.gsqlparser.compiler.TASTEvaluator;
032
033import java.io.BufferedReader;
034import java.util.ArrayList;
035import java.util.List;
036import java.util.Stack;
037
038/**
039 * Amazon Redshift SQL parser implementation.
040 *
041 * <p>This parser handles Redshift-specific SQL syntax including:
042 * <ul>
043 *   <li>PostgreSQL-based syntax (Redshift is based on PostgreSQL 8.0.2)</li>
044 *   <li>PL/pgSQL functions and procedures</li>
045 *   <li>CREATE FUNCTION with LANGUAGE clause</li>
046 *   <li>Function body delimiters ($$)</li>
047 *   <li>Redshift-specific types (ARRAY&lt;type&gt;, %ROWTYPE, etc.)</li>
048 *   <li>Redshift-specific keywords (FILTER, LANGUAGE, etc.)</li>
049 * </ul>
050 *
051 * <p><b>Design Notes:</b>
052 * <ul>
053 *   <li>Extends {@link AbstractSqlParser} using the template method pattern</li>
054 *   <li>Uses {@link TLexerRedshift} for tokenization</li>
055 *   <li>Uses {@link TParserRedshift} for parsing</li>
056 *   <li>Delimiter character: ';' for SQL statements</li>
057 * </ul>
058 *
059 * <p><b>Usage Example:</b>
060 * <pre>
061 * // Get Redshift parser from factory
062 * SqlParser parser = SqlParserFactory.get(EDbVendor.dbvredshift);
063 *
064 * // Build context
065 * ParserContext context = new ParserContext.Builder(EDbVendor.dbvredshift)
066 *     .sqlText("SELECT * FROM orders WHERE order_date > CURRENT_DATE - 7")
067 *     .build();
068 *
069 * // Parse
070 * SqlParseResult result = parser.parse(context);
071 *
072 * // Access statements
073 * TStatementList statements = result.getSqlStatements();
074 * </pre>
075 *
076 * @see SqlParser
077 * @see AbstractSqlParser
078 * @see TLexerRedshift
079 * @see TParserRedshift
080 * @since 3.2.0.0
081 */
082public class RedshiftSqlParser extends AbstractSqlParser {
083
084    /**
085     * Construct Redshift SQL parser.
086     * <p>
087     * Configures the parser for Redshift database with default delimiter (;).
088     * <p>
089     * Following the original TGSqlParser pattern, the lexer and parser are
090     * created once in the constructor and reused for all parsing operations.
091     */
092    public RedshiftSqlParser() {
093        super(EDbVendor.dbvredshift);
094        this.delimiterChar = ';';
095        this.defaultDelimiterStr = ";";
096
097        // Create lexer once - will be reused for all parsing operations
098        this.flexer = new TLexerRedshift();
099        this.flexer.delimiterchar = this.delimiterChar;
100        this.flexer.defaultDelimiterStr = this.defaultDelimiterStr;
101
102        // CRITICAL: Set parent's lexer reference for shared tokenization logic
103        this.lexer = this.flexer;
104
105        // Create parser once - will be reused for all parsing operations
106        this.fparser = new TParserRedshift(null);
107        this.fparser.lexer = this.flexer;
108    }
109
110    // ========== Parser Components ==========
111
    /**
     * The Redshift lexer used for tokenization.
     * <p>
     * Created in the constructor, which also stores the same instance in the
     * inherited {@code lexer} field and in {@code fparser.lexer}.
     */
    public TLexerRedshift flexer;

    /**
     * SQL parser (for Redshift statements).
     * <p>
     * Created in the constructor; {@code performParsing} re-assigns it from the
     * parser argument it receives.
     */
    private TParserRedshift fparser;

    /** Current statement being built during raw statement extraction. */
    private TCustomSqlStatement gcurrentsqlstatement;

120
121    // Note: Global context and frame stack fields inherited from AbstractSqlParser:
122    // - protected TContext globalContext
123    // - protected TSQLEnv sqlEnv
124    // - protected Stack<TFrame> frameStack
125    // - protected TFrame globalFrame
126
127    // ========== AbstractSqlParser Abstract Methods Implementation ==========
128
129    /**
130     * Return the Redshift lexer instance.
131     */
132    @Override
133    protected TCustomLexer getLexer(ParserContext context) {
134        return this.flexer;
135    }
136
137    /**
138     * Return the Redshift SQL parser instance with updated token list.
139     */
140    @Override
141    protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) {
142        this.fparser.sourcetokenlist = tokens;
143        return this.fparser;
144    }
145
146    /**
147     * Redshift doesn't have a secondary parser.
148     * <p>
149     * Only Oracle uses a secondary parser (PL/SQL parser).
150     */
151    @Override
152    protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) {
153        return null;
154    }
155
156    /**
157     * Call Redshift-specific tokenization logic.
158     * <p>
159     * Delegates to doredshiftsqltexttotokenlist which handles Redshift's
160     * specific keyword recognition, PostgreSQL commands, and token generation.
161     */
162    @Override
163    protected void tokenizeVendorSql() {
164        doredshiftsqltexttotokenlist();
165    }
166
167    /**
168     * Setup Redshift parser for raw statement extraction.
169     * <p>
170     * Redshift uses a single parser, so we inject sqlcmds and update
171     * the token list for the main parser only.
172     */
173    @Override
174    protected void setupVendorParsersForExtraction() {
175        // Inject sqlcmds into parser (required for make_stmt)
176        this.fparser.sqlcmds = this.sqlcmds;
177
178        // Update token list for parser
179        this.fparser.sourcetokenlist = this.sourcetokenlist;
180    }
181
182    /**
183     * Call Redshift-specific raw statement extraction logic.
184     * <p>
185     * Delegates to doredshiftgetrawsqlstatements which handles Redshift's
186     * statement delimiters (semicolon, function delimiters $$, etc.).
187     */
188    @Override
189    protected void extractVendorRawStatements(SqlParseResult.Builder builder) {
190        doredshiftgetrawsqlstatements(builder);
191
192        // Set the extracted statements in the builder
193        builder.sqlStatements(this.sqlstatements);
194    }
195
196    /**
197     * Perform full parsing of statements with syntax checking.
198     * <p>
199     * This method orchestrates the parsing of all statements.
200     */
201    @Override
202    protected TStatementList performParsing(ParserContext context,
203                                           TCustomParser parser,
204                                           TCustomParser secondaryParser,
205                                           TSourceTokenList tokens,
206                                           TStatementList rawStatements) {
207        // Store references
208        this.fparser = (TParserRedshift) parser;
209        this.sourcetokenlist = tokens;
210        this.parserContext = context;
211
212        // Use the raw statements passed from AbstractSqlParser.parse()
213        this.sqlstatements = rawStatements;
214
215        // Initialize sqlcmds (required for parsing)
216        this.sqlcmds = SqlCmdsFactory.get(vendor);
217
218        // CRITICAL: Inject sqlcmds into parser (required for make_stmt)
219        this.fparser.sqlcmds = this.sqlcmds;
220
221        // Initialize global context for semantic analysis
222        initializeGlobalContext();
223
224        // Parse each statement
225        for (int i = 0; i < sqlstatements.size(); i++) {
226            TCustomSqlStatement stmt = sqlstatements.getRawSql(i);
227
228            try {
229                // Set frame stack for this statement
230                stmt.setFrameStack(frameStack);
231
232                // Parse the statement
233                int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree());
234
235                // Vendor-specific post-processing (override hook if needed)
236                afterStatementParsed(stmt);
237
238                // Error recovery
239                boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE;
240                if (doRecover && ((parseResult != 0) || (stmt.getErrorCount() > 0))) {
241                    handleCreateTableErrorRecovery(stmt);
242                }
243
244                // Collect errors
245                if ((parseResult != 0) || (stmt.getErrorCount() > 0)) {
246                    copyErrorsFromStatement(stmt);
247                }
248
249            } catch (Exception ex) {
250                // Use inherited exception handler
251                handleStatementParsingException(stmt, i, ex);
252                continue;
253            }
254        }
255
256        // Clean up frame stack
257        if (globalFrame != null) {
258            globalFrame.popMeFromStack(frameStack);
259        }
260
261        return sqlstatements;
262    }
263
264    /**
265     * Perform semantic analysis on parsed statements.
266     * <p>
267     * This step resolves column-to-table relationships and performs type checking.
268     */
269    @Override
270    protected void performSemanticAnalysis(ParserContext context, TStatementList statements) {
271        if (TBaseType.isEnableResolver() && getSyntaxErrors().isEmpty()) {
272            TSQLResolver resolver = new TSQLResolver(globalContext, statements);
273            resolver.resolve();
274        }
275    }
276
277    /**
278     * Perform interpretation on parsed statements.
279     * <p>
280     * This step evaluates constant expressions and performs other interpretation tasks.
281     */
282    @Override
283    protected void performInterpreter(ParserContext context, TStatementList statements) {
284        if (TBaseType.ENABLE_INTERPRETER && getSyntaxErrors().isEmpty()) {
285            TLog.clearLogs();
286            TGlobalScope interpreterScope = new TGlobalScope(sqlEnv);
287            TLog.enableInterpreterLogOnly();
288            TASTEvaluator astEvaluator = new TASTEvaluator(statements, interpreterScope);
289            astEvaluator.eval();
290        }
291    }
292
293    // ========== Redshift-Specific Tokenization ==========
294
295    /**
296     * Tokenize Redshift SQL text to token list.
297     * <p>
298     * Migrated from TGSqlParser.doredshiftsqltexttotokenlist().
299     * <p>
300     * This method handles Redshift-specific token processing:
301     * <ul>
302     *   <li>SQL*Plus-like commands detection</li>
303     *   <li>Forward slash (/) disambiguation</li>
304     *   <li>%ROWTYPE operator detection</li>
305     *   <li>Continuation lines (hyphen at end of line)</li>
306     * </ul>
307     */
308    private void doredshiftsqltexttotokenlist() {
309        boolean insqlpluscmd = false;
310        boolean isvalidplace = true;
311        boolean waitingreturnforfloatdiv = false;
312        boolean waitingreturnforsemicolon = false;
313        boolean continuesqlplusatnewline = false;
314
315        TSourceToken lct = null, prevst = null;
316
317        TSourceToken asourcetoken, lcprevst;
318        int yychar;
319
320        asourcetoken = getanewsourcetoken();
321        if (asourcetoken == null) return;
322        yychar = asourcetoken.tokencode;
323
324        while (yychar > 0) {
325            sourcetokenlist.add(asourcetoken);
326            switch (yychar) {
327                case TBaseType.cmtdoublehyphen:
328                case TBaseType.cmtslashstar:
329                case TBaseType.lexspace: {
330                    if (insqlpluscmd) {
331                        asourcetoken.insqlpluscmd = true;
332                    }
333                    break;
334                }
335                case TBaseType.lexnewline: {
336                    if (insqlpluscmd) {
337                        insqlpluscmd = false;
338                        isvalidplace = true;
339
340                        if (continuesqlplusatnewline) {
341                            insqlpluscmd = true;
342                            isvalidplace = false;
343                            asourcetoken.insqlpluscmd = true;
344                        }
345                    }
346
347                    if (waitingreturnforsemicolon) {
348                        isvalidplace = true;
349                    }
350                    if (waitingreturnforfloatdiv) {
351                        isvalidplace = true;
352                        lct.tokencode = TBaseType.sqlpluscmd;
353                        if (lct.tokentype != ETokenType.ttslash) {
354                            lct.tokentype = ETokenType.ttsqlpluscmd;
355                        }
356                    }
357                    flexer.insqlpluscmd = insqlpluscmd;
358                    break;
359                } //case newline
360                default: {
361                    //solid token
362                    continuesqlplusatnewline = false;
363                    waitingreturnforsemicolon = false;
364                    waitingreturnforfloatdiv = false;
365                    if (insqlpluscmd) {
366                        asourcetoken.insqlpluscmd = true;
367                        if (asourcetoken.toString().equalsIgnoreCase("-")) {
368                            continuesqlplusatnewline = true;
369                        }
370                    } else {
371                        if (asourcetoken.tokentype == ETokenType.ttsemicolon) {
372                            waitingreturnforsemicolon = true;
373                        }
374                        if ((asourcetoken.tokentype == ETokenType.ttslash)
375                                && (isvalidplace || (IsValidPlaceForDivToSqlplusCmd(sourcetokenlist, asourcetoken.posinlist)))) {
376                            lct = asourcetoken;
377                            waitingreturnforfloatdiv = true;
378                        }
379                        if ((isvalidplace) && isvalidsqlpluscmdInPostgresql(asourcetoken.toString())) {
380                            asourcetoken.tokencode = TBaseType.sqlpluscmd;
381                            if (asourcetoken.tokentype != ETokenType.ttslash) {
382                                asourcetoken.tokentype = ETokenType.ttsqlpluscmd;
383                            }
384                            insqlpluscmd = true;
385                            flexer.insqlpluscmd = insqlpluscmd;
386                        }
387                    }
388                    isvalidplace = false;
389
390                    // Redshift-specific: Handle %ROWTYPE operator
391                    if (asourcetoken.tokencode == TBaseType.rrw_redshift_rowtype) {
392                        TSourceToken stPercent = asourcetoken.searchToken('%', -1);
393                        if (stPercent != null) {
394                            stPercent.tokencode = TBaseType.rowtype_operator;
395                        }
396                    }
397                }
398            }
399
400            //flexer.yylexwrap(asourcetoken);
401            asourcetoken = getanewsourcetoken();
402            if (asourcetoken != null) {
403                yychar = asourcetoken.tokencode;
404            } else {
405                yychar = 0;
406
407                if (waitingreturnforfloatdiv) {
408                    // / at the end of line treat as sqlplus command
409                    lct.tokencode = TBaseType.sqlpluscmd;
410                    if (lct.tokentype != ETokenType.ttslash) {
411                        lct.tokentype = ETokenType.ttsqlpluscmd;
412                    }
413                }
414            }
415
416            if ((yychar == 0) && (prevst != null)) {
417                // End of input
418            }
419        } // while
420    }
421
422    /**
423     * Check if this is a valid place for a forward slash to be treated as a SQL*Plus command.
424     * <p>
425     * Migrated from TGSqlParser.IsValidPlaceForDivToSqlplusCmd().
426     */
427    private boolean IsValidPlaceForDivToSqlplusCmd(TSourceTokenList tokenlist, int pos) {
428        if (tokenlist == null) return false;
429        if (pos <= 0) return true;
430
431        for (int i = pos - 1; i >= 0; i--) {
432            TSourceToken st = tokenlist.get(i);
433            if (st.tokencode == TBaseType.lexnewline) {
434                return true;
435            }
436            if ((st.tokencode != TBaseType.lexspace)
437                    && (st.tokencode != TBaseType.cmtdoublehyphen)
438                    && (st.tokencode != TBaseType.cmtslashstar)) {
439                return false;
440            }
441        }
442        return true;
443    }
444
445    /**
446     * Check if this token is a valid PostgreSQL-like command.
447     * <p>
448     * Migrated from TGSqlParser.isvalidsqlpluscmdInPostgresql().
449     */
450    private boolean isvalidsqlpluscmdInPostgresql(String str) {
451        if (str == null) return false;
452        if (str.length() == 0) return false;
453
454        String s = str.trim().toLowerCase();
455        return s.startsWith("\\");
456    }
457
458    // ========== Redshift-Specific Raw Statement Extraction ==========
459
460    /**
461     * Extract raw SQL statements from token list.
462     * <p>
463     * Migrated from TGSqlParser.doredshiftgetrawsqlstatements().
464     * <p>
465     * This method handles Redshift-specific statement boundaries:
466     * <ul>
467     *   <li>Semicolon (;) for regular SQL statements</li>
468     *   <li>Function delimiter ($$) for function bodies</li>
469     *   <li>BEGIN/END blocks for PL/pgSQL</li>
470     *   <li>DECLARE blocks</li>
471     * </ul>
472     */
473    private void doredshiftgetrawsqlstatements(SqlParseResult.Builder builder) {
474        int waitingEnd = 0;
475        boolean foundEnd = false, enterDeclare = false;
476
477        if (TBaseType.assigned(sqlstatements)) sqlstatements.clear();
478        if (!TBaseType.assigned(sourcetokenlist)) {
479            builder.errorCode(-1);
480            return;
481        }
482
483        gcurrentsqlstatement = null;
484        EFindSqlStateType gst = EFindSqlStateType.stnormal;
485        TSourceToken lcprevsolidtoken = null, ast = null;
486
487        if (parserContext.isSinglePLBlock()) {
488            gcurrentsqlstatement = new TCommonBlock(EDbVendor.dbvpostgresql);
489        }
490
491        for (int i = 0; i < sourcetokenlist.size(); i++) {
492            if ((ast != null) && (ast.issolidtoken()))
493                lcprevsolidtoken = ast;
494
495            ast = sourcetokenlist.get(i);
496            sourcetokenlist.curpos = i;
497
498            // Redshift-specific token adjustments
499            if (ast.tokencode == TBaseType.rrw_redshift_filter) {
500                TSourceToken st1 = ast.nextSolidToken();
501                if (st1 != null) {
502                    if (st1.tokencode != '(') {
503                        ast.tokencode = TBaseType.ident;
504                    }
505                }
506            } else if (ast.tokencode == TBaseType.rrw_redshift_array) {
507                TSourceToken st1 = ast.searchToken('<', 1);
508                if (st1 != null) { // array<varchar(20)>
509                    ast.tokencode = TBaseType.rrw_redshift_array_type;
510                }
511            } else if (ast.tokencode == TBaseType.rrw_binary) {
512                // Distinguish BINARY as data type from BINARY as identifier
513                // BINARY is a type when:
514                // - Preceded by: AS (CAST), comma, left paren, column name
515                // - Followed by: VARYING, left paren, comma, right paren, NOT, NULL
516                TSourceToken prevToken = ast.prevSolidToken();
517                TSourceToken nextToken = ast.nextSolidToken();
518
519                // Check if preceded by a period -> identifier (e.g., table.binary)
520                if (prevToken != null && prevToken.tokencode == '.') {
521                    // Keep as identifier, no change
522                }
523                // Check type contexts by previous token
524                else if (prevToken != null &&
525                        (prevToken.tokencode == TBaseType.rrw_as ||           // CAST(x AS BINARY)
526                         prevToken.tokencode == ',' ||                         // func(INT, BINARY)
527                         prevToken.tokencode == '(' ||                         // (col BINARY), CAST(BINARY ...
528                         prevToken.tokentype == ETokenType.ttidentifier)) {    // column_name BINARY
529                    ast.tokencode = TBaseType.rrw_redshift_binary_as_type;
530                }
531                // Check type contexts by next token
532                else if (nextToken != null &&
533                        (nextToken.tokencode == TBaseType.rrw_varying ||       // BINARY VARYING
534                         nextToken.tokencode == '(' ||                          // BINARY(10)
535                         nextToken.tokencode == ',' ||                          // col BINARY, col2
536                         nextToken.tokencode == ')' ||                          // col BINARY)
537                         nextToken.tokencode == TBaseType.rrw_not ||           // BINARY NOT NULL
538                         nextToken.tokencode == TBaseType.rrw_null)) {         // BINARY NULL
539                    ast.tokencode = TBaseType.rrw_redshift_binary_as_type;
540                }
541            } else if (ast.tokencode == TBaseType.rrw_values) {
542                TSourceToken stParen = ast.searchToken('(', 1);
543                if (stParen != null) {
544                    TSourceToken stInsert = ast.searchToken(TBaseType.rrw_insert, -ast.posinlist);
545                    if (stInsert != null) {
546                        TSourceToken stSemiColon = ast.searchToken(';', -ast.posinlist);
547                        if ((stSemiColon != null) && (stSemiColon.posinlist > stInsert.posinlist)) {
548                            // INSERT INTO test values (16,1), (8,2), (4,4), (2,0), (97, 16);
549                            // VALUES (1);
550                            // don't treat values(1) as insert values
551                        } else {
552                            TSourceToken stFrom = ast.searchToken(TBaseType.rrw_from, -ast.posinlist);
553                            if ((stFrom != null) && (stFrom.posinlist > stInsert.posinlist)) {
554                                // don't treat values after from keyword as an insert values
555                                // insert into inserttest values(10, 20, '40'), (-1, 2, DEFAULT),
556                                // ((select 2), (select i from (values(3) ) as foo (i)), 'values are fun!');
557                            } else {
558                                ast.tokencode = TBaseType.rrw_postgresql_insert_values;
559                            }
560                        }
561                    }
562                }
563            }
564
565            switch (gst) {
566                case sterror: {
567                    if (ast.tokentype == ETokenType.ttsemicolon) {
568                        gcurrentsqlstatement.sourcetokenlist.add(ast);
569                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
570                        gst = EFindSqlStateType.stnormal;
571                    } else {
572                        gcurrentsqlstatement.sourcetokenlist.add(ast);
573                    }
574                    break;
575                } //sterror
576
577                case stnormal: {
578                    if ((ast.tokencode == TBaseType.cmtdoublehyphen)
579                            || (ast.tokencode == TBaseType.cmtslashstar)
580                            || (ast.tokencode == TBaseType.lexspace)
581                            || (ast.tokencode == TBaseType.lexnewline)
582                            || (ast.tokentype == ETokenType.ttsemicolon)) {
583                        if (gcurrentsqlstatement != null) {
584                            gcurrentsqlstatement.sourcetokenlist.add(ast);
585                        }
586
587                        if ((lcprevsolidtoken != null) && (ast.tokentype == ETokenType.ttsemicolon)) {
588                            if (lcprevsolidtoken.tokentype == ETokenType.ttsemicolon) {
589                                // ;;;; continuous semicolon, treat it as comment
590                                ast.tokentype = ETokenType.ttsimplecomment;
591                                ast.tokencode = TBaseType.cmtdoublehyphen;
592                            }
593                        }
594
595                        continue;
596                    }
597
598                    if (ast.tokencode == TBaseType.sqlpluscmd) {
599                        gst = EFindSqlStateType.stsqlplus;
600                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
601                        gcurrentsqlstatement.sourcetokenlist.add(ast);
602                        continue;
603                    }
604
605                    // find a token to start sql or plsql mode
606                    gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement);
607
608                    if (gcurrentsqlstatement != null) {
609                        enterDeclare = false;
610                        if (gcurrentsqlstatement.ispgplsql()) {
611                            gst = EFindSqlStateType.ststoredprocedure;
612                            gcurrentsqlstatement.sourcetokenlist.add(ast);
613                            foundEnd = false;
614                            if ((ast.tokencode == TBaseType.rrw_begin)
615                                    || (ast.tokencode == TBaseType.rrw_package)
616                                    || (ast.searchToken(TBaseType.rrw_package, 4) != null)) {
617                                waitingEnd = 1;
618                            } else if (ast.tokencode == TBaseType.rrw_declare) {
619                                enterDeclare = true;
620                            }
621                        } else {
622                            gst = EFindSqlStateType.stsql;
623                            gcurrentsqlstatement.sourcetokenlist.add(ast);
624                        }
625                    } else {
626                        //error token found
627                        this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo),
628                                "Error when tokenize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist));
629
630                        ast.tokentype = ETokenType.tttokenlizererrortoken;
631                        gst = EFindSqlStateType.sterror;
632
633                        gcurrentsqlstatement = new TUnknownSqlStatement(vendor);
634                        gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid;
635                        gcurrentsqlstatement.sourcetokenlist.add(ast);
636                    }
637
638                    break;
639                } // stnormal
640
641                case stsqlplus: {
642                    if (ast.insqlpluscmd) {
643                        gcurrentsqlstatement.sourcetokenlist.add(ast);
644                    } else {
645                        gst = EFindSqlStateType.stnormal; //this token must be newline,
646                        gcurrentsqlstatement.sourcetokenlist.add(ast); // so add it here
647                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
648                    }
649
650                    break;
651                }//case stsqlplus
652
653                case stsql: {
654                    if (ast.tokentype == ETokenType.ttsemicolon) {
655                        gst = EFindSqlStateType.stnormal;
656                        gcurrentsqlstatement.sourcetokenlist.add(ast);
657                        gcurrentsqlstatement.semicolonended = ast;
658                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
659                        continue;
660                    }
661
662                    if (sourcetokenlist.sqlplusaftercurtoken()) { //most probably is / cmd
663                        gst = EFindSqlStateType.stnormal;
664                        gcurrentsqlstatement.sourcetokenlist.add(ast);
665                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
666                        continue;
667                    }
668
669                    if (ast.tokencode == TBaseType.cmtdoublehyphen) {
670                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { // -- sqlflow-delimiter
671                            gst = EFindSqlStateType.stnormal;
672                            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
673                            continue;
674                        }
675                    }
676
677                    gcurrentsqlstatement.sourcetokenlist.add(ast);
678                    break;
679                }//case stsql
680
681                case ststoredprocedure: {
682                    if (ast.tokencode == TBaseType.rrw_redshift_function_delimiter) {
683                        gcurrentsqlstatement.sourcetokenlist.add(ast);
684                        gst = EFindSqlStateType.ststoredprocedurePgStartBody;
685                        continue;
686                    }
687
688                    if (ast.tokencode == TBaseType.rrw_redshift_language) {
689                        // check next token which is the language used by this stored procedure
690                        TSourceToken nextSt = ast.nextSolidToken();
691                        if (nextSt != null) {
692                            if (gcurrentsqlstatement instanceof TRoutine) {  // can be TCreateProcedureStmt or TCreateFunctionStmt
693                                TRoutine p = (TRoutine) gcurrentsqlstatement;
694                                p.setRoutineLanguage(nextSt.toString());
695                            }
696                        }
697                    }
698
699                    if ((ast.tokentype == ETokenType.ttsemicolon) && (waitingEnd == 0) && (!enterDeclare)) {
700                        gst = EFindSqlStateType.stnormal;
701                        gcurrentsqlstatement.sourcetokenlist.add(ast);
702                        gcurrentsqlstatement.semicolonended = ast;
703                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
704                        continue;
705                    }
706
707                    if ((ast.tokencode == TBaseType.rrw_begin)) {
708                        waitingEnd++;
709                        enterDeclare = false;
710                    } else if ((ast.tokencode == TBaseType.rrw_declare)) {
711                        enterDeclare = true;
712                    } else if ((ast.tokencode == TBaseType.rrw_if)) {
713                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
714                            //this is not if after END
715                            waitingEnd++;
716                        }
717                    } else if ((ast.tokencode == TBaseType.rrw_case)) {
718                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
719                            //this is not case after END
720                            waitingEnd++;
721                        }
722                    } else if ((ast.tokencode == TBaseType.rrw_loop)) {
723                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
724                            //this is not loop after END
725                            waitingEnd++;
726                        }
727                    } else if (ast.tokencode == TBaseType.rrw_end) {
728                        foundEnd = true;
729                        waitingEnd--;
730                        if (waitingEnd < 0) {
731                            waitingEnd = 0;
732                        }
733                    }
734
735                    if ((ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) {
736                        // TPlsqlStatementParse(asqlstatement).TerminatorToken := ast;
737                        ast.tokenstatus = ETokenStatus.tsignorebyyacc;
738                        gst = EFindSqlStateType.stnormal;
739                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
740
741                        //make / a sqlplus cmd
742                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
743                        gcurrentsqlstatement.sourcetokenlist.add(ast);
744                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
745                    } else if ((ast.tokentype == ETokenType.ttperiod) && (sourcetokenlist.returnaftercurtoken(false)) && (sourcetokenlist.returnbeforecurtoken(false))) {
746                        // single dot at a separate line
747                        ast.tokenstatus = ETokenStatus.tsignorebyyacc;
748                        gst = EFindSqlStateType.stnormal;
749                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
750
751                        //make ttperiod a sqlplus cmd
752                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
753                        gcurrentsqlstatement.sourcetokenlist.add(ast);
754                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
755                    } else {
756                        gcurrentsqlstatement.sourcetokenlist.add(ast);
757                        if ((ast.tokentype == ETokenType.ttsemicolon) && (waitingEnd == 0)
758                                && (foundEnd)) {
759                            gst = EFindSqlStateType.stnormal;
760                            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
761                        }
762                    }
763
764                    if (ast.tokencode == TBaseType.sqlpluscmd) {
765                        //change tokencode back to keyword or TBaseType.ident, because sqlplus cmd
766                        //in a sql statement(almost is plsql block) is not really a sqlplus cmd
767                        int m = flexer.getkeywordvalue(ast.getAstext());
768                        if (m != 0) {
769                            ast.tokencode = m;
770                        } else {
771                            ast.tokencode = TBaseType.ident;
772                        }
773                    }
774
775                    if ((gst == EFindSqlStateType.ststoredprocedure) && (ast.tokencode == TBaseType.cmtdoublehyphen)) {
776                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { // -- sqlflow-delimiter
777                            gst = EFindSqlStateType.stnormal;
778                            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
779                        }
780                    }
781
782                    break;
783                } //ststoredprocedure
784
785                case ststoredprocedurePgStartBody: {
786                    // Check if this is the closing delimiter
787                    if (ast.tokencode == TBaseType.rrw_redshift_function_delimiter) {
788                        gcurrentsqlstatement.sourcetokenlist.add(ast);
789                        gst = EFindSqlStateType.ststoredprocedurePgEndBody;
790                        continue;
791                    }
792
793                    // Only add function body tokens if language is SQL or PLPGSQL
794                    // For other languages (e.g., plpythonu, plperl), skip tokens so the
795                    // parser sees two consecutive delimiters and matches the empty body rule
796                    boolean shouldAddToken = true; // Default: add tokens (assume SQL/PLPGSQL)
797
798                    // Look ahead to find the LANGUAGE keyword after the closing $$
799                    // to determine if we should skip these tokens
800                    TSourceToken languageToken = null;
801                    for (int j = i + 1; j < sourcetokenlist.size(); j++) {
802                        TSourceToken lookahead = sourcetokenlist.get(j);
803                        if (lookahead.tokencode == TBaseType.rrw_redshift_function_delimiter) {
804                            // Found closing delimiter, now look for LANGUAGE keyword
805                            for (int k = j + 1; k < sourcetokenlist.size(); k++) {
806                                TSourceToken st = sourcetokenlist.get(k);
807                                if (st.tokencode == TBaseType.rrw_redshift_language) {
808                                    // Found LANGUAGE, check next solid token for the language name
809                                    languageToken = st.nextSolidToken();
810                                    break;
811                                }
812                                if (st.tokentype == ETokenType.ttsemicolon) {
813                                    break; // Reached end of statement
814                                }
815                            }
816                            break;
817                        }
818                    }
819
820                    if (languageToken != null) {
821                        String language = languageToken.toString().toLowerCase().trim();
822                        // Remove quotes if present
823                        if (language.startsWith("'") && language.endsWith("'")) {
824                            language = language.substring(1, language.length() - 1);
825                        }
826                        // Skip tokens for non-SQL/non-PLPGSQL languages
827                        if (!language.equals("sql") && !language.equals("plpgsql")) {
828                            shouldAddToken = false;
829                        }
830                    }
831
832                    if (shouldAddToken) {
833                        gcurrentsqlstatement.sourcetokenlist.add(ast);
834                    }
835
836                    break;
837                }
838
839                case ststoredprocedurePgEndBody: {
840                    if (ast.tokentype == ETokenType.ttsemicolon) {
841                        gst = EFindSqlStateType.stnormal;
842                        gcurrentsqlstatement.sourcetokenlist.add(ast);
843                        gcurrentsqlstatement.semicolonended = ast;
844                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
845                        continue;
846                    } else if (ast.tokencode == TBaseType.cmtdoublehyphen) {
847                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { // -- sqlflow-delimiter
848                            gst = EFindSqlStateType.stnormal;
849                            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
850                            continue;
851                        }
852                    }
853
854                    gcurrentsqlstatement.sourcetokenlist.add(ast);
855
856                    if (ast.tokencode == TBaseType.rrw_redshift_language) {
857                        // check next token which is the language used by this stored procedure
858                        TSourceToken nextSt = ast.nextSolidToken();
859                        if (nextSt != null) {
860                            if (gcurrentsqlstatement instanceof TRoutine) {  // can be TCreateProcedureStmt or TCreateFunctionStmt
861                                TRoutine p = (TRoutine) gcurrentsqlstatement;
862                                p.setRoutineLanguage(nextSt.toString());
863                            }
864                        }
865                    }
866
867                    break;
868                }
869            } //switch
870        }//for
871
872        //last statement
873        if ((gcurrentsqlstatement != null) &&
874                ((gst == EFindSqlStateType.stsqlplus) || (gst == EFindSqlStateType.stsql)
875                        || (gst == EFindSqlStateType.ststoredprocedure)
876                        || (gst == EFindSqlStateType.ststoredprocedurePgEndBody)
877                        || (gst == EFindSqlStateType.sterror) || (parserContext.isSinglePLBlock()))) {
878            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, true, builder);
879        }
880
881        builder.errorCode(syntaxErrors.size());
882    }
883
884    /**
885     * Handle CREATE TABLE error recovery.
886     * <p>
887     * Migrated from TGSqlParser.handleCreateTableErrorRecovery().
888     */
889    private void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) {
890        if (((stmt.sqlstatementtype == ESqlStatementType.sstcreatetable)
891                || (stmt.sqlstatementtype == ESqlStatementType.sstcreateindex))
892                && (!TBaseType.c_createTableStrictParsing)) {
893
894            int nested = 0;
895            boolean isIgnore = false, isFoundIgnoreToken = false;
896            TSourceToken firstIgnoreToken = null;
897
898            for (int k = 0; k < stmt.sourcetokenlist.size(); k++) {
899                TSourceToken st = stmt.sourcetokenlist.get(k);
900                if (isIgnore) {
901                    if (st.issolidtoken() && (st.tokencode != ';')) {
902                        isFoundIgnoreToken = true;
903                        if (firstIgnoreToken == null) {
904                            firstIgnoreToken = st;
905                        }
906                    }
907                    if (st.tokencode != ';') {
908                        st.tokencode = TBaseType.sqlpluscmd;
909                    }
910                    continue;
911                }
912                if (st.tokencode == (int) ')') {
913                    nested--;
914                    if (nested == 0) {
915                        boolean isSelect = false;
916                        TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1);
917                        if (st1 != null) {
918                            TSourceToken st2 = st.searchToken((int) '(', 2);
919                            if (st2 != null) {
920                                TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3);
921                                isSelect = (st3 != null);
922                            }
923                        }
924                        if (!isSelect) isIgnore = true;
925                    }
926                } else if (st.tokencode == (int) '(') {
927                    nested++;
928                }
929            }
930
931            if (isFoundIgnoreToken) {
932                stmt.clearError();
933                stmt.parsestatement(null, false);
934            }
935        }
936    }
937
938    @Override
939    public String toString() {
940        return "RedshiftSqlParser{vendor=" + vendor + "}";
941    }
942}