001package gudusoft.gsqlparser.parser;
002
003import gudusoft.gsqlparser.EDbVendor;
004import gudusoft.gsqlparser.TBaseType;
005import gudusoft.gsqlparser.TCustomLexer;
006import gudusoft.gsqlparser.TCustomParser;
007import gudusoft.gsqlparser.TCustomSqlStatement;
008import gudusoft.gsqlparser.TLexerRedshift;
009import gudusoft.gsqlparser.TParserRedshift;
010import gudusoft.gsqlparser.TSourceToken;
011import gudusoft.gsqlparser.TSourceTokenList;
012import gudusoft.gsqlparser.TStatementList;
013import gudusoft.gsqlparser.TSyntaxError;
014import gudusoft.gsqlparser.EFindSqlStateType;
015import gudusoft.gsqlparser.ETokenType;
016import gudusoft.gsqlparser.ETokenStatus;
017import gudusoft.gsqlparser.ESqlStatementType;
018import gudusoft.gsqlparser.EErrorType;
019import gudusoft.gsqlparser.stmt.TUnknownSqlStatement;
020import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement;
021import gudusoft.gsqlparser.stmt.TCommonBlock;
022import gudusoft.gsqlparser.stmt.TRoutine;
023import gudusoft.gsqlparser.sqlcmds.ISqlCmds;
024import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory;
025import gudusoft.gsqlparser.compiler.TContext;
026import gudusoft.gsqlparser.sqlenv.TSQLEnv;
027import gudusoft.gsqlparser.compiler.TGlobalScope;
028import gudusoft.gsqlparser.compiler.TFrame;
029import gudusoft.gsqlparser.resolver.TSQLResolver;
030import gudusoft.gsqlparser.TLog;
031import gudusoft.gsqlparser.compiler.TASTEvaluator;
032
033import java.io.BufferedReader;
034import java.util.ArrayList;
035import java.util.List;
036import java.util.Stack;
037
038/**
039 * Amazon Redshift SQL parser implementation.
040 *
041 * <p>This parser handles Redshift-specific SQL syntax including:
042 * <ul>
043 *   <li>PostgreSQL-based syntax (Redshift is based on PostgreSQL 8.0.2)</li>
044 *   <li>PL/pgSQL functions and procedures</li>
045 *   <li>CREATE FUNCTION with LANGUAGE clause</li>
046 *   <li>Function body delimiters ($$)</li>
047 *   <li>Redshift-specific types (ARRAY&lt;type&gt;, %ROWTYPE, etc.)</li>
048 *   <li>Redshift-specific keywords (FILTER, LANGUAGE, etc.)</li>
049 * </ul>
050 *
051 * <p><b>Design Notes:</b>
052 * <ul>
053 *   <li>Extends {@link AbstractSqlParser} using the template method pattern</li>
054 *   <li>Uses {@link TLexerRedshift} for tokenization</li>
055 *   <li>Uses {@link TParserRedshift} for parsing</li>
056 *   <li>Delimiter character: ';' for SQL statements</li>
057 * </ul>
058 *
059 * <p><b>Usage Example:</b>
060 * <pre>
061 * // Get Redshift parser from factory
062 * SqlParser parser = SqlParserFactory.get(EDbVendor.dbvredshift);
063 *
064 * // Build context
065 * ParserContext context = new ParserContext.Builder(EDbVendor.dbvredshift)
066 *     .sqlText("SELECT * FROM orders WHERE order_date > CURRENT_DATE - 7")
067 *     .build();
068 *
069 * // Parse
070 * SqlParseResult result = parser.parse(context);
071 *
072 * // Access statements
073 * TStatementList statements = result.getSqlStatements();
074 * </pre>
075 *
076 * @see SqlParser
077 * @see AbstractSqlParser
078 * @see TLexerRedshift
079 * @see TParserRedshift
080 * @since 3.2.0.0
081 */
082public class RedshiftSqlParser extends AbstractSqlParser {
083
084    /**
085     * Construct Redshift SQL parser.
086     * <p>
087     * Configures the parser for Redshift database with default delimiter (;).
088     * <p>
089     * Following the original TGSqlParser pattern, the lexer and parser are
090     * created once in the constructor and reused for all parsing operations.
091     */
092    public RedshiftSqlParser() {
093        super(EDbVendor.dbvredshift);
094        this.delimiterChar = ';';
095        this.defaultDelimiterStr = ";";
096
097        // Create lexer once - will be reused for all parsing operations
098        this.flexer = new TLexerRedshift();
099        this.flexer.delimiterchar = this.delimiterChar;
100        this.flexer.defaultDelimiterStr = this.defaultDelimiterStr;
101
102        // CRITICAL: Set parent's lexer reference for shared tokenization logic
103        this.lexer = this.flexer;
104
105        // Create parser once - will be reused for all parsing operations
106        this.fparser = new TParserRedshift(null);
107        this.fparser.lexer = this.flexer;
108    }
109
110    // ========== Parser Components ==========
111
    /** The Redshift lexer used for tokenization; created in the constructor and returned by {@code getLexer}. */
    public TLexerRedshift flexer;

    /** The Redshift statement parser; created in the constructor and reassigned from the argument of {@code performParsing}. */
    private TParserRedshift fparser;

    /** Statement currently being assembled while raw statements are extracted from the token list. */
    private TCustomSqlStatement gcurrentsqlstatement;
120
121    // Note: Global context and frame stack fields inherited from AbstractSqlParser:
122    // - protected TContext globalContext
123    // - protected TSQLEnv sqlEnv
124    // - protected Stack<TFrame> frameStack
125    // - protected TFrame globalFrame
126
127    // ========== AbstractSqlParser Abstract Methods Implementation ==========
128
129    /**
130     * Return the Redshift lexer instance.
131     */
132    @Override
133    protected TCustomLexer getLexer(ParserContext context) {
134        return this.flexer;
135    }
136
137    /**
138     * Return the Redshift SQL parser instance with updated token list.
139     */
140    @Override
141    protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) {
142        this.fparser.sourcetokenlist = tokens;
143        return this.fparser;
144    }
145
146    /**
147     * Redshift doesn't have a secondary parser.
148     * <p>
149     * Only Oracle uses a secondary parser (PL/SQL parser).
150     */
151    @Override
152    protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) {
153        return null;
154    }
155
156    /**
157     * Call Redshift-specific tokenization logic.
158     * <p>
159     * Delegates to doredshiftsqltexttotokenlist which handles Redshift's
160     * specific keyword recognition, PostgreSQL commands, and token generation.
161     */
162    @Override
163    protected void tokenizeVendorSql() {
164        doredshiftsqltexttotokenlist();
165    }
166
167    /**
168     * Setup Redshift parser for raw statement extraction.
169     * <p>
170     * Redshift uses a single parser, so we inject sqlcmds and update
171     * the token list for the main parser only.
172     */
173    @Override
174    protected void setupVendorParsersForExtraction() {
175        // Inject sqlcmds into parser (required for make_stmt)
176        this.fparser.sqlcmds = this.sqlcmds;
177
178        // Update token list for parser
179        this.fparser.sourcetokenlist = this.sourcetokenlist;
180    }
181
182    /**
183     * Call Redshift-specific raw statement extraction logic.
184     * <p>
185     * Delegates to doredshiftgetrawsqlstatements which handles Redshift's
186     * statement delimiters (semicolon, function delimiters $$, etc.).
187     */
188    @Override
189    protected void extractVendorRawStatements(SqlParseResult.Builder builder) {
190        doredshiftgetrawsqlstatements(builder);
191
192        // Set the extracted statements in the builder
193        builder.sqlStatements(this.sqlstatements);
194    }
195
196    /**
197     * Perform full parsing of statements with syntax checking.
198     * <p>
199     * This method orchestrates the parsing of all statements.
200     */
201    @Override
202    protected TStatementList performParsing(ParserContext context,
203                                           TCustomParser parser,
204                                           TCustomParser secondaryParser,
205                                           TSourceTokenList tokens,
206                                           TStatementList rawStatements) {
207        // Store references
208        this.fparser = (TParserRedshift) parser;
209        this.sourcetokenlist = tokens;
210        this.parserContext = context;
211
212        // Use the raw statements passed from AbstractSqlParser.parse()
213        this.sqlstatements = rawStatements;
214
215        // Initialize sqlcmds (required for parsing)
216        this.sqlcmds = SqlCmdsFactory.get(vendor);
217
218        // CRITICAL: Inject sqlcmds into parser (required for make_stmt)
219        this.fparser.sqlcmds = this.sqlcmds;
220
221        // Initialize global context for semantic analysis
222        initializeGlobalContext();
223
224        // Parse each statement
225        for (int i = 0; i < sqlstatements.size(); i++) {
226            TCustomSqlStatement stmt = sqlstatements.getRawSql(i);
227
228            try {
229                // Set frame stack for this statement
230                stmt.setFrameStack(frameStack);
231
232                // Parse the statement
233                int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree());
234
235                // Vendor-specific post-processing (override hook if needed)
236                afterStatementParsed(stmt);
237
238                // Error recovery
239                boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE;
240                if (doRecover && ((parseResult != 0) || (stmt.getErrorCount() > 0))) {
241                    handleCreateTableErrorRecovery(stmt);
242                }
243
244                // Collect errors
245                if ((parseResult != 0) || (stmt.getErrorCount() > 0)) {
246                    copyErrorsFromStatement(stmt);
247                }
248
249            } catch (Exception ex) {
250                // Use inherited exception handler
251                handleStatementParsingException(stmt, i, ex);
252                continue;
253            }
254        }
255
256        // Clean up frame stack
257        if (globalFrame != null) {
258            globalFrame.popMeFromStack(frameStack);
259        }
260
261        return sqlstatements;
262    }
263
264    /**
265     * Perform semantic analysis on parsed statements.
266     * <p>
267     * This step resolves column-to-table relationships and performs type checking.
268     */
269    @Override
270    protected void performSemanticAnalysis(ParserContext context, TStatementList statements) {
271        if (TBaseType.isEnableResolver() && getSyntaxErrors().isEmpty()) {
272            TSQLResolver resolver = new TSQLResolver(globalContext, statements);
273            resolver.resolve();
274        }
275    }
276
277    /**
278     * Perform interpretation on parsed statements.
279     * <p>
280     * This step evaluates constant expressions and performs other interpretation tasks.
281     */
282    @Override
283    protected void performInterpreter(ParserContext context, TStatementList statements) {
284        if (TBaseType.ENABLE_INTERPRETER && getSyntaxErrors().isEmpty()) {
285            TLog.clearLogs();
286            TGlobalScope interpreterScope = new TGlobalScope(sqlEnv);
287            TLog.enableInterpreterLogOnly();
288            TASTEvaluator astEvaluator = new TASTEvaluator(statements, interpreterScope);
289            astEvaluator.eval();
290        }
291    }
292
293    // ========== Redshift-Specific Tokenization ==========
294
295    /**
296     * Tokenize Redshift SQL text to token list.
297     * <p>
298     * Migrated from TGSqlParser.doredshiftsqltexttotokenlist().
299     * <p>
300     * This method handles Redshift-specific token processing:
301     * <ul>
302     *   <li>SQL*Plus-like commands detection</li>
303     *   <li>Forward slash (/) disambiguation</li>
304     *   <li>%ROWTYPE operator detection</li>
305     *   <li>Continuation lines (hyphen at end of line)</li>
306     * </ul>
307     */
308    private void doredshiftsqltexttotokenlist() {
309        boolean insqlpluscmd = false;
310        boolean isvalidplace = true;
311        boolean waitingreturnforfloatdiv = false;
312        boolean waitingreturnforsemicolon = false;
313        boolean continuesqlplusatnewline = false;
314
315        TSourceToken lct = null, prevst = null;
316
317        TSourceToken asourcetoken, lcprevst;
318        int yychar;
319
320        asourcetoken = getanewsourcetoken();
321        if (asourcetoken == null) return;
322        yychar = asourcetoken.tokencode;
323
324        while (yychar > 0) {
325            sourcetokenlist.add(asourcetoken);
326            switch (yychar) {
327                case TBaseType.cmtdoublehyphen:
328                case TBaseType.cmtslashstar:
329                case TBaseType.lexspace: {
330                    if (insqlpluscmd) {
331                        asourcetoken.insqlpluscmd = true;
332                    }
333                    break;
334                }
335                case TBaseType.lexnewline: {
336                    if (insqlpluscmd) {
337                        insqlpluscmd = false;
338                        isvalidplace = true;
339
340                        if (continuesqlplusatnewline) {
341                            insqlpluscmd = true;
342                            isvalidplace = false;
343                            asourcetoken.insqlpluscmd = true;
344                        }
345                    }
346
347                    if (waitingreturnforsemicolon) {
348                        isvalidplace = true;
349                    }
350                    if (waitingreturnforfloatdiv) {
351                        isvalidplace = true;
352                        lct.tokencode = TBaseType.sqlpluscmd;
353                        if (lct.tokentype != ETokenType.ttslash) {
354                            lct.tokentype = ETokenType.ttsqlpluscmd;
355                        }
356                    }
357                    flexer.insqlpluscmd = insqlpluscmd;
358                    break;
359                } //case newline
360                default: {
361                    //solid token
362                    continuesqlplusatnewline = false;
363                    waitingreturnforsemicolon = false;
364                    waitingreturnforfloatdiv = false;
365                    if (insqlpluscmd) {
366                        asourcetoken.insqlpluscmd = true;
367                        if (asourcetoken.toString().equalsIgnoreCase("-")) {
368                            continuesqlplusatnewline = true;
369                        }
370                    } else {
371                        if (asourcetoken.tokentype == ETokenType.ttsemicolon) {
372                            waitingreturnforsemicolon = true;
373                        }
374                        if ((asourcetoken.tokentype == ETokenType.ttslash)
375                                && (isvalidplace || (IsValidPlaceForDivToSqlplusCmd(sourcetokenlist, asourcetoken.posinlist)))) {
376                            lct = asourcetoken;
377                            waitingreturnforfloatdiv = true;
378                        }
379                        if ((isvalidplace) && isvalidsqlpluscmdInPostgresql(asourcetoken.toString())) {
380                            asourcetoken.tokencode = TBaseType.sqlpluscmd;
381                            if (asourcetoken.tokentype != ETokenType.ttslash) {
382                                asourcetoken.tokentype = ETokenType.ttsqlpluscmd;
383                            }
384                            insqlpluscmd = true;
385                            flexer.insqlpluscmd = insqlpluscmd;
386                        }
387                    }
388                    isvalidplace = false;
389
390                    // Redshift-specific: Handle %ROWTYPE operator
391                    if (asourcetoken.tokencode == TBaseType.rrw_redshift_rowtype) {
392                        TSourceToken stPercent = asourcetoken.searchToken('%', -1);
393                        if (stPercent != null) {
394                            stPercent.tokencode = TBaseType.rowtype_operator;
395                        }
396                    }
397                }
398            }
399
400            //flexer.yylexwrap(asourcetoken);
401            asourcetoken = getanewsourcetoken();
402            if (asourcetoken != null) {
403                yychar = asourcetoken.tokencode;
404            } else {
405                yychar = 0;
406
407                if (waitingreturnforfloatdiv) {
408                    // / at the end of line treat as sqlplus command
409                    lct.tokencode = TBaseType.sqlpluscmd;
410                    if (lct.tokentype != ETokenType.ttslash) {
411                        lct.tokentype = ETokenType.ttsqlpluscmd;
412                    }
413                }
414            }
415
416            if ((yychar == 0) && (prevst != null)) {
417                // End of input
418            }
419        } // while
420    }
421
422    /**
423     * Check if this is a valid place for a forward slash to be treated as a SQL*Plus command.
424     * <p>
425     * Migrated from TGSqlParser.IsValidPlaceForDivToSqlplusCmd().
426     */
427    private boolean IsValidPlaceForDivToSqlplusCmd(TSourceTokenList tokenlist, int pos) {
428        if (tokenlist == null) return false;
429        if (pos <= 0) return true;
430
431        for (int i = pos - 1; i >= 0; i--) {
432            TSourceToken st = tokenlist.get(i);
433            if (st.tokencode == TBaseType.lexnewline) {
434                return true;
435            }
436            if ((st.tokencode != TBaseType.lexspace)
437                    && (st.tokencode != TBaseType.cmtdoublehyphen)
438                    && (st.tokencode != TBaseType.cmtslashstar)) {
439                return false;
440            }
441        }
442        return true;
443    }
444
445    /**
446     * Check if this token is a valid PostgreSQL-like command.
447     * <p>
448     * Migrated from TGSqlParser.isvalidsqlpluscmdInPostgresql().
449     */
450    private boolean isvalidsqlpluscmdInPostgresql(String str) {
451        if (str == null) return false;
452        if (str.length() == 0) return false;
453
454        String s = str.trim().toLowerCase();
455        return s.startsWith("\\");
456    }
457
458    // ========== Redshift-Specific Raw Statement Extraction ==========
459
460    /**
461     * Extract raw SQL statements from token list.
462     * <p>
463     * Migrated from TGSqlParser.doredshiftgetrawsqlstatements().
464     * <p>
465     * This method handles Redshift-specific statement boundaries:
466     * <ul>
467     *   <li>Semicolon (;) for regular SQL statements</li>
468     *   <li>Function delimiter ($$) for function bodies</li>
469     *   <li>BEGIN/END blocks for PL/pgSQL</li>
470     *   <li>DECLARE blocks</li>
471     * </ul>
472     */
473    private void doredshiftgetrawsqlstatements(SqlParseResult.Builder builder) {
474        int waitingEnd = 0;
475        boolean foundEnd = false, enterDeclare = false;
476
477        if (TBaseType.assigned(sqlstatements)) sqlstatements.clear();
478        if (!TBaseType.assigned(sourcetokenlist)) {
479            builder.errorCode(-1);
480            return;
481        }
482
483        gcurrentsqlstatement = null;
484        EFindSqlStateType gst = EFindSqlStateType.stnormal;
485        TSourceToken lcprevsolidtoken = null, ast = null;
486
487        if (parserContext.isSinglePLBlock()) {
488            gcurrentsqlstatement = new TCommonBlock(EDbVendor.dbvpostgresql);
489        }
490
491        for (int i = 0; i < sourcetokenlist.size(); i++) {
492            if ((ast != null) && (ast.issolidtoken()))
493                lcprevsolidtoken = ast;
494
495            ast = sourcetokenlist.get(i);
496            sourcetokenlist.curpos = i;
497
498            // Redshift-specific token adjustments
499            if (ast.tokencode == TBaseType.rrw_redshift_filter) {
500                TSourceToken st1 = ast.nextSolidToken();
501                if (st1 != null) {
502                    if (st1.tokencode != '(') {
503                        ast.tokencode = TBaseType.ident;
504                    }
505                }
506            } else if (ast.tokencode == TBaseType.rrw_redshift_array) {
507                TSourceToken st1 = ast.searchToken('<', 1);
508                if (st1 != null) { // array<varchar(20)>
509                    ast.tokencode = TBaseType.rrw_redshift_array_type;
510                }
511            } else if (ast.tokencode == TBaseType.rrw_binary) {
512                // Distinguish BINARY as data type from BINARY as identifier
513                // BINARY is a type when:
514                // - Preceded by: AS (CAST), comma, left paren, column name
515                // - Followed by: VARYING, left paren, comma, right paren, NOT, NULL
516                TSourceToken prevToken = ast.prevSolidToken();
517                TSourceToken nextToken = ast.nextSolidToken();
518
519                // Check if preceded by a period -> identifier (e.g., table.binary)
520                if (prevToken != null && prevToken.tokencode == '.') {
521                    // Keep as identifier, no change
522                }
523                // Check type contexts by previous token
524                else if (prevToken != null &&
525                        (prevToken.tokencode == TBaseType.rrw_as ||           // CAST(x AS BINARY)
526                         prevToken.tokencode == ',' ||                         // func(INT, BINARY)
527                         prevToken.tokencode == '(' ||                         // (col BINARY), CAST(BINARY ...
528                         prevToken.tokentype == ETokenType.ttidentifier)) {    // column_name BINARY
529                    ast.tokencode = TBaseType.rrw_redshift_binary_as_type;
530                }
531                // Check type contexts by next token
532                else if (nextToken != null &&
533                        (nextToken.tokencode == TBaseType.rrw_varying ||       // BINARY VARYING
534                         nextToken.tokencode == '(' ||                          // BINARY(10)
535                         nextToken.tokencode == ',' ||                          // col BINARY, col2
536                         nextToken.tokencode == ')' ||                          // col BINARY)
537                         nextToken.tokencode == TBaseType.rrw_not ||           // BINARY NOT NULL
538                         nextToken.tokencode == TBaseType.rrw_null)) {         // BINARY NULL
539                    ast.tokencode = TBaseType.rrw_redshift_binary_as_type;
540                }
541            } else if (ast.tokencode == TBaseType.rrw_values) {
542                TSourceToken stParen = ast.searchToken('(', 1);
543                if (stParen != null) {
544                    TSourceToken stInsert = ast.searchToken(TBaseType.rrw_insert, -ast.posinlist);
545                    if (stInsert != null) {
546                        TSourceToken stSemiColon = ast.searchToken(';', -ast.posinlist);
547                        if ((stSemiColon != null) && (stSemiColon.posinlist > stInsert.posinlist)) {
548                            // INSERT INTO test values (16,1), (8,2), (4,4), (2,0), (97, 16);
549                            // VALUES (1);
550                            // don't treat values(1) as insert values
551                        } else {
552                            TSourceToken stFrom = ast.searchToken(TBaseType.rrw_from, -ast.posinlist);
553                            if ((stFrom != null) && (stFrom.posinlist > stInsert.posinlist)) {
554                                // don't treat values after from keyword as an insert values
555                                // insert into inserttest values(10, 20, '40'), (-1, 2, DEFAULT),
556                                // ((select 2), (select i from (values(3) ) as foo (i)), 'values are fun!');
557                            } else {
558                                ast.tokencode = TBaseType.rrw_postgresql_insert_values;
559                            }
560                        }
561                    }
562                }
563            }
564
565            switch (gst) {
566                case sterror: {
567                    if (ast.tokentype == ETokenType.ttsemicolon) {
568                        gcurrentsqlstatement.sourcetokenlist.add(ast);
569                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
570                        gst = EFindSqlStateType.stnormal;
571                    } else {
572                        gcurrentsqlstatement.sourcetokenlist.add(ast);
573                    }
574                    break;
575                } //sterror
576
577                case stnormal: {
578                    if ((ast.tokencode == TBaseType.cmtdoublehyphen)
579                            || (ast.tokencode == TBaseType.cmtslashstar)
580                            || (ast.tokencode == TBaseType.lexspace)
581                            || (ast.tokencode == TBaseType.lexnewline)
582                            || (ast.tokentype == ETokenType.ttsemicolon)) {
583                        if (gcurrentsqlstatement != null) {
584                            gcurrentsqlstatement.sourcetokenlist.add(ast);
585                        }
586
587                        if ((lcprevsolidtoken != null) && (ast.tokentype == ETokenType.ttsemicolon)) {
588                            if (lcprevsolidtoken.tokentype == ETokenType.ttsemicolon) {
589                                // ;;;; continuous semicolon, treat it as comment
590                                ast.tokentype = ETokenType.ttsimplecomment;
591                                ast.tokencode = TBaseType.cmtdoublehyphen;
592                            }
593                        }
594
595                        continue;
596                    }
597
598                    if (ast.tokencode == TBaseType.sqlpluscmd) {
599                        gst = EFindSqlStateType.stsqlplus;
600                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
601                        gcurrentsqlstatement.sourcetokenlist.add(ast);
602                        continue;
603                    }
604
605                    // find a token to start sql or plsql mode
606                    gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement);
607
608                    if (gcurrentsqlstatement != null) {
609                        enterDeclare = false;
610                        if (gcurrentsqlstatement.ispgplsql()) {
611                            gst = EFindSqlStateType.ststoredprocedure;
612                            gcurrentsqlstatement.sourcetokenlist.add(ast);
613                            foundEnd = false;
614                            if ((ast.tokencode == TBaseType.rrw_begin)
615                                    || (ast.tokencode == TBaseType.rrw_package)
616                                    || (ast.searchToken(TBaseType.rrw_package, 4) != null)) {
617                                waitingEnd = 1;
618                            } else if (ast.tokencode == TBaseType.rrw_declare) {
619                                enterDeclare = true;
620                            }
621                        } else {
622                            gst = EFindSqlStateType.stsql;
623                            gcurrentsqlstatement.sourcetokenlist.add(ast);
624                        }
625                    } else {
626                        //error token found
627                        this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo),
628                                "Error when tokenize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist));
629
630                        ast.tokentype = ETokenType.tttokenlizererrortoken;
631                        gst = EFindSqlStateType.sterror;
632
633                        gcurrentsqlstatement = new TUnknownSqlStatement(vendor);
634                        gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid;
635                        gcurrentsqlstatement.sourcetokenlist.add(ast);
636                    }
637
638                    break;
639                } // stnormal
640
641                case stsqlplus: {
642                    if (ast.insqlpluscmd) {
643                        gcurrentsqlstatement.sourcetokenlist.add(ast);
644                    } else {
645                        gst = EFindSqlStateType.stnormal; //this token must be newline,
646                        gcurrentsqlstatement.sourcetokenlist.add(ast); // so add it here
647                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
648                    }
649
650                    break;
651                }//case stsqlplus
652
653                case stsql: {
654                    if (ast.tokentype == ETokenType.ttsemicolon) {
655                        gst = EFindSqlStateType.stnormal;
656                        gcurrentsqlstatement.sourcetokenlist.add(ast);
657                        gcurrentsqlstatement.semicolonended = ast;
658                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
659                        continue;
660                    }
661
662                    if (sourcetokenlist.sqlplusaftercurtoken()) { //most probably is / cmd
663                        gst = EFindSqlStateType.stnormal;
664                        gcurrentsqlstatement.sourcetokenlist.add(ast);
665                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
666                        continue;
667                    }
668
669                    if (ast.tokencode == TBaseType.cmtdoublehyphen) {
670                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { // -- sqlflow-delimiter
671                            gst = EFindSqlStateType.stnormal;
672                            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
673                            continue;
674                        }
675                    }
676
677                    // Check if a DDL keyword starts a new SQL statement without semicolon separator.
678                    // Guard against false positives:
679                    // - GRANT/REVOKE use CREATE/ALTER/DROP as privilege names
680                    // - EXPLAIN statement can contain CREATE/ALTER/DROP (e.g. EXPLAIN CREATE TABLE)
681                    // - CREATE OR ALTER pattern: ALTER follows OR inside a CREATE statement
682                    if (ast.tokencode == TBaseType.rrw_create
683                            || ast.tokencode == TBaseType.rrw_alter
684                            || ast.tokencode == TBaseType.rrw_drop) {
685                        boolean shouldCheckSplit = true;
686
687                        // Don't split inside GRANT/REVOKE (CREATE/ALTER/DROP are privilege names)
688                        if (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstoraclegrant
689                                || gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstoraclerevoke
690                                || gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstGrant
691                                || gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstRevoke) {
692                            shouldCheckSplit = false;
693                        }
694
695                        // Don't split inside EXPLAIN (EXPLAIN CREATE TABLE, EXPLAIN ALTER, etc.)
696                        if (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstExplain) {
697                            shouldCheckSplit = false;
698                        }
699
700                        // Don't split ALTER/DROP when preceded by OR (CREATE OR ALTER pattern)
701                        if (shouldCheckSplit && (ast.tokencode == TBaseType.rrw_alter || ast.tokencode == TBaseType.rrw_drop)) {
702                            TSourceToken prevSolid = ast.prevSolidToken();
703                            if (prevSolid != null && prevSolid.tokencode == TBaseType.rrw_or) {
704                                shouldCheckSplit = false;
705                            }
706                        }
707
708                        if (shouldCheckSplit) {
709                            TCustomSqlStatement lcnextsqlstmt = sqlcmds.issql(ast, gst, gcurrentsqlstatement);
710                            if (lcnextsqlstmt != null) {
711                                // Finalize current statement and start the new one
712                                onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
713                                gcurrentsqlstatement = lcnextsqlstmt;
714                                gcurrentsqlstatement.sourcetokenlist.add(ast);
715                                if (gcurrentsqlstatement.ispgplsql()) {
716                                    gst = EFindSqlStateType.ststoredprocedure;
717                                }
718                                // else stay in stsql
719                                continue;
720                            }
721                        }
722                    }
723
724                    gcurrentsqlstatement.sourcetokenlist.add(ast);
725                    break;
726                }//case stsql
727
728                case ststoredprocedure: {
729                    if (ast.tokencode == TBaseType.rrw_redshift_function_delimiter) {
730                        gcurrentsqlstatement.sourcetokenlist.add(ast);
731                        gst = EFindSqlStateType.ststoredprocedurePgStartBody;
732                        continue;
733                    }
734
735                    if (ast.tokencode == TBaseType.rrw_redshift_language) {
736                        // check next token which is the language used by this stored procedure
737                        TSourceToken nextSt = ast.nextSolidToken();
738                        if (nextSt != null) {
739                            if (gcurrentsqlstatement instanceof TRoutine) {  // can be TCreateProcedureStmt or TCreateFunctionStmt
740                                TRoutine p = (TRoutine) gcurrentsqlstatement;
741                                p.setRoutineLanguage(nextSt.toString());
742                            }
743                        }
744                    }
745
746                    if ((ast.tokentype == ETokenType.ttsemicolon) && (waitingEnd == 0) && (!enterDeclare)) {
747                        gst = EFindSqlStateType.stnormal;
748                        gcurrentsqlstatement.sourcetokenlist.add(ast);
749                        gcurrentsqlstatement.semicolonended = ast;
750                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
751                        continue;
752                    }
753
754                    if ((ast.tokencode == TBaseType.rrw_begin)) {
755                        waitingEnd++;
756                        enterDeclare = false;
757                    } else if ((ast.tokencode == TBaseType.rrw_declare)) {
758                        enterDeclare = true;
759                    } else if ((ast.tokencode == TBaseType.rrw_if)) {
760                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
761                            //this is not if after END
762                            waitingEnd++;
763                        }
764                    } else if ((ast.tokencode == TBaseType.rrw_case)) {
765                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
766                            //this is not case after END
767                            waitingEnd++;
768                        }
769                    } else if ((ast.tokencode == TBaseType.rrw_loop)) {
770                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
771                            //this is not loop after END
772                            waitingEnd++;
773                        }
774                    } else if (ast.tokencode == TBaseType.rrw_end) {
775                        foundEnd = true;
776                        waitingEnd--;
777                        if (waitingEnd < 0) {
778                            waitingEnd = 0;
779                        }
780                    }
781
782                    if ((ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) {
783                        // TPlsqlStatementParse(asqlstatement).TerminatorToken := ast;
784                        ast.tokenstatus = ETokenStatus.tsignorebyyacc;
785                        gst = EFindSqlStateType.stnormal;
786                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
787
788                        //make / a sqlplus cmd
789                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
790                        gcurrentsqlstatement.sourcetokenlist.add(ast);
791                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
792                    } else if ((ast.tokentype == ETokenType.ttperiod) && (sourcetokenlist.returnaftercurtoken(false)) && (sourcetokenlist.returnbeforecurtoken(false))) {
793                        // single dot at a separate line
794                        ast.tokenstatus = ETokenStatus.tsignorebyyacc;
795                        gst = EFindSqlStateType.stnormal;
796                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
797
798                        //make ttperiod a sqlplus cmd
799                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
800                        gcurrentsqlstatement.sourcetokenlist.add(ast);
801                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
802                    } else {
803                        gcurrentsqlstatement.sourcetokenlist.add(ast);
804                        if ((ast.tokentype == ETokenType.ttsemicolon) && (waitingEnd == 0)
805                                && (foundEnd)) {
806                            gst = EFindSqlStateType.stnormal;
807                            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
808                        }
809                    }
810
811                    if (ast.tokencode == TBaseType.sqlpluscmd) {
812                        //change tokencode back to keyword or TBaseType.ident, because sqlplus cmd
813                        //in a sql statement(almost is plsql block) is not really a sqlplus cmd
814                        int m = flexer.getkeywordvalue(ast.getAstext());
815                        if (m != 0) {
816                            ast.tokencode = m;
817                        } else {
818                            ast.tokencode = TBaseType.ident;
819                        }
820                    }
821
822                    if ((gst == EFindSqlStateType.ststoredprocedure) && (ast.tokencode == TBaseType.cmtdoublehyphen)) {
823                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { // -- sqlflow-delimiter
824                            gst = EFindSqlStateType.stnormal;
825                            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
826                        }
827                    }
828
829                    break;
830                } //ststoredprocedure
831
832                case ststoredprocedurePgStartBody: {
833                    // Check if this is the closing delimiter
834                    if (ast.tokencode == TBaseType.rrw_redshift_function_delimiter) {
835                        gcurrentsqlstatement.sourcetokenlist.add(ast);
836                        gst = EFindSqlStateType.ststoredprocedurePgEndBody;
837                        continue;
838                    }
839
840                    // Only add function body tokens if language is SQL or PLPGSQL
841                    // For other languages (e.g., plpythonu, plperl), skip tokens so the
842                    // parser sees two consecutive delimiters and matches the empty body rule
843                    boolean shouldAddToken = true; // Default: add tokens (assume SQL/PLPGSQL)
844
845                    // Look ahead to find the LANGUAGE keyword after the closing $$
846                    // to determine if we should skip these tokens
847                    TSourceToken languageToken = null;
848                    for (int j = i + 1; j < sourcetokenlist.size(); j++) {
849                        TSourceToken lookahead = sourcetokenlist.get(j);
850                        if (lookahead.tokencode == TBaseType.rrw_redshift_function_delimiter) {
851                            // Found closing delimiter, now look for LANGUAGE keyword
852                            for (int k = j + 1; k < sourcetokenlist.size(); k++) {
853                                TSourceToken st = sourcetokenlist.get(k);
854                                if (st.tokencode == TBaseType.rrw_redshift_language) {
855                                    // Found LANGUAGE, check next solid token for the language name
856                                    languageToken = st.nextSolidToken();
857                                    break;
858                                }
859                                if (st.tokentype == ETokenType.ttsemicolon) {
860                                    break; // Reached end of statement
861                                }
862                            }
863                            break;
864                        }
865                    }
866
867                    if (languageToken != null) {
868                        String language = languageToken.toString().toLowerCase().trim();
869                        // Remove quotes if present
870                        if (language.startsWith("'") && language.endsWith("'")) {
871                            language = language.substring(1, language.length() - 1);
872                        }
873                        // Skip tokens for non-SQL/non-PLPGSQL languages
874                        if (!language.equals("sql") && !language.equals("plpgsql")) {
875                            shouldAddToken = false;
876                        }
877                    }
878
879                    if (shouldAddToken) {
880                        gcurrentsqlstatement.sourcetokenlist.add(ast);
881                    }
882
883                    break;
884                }
885
886                case ststoredprocedurePgEndBody: {
887                    if (ast.tokentype == ETokenType.ttsemicolon) {
888                        gst = EFindSqlStateType.stnormal;
889                        gcurrentsqlstatement.sourcetokenlist.add(ast);
890                        gcurrentsqlstatement.semicolonended = ast;
891                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
892                        continue;
893                    } else if (ast.tokencode == TBaseType.cmtdoublehyphen) {
894                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { // -- sqlflow-delimiter
895                            gst = EFindSqlStateType.stnormal;
896                            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
897                            continue;
898                        }
899                    }
900
901                    gcurrentsqlstatement.sourcetokenlist.add(ast);
902
903                    if (ast.tokencode == TBaseType.rrw_redshift_language) {
904                        // check next token which is the language used by this stored procedure
905                        TSourceToken nextSt = ast.nextSolidToken();
906                        if (nextSt != null) {
907                            if (gcurrentsqlstatement instanceof TRoutine) {  // can be TCreateProcedureStmt or TCreateFunctionStmt
908                                TRoutine p = (TRoutine) gcurrentsqlstatement;
909                                p.setRoutineLanguage(nextSt.toString());
910                            }
911                        }
912                    }
913
914                    break;
915                }
916            } //switch
917        }//for
918
919        //last statement
920        if ((gcurrentsqlstatement != null) &&
921                ((gst == EFindSqlStateType.stsqlplus) || (gst == EFindSqlStateType.stsql)
922                        || (gst == EFindSqlStateType.ststoredprocedure)
923                        || (gst == EFindSqlStateType.ststoredprocedurePgEndBody)
924                        || (gst == EFindSqlStateType.sterror) || (parserContext.isSinglePLBlock()))) {
925            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, true, builder);
926        }
927
928        builder.errorCode(syntaxErrors.size());
929    }
930
931    /**
932     * Handle CREATE TABLE error recovery.
933     * <p>
934     * Migrated from TGSqlParser.handleCreateTableErrorRecovery().
935     */
936    private void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) {
937        if (((stmt.sqlstatementtype == ESqlStatementType.sstcreatetable)
938                || (stmt.sqlstatementtype == ESqlStatementType.sstcreateindex))
939                && (!TBaseType.c_createTableStrictParsing)) {
940
941            int nested = 0;
942            boolean isIgnore = false, isFoundIgnoreToken = false;
943            TSourceToken firstIgnoreToken = null;
944
945            for (int k = 0; k < stmt.sourcetokenlist.size(); k++) {
946                TSourceToken st = stmt.sourcetokenlist.get(k);
947                if (isIgnore) {
948                    if (st.issolidtoken() && (st.tokencode != ';')) {
949                        isFoundIgnoreToken = true;
950                        if (firstIgnoreToken == null) {
951                            firstIgnoreToken = st;
952                        }
953                    }
954                    if (st.tokencode != ';') {
955                        st.tokencode = TBaseType.sqlpluscmd;
956                    }
957                    continue;
958                }
959                if (st.tokencode == (int) ')') {
960                    nested--;
961                    if (nested == 0) {
962                        boolean isSelect = false;
963                        TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1);
964                        if (st1 != null) {
965                            TSourceToken st2 = st.searchToken((int) '(', 2);
966                            if (st2 != null) {
967                                TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3);
968                                isSelect = (st3 != null);
969                            }
970                        }
971                        if (!isSelect) isIgnore = true;
972                    }
973                } else if (st.tokencode == (int) '(') {
974                    nested++;
975                }
976            }
977
978            if (isFoundIgnoreToken) {
979                stmt.clearError();
980                stmt.parsestatement(null, false);
981            }
982        }
983    }
984
985    @Override
986    public String toString() {
987        return "RedshiftSqlParser{vendor=" + vendor + "}";
988    }
989}