001package gudusoft.gsqlparser.parser;
002
003import gudusoft.gsqlparser.EDbVendor;
004import gudusoft.gsqlparser.TBaseType;
005import gudusoft.gsqlparser.TCustomLexer;
006import gudusoft.gsqlparser.TCustomParser;
007import gudusoft.gsqlparser.TCustomSqlStatement;
008import gudusoft.gsqlparser.TLexerPostgresql;
009import gudusoft.gsqlparser.TParserPostgresql;
010import gudusoft.gsqlparser.TSourceToken;
011import gudusoft.gsqlparser.TSourceTokenList;
012import gudusoft.gsqlparser.TStatementList;
013import gudusoft.gsqlparser.TSyntaxError;
014import gudusoft.gsqlparser.EFindSqlStateType;
015import gudusoft.gsqlparser.ETokenType;
016import gudusoft.gsqlparser.ETokenStatus;
017import gudusoft.gsqlparser.ESqlStatementType;
018import gudusoft.gsqlparser.EErrorType;
019import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement;
020import gudusoft.gsqlparser.stmt.TUnknownSqlStatement;
021import gudusoft.gsqlparser.sqlcmds.ISqlCmds;
022import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory;
023import gudusoft.gsqlparser.stmt.TCommonBlock;
024import gudusoft.gsqlparser.stmt.TRoutine;
025import gudusoft.gsqlparser.compiler.TContext;
026import gudusoft.gsqlparser.sqlenv.TSQLEnv;
027import gudusoft.gsqlparser.compiler.TGlobalScope;
028import gudusoft.gsqlparser.compiler.TFrame;
029import gudusoft.gsqlparser.ETokenStatus;
030
031import java.io.BufferedReader;
032import java.util.ArrayList;
033import java.util.List;
034import java.util.Stack;
035
036/**
037 * PostgreSQL database SQL parser implementation.
038 *
039 * <p>This parser handles PostgreSQL-specific SQL syntax including:
040 * <ul>
041 *   <li>PL/pgSQL blocks (functions, procedures, triggers)</li>
042 *   <li>Dollar quoting ($$...$$)</li>
043 *   <li>PostgreSQL-specific DML/DDL</li>
044 *   <li>Special operators and functions</li>
045 *   <li>Special token handling (%ROWTYPE, %TYPE, etc.)</li>
046 * </ul>
047 *
048 * <p><b>Design Notes:</b>
049 * <ul>
050 *   <li>Extends {@link AbstractSqlParser}</li>
051 *   <li>Can directly instantiate: {@link TLexerPostgresql}, {@link TParserPostgresql}</li>
052 *   <li>Uses single parser (no secondary parser like Oracle's PL/SQL)</li>
053 *   <li>Delimiter character: ';' for SQL statements</li>
054 * </ul>
055 *
056 * <p><b>Usage Example:</b>
057 * <pre>
058 * // Get PostgreSQL parser from factory
059 * SqlParser parser = SqlParserFactory.get(EDbVendor.dbvpostgresql);
060 *
061 * // Build context
062 * ParserContext context = new ParserContext.Builder(EDbVendor.dbvpostgresql)
063 *     .sqlText("SELECT * FROM employees WHERE dept_id = 10")
064 *     .build();
065 *
066 * // Parse
067 * SqlParseResult result = parser.parse(context);
068 *
069 * // Access statements
070 * TStatementList statements = result.getSqlStatements();
071 * </pre>
072 *
073 * @see SqlParser
074 * @see AbstractSqlParser
075 * @see TLexerPostgresql
076 * @see TParserPostgresql
077 * @since 3.2.0.0
078 */
079public class PostgreSqlParser extends AbstractSqlParser {
080
081    // ========== Lexer and Parser Instances ==========
082    // Created once in constructor, reused for all parsing operations
083
084    /** The PostgreSQL lexer used for tokenization (public for TGSqlParser.getFlexer()) */
085    public TLexerPostgresql flexer;
086    private TParserPostgresql fparser; // the single PostgreSQL parser; built in the constructor, handed out by getParser()
087
088    // ========== State Variables ==========
089    // NOTE: The following fields moved to AbstractSqlParser (inherited):
090    //   - sourcetokenlist (TSourceTokenList)
091    //   - sqlstatements (TStatementList)
092    //   - parserContext (ParserContext)
093    //   - sqlcmds (ISqlCmds) - passed on to the parser in setupVendorParsersForExtraction()
094    //   - globalContext (TContext)
095    //   - sqlEnv (TSQLEnv)
096    //   - frameStack (Stack<TFrame>)
097    //   - globalFrame (TFrame)
098
099    // ========== State Variables for Tokenization ==========
100    private boolean insqlpluscmd; // true while tokens are being read inside a command line; such tokens get insqlpluscmd = true and the value is mirrored to flexer.insqlpluscmd
101    private boolean isvalidplace; // true where a command may start: at the beginning of input and after certain newlines; every solid token clears it
102    private boolean waitingreturnforsemicolon; // set by a ';' outside a command; a newline seen before the next solid token then sets isvalidplace
103    private boolean waitingreturnforfloatdiv; // set by a '/' at an accepted position; a newline or end of input before the next solid token retags that '/' as TBaseType.sqlpluscmd
104    private boolean continuesqlplusatnewline; // set by a "-" token inside a command; keeps command mode switched on across the next newline
105
106    // ========== Constructor ==========
107
108    /**
109     * Construct PostgreSQL SQL parser.
110     * <p>
111     * Configures the parser for PostgreSQL database with default delimiter: semicolon (;)
112     * <p>
113     * Following the original TGSqlParser pattern, the lexer and parser are
114     * created once in the constructor and reused for all parsing operations.
115     */
116    public PostgreSqlParser() {
117        super(EDbVendor.dbvpostgresql);
118
119        // Set delimiter character
120        this.delimiterChar = ';';
121        this.defaultDelimiterStr = ";";
122
123        // Create lexer once - will be reused for all parsing operations
124        this.flexer = new TLexerPostgresql();
125        this.flexer.delimiterchar = this.delimiterChar;
126        this.flexer.defaultDelimiterStr = this.defaultDelimiterStr;
127
128        // CRITICAL: Set lexer for inherited getanewsourcetoken() method
129        this.lexer = this.flexer;
130
131        // Create parser once - will be reused for all parsing operations
132        this.fparser = new TParserPostgresql(null);
133        this.fparser.lexer = this.flexer;
134
135        // NOTE: sourcetokenlist and sqlstatements are initialized in AbstractSqlParser constructor
136    }
137
138    // ========== AbstractSqlParser Abstract Methods Implementation ==========
139
140    /**
141     * Return the PostgreSQL lexer instance.
142     * <p>
143     * The lexer is created once in the constructor and reused for all
144     * parsing operations. This method simply returns the existing instance,
145     * matching the original TGSqlParser pattern where the lexer is created
146     * once and reset before each use.
147     *
148     * @param context parser context (not used, lexer already created)
149     * @return the PostgreSQL lexer instance created in constructor
150     */
151    @Override
152    protected TCustomLexer getLexer(ParserContext context) {
153        // Return existing lexer instance (created in constructor)
154        return this.flexer;
155    }
156
157    /**
158     * Return the PostgreSQL SQL parser instance with updated token list.
159     * <p>
160     * The parser is created once in the constructor and reused for all
161     * parsing operations. This method updates the token list and returns
162     * the existing instance, matching the original TGSqlParser pattern.
163     *
164     * @param context parser context (not used, parser already created)
165     * @param tokens source token list to parse
166     * @return the PostgreSQL SQL parser instance created in constructor
167     */
168    @Override
169    protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) {
170        // Update token list for reused parser instance
171        this.fparser.sourcetokenlist = tokens;
172        return this.fparser;
173    }
174
175    /**
176     * Call PostgreSQL-specific tokenization logic.
177     * <p>
178     * Delegates to dopostgresqltexttotokenlist which handles PostgreSQL's
179     * specific keyword recognition, dollar quotes, and token generation.
180     */
181    @Override
182    protected void tokenizeVendorSql() {
183        dopostgresqltexttotokenlist();
184    }
185
186    /**
187     * Setup PostgreSQL parser for raw statement extraction.
188     * <p>
189     * PostgreSQL uses a single parser, so we inject sqlcmds and update
190     * the token list for the main parser only.
191     */
192    @Override
193    protected void setupVendorParsersForExtraction() {
194        this.fparser.sqlcmds = this.sqlcmds;
195        this.fparser.sourcetokenlist = this.sourcetokenlist;
196    }
197
198    /**
199     * Call PostgreSQL-specific raw statement extraction logic.
200     * <p>
201     * Delegates to dopostgresqlgetrawsqlstatements which handles PostgreSQL's
202     * statement delimiters (semicolon for SQL, $$ for PL/pgSQL functions).
203     */
204    @Override
205    protected void extractVendorRawStatements(SqlParseResult.Builder builder) {
206        dopostgresqlgetrawsqlstatements(builder);
207    }
208
209    /**
210     * Perform full parsing of statements with syntax checking.
211     * <p>
212     * This method orchestrates the parsing of all statements.
213     *
214     * <p><b>Important:</b> This method does NOT extract raw statements - they are
215     * passed in as a parameter already extracted by {@link #extractRawStatements}.
216     *
217     * @param context parser context
218     * @param parser main SQL parser (TParserPostgresql)
219     * @param secondaryParser not used for PostgreSQL
220     * @param tokens source token list
221     * @param rawStatements raw statements already extracted (never null)
222     * @return list of fully parsed statements with AST built
223     */
224    @Override
225    protected TStatementList performParsing(ParserContext context,
226                                           TCustomParser parser,
227                                           TCustomParser secondaryParser,
228                                           TSourceTokenList tokens,
229                                           TStatementList rawStatements) {
230        // Store references (fparser is already set, don't reassign final variable)
231        this.sourcetokenlist = tokens;
232        this.parserContext = context;
233
234        // Use the raw statements passed from AbstractSqlParser.parse()
235        // (already extracted - DO NOT re-extract to avoid duplication)
236        this.sqlstatements = rawStatements;
237
238        // Initialize global context for statement parsing
239        initializeGlobalContext();
240
241        // Parse each statement
242        for (int i = 0; i < sqlstatements.size(); i++) {
243            TCustomSqlStatement stmt = sqlstatements.getRawSql(i);
244
245            // Set frame stack for the statement (needed for parsing)
246            stmt.setFrameStack(frameStack);
247
248            // Parse the statement
249            int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree());
250
251            // Collect syntax errors
252            if ((parseResult != 0) || (stmt.getErrorCount() > 0)) {
253                copyErrorsFromStatement(stmt);
254            }
255        }
256
257        // Clean up frame stack
258        if (globalFrame != null) {
259            globalFrame.popMeFromStack(frameStack);
260        }
261
262        return this.sqlstatements;
263    }
264
265    // ========== PostgreSQL-Specific Tokenization ==========
266
267    /**
268     * Perform PostgreSQL-specific tokenization.
269     * <p>
270     * Extracted from TGSqlParser.dopostgresqltexttotokenlist() (lines 3093-3287)
271     */
272    private void dopostgresqltexttotokenlist() {
273        // Initialize state machine
274        insqlpluscmd = false;
275        isvalidplace = true;
276        waitingreturnforfloatdiv = false;
277        waitingreturnforsemicolon = false;
278        continuesqlplusatnewline = false;
279
280        TSourceToken lct = null, prevst = null;
281        TSourceToken asourcetoken, lcprevst;
282        int yychar;
283
284        asourcetoken = getanewsourcetoken();
285        if (asourcetoken == null) return;
286        yychar = asourcetoken.tokencode;
287
288        while (yychar > 0) {
289            sourcetokenlist.add(asourcetoken);
290
291            switch (yychar) {
292                case TBaseType.cmtdoublehyphen:
293                case TBaseType.cmtslashstar:
294                case TBaseType.lexspace: {
295                    if (insqlpluscmd) {
296                        asourcetoken.insqlpluscmd = true;
297                    }
298                    break;
299                }
300
301                case TBaseType.lexnewline: {
302                    if (insqlpluscmd) {
303                        insqlpluscmd = false;
304                        isvalidplace = true;
305
306                        if (continuesqlplusatnewline) {
307                            insqlpluscmd = true;
308                            isvalidplace = false;
309                            asourcetoken.insqlpluscmd = true;
310                        }
311                    }
312
313                    if (waitingreturnforsemicolon) {
314                        isvalidplace = true;
315                    }
316                    if (waitingreturnforfloatdiv) {
317                        isvalidplace = true;
318                        lct.tokencode = TBaseType.sqlpluscmd;
319                        if (lct.tokentype != ETokenType.ttslash) {
320                            lct.tokentype = ETokenType.ttsqlpluscmd;
321                        }
322                    }
323                    flexer.insqlpluscmd = insqlpluscmd;
324                    break;
325                }
326
327                default: {
328                    // Solid token
329                    continuesqlplusatnewline = false;
330                    waitingreturnforsemicolon = false;
331                    waitingreturnforfloatdiv = false;
332
333                    if (insqlpluscmd) {
334                        asourcetoken.insqlpluscmd = true;
335                        if (asourcetoken.toString().equalsIgnoreCase("-")) {
336                            continuesqlplusatnewline = true;
337                        }
338                    } else {
339                        if (asourcetoken.tokentype == ETokenType.ttsemicolon) {
340                            waitingreturnforsemicolon = true;
341                        }
342                        if ((asourcetoken.tokentype == ETokenType.ttslash)
343                                && (isvalidplace || (isValidPlaceForDivToSqlplusCmd(sourcetokenlist, asourcetoken.posinlist)))) {
344                            lct = asourcetoken;
345                            waitingreturnforfloatdiv = true;
346                        }
347                        if ((isvalidplace) && isvalidsqlpluscmdInPostgresql(asourcetoken.toString())) {
348                            asourcetoken.tokencode = TBaseType.sqlpluscmd;
349                            if (asourcetoken.tokentype != ETokenType.ttslash) {
350                                asourcetoken.tokentype = ETokenType.ttsqlpluscmd;
351                            }
352                            insqlpluscmd = true;
353                            flexer.insqlpluscmd = insqlpluscmd;
354                        }
355                    }
356                    isvalidplace = false;
357
358                    // PostgreSQL-specific keyword handling
359                    if (prevst != null) {
360                        if (prevst.tokencode == TBaseType.rrw_inner) {
361                            if (asourcetoken.tokencode != flexer.getkeywordvalue("JOIN")) {
362                                prevst.tokencode = TBaseType.ident;
363                            }
364                        }
365
366                        if ((prevst.tokencode == TBaseType.rrw_not)
367                                && (asourcetoken.tokencode == flexer.getkeywordvalue("DEFERRABLE"))) {
368                            prevst.tokencode = flexer.getkeywordvalue("NOT_DEFERRABLE");
369                        }
370                    }
371
372                    if (asourcetoken.tokencode == TBaseType.rrw_inner) {
373                        prevst = asourcetoken;
374                    } else if (asourcetoken.tokencode == TBaseType.rrw_not) {
375                        prevst = asourcetoken;
376                    } else {
377                        prevst = null;
378                    }
379
380                    // Additional PostgreSQL transformations
381                    if ((asourcetoken.tokencode == flexer.getkeywordvalue("DIRECT_LOAD"))
382                            || (asourcetoken.tokencode == flexer.getkeywordvalue("ALL"))) {
383                        lcprevst = getprevsolidtoken(asourcetoken);
384                        if (lcprevst != null) {
385                            if (lcprevst.tokencode == TBaseType.rrw_for)
386                                lcprevst.tokencode = TBaseType.rw_for1;
387                        }
388                    }
389
390                    if (asourcetoken.tokencode == TBaseType.rrw_dense_rank) {
391                        TSourceToken stKeep = asourcetoken.searchToken(TBaseType.rrw_keep, -2);
392                        if (stKeep != null) {
393                            stKeep.tokencode = TBaseType.rrw_keep_before_dense_rank;
394                        }
395                    }
396
397                    if ((asourcetoken.tokencode == TBaseType.rrw_postgresql_rowtype)
398                            || (asourcetoken.tokencode == TBaseType.rrw_postgresql_type)) {
399                        TSourceToken stPercent = asourcetoken.searchToken('%', -1);
400                        if (stPercent != null) {
401                            stPercent.tokencode = TBaseType.rowtype_operator;
402                        }
403                    }
404
405                    if (asourcetoken.tokencode == TBaseType.JSON_EXIST) {
406                        TSourceToken stPercent = asourcetoken.searchToken('=', -1);
407                        if (stPercent != null) {
408                            asourcetoken.tokencode = TBaseType.ident;
409                        }
410                    }
411
412                    if (asourcetoken.tokencode == TBaseType.rrw_update) {
413                        TSourceToken stDo = asourcetoken.searchToken(TBaseType.rrw_do, -1);
414                        if (stDo != null) {
415                            asourcetoken.tokencode = TBaseType.rrw_postgresql_do_update;
416                        }
417                    }
418
419                    break;
420                }
421            }
422
423            // Get next token
424            asourcetoken = getanewsourcetoken();
425            if (asourcetoken != null) {
426                yychar = asourcetoken.tokencode;
427            } else {
428                yychar = 0;
429
430                if (waitingreturnforfloatdiv) {
431                    lct.tokencode = TBaseType.sqlpluscmd;
432                    if (lct.tokentype != ETokenType.ttslash) {
433                        lct.tokentype = ETokenType.ttsqlpluscmd;
434                    }
435                }
436            }
437
438            if ((yychar == 0) && (prevst != null)) {
439                if (prevst.tokencode == TBaseType.rrw_inner) {
440                    prevst.tokencode = TBaseType.ident;
441                }
442            }
443        }
444    }
445
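446    /*
447     * Note: getanewsourcetoken() is not declared in this class; the tokenizer
448     * above calls the inherited method, for which the constructor assigns
449     * this.lexer. According to the description that used to stand here, it
450     * wraps the lexer's yylexwrap() call and returns the next source token,
451     * or null if end of input.
452     */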
453
454    /**
455     * Check if token represents a valid SQL*Plus-like command in PostgreSQL.
456     *
457     * @param tokenText token text to check
458     * @return true if valid SQL*Plus command
459     */
460    private boolean isvalidsqlpluscmdInPostgresql(String tokenText) {
461        // PostgreSQL supports psql meta-commands like \d, \dt, etc.
462        // For now, keep compatible with original implementation
463        return false;
464    }
465
466    /**
467     * Determine if forward slash should be treated as SQL*Plus command delimiter.
468     *
469     * @param pstlist token list
470     * @param pPos position of '/' token
471     * @return true if '/' should be SQL*Plus command
472     */
473    private boolean isValidPlaceForDivToSqlplusCmd(TSourceTokenList pstlist, int pPos) {
474        boolean ret = false;
475
476        if ((pPos <= 0) || (pPos > pstlist.size() - 1)) return ret;
477
478        TSourceToken lcst = pstlist.get(pPos - 1);
479        if (lcst.tokentype != ETokenType.ttreturn) {
480            return ret;
481        }
482
483        if (!(lcst.getAstext().charAt(lcst.getAstext().length() - 1) == ' ')) {
484            ret = true;
485        }
486
487        return ret;
488    }
489
490    /**
491     * Get previous non-whitespace token.
492     *
493     * @param ptoken current token
494     * @return previous solid token, or null
495     */
496    private TSourceToken getprevsolidtoken(TSourceToken ptoken) {
497        TSourceToken ret = null;
498        TSourceTokenList lctokenlist = ptoken.container;
499
500        if (lctokenlist != null) {
501            if ((ptoken.posinlist > 0) && (lctokenlist.size() > ptoken.posinlist - 1)) {
502                if (!(
503                        (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttwhitespace)
504                        || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttreturn)
505                        || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttsimplecomment)
506                        || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttbracketedcomment)
507                )) {
508                    ret = lctokenlist.get(ptoken.posinlist - 1);
509                } else {
510                    ret = lctokenlist.nextsolidtoken(ptoken.posinlist - 1, -1, false);
511                }
512            }
513        }
514        return ret;
515    }
516
517    // ========== PostgreSQL-Specific Raw Statement Extraction ==========
518
519    /**
520     * Extract raw PostgreSQL SQL statements from tokenized source.
521     * <p>
522     * Extracted from TGSqlParser.dopostgresqlgetrawsqlstatements() (lines 8051-8492)
523     *
524     * @param builder the result builder to populate with raw statements
525     */
526    private void dopostgresqlgetrawsqlstatements(SqlParseResult.Builder builder) {
527        int waitingEnd = 0;
528        boolean foundEnd = false, enterDeclare = false;
529        boolean isSinglePLBlock = false;
530
531        if (TBaseType.assigned(sqlstatements)) sqlstatements.clear();
532        if (!TBaseType.assigned(sourcetokenlist)) {
533            // No tokens available - populate builder with empty results and return
534            builder.sqlStatements(this.sqlstatements);
535            builder.errorCode(1);
536            builder.errorMessage("No source token list available");
537            return;
538        }
539
540        TCustomSqlStatement gcurrentsqlstatement = null;
541        EFindSqlStateType gst = EFindSqlStateType.stnormal;
542        TSourceToken lcprevsolidtoken = null, ast = null;
543
544        if (isSinglePLBlock) {
545            gcurrentsqlstatement = new TCommonBlock(EDbVendor.dbvpostgresql);
546        }
547
548        for (int i = 0; i < sourcetokenlist.size(); i++) {
549
550            if ((ast != null) && (ast.issolidtoken()))
551                lcprevsolidtoken = ast;
552
553            ast = sourcetokenlist.get(i);
554            sourcetokenlist.curpos = i;
555
556            if (isSinglePLBlock) {
557                gcurrentsqlstatement.sourcetokenlist.add(ast);
558                continue;
559            }
560
561            // Token transformations during raw statement extraction
562            performRawStatementTokenTransformations(ast);
563
564            switch (gst) {
565                case sterror: {
566                    if (ast.tokentype == ETokenType.ttsemicolon) {
567                        appendToken(gcurrentsqlstatement, ast);
568                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
569                        gst = EFindSqlStateType.stnormal;
570                    } else {
571                        appendToken(gcurrentsqlstatement, ast);
572                    }
573                    break;
574                }
575
576                case stnormal: {
577                    if ((ast.tokencode == TBaseType.cmtdoublehyphen)
578                            || (ast.tokencode == TBaseType.cmtslashstar)
579                            || (ast.tokencode == TBaseType.lexspace)
580                            || (ast.tokencode == TBaseType.lexnewline)
581                            || (ast.tokentype == ETokenType.ttsemicolon)) {
582                        if (gcurrentsqlstatement != null) {
583                            appendToken(gcurrentsqlstatement, ast);
584                        }
585
586                        if ((lcprevsolidtoken != null) && (ast.tokentype == ETokenType.ttsemicolon)) {
587                            if (lcprevsolidtoken.tokentype == ETokenType.ttsemicolon) {
588                                ast.tokentype = ETokenType.ttsimplecomment;
589                                ast.tokencode = TBaseType.cmtdoublehyphen;
590                            }
591                        }
592
593                        continue;
594                    }
595
596                    if (ast.tokencode == TBaseType.sqlpluscmd) {
597                        gst = EFindSqlStateType.stsqlplus;
598                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
599                        appendToken(gcurrentsqlstatement, ast);
600                        continue;
601                    }
602
603                    // Handle psql meta-commands (\command) from pg_dump output
604                    if (ast.tokencode == TBaseType.error
605                            && "\\".equals(ast.getAstext())
606                            && (lcprevsolidtoken == null
607                                || lcprevsolidtoken.tokentype == ETokenType.ttsemicolon)) {
608                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
609                        appendToken(gcurrentsqlstatement, ast);
610                        // Consume all tokens until newline
611                        while (i + 1 < sourcetokenlist.size()) {
612                            TSourceToken nextToken = sourcetokenlist.get(i + 1);
613                            if (nextToken.tokentype == ETokenType.ttreturn) {
614                                break;
615                            }
616                            i++;
617                            ast = nextToken;
618                            appendToken(gcurrentsqlstatement, ast);
619                        }
620                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
621                        gst = EFindSqlStateType.stnormal;
622                        continue;
623                    }
624
625                    // Find a token to start sql or plsql mode
626                    gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement);
627
628                    if (gcurrentsqlstatement != null) {
629                        enterDeclare = false;
630                        if (gcurrentsqlstatement.ispgplsql()) {
631                            gst = EFindSqlStateType.ststoredprocedure;
632                            appendToken(gcurrentsqlstatement, ast);
633                            foundEnd = false;
634                            if ((ast.tokencode == TBaseType.rrw_begin)
635                                    || (ast.tokencode == TBaseType.rrw_package)
636                                    || (ast.searchToken(TBaseType.rrw_package, 4) != null)) {
637                                waitingEnd = 1;
638                            } else if (ast.tokencode == TBaseType.rrw_declare) {
639                                enterDeclare = true;
640                            }
641                        } else {
642                            gst = EFindSqlStateType.stsql;
643                            appendToken(gcurrentsqlstatement, ast);
644                        }
645                    } else {
646                        // Error token found
647                        this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo,
648                                (ast.columnNo < 0 ? 0 : ast.columnNo),
649                                "Error when tokenize", EErrorType.spwarning,
650                                TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist));
651
652                        ast.tokentype = ETokenType.tttokenlizererrortoken;
653                        gst = EFindSqlStateType.sterror;
654
655                        gcurrentsqlstatement = new TUnknownSqlStatement(vendor);
656                        gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid;
657                        appendToken(gcurrentsqlstatement, ast);
658                    }
659
660                    break;
661                }
662
663                case stsqlplus: {
664                    if (ast.insqlpluscmd) {
665                        appendToken(gcurrentsqlstatement, ast);
666                    } else {
667                        gst = EFindSqlStateType.stnormal;
668                        appendToken(gcurrentsqlstatement, ast);
669                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
670                    }
671
672                    break;
673                }
674
675                case stsql: {
676                    if (ast.tokentype == ETokenType.ttsemicolon) {
677                        gst = EFindSqlStateType.stnormal;
678                        appendToken(gcurrentsqlstatement, ast);
679                        gcurrentsqlstatement.semicolonended = ast;
680                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
681                        continue;
682                    }
683
684                    if (sourcetokenlist.sqlplusaftercurtoken()) {
685                        gst = EFindSqlStateType.stnormal;
686                        appendToken(gcurrentsqlstatement, ast);
687                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
688                        continue;
689                    }
690
691                    if (ast.tokencode == TBaseType.cmtdoublehyphen) {
692                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) {
693                            gst = EFindSqlStateType.stnormal;
694                            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
695                            continue;
696                        }
697                    }
698
699                    appendToken(gcurrentsqlstatement, ast);
700                    break;
701                }
702
703                case ststoredprocedure: {
704                    if (ast.tokencode == TBaseType.rrw_postgresql_function_delimiter) {
705                        appendToken(gcurrentsqlstatement, ast);
706                        gst = EFindSqlStateType.ststoredprocedurePgStartBody;
707                        continue;
708                    }
709
710                    if (ast.tokencode == TBaseType.rrw_postgresql_language) {
711                        TSourceToken nextSt = ast.nextSolidToken();
712                        if (nextSt != null) {
713                            if (gcurrentsqlstatement instanceof TRoutine) {
714                                TRoutine p = (TRoutine) gcurrentsqlstatement;
715                                p.setRoutineLanguage(nextSt.toString());
716                            }
717                        }
718                    }
719
720                    if ((ast.tokentype == ETokenType.ttsemicolon) && (waitingEnd == 0) && (!enterDeclare)) {
721                        gst = EFindSqlStateType.stnormal;
722                        appendToken(gcurrentsqlstatement, ast);
723                        gcurrentsqlstatement.semicolonended = ast;
724                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
725                        continue;
726                    }
727
728                    if (ast.tokencode == TBaseType.rrw_begin) {
729                        waitingEnd++;
730                        enterDeclare = false;
731                    } else if (ast.tokencode == TBaseType.rrw_declare) {
732                        enterDeclare = true;
733                    } else if (ast.tokencode == TBaseType.rrw_if) {
734                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
735                            waitingEnd++;
736                        }
737                    } else if (ast.tokencode == TBaseType.rrw_case) {
738                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
739                            waitingEnd++;
740                        }
741                    } else if (ast.tokencode == TBaseType.rrw_loop) {
742                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
743                            waitingEnd++;
744                        }
745                    } else if (ast.tokencode == TBaseType.rrw_end) {
746                        foundEnd = true;
747                        waitingEnd--;
748                        if (waitingEnd < 0) {
749                            waitingEnd = 0;
750                        }
751                    }
752
753                    if ((ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) {
754                        ast.tokenstatus = ETokenStatus.tsignorebyyacc;
755                        gst = EFindSqlStateType.stnormal;
756                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
757
758                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
759                        appendToken(gcurrentsqlstatement, ast);
760                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
761                    } else if ((ast.tokentype == ETokenType.ttperiod)
762                            && (sourcetokenlist.returnaftercurtoken(false))
763                            && (sourcetokenlist.returnbeforecurtoken(false))) {
764                        ast.tokenstatus = ETokenStatus.tsignorebyyacc;
765                        gst = EFindSqlStateType.stnormal;
766                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
767
768                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
769                        appendToken(gcurrentsqlstatement, ast);
770                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
771                    } else {
772                        appendToken(gcurrentsqlstatement, ast);
773                        if ((ast.tokentype == ETokenType.ttsemicolon) && (waitingEnd == 0) && (foundEnd)) {
774                            gst = EFindSqlStateType.stnormal;
775                            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
776                        }
777                    }
778
779                    if (ast.tokencode == TBaseType.sqlpluscmd) {
780                        int m = flexer.getkeywordvalue(ast.getAstext());
781                        if (m != 0) {
782                            ast.tokencode = m;
783                        } else {
784                            ast.tokencode = TBaseType.ident;
785                        }
786                    }
787
788                    if ((gst == EFindSqlStateType.ststoredprocedure) && (ast.tokencode == TBaseType.cmtdoublehyphen)) {
789                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) {
790                            gst = EFindSqlStateType.stnormal;
791                            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
792                        }
793                    }
794
795                    break;
796                }
797
798                case ststoredprocedurePgStartBody: {
799                    appendToken(gcurrentsqlstatement, ast);
800
801                    if (ast.tokencode == TBaseType.rrw_postgresql_function_delimiter) {
802                        if (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstDoExecuteBlock) {
803                            // Check if DO block has trailing LANGUAGE clause
804                            TSourceToken nextSolid = ast.nextSolidToken();
805                            if (nextSolid != null && nextSolid.tokencode == TBaseType.rrw_postgresql_language) {
806                                gst = EFindSqlStateType.ststoredprocedurePgEndBody;
807                            } else {
808                                gst = EFindSqlStateType.stnormal;
809                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
810                            }
811                            continue;
812                        } else {
813                            gst = EFindSqlStateType.ststoredprocedurePgEndBody;
814                            continue;
815                        }
816                    }
817
818                    break;
819                }
820
821                case ststoredprocedurePgEndBody: {
822                    if (ast.tokentype == ETokenType.ttsemicolon) {
823                        gst = EFindSqlStateType.stnormal;
824                        appendToken(gcurrentsqlstatement, ast);
825                        gcurrentsqlstatement.semicolonended = ast;
826                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
827                        continue;
828                    } else if (ast.tokencode == TBaseType.cmtdoublehyphen) {
829                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) {
830                            gst = EFindSqlStateType.stnormal;
831                            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
832                            continue;
833                        }
834                    }
835
836                    appendToken(gcurrentsqlstatement, ast);
837
838                    if (ast.tokencode == TBaseType.rrw_postgresql_language) {
839                        TSourceToken nextSt = ast.nextSolidToken();
840                        if (nextSt != null) {
841                            if (gcurrentsqlstatement instanceof TRoutine) {
842                                TRoutine p = (TRoutine) gcurrentsqlstatement;
843                                p.setRoutineLanguage(nextSt.toString());
844                            }
845                        }
846                    }
847
848                    break;
849                }
850            }
851        }
852
853        // Last statement
854        if ((gcurrentsqlstatement != null) &&
855                ((gst == EFindSqlStateType.stsqlplus) || (gst == EFindSqlStateType.stsql)
856                        || (gst == EFindSqlStateType.ststoredprocedure)
857                        || (gst == EFindSqlStateType.ststoredprocedurePgEndBody)
858                        || (gst == EFindSqlStateType.sterror) || (isSinglePLBlock))) {
859            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, true, builder);
860        }
861
862        // Populate builder with results
863        builder.sqlStatements(this.sqlstatements);
864        builder.syntaxErrors(syntaxErrors instanceof ArrayList ?
865                (ArrayList<TSyntaxError>) syntaxErrors : new ArrayList<>(syntaxErrors));
866        builder.errorCode(syntaxErrors.isEmpty() ? 0 : syntaxErrors.size());
867    }
868
869    /**
870     * Handle token transformations during raw statement extraction.
871     *
872     * @param ast current token being processed
873     */
874    private void performRawStatementTokenTransformations(TSourceToken ast) {
875        if (ast.tokencode == TBaseType.JSON_EXIST) {
876            TSourceToken stConstant = ast.searchToken(TBaseType.sconst, 1);
877            if (stConstant == null) {
878                ast.tokencode = TBaseType.ident;
879            }
880        } else if (ast.tokencode == TBaseType.rrw_postgresql_POSITION) {
881            TSourceToken st1 = ast.nextSolidToken();
882            if (st1 != null) {
883                if (st1.tokencode == '(') {
884                    ast.tokencode = TBaseType.rrw_postgresql_POSITION_FUNCTION;
885                }
886            }
887        } else if (ast.tokencode == TBaseType.rrw_postgresql_ordinality) {
888            TSourceToken lcprevst = getprevsolidtoken(ast);
889
890            if (lcprevst != null) {
891                if (lcprevst.tokencode == TBaseType.rrw_with) {
892                    TSourceToken lcbeforewith = getprevsolidtoken(lcprevst);
893                    if (lcbeforewith != null && lcbeforewith.tokencode == ')') {
894                        // WITH ORDINALITY after function call - table modifier
895                        lcprevst.tokencode = TBaseType.rrw_postgresql_with_lookahead;
896                    }
897                    // Otherwise keep as RW_WITH (CTE: WITH ordinality AS ...)
898                }
899            }
900        } else if (ast.tokencode == TBaseType.rrw_postgresql_filter) {
901            TSourceToken st1 = ast.nextSolidToken();
902            if (st1 != null) {
903                if (st1.tokencode != '(') {
904                    ast.tokencode = TBaseType.ident;
905                }
906            }
907        } else if (ast.tokencode == TBaseType.rrw_postgresql_jsonb) {
908            TSourceToken st1 = ast.nextSolidToken();
909            if (st1 != null) {
910                if (st1.tokencode == '?') {
911                    st1.tokencode = TBaseType.OP_JSONB_QUESTION;
912                }
913            }
914        } else if (ast.tokencode == '?') {
915            TSourceToken st1 = ast.nextSolidToken();
916            if (st1 != null) {
917                if (st1.tokencode == TBaseType.sconst) {
918                    ast.tokencode = TBaseType.OP_JSONB_QUESTION;
919                }
920            }
921        } else if (ast.tokencode == TBaseType.rrw_values) {
922            TSourceToken stParen = ast.searchToken('(', 1);
923            if (stParen != null) {
924                TSourceToken stInsert = ast.searchToken(TBaseType.rrw_insert, -ast.posinlist);
925                if (stInsert != null) {
926                    TSourceToken stSemiColon = ast.searchToken(';', -ast.posinlist);
927                    if ((stSemiColon != null) && (stSemiColon.posinlist > stInsert.posinlist)) {
928                        // Don't treat values(1) as insert values
929                    } else {
930                        TSourceToken stFrom = ast.searchToken(TBaseType.rrw_from, -ast.posinlist);
931                        if ((stFrom != null) && (stFrom.posinlist > stInsert.posinlist)) {
932                            // Don't treat values after from keyword as an insert values
933                        } else {
934                            ast.tokencode = TBaseType.rrw_postgresql_insert_values;
935                        }
936                    }
937                }
938            }
939        }
940    }
941
942    private void appendToken(TCustomSqlStatement statement, TSourceToken token) {
943        if (statement == null || token == null) {
944            return;
945        }
946        token.stmt = statement;
947        statement.sourcetokenlist.add(token);
948    }
949
950    // Note: initializeGlobalContext() inherited from AbstractSqlParser
951
952    /**
953     * Override onRawStatementComplete to add PostgreSQL-specific processing.
954     *
955     * <p>This method handles special processing for stored procedures/functions
956     * whose body is written in non-SQL languages (e.g., PL/Python, PL/Perl, PL/R).
957     *
958     * <p>For such routines, the tokens between dollar-quote delimiters ($$, $function$, etc.)
959     * are marked as non-SQL content to prevent parsing errors.
960     *
961     * @param context parser context
962     * @param statement the completed statement
963     * @param mainParser main SQL parser
964     * @param secondaryParser secondary parser (not used for PostgreSQL)
965     * @param statementList list to add the statement to
966     * @param isLastStatement whether this is the last statement
967     * @param builder result builder for populating parse results
968     */
969    @Override
970    protected void onRawStatementComplete(ParserContext context,
971                                         TCustomSqlStatement statement,
972                                         TCustomParser mainParser,
973                                         TCustomParser secondaryParser,
974                                         TStatementList statementList,
975                                         boolean isLastStatement,
976                                         SqlParseResult.Builder builder) {
977        // Call parent implementation for standard processing
978        super.onRawStatementComplete(context, statement, mainParser, secondaryParser, statementList, isLastStatement, builder);
979
980        // PostgreSQL-specific: Handle stored procedures with non-SQL bodies
981        // (e.g., PL/Python, PL/Perl, PL/R, PL/Java, PL/Tcl)
982        if (statement instanceof TRoutine) {
983            TRoutine routine = (TRoutine) statement;
984
985            // Check if the routine body is NOT written in SQL/PLPGSQL
986            if (!routine.isBodyInSQL()) {
987                processNonSqlRoutineBody(routine);
988            }
989        }
990    }
991
992    /**
993     * Process a routine whose body is written in a non-SQL language.
994     *
995     * <p>This method:
996     * <ul>
997     *   <li>Identifies the dollar-quote delimiters marking the routine body</li>
998     *   <li>Marks all tokens between delimiters as non-SQL (sqlpluscmd type)</li>
999     *   <li>Extracts and stores the complete routine body text</li>
1000     * </ul>
1001     *
1002     * <p>This prevents the parser from trying to parse Python, Perl, or other
1003     * language syntax as SQL, which would cause syntax errors.
1004     *
1005     * @param routine the routine statement to process
1006     */
1007    private void processNonSqlRoutineBody(TRoutine routine) {
1008        if (routine.sourcetokenlist == null || routine.sourcetokenlist.size() == 0) {
1009            return;
1010        }
1011
1012        TSourceToken st;
1013        boolean inBody = false;
1014        StringBuilder routineBodyBuilder = new StringBuilder();
1015
1016        // Scan through all tokens to find and mark the routine body
1017        for (int i = 0; i < routine.sourcetokenlist.size(); i++) {
1018            st = routine.sourcetokenlist.get(i);
1019
1020            // Check if this is a dollar-quote delimiter
1021            if (isDollarFunctionDelimiter(st.tokencode, this.vendor)) {
1022                if (!inBody) {
1023                    // Start of body - record opening delimiter
1024                    inBody = true;
1025                    routineBodyBuilder.append(st.toString());
1026                } else {
1027                    // End of body - record closing delimiter
1028                    inBody = false;
1029                    routineBodyBuilder.append(st.toString());
1030                    break;
1031                }
1032                continue;
1033            }
1034
1035            // If we're inside the body, mark token as non-SQL and collect its text
1036            if (inBody) {
1037                st.tokencode = TBaseType.sqlpluscmd;
1038                routineBodyBuilder.append(st.toString());
1039            }
1040        }
1041
1042        // Store the complete routine body text
1043        routine.setRoutineBody(routineBodyBuilder.toString());
1044    }
1045
1046    // Note: isDollarFunctionDelimiter() is now inherited from AbstractSqlParser
1047    // The parent implementation handles all PostgreSQL-family databases
1048
1049    @Override
1050    public String toString() {
1051        return "PostgreSqlParser{vendor=" + vendor + "}";
1052    }
1053}