Source code

001package gudusoft.gsqlparser.parser;
002
003import gudusoft.gsqlparser.EDbVendor;
004import gudusoft.gsqlparser.TBaseType;
005import gudusoft.gsqlparser.TCustomLexer;
006import gudusoft.gsqlparser.TCustomParser;
007import gudusoft.gsqlparser.TCustomSqlStatement;
008import gudusoft.gsqlparser.TLexerPostgresql;
009import gudusoft.gsqlparser.TParserPostgresql;
010import gudusoft.gsqlparser.TSourceToken;
011import gudusoft.gsqlparser.TSourceTokenList;
012import gudusoft.gsqlparser.TStatementList;
013import gudusoft.gsqlparser.TSyntaxError;
014import gudusoft.gsqlparser.EFindSqlStateType;
015import gudusoft.gsqlparser.ETokenType;
016import gudusoft.gsqlparser.ETokenStatus;
017import gudusoft.gsqlparser.ESqlStatementType;
018import gudusoft.gsqlparser.EErrorType;
019import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement;
020import gudusoft.gsqlparser.stmt.TUnknownSqlStatement;
021import gudusoft.gsqlparser.sqlcmds.ISqlCmds;
022import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory;
023import gudusoft.gsqlparser.stmt.TCommonBlock;
024import gudusoft.gsqlparser.stmt.TRoutine;
025import gudusoft.gsqlparser.compiler.TContext;
026import gudusoft.gsqlparser.sqlenv.TSQLEnv;
027import gudusoft.gsqlparser.compiler.TGlobalScope;
028import gudusoft.gsqlparser.compiler.TFrame;
029import gudusoft.gsqlparser.ETokenStatus;
030
031import java.io.BufferedReader;
032import java.util.ArrayList;
033import java.util.List;
034import java.util.Stack;
035
036/**
037 * PostgreSQL database SQL parser implementation.
038 *
039 * <p>This parser handles PostgreSQL-specific SQL syntax including:
040 * <ul>
041 *   <li>PL/pgSQL blocks (functions, procedures, triggers)</li>
042 *   <li>Dollar quoting ($$...$$)</li>
043 *   <li>PostgreSQL-specific DML/DDL</li>
044 *   <li>Special operators and functions</li>
045 *   <li>Special token handling (%ROWTYPE, %TYPE, etc.)</li>
046 * </ul>
047 *
048 * <p><b>Design Notes:</b>
049 * <ul>
050 *   <li>Extends {@link AbstractSqlParser}</li>
051 *   <li>Can directly instantiate: {@link TLexerPostgresql}, {@link TParserPostgresql}</li>
052 *   <li>Uses single parser (no secondary parser like Oracle's PL/SQL)</li>
053 *   <li>Delimiter character: ';' for SQL statements</li>
054 * </ul>
055 *
056 * <p><b>Usage Example:</b>
057 * <pre>
058 * // Get PostgreSQL parser from factory
059 * SqlParser parser = SqlParserFactory.get(EDbVendor.dbvpostgresql);
060 *
061 * // Build context
062 * ParserContext context = new ParserContext.Builder(EDbVendor.dbvpostgresql)
063 *     .sqlText("SELECT * FROM employees WHERE dept_id = 10")
064 *     .build();
065 *
066 * // Parse
067 * SqlParseResult result = parser.parse(context);
068 *
069 * // Access statements
070 * TStatementList statements = result.getSqlStatements();
071 * </pre>
072 *
073 * @see SqlParser
074 * @see AbstractSqlParser
075 * @see TLexerPostgresql
076 * @see TParserPostgresql
077 * @since 3.2.0.0
078 */
079public class PostgreSqlParser extends AbstractSqlParser {
080
081    // ========== Lexer and Parser Instances ==========
082    // Created once in constructor, reused for all parsing operations
083
084    /** The PostgreSQL lexer used for tokenization (public for TGSqlParser.getFlexer()) */
085    public TLexerPostgresql flexer;
086    private TParserPostgresql fparser;  // PostgreSQL parser; instantiated in the constructor and given flexer as its lexer
087
088    // ========== State Variables ==========
089    // NOTE: The following fields moved to AbstractSqlParser (inherited):
090    //   - sourcetokenlist (TSourceTokenList)
091    //   - sqlstatements (TStatementList)
092    //   - parserContext (ParserContext)
093    //   - sqlcmds (ISqlCmds) - to be added when PostgreSQL raw extraction is refactored
094    //   - globalContext (TContext)
095    //   - sqlEnv (TSQLEnv)
096    //   - frameStack (Stack<TFrame>)
097    //   - globalFrame (TFrame)
098
099    // ========== State Variables for Tokenization ==========
100    private boolean insqlpluscmd;  // true while tokens are being flagged as part of a SQL*Plus-style command; mirrored to flexer.insqlpluscmd
101    private boolean isvalidplace;  // true where a command may start (start of input, after certain newlines); cleared by every solid token
102    private boolean waitingreturnforsemicolon;  // set by a semicolon token; a following newline then sets isvalidplace
103    private boolean waitingreturnforfloatdiv;  // set by a '/' at a valid place; a following newline or end of input retags that '/' as sqlpluscmd
104    private boolean continuesqlplusatnewline;  // set by a "-" token inside a command; keeps the command active across the next newline
105
106    // ========== Constructor ==========
107
108    /**
109     * Construct PostgreSQL SQL parser.
110     * <p>
111     * Configures the parser for PostgreSQL database with default delimiter: semicolon (;)
112     * <p>
113     * Following the original TGSqlParser pattern, the lexer and parser are
114     * created once in the constructor and reused for all parsing operations.
115     */
116    public PostgreSqlParser() {
117        super(EDbVendor.dbvpostgresql);
118
119        // Set delimiter character
120        this.delimiterChar = ';';
121        this.defaultDelimiterStr = ";";
122
123        // Create lexer once - will be reused for all parsing operations
124        this.flexer = new TLexerPostgresql();
125        this.flexer.delimiterchar = this.delimiterChar;
126        this.flexer.defaultDelimiterStr = this.defaultDelimiterStr;
127
128        // CRITICAL: Set lexer for inherited getanewsourcetoken() method
129        this.lexer = this.flexer;
130
131        // Create parser once - will be reused for all parsing operations
132        this.fparser = new TParserPostgresql(null);
133        this.fparser.lexer = this.flexer;
134
135        // NOTE: sourcetokenlist and sqlstatements are initialized in AbstractSqlParser constructor
136    }
137
138    // ========== AbstractSqlParser Abstract Methods Implementation ==========
139
140    /**
141     * Return the PostgreSQL lexer instance.
142     * <p>
143     * The lexer is created once in the constructor and reused for all
144     * parsing operations. This method simply returns the existing instance,
145     * matching the original TGSqlParser pattern where the lexer is created
146     * once and reset before each use.
147     *
148     * @param context parser context (not used, lexer already created)
149     * @return the PostgreSQL lexer instance created in constructor
150     */
151    @Override
152    protected TCustomLexer getLexer(ParserContext context) {
153        // Return existing lexer instance (created in constructor)
154        return this.flexer;
155    }
156
157    /**
158     * Return the PostgreSQL SQL parser instance with updated token list.
159     * <p>
160     * The parser is created once in the constructor and reused for all
161     * parsing operations. This method updates the token list and returns
162     * the existing instance, matching the original TGSqlParser pattern.
163     *
164     * @param context parser context (not used, parser already created)
165     * @param tokens source token list to parse
166     * @return the PostgreSQL SQL parser instance created in constructor
167     */
168    @Override
169    protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) {
170        // Update token list for reused parser instance
171        this.fparser.sourcetokenlist = tokens;
172        return this.fparser;
173    }
174
175    /**
176     * Call PostgreSQL-specific tokenization logic.
177     * <p>
178     * Delegates to dopostgresqltexttotokenlist which handles PostgreSQL's
179     * specific keyword recognition, dollar quotes, and token generation.
180     */
181    @Override
182    protected void tokenizeVendorSql() {
183        dopostgresqltexttotokenlist();
184    }
185
186    /**
187     * Setup PostgreSQL parser for raw statement extraction.
188     * <p>
189     * PostgreSQL uses a single parser, so we inject sqlcmds and update
190     * the token list for the main parser only.
191     */
192    @Override
193    protected void setupVendorParsersForExtraction() {
194        this.fparser.sqlcmds = this.sqlcmds;
195        this.fparser.sourcetokenlist = this.sourcetokenlist;
196    }
197
198    /**
199     * Call PostgreSQL-specific raw statement extraction logic.
200     * <p>
201     * Delegates to dopostgresqlgetrawsqlstatements which handles PostgreSQL's
202     * statement delimiters (semicolon for SQL, $$ for PL/pgSQL functions).
203     */
204    @Override
205    protected void extractVendorRawStatements(SqlParseResult.Builder builder) {
206        dopostgresqlgetrawsqlstatements(builder);
207    }
208
209    /**
210     * Perform full parsing of statements with syntax checking.
211     * <p>
212     * This method orchestrates the parsing of all statements.
213     *
214     * <p><b>Important:</b> This method does NOT extract raw statements - they are
215     * passed in as a parameter already extracted by {@link #extractRawStatements}.
216     *
217     * @param context parser context
218     * @param parser main SQL parser (TParserPostgresql)
219     * @param secondaryParser not used for PostgreSQL
220     * @param tokens source token list
221     * @param rawStatements raw statements already extracted (never null)
222     * @return list of fully parsed statements with AST built
223     */
224    @Override
225    protected TStatementList performParsing(ParserContext context,
226                                           TCustomParser parser,
227                                           TCustomParser secondaryParser,
228                                           TSourceTokenList tokens,
229                                           TStatementList rawStatements) {
230        // Store references (fparser is already set, don't reassign final variable)
231        this.sourcetokenlist = tokens;
232        this.parserContext = context;
233
234        // Use the raw statements passed from AbstractSqlParser.parse()
235        // (already extracted - DO NOT re-extract to avoid duplication)
236        this.sqlstatements = rawStatements;
237
238        // Initialize global context for statement parsing
239        initializeGlobalContext();
240
241        // Parse each statement
242        for (int i = 0; i < sqlstatements.size(); i++) {
243            TCustomSqlStatement stmt = sqlstatements.getRawSql(i);
244
245            // Set frame stack for the statement (needed for parsing)
246            stmt.setFrameStack(frameStack);
247
248            // Parse the statement
249            int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree());
250
251            // Collect syntax errors
252            if ((parseResult != 0) || (stmt.getErrorCount() > 0)) {
253                copyErrorsFromStatement(stmt);
254            }
255        }
256
257        // Clean up frame stack
258        if (globalFrame != null) {
259            globalFrame.popMeFromStack(frameStack);
260        }
261
262        return this.sqlstatements;
263    }
264
265    // ========== PostgreSQL-Specific Tokenization ==========
266
267    /**
268     * Perform PostgreSQL-specific tokenization: pull tokens via getanewsourcetoken(), append each to sourcetokenlist, and adjust token codes in place.
269     * <p>
270     * Extracted from TGSqlParser.dopostgresqltexttotokenlist() (lines 3093-3287)
271     */
272    private void dopostgresqltexttotokenlist() {
273        // Initialize state machine (all flags are reset on every run)
274        insqlpluscmd = false;
275        isvalidplace = true;  // a command may start at the very beginning of the input
276        waitingreturnforfloatdiv = false;
277        waitingreturnforsemicolon = false;
278        continuesqlplusatnewline = false;
279
280        TSourceToken lct = null, prevst = null;  // lct: last '/' that may be retagged as sqlpluscmd; prevst: INNER/NOT token waiting for the next solid token
281        TSourceToken asourcetoken, lcprevst;
282        int yychar;
283
284        asourcetoken = getanewsourcetoken();
285        if (asourcetoken == null) return;  // no token at all: nothing to add
286        yychar = asourcetoken.tokencode;
287
288        while (yychar > 0) {  // a non-positive token code ends the loop
289            sourcetokenlist.add(asourcetoken);  // every token is kept, including spaces, newlines and comments
290
291            switch (yychar) {
292                case TBaseType.cmtdoublehyphen:
293                case TBaseType.cmtslashstar:
294                case TBaseType.lexspace: {  // comments and spaces only inherit the in-command flag
295                    if (insqlpluscmd) {
296                        asourcetoken.insqlpluscmd = true;
297                    }
298                    break;
299                }
300
301                case TBaseType.lexnewline: {
302                    if (insqlpluscmd) {  // a newline ends the current command ...
303                        insqlpluscmd = false;
304                        isvalidplace = true;
305
306                        if (continuesqlplusatnewline) {  // ... unless a "-" token requested continuation
307                            insqlpluscmd = true;
308                            isvalidplace = false;
309                            asourcetoken.insqlpluscmd = true;
310                        }
311                    }
312
313                    if (waitingreturnforsemicolon) {  // newline after a semicolon: a command may start next
314                        isvalidplace = true;
315                    }
316                    if (waitingreturnforfloatdiv) {  // the '/' remembered in lct is followed by a newline: retag it as a command
317                        isvalidplace = true;
318                        lct.tokencode = TBaseType.sqlpluscmd;
319                        if (lct.tokentype != ETokenType.ttslash) {  // lct is only assigned ttslash tokens in this method, so this guard appears to be defensive
320                            lct.tokentype = ETokenType.ttsqlpluscmd;
321                        }
322                    }
323                    flexer.insqlpluscmd = insqlpluscmd;  // keep the lexer's flag in sync
324                    break;
325                }
326
327                default: {
328                    // Solid token: clears the flags that wait for a newline
329                    continuesqlplusatnewline = false;
330                    waitingreturnforsemicolon = false;
331                    waitingreturnforfloatdiv = false;
332
333                    if (insqlpluscmd) {
334                        asourcetoken.insqlpluscmd = true;
335                        if (asourcetoken.toString().equalsIgnoreCase("-")) {  // "-" inside a command continues it on the next line
336                            continuesqlplusatnewline = true;
337                        }
338                    } else {
339                        if (asourcetoken.tokentype == ETokenType.ttsemicolon) {
340                            waitingreturnforsemicolon = true;
341                        }
342                        if ((asourcetoken.tokentype == ETokenType.ttslash)
343                                && (isvalidplace || (isValidPlaceForDivToSqlplusCmd(sourcetokenlist, asourcetoken.posinlist)))) {
344                            lct = asourcetoken;  // remember the slash; it is retagged only if a newline or end of input follows
345                            waitingreturnforfloatdiv = true;
346                        }
347                        if ((isvalidplace) && isvalidsqlpluscmdInPostgresql(asourcetoken.toString())) {  // that check currently always returns false
348                            asourcetoken.tokencode = TBaseType.sqlpluscmd;
349                            if (asourcetoken.tokentype != ETokenType.ttslash) {
350                                asourcetoken.tokentype = ETokenType.ttsqlpluscmd;
351                            }
352                            insqlpluscmd = true;
353                            flexer.insqlpluscmd = insqlpluscmd;
354                        }
355                    }
356                    isvalidplace = false;  // after any solid token a command can no longer start here
357
358                    // PostgreSQL-specific keyword handling, driven by the INNER/NOT token remembered in prevst
359                    if (prevst != null) {
360                        if (prevst.tokencode == TBaseType.rrw_inner) {
361                            if (asourcetoken.tokencode != flexer.getkeywordvalue("JOIN")) {  // INNER not followed by JOIN becomes an identifier
362                                prevst.tokencode = TBaseType.ident;
363                            }
364                        }
365
366                        if ((prevst.tokencode == TBaseType.rrw_not)
367                                && (asourcetoken.tokencode == flexer.getkeywordvalue("DEFERRABLE"))) {
368                            prevst.tokencode = flexer.getkeywordvalue("NOT_DEFERRABLE");  // NOT followed by DEFERRABLE: the NOT token gets the NOT_DEFERRABLE code
369                        }
370                    }
371
372                    if (asourcetoken.tokencode == TBaseType.rrw_inner) {  // remember INNER / NOT for the next solid token; anything else clears prevst
373                        prevst = asourcetoken;
374                    } else if (asourcetoken.tokencode == TBaseType.rrw_not) {
375                        prevst = asourcetoken;
376                    } else {
377                        prevst = null;
378                    }
379
380                    // Additional PostgreSQL transformations
381                    if ((asourcetoken.tokencode == flexer.getkeywordvalue("DIRECT_LOAD"))
382                            || (asourcetoken.tokencode == flexer.getkeywordvalue("ALL"))) {
383                        lcprevst = getprevsolidtoken(asourcetoken);
384                        if (lcprevst != null) {
385                            if (lcprevst.tokencode == TBaseType.rrw_for)  // a FOR directly before DIRECT_LOAD / ALL is recoded to rw_for1
386                                lcprevst.tokencode = TBaseType.rw_for1;
387                        }
388                    }
389
390                    if (asourcetoken.tokencode == TBaseType.rrw_dense_rank) {
391                        TSourceToken stKeep = asourcetoken.searchToken(TBaseType.rrw_keep, -2);  // NOTE(review): negative offset presumably searches backwards - confirm searchToken semantics
392                        if (stKeep != null) {
393                            stKeep.tokencode = TBaseType.rrw_keep_before_dense_rank;
394                        }
395                    }
396
397                    if ((asourcetoken.tokencode == TBaseType.rrw_postgresql_rowtype)
398                            || (asourcetoken.tokencode == TBaseType.rrw_postgresql_type)) {
399                        TSourceToken stPercent = asourcetoken.searchToken('%', -1);
400                        if (stPercent != null) {
401                            stPercent.tokencode = TBaseType.rowtype_operator;  // the '%' found for this rowtype/type token becomes rowtype_operator
402                        }
403                    }
404
405                    if (asourcetoken.tokencode == TBaseType.JSON_EXIST) {
406                        TSourceToken stPercent = asourcetoken.searchToken('=', -1);  // despite its name, this variable holds the '=' token, if one is found
407                        if (stPercent != null) {
408                            asourcetoken.tokencode = TBaseType.ident;  // JSON_EXIST with an '=' found by the search is treated as an identifier
409                        }
410                    }
411
412                    if (asourcetoken.tokencode == TBaseType.rrw_update) {
413                        TSourceToken stDo = asourcetoken.searchToken(TBaseType.rrw_do, -1);
414                        if (stDo != null) {
415                            asourcetoken.tokencode = TBaseType.rrw_postgresql_do_update;  // UPDATE with a DO found by the search gets the do_update code
416                        }
417                    }
418
419                    break;
420                }
421            }
422
423            // Get next token
424            asourcetoken = getanewsourcetoken();
425            if (asourcetoken != null) {
426                yychar = asourcetoken.tokencode;
427            } else {
428                yychar = 0;  // end of input: terminates the while loop
429
430                if (waitingreturnforfloatdiv) {  // a pending '/' at end of input is retagged exactly as on a newline
431                    lct.tokencode = TBaseType.sqlpluscmd;
432                    if (lct.tokentype != ETokenType.ttslash) {
433                        lct.tokentype = ETokenType.ttsqlpluscmd;
434                    }
435                }
436            }
437
438            if ((yychar == 0) && (prevst != null)) {  // input ended while an INNER was still waiting for JOIN
439                if (prevst.tokencode == TBaseType.rrw_inner) {
440                    prevst.tokencode = TBaseType.ident;
441                }
442            }
443        }
444    }
445
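446    // NOTE: getanewsourcetoken() is not declared in this class; it is inherited.
447    // The documentation that used to accompany it here is kept below for reference:
448    //
449    //   Get next source token from the lexer.
450    //   This method wraps the lexer's yylexwrap() call.
451    //   Returns the next source token, or null if end of input.
452    //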
453
454    /**
455     * Check if token represents a valid SQL*Plus-like command in PostgreSQL.
456     *
457     * @param tokenText token text to check
458     * @return true if valid SQL*Plus command
459     */
460    private boolean isvalidsqlpluscmdInPostgresql(String tokenText) {
461        // PostgreSQL supports psql meta-commands like \d, \dt, etc.
462        // For now, keep compatible with original implementation
463        return false;
464    }
465
466    /**
467     * Determine if forward slash should be treated as SQL*Plus command delimiter.
468     *
469     * @param pstlist token list
470     * @param pPos position of '/' token
471     * @return true if '/' should be SQL*Plus command
472     */
473    private boolean isValidPlaceForDivToSqlplusCmd(TSourceTokenList pstlist, int pPos) {
474        boolean ret = false;
475
476        if ((pPos <= 0) || (pPos > pstlist.size() - 1)) return ret;
477
478        TSourceToken lcst = pstlist.get(pPos - 1);
479        if (lcst.tokentype != ETokenType.ttreturn) {
480            return ret;
481        }
482
483        if (!(lcst.getAstext().charAt(lcst.getAstext().length() - 1) == ' ')) {
484            ret = true;
485        }
486
487        return ret;
488    }
489
490    /**
491     * Get previous non-whitespace token.
492     *
493     * @param ptoken current token
494     * @return previous solid token, or null
495     */
496    private TSourceToken getprevsolidtoken(TSourceToken ptoken) {
497        TSourceToken ret = null;
498        TSourceTokenList lctokenlist = ptoken.container;
499
500        if (lctokenlist != null) {
501            if ((ptoken.posinlist > 0) && (lctokenlist.size() > ptoken.posinlist - 1)) {
502                if (!(
503                        (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttwhitespace)
504                        || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttreturn)
505                        || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttsimplecomment)
506                        || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttbracketedcomment)
507                )) {
508                    ret = lctokenlist.get(ptoken.posinlist - 1);
509                } else {
510                    ret = lctokenlist.nextsolidtoken(ptoken.posinlist - 1, -1, false);
511                }
512            }
513        }
514        return ret;
515    }
516
517    // ========== PostgreSQL-Specific Raw Statement Extraction ==========
518
519    /**
520     * Extract raw PostgreSQL SQL statements from tokenized source.
521     * <p>
522     * Extracted from TGSqlParser.dopostgresqlgetrawsqlstatements() (lines 8051-8492)
523     *
524     * @param builder the result builder to populate with raw statements
525     */
526    private void dopostgresqlgetrawsqlstatements(SqlParseResult.Builder builder) {
527        int waitingEnd = 0;
528        boolean foundEnd = false, enterDeclare = false;
529        boolean isSinglePLBlock = false;
530
531        if (TBaseType.assigned(sqlstatements)) sqlstatements.clear();
532        if (!TBaseType.assigned(sourcetokenlist)) {
533            // No tokens available - populate builder with empty results and return
534            builder.sqlStatements(this.sqlstatements);
535            builder.errorCode(1);
536            builder.errorMessage("No source token list available");
537            return;
538        }
539
540        TCustomSqlStatement gcurrentsqlstatement = null;
541        EFindSqlStateType gst = EFindSqlStateType.stnormal;
542        TSourceToken lcprevsolidtoken = null, ast = null;
543
544        if (isSinglePLBlock) {
545            gcurrentsqlstatement = new TCommonBlock(EDbVendor.dbvpostgresql);
546        }
547
548        for (int i = 0; i < sourcetokenlist.size(); i++) {
549
550            if ((ast != null) && (ast.issolidtoken()))
551                lcprevsolidtoken = ast;
552
553            ast = sourcetokenlist.get(i);
554            sourcetokenlist.curpos = i;
555
556            if (isSinglePLBlock) {
557                gcurrentsqlstatement.sourcetokenlist.add(ast);
558                continue;
559            }
560
561            // Token transformations during raw statement extraction
562            performRawStatementTokenTransformations(ast);
563
564            switch (gst) {
565                case sterror: {
566                    if (ast.tokentype == ETokenType.ttsemicolon) {
567                        appendToken(gcurrentsqlstatement, ast);
568                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
569                        gst = EFindSqlStateType.stnormal;
570                    } else {
571                        appendToken(gcurrentsqlstatement, ast);
572                    }
573                    break;
574                }
575
576                case stnormal: {
577                    if ((ast.tokencode == TBaseType.cmtdoublehyphen)
578                            || (ast.tokencode == TBaseType.cmtslashstar)
579                            || (ast.tokencode == TBaseType.lexspace)
580                            || (ast.tokencode == TBaseType.lexnewline)
581                            || (ast.tokentype == ETokenType.ttsemicolon)) {
582                        if (gcurrentsqlstatement != null) {
583                            appendToken(gcurrentsqlstatement, ast);
584                        }
585
586                        if ((lcprevsolidtoken != null) && (ast.tokentype == ETokenType.ttsemicolon)) {
587                            if (lcprevsolidtoken.tokentype == ETokenType.ttsemicolon) {
588                                ast.tokentype = ETokenType.ttsimplecomment;
589                                ast.tokencode = TBaseType.cmtdoublehyphen;
590                            }
591                        }
592
593                        continue;
594                    }
595
596                    if (ast.tokencode == TBaseType.sqlpluscmd) {
597                        gst = EFindSqlStateType.stsqlplus;
598                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
599                        appendToken(gcurrentsqlstatement, ast);
600                        continue;
601                    }
602
603                    // Find a token to start sql or plsql mode
604                    gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement);
605
606                    if (gcurrentsqlstatement != null) {
607                        enterDeclare = false;
608                        if (gcurrentsqlstatement.ispgplsql()) {
609                            gst = EFindSqlStateType.ststoredprocedure;
610                            appendToken(gcurrentsqlstatement, ast);
611                            foundEnd = false;
612                            if ((ast.tokencode == TBaseType.rrw_begin)
613                                    || (ast.tokencode == TBaseType.rrw_package)
614                                    || (ast.searchToken(TBaseType.rrw_package, 4) != null)) {
615                                waitingEnd = 1;
616                            } else if (ast.tokencode == TBaseType.rrw_declare) {
617                                enterDeclare = true;
618                            }
619                        } else {
620                            gst = EFindSqlStateType.stsql;
621                            appendToken(gcurrentsqlstatement, ast);
622                        }
623                    } else {
624                        // Error token found
625                        this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo,
626                                (ast.columnNo < 0 ? 0 : ast.columnNo),
627                                "Error when tokenize", EErrorType.spwarning,
628                                TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist));
629
630                        ast.tokentype = ETokenType.tttokenlizererrortoken;
631                        gst = EFindSqlStateType.sterror;
632
633                        gcurrentsqlstatement = new TUnknownSqlStatement(vendor);
634                        gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid;
635                        appendToken(gcurrentsqlstatement, ast);
636                    }
637
638                    break;
639                }
640
641                case stsqlplus: {
642                    if (ast.insqlpluscmd) {
643                        appendToken(gcurrentsqlstatement, ast);
644                    } else {
645                        gst = EFindSqlStateType.stnormal;
646                        appendToken(gcurrentsqlstatement, ast);
647                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
648                    }
649
650                    break;
651                }
652
653                case stsql: {
654                    if (ast.tokentype == ETokenType.ttsemicolon) {
655                        gst = EFindSqlStateType.stnormal;
656                        appendToken(gcurrentsqlstatement, ast);
657                        gcurrentsqlstatement.semicolonended = ast;
658                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
659                        continue;
660                    }
661
662                    if (sourcetokenlist.sqlplusaftercurtoken()) {
663                        gst = EFindSqlStateType.stnormal;
664                        appendToken(gcurrentsqlstatement, ast);
665                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
666                        continue;
667                    }
668
669                    if (ast.tokencode == TBaseType.cmtdoublehyphen) {
670                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) {
671                            gst = EFindSqlStateType.stnormal;
672                            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
673                            continue;
674                        }
675                    }
676
677                    appendToken(gcurrentsqlstatement, ast);
678                    break;
679                }
680
681                case ststoredprocedure: {
682                    if (ast.tokencode == TBaseType.rrw_postgresql_function_delimiter) {
683                        appendToken(gcurrentsqlstatement, ast);
684                        gst = EFindSqlStateType.ststoredprocedurePgStartBody;
685                        continue;
686                    }
687
688                    if (ast.tokencode == TBaseType.rrw_postgresql_language) {
689                        TSourceToken nextSt = ast.nextSolidToken();
690                        if (nextSt != null) {
691                            if (gcurrentsqlstatement instanceof TRoutine) {
692                                TRoutine p = (TRoutine) gcurrentsqlstatement;
693                                p.setRoutineLanguage(nextSt.toString());
694                            }
695                        }
696                    }
697
698                    if ((ast.tokentype == ETokenType.ttsemicolon) && (waitingEnd == 0) && (!enterDeclare)) {
699                        gst = EFindSqlStateType.stnormal;
700                        appendToken(gcurrentsqlstatement, ast);
701                        gcurrentsqlstatement.semicolonended = ast;
702                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
703                        continue;
704                    }
705
706                    if (ast.tokencode == TBaseType.rrw_begin) {
707                        waitingEnd++;
708                        enterDeclare = false;
709                    } else if (ast.tokencode == TBaseType.rrw_declare) {
710                        enterDeclare = true;
711                    } else if (ast.tokencode == TBaseType.rrw_if) {
712                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
713                            waitingEnd++;
714                        }
715                    } else if (ast.tokencode == TBaseType.rrw_case) {
716                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
717                            waitingEnd++;
718                        }
719                    } else if (ast.tokencode == TBaseType.rrw_loop) {
720                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
721                            waitingEnd++;
722                        }
723                    } else if (ast.tokencode == TBaseType.rrw_end) {
724                        foundEnd = true;
725                        waitingEnd--;
726                        if (waitingEnd < 0) {
727                            waitingEnd = 0;
728                        }
729                    }
730
731                    if ((ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) {
732                        ast.tokenstatus = ETokenStatus.tsignorebyyacc;
733                        gst = EFindSqlStateType.stnormal;
734                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
735
736                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
737                        appendToken(gcurrentsqlstatement, ast);
738                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
739                    } else if ((ast.tokentype == ETokenType.ttperiod)
740                            && (sourcetokenlist.returnaftercurtoken(false))
741                            && (sourcetokenlist.returnbeforecurtoken(false))) {
742                        ast.tokenstatus = ETokenStatus.tsignorebyyacc;
743                        gst = EFindSqlStateType.stnormal;
744                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
745
746                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
747                        appendToken(gcurrentsqlstatement, ast);
748                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
749                    } else {
750                        appendToken(gcurrentsqlstatement, ast);
751                        if ((ast.tokentype == ETokenType.ttsemicolon) && (waitingEnd == 0) && (foundEnd)) {
752                            gst = EFindSqlStateType.stnormal;
753                            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
754                        }
755                    }
756
757                    if (ast.tokencode == TBaseType.sqlpluscmd) {
758                        int m = flexer.getkeywordvalue(ast.getAstext());
759                        if (m != 0) {
760                            ast.tokencode = m;
761                        } else {
762                            ast.tokencode = TBaseType.ident;
763                        }
764                    }
765
766                    if ((gst == EFindSqlStateType.ststoredprocedure) && (ast.tokencode == TBaseType.cmtdoublehyphen)) {
767                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) {
768                            gst = EFindSqlStateType.stnormal;
769                            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
770                        }
771                    }
772
773                    break;
774                }
775
776                case ststoredprocedurePgStartBody: {
777                    appendToken(gcurrentsqlstatement, ast);
778
779                    if (ast.tokencode == TBaseType.rrw_postgresql_function_delimiter) {
780                        if (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstDoExecuteBlock) {
781                            // Check if DO block has trailing LANGUAGE clause
782                            TSourceToken nextSolid = ast.nextSolidToken();
783                            if (nextSolid != null && nextSolid.tokencode == TBaseType.rrw_postgresql_language) {
784                                gst = EFindSqlStateType.ststoredprocedurePgEndBody;
785                            } else {
786                                gst = EFindSqlStateType.stnormal;
787                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
788                            }
789                            continue;
790                        } else {
791                            gst = EFindSqlStateType.ststoredprocedurePgEndBody;
792                            continue;
793                        }
794                    }
795
796                    break;
797                }
798
799                case ststoredprocedurePgEndBody: {
800                    if (ast.tokentype == ETokenType.ttsemicolon) {
801                        gst = EFindSqlStateType.stnormal;
802                        appendToken(gcurrentsqlstatement, ast);
803                        gcurrentsqlstatement.semicolonended = ast;
804                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
805                        continue;
806                    } else if (ast.tokencode == TBaseType.cmtdoublehyphen) {
807                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) {
808                            gst = EFindSqlStateType.stnormal;
809                            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
810                            continue;
811                        }
812                    }
813
814                    appendToken(gcurrentsqlstatement, ast);
815
816                    if (ast.tokencode == TBaseType.rrw_postgresql_language) {
817                        TSourceToken nextSt = ast.nextSolidToken();
818                        if (nextSt != null) {
819                            if (gcurrentsqlstatement instanceof TRoutine) {
820                                TRoutine p = (TRoutine) gcurrentsqlstatement;
821                                p.setRoutineLanguage(nextSt.toString());
822                            }
823                        }
824                    }
825
826                    break;
827                }
828            }
829        }
830
831        // Last statement
832        if ((gcurrentsqlstatement != null) &&
833                ((gst == EFindSqlStateType.stsqlplus) || (gst == EFindSqlStateType.stsql)
834                        || (gst == EFindSqlStateType.ststoredprocedure)
835                        || (gst == EFindSqlStateType.ststoredprocedurePgEndBody)
836                        || (gst == EFindSqlStateType.sterror) || (isSinglePLBlock))) {
837            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, true, builder);
838        }
839
840        // Populate builder with results
841        builder.sqlStatements(this.sqlstatements);
842        builder.syntaxErrors(syntaxErrors instanceof ArrayList ?
843                (ArrayList<TSyntaxError>) syntaxErrors : new ArrayList<>(syntaxErrors));
844        builder.errorCode(syntaxErrors.isEmpty() ? 0 : syntaxErrors.size());
845    }
846
847    /**
848     * Handle token transformations during raw statement extraction.
849     *
850     * @param ast current token being processed
851     */
852    private void performRawStatementTokenTransformations(TSourceToken ast) {
853        if (ast.tokencode == TBaseType.JSON_EXIST) {
854            TSourceToken stConstant = ast.searchToken(TBaseType.sconst, 1);
855            if (stConstant == null) {
856                ast.tokencode = TBaseType.ident;
857            }
858        } else if (ast.tokencode == TBaseType.rrw_postgresql_POSITION) {
859            TSourceToken st1 = ast.nextSolidToken();
860            if (st1 != null) {
861                if (st1.tokencode == '(') {
862                    ast.tokencode = TBaseType.rrw_postgresql_POSITION_FUNCTION;
863                }
864            }
865        } else if (ast.tokencode == TBaseType.rrw_postgresql_ordinality) {
866            TSourceToken lcprevst = getprevsolidtoken(ast);
867
868            if (lcprevst != null) {
869                if (lcprevst.tokencode == TBaseType.rrw_with) {
870                    TSourceToken lcbeforewith = getprevsolidtoken(lcprevst);
871                    if (lcbeforewith != null && lcbeforewith.tokencode == ')') {
872                        // WITH ORDINALITY after function call - table modifier
873                        lcprevst.tokencode = TBaseType.rrw_postgresql_with_lookahead;
874                    }
875                    // Otherwise keep as RW_WITH (CTE: WITH ordinality AS ...)
876                }
877            }
878        } else if (ast.tokencode == TBaseType.rrw_postgresql_filter) {
879            TSourceToken st1 = ast.nextSolidToken();
880            if (st1 != null) {
881                if (st1.tokencode != '(') {
882                    ast.tokencode = TBaseType.ident;
883                }
884            }
885        } else if (ast.tokencode == TBaseType.rrw_postgresql_jsonb) {
886            TSourceToken st1 = ast.nextSolidToken();
887            if (st1 != null) {
888                if (st1.tokencode == '?') {
889                    st1.tokencode = TBaseType.OP_JSONB_QUESTION;
890                }
891            }
892        } else if (ast.tokencode == '?') {
893            TSourceToken st1 = ast.nextSolidToken();
894            if (st1 != null) {
895                if (st1.tokencode == TBaseType.sconst) {
896                    ast.tokencode = TBaseType.OP_JSONB_QUESTION;
897                }
898            }
899        } else if (ast.tokencode == TBaseType.rrw_values) {
900            TSourceToken stParen = ast.searchToken('(', 1);
901            if (stParen != null) {
902                TSourceToken stInsert = ast.searchToken(TBaseType.rrw_insert, -ast.posinlist);
903                if (stInsert != null) {
904                    TSourceToken stSemiColon = ast.searchToken(';', -ast.posinlist);
905                    if ((stSemiColon != null) && (stSemiColon.posinlist > stInsert.posinlist)) {
906                        // Don't treat values(1) as insert values
907                    } else {
908                        TSourceToken stFrom = ast.searchToken(TBaseType.rrw_from, -ast.posinlist);
909                        if ((stFrom != null) && (stFrom.posinlist > stInsert.posinlist)) {
910                            // Don't treat values after from keyword as an insert values
911                        } else {
912                            ast.tokencode = TBaseType.rrw_postgresql_insert_values;
913                        }
914                    }
915                }
916            }
917        }
918    }
919
920    private void appendToken(TCustomSqlStatement statement, TSourceToken token) {
921        if (statement == null || token == null) {
922            return;
923        }
924        token.stmt = statement;
925        statement.sourcetokenlist.add(token);
926    }
927
928    // Note: initializeGlobalContext() inherited from AbstractSqlParser
929
930    /**
931     * Override onRawStatementComplete to add PostgreSQL-specific processing.
932     *
933     * <p>This method handles special processing for stored procedures/functions
934     * whose body is written in non-SQL languages (e.g., PL/Python, PL/Perl, PL/R).
935     *
936     * <p>For such routines, the tokens between dollar-quote delimiters ($$, $function$, etc.)
937     * are marked as non-SQL content to prevent parsing errors.
938     *
939     * @param context parser context
940     * @param statement the completed statement
941     * @param mainParser main SQL parser
942     * @param secondaryParser secondary parser (not used for PostgreSQL)
943     * @param statementList list to add the statement to
944     * @param isLastStatement whether this is the last statement
945     * @param builder result builder for populating parse results
946     */
947    @Override
948    protected void onRawStatementComplete(ParserContext context,
949                                         TCustomSqlStatement statement,
950                                         TCustomParser mainParser,
951                                         TCustomParser secondaryParser,
952                                         TStatementList statementList,
953                                         boolean isLastStatement,
954                                         SqlParseResult.Builder builder) {
955        // Call parent implementation for standard processing
956        super.onRawStatementComplete(context, statement, mainParser, secondaryParser, statementList, isLastStatement, builder);
957
958        // PostgreSQL-specific: Handle stored procedures with non-SQL bodies
959        // (e.g., PL/Python, PL/Perl, PL/R, PL/Java, PL/Tcl)
960        if (statement instanceof TRoutine) {
961            TRoutine routine = (TRoutine) statement;
962
963            // Check if the routine body is NOT written in SQL/PLPGSQL
964            if (!routine.isBodyInSQL()) {
965                processNonSqlRoutineBody(routine);
966            }
967        }
968    }
969
970    /**
971     * Process a routine whose body is written in a non-SQL language.
972     *
973     * <p>This method:
974     * <ul>
975     *   <li>Identifies the dollar-quote delimiters marking the routine body</li>
976     *   <li>Marks all tokens between delimiters as non-SQL (sqlpluscmd type)</li>
977     *   <li>Extracts and stores the complete routine body text</li>
978     * </ul>
979     *
980     * <p>This prevents the parser from trying to parse Python, Perl, or other
981     * language syntax as SQL, which would cause syntax errors.
982     *
983     * @param routine the routine statement to process
984     */
985    private void processNonSqlRoutineBody(TRoutine routine) {
986        if (routine.sourcetokenlist == null || routine.sourcetokenlist.size() == 0) {
987            return;
988        }
989
990        TSourceToken st;
991        boolean inBody = false;
992        StringBuilder routineBodyBuilder = new StringBuilder();
993
994        // Scan through all tokens to find and mark the routine body
995        for (int i = 0; i < routine.sourcetokenlist.size(); i++) {
996            st = routine.sourcetokenlist.get(i);
997
998            // Check if this is a dollar-quote delimiter
999            if (isDollarFunctionDelimiter(st.tokencode, this.vendor)) {
1000                if (!inBody) {
1001                    // Start of body - record opening delimiter
1002                    inBody = true;
1003                    routineBodyBuilder.append(st.toString());
1004                } else {
1005                    // End of body - record closing delimiter
1006                    inBody = false;
1007                    routineBodyBuilder.append(st.toString());
1008                    break;
1009                }
1010                continue;
1011            }
1012
1013            // If we're inside the body, mark token as non-SQL and collect its text
1014            if (inBody) {
1015                st.tokencode = TBaseType.sqlpluscmd;
1016                routineBodyBuilder.append(st.toString());
1017            }
1018        }
1019
1020        // Store the complete routine body text
1021        routine.setRoutineBody(routineBodyBuilder.toString());
1022    }
1023
1024    // Note: isDollarFunctionDelimiter() is now inherited from AbstractSqlParser
1025    // The parent implementation handles all PostgreSQL-family databases
1026
1027    @Override
1028    public String toString() {
1029        return "PostgreSqlParser{vendor=" + vendor + "}";
1030    }
1031}