001package gudusoft.gsqlparser.parser; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.TBaseType; 005import gudusoft.gsqlparser.TCustomLexer; 006import gudusoft.gsqlparser.TCustomParser; 007import gudusoft.gsqlparser.TCustomSqlStatement; 008import gudusoft.gsqlparser.TLexerPostgresql; 009import gudusoft.gsqlparser.TParserPostgresql; 010import gudusoft.gsqlparser.TSourceToken; 011import gudusoft.gsqlparser.TSourceTokenList; 012import gudusoft.gsqlparser.TStatementList; 013import gudusoft.gsqlparser.TSyntaxError; 014import gudusoft.gsqlparser.EFindSqlStateType; 015import gudusoft.gsqlparser.ETokenType; 016import gudusoft.gsqlparser.ETokenStatus; 017import gudusoft.gsqlparser.ESqlStatementType; 018import gudusoft.gsqlparser.EErrorType; 019import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement; 020import gudusoft.gsqlparser.stmt.TUnknownSqlStatement; 021import gudusoft.gsqlparser.sqlcmds.ISqlCmds; 022import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory; 023import gudusoft.gsqlparser.stmt.TCommonBlock; 024import gudusoft.gsqlparser.stmt.TRoutine; 025import gudusoft.gsqlparser.compiler.TContext; 026import gudusoft.gsqlparser.sqlenv.TSQLEnv; 027import gudusoft.gsqlparser.compiler.TGlobalScope; 028import gudusoft.gsqlparser.compiler.TFrame; 029import gudusoft.gsqlparser.ETokenStatus; 030 031import java.io.BufferedReader; 032import java.util.ArrayList; 033import java.util.List; 034import java.util.Stack; 035 036/** 037 * PostgreSQL database SQL parser implementation. 038 * 039 * <p>This parser handles PostgreSQL-specific SQL syntax including: 040 * <ul> 041 * <li>PL/pgSQL blocks (functions, procedures, triggers)</li> 042 * <li>Dollar quoting ($$...$$)</li> 043 * <li>PostgreSQL-specific DML/DDL</li> 044 * <li>Special operators and functions</li> 045 * <li>Special token handling (%ROWTYPE, %TYPE, etc.)</li> 046 * </ul> 047 * 048 * <p><b>Design Notes:</b> 049 * <ul> 050 * <li>Extends {@link AbstractSqlParser}</li> 051 * <li>Can directly instantiate: {@link TLexerPostgresql}, {@link TParserPostgresqlSql}</li> 052 * <li>Uses single parser (no secondary parser like Oracle's PL/SQL)</li> 053 * <li>Delimiter character: ';' for SQL statements</li> 054 * </ul> 055 * 056 * <p><b>Usage Example:</b> 057 * <pre> 058 * // Get PostgreSQL parser from factory 059 * SqlParser parser = SqlParserFactory.get(EDbVendor.dbvpostgresql); 060 * 061 * // Build context 062 * ParserContext context = new ParserContext.Builder(EDbVendor.dbvpostgresql) 063 * .sqlText("SELECT * FROM employees WHERE dept_id = 10") 064 * .build(); 065 * 066 * // Parse 067 * SqlParseResult result = parser.parse(context); 068 * 069 * // Access statements 070 * TStatementList statements = result.getSqlStatements(); 071 * </pre> 072 * 073 * @see SqlParser 074 * @see AbstractSqlParser 075 * @see TLexerPostgresql 076 * @see TParserPostgresql 077 * @since 3.2.0.0 078 */ 079public class PostgreSqlParser extends AbstractSqlParser { 080 081 // ========== Lexer and Parser Instances ========== 082 // Created once in constructor, reused for all parsing operations 083 084 /** The PostgreSQL lexer used for tokenization (public for TGSqlParser.getFlexer()) */ 085 public TLexerPostgresql flexer; 086 private TParserPostgresql fparser; 087 088 // ========== State Variables ========== 089 // NOTE: The following fields moved to AbstractSqlParser (inherited): 090 // - sourcetokenlist (TSourceTokenList) 091 // - sqlstatements (TStatementList) 092 // - parserContext (ParserContext) 093 // - sqlcmds (ISqlCmds) - to be added when PostgreSQL raw extraction is refactored 094 // - globalContext (TContext) 095 // - sqlEnv (TSQLEnv) 096 // - frameStack (Stack<TFrame>) 097 // - globalFrame (TFrame) 098 099 // ========== State Variables for Tokenization ========== 100 private boolean insqlpluscmd; 101 private boolean isvalidplace; 102 private boolean waitingreturnforsemicolon; 103 private boolean waitingreturnforfloatdiv; 104 private boolean continuesqlplusatnewline; 105 106 // ========== Constructor ========== 107 108 /** 109 * Construct PostgreSQL SQL parser. 110 * <p> 111 * Configures the parser for PostgreSQL database with default delimiter: semicolon (;) 112 * <p> 113 * Following the original TGSqlParser pattern, the lexer and parser are 114 * created once in the constructor and reused for all parsing operations. 115 */ 116 public PostgreSqlParser() { 117 super(EDbVendor.dbvpostgresql); 118 119 // Set delimiter character 120 this.delimiterChar = ';'; 121 this.defaultDelimiterStr = ";"; 122 123 // Create lexer once - will be reused for all parsing operations 124 this.flexer = new TLexerPostgresql(); 125 this.flexer.delimiterchar = this.delimiterChar; 126 this.flexer.defaultDelimiterStr = this.defaultDelimiterStr; 127 128 // CRITICAL: Set lexer for inherited getanewsourcetoken() method 129 this.lexer = this.flexer; 130 131 // Create parser once - will be reused for all parsing operations 132 this.fparser = new TParserPostgresql(null); 133 this.fparser.lexer = this.flexer; 134 135 // NOTE: sourcetokenlist and sqlstatements are initialized in AbstractSqlParser constructor 136 } 137 138 // ========== AbstractSqlParser Abstract Methods Implementation ========== 139 140 /** 141 * Return the PostgreSQL lexer instance. 142 * <p> 143 * The lexer is created once in the constructor and reused for all 144 * parsing operations. This method simply returns the existing instance, 145 * matching the original TGSqlParser pattern where the lexer is created 146 * once and reset before each use. 147 * 148 * @param context parser context (not used, lexer already created) 149 * @return the PostgreSQL lexer instance created in constructor 150 */ 151 @Override 152 protected TCustomLexer getLexer(ParserContext context) { 153 // Return existing lexer instance (created in constructor) 154 return this.flexer; 155 } 156 157 /** 158 * Return the PostgreSQL SQL parser instance with updated token list. 159 * <p> 160 * The parser is created once in the constructor and reused for all 161 * parsing operations. This method updates the token list and returns 162 * the existing instance, matching the original TGSqlParser pattern. 163 * 164 * @param context parser context (not used, parser already created) 165 * @param tokens source token list to parse 166 * @return the PostgreSQL SQL parser instance created in constructor 167 */ 168 @Override 169 protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) { 170 // Update token list for reused parser instance 171 this.fparser.sourcetokenlist = tokens; 172 return this.fparser; 173 } 174 175 /** 176 * Call PostgreSQL-specific tokenization logic. 177 * <p> 178 * Delegates to dopostgresqltexttotokenlist which handles PostgreSQL's 179 * specific keyword recognition, dollar quotes, and token generation. 180 */ 181 @Override 182 protected void tokenizeVendorSql() { 183 dopostgresqltexttotokenlist(); 184 } 185 186 /** 187 * Setup PostgreSQL parser for raw statement extraction. 188 * <p> 189 * PostgreSQL uses a single parser, so we inject sqlcmds and update 190 * the token list for the main parser only. 191 */ 192 @Override 193 protected void setupVendorParsersForExtraction() { 194 this.fparser.sqlcmds = this.sqlcmds; 195 this.fparser.sourcetokenlist = this.sourcetokenlist; 196 } 197 198 /** 199 * Call PostgreSQL-specific raw statement extraction logic. 200 * <p> 201 * Delegates to dopostgresqlgetrawsqlstatements which handles PostgreSQL's 202 * statement delimiters (semicolon for SQL, $$ for PL/pgSQL functions). 203 */ 204 @Override 205 protected void extractVendorRawStatements(SqlParseResult.Builder builder) { 206 dopostgresqlgetrawsqlstatements(builder); 207 } 208 209 /** 210 * Perform full parsing of statements with syntax checking. 211 * <p> 212 * This method orchestrates the parsing of all statements. 213 * 214 * <p><b>Important:</b> This method does NOT extract raw statements - they are 215 * passed in as a parameter already extracted by {@link #extractRawStatements}. 216 * 217 * @param context parser context 218 * @param parser main SQL parser (TParserPostgresql) 219 * @param secondaryParser not used for PostgreSQL 220 * @param tokens source token list 221 * @param rawStatements raw statements already extracted (never null) 222 * @return list of fully parsed statements with AST built 223 */ 224 @Override 225 protected TStatementList performParsing(ParserContext context, 226 TCustomParser parser, 227 TCustomParser secondaryParser, 228 TSourceTokenList tokens, 229 TStatementList rawStatements) { 230 // Store references (fparser is already set, don't reassign final variable) 231 this.sourcetokenlist = tokens; 232 this.parserContext = context; 233 234 // Use the raw statements passed from AbstractSqlParser.parse() 235 // (already extracted - DO NOT re-extract to avoid duplication) 236 this.sqlstatements = rawStatements; 237 238 // Initialize global context for statement parsing 239 initializeGlobalContext(); 240 241 // Parse each statement 242 for (int i = 0; i < sqlstatements.size(); i++) { 243 TCustomSqlStatement stmt = sqlstatements.getRawSql(i); 244 245 // Set frame stack for the statement (needed for parsing) 246 stmt.setFrameStack(frameStack); 247 248 // Parse the statement 249 int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree()); 250 251 // Collect syntax errors 252 if ((parseResult != 0) || (stmt.getErrorCount() > 0)) { 253 copyErrorsFromStatement(stmt); 254 } 255 } 256 257 // Clean up frame stack 258 if (globalFrame != null) { 259 globalFrame.popMeFromStack(frameStack); 260 } 261 262 return this.sqlstatements; 263 } 264 265 // ========== PostgreSQL-Specific Tokenization ========== 266 267 /** 268 * Perform PostgreSQL-specific tokenization. 269 * <p> 270 * Extracted from TGSqlParser.dopostgresqltexttotokenlist() (lines 3093-3287) 271 */ 272 private void dopostgresqltexttotokenlist() { 273 // Initialize state machine 274 insqlpluscmd = false; 275 isvalidplace = true; 276 waitingreturnforfloatdiv = false; 277 waitingreturnforsemicolon = false; 278 continuesqlplusatnewline = false; 279 280 TSourceToken lct = null, prevst = null; 281 TSourceToken asourcetoken, lcprevst; 282 int yychar; 283 284 asourcetoken = getanewsourcetoken(); 285 if (asourcetoken == null) return; 286 yychar = asourcetoken.tokencode; 287 288 while (yychar > 0) { 289 sourcetokenlist.add(asourcetoken); 290 291 switch (yychar) { 292 case TBaseType.cmtdoublehyphen: 293 case TBaseType.cmtslashstar: 294 case TBaseType.lexspace: { 295 if (insqlpluscmd) { 296 asourcetoken.insqlpluscmd = true; 297 } 298 break; 299 } 300 301 case TBaseType.lexnewline: { 302 if (insqlpluscmd) { 303 insqlpluscmd = false; 304 isvalidplace = true; 305 306 if (continuesqlplusatnewline) { 307 insqlpluscmd = true; 308 isvalidplace = false; 309 asourcetoken.insqlpluscmd = true; 310 } 311 } 312 313 if (waitingreturnforsemicolon) { 314 isvalidplace = true; 315 } 316 if (waitingreturnforfloatdiv) { 317 isvalidplace = true; 318 lct.tokencode = TBaseType.sqlpluscmd; 319 if (lct.tokentype != ETokenType.ttslash) { 320 lct.tokentype = ETokenType.ttsqlpluscmd; 321 } 322 } 323 flexer.insqlpluscmd = insqlpluscmd; 324 break; 325 } 326 327 default: { 328 // Solid token 329 continuesqlplusatnewline = false; 330 waitingreturnforsemicolon = false; 331 waitingreturnforfloatdiv = false; 332 333 if (insqlpluscmd) { 334 asourcetoken.insqlpluscmd = true; 335 if (asourcetoken.toString().equalsIgnoreCase("-")) { 336 continuesqlplusatnewline = true; 337 } 338 } else { 339 if (asourcetoken.tokentype == ETokenType.ttsemicolon) { 340 waitingreturnforsemicolon = true; 341 } 342 if ((asourcetoken.tokentype == ETokenType.ttslash) 343 && (isvalidplace || (isValidPlaceForDivToSqlplusCmd(sourcetokenlist, asourcetoken.posinlist)))) { 344 lct = asourcetoken; 345 waitingreturnforfloatdiv = true; 346 } 347 if ((isvalidplace) && isvalidsqlpluscmdInPostgresql(asourcetoken.toString())) { 348 asourcetoken.tokencode = TBaseType.sqlpluscmd; 349 if (asourcetoken.tokentype != ETokenType.ttslash) { 350 asourcetoken.tokentype = ETokenType.ttsqlpluscmd; 351 } 352 insqlpluscmd = true; 353 flexer.insqlpluscmd = insqlpluscmd; 354 } 355 } 356 isvalidplace = false; 357 358 // PostgreSQL-specific keyword handling 359 if (prevst != null) { 360 if (prevst.tokencode == TBaseType.rrw_inner) { 361 if (asourcetoken.tokencode != flexer.getkeywordvalue("JOIN")) { 362 prevst.tokencode = TBaseType.ident; 363 } 364 } 365 366 if ((prevst.tokencode == TBaseType.rrw_not) 367 && (asourcetoken.tokencode == flexer.getkeywordvalue("DEFERRABLE"))) { 368 prevst.tokencode = flexer.getkeywordvalue("NOT_DEFERRABLE"); 369 } 370 } 371 372 if (asourcetoken.tokencode == TBaseType.rrw_inner) { 373 prevst = asourcetoken; 374 } else if (asourcetoken.tokencode == TBaseType.rrw_not) { 375 prevst = asourcetoken; 376 } else { 377 prevst = null; 378 } 379 380 // Additional PostgreSQL transformations 381 if ((asourcetoken.tokencode == flexer.getkeywordvalue("DIRECT_LOAD")) 382 || (asourcetoken.tokencode == flexer.getkeywordvalue("ALL"))) { 383 lcprevst = getprevsolidtoken(asourcetoken); 384 if (lcprevst != null) { 385 if (lcprevst.tokencode == TBaseType.rrw_for) 386 lcprevst.tokencode = TBaseType.rw_for1; 387 } 388 } 389 390 if (asourcetoken.tokencode == TBaseType.rrw_dense_rank) { 391 TSourceToken stKeep = asourcetoken.searchToken(TBaseType.rrw_keep, -2); 392 if (stKeep != null) { 393 stKeep.tokencode = TBaseType.rrw_keep_before_dense_rank; 394 } 395 } 396 397 if ((asourcetoken.tokencode == TBaseType.rrw_postgresql_rowtype) 398 || (asourcetoken.tokencode == TBaseType.rrw_postgresql_type)) { 399 TSourceToken stPercent = asourcetoken.searchToken('%', -1); 400 if (stPercent != null) { 401 stPercent.tokencode = TBaseType.rowtype_operator; 402 } 403 } 404 405 if (asourcetoken.tokencode == TBaseType.JSON_EXIST) { 406 TSourceToken stPercent = asourcetoken.searchToken('=', -1); 407 if (stPercent != null) { 408 asourcetoken.tokencode = TBaseType.ident; 409 } 410 } 411 412 if (asourcetoken.tokencode == TBaseType.rrw_update) { 413 TSourceToken stDo = asourcetoken.searchToken(TBaseType.rrw_do, -1); 414 if (stDo != null) { 415 asourcetoken.tokencode = TBaseType.rrw_postgresql_do_update; 416 } 417 } 418 419 break; 420 } 421 } 422 423 // Get next token 424 asourcetoken = getanewsourcetoken(); 425 if (asourcetoken != null) { 426 yychar = asourcetoken.tokencode; 427 } else { 428 yychar = 0; 429 430 if (waitingreturnforfloatdiv) { 431 lct.tokencode = TBaseType.sqlpluscmd; 432 if (lct.tokentype != ETokenType.ttslash) { 433 lct.tokentype = ETokenType.ttsqlpluscmd; 434 } 435 } 436 } 437 438 if ((yychar == 0) && (prevst != null)) { 439 if (prevst.tokencode == TBaseType.rrw_inner) { 440 prevst.tokencode = TBaseType.ident; 441 } 442 } 443 } 444 } 445 446 /** 447 * Get next source token from the lexer. 448 * <p> 449 * This method wraps the lexer's yylexwrap() call. 450 * 451 * @return next source token, or null if end of input 452 */ 453 454 /** 455 * Check if token represents a valid SQL*Plus-like command in PostgreSQL. 456 * 457 * @param tokenText token text to check 458 * @return true if valid SQL*Plus command 459 */ 460 private boolean isvalidsqlpluscmdInPostgresql(String tokenText) { 461 // PostgreSQL supports psql meta-commands like \d, \dt, etc. 462 // For now, keep compatible with original implementation 463 return false; 464 } 465 466 /** 467 * Determine if forward slash should be treated as SQL*Plus command delimiter. 468 * 469 * @param pstlist token list 470 * @param pPos position of '/' token 471 * @return true if '/' should be SQL*Plus command 472 */ 473 private boolean isValidPlaceForDivToSqlplusCmd(TSourceTokenList pstlist, int pPos) { 474 boolean ret = false; 475 476 if ((pPos <= 0) || (pPos > pstlist.size() - 1)) return ret; 477 478 TSourceToken lcst = pstlist.get(pPos - 1); 479 if (lcst.tokentype != ETokenType.ttreturn) { 480 return ret; 481 } 482 483 if (!(lcst.getAstext().charAt(lcst.getAstext().length() - 1) == ' ')) { 484 ret = true; 485 } 486 487 return ret; 488 } 489 490 /** 491 * Get previous non-whitespace token. 492 * 493 * @param ptoken current token 494 * @return previous solid token, or null 495 */ 496 private TSourceToken getprevsolidtoken(TSourceToken ptoken) { 497 TSourceToken ret = null; 498 TSourceTokenList lctokenlist = ptoken.container; 499 500 if (lctokenlist != null) { 501 if ((ptoken.posinlist > 0) && (lctokenlist.size() > ptoken.posinlist - 1)) { 502 if (!( 503 (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttwhitespace) 504 || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttreturn) 505 || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttsimplecomment) 506 || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttbracketedcomment) 507 )) { 508 ret = lctokenlist.get(ptoken.posinlist - 1); 509 } else { 510 ret = lctokenlist.nextsolidtoken(ptoken.posinlist - 1, -1, false); 511 } 512 } 513 } 514 return ret; 515 } 516 517 // ========== PostgreSQL-Specific Raw Statement Extraction ========== 518 519 /** 520 * Extract raw PostgreSQL SQL statements from tokenized source. 521 * <p> 522 * Extracted from TGSqlParser.dopostgresqlgetrawsqlstatements() (lines 8051-8492) 523 * 524 * @param builder the result builder to populate with raw statements 525 */ 526 private void dopostgresqlgetrawsqlstatements(SqlParseResult.Builder builder) { 527 int waitingEnd = 0; 528 boolean foundEnd = false, enterDeclare = false; 529 boolean isSinglePLBlock = false; 530 531 if (TBaseType.assigned(sqlstatements)) sqlstatements.clear(); 532 if (!TBaseType.assigned(sourcetokenlist)) { 533 // No tokens available - populate builder with empty results and return 534 builder.sqlStatements(this.sqlstatements); 535 builder.errorCode(1); 536 builder.errorMessage("No source token list available"); 537 return; 538 } 539 540 TCustomSqlStatement gcurrentsqlstatement = null; 541 EFindSqlStateType gst = EFindSqlStateType.stnormal; 542 TSourceToken lcprevsolidtoken = null, ast = null; 543 544 if (isSinglePLBlock) { 545 gcurrentsqlstatement = new TCommonBlock(EDbVendor.dbvpostgresql); 546 } 547 548 for (int i = 0; i < sourcetokenlist.size(); i++) { 549 550 if ((ast != null) && (ast.issolidtoken())) 551 lcprevsolidtoken = ast; 552 553 ast = sourcetokenlist.get(i); 554 sourcetokenlist.curpos = i; 555 556 if (isSinglePLBlock) { 557 gcurrentsqlstatement.sourcetokenlist.add(ast); 558 continue; 559 } 560 561 // Token transformations during raw statement extraction 562 performRawStatementTokenTransformations(ast); 563 564 switch (gst) { 565 case sterror: { 566 if (ast.tokentype == ETokenType.ttsemicolon) { 567 appendToken(gcurrentsqlstatement, ast); 568 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 569 gst = EFindSqlStateType.stnormal; 570 } else { 571 appendToken(gcurrentsqlstatement, ast); 572 } 573 break; 574 } 575 576 case stnormal: { 577 if ((ast.tokencode == TBaseType.cmtdoublehyphen) 578 || (ast.tokencode == TBaseType.cmtslashstar) 579 || (ast.tokencode == TBaseType.lexspace) 580 || (ast.tokencode == TBaseType.lexnewline) 581 || (ast.tokentype == ETokenType.ttsemicolon)) { 582 if (gcurrentsqlstatement != null) { 583 appendToken(gcurrentsqlstatement, ast); 584 } 585 586 if ((lcprevsolidtoken != null) && (ast.tokentype == ETokenType.ttsemicolon)) { 587 if (lcprevsolidtoken.tokentype == ETokenType.ttsemicolon) { 588 ast.tokentype = ETokenType.ttsimplecomment; 589 ast.tokencode = TBaseType.cmtdoublehyphen; 590 } 591 } 592 593 continue; 594 } 595 596 if (ast.tokencode == TBaseType.sqlpluscmd) { 597 gst = EFindSqlStateType.stsqlplus; 598 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 599 appendToken(gcurrentsqlstatement, ast); 600 continue; 601 } 602 603 // Find a token to start sql or plsql mode 604 gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement); 605 606 if (gcurrentsqlstatement != null) { 607 enterDeclare = false; 608 if (gcurrentsqlstatement.ispgplsql()) { 609 gst = EFindSqlStateType.ststoredprocedure; 610 appendToken(gcurrentsqlstatement, ast); 611 foundEnd = false; 612 if ((ast.tokencode == TBaseType.rrw_begin) 613 || (ast.tokencode == TBaseType.rrw_package) 614 || (ast.searchToken(TBaseType.rrw_package, 4) != null)) { 615 waitingEnd = 1; 616 } else if (ast.tokencode == TBaseType.rrw_declare) { 617 enterDeclare = true; 618 } 619 } else { 620 gst = EFindSqlStateType.stsql; 621 appendToken(gcurrentsqlstatement, ast); 622 } 623 } else { 624 // Error token found 625 this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, 626 (ast.columnNo < 0 ? 0 : ast.columnNo), 627 "Error when tokenize", EErrorType.spwarning, 628 TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist)); 629 630 ast.tokentype = ETokenType.tttokenlizererrortoken; 631 gst = EFindSqlStateType.sterror; 632 633 gcurrentsqlstatement = new TUnknownSqlStatement(vendor); 634 gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid; 635 appendToken(gcurrentsqlstatement, ast); 636 } 637 638 break; 639 } 640 641 case stsqlplus: { 642 if (ast.insqlpluscmd) { 643 appendToken(gcurrentsqlstatement, ast); 644 } else { 645 gst = EFindSqlStateType.stnormal; 646 appendToken(gcurrentsqlstatement, ast); 647 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 648 } 649 650 break; 651 } 652 653 case stsql: { 654 if (ast.tokentype == ETokenType.ttsemicolon) { 655 gst = EFindSqlStateType.stnormal; 656 appendToken(gcurrentsqlstatement, ast); 657 gcurrentsqlstatement.semicolonended = ast; 658 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 659 continue; 660 } 661 662 if (sourcetokenlist.sqlplusaftercurtoken()) { 663 gst = EFindSqlStateType.stnormal; 664 appendToken(gcurrentsqlstatement, ast); 665 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 666 continue; 667 } 668 669 if (ast.tokencode == TBaseType.cmtdoublehyphen) { 670 if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { 671 gst = EFindSqlStateType.stnormal; 672 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 673 continue; 674 } 675 } 676 677 appendToken(gcurrentsqlstatement, ast); 678 break; 679 } 680 681 case ststoredprocedure: { 682 if (ast.tokencode == TBaseType.rrw_postgresql_function_delimiter) { 683 appendToken(gcurrentsqlstatement, ast); 684 gst = EFindSqlStateType.ststoredprocedurePgStartBody; 685 continue; 686 } 687 688 if (ast.tokencode == TBaseType.rrw_postgresql_language) { 689 TSourceToken nextSt = ast.nextSolidToken(); 690 if (nextSt != null) { 691 if (gcurrentsqlstatement instanceof TRoutine) { 692 TRoutine p = (TRoutine) gcurrentsqlstatement; 693 p.setRoutineLanguage(nextSt.toString()); 694 } 695 } 696 } 697 698 if ((ast.tokentype == ETokenType.ttsemicolon) && (waitingEnd == 0) && (!enterDeclare)) { 699 gst = EFindSqlStateType.stnormal; 700 appendToken(gcurrentsqlstatement, ast); 701 gcurrentsqlstatement.semicolonended = ast; 702 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 703 continue; 704 } 705 706 if (ast.tokencode == TBaseType.rrw_begin) { 707 waitingEnd++; 708 enterDeclare = false; 709 } else if (ast.tokencode == TBaseType.rrw_declare) { 710 enterDeclare = true; 711 } else if (ast.tokencode == TBaseType.rrw_if) { 712 if (ast.searchToken(TBaseType.rrw_end, -1) == null) { 713 waitingEnd++; 714 } 715 } else if (ast.tokencode == TBaseType.rrw_case) { 716 if (ast.searchToken(TBaseType.rrw_end, -1) == null) { 717 waitingEnd++; 718 } 719 } else if (ast.tokencode == TBaseType.rrw_loop) { 720 if (ast.searchToken(TBaseType.rrw_end, -1) == null) { 721 waitingEnd++; 722 } 723 } else if (ast.tokencode == TBaseType.rrw_end) { 724 foundEnd = true; 725 waitingEnd--; 726 if (waitingEnd < 0) { 727 waitingEnd = 0; 728 } 729 } 730 731 if ((ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) { 732 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 733 gst = EFindSqlStateType.stnormal; 734 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 735 736 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 737 appendToken(gcurrentsqlstatement, ast); 738 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 739 } else if ((ast.tokentype == ETokenType.ttperiod) 740 && (sourcetokenlist.returnaftercurtoken(false)) 741 && (sourcetokenlist.returnbeforecurtoken(false))) { 742 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 743 gst = EFindSqlStateType.stnormal; 744 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 745 746 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 747 appendToken(gcurrentsqlstatement, ast); 748 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 749 } else { 750 appendToken(gcurrentsqlstatement, ast); 751 if ((ast.tokentype == ETokenType.ttsemicolon) && (waitingEnd == 0) && (foundEnd)) { 752 gst = EFindSqlStateType.stnormal; 753 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 754 } 755 } 756 757 if (ast.tokencode == TBaseType.sqlpluscmd) { 758 int m = flexer.getkeywordvalue(ast.getAstext()); 759 if (m != 0) { 760 ast.tokencode = m; 761 } else { 762 ast.tokencode = TBaseType.ident; 763 } 764 } 765 766 if ((gst == EFindSqlStateType.ststoredprocedure) && (ast.tokencode == TBaseType.cmtdoublehyphen)) { 767 if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { 768 gst = EFindSqlStateType.stnormal; 769 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 770 } 771 } 772 773 break; 774 } 775 776 case ststoredprocedurePgStartBody: { 777 appendToken(gcurrentsqlstatement, ast); 778 779 if (ast.tokencode == TBaseType.rrw_postgresql_function_delimiter) { 780 if (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstDoExecuteBlock) { 781 gst = EFindSqlStateType.stnormal; 782 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 783 continue; 784 } else { 785 gst = EFindSqlStateType.ststoredprocedurePgEndBody; 786 continue; 787 } 788 } 789 790 break; 791 } 792 793 case ststoredprocedurePgEndBody: { 794 if (ast.tokentype == ETokenType.ttsemicolon) { 795 gst = EFindSqlStateType.stnormal; 796 appendToken(gcurrentsqlstatement, ast); 797 gcurrentsqlstatement.semicolonended = ast; 798 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 799 continue; 800 } else if (ast.tokencode == TBaseType.cmtdoublehyphen) { 801 if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { 802 gst = EFindSqlStateType.stnormal; 803 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 804 continue; 805 } 806 } 807 808 appendToken(gcurrentsqlstatement, ast); 809 810 if (ast.tokencode == TBaseType.rrw_postgresql_language) { 811 TSourceToken nextSt = ast.nextSolidToken(); 812 if (nextSt != null) { 813 if (gcurrentsqlstatement instanceof TRoutine) { 814 TRoutine p = (TRoutine) gcurrentsqlstatement; 815 p.setRoutineLanguage(nextSt.toString()); 816 } 817 } 818 } 819 820 break; 821 } 822 } 823 } 824 825 // Last statement 826 if ((gcurrentsqlstatement != null) && 827 ((gst == EFindSqlStateType.stsqlplus) || (gst == EFindSqlStateType.stsql) 828 || (gst == EFindSqlStateType.ststoredprocedure) 829 || (gst == EFindSqlStateType.ststoredprocedurePgEndBody) 830 || (gst == EFindSqlStateType.sterror) || (isSinglePLBlock))) { 831 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, true, builder); 832 } 833 834 // Populate builder with results 835 builder.sqlStatements(this.sqlstatements); 836 builder.syntaxErrors(syntaxErrors instanceof ArrayList ? 837 (ArrayList<TSyntaxError>) syntaxErrors : new ArrayList<>(syntaxErrors)); 838 builder.errorCode(syntaxErrors.isEmpty() ? 0 : syntaxErrors.size()); 839 } 840 841 /** 842 * Handle token transformations during raw statement extraction. 843 * 844 * @param ast current token being processed 845 */ 846 private void performRawStatementTokenTransformations(TSourceToken ast) { 847 if (ast.tokencode == TBaseType.JSON_EXIST) { 848 TSourceToken stConstant = ast.searchToken(TBaseType.sconst, 1); 849 if (stConstant == null) { 850 ast.tokencode = TBaseType.ident; 851 } 852 } else if (ast.tokencode == TBaseType.rrw_postgresql_POSITION) { 853 TSourceToken st1 = ast.nextSolidToken(); 854 if (st1 != null) { 855 if (st1.tokencode == '(') { 856 ast.tokencode = TBaseType.rrw_postgresql_POSITION_FUNCTION; 857 } 858 } 859 } else if (ast.tokencode == TBaseType.rrw_postgresql_ordinality) { 860 TSourceToken lcprevst = getprevsolidtoken(ast); 861 862 if (lcprevst != null) { 863 if (lcprevst.tokencode == TBaseType.rrw_with) { 864 TSourceToken lcnextst = ast.nextSolidToken(); 865 if ((lcnextst != null) && (lcnextst.tokencode == TBaseType.rrw_as)) { 866 // Don't change with to rrw_postgresql_with_lookahead 867 } else { 868 lcprevst.tokencode = TBaseType.rrw_postgresql_with_lookahead; 869 } 870 } 871 } 872 } else if (ast.tokencode == TBaseType.rrw_postgresql_filter) { 873 TSourceToken st1 = ast.nextSolidToken(); 874 if (st1 != null) { 875 if (st1.tokencode != '(') { 876 ast.tokencode = TBaseType.ident; 877 } 878 } 879 } else if (ast.tokencode == TBaseType.rrw_postgresql_jsonb) { 880 TSourceToken st1 = ast.nextSolidToken(); 881 if (st1 != null) { 882 if (st1.tokencode == '?') { 883 st1.tokencode = TBaseType.OP_JSONB_QUESTION; 884 } 885 } 886 } else if (ast.tokencode == '?') { 887 TSourceToken st1 = ast.nextSolidToken(); 888 if (st1 != null) { 889 if (st1.tokencode == TBaseType.sconst) { 890 ast.tokencode = TBaseType.OP_JSONB_QUESTION; 891 } 892 } 893 } else if (ast.tokencode == TBaseType.rrw_values) { 894 TSourceToken stParen = ast.searchToken('(', 1); 895 if (stParen != null) { 896 TSourceToken stInsert = ast.searchToken(TBaseType.rrw_insert, -ast.posinlist); 897 if (stInsert != null) { 898 TSourceToken stSemiColon = ast.searchToken(';', -ast.posinlist); 899 if ((stSemiColon != null) && (stSemiColon.posinlist > stInsert.posinlist)) { 900 // Don't treat values(1) as insert values 901 } else { 902 TSourceToken stFrom = ast.searchToken(TBaseType.rrw_from, -ast.posinlist); 903 if ((stFrom != null) && (stFrom.posinlist > stInsert.posinlist)) { 904 // Don't treat values after from keyword as an insert values 905 } else { 906 ast.tokencode = TBaseType.rrw_postgresql_insert_values; 907 } 908 } 909 } 910 } 911 } 912 } 913 914 private void appendToken(TCustomSqlStatement statement, TSourceToken token) { 915 if (statement == null || token == null) { 916 return; 917 } 918 token.stmt = statement; 919 statement.sourcetokenlist.add(token); 920 } 921 922 // Note: initializeGlobalContext() inherited from AbstractSqlParser 923 924 /** 925 * Override onRawStatementComplete to add PostgreSQL-specific processing. 926 * 927 * <p>This method handles special processing for stored procedures/functions 928 * whose body is written in non-SQL languages (e.g., PL/Python, PL/Perl, PL/R). 929 * 930 * <p>For such routines, the tokens between dollar-quote delimiters ($$, $function$, etc.) 931 * are marked as non-SQL content to prevent parsing errors. 932 * 933 * @param context parser context 934 * @param statement the completed statement 935 * @param mainParser main SQL parser 936 * @param secondaryParser secondary parser (not used for PostgreSQL) 937 * @param statementList list to add the statement to 938 * @param isLastStatement whether this is the last statement 939 * @param builder result builder for populating parse results 940 */ 941 @Override 942 protected void onRawStatementComplete(ParserContext context, 943 TCustomSqlStatement statement, 944 TCustomParser mainParser, 945 TCustomParser secondaryParser, 946 TStatementList statementList, 947 boolean isLastStatement, 948 SqlParseResult.Builder builder) { 949 // Call parent implementation for standard processing 950 super.onRawStatementComplete(context, statement, mainParser, secondaryParser, statementList, isLastStatement, builder); 951 952 // PostgreSQL-specific: Handle stored procedures with non-SQL bodies 953 // (e.g., PL/Python, PL/Perl, PL/R, PL/Java, PL/Tcl) 954 if (statement instanceof TRoutine) { 955 TRoutine routine = (TRoutine) statement; 956 957 // Check if the routine body is NOT written in SQL/PLPGSQL 958 if (!routine.isBodyInSQL()) { 959 processNonSqlRoutineBody(routine); 960 } 961 } 962 } 963 964 /** 965 * Process a routine whose body is written in a non-SQL language. 966 * 967 * <p>This method: 968 * <ul> 969 * <li>Identifies the dollar-quote delimiters marking the routine body</li> 970 * <li>Marks all tokens between delimiters as non-SQL (sqlpluscmd type)</li> 971 * <li>Extracts and stores the complete routine body text</li> 972 * </ul> 973 * 974 * <p>This prevents the parser from trying to parse Python, Perl, or other 975 * language syntax as SQL, which would cause syntax errors. 976 * 977 * @param routine the routine statement to process 978 */ 979 private void processNonSqlRoutineBody(TRoutine routine) { 980 if (routine.sourcetokenlist == null || routine.sourcetokenlist.size() == 0) { 981 return; 982 } 983 984 TSourceToken st; 985 boolean inBody = false; 986 StringBuilder routineBodyBuilder = new StringBuilder(); 987 988 // Scan through all tokens to find and mark the routine body 989 for (int i = 0; i < routine.sourcetokenlist.size(); i++) { 990 st = routine.sourcetokenlist.get(i); 991 992 // Check if this is a dollar-quote delimiter 993 if (isDollarFunctionDelimiter(st.tokencode, this.vendor)) { 994 if (!inBody) { 995 // Start of body - record opening delimiter 996 inBody = true; 997 routineBodyBuilder.append(st.toString()); 998 } else { 999 // End of body - record closing delimiter 1000 inBody = false; 1001 routineBodyBuilder.append(st.toString()); 1002 break; 1003 } 1004 continue; 1005 } 1006 1007 // If we're inside the body, mark token as non-SQL and collect its text 1008 if (inBody) { 1009 st.tokencode = TBaseType.sqlpluscmd; 1010 routineBodyBuilder.append(st.toString()); 1011 } 1012 } 1013 1014 // Store the complete routine body text 1015 routine.setRoutineBody(routineBodyBuilder.toString()); 1016 } 1017 1018 // Note: isDollarFunctionDelimiter() is now inherited from AbstractSqlParser 1019 // The parent implementation handles all PostgreSQL-family databases 1020 1021 @Override 1022 public String toString() { 1023 return "PostgreSqlParser{vendor=" + vendor + "}"; 1024 } 1025}