001package gudusoft.gsqlparser.parser; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.TBaseType; 005import gudusoft.gsqlparser.TCustomLexer; 006import gudusoft.gsqlparser.TCustomParser; 007import gudusoft.gsqlparser.TCustomSqlStatement; 008import gudusoft.gsqlparser.TLexerPresto; 009import gudusoft.gsqlparser.TParserPresto; 010import gudusoft.gsqlparser.TSourceToken; 011import gudusoft.gsqlparser.TSourceTokenList; 012import gudusoft.gsqlparser.TStatementList; 013import gudusoft.gsqlparser.TSyntaxError; 014import gudusoft.gsqlparser.EFindSqlStateType; 015import gudusoft.gsqlparser.ETokenType; 016import gudusoft.gsqlparser.ETokenStatus; 017import gudusoft.gsqlparser.ESqlStatementType; 018import gudusoft.gsqlparser.EErrorType; 019import gudusoft.gsqlparser.stmt.TUnknownSqlStatement; 020import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement; 021import gudusoft.gsqlparser.sqlcmds.ISqlCmds; 022import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory; 023import gudusoft.gsqlparser.compiler.TContext; 024import gudusoft.gsqlparser.sqlenv.TSQLEnv; 025import gudusoft.gsqlparser.compiler.TGlobalScope; 026import gudusoft.gsqlparser.compiler.TFrame; 027import gudusoft.gsqlparser.resolver.TSQLResolver; 028import gudusoft.gsqlparser.TLog; 029import gudusoft.gsqlparser.compiler.TASTEvaluator; 030 031import java.io.BufferedReader; 032import java.util.ArrayList; 033import java.util.List; 034import java.util.Stack; 035 036/** 037 * Presto SQL parser implementation. 038 * 039 * <p>This parser handles Presto-specific SQL syntax including: 040 * <ul> 041 * <li>Presto SQL statements</li> 042 * <li>Presto PL/SQL blocks (stored procedures, functions)</li> 043 * <li>Semicolon statement separators</li> 044 * </ul> 045 * 046 * <p><b>Design Notes:</b> 047 * <ul> 048 * <li>Extends {@link AbstractSqlParser} using the template method pattern</li> 049 * <li>Uses {@link TLexerPresto} for tokenization</li> 050 * <li>Uses {@link TParserPresto} for parsing</li> 051 * <li>Delimiter character: ';' for SQL statements</li> 052 * </ul> 053 * 054 * @see SqlParser 055 * @see AbstractSqlParser 056 * @see TLexerPresto 057 * @see TParserPresto 058 * @since 3.2.0.0 059 */ 060public class PrestoSqlParser extends AbstractSqlParser { 061 062 /** 063 * Construct Presto SQL parser. 064 * <p> 065 * Configures the parser for Presto database with default delimiter (;). 066 * <p> 067 * Following the original TGSqlParser pattern, the lexer and parser are 068 * created once in the constructor and reused for all parsing operations. 069 */ 070 public PrestoSqlParser() { 071 super(EDbVendor.dbvpresto); 072 this.delimiterChar = ';'; 073 this.defaultDelimiterStr = ";"; 074 075 // Create lexer once - will be reused for all parsing operations 076 this.flexer = new TLexerPresto(); 077 this.flexer.delimiterchar = this.delimiterChar; 078 this.flexer.defaultDelimiterStr = this.defaultDelimiterStr; 079 080 // Set parent's lexer reference for shared tokenization logic 081 this.lexer = this.flexer; 082 083 // Create parser once - will be reused for all parsing operations 084 this.fparser = new TParserPresto(null); 085 this.fparser.lexer = this.flexer; 086 } 087 088 // ========== Parser Components ========== 089 090 /** The Presto lexer used for tokenization */ 091 public TLexerPresto flexer; 092 093 /** SQL parser (for Presto statements) */ 094 private TParserPresto fparser; 095 096 /** Current statement being built during extraction */ 097 private TCustomSqlStatement gcurrentsqlstatement; 098 099 // Note: Global context and frame stack fields inherited from AbstractSqlParser: 100 // - protected TContext globalContext 101 // - protected TSQLEnv sqlEnv 102 // - protected Stack<TFrame> frameStack 103 // - protected TFrame globalFrame 104 105 // ========== AbstractSqlParser Abstract Methods Implementation ========== 106 107 /** 108 * Return the Presto lexer instance. 109 */ 110 @Override 111 protected TCustomLexer getLexer(ParserContext context) { 112 return this.flexer; 113 } 114 115 /** 116 * Return the Presto SQL parser instance with updated token list. 117 */ 118 @Override 119 protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) { 120 this.fparser.sourcetokenlist = tokens; 121 return this.fparser; 122 } 123 124 /** 125 * Presto does not have a secondary parser (only Oracle has PL/SQL parser). 126 * 127 * @return null (no secondary parser) 128 */ 129 @Override 130 protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) { 131 return null; 132 } 133 134 /** 135 * Call Presto-specific tokenization logic. 136 * <p> 137 * Delegates to doprestotexttotokenlist which handles Presto's 138 * specific keyword recognition and token generation. 139 */ 140 @Override 141 protected void tokenizeVendorSql() { 142 doprestotexttotokenlist(); 143 } 144 145 /** 146 * Setup Presto parser for raw statement extraction. 147 * <p> 148 * Presto uses a single parser, so we inject sqlcmds and update 149 * the token list for the main parser only. 150 */ 151 @Override 152 protected void setupVendorParsersForExtraction() { 153 // Inject sqlcmds into parser (required for make_stmt) 154 this.fparser.sqlcmds = this.sqlcmds; 155 156 // Update token list for parser 157 this.fparser.sourcetokenlist = this.sourcetokenlist; 158 } 159 160 /** 161 * Call Presto-specific raw statement extraction logic. 162 * <p> 163 * Delegates to doprestogetrawsqlstatements which handles Presto's 164 * statement delimiters (semicolon and slash for PL/SQL). 165 */ 166 @Override 167 protected void extractVendorRawStatements(SqlParseResult.Builder builder) { 168 doprestogetrawsqlstatements(builder); 169 } 170 171 /** 172 * Perform full parsing of statements with syntax checking. 173 * <p> 174 * This method orchestrates the parsing of all statements. 175 */ 176 @Override 177 protected TStatementList performParsing(ParserContext context, 178 TCustomParser parser, 179 TCustomParser secondaryParser, 180 TSourceTokenList tokens, 181 TStatementList rawStatements) { 182 // Store references 183 this.fparser = (TParserPresto) parser; 184 this.sourcetokenlist = tokens; 185 this.parserContext = context; 186 187 // Use the raw statements passed from AbstractSqlParser.parse() 188 this.sqlstatements = rawStatements; 189 190 // Initialize statement parsing infrastructure 191 this.sqlcmds = SqlCmdsFactory.get(vendor); 192 193 // Inject sqlcmds into parser (required for make_stmt and other methods) 194 this.fparser.sqlcmds = this.sqlcmds; 195 196 // Initialize global context for semantic analysis 197 initializeGlobalContext(); 198 199 // Parse each statement with exception handling for robustness 200 for (int i = 0; i < sqlstatements.size(); i++) { 201 TCustomSqlStatement stmt = sqlstatements.getRawSql(i); 202 203 try { 204 stmt.setFrameStack(frameStack); 205 206 // Parse the statement 207 int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree()); 208 209 // Handle error recovery for CREATE TABLE/INDEX 210 boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE; 211 if (doRecover && ((parseResult != 0) || (stmt.getErrorCount() > 0))) { 212 handleCreateTableErrorRecovery(stmt); 213 } 214 215 // Collect syntax errors 216 if ((parseResult != 0) || (stmt.getErrorCount() > 0)) { 217 copyErrorsFromStatement(stmt); 218 } 219 220 } catch (Exception ex) { 221 // Use inherited exception handler from AbstractSqlParser 222 // This provides consistent error handling across all database parsers 223 handleStatementParsingException(stmt, i, ex); 224 continue; 225 } 226 } 227 228 // Clean up frame stack 229 if (globalFrame != null) { 230 globalFrame.popMeFromStack(frameStack); 231 } 232 233 return this.sqlstatements; 234 } 235 236 // Note: initializeGlobalContext() inherited from AbstractSqlParser 237 // Note: No override of afterStatementParsed() needed - default (no-op) is appropriate for Presto 238 239 /** 240 * Handle error recovery for CREATE TABLE/INDEX statements. 241 */ 242 private void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) { 243 if (((stmt.sqlstatementtype == ESqlStatementType.sstcreatetable) 244 || (stmt.sqlstatementtype == ESqlStatementType.sstcreateindex)) 245 && (!TBaseType.c_createTableStrictParsing)) { 246 247 int nested = 0; 248 boolean isIgnore = false, isFoundIgnoreToken = false; 249 TSourceToken firstIgnoreToken = null; 250 251 for (int k = 0; k < stmt.sourcetokenlist.size(); k++) { 252 TSourceToken st = stmt.sourcetokenlist.get(k); 253 if (isIgnore) { 254 if (st.issolidtoken() && (st.tokencode != ';')) { 255 isFoundIgnoreToken = true; 256 if (firstIgnoreToken == null) { 257 firstIgnoreToken = st; 258 } 259 } 260 if (st.tokencode != ';') { 261 st.tokencode = TBaseType.sqlpluscmd; 262 } 263 continue; 264 } 265 if (st.tokencode == (int) ')') { 266 nested--; 267 if (nested == 0) { 268 boolean isSelect = false; 269 TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1); 270 if (st1 != null) { 271 TSourceToken st2 = st.searchToken((int) '(', 2); 272 if (st2 != null) { 273 TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3); 274 isSelect = (st3 != null); 275 } 276 } 277 if (!isSelect) isIgnore = true; 278 } 279 } else if (st.tokencode == (int) '(') { 280 nested++; 281 } 282 } 283 284 if (isFoundIgnoreToken) { 285 stmt.clearError(); 286 stmt.parsestatement(null, false); 287 } 288 } 289 } 290 291 /** 292 * Perform Presto-specific semantic analysis using TSQLResolver. 293 */ 294 @Override 295 protected void performSemanticAnalysis(ParserContext context, TStatementList statements) { 296 if (TBaseType.isEnableResolver() && getSyntaxErrors().isEmpty()) { 297 TSQLResolver resolver = new TSQLResolver(globalContext, statements); 298 resolver.resolve(); 299 } 300 } 301 302 /** 303 * Perform interpretation/evaluation on parsed statements. 304 */ 305 @Override 306 protected void performInterpreter(ParserContext context, TStatementList statements) { 307 if (TBaseType.ENABLE_INTERPRETER && getSyntaxErrors().isEmpty()) { 308 TLog.clearLogs(); 309 TGlobalScope interpreterScope = new TGlobalScope(sqlEnv); 310 TLog.enableInterpreterLogOnly(); 311 TASTEvaluator astEvaluator = new TASTEvaluator(statements, interpreterScope); 312 astEvaluator.eval(); 313 } 314 } 315 316 // ========== Presto-Specific Tokenization ========== 317 318 /** 319 * Presto-specific tokenization logic. 320 * <p> 321 * Extracted from: TGSqlParser.doprestotexttotokenlist() (lines 4727-4757) 322 */ 323 private void doprestotexttotokenlist() { 324 TSourceToken asourcetoken; 325 int yychar; 326 boolean startDelimiter = false; 327 328 flexer.tmpDelimiter = ""; 329 330 asourcetoken = getanewsourcetoken(); 331 if (asourcetoken == null) return; 332 yychar = asourcetoken.tokencode; 333 334 while (yychar > 0) { 335 sourcetokenlist.add(asourcetoken); 336 asourcetoken = getanewsourcetoken(); 337 if (asourcetoken == null) break; 338 checkMySQLCommentToken(asourcetoken); 339 340 if ((asourcetoken.tokencode == TBaseType.lexnewline) && (startDelimiter)) { 341 startDelimiter = false; 342 flexer.tmpDelimiter = sourcetokenlist.get(sourcetokenlist.size() - 1).getAstext(); 343 } 344 345 yychar = asourcetoken.tokencode; 346 } 347 } 348 349 /** 350 * Check MySQL comment token format. 351 * <p> 352 * This method is currently a no-op (commented out in original TGSqlParser). 353 */ 354 private void checkMySQLCommentToken(TSourceToken cmtToken) { 355 // Currently a no-op - left for future enhancement 356 } 357 358 // ========== Presto-Specific Raw Statement Extraction ========== 359 360 /** 361 * Extract raw SQL statements from token list. 362 * <p> 363 * Extracted from: TGSqlParser.doprestogetrawsqlstatements() (lines 7205-7424) 364 * <p> 365 * Handles Presto-specific statement boundaries: 366 * - Semicolon (;) for SQL statements 367 * - Forward slash (/) for PL/SQL blocks 368 * - BEGIN/END blocks for stored procedures 369 */ 370 private void doprestogetrawsqlstatements(SqlParseResult.Builder builder) { 371 int waitingEnd = 0; 372 boolean foundEnd = false; 373 374 if (TBaseType.assigned(sqlstatements)) sqlstatements.clear(); 375 if (!TBaseType.assigned(sourcetokenlist)) { 376 builder.errorCode(-1); 377 return; 378 } 379 380 gcurrentsqlstatement = null; 381 EFindSqlStateType gst = EFindSqlStateType.stnormal; 382 TSourceToken lcprevsolidtoken = null, ast = null; 383 384 for (int i = 0; i < sourcetokenlist.size(); i++) { 385 386 if ((ast != null) && (ast.issolidtoken())) 387 lcprevsolidtoken = ast; 388 389 ast = sourcetokenlist.get(i); 390 sourcetokenlist.curpos = i; 391 392 switch (gst) { 393 case sterror: { 394 if (ast.tokentype == ETokenType.ttsemicolon) { 395 gcurrentsqlstatement.sourcetokenlist.add(ast); 396 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 397 gst = EFindSqlStateType.stnormal; 398 } else { 399 gcurrentsqlstatement.sourcetokenlist.add(ast); 400 } 401 break; 402 } //sterror 403 404 case stnormal: { 405 if ((ast.tokencode == TBaseType.cmtdoublehyphen) 406 || (ast.tokencode == TBaseType.cmtslashstar) 407 || (ast.tokencode == TBaseType.lexspace) 408 || (ast.tokencode == TBaseType.lexnewline) 409 || (ast.tokentype == ETokenType.ttsemicolon)) { 410 if (gcurrentsqlstatement != null) { 411 gcurrentsqlstatement.sourcetokenlist.add(ast); 412 } 413 414 if ((lcprevsolidtoken != null) && (ast.tokentype == ETokenType.ttsemicolon)) { 415 if (lcprevsolidtoken.tokentype == ETokenType.ttsemicolon) { 416 // ;;;; continuous semicolon,treat it as comment 417 ast.tokentype = ETokenType.ttsimplecomment; 418 ast.tokencode = TBaseType.cmtdoublehyphen; 419 } 420 } 421 422 continue; 423 } 424 425 // find a tokentext to start sql or plsql mode 426 gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement); 427 428 if (gcurrentsqlstatement != null) { 429 if (gcurrentsqlstatement.isprestoplsql()) { 430 gst = EFindSqlStateType.ststoredprocedure; 431 gcurrentsqlstatement.sourcetokenlist.add(ast); 432 foundEnd = false; 433 if ((ast.tokencode == TBaseType.rrw_begin) 434 || (ast.tokencode == TBaseType.rrw_package) 435 || (ast.searchToken(TBaseType.rrw_package, 4) != null)) { 436 waitingEnd = 1; 437 } 438 } else { 439 gst = EFindSqlStateType.stsql; 440 gcurrentsqlstatement.sourcetokenlist.add(ast); 441 } 442 } else { 443 //error tokentext found 444 445 this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo) 446 , "Error when tokenlize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist)); 447 448 ast.tokentype = ETokenType.tttokenlizererrortoken; 449 gst = EFindSqlStateType.sterror; 450 451 gcurrentsqlstatement = new TUnknownSqlStatement(vendor); 452 gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid; 453 gcurrentsqlstatement.sourcetokenlist.add(ast); 454 455 } 456 457 break; 458 } // stnormal 459 460 case stsql: { 461 if (ast.tokentype == ETokenType.ttsemicolon) { 462 gst = EFindSqlStateType.stnormal; 463 gcurrentsqlstatement.sourcetokenlist.add(ast); 464 gcurrentsqlstatement.semicolonended = ast; 465 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 466 continue; 467 } 468 469 if (sourcetokenlist.sqlplusaftercurtoken()) //most probaly is / cmd 470 { 471 gst = EFindSqlStateType.stnormal; 472 gcurrentsqlstatement.sourcetokenlist.add(ast); 473 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 474 continue; 475 } 476 gcurrentsqlstatement.sourcetokenlist.add(ast); 477 break; 478 }//case stsql 479 480 case ststoredprocedure: { 481 if ((ast.tokencode == TBaseType.rrw_begin)) { 482 waitingEnd++; 483 } else if ((ast.tokencode == TBaseType.rrw_if)) { 484 if (ast.searchToken(TBaseType.rrw_end, -1) == null) { 485 //this is not if after END 486 waitingEnd++; 487 } 488 } else if ((ast.tokencode == TBaseType.rrw_case)) { 489 if (ast.searchToken(TBaseType.rrw_end, -1) == null) { 490 //this is not case after END 491 waitingEnd++; 492 } 493 } else if ((ast.tokencode == TBaseType.rrw_loop)) { 494 if (ast.searchToken(TBaseType.rrw_end, -1) == null) { 495 //this is not loop after END 496 waitingEnd++; 497 } 498 } else if (ast.tokencode == TBaseType.rrw_end) { 499 foundEnd = true; 500 waitingEnd--; 501 if (waitingEnd < 0) { 502 waitingEnd = 0; 503 } 504 } 505 506 if ((ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) //and (prevst.NewlineIsLastTokenInTailerToken)) then 507 { 508 // TPlsqlStatementParse(asqlstatement).TerminatorToken := ast; 509 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 510 gst = EFindSqlStateType.stnormal; 511 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 512 513 //make / a sqlplus cmd 514 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 515 gcurrentsqlstatement.sourcetokenlist.add(ast); 516 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 517 } else if ((ast.tokentype == ETokenType.ttperiod) && (sourcetokenlist.returnaftercurtoken(false)) && (sourcetokenlist.returnbeforecurtoken(false))) { // single dot at a seperate line 518 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 519 gst = EFindSqlStateType.stnormal; 520 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 521 522 //make ttperiod a sqlplus cmd 523 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 524 gcurrentsqlstatement.sourcetokenlist.add(ast); 525 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 526 } else { 527 gcurrentsqlstatement.sourcetokenlist.add(ast); 528 if ((ast.tokentype == ETokenType.ttsemicolon) && (waitingEnd == 0) 529 && (foundEnd)) { 530 gst = EFindSqlStateType.stnormal; 531 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 532 } 533 } 534 535 if (ast.tokencode == TBaseType.sqlpluscmd) { 536 //change tokencode back to keyword or TBaseType.ident, because sqlplus cmd 537 //in a sql statement(almost is plsql block) is not really a sqlplus cmd 538 int m = flexer.getkeywordvalue(ast.getAstext()); 539 if (m != 0) { 540 ast.tokencode = m; 541 } else { 542 ast.tokencode = TBaseType.ident; 543 } 544 } 545 546 break; 547 } //ststoredprocedure 548 } //switch 549 }//for 550 551 //last statement 552 if ((gcurrentsqlstatement != null) && 553 ((gst == EFindSqlStateType.stsqlplus) || (gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.ststoredprocedure) || 554 (gst == EFindSqlStateType.sterror))) { 555 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, true, builder); 556 } 557 558 // Set the statement list in the builder 559 builder.sqlStatements(this.sqlstatements); 560 builder.errorCode(syntaxErrors.size()); 561 } 562 563 @Override 564 public String toString() { 565 return "PrestoSqlParser{vendor=" + vendor + "}"; 566 } 567}