001package gudusoft.gsqlparser.parser; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.TBaseType; 005import gudusoft.gsqlparser.TCustomLexer; 006import gudusoft.gsqlparser.TCustomParser; 007import gudusoft.gsqlparser.TCustomSqlStatement; 008import gudusoft.gsqlparser.TLexerBigquery; 009import gudusoft.gsqlparser.TParserBigquery; 010import gudusoft.gsqlparser.TSourceToken; 011import gudusoft.gsqlparser.TSourceTokenList; 012import gudusoft.gsqlparser.TStatementList; 013import gudusoft.gsqlparser.TSyntaxError; 014import gudusoft.gsqlparser.EFindSqlStateType; 015import gudusoft.gsqlparser.ETokenType; 016import gudusoft.gsqlparser.ETokenStatus; 017import gudusoft.gsqlparser.ESqlStatementType; 018import gudusoft.gsqlparser.EErrorType; 019import gudusoft.gsqlparser.stmt.TUnknownSqlStatement; 020import gudusoft.gsqlparser.stmt.TCommonStoredProcedureSqlStatement; 021import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement; 022import gudusoft.gsqlparser.stmt.oracle.TPlsqlCreatePackage; 023import gudusoft.gsqlparser.nodes.TTypeName; 024import gudusoft.gsqlparser.sqlcmds.ISqlCmds; 025import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory; 026import gudusoft.gsqlparser.compiler.TContext; 027import gudusoft.gsqlparser.sqlenv.TSQLEnv; 028import gudusoft.gsqlparser.compiler.TGlobalScope; 029import gudusoft.gsqlparser.compiler.TFrame; 030import gudusoft.gsqlparser.resolver.TSQLResolver; 031import gudusoft.gsqlparser.TLog; 032import gudusoft.gsqlparser.compiler.TASTEvaluator; 033 034import java.io.BufferedReader; 035import java.util.ArrayList; 036import java.util.Arrays; 037import java.util.List; 038import java.util.Stack; 039 040import static gudusoft.gsqlparser.ESqlStatementType.*; 041 042/** 043 * Google BigQuery SQL parser implementation. 044 * 045 * <p>This parser handles BigQuery-specific SQL syntax including: 046 * <ul> 047 * <li>BigQuery scripting language (BEGIN...END blocks, IF/WHILE/FOR/LOOP/REPEAT statements)</li> 048 * <li>BigQuery UDFs (CREATE FUNCTION with SQL or JavaScript)</li> 049 * <li>BigQuery procedures (CREATE PROCEDURE with BEGIN...END blocks)</li> 050 * <li>Special type handling (STRUCT, ARRAY, DATE/TIME/TIMESTAMP literals)</li> 051 * <li>Backtick-quoted identifiers including qualified names (`schema.table`)</li> 052 * <li>BigQuery-specific keywords and functions</li> 053 * </ul> 054 * 055 * <p><b>Design Notes:</b> 056 * <ul> 057 * <li>Extends {@link AbstractSqlParser} using the template method pattern</li> 058 * <li>Uses {@link TLexerBigquery} for tokenization</li> 059 * <li>Uses {@link TParserBigquery} for parsing</li> 060 * <li>Delimiter character: ';' for SQL statements</li> 061 * <li>Splits backtick-quoted qualified names (`schema.table`) into individual tokens</li> 062 * </ul> 063 * 064 * <p><b>Usage Example:</b> 065 * <pre> 066 * // Get BigQuery parser from factory 067 * SqlParser parser = SqlParserFactory.get(EDbVendor.dbvbigquery); 068 * 069 * // Build context 070 * ParserContext context = new ParserContext.Builder(EDbVendor.dbvbigquery) 071 * .sqlText("SELECT * FROM `project.dataset.table` WHERE region = 'US'") 072 * .build(); 073 * 074 * // Parse 075 * SqlParseResult result = parser.parse(context); 076 * 077 * // Access statements 078 * TStatementList statements = result.getSqlStatements(); 079 * </pre> 080 * 081 * @see SqlParser 082 * @see AbstractSqlParser 083 * @see TLexerBigquery 084 * @see TParserBigquery 085 * @since 3.2.0.0 086 */ 087public class BigQuerySqlParser extends AbstractSqlParser { 088 089 /** 090 * Construct BigQuery SQL parser. 091 * <p> 092 * Configures the parser for BigQuery database with default delimiter (;). 093 * <p> 094 * Following the original TGSqlParser pattern, the lexer and parser are 095 * created once in the constructor and reused for all parsing operations. 096 */ 097 public BigQuerySqlParser() { 098 super(EDbVendor.dbvbigquery); 099 this.delimiterChar = ';'; 100 this.defaultDelimiterStr = ";"; 101 102 // Create lexer once - will be reused for all parsing operations 103 this.flexer = new TLexerBigquery(); 104 this.flexer.delimiterchar = this.delimiterChar; 105 this.flexer.defaultDelimiterStr = this.defaultDelimiterStr; 106 107 // Set parent's lexer reference for shared tokenization logic 108 this.lexer = this.flexer; 109 110 // Create parser once - will be reused for all parsing operations 111 this.fparser = new TParserBigquery(null); 112 this.fparser.lexer = this.flexer; 113 } 114 115 // ========== Parser Components ========== 116 117 /** The BigQuery lexer used for tokenization */ 118 public TLexerBigquery flexer; 119 120 /** SQL parser (for BigQuery statements) */ 121 private TParserBigquery fparser; 122 123 /** Current statement being built during extraction */ 124 private TCustomSqlStatement gcurrentsqlstatement; 125 126 /** Parser context for current operation */ 127 private ParserContext parserContext; 128 129 /** User-defined delimiter string for MySQL DELIMITER command */ 130 private String userDelimiterStr; 131 132 // Stored procedure parsing state tracking 133 private enum stored_procedure_type { 134 procedure, function, package_spec, package_body, block_with_declare, 135 block_with_begin, create_trigger, create_library, others 136 } 137 138 private enum stored_procedure_status { 139 start, is_as, body, bodyend, end 140 } 141 142 private static final int stored_procedure_nested_level = 50; 143 144 // Note: Global context and frame stack fields inherited from AbstractSqlParser: 145 // - protected TContext globalContext 146 // - protected TSQLEnv sqlEnv 147 // - protected Stack<TFrame> frameStack 148 // - protected TFrame globalFrame 149 150 // ========== AbstractSqlParser Abstract Methods Implementation ========== 151 152 /** 153 * Return the BigQuery lexer instance. 154 */ 155 @Override 156 protected TCustomLexer getLexer(ParserContext context) { 157 return this.flexer; 158 } 159 160 /** 161 * Return the BigQuery SQL parser instance with updated token list. 162 */ 163 @Override 164 protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) { 165 this.fparser.sourcetokenlist = tokens; 166 return this.fparser; 167 } 168 169 /** 170 * BigQuery does not use a secondary parser (unlike Oracle with PL/SQL). 171 */ 172 @Override 173 protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) { 174 return null; 175 } 176 177 /** 178 * Call BigQuery-specific tokenization logic. 179 * <p> 180 * Delegates to dobigquerysqltexttotokenlist which handles BigQuery's 181 * specific keyword recognition, backtick-quoted identifiers, and 182 * qualified name splitting. 183 */ 184 @Override 185 protected void tokenizeVendorSql() { 186 dobigquerysqltexttotokenlist(); 187 } 188 189 /** 190 * Setup BigQuery parser for raw statement extraction. 191 * <p> 192 * BigQuery uses a single parser, so we inject sqlcmds and update 193 * the token list for the main parser only. 194 */ 195 @Override 196 protected void setupVendorParsersForExtraction() { 197 // Inject sqlcmds into parser (required for make_stmt) 198 this.fparser.sqlcmds = this.sqlcmds; 199 200 // Update token list for parser 201 this.fparser.sourcetokenlist = this.sourcetokenlist; 202 } 203 204 /** 205 * Call BigQuery-specific raw statement extraction logic. 206 * <p> 207 * Delegates to dobigquerygetrawsqlstatements which handles BigQuery's 208 * statement delimiters and scripting language boundaries (BEGIN...END blocks, 209 * IF/WHILE/FOR/LOOP statements). 210 */ 211 @Override 212 protected void extractVendorRawStatements(SqlParseResult.Builder builder) { 213 int errorCount = dobigquerygetrawsqlstatements(builder); 214 // Error count is tracked internally; errors are already added to syntaxErrors list 215 216 // Set the extracted statements in the builder 217 builder.sqlStatements(this.sqlstatements); 218 } 219 220 // ========== Tokenization Methods ========== 221 222 /** 223 * Tokenize BigQuery SQL text into a list of tokens. 224 * <p> 225 * This method handles BigQuery-specific token processing: 226 * <ul> 227 * <li>Splits backtick-quoted qualified names (`schema.table`) into separate tokens</li> 228 * <li>Handles all standard SQL tokens (keywords, identifiers, operators, etc.)</li> 229 * </ul> 230 * <p> 231 * Migrated from TGSqlParser.dobigquerysqltexttotokenlist() 232 */ 233 private void dobigquerysqltexttotokenlist() { 234 TSourceToken asourcetoken, lcprevst; 235 int yychar; 236 237 flexer.tmpDelimiter = ""; 238 239 asourcetoken = getanewsourcetoken(); 240 if (asourcetoken == null) return; 241 yychar = asourcetoken.tokencode; 242 243 while (yychar > 0) { 244 if (asourcetoken != null) { 245 sourcetokenlist.add(asourcetoken); 246 } 247 248 asourcetoken = getanewsourcetoken(); 249 if (asourcetoken == null) break; 250 yychar = asourcetoken.tokencode; 251 252 // `schema.table_name` - split into separate tokens 253 if ((asourcetoken.tokencode == TBaseType.ident) 254 && (asourcetoken.toString().startsWith("`")) && (asourcetoken.toString().endsWith("`")) 255 && (asourcetoken.toString().indexOf(".") > 0)) { 256 yychar = splitQualifiedNameInBacktick(asourcetoken); 257 asourcetoken = null; 258 } 259 } 260 } 261 262 /** 263 * Turn one token: `schema.table_name` into 3 tokens: `schema` . `table_name` 264 * <p> 265 * This helper method splits backtick-quoted qualified names into individual 266 * identifier and period tokens, preserving line/column information for each part. 267 * <p> 268 * Migrated from TGSqlParser.splitQualifiedNameInBacktick() 269 * 270 * @param asourcetoken the token to split 271 * @return the token code of the last token created 272 */ 273 private int splitQualifiedNameInBacktick(TSourceToken asourcetoken) { 274 int yychar = 0; 275 276 List<String> elephantList = Arrays.asList(TBaseType.getTextWithoutQuoted(asourcetoken.toString()).split("\\.")); 277 int p = 0, offset = 0; 278 for (String s : elephantList) { 279 TSourceToken pst = new TSourceToken("`" + s + "`"); 280 pst.tokencode = asourcetoken.tokencode; 281 pst.tokentype = asourcetoken.tokentype; 282 pst.tokenstatus = asourcetoken.tokenstatus; 283 pst.lineNo = asourcetoken.lineNo; 284 pst.columnNo = asourcetoken.columnNo + offset; 285 if (p == 0) offset++; // this count the first ` token 286 offset = offset + s.length(); 287 pst.container = sourcetokenlist; 288 if (p > 0) { // 第一个token使用被拆分前那个token的位置,从第二个开始的token,需要先把列表的位置指针加 1 289 sourcetokenlist.curpos = sourcetokenlist.curpos + 1; 290 } 291 pst.posinlist = sourcetokenlist.curpos; 292 293 sourcetokenlist.add(pst); 294 yychar = pst.tokencode; 295 296 if (p != elephantList.size() - 1) { 297 //`schema.table_name`, add period token in the middle of the backtick included identifier. 298 TSourceToken periodst = new TSourceToken("."); 299 periodst.tokencode = '.'; 300 periodst.tokentype = ETokenType.ttperiod; 301 periodst.tokenstatus = asourcetoken.tokenstatus; 302 periodst.lineNo = asourcetoken.lineNo; 303 periodst.columnNo = asourcetoken.columnNo + offset; 304 offset++; 305 periodst.container = sourcetokenlist; 306 sourcetokenlist.curpos = sourcetokenlist.curpos + 1; 307 periodst.posinlist = sourcetokenlist.curpos; 308 sourcetokenlist.add(periodst); 309 yychar = periodst.tokencode; 310 } 311 312 p++; 313 } 314 315 return yychar; 316 } 317 318 // ========== Raw Statement Extraction ========== 319 320 /** 321 * Check if token code is one that pairs with END keyword. 322 * <p> 323 * Used for BigQuery scripting language to track nested BEGIN/IF/CASE/LOOP/WHILE/FOR/REPEAT blocks. 324 * <p> 325 * Migrated from TGSqlParser.checkTokenPairWithEnd() 326 */ 327 private boolean checkTokenPairWithEnd(int tokencode) { 328 return ((tokencode == TBaseType.rrw_if) || (tokencode == TBaseType.rrw_case) 329 || (tokencode == TBaseType.rrw_loop) || (tokencode == TBaseType.rrw_repeat) 330 || (tokencode == TBaseType.rrw_while) || (tokencode == TBaseType.rrw_for) 331 || (tokencode == TBaseType.rrw_case)); 332 } 333 334 /** 335 * Extract raw SQL statements from the token list. 336 * <p> 337 * This method separates individual SQL statements without full syntax checking. 338 * It handles BigQuery-specific syntax including: 339 * <ul> 340 * <li>Stored procedures and functions with BEGIN...END blocks</li> 341 * <li>Scripting language statements (IF, WHILE, FOR, LOOP, REPEAT, CASE)</li> 342 * <li>STRUCT constructor special handling</li> 343 * <li>Type casting with literals (DATE '2021-01-01')</li> 344 * <li>Nested procedure tracking</li> 345 * </ul> 346 * <p> 347 * Migrated from TGSqlParser.dobigquerygetrawsqlstatements() 348 * 349 * @param builder the result builder to populate 350 * @return error count 351 */ 352 private int dobigquerygetrawsqlstatements(SqlParseResult.Builder builder) { 353 int errorcount = 0; 354 gcurrentsqlstatement = null; 355 EFindSqlStateType gst = EFindSqlStateType.stnormal; 356 int i, c, beginNested = 0, waitingEnd = 0; 357 TSourceToken ast = null, lcprevsolidtoken = null; 358 boolean waitingDelimiter = false; 359 360 int waitingEnds[] = new int[stored_procedure_nested_level]; 361 stored_procedure_type sptype[] = new stored_procedure_type[stored_procedure_nested_level]; 362 stored_procedure_status procedure_status[] = new stored_procedure_status[stored_procedure_nested_level]; 363 boolean endBySlashOnly = true; 364 int nestedProcedures = 0, nestedParenthesis = 0; 365 366 //reset delimiter 367 userDelimiterStr = defaultDelimiterStr; 368 369 for (i = 0; i < sourcetokenlist.size(); i++) { 370 if ((ast != null) && (ast.issolidtoken())) 371 lcprevsolidtoken = ast; 372 373 ast = sourcetokenlist.get(i); 374 sourcetokenlist.curpos = i; 375 376 // Handle STRUCT constructor: STRUCT(...) -> mark as struct constructor 377 if (ast.tokencode == TBaseType.rrw_bigquery_struct) { 378 TSourceToken st1 = ast.nextSolidToken(); 379 if (st1 != null) { 380 if (st1.tokencode == '(') { 381 ast.tokencode = TBaseType.rrw_bigquery_struct_constructor; 382 } 383 } 384 } 385 // Handle type casting: DATE '2021-01-01' -> mark DATE as datatype used to cast 386 else if ((ast.tokencode == TBaseType.sconst)) { 387 if (TTypeName.searchTypeByName(lcprevsolidtoken.toString()) != null) { 388 if (lcprevsolidtoken.tokencode != TBaseType.rrw_interval) { 389 lcprevsolidtoken.tokencode = TBaseType.rrw_bigquery_datatype_used_to_cast; 390 } 391 } 392 } 393 // Handle TIME/DATE: could be function or type literal 394 else if ((ast.tokencode == TBaseType.rrw_time) || (ast.tokencode == TBaseType.rrw_date)) { 395 TSourceToken st1 = ast.nextSolidToken(); 396 if (st1 != null) { 397 if (st1.tokencode == TBaseType.sconst) { 398 // ast.tokencode = TBaseType.rrw_bigquery_time_before_const; 399 } else if (st1.tokencode == '(') { 400 ast.tokencode = TBaseType.ident; 401 } 402 } 403 } 404 // Handle FROM after period: i.from -> treat FROM as identifier 405 else if (ast.tokencode == TBaseType.rrw_from) { 406 TSourceToken st1 = ast.prevSolidToken(); 407 if (st1 != null) { // select i.from as `from` from t, treats from in i.from as identifier 408 if (st1.tokencode == '.') { 409 ast.tokencode = TBaseType.ident; 410 } 411 } 412 } 413 414 switch (gst) { 415 case sterror: { 416 if (ast.tokentype == ETokenType.ttsemicolon) { 417 gcurrentsqlstatement.sourcetokenlist.add(ast); 418 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 419 gst = EFindSqlStateType.stnormal; 420 } else { 421 gcurrentsqlstatement.sourcetokenlist.add(ast); 422 } 423 break; 424 } 425 case stnormal: { 426 if ((ast.tokencode == TBaseType.cmtdoublehyphen) 427 || (ast.tokencode == TBaseType.cmtslashstar) 428 || (ast.tokencode == TBaseType.lexspace) 429 || (ast.tokencode == TBaseType.lexnewline) 430 || (ast.tokentype == ETokenType.ttsemicolon)) { 431 if (TBaseType.assigned(gcurrentsqlstatement)) { 432 gcurrentsqlstatement.sourcetokenlist.add(ast); 433 } 434 435 continue; 436 } 437 438 // find a token to start sql or plsql mode 439 gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement); 440 441 if (TBaseType.assigned(gcurrentsqlstatement)) { 442 if (gcurrentsqlstatement.isBigQueryplsql()) { 443 nestedProcedures = 0; 444 gst = EFindSqlStateType.ststoredprocedure; 445 gcurrentsqlstatement.sourcetokenlist.add(ast); 446 447 switch (gcurrentsqlstatement.sqlstatementtype) { 448 case sstplsql_createprocedure: 449 case sstcreateprocedure: 450 sptype[nestedProcedures] = stored_procedure_type.procedure; 451 break; 452 case sstplsql_createfunction: 453 sptype[nestedProcedures] = stored_procedure_type.function; 454 break; 455 case sstplsql_createpackage: 456 sptype[nestedProcedures] = stored_procedure_type.package_spec; 457 if (ast.searchToken(TBaseType.rrw_body, 5) != null) { 458 sptype[nestedProcedures] = stored_procedure_type.package_body; 459 } 460 break; 461 case sst_plsql_block: 462 sptype[nestedProcedures] = stored_procedure_type.block_with_declare; 463 if (ast.tokencode == TBaseType.rrw_begin) { 464 sptype[nestedProcedures] = stored_procedure_type.block_with_begin; 465 } 466 break; 467 case sstplsql_createtrigger: 468 sptype[nestedProcedures] = stored_procedure_type.create_trigger; 469 break; 470 case sstoraclecreatelibrary: 471 sptype[nestedProcedures] = stored_procedure_type.create_library; 472 break; 473 case sstplsql_createtype_placeholder: 474 gst = EFindSqlStateType.stsql; 475 break; 476 default: 477 sptype[nestedProcedures] = stored_procedure_type.others; 478 break; 479 } 480 481 if (sptype[0] == stored_procedure_type.block_with_declare) { 482 // sd 483 endBySlashOnly = false; 484 procedure_status[0] = stored_procedure_status.is_as; 485 } else if (sptype[0] == stored_procedure_type.block_with_begin) { 486 // sb 487 endBySlashOnly = false; 488 procedure_status[0] = stored_procedure_status.body; 489 } else if (sptype[0] == stored_procedure_type.procedure) { 490 // ss 491 endBySlashOnly = false; 492 procedure_status[0] = stored_procedure_status.start; 493 } else if (sptype[0] == stored_procedure_type.function) { 494 // ss 495 endBySlashOnly = false; 496 procedure_status[0] = stored_procedure_status.start; 497 } else if (sptype[0] == stored_procedure_type.package_spec) { 498 // ss 499 endBySlashOnly = false; 500 procedure_status[0] = stored_procedure_status.start; 501 } else if (sptype[0] == stored_procedure_type.package_body) { 502 // ss 503 endBySlashOnly = false; 504 procedure_status[0] = stored_procedure_status.start; 505 } else if (sptype[0] == stored_procedure_type.create_trigger) { 506 // ss 507 endBySlashOnly = false; 508 procedure_status[0] = stored_procedure_status.start; 509 } else if (sptype[0] == stored_procedure_type.create_library) { 510 // ss 511 endBySlashOnly = false; 512 procedure_status[0] = stored_procedure_status.bodyend; 513 } else { 514 // so 515 endBySlashOnly = true; 516 procedure_status[0] = stored_procedure_status.bodyend; 517 } 518 if ((ast.tokencode == TBaseType.rrw_begin) 519 || (ast.tokencode == TBaseType.rrw_package) 520 || (ast.searchToken(TBaseType.rrw_package, 4) != null)) { 521 waitingEnds[nestedProcedures] = 1; 522 } 523 524 } else if ((gcurrentsqlstatement.sqlstatementtype == sst_ifstmt) 525 || (gcurrentsqlstatement.sqlstatementtype == sst_loopstmt) 526 || (gcurrentsqlstatement.sqlstatementtype == sstRepeat) 527 || (gcurrentsqlstatement.sqlstatementtype == sstWhilestmt) 528 || (gcurrentsqlstatement.sqlstatementtype == sstForStmt) 529 || (gcurrentsqlstatement.sqlstatementtype == sst_plsql_block) 530 || (gcurrentsqlstatement.sqlstatementtype == sst_casestmt)) { 531 gst = EFindSqlStateType.stBigQueryIf; 532 waitingEnd = 1; 533 gcurrentsqlstatement.sourcetokenlist.add(ast); 534 } else { 535 gst = EFindSqlStateType.stsql; 536 gcurrentsqlstatement.sourcetokenlist.add(ast); 537 } 538 539 } 540 541 if (!TBaseType.assigned(gcurrentsqlstatement)) //error token found 542 { 543 this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo) 544 , "Error when tokenlize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist)); 545 546 ast.tokentype = ETokenType.tttokenlizererrortoken; 547 gst = EFindSqlStateType.sterror; 548 549 gcurrentsqlstatement = new TUnknownSqlStatement(vendor); 550 gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid; 551 gcurrentsqlstatement.sourcetokenlist.add(ast); 552 553 } 554 break; 555 } 556 case stBigQueryIf: { 557 gcurrentsqlstatement.sourcetokenlist.add(ast); 558 559 if (checkTokenPairWithEnd(ast.tokencode)) { // if... end if 560 if (ast.searchToken(TBaseType.rrw_end, -1) == null) { 561 //this is not if after END 562 waitingEnd++; 563 } 564 } else if (ast.tokencode == TBaseType.rrw_end) { // if ... end if 565 TSourceToken next = ast.nextSolidToken(); 566 if (next != null) { 567 if (checkTokenPairWithEnd(next.tokencode)) { // if ... end if; 568 waitingEnd--; 569 } else if (next.tokencode == ';') { // begin ... end ; 570 waitingEnd--; 571 } 572 573 } 574 } else if ((ast.tokencode == ';') && (waitingEnd == 0)) { 575 gst = EFindSqlStateType.stnormal; 576 577 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 578 } 579 break; 580 } 581 case stsqlplus: { 582 if (ast.tokencode == TBaseType.lexnewline) { 583 gst = EFindSqlStateType.stnormal; 584 gcurrentsqlstatement.sourcetokenlist.add(ast); // so add it here 585 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 586 } else { 587 { 588 gcurrentsqlstatement.sourcetokenlist.add(ast); 589 } 590 } 591 592 break; 593 }//case source command or \. command 594 case stsql: { 595 if ((ast.tokentype == ETokenType.ttsemicolon) && (gcurrentsqlstatement.sqlstatementtype != ESqlStatementType.sstmysqldelimiter)) { 596 gst = EFindSqlStateType.stnormal; 597 gcurrentsqlstatement.sourcetokenlist.add(ast); 598 gcurrentsqlstatement.semicolonended = ast; 599 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 600 continue; 601 } 602 if (ast.toString().equalsIgnoreCase(userDelimiterStr)) { 603 gst = EFindSqlStateType.stnormal; 604 ast.tokencode = ';';// treat it as semicolon 605 gcurrentsqlstatement.sourcetokenlist.add(ast); 606 gcurrentsqlstatement.semicolonended = ast; 607 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 608 continue; 609 } 610 gcurrentsqlstatement.sourcetokenlist.add(ast); 611 612 if ((ast.tokencode == TBaseType.lexnewline) 613 && (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstmysqldelimiter)) { 614 gst = EFindSqlStateType.stnormal; 615 userDelimiterStr = ""; 616 for (int k = 0; k < gcurrentsqlstatement.sourcetokenlist.size(); k++) { 617 TSourceToken st = gcurrentsqlstatement.sourcetokenlist.get(k); 618 if ((st.tokencode == TBaseType.rrw_mysql_delimiter) 619 || (st.tokencode == TBaseType.lexnewline) 620 || (st.tokencode == TBaseType.lexspace)) { 621 continue; 622 } 623 624 userDelimiterStr += st.toString(); 625 } 626 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 627 628 continue; 629 } 630 631 break; 632 } 633 case ststoredprocedure: { 634 if (procedure_status[nestedProcedures] != stored_procedure_status.bodyend) { 635 gcurrentsqlstatement.sourcetokenlist.add(ast); 636 } 637 638 switch (procedure_status[nestedProcedures]) { 639 case start: 640 if ((ast.tokencode == TBaseType.rrw_as) || (ast.tokencode == TBaseType.rrw_is)) { 641 // s1 642 if (sptype[nestedProcedures] != stored_procedure_type.create_trigger) { 643 if ((sptype[0] == stored_procedure_type.package_spec) && (nestedProcedures > 0)) { 644 //when it's a package specification, only top level accept as/is 645 } else { 646 procedure_status[nestedProcedures] = stored_procedure_status.is_as; 647 if (ast.searchToken("language", 1) != null) { 648 if (nestedProcedures == 0) { 649 gst = EFindSqlStateType.stsql; 650 } else { 651 procedure_status[nestedProcedures] = stored_procedure_status.body; 652 nestedProcedures--; 653 } 654 655 } 656 } 657 } 658 } else if (ast.tokencode == TBaseType.rrw_begin) { 659 // s4 660 if (sptype[nestedProcedures] == stored_procedure_type.create_trigger) 661 waitingEnds[nestedProcedures]++; 662 663 if (nestedProcedures > 0) { 664 nestedProcedures--; 665 } 666 procedure_status[nestedProcedures] = stored_procedure_status.body; 667 waitingEnds[nestedProcedures] = 1; 668 } else if (ast.tokencode == TBaseType.rrw_end) { 669 //s10 670 if ((nestedProcedures > 0) && (waitingEnds[nestedProcedures - 1] == 1) 671 && ((sptype[nestedProcedures - 1] == stored_procedure_type.package_body) 672 || (sptype[nestedProcedures - 1] == stored_procedure_type.package_spec))) { 673 nestedProcedures--; 674 procedure_status[nestedProcedures] = stored_procedure_status.bodyend; 675 } 676 } else if ((ast.tokencode == TBaseType.rrw_procedure) || (ast.tokencode == TBaseType.rrw_function)) { 677 //s3 678 if ((nestedProcedures > 0) && (waitingEnds[nestedProcedures] == 0) 679 && (procedure_status[nestedProcedures - 1] == stored_procedure_status.is_as)) { 680 nestedProcedures--; 681 nestedProcedures++; 682 waitingEnds[nestedProcedures] = 0; 683 procedure_status[nestedProcedures] = stored_procedure_status.start; 684 } 685 } else if ((sptype[nestedProcedures] == stored_procedure_type.create_trigger) && (ast.tokencode == TBaseType.rrw_declare)) { 686 procedure_status[nestedProcedures] = stored_procedure_status.is_as; 687 } else if ((sptype[nestedProcedures] == stored_procedure_type.create_trigger) && (ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) { 688 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 689 gst = EFindSqlStateType.stnormal; 690 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 691 692 //make / a sqlplus cmd 693 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 694 gcurrentsqlstatement.sourcetokenlist.add(ast); 695 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 696 } else if ((sptype[nestedProcedures] == stored_procedure_type.create_trigger)) { 697 if (ast.tokencode == TBaseType.rrw_trigger) { 698 TSourceToken compoundSt = ast.searchToken(TBaseType.rrw_oracle_compound, -1); 699 if (compoundSt != null) { 700 //it's trigger with compound trigger block 701 procedure_status[nestedProcedures] = stored_procedure_status.body; 702 waitingEnds[nestedProcedures]++; 703 } 704 } 705 } else if ((sptype[nestedProcedures] == stored_procedure_type.function) && (ast.tokencode == TBaseType.rrw_teradata_using)) { 706 if ((ast.searchToken("aggregate", -1) != null) || (ast.searchToken("pipelined", -1) != null)) { 707 if (nestedProcedures == 0) { 708 gst = EFindSqlStateType.stsql; 709 } else { 710 procedure_status[nestedProcedures] = stored_procedure_status.body; 711 nestedProcedures--; 712 } 713 } 714 715 } else { 716 //other tokens, do nothing 717 } 718 break; 719 case is_as: 720 if ((ast.tokencode == TBaseType.rrw_procedure) || (ast.tokencode == TBaseType.rrw_function)) { 721 // s2 722 nestedProcedures++; 723 waitingEnds[nestedProcedures] = 0; 724 procedure_status[nestedProcedures] = stored_procedure_status.start; 725 726 if (nestedProcedures > stored_procedure_nested_level - 1) { 727 gst = EFindSqlStateType.sterror; 728 nestedProcedures--; 729 } 730 731 } else if (ast.tokencode == TBaseType.rrw_begin) { 732 // s5 733 if ((nestedProcedures == 0) && 734 ((sptype[nestedProcedures] == stored_procedure_type.package_body) 735 || (sptype[nestedProcedures] == stored_procedure_type.package_spec))) { 736 //top level package or package body's BEGIN keyword already count, 737 // so don't increase waitingEnds[nestedProcedures] here 738 739 } else { 740 waitingEnds[nestedProcedures]++; 741 } 742 procedure_status[nestedProcedures] = stored_procedure_status.body; 743 } else if (ast.tokencode == TBaseType.rrw_end) { 744 // s6 745 if ((nestedProcedures == 0) && (waitingEnds[nestedProcedures] == 1) && 746 ((sptype[nestedProcedures] == stored_procedure_type.package_body) || (sptype[nestedProcedures] == stored_procedure_type.package_spec))) { 747 procedure_status[nestedProcedures] = stored_procedure_status.bodyend; 748 waitingEnds[nestedProcedures]--; 749 } else { 750 waitingEnds[nestedProcedures]--; 751 } 752 } else if (ast.tokencode == TBaseType.rrw_case) { 753 if (ast.searchToken(';', 1) == null) { 754 //this is not case before ; 755 waitingEnds[nestedProcedures]++; 756 } 757 } else { 758 //other tokens, do nothing 759 } 760 break; 761 case body: 762 if ((ast.tokencode == TBaseType.rrw_begin)) { 763 waitingEnds[nestedProcedures]++; 764 } else if (ast.tokencode == TBaseType.rrw_if) { 765 766 if (ast.searchToken(';', 2) == null) { 767 //this is not if before ; 768 769 // 2015-02-27, change 1 to 2 make it able to detect label name after case 770 // like this: END CASE l1; 771 waitingEnds[nestedProcedures]++; 772 } 773 } else if (ast.tokencode == TBaseType.rrw_case) { 774 if (ast.searchToken(';', 2) == null) { 775 //this is not case before ; 776 if (ast.searchToken(TBaseType.rrw_end, -1) == null) { 777 waitingEnds[nestedProcedures]++; 778 } 779 } 780 } else if ((ast.tokencode == TBaseType.rrw_loop) 781 || (ast.tokencode == TBaseType.rrw_while) || (ast.tokencode == TBaseType.rrw_repeat) 782 || (ast.tokencode == TBaseType.rrw_for)) { 783 if (!((ast.searchToken(TBaseType.rrw_end, -1) != null) 784 && (ast.searchToken(';', 2) != null))) { 785 // exclude loop like this: 786 // end loop [labelname]; 787 waitingEnds[nestedProcedures]++; 788 } 789 790 } else if (ast.tokencode == TBaseType.rrw_end) { 791 //foundEnd = true; 792 waitingEnds[nestedProcedures]--; 793 //if (waitingEnd < 0) { waitingEnd = 0;} 794 if (waitingEnds[nestedProcedures] == 0) { 795 if (nestedProcedures == 0) { 796 // s7 797 procedure_status[nestedProcedures] = stored_procedure_status.bodyend; 798 } else { 799 // s71 800 nestedProcedures--; 801 procedure_status[nestedProcedures] = stored_procedure_status.is_as; 802 } 803 } 804 } else if ((waitingEnds[nestedProcedures] == 0) && (ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) { 805 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 806 gst = EFindSqlStateType.stnormal; 807 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 808 809 //make / a sqlplus cmd 810 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 811 gcurrentsqlstatement.sourcetokenlist.add(ast); 812 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 813 } 814 break; 815 case bodyend: 816 if ((ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) { 817 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 818 gst = EFindSqlStateType.stnormal; 819 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 820 821 //make / a sqlplus cmd 822 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 823 gcurrentsqlstatement.sourcetokenlist.add(ast); 824 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 825 } else if ((ast.tokencode == ';')) { 826 gst = EFindSqlStateType.stnormal; 827 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 828 829 } else if ((ast.searchToken(TBaseType.rrw_package, 1) != null) && (!endBySlashOnly)) { 830 gcurrentsqlstatement.sourcetokenlist.add(ast); 831 gst = EFindSqlStateType.stnormal; 832 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 833 } else if ((ast.searchToken(TBaseType.rrw_procedure, 1) != null) && (!endBySlashOnly)) { 834 gcurrentsqlstatement.sourcetokenlist.add(ast); 835 gst = EFindSqlStateType.stnormal; 836 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 837 } else if ((ast.searchToken(TBaseType.rrw_function, 1) != null) && (!endBySlashOnly)) { 838 gcurrentsqlstatement.sourcetokenlist.add(ast); 839 gst = EFindSqlStateType.stnormal; 840 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 841 } else if ((ast.searchToken(TBaseType.rrw_create, 1) != null) && (ast.searchToken(TBaseType.rrw_package, 4) != null) && (!endBySlashOnly)) { 842 gcurrentsqlstatement.sourcetokenlist.add(ast); 843 gst = EFindSqlStateType.stnormal; 844 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 845 } else if ((ast.searchToken(TBaseType.rrw_create, 1) != null) && (ast.searchToken(TBaseType.rrw_library, 4) != null) && (!endBySlashOnly)) { 846 gcurrentsqlstatement.sourcetokenlist.add(ast); 847 gst = EFindSqlStateType.stnormal; 848 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 849 } else if ((ast.searchToken(TBaseType.rrw_alter, 1) != null) && (ast.searchToken(TBaseType.rrw_trigger, 2) != null) && (!endBySlashOnly)) { 850 gcurrentsqlstatement.sourcetokenlist.add(ast); 851 gst = EFindSqlStateType.stnormal; 852 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 853 } else if ((ast.searchToken(TBaseType.rrw_select, 1) != null) && (!endBySlashOnly)) { 854 gcurrentsqlstatement.sourcetokenlist.add(ast); 855 gst = EFindSqlStateType.stnormal; 856 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 857 } else if ((ast.searchToken(TBaseType.rrw_commit, 1) != null) && (!endBySlashOnly)) { 858 gcurrentsqlstatement.sourcetokenlist.add(ast); 859 gst = EFindSqlStateType.stnormal; 860 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 861 } else if ((ast.searchToken(TBaseType.rrw_grant, 1) != null) && 862 (ast.searchToken(TBaseType.rrw_execute, 2) != null) && (!endBySlashOnly)) { 863 gcurrentsqlstatement.sourcetokenlist.add(ast); 864 gst = EFindSqlStateType.stnormal; 865 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 866 } else { 867 gcurrentsqlstatement.sourcetokenlist.add(ast); 868 } 869 break; 870 case end: 871 break; 872 default: 873 break; 874 } 875 876 if (ast.tokencode == TBaseType.sqlpluscmd) { 877 //change tokencode back to keyword or TBaseType.ident, because sqlplus cmd 878 //in a sql statement(almost is plsql block) is not really a sqlplus cmd 879 int m = flexer.getkeywordvalue(ast.getAstext()); 880 if (m != 0) { 881 ast.tokencode = m; 882 } else if (ast.tokentype == ETokenType.ttslash) { 883 ast.tokencode = '/'; 884 } else { 885 ast.tokencode = TBaseType.ident; 886 } 887 } 888 889 final int wrapped_keyword_max_pos = 20; 890 if ((ast.tokencode == TBaseType.rrw_wrapped) && (ast.posinlist - gcurrentsqlstatement.sourcetokenlist.get(0).posinlist < wrapped_keyword_max_pos)) { 891 if (gcurrentsqlstatement instanceof TCommonStoredProcedureSqlStatement) { 892 ((TCommonStoredProcedureSqlStatement) gcurrentsqlstatement).setWrapped(true); 893 } 894 895 if (gcurrentsqlstatement instanceof TPlsqlCreatePackage) { 896 if (ast.prevSolidToken() != null) { 897 ((TPlsqlCreatePackage) gcurrentsqlstatement).setPackageName(fparser.getNf().createObjectNameWithPart(ast.prevSolidToken())); 898 } 899 } 900 } 901 902 break; 903 } //ststoredprocedure 904 905 } //case 906 } //for 907 908 //last statement 909 if (TBaseType.assigned(gcurrentsqlstatement) && 910 ((gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.ststoredprocedure) 911 || (gst == EFindSqlStateType.stBigQueryIf) || (gst == EFindSqlStateType.sterror))) { 912 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, true, builder); 913 } 914 915 return errorcount; 916 } 917 918 // ========== Statement Parsing ========== 919 920 /** 921 * Parse all SQL statements after tokenization and raw extraction. 922 * <p> 923 * This method iterates through all statements, calls parsestatement on each, 924 * and handles error recovery for CREATE TABLE statements if enabled. 925 * <p> 926 * Inherited performParsing pattern from AbstractSqlParser, specialized for BigQuery. 927 */ 928 @Override 929 protected TStatementList performParsing(ParserContext context, TCustomParser parser, 930 TCustomParser secondaryParser, TSourceTokenList tokens, 931 TStatementList rawStatements) { 932 // Store references for error handling 933 this.fparser = (TParserBigquery) parser; 934 this.sourcetokenlist = tokens; 935 this.parserContext = context; 936 this.sqlstatements = rawStatements; 937 938 // Initialize sqlcmds for BigQuery 939 if (this.sqlcmds == null) { 940 this.sqlcmds = SqlCmdsFactory.get(vendor); 941 } 942 this.fparser.sqlcmds = this.sqlcmds; 943 944 // Initialize global context using inherited method 945 initializeGlobalContext(); 946 947 // Parse each statement 948 for (int i = 0; i < sqlstatements.size(); i++) { 949 TCustomSqlStatement stmt = sqlstatements.getRawSql(i); 950 951 try { 952 // Set frame stack for variable scope tracking 953 stmt.setFrameStack(frameStack); 954 955 // Parse the statement 956 int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree()); 957 958 // Vendor-specific post-processing (none needed for BigQuery currently) 959 afterStatementParsed(stmt); 960 961 // Collect errors from the statement 962 if ((parseResult != 0) || (stmt.getErrorCount() > 0)) { 963 copyErrorsFromStatement(stmt); 964 } 965 966 } catch (Exception ex) { 967 // Use inherited exception handler 968 handleStatementParsingException(stmt, i, ex); 969 continue; 970 } 971 } 972 973 // Clean up frame stack 974 if (globalFrame != null) { 975 globalFrame.popMeFromStack(frameStack); 976 } 977 978 return sqlstatements; 979 } 980 981 /** 982 * Post-process statement after parsing (hook method). 983 * <p> 984 * BigQuery does not require special post-processing, so this is a no-op. 985 * Override if BigQuery-specific validation is needed in the future. 986 */ 987 protected void afterStatementParsed(TCustomSqlStatement stmt) { 988 // No special post-processing needed for BigQuery 989 } 990 991 // ========== Semantic Analysis ========== 992 993 /** 994 * Perform semantic analysis on parsed statements. 995 * <p> 996 * Runs TSQLResolver to build relationships between tables and columns, 997 * resolve references, and perform type checking. 998 */ 999 @Override 1000 protected void performSemanticAnalysis(ParserContext context, TStatementList statements) { 1001 if (TBaseType.isEnableResolver() && getSyntaxErrors().isEmpty()) { 1002 TSQLResolver resolver = new TSQLResolver(globalContext, statements); 1003 resolver.resolve(); 1004 } 1005 } 1006 1007 /** 1008 * Perform interpretation/evaluation on statements. 1009 * <p> 1010 * Runs TASTEvaluator to execute constant expressions and compile-time 1011 * evaluations. 1012 */ 1013 @Override 1014 protected void performInterpreter(ParserContext context, TStatementList statements) { 1015 // BigQuery does not require interpretation currently 1016 } 1017 1018 @Override 1019 public String toString() { 1020 return "BigQuerySqlParser{vendor=" + vendor + "}"; 1021 } 1022}