001package gudusoft.gsqlparser.parser; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.TBaseType; 005import gudusoft.gsqlparser.TCustomLexer; 006import gudusoft.gsqlparser.TCustomParser; 007import gudusoft.gsqlparser.TCustomSqlStatement; 008import gudusoft.gsqlparser.TLexerBigquery; 009import gudusoft.gsqlparser.TParserBigquery; 010import gudusoft.gsqlparser.TSourceToken; 011import gudusoft.gsqlparser.TSourceTokenList; 012import gudusoft.gsqlparser.TStatementList; 013import gudusoft.gsqlparser.TSyntaxError; 014import gudusoft.gsqlparser.EFindSqlStateType; 015import gudusoft.gsqlparser.ETokenType; 016import gudusoft.gsqlparser.ETokenStatus; 017import gudusoft.gsqlparser.ESqlStatementType; 018import gudusoft.gsqlparser.EErrorType; 019import gudusoft.gsqlparser.stmt.TUnknownSqlStatement; 020import gudusoft.gsqlparser.stmt.TCommonStoredProcedureSqlStatement; 021import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement; 022import gudusoft.gsqlparser.stmt.oracle.TPlsqlCreatePackage; 023import gudusoft.gsqlparser.nodes.TTypeName; 024import gudusoft.gsqlparser.sqlcmds.ISqlCmds; 025import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory; 026import gudusoft.gsqlparser.compiler.TContext; 027import gudusoft.gsqlparser.sqlenv.TSQLEnv; 028import gudusoft.gsqlparser.compiler.TGlobalScope; 029import gudusoft.gsqlparser.compiler.TFrame; 030import gudusoft.gsqlparser.resolver.TSQLResolver; 031import gudusoft.gsqlparser.TLog; 032import gudusoft.gsqlparser.compiler.TASTEvaluator; 033 034import java.io.BufferedReader; 035import java.util.ArrayList; 036import java.util.Arrays; 037import java.util.List; 038import java.util.Stack; 039 040import static gudusoft.gsqlparser.ESqlStatementType.*; 041 042/** 043 * Google BigQuery SQL parser implementation. 044 * 045 * <p>This parser handles BigQuery-specific SQL syntax including: 046 * <ul> 047 * <li>BigQuery scripting language (BEGIN...END blocks, IF/WHILE/FOR/LOOP/REPEAT statements)</li> 048 * <li>BigQuery UDFs (CREATE FUNCTION with SQL or JavaScript)</li> 049 * <li>BigQuery procedures (CREATE PROCEDURE with BEGIN...END blocks)</li> 050 * <li>Special type handling (STRUCT, ARRAY, DATE/TIME/TIMESTAMP literals)</li> 051 * <li>Backtick-quoted identifiers including qualified names (`schema.table`)</li> 052 * <li>BigQuery-specific keywords and functions</li> 053 * </ul> 054 * 055 * <p><b>Design Notes:</b> 056 * <ul> 057 * <li>Extends {@link AbstractSqlParser} using the template method pattern</li> 058 * <li>Uses {@link TLexerBigquery} for tokenization</li> 059 * <li>Uses {@link TParserBigquery} for parsing</li> 060 * <li>Delimiter character: ';' for SQL statements</li> 061 * <li>Splits backtick-quoted qualified names (`schema.table`) into individual tokens</li> 062 * </ul> 063 * 064 * <p><b>Usage Example:</b> 065 * <pre> 066 * // Get BigQuery parser from factory 067 * SqlParser parser = SqlParserFactory.get(EDbVendor.dbvbigquery); 068 * 069 * // Build context 070 * ParserContext context = new ParserContext.Builder(EDbVendor.dbvbigquery) 071 * .sqlText("SELECT * FROM `project.dataset.table` WHERE region = 'US'") 072 * .build(); 073 * 074 * // Parse 075 * SqlParseResult result = parser.parse(context); 076 * 077 * // Access statements 078 * TStatementList statements = result.getSqlStatements(); 079 * </pre> 080 * 081 * @see SqlParser 082 * @see AbstractSqlParser 083 * @see TLexerBigquery 084 * @see TParserBigquery 085 * @since 3.2.0.0 086 */ 087public class BigQuerySqlParser extends AbstractSqlParser { 088 089 /** 090 * Construct BigQuery SQL parser. 091 * <p> 092 * Configures the parser for BigQuery database with default delimiter (;). 093 * <p> 094 * Following the original TGSqlParser pattern, the lexer and parser are 095 * created once in the constructor and reused for all parsing operations. 096 */ 097 public BigQuerySqlParser() { 098 super(EDbVendor.dbvbigquery); 099 this.delimiterChar = ';'; 100 this.defaultDelimiterStr = ";"; 101 102 // Create lexer once - will be reused for all parsing operations 103 this.flexer = new TLexerBigquery(); 104 this.flexer.delimiterchar = this.delimiterChar; 105 this.flexer.defaultDelimiterStr = this.defaultDelimiterStr; 106 107 // Set parent's lexer reference for shared tokenization logic 108 this.lexer = this.flexer; 109 110 // Create parser once - will be reused for all parsing operations 111 this.fparser = new TParserBigquery(null); 112 this.fparser.lexer = this.flexer; 113 } 114 115 // ========== Parser Components ========== 116 117 /** The BigQuery lexer used for tokenization */ 118 public TLexerBigquery flexer; 119 120 /** SQL parser (for BigQuery statements) */ 121 private TParserBigquery fparser; 122 123 /** Current statement being built during extraction */ 124 private TCustomSqlStatement gcurrentsqlstatement; 125 126 /** Parser context for current operation */ 127 private ParserContext parserContext; 128 129 /** User-defined delimiter string for MySQL DELIMITER command */ 130 private String userDelimiterStr; 131 132 // Stored procedure parsing state tracking 133 private enum stored_procedure_type { 134 procedure, function, package_spec, package_body, block_with_declare, 135 block_with_begin, create_trigger, create_library, others 136 } 137 138 private enum stored_procedure_status { 139 start, is_as, body, bodyend, end 140 } 141 142 private static final int stored_procedure_nested_level = 50; 143 144 // Note: Global context and frame stack fields inherited from AbstractSqlParser: 145 // - protected TContext globalContext 146 // - protected TSQLEnv sqlEnv 147 // - protected Stack<TFrame> frameStack 148 // - protected TFrame globalFrame 149 150 // ========== AbstractSqlParser Abstract Methods Implementation ========== 151 152 /** 153 * Return the BigQuery lexer instance. 154 */ 155 @Override 156 protected TCustomLexer getLexer(ParserContext context) { 157 return this.flexer; 158 } 159 160 /** 161 * Return the BigQuery SQL parser instance with updated token list. 162 */ 163 @Override 164 protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) { 165 this.fparser.sourcetokenlist = tokens; 166 return this.fparser; 167 } 168 169 /** 170 * BigQuery does not use a secondary parser (unlike Oracle with PL/SQL). 171 */ 172 @Override 173 protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) { 174 return null; 175 } 176 177 /** 178 * Call BigQuery-specific tokenization logic. 179 * <p> 180 * Delegates to dobigquerysqltexttotokenlist which handles BigQuery's 181 * specific keyword recognition, backtick-quoted identifiers, and 182 * qualified name splitting. 183 */ 184 @Override 185 protected void tokenizeVendorSql() { 186 dobigquerysqltexttotokenlist(); 187 } 188 189 /** 190 * Setup BigQuery parser for raw statement extraction. 191 * <p> 192 * BigQuery uses a single parser, so we inject sqlcmds and update 193 * the token list for the main parser only. 194 */ 195 @Override 196 protected void setupVendorParsersForExtraction() { 197 // Inject sqlcmds into parser (required for make_stmt) 198 this.fparser.sqlcmds = this.sqlcmds; 199 200 // Update token list for parser 201 this.fparser.sourcetokenlist = this.sourcetokenlist; 202 } 203 204 /** 205 * Call BigQuery-specific raw statement extraction logic. 206 * <p> 207 * Delegates to dobigquerygetrawsqlstatements which handles BigQuery's 208 * statement delimiters and scripting language boundaries (BEGIN...END blocks, 209 * IF/WHILE/FOR/LOOP statements). 210 */ 211 @Override 212 protected void extractVendorRawStatements(SqlParseResult.Builder builder) { 213 int errorCount = dobigquerygetrawsqlstatements(builder); 214 // Error count is tracked internally; errors are already added to syntaxErrors list 215 216 // Set the extracted statements in the builder 217 builder.sqlStatements(this.sqlstatements); 218 } 219 220 // ========== Tokenization Methods ========== 221 222 /** 223 * Tokenize BigQuery SQL text into a list of tokens. 224 * <p> 225 * This method handles BigQuery-specific token processing: 226 * <ul> 227 * <li>Splits backtick-quoted qualified names (`schema.table`) into separate tokens</li> 228 * <li>Handles all standard SQL tokens (keywords, identifiers, operators, etc.)</li> 229 * </ul> 230 * <p> 231 * Migrated from TGSqlParser.dobigquerysqltexttotokenlist() 232 */ 233 private void dobigquerysqltexttotokenlist() { 234 TSourceToken asourcetoken, lcprevst; 235 int yychar; 236 237 flexer.tmpDelimiter = ""; 238 239 asourcetoken = getanewsourcetoken(); 240 if (asourcetoken == null) return; 241 yychar = asourcetoken.tokencode; 242 243 while (yychar > 0) { 244 if (asourcetoken != null) { 245 sourcetokenlist.add(asourcetoken); 246 } 247 248 asourcetoken = getanewsourcetoken(); 249 if (asourcetoken == null) break; 250 yychar = asourcetoken.tokencode; 251 252 // `schema.table_name` - split into separate tokens 253 if ((asourcetoken.tokencode == TBaseType.ident) 254 && (asourcetoken.toString().startsWith("`")) && (asourcetoken.toString().endsWith("`")) 255 && (asourcetoken.toString().indexOf(".") > 0)) { 256 yychar = splitQualifiedNameInBacktick(asourcetoken); 257 asourcetoken = null; 258 } 259 } 260 } 261 262 /** 263 * Turn one token: `schema.table_name` into 3 tokens: `schema` . `table_name` 264 * <p> 265 * This helper method splits backtick-quoted qualified names into individual 266 * identifier and period tokens, preserving line/column information for each part. 267 * <p> 268 * Migrated from TGSqlParser.splitQualifiedNameInBacktick() 269 * 270 * @param asourcetoken the token to split 271 * @return the token code of the last token created 272 */ 273 private int splitQualifiedNameInBacktick(TSourceToken asourcetoken) { 274 int yychar = 0; 275 276 List<String> elephantList = Arrays.asList(TBaseType.getTextWithoutQuoted(asourcetoken.toString()).split("\\.")); 277 int p = 0, offset = 0; 278 for (String s : elephantList) { 279 TSourceToken pst = new TSourceToken("`" + s + "`"); 280 pst.tokencode = asourcetoken.tokencode; 281 pst.tokentype = asourcetoken.tokentype; 282 pst.tokenstatus = asourcetoken.tokenstatus; 283 pst.lineNo = asourcetoken.lineNo; 284 pst.columnNo = asourcetoken.columnNo + offset; 285 if (p == 0) offset++; // this count the first ` token 286 offset = offset + s.length(); 287 pst.container = sourcetokenlist; 288 if (p > 0) { // 第一个token使用被拆分前那个token的位置,从第二个开始的token,需要先把列表的位置指针加 1 289 sourcetokenlist.curpos = sourcetokenlist.curpos + 1; 290 } 291 pst.posinlist = sourcetokenlist.curpos; 292 293 sourcetokenlist.add(pst); 294 yychar = pst.tokencode; 295 296 if (p != elephantList.size() - 1) { 297 //`schema.table_name`, add period token in the middle of the backtick included identifier. 298 TSourceToken periodst = new TSourceToken("."); 299 periodst.tokencode = '.'; 300 periodst.tokentype = ETokenType.ttperiod; 301 periodst.tokenstatus = asourcetoken.tokenstatus; 302 periodst.lineNo = asourcetoken.lineNo; 303 periodst.columnNo = asourcetoken.columnNo + offset; 304 offset++; 305 periodst.container = sourcetokenlist; 306 sourcetokenlist.curpos = sourcetokenlist.curpos + 1; 307 periodst.posinlist = sourcetokenlist.curpos; 308 sourcetokenlist.add(periodst); 309 yychar = periodst.tokencode; 310 } 311 312 p++; 313 } 314 315 return yychar; 316 } 317 318 // ========== Raw Statement Extraction ========== 319 320 /** 321 * Check if token code is one that pairs with END keyword. 322 * <p> 323 * Used for BigQuery scripting language to track nested BEGIN/IF/CASE/LOOP/WHILE/FOR/REPEAT blocks. 324 * <p> 325 * Migrated from TGSqlParser.checkTokenPairWithEnd() 326 */ 327 private boolean checkTokenPairWithEnd(int tokencode) { 328 return ((tokencode == TBaseType.rrw_if) || (tokencode == TBaseType.rrw_case) 329 || (tokencode == TBaseType.rrw_loop) || (tokencode == TBaseType.rrw_repeat) 330 || (tokencode == TBaseType.rrw_while) || (tokencode == TBaseType.rrw_for) 331 || (tokencode == TBaseType.rrw_case)); 332 } 333 334 /** 335 * Extract raw SQL statements from the token list. 336 * <p> 337 * This method separates individual SQL statements without full syntax checking. 338 * It handles BigQuery-specific syntax including: 339 * <ul> 340 * <li>Stored procedures and functions with BEGIN...END blocks</li> 341 * <li>Scripting language statements (IF, WHILE, FOR, LOOP, REPEAT, CASE)</li> 342 * <li>STRUCT constructor special handling</li> 343 * <li>Type casting with literals (DATE '2021-01-01')</li> 344 * <li>Nested procedure tracking</li> 345 * </ul> 346 * <p> 347 * Migrated from TGSqlParser.dobigquerygetrawsqlstatements() 348 * 349 * @param builder the result builder to populate 350 * @return error count 351 */ 352 private int dobigquerygetrawsqlstatements(SqlParseResult.Builder builder) { 353 int errorcount = 0; 354 gcurrentsqlstatement = null; 355 EFindSqlStateType gst = EFindSqlStateType.stnormal; 356 int i, c, beginNested = 0, waitingEnd = 0; 357 TSourceToken ast = null, lcprevsolidtoken = null; 358 boolean waitingDelimiter = false; 359 360 int waitingEnds[] = new int[stored_procedure_nested_level]; 361 stored_procedure_type sptype[] = new stored_procedure_type[stored_procedure_nested_level]; 362 stored_procedure_status procedure_status[] = new stored_procedure_status[stored_procedure_nested_level]; 363 boolean endBySlashOnly = true; 364 int nestedProcedures = 0, nestedParenthesis = 0; 365 366 //reset delimiter 367 userDelimiterStr = defaultDelimiterStr; 368 369 for (i = 0; i < sourcetokenlist.size(); i++) { 370 if ((ast != null) && (ast.issolidtoken())) 371 lcprevsolidtoken = ast; 372 373 ast = sourcetokenlist.get(i); 374 sourcetokenlist.curpos = i; 375 376 // Handle STRUCT constructor: STRUCT(...) -> mark as struct constructor 377 if (ast.tokencode == TBaseType.rrw_bigquery_struct) { 378 TSourceToken st1 = ast.nextSolidToken(); 379 if (st1 != null) { 380 if (st1.tokencode == '(') { 381 ast.tokencode = TBaseType.rrw_bigquery_struct_constructor; 382 } 383 } 384 } 385 // Handle type casting: DATE '2021-01-01' -> mark DATE as datatype used to cast 386 else if ((ast.tokencode == TBaseType.sconst)) { 387 if (lcprevsolidtoken != null && TTypeName.searchTypeByName(lcprevsolidtoken.toString()) != null) { 388 if (lcprevsolidtoken.tokencode != TBaseType.rrw_interval) { 389 lcprevsolidtoken.tokencode = TBaseType.rrw_bigquery_datatype_used_to_cast; 390 } 391 } 392 } 393 // Handle TIME/DATE: could be function or type literal 394 else if ((ast.tokencode == TBaseType.rrw_time) || (ast.tokencode == TBaseType.rrw_date)) { 395 TSourceToken st1 = ast.nextSolidToken(); 396 if (st1 != null) { 397 if (st1.tokencode == TBaseType.sconst) { 398 // ast.tokencode = TBaseType.rrw_bigquery_time_before_const; 399 } else if (st1.tokencode == '(') { 400 ast.tokencode = TBaseType.ident; 401 } 402 } 403 } 404 // Handle FROM after period: i.from -> treat FROM as identifier 405 else if (ast.tokencode == TBaseType.rrw_from) { 406 TSourceToken st1 = ast.prevSolidToken(); 407 if (st1 != null) { // select i.from as `from` from t, treats from in i.from as identifier 408 if (st1.tokencode == '.') { 409 ast.tokencode = TBaseType.ident; 410 } 411 } 412 } 413 // Handle TIMESTAMP: distinguish type from identifier based on context 414 else if (ast.tokencode == TBaseType.rrw_timestamp) { 415 TSourceToken prevToken = ast.prevSolidToken(); 416 TSourceToken nextToken = ast.nextSolidToken(); 417 418 // Check if preceded by period -> identifier (e.g., table.timestamp) 419 if (prevToken != null && prevToken.tokencode == '.') { 420 // Keep as RW_TIMESTAMP (will be used as identifier via unreserved_keyword) 421 } 422 // Check type contexts where TIMESTAMP should be marked as type 423 else if (prevToken != null && 424 (prevToken.tokencode == TBaseType.rrw_as || // CAST(x AS TIMESTAMP) 425 prevToken.tokencode == ',' || // func(INT, TIMESTAMP) 426 prevToken.tokencode == '(' || // CAST(TIMESTAMP ...), STRUCT(col TIMESTAMP) 427 prevToken.tokencode == '<' || // ARRAY<TIMESTAMP>, STRUCT<col TIMESTAMP> 428 prevToken.tokencode == TBaseType.ident)) { // column_name TIMESTAMP (column definition) 429 ast.tokencode = TBaseType.rrw_bigquery_timestamp_as_type; 430 } 431 // Check if followed by '(' with precision -> type constructor TIMESTAMP(6) 432 else if (nextToken != null && nextToken.tokencode == '(') { 433 TSourceToken afterParen = nextToken.nextSolidToken(); 434 if (afterParen != null && 435 (afterParen.tokencode == TBaseType.iconst || // TIMESTAMP(6) 436 afterParen.tokencode == ')')) { // TIMESTAMP() 437 ast.tokencode = TBaseType.rrw_bigquery_timestamp_as_type; 438 } 439 } 440 // Check if followed by common tokens after type declarations 441 else if (nextToken != null && 442 (nextToken.tokencode == ',' || // col TIMESTAMP, another 443 nextToken.tokencode == ')' || // col TIMESTAMP) 444 nextToken.tokencode == '>' || // ARRAY<TIMESTAMP> 445 nextToken.tokencode == TBaseType.rrw_not || // TIMESTAMP NOT NULL 446 nextToken.tokencode == TBaseType.rrw_null)) { // TIMESTAMP NULL 447 ast.tokencode = TBaseType.rrw_bigquery_timestamp_as_type; 448 } 449 } 450 451 switch (gst) { 452 case sterror: { 453 if (ast.tokentype == ETokenType.ttsemicolon) { 454 gcurrentsqlstatement.sourcetokenlist.add(ast); 455 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 456 gst = EFindSqlStateType.stnormal; 457 } else { 458 gcurrentsqlstatement.sourcetokenlist.add(ast); 459 } 460 break; 461 } 462 case stnormal: { 463 if ((ast.tokencode == TBaseType.cmtdoublehyphen) 464 || (ast.tokencode == TBaseType.cmtslashstar) 465 || (ast.tokencode == TBaseType.lexspace) 466 || (ast.tokencode == TBaseType.lexnewline) 467 || (ast.tokentype == ETokenType.ttsemicolon)) { 468 if (TBaseType.assigned(gcurrentsqlstatement)) { 469 gcurrentsqlstatement.sourcetokenlist.add(ast); 470 } 471 472 continue; 473 } 474 475 // find a token to start sql or plsql mode 476 gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement); 477 478 if (TBaseType.assigned(gcurrentsqlstatement)) { 479 if (gcurrentsqlstatement.isBigQueryplsql()) { 480 nestedProcedures = 0; 481 gst = EFindSqlStateType.ststoredprocedure; 482 gcurrentsqlstatement.sourcetokenlist.add(ast); 483 484 switch (gcurrentsqlstatement.sqlstatementtype) { 485 case sstplsql_createprocedure: 486 case sstcreateprocedure: 487 sptype[nestedProcedures] = stored_procedure_type.procedure; 488 break; 489 case sstplsql_createfunction: 490 sptype[nestedProcedures] = stored_procedure_type.function; 491 break; 492 case sstplsql_createpackage: 493 sptype[nestedProcedures] = stored_procedure_type.package_spec; 494 if (ast.searchToken(TBaseType.rrw_body, 5) != null) { 495 sptype[nestedProcedures] = stored_procedure_type.package_body; 496 } 497 break; 498 case sst_plsql_block: 499 sptype[nestedProcedures] = stored_procedure_type.block_with_declare; 500 if (ast.tokencode == TBaseType.rrw_begin) { 501 sptype[nestedProcedures] = stored_procedure_type.block_with_begin; 502 } 503 break; 504 case sstplsql_createtrigger: 505 sptype[nestedProcedures] = stored_procedure_type.create_trigger; 506 break; 507 case sstoraclecreatelibrary: 508 sptype[nestedProcedures] = stored_procedure_type.create_library; 509 break; 510 case sstplsql_createtype_placeholder: 511 gst = EFindSqlStateType.stsql; 512 break; 513 default: 514 sptype[nestedProcedures] = stored_procedure_type.others; 515 break; 516 } 517 518 if (sptype[0] == stored_procedure_type.block_with_declare) { 519 // sd 520 endBySlashOnly = false; 521 procedure_status[0] = stored_procedure_status.is_as; 522 } else if (sptype[0] == stored_procedure_type.block_with_begin) { 523 // sb 524 endBySlashOnly = false; 525 procedure_status[0] = stored_procedure_status.body; 526 } else if (sptype[0] == stored_procedure_type.procedure) { 527 // ss 528 endBySlashOnly = false; 529 procedure_status[0] = stored_procedure_status.start; 530 } else if (sptype[0] == stored_procedure_type.function) { 531 // ss 532 endBySlashOnly = false; 533 procedure_status[0] = stored_procedure_status.start; 534 } else if (sptype[0] == stored_procedure_type.package_spec) { 535 // ss 536 endBySlashOnly = false; 537 procedure_status[0] = stored_procedure_status.start; 538 } else if (sptype[0] == stored_procedure_type.package_body) { 539 // ss 540 endBySlashOnly = false; 541 procedure_status[0] = stored_procedure_status.start; 542 } else if (sptype[0] == stored_procedure_type.create_trigger) { 543 // ss 544 endBySlashOnly = false; 545 procedure_status[0] = stored_procedure_status.start; 546 } else if (sptype[0] == stored_procedure_type.create_library) { 547 // ss 548 endBySlashOnly = false; 549 procedure_status[0] = stored_procedure_status.bodyend; 550 } else { 551 // so 552 endBySlashOnly = true; 553 procedure_status[0] = stored_procedure_status.bodyend; 554 } 555 if ((ast.tokencode == TBaseType.rrw_begin) 556 || (ast.tokencode == TBaseType.rrw_package) 557 || (ast.searchToken(TBaseType.rrw_package, 4) != null)) { 558 waitingEnds[nestedProcedures] = 1; 559 } 560 561 } else if ((gcurrentsqlstatement.sqlstatementtype == sst_ifstmt) 562 || (gcurrentsqlstatement.sqlstatementtype == sst_loopstmt) 563 || (gcurrentsqlstatement.sqlstatementtype == sstRepeat) 564 || (gcurrentsqlstatement.sqlstatementtype == sstWhilestmt) 565 || (gcurrentsqlstatement.sqlstatementtype == sstForStmt) 566 || (gcurrentsqlstatement.sqlstatementtype == sst_plsql_block) 567 || (gcurrentsqlstatement.sqlstatementtype == sst_casestmt)) { 568 gst = EFindSqlStateType.stBigQueryIf; 569 // For labeled statements (label: LOOP/WHILE/etc.), the first token 570 // is the label identifier, and the keyword comes later in stBigQueryIf 571 // where it will increment waitingEnd. So start at 0. 572 waitingEnd = (ast.tokencode == TBaseType.ident) ? 0 : 1; 573 gcurrentsqlstatement.sourcetokenlist.add(ast); 574 } else { 575 gst = EFindSqlStateType.stsql; 576 gcurrentsqlstatement.sourcetokenlist.add(ast); 577 } 578 579 } 580 581 if (!TBaseType.assigned(gcurrentsqlstatement)) //error token found 582 { 583 this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo) 584 , "Error when tokenlize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist)); 585 586 ast.tokentype = ETokenType.tttokenlizererrortoken; 587 gst = EFindSqlStateType.sterror; 588 589 gcurrentsqlstatement = new TUnknownSqlStatement(vendor); 590 gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid; 591 gcurrentsqlstatement.sourcetokenlist.add(ast); 592 593 } 594 break; 595 } 596 case stBigQueryIf: { 597 gcurrentsqlstatement.sourcetokenlist.add(ast); 598 599 if (checkTokenPairWithEnd(ast.tokencode)) { // if... end if 600 if (ast.searchToken(TBaseType.rrw_end, -1) == null) { 601 //this is not if after END 602 waitingEnd++; 603 } 604 } else if (ast.tokencode == TBaseType.rrw_end) { // if ... end if 605 TSourceToken next = ast.nextSolidToken(); 606 if (next != null) { 607 if (checkTokenPairWithEnd(next.tokencode)) { // if ... end if; 608 waitingEnd--; 609 } else if (next.tokencode == ';') { // begin ... end ; 610 waitingEnd--; 611 } else if (next.tokencode == TBaseType.ident) { 612 // begin ... end label_name ; (labeled block end) 613 TSourceToken afterLabel = next.nextSolidToken(); 614 if (afterLabel != null && afterLabel.tokencode == ';') { 615 waitingEnd--; 616 } 617 } 618 619 } 620 } else if ((ast.tokencode == ';') && (waitingEnd == 0)) { 621 gst = EFindSqlStateType.stnormal; 622 623 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 624 } 625 break; 626 } 627 case stsqlplus: { 628 if (ast.tokencode == TBaseType.lexnewline) { 629 gst = EFindSqlStateType.stnormal; 630 gcurrentsqlstatement.sourcetokenlist.add(ast); // so add it here 631 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 632 } else { 633 { 634 gcurrentsqlstatement.sourcetokenlist.add(ast); 635 } 636 } 637 638 break; 639 }//case source command or \. command 640 case stsql: { 641 if ((ast.tokentype == ETokenType.ttsemicolon) && (gcurrentsqlstatement.sqlstatementtype != ESqlStatementType.sstmysqldelimiter)) { 642 gst = EFindSqlStateType.stnormal; 643 gcurrentsqlstatement.sourcetokenlist.add(ast); 644 gcurrentsqlstatement.semicolonended = ast; 645 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 646 continue; 647 } 648 if (ast.toString().equalsIgnoreCase(userDelimiterStr)) { 649 gst = EFindSqlStateType.stnormal; 650 ast.tokencode = ';';// treat it as semicolon 651 gcurrentsqlstatement.sourcetokenlist.add(ast); 652 gcurrentsqlstatement.semicolonended = ast; 653 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 654 continue; 655 } 656 gcurrentsqlstatement.sourcetokenlist.add(ast); 657 658 if ((ast.tokencode == TBaseType.lexnewline) 659 && (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstmysqldelimiter)) { 660 gst = EFindSqlStateType.stnormal; 661 userDelimiterStr = ""; 662 for (int k = 0; k < gcurrentsqlstatement.sourcetokenlist.size(); k++) { 663 TSourceToken st = gcurrentsqlstatement.sourcetokenlist.get(k); 664 if ((st.tokencode == TBaseType.rrw_mysql_delimiter) 665 || (st.tokencode == TBaseType.lexnewline) 666 || (st.tokencode == TBaseType.lexspace)) { 667 continue; 668 } 669 670 userDelimiterStr += st.toString(); 671 } 672 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 673 674 continue; 675 } 676 677 break; 678 } 679 case ststoredprocedure: { 680 if (procedure_status[nestedProcedures] != stored_procedure_status.bodyend) { 681 gcurrentsqlstatement.sourcetokenlist.add(ast); 682 } 683 684 switch (procedure_status[nestedProcedures]) { 685 case start: 686 if ((ast.tokencode == TBaseType.rrw_as) || (ast.tokencode == TBaseType.rrw_is)) { 687 // s1 688 if (sptype[nestedProcedures] != stored_procedure_type.create_trigger) { 689 if ((sptype[0] == stored_procedure_type.package_spec) && (nestedProcedures > 0)) { 690 //when it's a package specification, only top level accept as/is 691 } else { 692 procedure_status[nestedProcedures] = stored_procedure_status.is_as; 693 if (ast.searchToken("language", 1) != null) { 694 if (nestedProcedures == 0) { 695 gst = EFindSqlStateType.stsql; 696 } else { 697 procedure_status[nestedProcedures] = stored_procedure_status.body; 698 nestedProcedures--; 699 } 700 701 } 702 } 703 } 704 } else if (ast.tokencode == TBaseType.rrw_begin) { 705 // s4 706 if (sptype[nestedProcedures] == stored_procedure_type.create_trigger) 707 waitingEnds[nestedProcedures]++; 708 709 if (nestedProcedures > 0) { 710 nestedProcedures--; 711 } 712 procedure_status[nestedProcedures] = stored_procedure_status.body; 713 waitingEnds[nestedProcedures] = 1; 714 } else if (ast.tokencode == TBaseType.rrw_end) { 715 //s10 716 if ((nestedProcedures > 0) && (waitingEnds[nestedProcedures - 1] == 1) 717 && ((sptype[nestedProcedures - 1] == stored_procedure_type.package_body) 718 || (sptype[nestedProcedures - 1] == stored_procedure_type.package_spec))) { 719 nestedProcedures--; 720 procedure_status[nestedProcedures] = stored_procedure_status.bodyend; 721 } 722 } else if ((ast.tokencode == TBaseType.rrw_procedure) || (ast.tokencode == TBaseType.rrw_function)) { 723 //s3 724 if ((nestedProcedures > 0) && (waitingEnds[nestedProcedures] == 0) 725 && (procedure_status[nestedProcedures - 1] == stored_procedure_status.is_as)) { 726 nestedProcedures--; 727 nestedProcedures++; 728 waitingEnds[nestedProcedures] = 0; 729 procedure_status[nestedProcedures] = stored_procedure_status.start; 730 } 731 } else if ((sptype[nestedProcedures] == stored_procedure_type.create_trigger) && (ast.tokencode == TBaseType.rrw_declare)) { 732 procedure_status[nestedProcedures] = stored_procedure_status.is_as; 733 } else if ((sptype[nestedProcedures] == stored_procedure_type.create_trigger) && (ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) { 734 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 735 gst = EFindSqlStateType.stnormal; 736 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 737 738 //make / a sqlplus cmd 739 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 740 gcurrentsqlstatement.sourcetokenlist.add(ast); 741 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 742 } else if ((sptype[nestedProcedures] == stored_procedure_type.create_trigger)) { 743 if (ast.tokencode == TBaseType.rrw_trigger) { 744 TSourceToken compoundSt = ast.searchToken(TBaseType.rrw_oracle_compound, -1); 745 if (compoundSt != null) { 746 //it's trigger with compound trigger block 747 procedure_status[nestedProcedures] = stored_procedure_status.body; 748 waitingEnds[nestedProcedures]++; 749 } 750 } 751 } else if ((sptype[nestedProcedures] == stored_procedure_type.function) && (ast.tokencode == TBaseType.rrw_teradata_using)) { 752 if ((ast.searchToken("aggregate", -1) != null) || (ast.searchToken("pipelined", -1) != null)) { 753 if (nestedProcedures == 0) { 754 gst = EFindSqlStateType.stsql; 755 } else { 756 procedure_status[nestedProcedures] = stored_procedure_status.body; 757 nestedProcedures--; 758 } 759 } 760 761 } else { 762 //other tokens, do nothing 763 } 764 break; 765 case is_as: 766 if ((ast.tokencode == TBaseType.rrw_procedure) || (ast.tokencode == TBaseType.rrw_function)) { 767 // s2 768 nestedProcedures++; 769 waitingEnds[nestedProcedures] = 0; 770 procedure_status[nestedProcedures] = stored_procedure_status.start; 771 772 if (nestedProcedures > stored_procedure_nested_level - 1) { 773 gst = EFindSqlStateType.sterror; 774 nestedProcedures--; 775 } 776 777 } else if (ast.tokencode == TBaseType.rrw_begin) { 778 // s5 779 if ((nestedProcedures == 0) && 780 ((sptype[nestedProcedures] == stored_procedure_type.package_body) 781 || (sptype[nestedProcedures] == stored_procedure_type.package_spec))) { 782 //top level package or package body's BEGIN keyword already count, 783 // so don't increase waitingEnds[nestedProcedures] here 784 785 } else { 786 waitingEnds[nestedProcedures]++; 787 } 788 procedure_status[nestedProcedures] = stored_procedure_status.body; 789 } else if (ast.tokencode == TBaseType.rrw_end) { 790 // s6 791 if ((nestedProcedures == 0) && (waitingEnds[nestedProcedures] == 1) && 792 ((sptype[nestedProcedures] == stored_procedure_type.package_body) || (sptype[nestedProcedures] == stored_procedure_type.package_spec))) { 793 procedure_status[nestedProcedures] = stored_procedure_status.bodyend; 794 waitingEnds[nestedProcedures]--; 795 } else { 796 waitingEnds[nestedProcedures]--; 797 } 798 } else if (ast.tokencode == TBaseType.rrw_case) { 799 if (ast.searchToken(';', 1) == null) { 800 //this is not case before ; 801 waitingEnds[nestedProcedures]++; 802 } 803 } else { 804 //other tokens, do nothing 805 } 806 break; 807 case body: 808 if ((ast.tokencode == TBaseType.rrw_begin)) { 809 waitingEnds[nestedProcedures]++; 810 } else if (ast.tokencode == TBaseType.rrw_if) { 811 812 if (ast.searchToken(';', 2) == null) { 813 //this is not if before ; 814 815 // 2015-02-27, change 1 to 2 make it able to detect label name after case 816 // like this: END CASE l1; 817 waitingEnds[nestedProcedures]++; 818 } 819 } else if (ast.tokencode == TBaseType.rrw_case) { 820 if (ast.searchToken(';', 2) == null) { 821 //this is not case before ; 822 if (ast.searchToken(TBaseType.rrw_end, -1) == null) { 823 waitingEnds[nestedProcedures]++; 824 } 825 } 826 } else if ((ast.tokencode == TBaseType.rrw_loop) 827 || (ast.tokencode == TBaseType.rrw_while) || (ast.tokencode == TBaseType.rrw_repeat) 828 || (ast.tokencode == TBaseType.rrw_for)) { 829 if (!((ast.searchToken(TBaseType.rrw_end, -1) != null) 830 && (ast.searchToken(';', 2) != null))) { 831 // exclude loop like this: 832 // end loop [labelname]; 833 waitingEnds[nestedProcedures]++; 834 } 835 836 } else if (ast.tokencode == TBaseType.rrw_end) { 837 //foundEnd = true; 838 waitingEnds[nestedProcedures]--; 839 //if (waitingEnd < 0) { waitingEnd = 0;} 840 if (waitingEnds[nestedProcedures] == 0) { 841 if (nestedProcedures == 0) { 842 // s7 843 procedure_status[nestedProcedures] = stored_procedure_status.bodyend; 844 } else { 845 // s71 846 nestedProcedures--; 847 procedure_status[nestedProcedures] = stored_procedure_status.is_as; 848 } 849 } 850 } else if ((waitingEnds[nestedProcedures] == 0) && (ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) { 851 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 852 gst = EFindSqlStateType.stnormal; 853 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 854 855 //make / a sqlplus cmd 856 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 857 gcurrentsqlstatement.sourcetokenlist.add(ast); 858 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 859 } 860 break; 861 case bodyend: 862 if ((ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) { 863 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 864 gst = EFindSqlStateType.stnormal; 865 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 866 867 //make / a sqlplus cmd 868 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 869 gcurrentsqlstatement.sourcetokenlist.add(ast); 870 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 871 } else if ((ast.tokencode == ';')) { 872 gst = EFindSqlStateType.stnormal; 873 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 874 875 } else if ((ast.searchToken(TBaseType.rrw_package, 1) != null) && (!endBySlashOnly)) { 876 gcurrentsqlstatement.sourcetokenlist.add(ast); 877 gst = EFindSqlStateType.stnormal; 878 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 879 } else if ((ast.searchToken(TBaseType.rrw_procedure, 1) != null) && (!endBySlashOnly)) { 880 gcurrentsqlstatement.sourcetokenlist.add(ast); 881 gst = EFindSqlStateType.stnormal; 882 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 883 } else if ((ast.searchToken(TBaseType.rrw_function, 1) != null) && (!endBySlashOnly)) { 884 gcurrentsqlstatement.sourcetokenlist.add(ast); 885 gst = EFindSqlStateType.stnormal; 886 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 887 } else if ((ast.searchToken(TBaseType.rrw_create, 1) != null) && (ast.searchToken(TBaseType.rrw_package, 4) != null) && (!endBySlashOnly)) { 888 gcurrentsqlstatement.sourcetokenlist.add(ast); 889 gst = EFindSqlStateType.stnormal; 890 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 891 } else if ((ast.searchToken(TBaseType.rrw_create, 1) != null) && (ast.searchToken(TBaseType.rrw_library, 4) != null) && (!endBySlashOnly)) { 892 gcurrentsqlstatement.sourcetokenlist.add(ast); 893 gst = EFindSqlStateType.stnormal; 894 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 895 } else if ((ast.searchToken(TBaseType.rrw_alter, 1) != null) && (ast.searchToken(TBaseType.rrw_trigger, 2) != null) && (!endBySlashOnly)) { 896 gcurrentsqlstatement.sourcetokenlist.add(ast); 897 gst = EFindSqlStateType.stnormal; 898 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 899 } else if ((ast.searchToken(TBaseType.rrw_select, 1) != null) && (!endBySlashOnly)) { 900 gcurrentsqlstatement.sourcetokenlist.add(ast); 901 gst = EFindSqlStateType.stnormal; 902 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 903 } else if ((ast.searchToken(TBaseType.rrw_commit, 1) != null) && (!endBySlashOnly)) { 904 gcurrentsqlstatement.sourcetokenlist.add(ast); 905 gst = EFindSqlStateType.stnormal; 906 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 907 } else if ((ast.searchToken(TBaseType.rrw_grant, 1) != null) && 908 (ast.searchToken(TBaseType.rrw_execute, 2) != null) && (!endBySlashOnly)) { 909 gcurrentsqlstatement.sourcetokenlist.add(ast); 910 gst = EFindSqlStateType.stnormal; 911 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder); 912 } else { 913 gcurrentsqlstatement.sourcetokenlist.add(ast); 914 } 915 break; 916 case end: 917 break; 918 default: 919 break; 920 } 921 922 if (ast.tokencode == TBaseType.sqlpluscmd) { 923 //change tokencode back to keyword or TBaseType.ident, because sqlplus cmd 924 //in a sql statement(almost is plsql block) is not really a sqlplus cmd 925 int m = flexer.getkeywordvalue(ast.getAstext()); 926 if (m != 0) { 927 ast.tokencode = m; 928 } else if (ast.tokentype == ETokenType.ttslash) { 929 ast.tokencode = '/'; 930 } else { 931 ast.tokencode = TBaseType.ident; 932 } 933 } 934 935 final int wrapped_keyword_max_pos = 20; 936 if ((ast.tokencode == TBaseType.rrw_wrapped) && (ast.posinlist - gcurrentsqlstatement.sourcetokenlist.get(0).posinlist < wrapped_keyword_max_pos)) { 937 if (gcurrentsqlstatement instanceof TCommonStoredProcedureSqlStatement) { 938 ((TCommonStoredProcedureSqlStatement) gcurrentsqlstatement).setWrapped(true); 939 } 940 941 if (gcurrentsqlstatement instanceof TPlsqlCreatePackage) { 942 if (ast.prevSolidToken() != null) { 943 ((TPlsqlCreatePackage) gcurrentsqlstatement).setPackageName(fparser.getNf().createObjectNameWithPart(ast.prevSolidToken())); 944 } 945 } 946 } 947 948 break; 949 } //ststoredprocedure 950 951 } //case 952 } //for 953 954 //last statement 955 if (TBaseType.assigned(gcurrentsqlstatement) && 956 ((gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.ststoredprocedure) 957 || (gst == EFindSqlStateType.stBigQueryIf) || (gst == EFindSqlStateType.sterror))) { 958 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, true, builder); 959 } 960 961 return errorcount; 962 } 963 964 // ========== Statement Parsing ========== 965 966 /** 967 * Parse all SQL statements after tokenization and raw extraction. 968 * <p> 969 * This method iterates through all statements, calls parsestatement on each, 970 * and handles error recovery for CREATE TABLE statements if enabled. 971 * <p> 972 * Inherited performParsing pattern from AbstractSqlParser, specialized for BigQuery. 973 */ 974 @Override 975 protected TStatementList performParsing(ParserContext context, TCustomParser parser, 976 TCustomParser secondaryParser, TSourceTokenList tokens, 977 TStatementList rawStatements) { 978 // Store references for error handling 979 this.fparser = (TParserBigquery) parser; 980 this.sourcetokenlist = tokens; 981 this.parserContext = context; 982 this.sqlstatements = rawStatements; 983 984 // Initialize sqlcmds for BigQuery 985 if (this.sqlcmds == null) { 986 this.sqlcmds = SqlCmdsFactory.get(vendor); 987 } 988 this.fparser.sqlcmds = this.sqlcmds; 989 990 // Initialize global context using inherited method 991 initializeGlobalContext(); 992 993 // Parse each statement 994 for (int i = 0; i < sqlstatements.size(); i++) { 995 TCustomSqlStatement stmt = sqlstatements.getRawSql(i); 996 997 try { 998 // Set frame stack for variable scope tracking 999 stmt.setFrameStack(frameStack); 1000 1001 // Parse the statement 1002 int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree()); 1003 1004 // Vendor-specific post-processing (none needed for BigQuery currently) 1005 afterStatementParsed(stmt); 1006 1007 // Collect errors from the statement 1008 if ((parseResult != 0) || (stmt.getErrorCount() > 0)) { 1009 copyErrorsFromStatement(stmt); 1010 } 1011 1012 } catch (Exception ex) { 1013 // Use inherited exception handler 1014 handleStatementParsingException(stmt, i, ex); 1015 continue; 1016 } 1017 } 1018 1019 // Clean up frame stack 1020 if (globalFrame != null) { 1021 globalFrame.popMeFromStack(frameStack); 1022 } 1023 1024 return sqlstatements; 1025 } 1026 1027 /** 1028 * Post-process statement after parsing (hook method). 1029 * <p> 1030 * BigQuery does not require special post-processing, so this is a no-op. 1031 * Override if BigQuery-specific validation is needed in the future. 1032 */ 1033 protected void afterStatementParsed(TCustomSqlStatement stmt) { 1034 // No special post-processing needed for BigQuery 1035 } 1036 1037 // ========== Semantic Analysis ========== 1038 1039 /** 1040 * Perform semantic analysis on parsed statements. 1041 * <p> 1042 * Runs TSQLResolver to build relationships between tables and columns, 1043 * resolve references, and perform type checking. 1044 */ 1045 @Override 1046 protected void performSemanticAnalysis(ParserContext context, TStatementList statements) { 1047 if (TBaseType.isEnableResolver() && getSyntaxErrors().isEmpty()) { 1048 TSQLResolver resolver = new TSQLResolver(globalContext, statements); 1049 resolver.resolve(); 1050 } 1051 } 1052 1053 /** 1054 * Perform interpretation/evaluation on statements. 1055 * <p> 1056 * Runs TASTEvaluator to execute constant expressions and compile-time 1057 * evaluations. 1058 */ 1059 @Override 1060 protected void performInterpreter(ParserContext context, TStatementList statements) { 1061 // BigQuery does not require interpretation currently 1062 } 1063 1064 @Override 1065 public String toString() { 1066 return "BigQuerySqlParser{vendor=" + vendor + "}"; 1067 } 1068}