001// lexical analyzer for GSQLParser component java version 002 003/**************************************************** 004 Lexical analizer for GSQLParser Java component 005 Copyright (c) 2004-2024 by Gudu software 006****************************************************/ 007 008package gudusoft.gsqlparser; 009 010import java.util.HashMap; 011import java.io.InputStreamReader; 012 013import java.util.Locale; 014import java.io.BufferedReader; 015import java.io.IOException; 016 017 018public class TLexerSnowflake extends TCustomLexer{ 019 static int yynmarks = 0 ; 020 static int yynmatches ; 021 static int yyntrans ; 022 static int yynstates ; 023 static int[] yyk,yym ; // 1 based 024 static int[] yytint; // 1 based 025 static TYytRec[] yyt ; // 1 based 026 static int[] yykl,yykh,yyml,yymh,yytl,yyth ; // 0 based 027 private static String[] keywordlist; 028 static String table_file; 029 static HashMap<String, Integer> keywordValueList; 030 static HashMap<Integer, Integer> keywordTypeList; 031 static int[][] yystateTable; 032 033 static { 034 keywordValueList = new HashMap<String, Integer>(); 035 keywordTypeList = new HashMap<Integer, Integer>(); 036 table_file = "/gudusoft/gsqlparser/parser/snowflake/snowflake_lex_table.txt"; 037 if (TBaseType.enterprise_edition||TBaseType.snowflake_edition){ 038 inittable(); 039 } 040 } 041 042 public TLexerSnowflake(){ 043 super(); 044 dbvendor = EDbVendor.dbvsnowflake; 045 } 046 047 // Track non-SQL languages (JAVA, PYTHON, SCALA) so $$ body is not parsed as SQL 048 private boolean isNonSQLLanguageDetected = false; 049 private boolean isLanguagePending = false; 050 051public boolean canBeColumnName(int tokencode){ 052 //http://blog.csdn.net/superbeck/article/details/5387476 053 boolean ret = false; 054 int modifiers = keyword_type_identifier | keyword_type_column ; 055 Integer s = keywordTypeList.get(tokencode); 056 if (s != null){ 057 int modifier = s; 058 ret = (modifiers & modifier) == modifier; 059 } 060 061 return ret; 062} 063 064 public int iskeyword(String str){ 065 int ret = -1; 066 Integer s = keywordValueList.get(str.toUpperCase(Locale.ENGLISH)); 067 if( s != null){ 068 ret = s; 069 } 070 return ret;// -1 means not a keyword 071 } 072 073 public int getkeywordvalue(String keyword){ 074 int ret = 0; 075 Integer s = keywordValueList.get(keyword.toUpperCase(Locale.ENGLISH)); 076 if( s != null){ 077 ret = s; 078 } 079 return ret;// 0 means not a keyword 080 } 081 082 public static EKeywordType getKeywordType(String keyword){ 083 return TCustomLexer.getKeywordType(keyword,keywordValueList,keywordTypeList); 084 } 085 086 static void yystateLookupConfigure() { 087 int yystates = yytl.length; 088 yystateTable = new int[257][yystates]; 089 090 // initialize to empty 091 for(int i = 0; i < yystates; i++) { 092 for (int j = 0; j < 257; j++) 093 yystateTable[j][i] = -1; 094 } 095 096 for(int i = 0; i < yystates; i++) { 097 int low = yytl[i]; 098 int high = yyth[i]; 099 for (int j = low; j <= high; j++) { 100 for (char c: yyt[j].cc) { 101 yystateTable[c][i] = j; 102 } 103 } 104 } 105 } 106 107 int yylex(){ 108 int yyn; 109 while (true) { // top level while 110 yynew(); 111 while (true){ //scan 112 for(yyn = yykl[yystate]; yyn <= yykh[yystate]; yyn++){ 113 yymark(yyk[yyn]); 114 } 115 116 for(yyn=yymh[yystate]; yyn>= yyml[yystate]; yyn--){ 117 yymatch(yym[yyn]); 118 } 119 120 if(yytl[yystate] > yyth[yystate]){ 121 break; 122 } 123 124 yyscan(); 125// yyn = yytl[yystate]; 126 totablechar(); 127// while( (yyn <= yyth[yystate]) && (!(charinarray(yytablechar,yyt[yyn].cc))) ){ 128// yyn++; 129// } 130// if (yyn > yyth[yystate]){ 131// break; 132// } 133 134 yyn = yystateTable[yytablechar][yystate]; 135 if (yyn == -1) 136 break; 137 138 yystate = yyt[yyn].s; 139 } //scan 140 141 while (true){ //action 142 int yyrule; 143 if ( (yyrule = yyfind()) != -1 ){ 144 yyaction(yyrule); 145 if (yyreject){ 146 continue; 147 } 148 }else if( (!yydefault() ) && (yywrap()) ){ 149 yyclear(); 150 returni(0); 151 } 152 break; 153 } 154 155 if (!yydone) { 156 continue; 157 } 158 break; 159 } // top level while 160 161 return yyretval; 162 } 163 164 static void inittable(){ 165 166 //if (yynmarks > 0) return; //init table already 167 168 String line; 169 boolean inyyk=false,inyym=false,inyykl=false,inyykh=false,inyyml=false,inyymh=false,inyytl=false,inyyth=false,inyytint=false,inyyt=false,inkeyword=false; 170 int yyk_count=0,yym_count=0,yykl_count=0,yykh_count=0,yyml_count=0,yymh_count=0,yytl_count=0,yyth_count=0,yytint_count=0,yyt_count=0; 171 int c=0; 172 keywordValueList.clear(); 173 keywordTypeList.clear(); 174 175 BufferedReader br = new BufferedReader(new InputStreamReader(TLexerSnowflake.class.getResourceAsStream(table_file))); 176 177 try{ 178 while( (line = br.readLine()) != null){ 179 if (line.trim().startsWith("yynmarks=")){ 180 String[] ss = line.split("[=;]"); 181 yynmarks=Integer.parseInt(ss[1].trim()); 182 yyk = new int[yynmarks+1]; 183 }else if (line.trim().startsWith("yynmatches=")){ 184 String[] ss = line.split("[=;]"); 185 yynmatches=Integer.parseInt(ss[1].trim()); 186 yym = new int[yynmatches+1]; 187 }else if (line.trim().startsWith("yyntrans=")){ 188 String[] ss = line.split("[=;]"); 189 yyntrans=Integer.parseInt(ss[1].trim()); 190 yytint = new int[yyntrans+1]; 191 yyt = new TYytRec[yyntrans+1]; 192 }else if (line.trim().startsWith("yynstates=")){ 193 String[] ss = line.split("[=;]"); 194 yynstates=Integer.parseInt(ss[1].trim()); 195 yykl = new int[yynstates]; 196 yykh = new int[yynstates]; 197 yyml = new int[yynstates]; 198 yymh = new int[yynstates]; 199 yytl = new int[yynstates]; 200 yyth = new int[yynstates]; 201 }else if (line.trim().startsWith("<end>")){ 202 if (inyyk){ 203 inyyk = false; 204 if (yynmarks+1 != yyk_count ){ 205 System.out.println("required1:"+(yynmarks)+" actually:"+(yyk_count-1)); 206 } 207 } 208 else if(inyym){ 209 inyym = false; 210 if (yynmatches+1 != yym_count ){ 211 System.out.println("required2:"+(yynmatches)+" actually:"+(yym_count-1)); 212 } 213 } 214 else if(inyykl){ 215 inyykl = false; 216 if (yynstates != yykl_count ){ 217 System.out.println("required3:"+(yynstates)+" actually:"+(yykl_count)); 218 } 219 } 220 else if(inyykh){ 221 inyykh = false; 222 if (yynstates != yykh_count ){ 223 System.out.println("required4:"+(yynstates)+" actually:"+(yykh_count)); 224 } 225 } 226 else if(inyyml){ 227 inyyml = false; 228 if (yynstates != yyml_count ){ 229 System.out.println("required5:"+(yynstates)+" actually:"+(yyml_count)); 230 } 231 } 232 else if(inyymh){ 233 inyymh = false; 234 if (yynstates != yymh_count ){ 235 System.out.println("required:"+(yynstates)+" actually:"+(yymh_count)); 236 } 237 } 238 else if(inyytl){ 239 inyytl = false; 240 if (yynstates != yytl_count ){ 241 System.out.println("required6:"+(yynstates)+" actually:"+(yytl_count)); 242 } 243 } 244 else if(inyyth){ 245 inyyth = false; 246 if (yynstates != yyth_count ){ 247 System.out.println("required7:"+(yynstates)+" actually:"+(yyth_count)); 248 } 249 } 250 else if(inyytint){ 251 inyytint = false; 252 if (yyntrans + 1 != yytint_count ){ 253 System.out.println("required8:"+(yyntrans)+" actually:"+(yytint_count-1)); 254 } 255 } 256 else if(inyyt){ 257 inyyt = false; 258 if (yyntrans+1 != yyt_count ){ 259 System.out.println("required9:"+(yyntrans)+" actually:"+(yyt_count-1)); 260 } 261 } 262 else if(inkeyword){ 263 inkeyword = false; 264 } 265 }else if(line.trim().startsWith("yyk =")){ 266 inyyk = true; 267 }else if(line.trim().startsWith("yym =")){ 268 inyym = true; 269 }else if(line.trim().startsWith("yykl =")){ 270 inyykl = true; 271 }else if(line.trim().startsWith("yykh =")){ 272 inyykh = true; 273 }else if(line.trim().startsWith("yyml =")){ 274 inyyml = true; 275 }else if(line.trim().startsWith("yymh =")){ 276 inyymh = true; 277 }else if(line.trim().startsWith("yytl =")){ 278 inyytl = true; 279 }else if(line.trim().startsWith("yyth =")){ 280 inyyth = true; 281 }else if(line.trim().startsWith("yytint =")){ 282 inyytint = true; 283 }else if(line.trim().startsWith("yyt =")){ 284 inyyt = true; 285 }else if(line.trim().startsWith("keywordsvalue =")){ 286 inkeyword = true; 287 }else if(inyyk){ 288 String[] ss = line.split("[,]"); 289 for(int j=0;j<ss.length;j++){ 290 // System.out.println(ss[j].trim()); 291 yyk[yyk_count++] = Integer.parseInt(ss[j].trim()); 292 } 293 }else if(inyym){ 294 String[] ss = line.split("[,]"); 295 for(int j=0;j<ss.length;j++){ 296 // System.out.println(ss[j].trim()); 297 yym[yym_count++] = Integer.parseInt(ss[j].trim()); 298 } 299 }else if(inyykl){ 300 String[] ss = line.split("[,]"); 301 for(int j=0;j<ss.length;j++){ 302 // System.out.println(ss[j].trim()); 303 yykl[yykl_count++] = Integer.parseInt(ss[j].trim()); 304 } 305 }else if(inyykh){ 306 String[] ss = line.split("[,]"); 307 for(int j=0;j<ss.length;j++){ 308 // System.out.println(ss[j].trim()); 309 yykh[yykh_count++] = Integer.parseInt(ss[j].trim()); 310 } 311 }else if(inyyml){ 312 String[] ss = line.split("[,]"); 313 for(int j=0;j<ss.length;j++){ 314 // System.out.println(ss[j].trim()); 315 yyml[yyml_count++] = Integer.parseInt(ss[j].trim()); 316 } 317 }else if(inyymh){ 318 String[] ss = line.split("[,]"); 319 for(int j=0;j<ss.length;j++){ 320 // System.out.println(ss[j].trim()); 321 yymh[yymh_count++] = Integer.parseInt(ss[j].trim()); 322 } 323 }else if(inyytl){ 324 String[] ss = line.split("[,]"); 325 for(int j=0;j<ss.length;j++){ 326 // System.out.println(ss[j].trim()); 327 yytl[yytl_count++] = Integer.parseInt(ss[j].trim()); 328 } 329 }else if(inyyth){ 330 String[] ss = line.split("[,]"); 331 for(int j=0;j<ss.length;j++){ 332 // System.out.println(ss[j].trim()); 333 yyth[yyth_count++] = Integer.parseInt(ss[j].trim()); 334 } 335 }else if(inyytint){ 336 String[] ss = line.split("[,]"); 337 for(int j=0;j<ss.length;j++){ 338 // System.out.println(ss[j].trim()); 339 yytint[yytint_count++] = Integer.parseInt(ss[j].trim()); 340 } 341 }else if(inyyt){ 342 //System.out.println(line.trim()); 343 344 c = 0; 345 String[] st = line.trim().split(",,"); 346 char[] tmp = new char[st.length]; 347 for(int i=0;i<st.length;i++){ 348 349 if(st[i].startsWith("\'")) { 350 if(st[i].length() == 3){ // 'a' 351 tmp[c++] = st[i].charAt(1); 352 }else if(st[i].length() == 4) { // '\\' 353 tmp[c++] = st[i].charAt(2); 354 }else{ 355 System.out.println(" read yytstr error, error string is "+st[i]+ "line: "+ yyt_count); 356 } 357 }else{ 358 try{ 359 tmp[c++] = (char)Integer.parseInt(st[i]); // char in number like 32 that represent space 360 } catch (NumberFormatException nfe) { 361 System.out.println("NumberFormatException: " + nfe.getMessage()); 362 } 363 } 364 } //while hasmoreTokens 365 366 //yyt[lineno] = new YYTrec(tmp,yytint[lineno]); 367 yyt[yyt_count] = new TYytRec(tmp,yytint[yyt_count]); 368 yyt_count++; 369 370 }else if(inkeyword){ 371 String[] ss =line.split("[=]"); 372 373 int val1 = -1; 374 int val2 = -1; 375 try { 376 val1 = Integer.parseInt(ss[1]); 377 val2 = Integer.parseInt(ss[2]); 378 } 379 catch (NumberFormatException nfe) { 380 System.out.println("NumberFormatException: " + nfe.getMessage()); 381 } 382 keywordValueList.put(ss[0].toUpperCase(),val1); 383 keywordTypeList.put(val1,val2); 384 } 385 } 386 }catch(IOException e){ 387 System.out.println(e.toString()); 388 } 389 390 yystateLookupConfigure(); 391 392 } 393 394 395 void yyaction(int yyruleno){ 396 397 398 int ic; 399 char[] tmparray = {'=','+','-','*','/','>','<'}; 400 401 yylvalstr = getyytext(); 402 /* actions: */ 403 switch(yyruleno){ 404 case 1: 405 406 { 407 returni (filepath_sign); 408 break; 409 } 410 411 case 2: 412 413 { 414 if (yylvalstr.equalsIgnoreCase(dolqstart)) 415 { 416 //dolqstart = ""; 417 start(init); 418 addlit(yylvalstr,yytextlen); 419 yylvalstr = litbufdup(); 420 returni(sconst); 421 } 422 else 423 { 424 //nchars = yytextlen; 425 addlit(yylvalstr, yytextlen-1); 426 yyless(yytextlen-1); 427 return; 428 } 429 //System.out.println("<xdolq>{dolqdelim}: "+dolqstart); 430 break; 431 } 432 433 case 3: 434 435 { 436 if (isReadyForFunctionBody) { // meet the first $$ 437 //isInFunctionBody = true; 438 isReadyForFunctionBody = false; 439 functionBodyDelimiterIndex++; 440 functionBodyDelimiter.add(yylvalstr); 441 // System.out.println("start function body:"+functionBodyDelimiter.get(functionBodyDelimiterIndex)); 442 returni(snowflake_function_delimiter); 443 }else if ((functionBodyDelimiterIndex>=0)&&(functionBodyDelimiter.get(functionBodyDelimiterIndex).equalsIgnoreCase(yylvalstr))){ // meet the second $$ 444 // Lookahead to check if this $$ is ending function body or starting a nested string 445 // If next char is whitespace, ';', ')', or EOF, it's ending the function body 446 // If next char is alphanumeric, backslash, or other content, it's starting a nested string 447 char nextChar = get_char(); 448 boolean isEndingFunctionBody = (nextChar == ' ' || nextChar == '\t' || nextChar == '\n' || 449 nextChar == '\r' || nextChar == ';' || nextChar == ')' || 450 nextChar == '\0' || nextChar == 0); 451 if (nextChar != '\0' && nextChar != 0) { 452 unget_char(nextChar); // put the character back 453 } 454 455 if (isEndingFunctionBody) { 456 //isInFunctionBody = false; 457 //System.out.println("end function body:"+functionBodyDelimiter.get(functionBodyDelimiterIndex)); 458 functionBodyDelimiter.remove(functionBodyDelimiterIndex); 459 functionBodyDelimiterIndex--; 460 returni(snowflake_function_delimiter); 461 } else { 462 // Not ending function body, start a nested dollar-quoted string 463 start(xdolq); 464 startlit(); 465 dolqstart = yylvalstr; 466 addlit(yylvalstr,yytextlen); 467 } 468 }else { 469 if (getyysstate() == xq){ 470 nchars = yytextlen; 471 addlit(yylvalstr, yytextlen-1); 472 yyless(nchars-1); 473 return;//exit; 474 }else{ 475 start(xdolq); 476 startlit(); 477 dolqstart = yylvalstr; 478 addlit(yylvalstr,yytextlen); 479 } 480 } 481 482 break; 483 } 484 485 case 4: 486 487 { 488 addlit(yylvalstr, yytextlen); 489 //System.out.println("<xdolq>{dolqinside}: "+yylvalstr); 490 break; 491 } 492 493 case 5: 494 495 { 496 addlit(yylvalstr, yytextlen); 497 break; 498 } 499 500 case 6: 501 502 { 503 addlitchar(yylvalstr.charAt(0)); 504 break; 505 } 506 507 508 case 7: 509 510 { 511 if (getyysstate() == xq) 512 { 513 nchars = yytextlen; 514 addlit(yylvalstr, yytextlen-1); 515 yyless(nchars-1); 516 return;//exit; 517 } 518 519 start(xq); 520 startlit(); 521 addlit(yylvalstr,yytextlen); 522 break; 523 } 524 525 case 8: 526 527 { 528 if (getyysstate() == xq) 529 { 530 nchars = yytextlen; 531 addlit(yylvalstr, yytextlen-1); 532 yyless(nchars-1); 533 return;//exit; 534 } 535 536 start(xq); 537 startlit(); 538 addlit(yylvalstr,yytextlen); 539 break; 540 } 541 542 case 9: 543 544 { 545 if (getyysstate() == xq) 546 { 547 nchars = yytextlen; 548 addlit(yylvalstr, yytextlen-1); 549 yyless(nchars-1); 550 return;//exit; 551 } 552 553 start(xq); 554 startlit(); 555 addlit(yylvalstr,yytextlen); 556 break; 557 } 558 559 case 10: 560 561 { 562 addlit(yylvalstr,yytextlen); 563 if (xcdepth <= 0) 564 { 565 start(init); 566 yylvalstr = litbufdup(); 567 returni(cmtslashstar); 568 } 569 else 570 xcdepth--; 571 572 break; 573 } 574 575 576 case 11: 577 578 { 579 if (yylvalstr.equalsIgnoreCase("/*+")){ 580 xcdepth++; 581 yyless(2); 582 addlit(yylvalstr,yytextlen); 583 }else{ 584 yyless(1); 585 addlit(yylvalstr,1); 586 } 587 588 break; 589 590 } 591 592 case 12: 593 594 { 595 596 if (getyysstate() == xq) 597 { 598 nchars = yytextlen; 599 addlit(yylvalstr, yytextlen-1); 600 yyless(nchars-1); 601 return;//exit; 602 } 603 604 xcdepth = 0; 605 start(xc); 606 startlit(); 607 yyless(2); 608 addlit(yylvalstr,yytextlen); 609 610 break; 611 } 612 613 case 13: 614 615 { 616 addlit(yylvalstr,yytextlen); 617 618 break; 619 } 620 621 case 14: 622 623 { 624 addlitchar(yylvalstr.charAt(0)); 625 626 break; 627 } 628 629 case 15: 630 631 { 632 start(init); 633 addlit(yylvalstr, yytextlen); 634 yylvalstr = litbufdup(); 635 if( yylvalstr.startsWith("b")|| (yylvalstr.startsWith("B"))){ 636 returni(bconst); 637 }else if( yylvalstr.startsWith("x")|| (yylvalstr.startsWith("X"))){ 638 returni(xconst); 639 }else 640 { 641 returni(sconst); 642 } 643 break; 644 } 645 646 case 16: 647 648 { 649 if (insqlpluscmd){ 650 yyless(0); 651 yylvalstr = litbufdup(); 652 start(init); 653 returni(sconst); 654 }else{ 655 addlit(yylvalstr,yytextlen); 656 } 657 658 break; 659 } 660 case 17: 661 662 { 663 if (getyysstate() == xq) 664 { 665 nchars = yytextlen; 666 addlit(yylvalstr, yytextlen-1); 667 yyless(nchars-1); 668 return;//exit; 669 } 670 671 start(xq); 672 startlit(); 673 addlit(yylvalstr,yytextlen); 674 675 dummych1 = get_char(); 676 if (dummych1 == '\\') // recognize string like '\' 677 { 678 dummych2 = get_char(); 679 if (dummych2 == '\'') 680 { 681 // start(init); 682 addlit("\\", 1); 683 addlit("\'", 1); 684 //yylvalstr = litbufdup(); 685 //returni(sconst); 686 } 687 else 688 { 689 unget_char(dummych2); 690 unget_char(dummych1); 691 } 692 } 693 else 694 { unget_char(dummych1);} 695 696 break; 697 } 698 699 case 18: 700 701 { 702 start(xq); 703 startlit(); 704 addlit(yylvalstr, yytextlen); 705 dummych1 = get_char(); 706 if (dummych1 == '\\') // recognize string like '\' 707 { 708 dummych2 = get_char(); 709 if (dummych2 == '\'') 710 { 711 // start(init); 712 addlit("\\", 1); 713 addlit("\'", 1); 714 //yylvalstr = litbufdup(); 715 //returni(sconst); 716 } 717 else 718 { 719 unget_char(dummych2); 720 unget_char(dummych1); 721 } 722 } 723 else 724 { unget_char(dummych1);} 725 726 break; 727 } 728 729 case 19: 730 731 { 732 addlit(yylvalstr, yytextlen); 733 break; 734 } 735 736 case 20: 737 738 { 739 dummych1 = get_char(); 740 unget_char(dummych1); 741 if (dummych1 == (char)10) 742 { 743 if (insqlpluscmd){ 744 nchars = yytextlen; 745 if(yylvalstr.charAt(nchars-1) == (char)13){ 746 yyless(nchars - 1); 747 yylvalstr = yylvalstr.substring(0,nchars); 748 } 749 start(init); 750 addlit(yylvalstr, nchars-1); 751 yylvalstr = litbufdup(); 752 returni(sconst); //in sqlplus command, characters between ' and return is treated as a string 753 754 }else{ 755 dummych1 = get_char(); 756 addlit(yylvalstr+dummych1, yytextlen+1); 757 } 758 } else if (dummych1 == '\\') 759 { 760 // Handle backslash inside string literals 761 // Snowflake supports both \' (C-style escape) and '' (SQL standard escape) 762 // The pattern \'' is ambiguous: 763 // - Could be \' (escape) + ' (close) - e.g., 'text\'' means text' then close 764 // - Could be \ (literal) + '' (escape) - e.g., 'text\''more' means text\'more 765 // We distinguish by looking at what follows \'' : 766 // - \''' : \' (escape) + '' (leave for xqdouble) - two quote chars in value 767 // - \''<letter/digit>: \ + '' (backslash + escape, string continues) 768 // - \''<space><SQL keyword>: \' + close (string ends, keyword follows) 769 // - \''<space><other>: \ + '' (backslash + escape, string continues with space) 770 // - \''<EOF/control>: \' + close 771 // See mantisbt issue 4298 for details 772 dummych1 = get_char(); // Read the backslash 773 dummych2 = get_char(); // Read the next char 774 if (dummych2 == '\'') { 775 // Have \' - check if followed by another quote 776 char dummych3 = get_char(); 777 if (dummych3 == '\'') { 778 // \'' pattern - check what comes after 779 char dummych4 = get_char(); 780 if (dummych4 == '\'') { 781 // \''' : consume \' as escape, leave '' for xqdouble 782 unget_char(dummych4); 783 unget_char(dummych3); 784 addlit(yylvalstr+dummych1+dummych2, yytextlen+2); 785 } else if (Character.isLetterOrDigit(dummych4) || dummych4 == '_') { 786 // \''<letter/digit>: \ + '' (string continues) 787 unget_char(dummych4); 788 unget_char(dummych3); 789 unget_char(dummych2); 790 addlit(yylvalstr+dummych1, yytextlen+1); 791 } else if (dummych4 == ' ' || dummych4 == '\t') { 792 // \''<space> - check if followed by SQL keyword 793 char dummych5 = get_char(); 794 char dummych6 = get_char(); 795 // Check for SQL keywords that typically follow a column/value expression 796 // Only check keywords that wouldn't appear as regular words in strings 797 String twoChars = "" + Character.toLowerCase(dummych5) + Character.toLowerCase(dummych6); 798 if (twoChars.equals("as")) { // AS is the most reliable indicator of SQL context 799 // Likely SQL keyword follows - treat \'' as \' + close 800 unget_char(dummych6); 801 unget_char(dummych5); 802 unget_char(dummych4); 803 unget_char(dummych3); 804 addlit(yylvalstr+dummych1+dummych2, yytextlen+2); 805 } else { 806 // Not a SQL keyword - treat as \ + '' (string continues) 807 unget_char(dummych6); 808 unget_char(dummych5); 809 unget_char(dummych4); 810 unget_char(dummych3); 811 unget_char(dummych2); 812 addlit(yylvalstr+dummych1, yytextlen+1); 813 } 814 } else if (dummych4 == ';' || dummych4 == ',' || dummych4 == ')' 815 || dummych4 == '|' || dummych4 == '+' || dummych4 == '=' 816 || dummych4 == '<' || dummych4 == '>' || dummych4 == '/' 817 || dummych4 == '*') { 818 // \''<SQL-punctuation>: \' (escape) + ' (close string) 819 unget_char(dummych4); 820 unget_char(dummych3); 821 addlit(yylvalstr+dummych1+dummych2, yytextlen+2); 822 } else if (dummych4 >= ' ' && dummych4 != (char)0 && dummych4 != (char)-1) { 823 // Other printable: \ + '' (string continues) 824 unget_char(dummych4); 825 unget_char(dummych3); 826 unget_char(dummych2); 827 addlit(yylvalstr+dummych1, yytextlen+1); 828 } else { 829 // \'' at EOF or followed by control char: \' escape + ' closes string 830 unget_char(dummych4); 831 unget_char(dummych3); 832 addlit(yylvalstr+dummych1+dummych2, yytextlen+2); 833 } 834 } else { 835 // \' followed by non-quote: consume \' as C-style escape 836 unget_char(dummych3); 837 addlit(yylvalstr+dummych1+dummych2, yytextlen+2); 838 } 839 } else { 840 // \X where X is not quote: consume both 841 addlit(yylvalstr+dummych1+dummych2, yytextlen+2); 842 } 843 } 844 else 845 { addlit(yylvalstr, yytextlen);} 846 847 break; 848 } 849 850 case 21: 851 852 { 853 addlit(yylvalstr, yytextlen); 854 break; 855 } 856 857 858 859 case 22: 860 861 { 862 start(init); 863 addlit(yylvalstr, yytextlen); 864 if ((literallen == 0) && (!insqlpluscmd)) 865 {returni (error);} 866 if (literallen >= namedatalen) 867 { 868 setlengthofliteralbuf(namedatalen); 869 literallen = namedatalen; 870 } 871 yylvalstr = litbufdup(); 872 returni (ident); 873 874 break; 875 } 876 877 878 case 23: 879 880 { 881 if (insqlpluscmd){ 882 yyless(0); 883 yylvalstr = litbufdup(); 884 start(init); 885 returni(sconst); 886 }else{ 887 addlit(yylvalstr, yytextlen); 888 } 889 890 break; 891 } 892 893 case 24: 894 895 { 896 addlit(yylvalstr, yytextlen); 897 break; 898 } 899 900 case 25: 901 902 { 903 // Handle ""identifier"" pattern from Snowflake GET_DDL() output. 904 // When we see " followed by another ", and then identifier content 905 // ending with "", consume the whole thing as one identifier token. 906 dummych1 = get_char(); 907 if (dummych1 == '"') { 908 // We have "" at the start. Peek to see if identifier content follows. 909 dummych2 = get_char(); 910 if (Character.isLetter(dummych2) || dummych2 == '_' || (dummych2 >= '\200' && dummych2 <= '\377')) { 911 // This is ""identifier..."" pattern. Read until closing "". 912 startlit(); 913 addlit("\"\"", 2); 914 addlit(String.valueOf(dummych2), 1); 915 // Read chars until we find closing "" 916 while (true) { 917 char ch = get_char(); 918 if (ch == '"') { 919 char ch2 = get_char(); 920 if (ch2 == '"') { 921 // Found "" - this is the closing "" 922 addlit("\"\"", 2); 923 yylvalstr = litbufdup(); 924 returni(ident); 925 break; 926 } 927 // Single " not followed by " - treat as closing quote 928 unget_char(ch2); 929 addlit("\"", 1); 930 yylvalstr = litbufdup(); 931 returni(ident); 932 break; 933 } 934 if (ch == '\n' || ch == (char)0 || ch == (char)-1) { 935 // End of line or input - close the identifier 936 unget_char(ch); 937 addlit("\"", 1); 938 yylvalstr = litbufdup(); 939 returni(ident); 940 break; 941 } 942 addlit(String.valueOf(ch), 1); 943 } 944 break; 945 } 946 // "" not followed by identifier content - push back and handle normally 947 unget_char(dummych2); 948 unget_char(dummych1); 949 } else { 950 unget_char(dummych1); 951 } 952 start(xd); 953 startlit(); 954 addlit(yylvalstr, yytextlen); 955 break; 956 } 957 958 case 26: 959 960 { 961 dummych1 = get_char(); 962 unget_char(dummych1); 963 if (dummych1 == (char)10) 964 { 965 if (insqlpluscmd){ 966 nchars = yytextlen; 967 if(yylvalstr.charAt(nchars-1) == (char)13){ 968 yyless(nchars - 1); 969 yylvalstr = yylvalstr.substring(0,nchars); 970 } 971 start(init); 972 addlit(yylvalstr, nchars-1); 973 yylvalstr = litbufdup(); 974 returni(sconst); //in sqlplus command, characters between ' and return is treated as a string 975 976 }else{ 977 dummych1 = get_char(); 978 addlit(yylvalstr+dummych1, yytextlen+1); 979 } 980 981 } else 982 addlit(yylvalstr, yytextlen); 983 984 break; 985 } 986 987 case 27: 988 989 { 990 returni(lexnewline); 991 break; 992 } 993 994 case 28: 995 996 { 997 returni(lexspace); 998 break; 999 } 1000 1001 case 29: 1002 1003 { 1004 if ((getyysstate() == xq) 1005 || (getyysstate() == xd) 1006 || (getyysstate() == xc) 1007 ) 1008 { 1009 addlit(yylvalstr, 1); 1010 yyless(1); 1011 return;//exit; 1012 } 1013 1014 returni(cmtdoublehyphen); 1015 break; 1016 } 1017 1018 case 30: 1019 1020 { 1021 if (yylvalstr.charAt(0) == '{') { 1022 // Look ahead for {{variable}} template substitution syntax 1023 char ch1 = get_char(); 1024 if (ch1 == '{') { 1025 StringBuilder varBuf = new StringBuilder(); 1026 boolean closed = false; 1027 while (true) { 1028 char ch = get_char(); 1029 if (ch == 0) break; 1030 if (ch == '}') { 1031 char ch2 = get_char(); 1032 if (ch2 == '}') { 1033 closed = true; 1034 break; 1035 } else { 1036 varBuf.append(ch); 1037 if (ch2 != 0) varBuf.append(ch2); 1038 else break; 1039 } 1040 } else { 1041 varBuf.append(ch); 1042 } 1043 } 1044 if (closed) { 1045 yylvalstr = "{{" + varBuf.toString() + "}}"; 1046 yytextlen = yylvalstr.length(); 1047 // Consume trailing identifier chars (e.g., {{env}}_suffix) 1048 StringBuilder trailBuf2 = new StringBuilder(); 1049 while (true) { 1050 char tc2 = get_char(); 1051 if (tc2 == 0) break; 1052 if (Character.isLetterOrDigit(tc2) || tc2 == '_') { 1053 trailBuf2.append(tc2); 1054 } else { 1055 unget_char(tc2); 1056 break; 1057 } 1058 } 1059 if (trailBuf2.length() > 0) { 1060 yylvalstr = yylvalstr + trailBuf2.toString(); 1061 yytextlen = yylvalstr.length(); 1062 } 1063 returni(ident); 1064 } else { 1065 // Unclosed — put back and return '{' 1066 String s = varBuf.toString(); 1067 for (int i = s.length() - 1; i >= 0; i--) { 1068 unget_char(s.charAt(i)); 1069 } 1070 unget_char(ch1); 1071 returnc('{'); 1072 } 1073 } else { 1074 if (ch1 != 0) unget_char(ch1); 1075 returnc('{'); 1076 } 1077 } else { 1078 returnc(yylvalstr.charAt(0)); 1079 } 1080 break; 1081 } 1082 1083 case 31: 1084 1085 { 1086 returni(cmpop); 1087 break; 1088 } 1089 1090 case 32: 1091 1092 { 1093 1094 if (getyysstate() == xc) 1095 { 1096 slashstar = yylvalstr.indexOf("*/"); 1097 if (slashstar >= 0) 1098 { 1099 start(init); 1100 addlit(yylvalstr,slashstar+2); 1101 yylvalstr = litbufdup(); 1102 yyless(slashstar+2); 1103 returni(cmtslashstar); 1104 } 1105 else 1106 { 1107 addlit(yylvalstr,1); 1108 yyless(1); 1109 } 1110 } 1111 else if (getyysstate() == xq) 1112 { 1113 addlit(yylvalstr,1); 1114 yyless(1); 1115 } 1116 else 1117 { 1118 nchars = yytextlen; 1119 slashstar = yylvalstr.indexOf("/*"); 1120 dashdash = yylvalstr.indexOf("--"); 1121 if ((slashstar > 0) && (dashdash > 0)) 1122 { 1123 //if both appear, take the first one 1124 if (slashstar > dashdash) 1125 {slashstar = dashdash;} 1126 } 1127 else 1128 { 1129 // if slashstar=0 then slashstar := dashdash; 1130 // add (getyysstate <> xc) to avoid something like this */--,here */ should be handled instead of -- 1131 if ((slashstar > 0) && (getyysstate() != xc)) { 1132 nchars = slashstar; 1133 } 1134 } 1135 1136 while ((nchars > 1) 1137 && ( (yylvalstr.charAt(nchars-1) == '+' ) 1138 || (yylvalstr.charAt(nchars-1) =='-')) 1139 && (getyysstate() != xc)) 1140 { 1141 for (ic = nchars - 1; ic>=1; ic--) 1142 { 1143 if (isopchar(yylvalstr.charAt(ic-1))) break; 1144 } 1145 if (ic >= 1) break; 1146 nchars--; 1147 } 1148 1149 if (nchars < yytextlen) 1150 { 1151 //Strip the unwanted chars from the token 1152 yyless(nchars); 1153 yylvalstr = yylvalstr.substring(0,nchars); 1154 } 1155 1156 ///* 1157 // * If what we have left is only one char, and it's 1158 // * one of the characters matching "self", then 1159 // * return it as a character token the same way 1160 // * that the "self" rule would have. 1161 // * make sure @ return as self char, by james wang 1162 // */ 1163 if ((nchars == 1) && (isselfchar(yylvalstr.charAt(0)) || (yylvalstr.charAt(0) == '@'))) 1164 { 1165 returnc(yylvalstr.charAt(0)); 1166 } 1167 else if ( 1168 (nchars >= 2) 1169 &&( 1170 charinarray(yylvalstr.charAt(nchars-1-1), tmparray) 1171 && ((yylvalstr.charAt(nchars-1) == ':') 1172 ||(yylvalstr.charAt(nchars-1) == '.') 1173 ) 1174 ) 1175 ) 1176 { 1177 yyless(nchars-1); 1178 yylvalstr = yylvalstr.substring(0,nchars-1); 1179 if (nchars == 2) 1180 returnc(yylvalstr.charAt(0)); 1181 else 1182 returni(op); 1183 } 1184 else if ( 1185 (nchars >= 2) 1186 && ( 1187 charinarray(yylvalstr.charAt(nchars-1-1),tmparray) 1188 && (yylvalstr.charAt(nchars-1) == '&') 1189 ) 1190 ) 1191 { 1192 yyless(nchars-1); 1193 yylvalstr = yylvalstr.substring(0,nchars-1); 1194 if (nchars == 2) 1195 returnc(yylvalstr.charAt(0)); 1196 else 1197 returni(op); 1198 } 1199 else if ( (nchars > 1) && (yylvalstr.charAt(0) == '~')) 1200 { 1201 yyless(1); 1202 returnc(yylvalstr.charAt(0)); 1203 } 1204 else if ( (nchars == 2) && (yylvalstr.charAt(0) == '.') && (yylvalstr.charAt(1) == '*')) 1205 { 1206 yyless(1); 1207 returnc(yylvalstr.charAt(0)); 1208 } 1209 else if ((nchars == 2) && ((yylvalstr.charAt(0) == '=') && ( (yylvalstr.charAt(1) == '?')||(yylvalstr.charAt(1) == '@')) )) 1210 { 1211 yyless(1); 1212 returnc(yylvalstr.charAt(0)); 1213 } 1214 else if ( (nchars >= 2) && ((yylvalstr.charAt(0) == '@')&&(yylvalstr.charAt(1) != '>'))) 1215 { 1216 yyless(1); 1217 returnc(yylvalstr.charAt(0)); 1218 } 1219 else if ( (nchars >= 2) && ((yylvalstr.charAt(0) == '/'))) 1220 { 1221 yyless(1); 1222 returnc(yylvalstr.charAt(0)); 1223 } 1224 else if (((nchars > 2) && (yylvalstr.charAt(0) == '*')) 1225 && (yylvalstr.charAt(1) == '/') 1226 && (getyysstate() == xc) 1227 ) 1228 { //in comment, and find */ , then it must the end of comment 1229 yyless(2); 1230 addlit(yylvalstr,yytextlen); 1231 if (xcdepth <= 0) 1232 { 1233 start(init); 1234 yylvalstr = litbufdup(); 1235 returni(cmtslashstar); 1236 } 1237 else 1238 xcdepth--; 1239 } 1240 else 1241 returni(op); 1242 } 1243 1244 break; 1245 } 1246 1247 case 33: 1248 1249 { 1250 returni(iconst); 1251 break; 1252 } 1253 1254 case 34: 1255 1256 { 1257 ///* for i in 1..5 loop, we can't recognize 1. as a decimal,but 1 as decimal 1258 nchars = yytextlen; 1259 if (yylvalstr.charAt(nchars-1) == '.') 1260 { 1261 dummych1 = get_char(); 1262 unget_char(dummych1); 1263 if (dummych1 == '.') 1264 { 1265 yyless(nchars-1); 1266 yylvalstr = yylvalstr.substring(0,nchars - 1); 1267 returni (iconst); 1268 return;//exit; 1269 } 1270 } 1271 returni (fconst); 1272 break; 1273 } 1274 1275 case 35: 1276 1277 { 1278 returni (fconst); 1279 break; 1280 } 1281 1282 case 36: 1283 1284 { 1285 returni (sconst); 1286 break; 1287 } 1288 1289 case 37: 1290 1291 { 1292 boolean dollarConstant = false; 1293 if (getyysstate() == xdolq){ 1294 int p = yylvalstr.indexOf("$"); 1295 if (p > 0){ 1296 dollarConstant = true; 1297 addlit(yylvalstr, p); 1298 yyless(p); 1299 return; 1300 } 1301 } 1302 1303 // Handle identifiers ending with $$ inside function body (e.g., "b$$") 1304 // When inside a function body, split identifier at $$ boundary 1305 // Only split if the identifier contains $$ (the function body delimiter) 1306 // Do NOT split identifiers like SYSTEM$TASK_RUNTIME_INFO that contain single $ 1307 if (functionBodyDelimiterIndex >= 0){ // inside function body 1308 int p = yylvalstr.indexOf("$$"); 1309 if (p > 0){ 1310 yylvalstr = yylvalstr.substring(0,p); 1311 yyless(p); 1312 } 1313 } 1314 1315 if (!dollarConstant){ 1316 // Look ahead for {{variable}} template to merge with identifier 1317 // e.g., FOCG{{env_suffix}} -> single ident "FOCG{{env_suffix}}" 1318 while (true) { 1319 char lk1 = get_char(); 1320 if (lk1 == '{') { 1321 char lk2 = get_char(); 1322 if (lk2 == '{') { 1323 StringBuilder vBuf = new StringBuilder(); 1324 boolean vClosed = false; 1325 while (true) { 1326 char vc = get_char(); 1327 if (vc == 0) break; 1328 if (vc == '}') { 1329 char vc2 = get_char(); 1330 if (vc2 == '}') { vClosed = true; break; } 1331 else { vBuf.append(vc); if (vc2 != 0) vBuf.append(vc2); else break; } 1332 } else { vBuf.append(vc); } 1333 } 1334 if (vClosed) { 1335 yylvalstr = yylvalstr + "{{" + vBuf.toString() + "}}"; 1336 yytextlen = yylvalstr.length(); 1337 // Continue loop to check for more {{}} or trailing identifier chars 1338 // Consume any trailing identifier chars after }} 1339 StringBuilder trailing = new StringBuilder(); 1340 while (true) { 1341 char tc = get_char(); 1342 if (tc == 0) break; 1343 if (Character.isLetterOrDigit(tc) || tc == '_') { 1344 trailing.append(tc); 1345 } else { 1346 unget_char(tc); 1347 break; 1348 } 1349 } 1350 if (trailing.length() > 0) { 1351 yylvalstr = yylvalstr + trailing.toString(); 1352 yytextlen = yylvalstr.length(); 1353 } 1354 continue; // check for another {{ 1355 } else { 1356 String s = vBuf.toString(); 1357 for (int si = s.length() - 1; si >= 0; si--) unget_char(s.charAt(si)); 1358 unget_char(lk2); 1359 unget_char(lk1); 1360 break; 1361 } 1362 } else { 1363 unget_char(lk2); 1364 unget_char(lk1); 1365 break; 1366 } 1367 } else { 1368 if (lk1 != 0) unget_char(lk1); 1369 break; 1370 } 1371 } 1372 1373 int rw; 1374 if ( (rw = iskeyword(yylvalstr)) != -1) { 1375 if (rw == TBaseType.rrw_as){ 1376 isReadyForFunctionBody = true; 1377 // check whether language is javascript, if yes, don't set isReadyForFunctionBody to true 1378 if ((TOKEN_TABLE[TBaseType.rrw_snowflake_language][COLUMN0_COUNT] > 0) 1379 &&(TOKEN_TABLE[TBaseType.rrw_snowflake_javascript][COLUMN0_COUNT] > 0)){ 1380 if (TOKEN_TABLE[TBaseType.rrw_snowflake_javascript][COLUMN5_FIRST_POS] - TOKEN_TABLE[TBaseType.rrw_snowflake_language][COLUMN5_FIRST_POS] <= 2){ 1381 // RETURNS STRING LANGUAGE JAVASCRIPT 1382 // 如果是 RETURNS STRING LANGUAGE JAVASCRIPT,那么不需要设置isReadyForFunctionBody = true,即把整个$$...$$当作字符串处理 1383 isReadyForFunctionBody = false; 1384 } 1385 } 1386 // Also check for non-SQL languages (JAVA, PYTHON, SCALA) detected earlier 1387 if (isNonSQLLanguageDetected) { 1388 isReadyForFunctionBody = false; 1389 } 1390 1391 }else if (rw == TBaseType.rrw_snowflake_language){ 1392 isLanguagePending = true; 1393 isReadyForFunctionBody = false; 1394 }else{ 1395 isReadyForFunctionBody = false; 1396 } 1397 returni(rw); 1398 } 1399 else 1400 { 1401 if (isLanguagePending) { 1402 String langName = yylvalstr.toLowerCase(Locale.ROOT); 1403 if (langName.equals("java") || langName.equals("python") || langName.equals("scala")) { 1404 isNonSQLLanguageDetected = true; 1405 } 1406 isLanguagePending = false; 1407 } 1408 isReadyForFunctionBody = false; 1409 returni(ident); 1410 } 1411 } 1412 1413 break; 1414 } 1415 1416 case 38: 1417 1418 { 1419 if (getyysstate() == xdolq){ 1420 addlit(yylvalstr, yytextlen); 1421 return; 1422 }else{ 1423 returni (param); 1424 } 1425 1426 break; 1427 } 1428 1429 case 39: 1430 1431 { 1432 returni (outer_join); 1433 break; 1434 } 1435 1436 case 40: 1437 1438 { 1439 returni (typecast); 1440 break; 1441 } 1442 1443 case 41: 1444 1445 { 1446 returni (double_dot); 1447 break; 1448 } 1449 1450 case 42: 1451 1452 { 1453 returni (assign_sign); 1454 break; 1455 } 1456 1457 case 43: 1458 1459 { 1460 returni (variable); 1461 break; 1462 } 1463 1464 case 44: 1465 1466 { 1467 returni (bind_v); 1468 break; 1469 } 1470 1471 1472 case 45: 1473 1474 { 1475 // Backtick-quoted identifier (SnowSQL compatibility) 1476 StringBuilder buf = new StringBuilder(); 1477 buf.append('`'); 1478 while (true) { 1479 char ch = get_char(); 1480 if (ch == 0) break; 1481 buf.append(ch); 1482 if (ch == '`') break; 1483 } 1484 yylvalstr = buf.toString(); 1485 yytextlen = yylvalstr.length(); 1486 returni(ident); 1487 break; 1488 } 1489 1490 case 46: 1491 1492 { 1493 // Look ahead for ${variable} template substitution syntax 1494 char ch1 = get_char(); 1495 if (ch1 == '{') { 1496 StringBuilder varBuf = new StringBuilder(); 1497 boolean closed = false; 1498 while (true) { 1499 char ch = get_char(); 1500 if (ch == 0) break; 1501 if (ch == '}') { closed = true; break; } 1502 varBuf.append(ch); 1503 } 1504 if (closed) { 1505 // Simple ${name} — return as IDENT for template variable substitution 1506 yylvalstr = "${" + varBuf.toString() + "}"; 1507 yytextlen = yylvalstr.length(); 1508 returni(ident); 1509 } else { 1510 // Unclosed — put back and return error 1511 String s = varBuf.toString(); 1512 for (int i = s.length() - 1; i >= 0; i--) { 1513 unget_char(s.charAt(i)); 1514 } 1515 unget_char('{'); 1516 returni(error); 1517 } 1518 } else { 1519 if (ch1 != 0) unget_char(ch1); 1520 returni(error); 1521 } 1522 break; 1523 } 1524 1525 case 47: 1526 1527 { 1528 returni (error); 1529 break; 1530 } 1531 1532 default:{ 1533 System.out.println("fatal error in yyaction"); 1534 } 1535 }//switch 1536}/*yyaction*/; 1537 1538 1539 1540 }