// lexical analyzer for GSQLParser component java version

/****************************************************
   Lexical analyzer for GSQLParser Java component
   Copyright (c) 2004-2024 by Gudu software
****************************************************/

package gudusoft.gsqlparser;

import java.util.HashMap;
import java.io.InputStream;
import java.io.InputStreamReader;

import java.util.Locale;
import java.io.BufferedReader;
import java.io.IOException;


/**
 * Table-driven lexer for the Snowflake SQL dialect.
 *
 * <p>The scanner tables ({@code yyk}, {@code yym}, {@code yyt}, ...) and the keyword
 * maps are static and are filled by {@link #inittable()} from the classpath resource
 * named by {@link #table_file}. {@link #yylex()} walks those tables and
 * {@link #yyaction(int)} runs the action of the rule that matched.
 */
public class TLexerSnowflake extends TCustomLexer{
    static int yynmarks = 0 ;
    static int yynmatches ;
    static int yyntrans ;
    static int yynstates ;
    static int[]  yyk,yym ; // 1 based
    static int[]  yytint;  // 1 based
    static TYytRec[] yyt ;  // 1 based
    static int[]  yykl,yykh,yyml,yymh,yytl,yyth ; // 0 based
    private  static  String[] keywordlist;
    static String   table_file;
    // upper-cased (Locale.ENGLISH) keyword text -> token code
    static HashMap<String, Integer> keywordValueList;
    // token code -> keyword type bit mask
    static HashMap<Integer, Integer> keywordTypeList;
    // [table char 0..256][state] -> index into yyt, or -1 when there is no transition
    static int[][] yystateTable;

    static {
        keywordValueList = new HashMap<String, Integer>();
        keywordTypeList = new HashMap<Integer, Integer>();
        table_file = "/gudusoft/gsqlparser/parser/snowflake/snowflake_lex_table.txt";
        // The tables are only loaded for editions that include Snowflake support.
        if (TBaseType.enterprise_edition||TBaseType.snowflake_edition){
            inittable();
        }
    }

    /** Creates a lexer whose {@code dbvendor} is {@link EDbVendor#dbvsnowflake}. */
    public TLexerSnowflake(){
        super();
        dbvendor = EDbVendor.dbvsnowflake;
    }

    // Track non-SQL languages (JAVA, PYTHON, SCALA) so $$ body is not parsed as SQL
    private boolean isNonSQLLanguageDetected = false;
    private boolean isLanguagePending = false;

    /**
     * Tells whether the keyword with the given token code may be used as a column name.
     *
     * @param tokencode token code to look up in {@code keywordTypeList}
     * @return {@code true} when the token code is a known keyword and every bit of its
     *         type mask is contained in {@code keyword_type_identifier | keyword_type_column};
     *         {@code false} otherwise, including when the code is not a keyword
     */
    public boolean canBeColumnName(int tokencode){
        //http://blog.csdn.net/superbeck/article/details/5387476
        boolean ret = false;
        int modifiers = keyword_type_identifier | keyword_type_column ;
        Integer s = keywordTypeList.get(tokencode);
        if (s != null){
            int modifier = s;
            ret = (modifiers & modifier) == modifier;
        }

        return ret;
    }

    /**
     * Looks up a keyword case-insensitively.
     *
     * @param str candidate keyword text
     * @return the keyword's token code, or {@code -1} when {@code str} is not a keyword
     */
    public int iskeyword(String str){
        int ret = -1;
        Integer s = keywordValueList.get(str.toUpperCase(Locale.ENGLISH));
        if( s != null){
            ret = s;
        }
        return ret;// -1 means not a keyword
    }

    /**
     * Looks up a keyword case-insensitively.
     *
     * @param keyword candidate keyword text
     * @return the keyword's token code, or {@code 0} when {@code keyword} is not a keyword
     */
    public int getkeywordvalue(String keyword){
        int ret = 0;
        Integer s = keywordValueList.get(keyword.toUpperCase(Locale.ENGLISH));
        if( s != null){
            ret = s;
        }
        return ret;// 0 means not a keyword
    }

    /**
     * Resolves the keyword type of {@code keyword} by delegating to
     * {@code TCustomLexer.getKeywordType} with this lexer's keyword maps.
     *
     * @param keyword keyword text
     * @return whatever the {@code TCustomLexer} helper returns for these maps
     */
    public static EKeywordType getKeywordType(String keyword){
        return TCustomLexer.getKeywordType(keyword,keywordValueList,keywordTypeList);
    }

    /**
     * Builds {@link #yystateTable}, a direct (char, state) -> transition lookup, from the
     * per-state transition ranges {@code yytl[i]..yyth[i]} and the character set
     * {@code yyt[j].cc} of each transition. Entries without a transition are {@code -1}.
     *
     * <p>NOTE(review): assumes every char in {@code yyt[j].cc} is below 257; a larger
     * value would index outside the table. Confirm against the table generator.
     */
    static void yystateLookupConfigure() {
        int yystates = yytl.length;
        yystateTable = new int[257][yystates];

        // initialize to empty
        for(int i = 0; i < yystates; i++) {
            for (int j = 0; j < 257; j++)
                yystateTable[j][i] = -1;
        }

        for(int i = 0; i < yystates; i++) {
            int low = yytl[i];
            int high = yyth[i];
            for (int j = low; j <= high; j++) {
                for (char c: yyt[j].cc) {
                    yystateTable[c][i] = j;
                }
            }
        }
    }

    /**
     * Scans the next token.
     *
     * <p>Each pass of the outer loop resets the scanner ({@code yynew}), runs the
     * automaton until the current state has no transition for the next character, then
     * runs rule actions until one is accepted. The outer loop repeats until
     * {@code yydone} is set.
     *
     * @return {@code yyretval} as left by the last executed action
     */
    int yylex(){
        int yyn;
        while (true) { // top level while
            yynew();
            while (true){ //scan
                for(yyn = yykl[yystate]; yyn <= yykh[yystate]; yyn++){
                    yymark(yyk[yyn]);
                }

                for(yyn=yymh[yystate]; yyn>= yyml[yystate]; yyn--){
                    yymatch(yym[yyn]);
                }

                // An empty transition range means this state has no outgoing transitions.
                if(yytl[yystate] > yyth[yystate]){
                    break;
                }

                yyscan();
                totablechar();

                // Direct table lookup; replaces a linear search over yyt[yytl..yyth].
                yyn = yystateTable[yytablechar][yystate];
                if (yyn == -1)
                    break;

                yystate = yyt[yyn].s;
            } //scan

            while (true){ //action
                int yyrule;
                if ( (yyrule = yyfind()) != -1 ){
                    yyaction(yyrule);
                    if (yyreject){
                        continue;
                    }
                }else if( (!yydefault() ) && (yywrap()) ){
                    yyclear();
                    returni(0);
                }
                break;
            }

            if (!yydone) {
                continue;
            }
            break;
        } // top level while

        return yyretval;
    }

    /**
     * Parses one comma-separated line of integers and appends the values to {@code dest}.
     *
     * @param line  raw table line; each element is trimmed before parsing
     * @param dest  array being filled
     * @param count index of the next free slot in {@code dest}
     * @return the index of the next free slot after the appended values
     */
    private static int appendParsedInts(String line, int[] dest, int count){
        String[] ss = line.split("[,]");
        for(int j=0;j<ss.length;j++){
            dest[count++] = Integer.parseInt(ss[j].trim());
        }
        return count;
    }

    /**
     * Loads the scanner tables and the keyword maps from the classpath resource
     * {@link #table_file}, then builds the state lookup table.
     *
     * <p>The resource is read line by line: {@code yynmarks=}, {@code yynmatches=},
     * {@code yyntrans=} and {@code yynstates=} lines size the arrays; a line starting with
     * {@code "<name> ="} opens a section, {@code <end>} closes it (printing a diagnostic
     * when the element count differs from the declared size), and every other line is data
     * for the currently open section.
     *
     * <p>An {@link IOException} while reading is printed to standard output and the tables
     * are used as far as they were read; malformed numbers in the integer sections
     * propagate as {@link NumberFormatException}.
     *
     * @throws NullPointerException if the table resource is not on the classpath
     */
    static void inittable(){

        //if (yynmarks > 0) return; //init table already

        String line;
        boolean inyyk=false,inyym=false,inyykl=false,inyykh=false,inyyml=false,inyymh=false,inyytl=false,inyyth=false,inyytint=false,inyyt=false,inkeyword=false;
        int yyk_count=0,yym_count=0,yykl_count=0,yykh_count=0,yyml_count=0,yymh_count=0,yytl_count=0,yyth_count=0,yytint_count=0,yyt_count=0;
        int c=0;
        keywordValueList.clear();
        keywordTypeList.clear();

        InputStream in = TLexerSnowflake.class.getResourceAsStream(table_file);
        if (in == null){
            // getResourceAsStream returns null for a missing resource; fail with the
            // resource name instead of an anonymous NPE from the reader constructor.
            throw new NullPointerException("lexer table resource not found: " + table_file);
        }
        // NOTE(review): the reader uses the platform default charset, as before;
        // presumably the table file is plain ASCII - confirm before pinning a charset.
        BufferedReader br = new BufferedReader(new InputStreamReader(in));

        try{
            while( (line = br.readLine()) != null){
                String trimmed = line.trim();
                if (trimmed.startsWith("yynmarks=")){
                    String[] ss = line.split("[=;]");
                    yynmarks=Integer.parseInt(ss[1].trim());
                    yyk = new int[yynmarks+1];
                }else if (trimmed.startsWith("yynmatches=")){
                    String[] ss = line.split("[=;]");
                    yynmatches=Integer.parseInt(ss[1].trim());
                    yym = new int[yynmatches+1];
                }else if (trimmed.startsWith("yyntrans=")){
                    String[] ss = line.split("[=;]");
                    yyntrans=Integer.parseInt(ss[1].trim());
                    yytint = new int[yyntrans+1];
                    yyt = new TYytRec[yyntrans+1];
                }else if (trimmed.startsWith("yynstates=")){
                    String[] ss = line.split("[=;]");
                    yynstates=Integer.parseInt(ss[1].trim());
                    yykl = new int[yynstates];
                    yykh = new int[yynstates];
                    yyml = new int[yynstates];
                    yymh = new int[yynstates];
                    yytl = new int[yynstates];
                    yyth = new int[yynstates];
                }else if (trimmed.startsWith("<end>")){
                    // Close whichever section is open and sanity-check its element count.
                    if (inyyk){
                        inyyk = false;
                        if (yynmarks+1 != yyk_count ){
                            System.out.println("required1:"+(yynmarks)+" actually:"+(yyk_count-1));
                        }
                    }
                    else if(inyym){
                        inyym = false;
                        if (yynmatches+1 != yym_count ){
                            System.out.println("required2:"+(yynmatches)+" actually:"+(yym_count-1));
                        }
                    }
                    else if(inyykl){
                        inyykl = false;
                        if (yynstates != yykl_count ){
                            System.out.println("required3:"+(yynstates)+" actually:"+(yykl_count));
                        }
                    }
                    else if(inyykh){
                        inyykh = false;
                        if (yynstates != yykh_count ){
                            System.out.println("required4:"+(yynstates)+" actually:"+(yykh_count));
                        }
                    }
                    else if(inyyml){
                        inyyml = false;
                        if (yynstates != yyml_count ){
                            System.out.println("required5:"+(yynstates)+" actually:"+(yyml_count));
                        }
                    }
                    else if(inyymh){
                        inyymh = false;
                        if (yynstates != yymh_count ){
                            System.out.println("required:"+(yynstates)+" actually:"+(yymh_count));
                        }
                    }
                    else if(inyytl){
                        inyytl = false;
                        if (yynstates != yytl_count ){
                            System.out.println("required6:"+(yynstates)+" actually:"+(yytl_count));
                        }
                    }
                    else if(inyyth){
                        inyyth = false;
                        if (yynstates != yyth_count ){
                            System.out.println("required7:"+(yynstates)+" actually:"+(yyth_count));
                        }
                    }
                    else if(inyytint){
                        inyytint = false;
                        if (yyntrans + 1 != yytint_count ){
                            System.out.println("required8:"+(yyntrans)+" actually:"+(yytint_count-1));
                        }
                    }
                    else if(inyyt){
                        inyyt = false;
                        if (yyntrans+1 != yyt_count ){
                            System.out.println("required9:"+(yyntrans)+" actually:"+(yyt_count-1));
                        }
                    }
                    else if(inkeyword){
                        inkeyword = false;
                    }
                }else if(trimmed.startsWith("yyk =")){
                    inyyk = true;
                }else if(trimmed.startsWith("yym =")){
                    inyym = true;
                }else if(trimmed.startsWith("yykl =")){
                    inyykl = true;
                }else if(trimmed.startsWith("yykh =")){
                    inyykh = true;
                }else if(trimmed.startsWith("yyml =")){
                    inyyml = true;
                }else if(trimmed.startsWith("yymh =")){
                    inyymh = true;
                }else if(trimmed.startsWith("yytl =")){
                    inyytl = true;
                }else if(trimmed.startsWith("yyth =")){
                    inyyth = true;
                }else if(trimmed.startsWith("yytint =")){
                    inyytint = true;
                }else if(trimmed.startsWith("yyt =")){
                    inyyt = true;
                }else if(trimmed.startsWith("keywordsvalue =")){
                    inkeyword = true;
                }else if(inyyk){
                    yyk_count = appendParsedInts(line, yyk, yyk_count);
                }else if(inyym){
                    yym_count = appendParsedInts(line, yym, yym_count);
                }else if(inyykl){
                    yykl_count = appendParsedInts(line, yykl, yykl_count);
                }else if(inyykh){
                    yykh_count = appendParsedInts(line, yykh, yykh_count);
                }else if(inyyml){
                    yyml_count = appendParsedInts(line, yyml, yyml_count);
                }else if(inyymh){
                    yymh_count = appendParsedInts(line, yymh, yymh_count);
                }else if(inyytl){
                    yytl_count = appendParsedInts(line, yytl, yytl_count);
                }else if(inyyth){
                    yyth_count = appendParsedInts(line, yyth, yyth_count);
                }else if(inyytint){
                    yytint_count = appendParsedInts(line, yytint, yytint_count);
                }else if(inyyt){
                    // One transition per line: its character set, elements separated by ",,".
                    c = 0;
                    String[] st = trimmed.split(",,");
                    char[] tmp = new char[st.length];
                    for(int i=0;i<st.length;i++){

                        if(st[i].startsWith("\'")) {
                            if(st[i].length() == 3){  // 'a'
                                tmp[c++] = st[i].charAt(1);
                            }else if(st[i].length() == 4) { // '\\'
                                tmp[c++] = st[i].charAt(2);
                            }else{
                                System.out.println(" read yytstr error, error string is "+st[i]+ "line: "+ yyt_count);
                            }
                        }else{
                            try{
                                tmp[c++] = (char)Integer.parseInt(st[i]);   // char in number like 32 that represent space
                            } catch (NumberFormatException nfe) {
                                System.out.println("NumberFormatException: " + nfe.getMessage());
                            }
                        }
                    }

                    // Pair the character set with the transition value read in the yytint section.
                    yyt[yyt_count] = new TYytRec(tmp,yytint[yyt_count]);
                    yyt_count++;

                }else if(inkeyword){
                    // keyword=value=type
                    String[] ss =line.split("[=]");

                    int val1 = -1;
                    int val2 = -1;
                    try {
                        val1 = Integer.parseInt(ss[1]);
                        val2 = Integer.parseInt(ss[2]);
                    }
                    catch (NumberFormatException nfe) {
                        System.out.println("NumberFormatException: " + nfe.getMessage());
                    }
                    // Must use the same locale as the lookups in iskeyword/getkeywordvalue,
                    // otherwise keywords containing 'i' are lost under e.g. a Turkish default locale.
                    keywordValueList.put(ss[0].toUpperCase(Locale.ENGLISH),val1);
                    keywordTypeList.put(val1,val2);
                }
            }
        }catch(IOException e){
            System.out.println(e.toString());
        }finally{
            try{
                br.close();
            }catch(IOException ignored){
                // Nothing useful can be done when closing the table resource fails.
            }
        }

        yystateLookupConfigure();

    }


    /**
     * Runs the action attached to lexer rule {@code yyruleno}.
     *
     * <p>On entry {@code yylvalstr} is set to the matched text. Actions either return a
     * token through {@code returni}/{@code returnc}, or accumulate text into the literal
     * buffer ({@code startlit}/{@code addlit}) and switch start condition with
     * {@code start}. A bare {@code return} leaves the action without producing a token.
     *
     * @param yyruleno number of the matched rule (1..45); any other value prints
     *                 {@code "fatal error in yyaction"}
     */
    void yyaction(int yyruleno){


        int ic;
        char[] tmparray = {'=','+','-','*','/','>','<'};

        yylvalstr = getyytext();
        /* actions: */
        switch(yyruleno){
            case 1:
            {
                returni (filepath_sign);
                break;
            }

            case 2:
            {
                if (yylvalstr.equalsIgnoreCase(dolqstart))
                {
                    // Closing delimiter equals the opening one: the dollar-quoted string ends.
                    //dolqstart = "";
                    start(init);
                    addlit(yylvalstr,yytextlen);
                    yylvalstr = litbufdup();
                    returni(sconst);
                }
                else
                {
                    // Different delimiter: keep all but the last char and rescan from there.
                    //nchars = yytextlen;
                    addlit(yylvalstr, yytextlen-1);
                    yyless(yytextlen-1);
                    return;
                }
                //System.out.println("<xdolq>{dolqdelim}: "+dolqstart);
                break;
            }

            case 3:
            {
                if (isReadyForFunctionBody) { // meet the first $$
                    //isInFunctionBody = true;
                    isReadyForFunctionBody = false;
                    functionBodyDelimiterIndex++;
                    functionBodyDelimiter.add(yylvalstr);
                    // System.out.println("start function body:"+functionBodyDelimiter.get(functionBodyDelimiterIndex));
                    returni(snowflake_function_delimiter);
                }else if ((functionBodyDelimiterIndex>=0)&&(functionBodyDelimiter.get(functionBodyDelimiterIndex).equalsIgnoreCase(yylvalstr))){ // meet the second $$
                    // Lookahead to check if this $$ is ending function body or starting a nested string
                    // If next char is whitespace, ';', ')', or EOF, it's ending the function body
                    // If next char is alphanumeric, backslash, or other content, it's starting a nested string
                    char nextChar = get_char();
                    boolean isEndingFunctionBody = (nextChar == ' ' || nextChar == '\t' || nextChar == '\n' ||
                                                    nextChar == '\r' || nextChar == ';' || nextChar == ')' ||
                                                    nextChar == '\0' || nextChar == 0);
                    if (nextChar != '\0' && nextChar != 0) {
                        unget_char(nextChar); // put the character back
                    }

                    if (isEndingFunctionBody) {
                        //isInFunctionBody = false;
                        //System.out.println("end function body:"+functionBodyDelimiter.get(functionBodyDelimiterIndex));
                        functionBodyDelimiter.remove(functionBodyDelimiterIndex);
                        functionBodyDelimiterIndex--;
                        returni(snowflake_function_delimiter);
                    } else {
                        // Not ending function body, start a nested dollar-quoted string
                        start(xdolq);
                        startlit();
                        dolqstart = yylvalstr;
                        addlit(yylvalstr,yytextlen);
                    }
                }else {
                    if (getyysstate() == xq){
                        nchars = yytextlen;
                        addlit(yylvalstr, yytextlen-1);
                        yyless(nchars-1);
                        return;//exit;
                    }else{
                        start(xdolq);
                        startlit();
                        dolqstart = yylvalstr;
                        addlit(yylvalstr,yytextlen);
                    }
                }

                break;
            }

            case 4:
            {
                addlit(yylvalstr, yytextlen);
                //System.out.println("<xdolq>{dolqinside}: "+yylvalstr);
                break;
            }

            case 5:
            {
                addlit(yylvalstr, yytextlen);
                break;
            }

            case 6:
            {
                addlitchar(yylvalstr.charAt(0));
                break;
            }

            // Rules 7, 8 and 9 share one action.
            case 7:
            case 8:
            case 9:
            {
                if (getyysstate() == xq)
                {
                    // Already in state xq: keep all but the last char and rescan it.
                    nchars = yytextlen;
                    addlit(yylvalstr, yytextlen-1);
                    yyless(nchars-1);
                    return;//exit;
                }

                start(xq);
                startlit();
                addlit(yylvalstr,yytextlen);
                break;
            }

            case 10:
            {
                addlit(yylvalstr,yytextlen);
                if (xcdepth <= 0)
                {
                    start(init);
                    yylvalstr = litbufdup();
                    returni(cmtslashstar);
                }
                else
                    xcdepth--;

                break;
            }

            case 11:
            {
                if (yylvalstr.equalsIgnoreCase("/*+")){
                    xcdepth++;
                    yyless(2);
                    addlit(yylvalstr,yytextlen);
                }else{
                    yyless(1);
                    addlit(yylvalstr,1);
                }

                break;
            }

            case 12:
            {
                if (getyysstate() == xq)
                {
                    nchars = yytextlen;
                    addlit(yylvalstr, yytextlen-1);
                    yyless(nchars-1);
                    return;//exit;
                }

                xcdepth = 0;
                start(xc);
                startlit();
                yyless(2);
                addlit(yylvalstr,yytextlen);

                break;
            }

            case 13:
            {
                addlit(yylvalstr,yytextlen);
                break;
            }

            case 14:
            {
                addlitchar(yylvalstr.charAt(0));
                break;
            }

            case 15:
            {
                start(init);
                addlit(yylvalstr, yytextlen);
                yylvalstr = litbufdup();
                // The first character of the accumulated literal selects the constant kind.
                if( yylvalstr.startsWith("b")|| (yylvalstr.startsWith("B"))){
                    returni(bconst);
                }else if( yylvalstr.startsWith("x")|| (yylvalstr.startsWith("X"))){
                    returni(xconst);
                }else
                {
                    returni(sconst);
                }
                break;
            }

            case 16:
            {
                if (insqlpluscmd){
                    yyless(0);
                    yylvalstr = litbufdup();
                    start(init);
                    returni(sconst);
                }else{
                    addlit(yylvalstr,yytextlen);
                }

                break;
            }

            case 17:
            {
                if (getyysstate() == xq)
                {
                    nchars = yytextlen;
                    addlit(yylvalstr, yytextlen-1);
                    yyless(nchars-1);
                    return;//exit;
                }

                start(xq);
                startlit();
                addlit(yylvalstr,yytextlen);

                dummych1 = get_char();
                if (dummych1 == '\\')  // recognize string like '\'
                {
                    dummych2 = get_char();
                    if (dummych2 == '\'')
                    {
                        // start(init);
                        addlit("\\", 1);
                        addlit("\'", 1);
                        //yylvalstr = litbufdup();
                        //returni(sconst);
                    }
                    else
                    {
                        unget_char(dummych2);
                        unget_char(dummych1);
                    }
                }
                else
                {
                    unget_char(dummych1);
                }

                break;
            }

            case 18:
            {
                start(xq);
                startlit();
                addlit(yylvalstr, yytextlen);
                dummych1 = get_char();
                if (dummych1 == '\\')  // recognize string like '\'
                {
                    dummych2 = get_char();
                    if (dummych2 == '\'')
                    {
                        // start(init);
                        addlit("\\", 1);
                        addlit("\'", 1);
                        //yylvalstr = litbufdup();
                        //returni(sconst);
                    }
                    else
                    {
                        unget_char(dummych2);
                        unget_char(dummych1);
                    }
                }
                else
                {
                    unget_char(dummych1);
                }

                break;
            }

            case 19:
            {
                addlit(yylvalstr, yytextlen);
                break;
            }

            case 20:
            {
                // Peek at the character following the matched text.
                dummych1 = get_char();
                unget_char(dummych1);
                if (dummych1 == (char)10)
                {
                    if (insqlpluscmd){
                        nchars = yytextlen;
                        if(yylvalstr.charAt(nchars-1) == (char)13){
                            yyless(nchars - 1);
                            // NOTE(review): substring(0,nchars) looks like a no-op when nchars
                            // equals the text length; possibly nchars-1 was intended - confirm.
                            yylvalstr = yylvalstr.substring(0,nchars);
                        }
                        start(init);
                        addlit(yylvalstr, nchars-1);
                        yylvalstr = litbufdup();
                        returni(sconst); //in sqlplus command, characters between ' and return is treated as a string

                    }else{
                        dummych1 = get_char();
                        addlit(yylvalstr+dummych1, yytextlen+1);
                    }
                } else if (dummych1 == '\\')
                {
                    // Handle backslash inside string literals
                    // Snowflake supports both \' (C-style escape) and '' (SQL standard escape)
                    // The pattern \'' is ambiguous:
                    //   - Could be \' (escape) + ' (close) - e.g., 'text\'' means text' then close
                    //   - Could be \ (literal) + '' (escape) - e.g., 'text\''more' means text\'more
                    // We distinguish by looking at what follows \'' :
                    //   - \''' : \' (escape) + '' (leave for xqdouble) - two quote chars in value
                    //   - \''<letter/digit>: \ + '' (backslash + escape, string continues)
                    //   - \''<space><SQL keyword>: \' + close (string ends, keyword follows)
                    //   - \''<space><other>: \ + '' (backslash + escape, string continues with space)
                    //   - \''<EOF/control>: \' + close
                    // See mantisbt issue 4298 for details
                    dummych1 = get_char();  // Read the backslash
                    dummych2 = get_char();  // Read the next char
                    if (dummych2 == '\'') {
                        // Have \' - check if followed by another quote
                        char dummych3 = get_char();
                        if (dummych3 == '\'') {
                            // \'' pattern - check what comes after
                            char dummych4 = get_char();
                            if (dummych4 == '\'') {
                                // \''' : consume \' as escape, leave '' for xqdouble
                                unget_char(dummych4);
                                unget_char(dummych3);
                                addlit(yylvalstr+dummych1+dummych2, yytextlen+2);
                            } else if (Character.isLetterOrDigit(dummych4) || dummych4 == '_') {
                                // \''<letter/digit>: \ + '' (string continues)
                                unget_char(dummych4);
                                unget_char(dummych3);
                                unget_char(dummych2);
                                addlit(yylvalstr+dummych1, yytextlen+1);
                            } else if (dummych4 == ' ' || dummych4 == '\t') {
                                // \''<space> - check if followed by SQL keyword
                                char dummych5 = get_char();
                                char dummych6 = get_char();
                                // Check for SQL keywords that typically follow a column/value expression
                                // Only check keywords that wouldn't appear as regular words in strings
                                String twoChars = "" + Character.toLowerCase(dummych5) + Character.toLowerCase(dummych6);
                                if (twoChars.equals("as")) {  // AS is the most reliable indicator of SQL context
                                    // Likely SQL keyword follows - treat \'' as \' + close
                                    unget_char(dummych6);
                                    unget_char(dummych5);
                                    unget_char(dummych4);
                                    unget_char(dummych3);
                                    addlit(yylvalstr+dummych1+dummych2, yytextlen+2);
                                } else {
                                    // Not a SQL keyword - treat as \ + '' (string continues)
                                    unget_char(dummych6);
                                    unget_char(dummych5);
                                    unget_char(dummych4);
                                    unget_char(dummych3);
                                    unget_char(dummych2);
                                    addlit(yylvalstr+dummych1, yytextlen+1);
                                }
                            } else if (dummych4 >= ' ' && dummych4 != (char)0 && dummych4 != (char)-1) {
                                // Other printable: \ + '' (string continues)
                                unget_char(dummych4);
                                unget_char(dummych3);
                                unget_char(dummych2);
                                addlit(yylvalstr+dummych1, yytextlen+1);
                            } else {
                                // \'' at EOF or followed by control char: \' escape + ' closes string
                                unget_char(dummych4);
                                unget_char(dummych3);
                                addlit(yylvalstr+dummych1+dummych2, yytextlen+2);
                            }
                        } else {
                            // \' followed by non-quote: consume \' as C-style escape
                            unget_char(dummych3);
                            addlit(yylvalstr+dummych1+dummych2, yytextlen+2);
                        }
                    } else {
                        // \X where X is not quote: consume both
                        addlit(yylvalstr+dummych1+dummych2, yytextlen+2);
                    }
                }
                else
                {
                    addlit(yylvalstr, yytextlen);
                }

                break;
            }

            case 21:
            {
                addlit(yylvalstr, yytextlen);
                break;
            }

            case 22:
            {
                start(init);
                addlit(yylvalstr, yytextlen);
                // NOTE(review): after returni(error) execution continues and returni(ident)
                // is called as well; kept as in the original action.
                if ((literallen == 0) && (!insqlpluscmd))
                {
                    returni (error);
                }
                // Truncate over-long identifiers to namedatalen.
                if (literallen >= namedatalen)
                {
                    setlengthofliteralbuf(namedatalen);
                    literallen = namedatalen;
                }
                yylvalstr = litbufdup();
                returni (ident);

                break;
            }

            case 23:
            {
                if (insqlpluscmd){
                    yyless(0);
                    yylvalstr = litbufdup();
                    start(init);
                    returni(sconst);
                }else{
                    addlit(yylvalstr, yytextlen);
                }

                break;
            }

            case 24:
            {
                addlit(yylvalstr, yytextlen);
                break;
            }

            case 25:
            {
                start(xd);
                startlit();
                addlit(yylvalstr, yytextlen);
                break;
            }

            case 26:
            {
                // Peek at the character following the matched text.
                dummych1 = get_char();
                unget_char(dummych1);
                if (dummych1 == (char)10)
                {
                    if (insqlpluscmd){
                        nchars = yytextlen;
                        if(yylvalstr.charAt(nchars-1) == (char)13){
                            yyless(nchars - 1);
                            // NOTE(review): substring(0,nchars) looks like a no-op when nchars
                            // equals the text length; possibly nchars-1 was intended - confirm.
                            yylvalstr = yylvalstr.substring(0,nchars);
                        }
                        start(init);
                        addlit(yylvalstr, nchars-1);
                        yylvalstr = litbufdup();
                        returni(sconst); //in sqlplus command, characters between ' and return is treated as a string

                    }else{
                        dummych1 = get_char();
                        addlit(yylvalstr+dummych1, yytextlen+1);
                    }

                } else
                    addlit(yylvalstr, yytextlen);

                break;
            }

            case 27:
            {
                returni(lexnewline);
                break;
            }

            case 28:
            {
                returni(lexspace);
                break;
            }

            case 29:
            {
                // Inside states xq, xd or xc the match is not a comment start:
                // take one char as literal text and rescan the rest.
                if ((getyysstate() == xq)
                    || (getyysstate() == xd)
                    || (getyysstate() == xc)
                   )
                {
                    addlit(yylvalstr, 1);
                    yyless(1);
                    return;//exit;
                }

                returni(cmtdoublehyphen);
                break;
            }

            case 30:
            {
                returnc(yylvalstr.charAt(0));
                break;
            }

            case 31:
            {
                returni(cmpop);
                break;
            }

            case 32:
            {
                if (getyysstate() == xc)
                {
                    slashstar = yylvalstr.indexOf("*/");
                    if (slashstar >= 0)
                    {
                        start(init);
                        addlit(yylvalstr,slashstar+2);
                        yylvalstr = litbufdup();
                        yyless(slashstar+2);
                        returni(cmtslashstar);
                    }
                    else
                    {
                        addlit(yylvalstr,1);
                        yyless(1);
                    }
                }
                else if (getyysstate() == xq)
                {
                    addlit(yylvalstr,1);
                    yyless(1);
                }
                else
                {
                    nchars = yytextlen;
                    slashstar = yylvalstr.indexOf("/*");
                    dashdash = yylvalstr.indexOf("--");
                    if ((slashstar > 0) && (dashdash > 0))
                    {
                        //if both appear, take the first one
                        // NOTE(review): the chosen position is not applied to nchars in this
                        // branch; kept as in the original - confirm whether that is intended.
                        if (slashstar > dashdash)
                        {
                            slashstar = dashdash;
                        }
                    }
                    else
                    {
                        //   if slashstar=0 then slashstar := dashdash;
                        // add (getyysstate <> xc) to avoid something like this */--,here */ should be handled instead of --
                        if ((slashstar > 0) && (getyysstate() != xc)) {
                            nchars = slashstar;
                        }
                    }

                    // Drop trailing '+'/'-' characters unless an earlier character
                    // satisfies isopchar.
                    while ((nchars > 1)
                            && ( (yylvalstr.charAt(nchars-1) == '+' )
                                 || (yylvalstr.charAt(nchars-1) =='-'))
                            && (getyysstate() != xc))
                    {
                        for (ic = nchars - 1; ic>=1; ic--)
                        {
                            if (isopchar(yylvalstr.charAt(ic-1))) break;
                        }
                        if (ic >= 1) break;
                        nchars--;
                    }

                    if (nchars < yytextlen)
                    {
                        //Strip the unwanted chars from the token
                        yyless(nchars);
                        yylvalstr = yylvalstr.substring(0,nchars);
                    }

                    ///*
                    // * If what we have left is only one char, and it's
                    // * one of the characters matching "self", then
                    // * return it as a character token the same way
                    // * that the "self" rule would have.
                    // * make sure @ return as self char, by james wang
                    // */
                    if ((nchars == 1) && (isselfchar(yylvalstr.charAt(0)) || (yylvalstr.charAt(0) == '@')))
                    {
                        returnc(yylvalstr.charAt(0));
                    }
                    else if (
                                (nchars >= 2)
                                &&(
                                      charinarray(yylvalstr.charAt(nchars-1-1), tmparray)
                                      && ((yylvalstr.charAt(nchars-1) == ':')
                                          ||(yylvalstr.charAt(nchars-1) == '.')
                                         )
                                  )
                            )
                    {
                        yyless(nchars-1);
                        yylvalstr = yylvalstr.substring(0,nchars-1);
                        if (nchars == 2)
                            returnc(yylvalstr.charAt(0));
                        else
                            returni(op);
                    }
                    else if (
                                (nchars >= 2)
                                && (
                                      charinarray(yylvalstr.charAt(nchars-1-1),tmparray)
                                      && (yylvalstr.charAt(nchars-1) == '&')
                                   )
                            )
                    {
                        yyless(nchars-1);
                        yylvalstr = yylvalstr.substring(0,nchars-1);
                        if (nchars == 2)
                            returnc(yylvalstr.charAt(0));
                        else
                            returni(op);
                    }
                    else if ( (nchars > 1) && (yylvalstr.charAt(0) == '~'))
                    {
                        yyless(1);
                        returnc(yylvalstr.charAt(0));
                    }
                    else if ( (nchars == 2) && (yylvalstr.charAt(0) == '.') && (yylvalstr.charAt(1) == '*'))
                    {
                        yyless(1);
                        returnc(yylvalstr.charAt(0));
                    }
                    else if ((nchars == 2) && ((yylvalstr.charAt(0) == '=') && ( (yylvalstr.charAt(1) == '?')||(yylvalstr.charAt(1) == '@')) ))
                    {
                        yyless(1);
                        returnc(yylvalstr.charAt(0));
                    }
                    else if ( (nchars >= 2) && ((yylvalstr.charAt(0) == '@')&&(yylvalstr.charAt(1) != '>')))
                    {
                        yyless(1);
                        returnc(yylvalstr.charAt(0));
                    }
                    else if ( (nchars >= 2) && ((yylvalstr.charAt(0) == '/')))
                    {
                        yyless(1);
                        returnc(yylvalstr.charAt(0));
                    }
                    else if (((nchars > 2) && (yylvalstr.charAt(0) == '*'))
                              && (yylvalstr.charAt(1) == '/')
                              && (getyysstate() == xc)
                            )
                    { //in comment, and find */ , then it must the end of comment
                        yyless(2);
                        addlit(yylvalstr,yytextlen);
                        if (xcdepth <= 0)
                        {
                            start(init);
                            yylvalstr = litbufdup();
                            returni(cmtslashstar);
                        }
                        else
                            xcdepth--;
                    }
                    else
                        returni(op);
                }

                break;
            }

            case 33:
            {
                returni(iconst);
                break;
            }

            case 34:
            {
                ///*  for i in 1..5 loop, we can't recognize 1. as a decimal,but 1 as decimal
                nchars = yytextlen;
                if (yylvalstr.charAt(nchars-1) == '.')
                {
                    dummych1 = get_char();
                    unget_char(dummych1);
                    if (dummych1 == '.')
                    {
                        // "1.." : give the dot back and return the digits as an integer.
                        yyless(nchars-1);
                        yylvalstr = yylvalstr.substring(0,nchars - 1);
                        returni (iconst);
                        return;//exit;
                    }
                }
                returni (fconst);
                break;
            }

            case 35:
            {
                returni (fconst);
                break;
            }

            case 36:
            {
                returni (sconst);
                break;
            }

            case 37:
            {
                boolean dollarConstant = false;
                if (getyysstate() == xdolq){
                    // In state xdolq only the text before the first '$' is literal content.
                    int p = yylvalstr.indexOf("$");
                    if (p > 0){
                        dollarConstant = true;
                        addlit(yylvalstr, p);
                        yyless(p);
                        return;
                    }
                }

                // Handle identifiers ending with $$ inside function body (e.g., "b$$")
                // When inside a function body, split identifier at $$ boundary
                // Only split if the identifier contains $$ (the function body delimiter)
                // Do NOT split identifiers like SYSTEM$TASK_RUNTIME_INFO that contain single $
                if (functionBodyDelimiterIndex >= 0){ // inside function body
                    int p = yylvalstr.indexOf("$$");
                    if (p > 0){
                        yylvalstr = yylvalstr.substring(0,p);
                        yyless(p);
                    }
                }

                if (!dollarConstant){
                    int rw;
                    if ( (rw = iskeyword(yylvalstr)) != -1) {
                        if (rw == TBaseType.rrw_as){
                            isReadyForFunctionBody = true;
                            // check whether language is javascript, if yes, don't set isReadyForFunctionBody to true
                            if ((TOKEN_TABLE[TBaseType.rrw_snowflake_language][COLUMN0_COUNT] > 0)
                                &&(TOKEN_TABLE[TBaseType.rrw_snowflake_javascript][COLUMN0_COUNT] > 0)){
                                if (TOKEN_TABLE[TBaseType.rrw_snowflake_javascript][COLUMN5_FIRST_POS] - TOKEN_TABLE[TBaseType.rrw_snowflake_language][COLUMN5_FIRST_POS] <= 2){
                                    // RETURNS STRING LANGUAGE JAVASCRIPT
                                    // For RETURNS STRING LANGUAGE JAVASCRIPT, do not set
                                    // isReadyForFunctionBody = true, i.e. treat the whole
                                    // $$...$$ as a string.
                                    isReadyForFunctionBody = false;
                                }
                            }
                            // Also check for non-SQL languages (JAVA, PYTHON, SCALA) detected earlier
                            if (isNonSQLLanguageDetected) {
                                isReadyForFunctionBody = false;
                            }

                        }else if (rw == TBaseType.rrw_snowflake_language){
                            // The next identifier is checked as the language name.
                            isLanguagePending = true;
                            isReadyForFunctionBody = false;
                        }else{
                            isReadyForFunctionBody = false;
                        }
                        returni(rw);
                    }
                    else
                    {
                        if (isLanguagePending) {
                            String langName = yylvalstr.toLowerCase(Locale.ROOT);
                            if (langName.equals("java") || langName.equals("python") || langName.equals("scala")) {
                                isNonSQLLanguageDetected = true;
                            }
                            isLanguagePending = false;
                        }
                        isReadyForFunctionBody = false;
                        returni(ident);
                    }
                }

                break;
            }

            case 38:
            {
                if (getyysstate() == xdolq){
                    addlit(yylvalstr, yytextlen);
                    return;
                }else{
                    returni (param);
                }

                break;
            }

            case 39:
            {
                returni (outer_join);
                break;
            }

            case 40:
            {
                returni (typecast);
                break;
            }

            case 41:
            {
                returni (double_dot);
                break;
            }

            case 42:
            {
                returni (assign_sign);
                break;
            }

            case 43:
            {
                returni (variable);
                break;
            }

            case 44:
            {
                returni (bind_v);
                break;
            }

            case 45:
            {
                returni (error);
                break;
            }

            default:{
                System.out.println("fatal error in yyaction");
            }
        }//switch
    }/*yyaction*/

}