// lexical analyzer for GSQLParser component java version

/****************************************************}
{ Lexical analyzer for GSQLParser component java version }
{ Copyright (c) 2004-2024 by Gudu software }
{****************************************************/

package gudusoft.gsqlparser;

import gudusoft.gsqlparser.nodes.TTypeName;

import java.util.HashMap;
import java.io.InputStream;
import java.io.InputStreamReader;

import java.util.Locale;
import java.io.BufferedReader;
import java.io.IOException;

/**
 * Table-driven lexer for the BigQuery SQL dialect.
 *
 * <p>The scanner tables ({@code yyk}, {@code yym}, {@code yyt}, ...) and the keyword
 * maps are static and are loaded once from the text resource named by
 * {@link #table_file}. {@link #yylex()} drives the scan loop and dispatches matched
 * rules to {@link #yyaction(int)}.
 */
public class TLexerBigquery extends TCustomLexer{

    static int yynmarks = 0 ;
    static int yynmatches ;
    static int yyntrans ;
    static int yynstates ;
    static int[] yyk,yym ; // 1 based
    static int[] yytint; // 1 based
    static TYytRec[] yyt ; // 1 based
    static int[] yykl,yykh,yyml,yymh,yytl,yyth ; // 0 based
    private static String[] keywordlist;
    static String table_file;

    // upper-cased keyword text -> token code
    static HashMap<String, Integer> keywordValueList;
    // token code -> keyword type modifier bits
    static HashMap<Integer, Integer> keywordTypeList;

    HashMap mysqlCharsets;
    // yystateTable[ch][state] = index into yyt of the transition to take, or -1
    static int[][] yystateTable;
    // set by yyaction when the last keyword returned was FROM or TABLE
    boolean afterFromKeyword = false;

    static {
        keywordValueList = new HashMap<String, Integer>();
        keywordTypeList = new HashMap<Integer, Integer>();
        table_file = "/gudusoft/gsqlparser/parser/bigquery/bigquery_lex_table.txt";
        if (TBaseType.enterprise_edition||TBaseType.bigquery_edition){
            inittable();
        }
    }


    /** Creates a lexer bound to the BigQuery vendor with an empty token list. */
    public TLexerBigquery(){
        super();
        dbvendor = EDbVendor.dbvbigquery;
        sourcetokens = new TSourceTokenList();
        mysqlCharsets = new HashMap();
    }


    public TSourceTokenList sourcetokens;

    /**
     * Decides how many trailing characters of a dash-containing identifier must be
     * given back to the input.
     *
     * <p>When the identifier follows a FROM/TABLE keyword ({@code afterFromKeyword}),
     * the dash-joined text is accepted as one identifier and only the last character
     * is given back. Otherwise '-' cannot join identifier parts, so everything from
     * the first '-' (inclusive) to the end is given back.
     *
     * @param str the matched text; expected to contain at least one '-'
     * @return number of trailing characters the caller should push back
     */
    public int checkIdentifierIncludeMinus(String str) {
        int ret = 1; // give back the final '.'
        if (!afterFromKeyword){
            // not after FROM: '-' is not a connector, give back from the first '-' onward
            ret = str.length() - str.indexOf('-');
        }

        return ret;
    }


    /**
     * If the identifier contains a dash (-), checks whether it is a legal identifier.
     *
     * @param str the candidate identifier text
     * @return 0 if the whole text is legal; a value &gt; 0 means only that many
     *         leading characters may be accepted as an identifier
     */
    public int checkIdentifier(String str){
        int ret = 0;
        if (str.indexOf('-') == -1) return 0;
        if (str.endsWith("-")){
            // give back every trailing '-'; consecutive ones are given back together
            int pos = str.length();
            while (str.charAt(pos-1) == '-'){
                pos--;
                if (pos == 0) break;
            }

            return pos;
        }

        String[] nameValues = str.split("-");
        if (nameValues.length > 0){
            int acceptedChar = 0;
            boolean isValidPart = true;

            for(int j=0;j<nameValues.length;j++){
                String v = nameValues[j];
                for(int i=0;i<v.length();i++){
                    if (i == 0){
                        if (!Character.isDigit(v.charAt(0))) break;
                    }else{
                        // a part whose first character is a digit must be all digits
                        isValidPart = Character.isDigit(v.charAt(i));
                        if (!isValidPart) break;
                    }
                }

                if (isValidPart) {
                    if (v.length() == 0){
                        // select ADCS_SUBID--NORMAL AMDOCS SUBSCRIBER
                        // nameValues will be split into [ADCS_SUBID, "",NORMAL ]
                        // just return before --
                        isValidPart = false;
                        break;
                    }else{
                        if (j == 0){
                            acceptedChar = acceptedChar + v.length();
                        }else{
                            acceptedChar = acceptedChar + v.length() + 1; // plus one for the '-'
                        }
                    }
                }else{
                    break;
                }
            }

            if (isValidPart) return 0;
            else
                return acceptedChar;
        }

        return ret;
    }

    /**
     * Tells whether a keyword token may be used as a column name.
     *
     * @param tokencode the keyword token code
     * @return true when the keyword's type bits are all contained in
     *         {@code keyword_type_identifier | keyword_type_column}; false when the
     *         code is not a known keyword
     */
    public boolean canBeColumnName(int tokencode){
        //http://blog.csdn.net/superbeck/article/details/5387476
        boolean ret = false;
        int modifiers = keyword_type_identifier | keyword_type_column ;

        Integer s = keywordTypeList.get(tokencode);
        if (s != null){
            int modifier = s;
            ret = (modifiers & modifier) == modifier;
        }

        return ret;
    }

    /**
     * Looks up a keyword, case-insensitively.
     *
     * @param str candidate keyword text
     * @return the keyword's token code, or -1 if {@code str} is not a keyword
     */
    public int iskeyword(String str){
        int ret = -1;

        Integer s = keywordValueList.get(str.toUpperCase(Locale.ENGLISH));
        if( s != null){
            ret = s;
        }
        return ret;// -1 means not a keyword
    }

    /**
     * Looks up a keyword, case-insensitively.
     *
     * @param keyword candidate keyword text
     * @return the keyword's token code, or 0 if {@code keyword} is not a keyword
     */
    public int getkeywordvalue(String keyword){
        int ret = 0;
        Integer s = keywordValueList.get(keyword.toUpperCase(Locale.ENGLISH));
        if( s != null){
            ret = s;
        }
        return ret;// 0 means not a keyword
    }

    /** Delegates to {@code TCustomLexer.getKeywordType} using this lexer's keyword maps. */
    public static EKeywordType getKeywordType(String keyword){
        return TCustomLexer.getKeywordType(keyword,keywordValueList,keywordTypeList);
    }

    /** Always returns -1: this lexer recognises no system variables. */
    int issystemvariable(String str){
        return -1;// -1 means not a system variable
    }

    /**
     * Builds {@link #yystateTable} so that the transition for a (character, state)
     * pair is found by direct indexing.
     *
     * <p>Every cell starts at -1; then, for each state, each transition index in
     * {@code yytl[state]..yyth[state]} is stored under every character of that
     * transition's {@code cc} set.
     */
    static void yystateLookupConfigure() {
        int yystates = yytl.length;
        yystateTable = new int[257][yystates];

        // initialize to empty
        for(int i = 0; i < yystates; i++) {
            for (int j = 0; j < 257; j++)
                yystateTable[j][i] = -1;
        }

        for(int i = 0; i < yystates; i++) {
            int low = yytl[i];
            int high = yyth[i];
            for (int j = low; j <= high; j++) {
                for (char c: yyt[j].cc) {
                    yystateTable[c][i] = j;
                }
            }
        }
    }

    /**
     * Scans the next token.
     *
     * <p>The inner "scan" loop records marks and matches for the current state and
     * follows transitions via {@link #yystateTable} until none applies. The "action"
     * loop then runs the matched rule's action (retrying on reject). The outer loop
     * repeats until {@code yydone} is set.
     *
     * @return {@code yyretval} as set by the executed action
     */
    int yylex(){
        int yyn;
        while (true) { // top level while
            yynew();
            while (true){ //scan
                for(yyn = yykl[yystate]; yyn <= yykh[yystate]; yyn++){
                    yymark(yyk[yyn]);
                }

                for(yyn=yymh[yystate]; yyn>= yyml[yystate]; yyn--){
                    yymatch(yym[yyn]);
                }

                if(yytl[yystate] > yyth[yystate]){
                    break;
                }

                yyscan();
                totablechar();

                // direct table lookup; replaces a linear search over yyt[yyn].cc
                yyn = yystateTable[yytablechar][yystate];
                if (yyn == -1)
                    break;

                yystate = yyt[yyn].s;
            } //scan

            while (true){ //action
                int yyrule;
                if ( (yyrule = yyfind()) != -1 ){
                    yyaction(yyrule);
                    if (yyreject){
                        continue;
                    }
                }else if( (!yydefault() ) && (yywrap()) ){
                    yyclear();
                    returni(0);
                }
                break;
            }

            if (!yydone) {
                continue;
            }
            break;
        } // top level while

        return yyretval;
    }

    /**
     * Parses one comma-separated line of integers into {@code target}.
     *
     * @param line   raw table line
     * @param target array being filled
     * @param count  index of the next free slot in {@code target}
     * @return the next free slot after the parsed values were stored
     */
    private static int readIntList(String line, int[] target, int count){
        String[] items = line.split("[,]");
        for(int j=0;j<items.length;j++){
            target[count++] = Integer.parseInt(items[j].trim());
        }
        return count;
    }

    /** Prints a diagnostic when a table section holds a different number of entries than declared. */
    private static void reportCountMismatch(String label, int required, int actual){
        if (required != actual){
            System.out.println(label + required + " actually:" + actual);
        }
    }

    /**
     * Loads the scanner tables and keyword maps from the resource {@link #table_file},
     * then builds the state lookup table.
     *
     * <p>The resource is a line-oriented text file: {@code name=value;} lines declare
     * sizes and allocate the arrays, {@code name =} lines open a section, and
     * {@code <end>} closes the open section. An {@link IOException} while reading is
     * printed, not rethrown.
     *
     * @throws IllegalStateException if the table resource cannot be found
     */
    static void inittable(){

        String line;
        boolean inyyk=false,inyym=false,inyykl=false,inyykh=false,inyyml=false,inyymh=false,inyytl=false,inyyth=false,inyytint=false,inyyt=false,inkeyword=false;
        int yyk_count=0,yym_count=0,yykl_count=0,yykh_count=0,yyml_count=0,yymh_count=0,yytl_count=0,yyth_count=0,yytint_count=0,yyt_count=0;
        keywordValueList.clear();
        keywordTypeList.clear();

        InputStream tableStream = TLexerBigquery.class.getResourceAsStream(table_file);
        if (tableStream == null){
            // fail with a clear message instead of an anonymous NPE inside InputStreamReader
            throw new IllegalStateException("lexer table resource not found: " + table_file);
        }
        BufferedReader br = new BufferedReader(new InputStreamReader(tableStream));

        try{
            while( (line = br.readLine()) != null){
                String trimmed = line.trim();
                if (trimmed.startsWith("yynmarks=")){
                    String[] ss = line.split("[=;]");
                    yynmarks=Integer.parseInt(ss[1].trim());
                    yyk = new int[yynmarks+1];
                }else if (trimmed.startsWith("yynmatches=")){
                    String[] ss = line.split("[=;]");
                    yynmatches=Integer.parseInt(ss[1].trim());
                    yym = new int[yynmatches+1];
                }else if (trimmed.startsWith("yyntrans=")){
                    String[] ss = line.split("[=;]");
                    yyntrans=Integer.parseInt(ss[1].trim());
                    yytint = new int[yyntrans+1];
                    yyt = new TYytRec[yyntrans+1];
                }else if (trimmed.startsWith("yynstates=")){
                    String[] ss = line.split("[=;]");
                    yynstates=Integer.parseInt(ss[1].trim());
                    yykl = new int[yynstates];
                    yykh = new int[yynstates];
                    yyml = new int[yynstates];
                    yymh = new int[yynstates];
                    yytl = new int[yynstates];
                    yyth = new int[yynstates];
                }else if (trimmed.startsWith("<end>")){
                    // close whichever section is open and verify its entry count
                    // (1-based arrays hold one unused leading slot, hence count-1)
                    if (inyyk){
                        inyyk = false;
                        reportCountMismatch("required1:", yynmarks, yyk_count-1);
                    }
                    else if(inyym){
                        inyym = false;
                        reportCountMismatch("required2:", yynmatches, yym_count-1);
                    }
                    else if(inyykl){
                        inyykl = false;
                        reportCountMismatch("required3:", yynstates, yykl_count);
                    }
                    else if(inyykh){
                        inyykh = false;
                        reportCountMismatch("required4:", yynstates, yykh_count);
                    }
                    else if(inyyml){
                        inyyml = false;
                        reportCountMismatch("required5:", yynstates, yyml_count);
                    }
                    else if(inyymh){
                        inyymh = false;
                        reportCountMismatch("required:", yynstates, yymh_count);
                    }
                    else if(inyytl){
                        inyytl = false;
                        reportCountMismatch("required6:", yynstates, yytl_count);
                    }
                    else if(inyyth){
                        inyyth = false;
                        reportCountMismatch("required7:", yynstates, yyth_count);
                    }
                    else if(inyytint){
                        inyytint = false;
                        reportCountMismatch("required8:", yyntrans, yytint_count-1);
                    }
                    else if(inyyt){
                        inyyt = false;
                        reportCountMismatch("required9:", yyntrans, yyt_count-1);
                    }
                    else if(inkeyword){
                        inkeyword = false;
                    }
                }else if(trimmed.startsWith("yyk =")){
                    inyyk = true;
                }else if(trimmed.startsWith("yym =")){
                    inyym = true;
                }else if(trimmed.startsWith("yykl =")){
                    inyykl = true;
                }else if(trimmed.startsWith("yykh =")){
                    inyykh = true;
                }else if(trimmed.startsWith("yyml =")){
                    inyyml = true;
                }else if(trimmed.startsWith("yymh =")){
                    inyymh = true;
                }else if(trimmed.startsWith("yytl =")){
                    inyytl = true;
                }else if(trimmed.startsWith("yyth =")){
                    inyyth = true;
                }else if(trimmed.startsWith("yytint =")){
                    inyytint = true;
                }else if(trimmed.startsWith("yyt =")){
                    inyyt = true;
                }else if(trimmed.startsWith("keywordsvalue =")){
                    inkeyword = true;
                }else if(inyyk){
                    yyk_count = readIntList(line, yyk, yyk_count);
                }else if(inyym){
                    yym_count = readIntList(line, yym, yym_count);
                }else if(inyykl){
                    yykl_count = readIntList(line, yykl, yykl_count);
                }else if(inyykh){
                    yykh_count = readIntList(line, yykh, yykh_count);
                }else if(inyyml){
                    yyml_count = readIntList(line, yyml, yyml_count);
                }else if(inyymh){
                    yymh_count = readIntList(line, yymh, yymh_count);
                }else if(inyytl){
                    yytl_count = readIntList(line, yytl, yytl_count);
                }else if(inyyth){
                    yyth_count = readIntList(line, yyth, yyth_count);
                }else if(inyytint){
                    yytint_count = readIntList(line, yytint, yytint_count);
                }else if(inyyt){
                    // one transition per line: character set entries separated by ",,"
                    int c = 0;
                    String[] st = trimmed.split(",,");
                    char[] tmp = new char[st.length];
                    for(int i=0;i<st.length;i++){

                        if(st[i].startsWith("\'")) {
                            if(st[i].length() == 3){ // 'a'
                                tmp[c++] = st[i].charAt(1);
                            }else if(st[i].length() == 4) { // '\\'
                                tmp[c++] = st[i].charAt(2);
                            }else{
                                System.out.println(" read yytstr error, error string is "+st[i]+ "line: "+ yyt_count);
                            }
                        }else{
                            try{
                                tmp[c++] = (char)Integer.parseInt(st[i]); // char in number like 32 that represent space
                            } catch (NumberFormatException nfe) {
                                System.out.println("NumberFormatException: " + nfe.getMessage());
                            }
                        }
                    }

                    yyt[yyt_count] = new TYytRec(tmp,yytint[yyt_count]);
                    yyt_count++;

                }else if(inkeyword){
                    // keyword line: NAME=tokenCode=typeBits
                    String[] ss =line.split("[=]");

                    int val1 = -1;
                    int val2 = -1;
                    try {
                        val1 = Integer.parseInt(ss[1]);
                        val2 = Integer.parseInt(ss[2]);
                    }
                    catch (NumberFormatException nfe) {
                        System.out.println("NumberFormatException: " + nfe.getMessage());
                    }
                    // same locale as the lookups in iskeyword/getkeywordvalue, so that
                    // keys match regardless of the JVM default locale
                    keywordValueList.put(ss[0].toUpperCase(Locale.ENGLISH),val1);
                    keywordTypeList.put(val1,val2);

                }
            }
        }catch(IOException e){
            System.out.println(e.toString());
        }finally{
            try{
                br.close();
            }catch(IOException ignored){
                // nothing useful can be done if closing the table resource fails
            }
        }

        yystateLookupConfigure();

    }



    /**
     * Executes the action of the lexer rule that matched.
     *
     * <p>Actions either accumulate text into the literal buffer ({@code addlit}),
     * switch start state ({@code start}), push characters back ({@code yyless},
     * {@code unget_char}) or emit a token ({@code returni}/{@code returnc}).
     *
     * @param yyruleno rule number reported by {@code yyfind()}; 1..49 are handled
     */
    void yyaction(int yyruleno){


        int ic;
        char[] tmparray = {'=','+','-','*','/','>','<'};

        yylvalstr = getyytext();
        /* actions: */
        switch(yyruleno){
        case 1:

        {
            start(init);
            addlit(yylvalstr, yytextlen);
            yylvalstr = litbufdup();
            returni(sconst);
            break;
        }

        case 2:

        {

            start(xqtriple);
            startlit();
            addlit(yylvalstr, yytextlen);
            break;
        }


        case 3:

        {
            addlit(yylvalstr, yytextlen);
            break;
        }

        case 4:

        {
            start(init);
            addlit(yylvalstr, yytextlen);
            yylvalstr = litbufdup();
            returni(sconst);
            break;
        }

        case 5:

        {

            start(xdtriple);
            startlit();
            addlit(yylvalstr, yytextlen);

            break;
        }

        case 6:

        {
            addlit(yylvalstr, yytextlen);
            break;
        }


        case 7:

        {
            // comment terminator: emit the comment only when no nested comment is open
            addlit(yylvalstr,yytextlen);
            if (xcdepth <= 0)
            {
                start(init);
                yylvalstr = litbufdup();
                returni(cmtslashstar);
            }
            else
                xcdepth--;

            break;
        }


        case 8:

        {
            xcdepth++;
            yyless(2);
            addlit(yylvalstr,yytextlen);
            break;
        }

        case 9:

        {

            if (getyysstate() == xq)
            {
                nchars = yytextlen;
                addlit(yylvalstr, yytextlen-1);
                yyless(nchars-1);
                return;//exit;
            }

            xcdepth = 0;
            start(xc);
            startlit();
            yyless(2);
            addlit(yylvalstr,yytextlen);

            break;
        }

        case 10:

        {
            addlit(yylvalstr,yytextlen);
            break;
        }

        case 11:

        {
            addlitchar(yylvalstr.charAt(0));
            break;
        }


        case 12:

        {

            if (getyysstate() == xq)
            {
                nchars = yytextlen;
                addlit(yylvalstr, yytextlen-1);
                yyless(nchars-1);
                return;//exit;
            }
            start(xq);
            startlit();
            addlit(yylvalstr,yytextlen);
            break;
        }

        case 13:

        {

            if (
                (getyysstate() == xd)||(getyysstate() == xqtriple)
            )
            {
                addlit(yylvalstr, 1);
                yyless(1);
                return;//exit;
            }

            start(xd);
            startlit();
            addlit(yylvalstr,yytextlen);
            break;
        }

        case 14:

        {

            start(xqtriple);
            startlit();
            addlit(yylvalstr,yytextlen);
            break;
        }


        case 15:

        {

            start(xdtriple);
            startlit();
            addlit(yylvalstr,yytextlen);
            break;
        }

        case 16:

        {
            start(init);
            addlit(yylvalstr, yytextlen);
            yylvalstr = litbufdup();
            returni(ident);
            break;
        }

        case 17:

        {
            if ((getyysstate() == xd)||(getyysstate() == xq))
            {
                addlit(yylvalstr, 1);
                yyless(1);
                return;//exit;
            }
            start(xbacktick);
            startlit();
            addlit(yylvalstr, yytextlen);
            break;
        }


        case 18:

        {
            addlit(yylvalstr, yytextlen);
            break;
        }

        case 19:

        {
            addlit(yylvalstr, yytextlen);
            break;
        }


        case 20:

        {
            start(init);
            addlit(yylvalstr, yytextlen);
            yylvalstr = litbufdup();
            returni(sconst);
            break;
        }

        case 21:

        {
            addlit(yylvalstr,yytextlen);
            break;
        }

        case 22:

        {

            if (
                (getyysstate() == xd)||(getyysstate() == xqtriple)
            )
            {
                addlit(yylvalstr, 1);
                yyless(1);
                return;//exit;
            }

            start(xq);
            startlit();
            addlit(yylvalstr, yytextlen);

            break;
        }


        case 23:

        {
            addlit(yylvalstr, yytextlen);
            break;
        }

        case 24:

        {
            // peek at the next character without consuming it
            dummych1 = get_char();
            unget_char(dummych1);
            // consume each backslash together with the character that follows it
            while (dummych1 == '\\'){
                dummych1 = get_char();
                dummych2 = get_char();
                addlit(yylvalstr+dummych1+dummych2, yytextlen+2);
                yylvalstr = "";
                yytextlen = 0;

                dummych1 = get_char();
                unget_char(dummych1);
            }

            if (dummych1 == (char)10)
            {
                if (insqlpluscmd){
                    nchars = yytextlen;
                    if(yylvalstr.charAt(nchars-1) == (char)13){
                        yyless(nchars - 1);
                        yylvalstr = yylvalstr.substring(0,nchars);
                    }
                    start(init);
                    addlit(yylvalstr, nchars-1);
                    yylvalstr = litbufdup();
                    returni(sconst); //in sqlplus command, characters between ' and return is treated as a string

                }else{
                    dummych1 = get_char();
                    addlit(yylvalstr+dummych1, yytextlen+1);
                }
            }
            else
            {
                addlit(yylvalstr, yytextlen);
            }

            break;
        }

        case 25:

        {
            dummych1 = get_char();
            unget_char(dummych1);
            if (dummych1 == ']')
            {
                dummych1 = get_char();
                addlit(yylvalstr+dummych1, yytextlen+1);
            }
            else
            {

                start(init);
                addlit(yylvalstr, yytextlen);
                if (literallen == 0) returni (error);

                if (literallen == 2) {
                    // two characters in the buffer: rescan and return the first as a char token
                    yylvalstr = litbufdup();
                    yyless(0);
                    returnc(yylvalstr.charAt(0));
                }else{
                    if (literallen >= namedatalen)
                    {
                        setlengthofliteralbuf(namedatalen);
                        literallen = namedatalen;
                    }
                    yylvalstr = litbufdup();
                    int rw;

                    // text between the delimiters that is both a keyword and a type name
                    if ( ((rw = iskeyword(yylvalstr.substring(1, yylvalstr.length() - 1))) != -1)
                        &&(TTypeName.searchTypeByName(yylvalstr.substring(1, yylvalstr.length() - 1)) != null)
                        )
                    {
                        if (rw == TBaseType.rrw_date){
                            returni(ident);
                        }else{
                            returni(rw);
                        }
                    }
                    else
                        returni (ident);

                }

            }

            break;
        }


        case 26:

        {
            if (TBaseType.bigquery_legacysql_compatible){
                // support table name in [] which is syntax used in bigquery legacySQL, sample sql: SELECT COUNT(*) FROM [PROJECT_ID:DATASET.TABLE@-3600000]
                start(xdbracket);
                startlit();
                addlit(yylvalstr, yytextlen);
            }else{
                // return [
                returnc(yylvalstr.charAt(0));
            }
            break;
        }

        case 27:

        {
            dummych1 = get_char();
            unget_char(dummych1);
            if (dummych1 == (char)10)
            {
                dummych1 = get_char();
                addlit(yylvalstr+dummych1, yytextlen+1);
            }
            else
                addlit(yylvalstr, yytextlen);

            if ((yylvalstr.indexOf(":")>0)||(yylvalstr.indexOf("@")>0)) {
                // contains ':' or '@': keep scanning as a bracketed name
            }else if (yylvalstr.indexOf(".")>0) {
                int dotpos = yylvalstr.indexOf(".");
                if (dotpos < yylvalstr.length() -1){
                    String n = "0123456789";
                    // >= 0 so that '0' (index 0) also counts as a digit, e.g. [1.0, 2.5]
                    if (n.indexOf(yylvalstr.charAt(dotpos+1)) >= 0){
                        // [23.4, 26.3, 26.4, 26.1], this is array, not identifier
                        yylvalstr = litbufdup();
                        start(init);
                        yyless(0);
                        returnc(yylvalstr.charAt(0));
                    }
                }
            }else{
                yylvalstr = litbufdup();
                start(init);
                yyless(0);
                returnc(yylvalstr.charAt(0));
            }

            break;
        }


        case 28:

        {
            addlit(yylvalstr, yytextlen);
            break;
        }


        case 29:

        {
            start(init);
            addlit(yylvalstr, yytextlen);
            if ((literallen == 0) && (!insqlpluscmd))
            {returni (error);}
            yylvalstr = litbufdup();

            returni(sconst);

            break;
        }

        case 30:

        {
            addlit(yylvalstr, yytextlen);
            break;
        }

        case 31:

        {
            if (
                getyysstate() == xdtriple
            )
            {
                addlit(yylvalstr, 1);
                yyless(1);
                return;//exit;
            }

            start(xd);
            startlit();
            addlit(yylvalstr, yytextlen);
            break;
        }
        case 32:

        {
            dummych1 = get_char();
            unget_char(dummych1);
            if (dummych1 == (char)10)
            {
                if (insqlpluscmd){
                    nchars = yytextlen;
                    if(yylvalstr.charAt(nchars-1) == (char)13){
                        yyless(nchars - 1);
                        yylvalstr = yylvalstr.substring(0,nchars);
                    }
                    start(init);
                    addlit(yylvalstr, nchars-1);
                    yylvalstr = litbufdup();
                    returni(sconst); //in sqlplus command, characters between ' and return is treated as a string

                }else{
                    dummych1 = get_char();
                    addlit(yylvalstr+dummych1, yytextlen+1);
                }

            }else if (dummych1 == '"'){ // sample: "\"", " after \ will be escaped
                if (yylvalstr.endsWith("\\")){
                    dummych1 = get_char();
                    addlit(yylvalstr+dummych1, yytextlen+1);
                }
                else
                    addlit(yylvalstr, yytextlen);
            }
            else
                addlit(yylvalstr, yytextlen);

            break;
        }



        case 33:

        {
            returni(lexnewline);
            break;
        }

        case 34:

        {
            returni(lexspace);
            break;
        }

        case 35:

        {
            // inside a literal/comment/quoted name: take one char as content and rescan the rest
            if ((getyysstate() == xq)
                || (getyysstate() == xd)
                || (getyysstate() == xc)
                || (getyysstate() == xdbracket)
                || (getyysstate() == xbacktick)
                )
            {
                addlit(yylvalstr, 1);
                yyless(1);
                return;//exit;
            }


            returni(cmtdoublehyphen);
            break;
        }

        case 36:

        {
            // Look ahead for ${variable} template substitution syntax
            char ch1 = get_char();
            if (ch1 == '{') {
                StringBuilder varBuf = new StringBuilder();
                int depth = 1;
                boolean closed = false;
                while (depth > 0) {
                    char ch = get_char();
                    if (ch == 0) break;
                    if (ch == '{') { depth++; varBuf.append(ch); }
                    else if (ch == '}') {
                        depth--;
                        if (depth == 0) { closed = true; }
                        else { varBuf.append(ch); }
                    } else {
                        varBuf.append(ch);
                    }
                }
                if (closed) {
                    // ${name} — return as IDENT for template variable substitution
                    // NOTE(review): the closing '}' is consumed but not appended to the
                    // token text — confirm this is intended.
                    yylvalstr = "${" + varBuf.toString();
                    yytextlen = yylvalstr.length();
                    returni(ident);
                } else {
                    // Unclosed — put back and return '$'
                    String s = varBuf.toString();
                    for (int i = s.length() - 1; i >= 0; i--) {
                        unget_char(s.charAt(i));
                    }
                    unget_char('{');
                    returnc('$');
                }
            } else {
                if (ch1 != 0) {
                    unget_char(ch1);
                }
                returnc('$');
            }
            break;
        }

        case 37:

        {
            returnc(yylvalstr.charAt(0));
            break;
        }

        case 38:

        {
            returni(iskeyword("TYPECAST"));
            break;
        }

        case 39:

        {
            returni(iskeyword("PIPE"));
            break;
        }

        case 40:

        {
            returni(cmpop);
            break;
        }

        case 41:

        {

            if (getyysstate() == xc)
            {
                slashstar = yylvalstr.indexOf("*/");
                if (slashstar >= 0)
                {
                    start(init);
                    addlit(yylvalstr,slashstar+2);
                    yylvalstr = litbufdup();
                    yyless(slashstar+2);
                    returni(cmtslashstar);
                }
                else
                {
                    addlit(yylvalstr,1);
                    yyless(1);
                }
            }
            else
            {
                nchars = yytextlen;
                slashstar = yylvalstr.indexOf("/*");
                dashdash = yylvalstr.indexOf("--");
                if ((slashstar >= 0) && (dashdash >= 0))
                {
                    //if both appear, take the first one
                    // NOTE(review): nchars is not shortened in this branch — confirm intended.
                    if (slashstar > dashdash)
                    {slashstar = dashdash;}
                }
                else
                {
                    // add (getyysstate <> xc) to avoid something like this */--,here */ should be handled instead of --
                    if ((slashstar >= 0) && (getyysstate() != xc)) {
                        nchars = slashstar;
                    }
                }

                if ( (nchars > 2) && (yylvalstr.charAt(0)== '<') && (yylvalstr.charAt(1) == '='))
                { // keep only the leading "<="
                    yyless(2);
                    yylvalstr = yylvalstr.substring(0,2);
                    returni(op);
                    break;
                }
                else if ( (nchars > 2) && (yylvalstr.charAt(0)== '>') && (yylvalstr.charAt(1) == '='))
                { // keep only the leading ">="
                    yyless(2);
                    yylvalstr = yylvalstr.substring(0,2);
                    returni(op);
                    break;
                }

                // drop a trailing '+'/'-' unless an operator character precedes it
                while ((nchars > 1)
                    && ( (yylvalstr.charAt(nchars-1) == '+' )
                        || (yylvalstr.charAt(nchars-1) =='-'))
                    && (getyysstate() != xc))
                {
                    for (ic = nchars - 1; ic>=1; ic--)
                    {
                        if (isopchar(yylvalstr.charAt(ic-1))) break;
                    }
                    if (ic >= 1) break;
                    nchars--;
                }

                if (nchars < yytextlen)
                {
                    //Strip the unwanted chars from the token
                    yyless(nchars);
                    yylvalstr = yylvalstr.substring(0,nchars);
                }

                // If what we have left is only one char, and it's
                // one of the characters matching "self", then
                // return it as a character token the same way
                // that the "self" rule would have.
                // make sure @ return as self char, by james wang
                if ((nchars == 1) && (isselfchar(yylvalstr.charAt(0)) || (yylvalstr.charAt(0) == '@')))
                {
                    returnc(yylvalstr.charAt(0));
                }
                else if (
                    (nchars >= 2)
                    &&(
                        charinarray(yylvalstr.charAt(nchars-1-1), tmparray)
                        && ((yylvalstr.charAt(nchars-1) == ':'))
                    )
                    )
                {
                    yyless(nchars-1);
                    yylvalstr = yylvalstr.substring(0,nchars-1);
                    if (nchars == 2)
                        returnc(yylvalstr.charAt(0));
                    else
                        returni(op);
                }
                else if (
                    (nchars >= 2)
                    && (
                        charinarray(yylvalstr.charAt(nchars-1-1),tmparray)
                        && (yylvalstr.charAt(nchars-1) == '@')
                    )
                    )
                {
                    yyless(nchars-1);
                    yylvalstr = yylvalstr.substring(0,nchars-1);
                    if (nchars == 2)
                        returnc(yylvalstr.charAt(0));
                    else
                        returni(op);
                }
                else if ( (nchars >= 2) && ((yylvalstr.charAt(0) == '>')||(yylvalstr.charAt(0) == '<')) )
                {
                    yyless(1);
                    returnc(yylvalstr.charAt(0));
                }
                else if ( (nchars >= 2) && (yylvalstr.charAt(0) == '+') )
                {
                    yyless(1);
                    returnc(yylvalstr.charAt(0));
                }
                else if ( (nchars >= 2) && (yylvalstr.charAt(0) == '=')&& (yylvalstr.charAt(1) == '@') )
                {
                    yyless(1);
                    returnc(yylvalstr.charAt(0));
                }
                else if ( (nchars == 2) && (yylvalstr.charAt(0) == '.') && (yylvalstr.charAt(1) == '*'))
                {
                    yyless(1);
                    returnc(yylvalstr.charAt(0));
                }
                else if ( (nchars == 2) && (yylvalstr.charAt(0) == '+') && (yylvalstr.charAt(1) == '.'))
                {
                    yyless(1);
                    returnc(yylvalstr.charAt(0));
                }
                else if ( (nchars == 2) && (yylvalstr.charAt(0) == '-') && (yylvalstr.charAt(1) == '.'))
                {
                    yyless(1);
                    returnc(yylvalstr.charAt(0));
                }
                else if ( (nchars == 2) && (yylvalstr.charAt(1) == '~'))
                {
                    yyless(1);
                    returnc(yylvalstr.charAt(0));
                }
                else if ( (nchars >= 2) && (yylvalstr.charAt(0) == '@') && (yylvalstr.charAt(1) == '@'))
                {
                    yyless(1);
                    returnc(yylvalstr.charAt(0));
                }
                else if ( (nchars >= 2) && (yylvalstr.charAt(0)== '|') && (yylvalstr.charAt(1) == '|'))
                { //||--sss ,just get || only
                    yyless(2);
                    yylvalstr = yylvalstr.substring(0,2);
                    returni(op);
                }
                else if ( (nchars == 2) && (yylvalstr.charAt(1) == '?') )
                {
                    yyless(1);
                    returnc(yylvalstr.charAt(0));
                }
                else if ( (nchars == 3) && (yylvalstr.charAt(2) == '?') )
                {
                    yyless(nchars-1);
                    yylvalstr = yylvalstr.substring(0,nchars-1);
                    if (nchars == 2)
                        returnc(yylvalstr.charAt(0));
                    else
                        returni(op);

                }
                else if ( (nchars >= 3) && (yylvalstr.charAt(nchars-1) == '$' ) && (yylvalstr.charAt(nchars-2) == '$' ))
                { // =$$abc
                    if (nchars == 3){
                        yyless(1);
                        returnc(yylvalstr.charAt(0));
                    }else{
                        yyless(nchars-2);
                        yylvalstr = yylvalstr.substring(0,nchars-2);
                        returni(cmpop);
                    }
                }
                else if (((nchars > 2) && (yylvalstr.charAt(0) == '*'))
                    && (yylvalstr.charAt(1) == '/')
                    && (getyysstate() == xc)
                    )
                { //in comment, and find */ , then it must the end of comment
                    yyless(2);
                    addlit(yylvalstr,yytextlen);
                    if (xcdepth <= 0)
                    {
                        start(init);
                        yylvalstr = litbufdup();
                        returni(cmtslashstar);
                    }
                    else
                        xcdepth--;
                }
                else
                    returni(op);
            }

            break;
        }

        case 42:

        {
            // BigQuery: allow unquoted identifiers that start with digits
            // when followed by an underscore (e.g. 1_test_table). Distinct
            // from numeric literals because of the required underscore.
            afterFromKeyword = false;
            returni(ident);
            break;
        }

        case 43:

        {
            returni(iconst);
            break;
        }

        case 44:

        {
            // Bug #3666 case 2: ".1" must not be a decimal when followed by '_' or
            // a letter (e.g., db.1_schema.table_name). Push the digits back, emit
            // '.', and let digit_leading_ident match the remainder on the next scan.
            if (yylvalstr.charAt(0) == '.') {
                dummych1 = get_char();
                unget_char(dummych1);
                if (dummych1 == '_'
                    || (dummych1 >= 'A' && dummych1 <= 'Z')
                    || (dummych1 >= 'a' && dummych1 <= 'z')) {
                    yyless(1);
                    yylvalstr = ".";
                    returni('.');
                    return;
                }
            }
            // for i in 1..5 loop: "1." followed by '.' is the integer 1, not a decimal
            nchars = yytextlen;
            if (yylvalstr.charAt(nchars-1) == '.')
            {
                dummych1 = get_char();
                unget_char(dummych1);
                if (dummych1 == '.')
                {
                    yyless(nchars-1);
                    yylvalstr = yylvalstr.substring(0,nchars - 1);
                    returni (iconst);
                    return;//exit;
                }
            }
            returni (fconst);
            break;
        }

        case 45:

        {
            returni (fconst);
            break;
        }


        case 46:

        {
            int rw;
            afterFromKeyword = false;
            nchars = yytextlen;

            // strip a trailing custom delimiter from the matched text
            if ((tmpDelimiter.length()>0) && (yylvalstr.endsWith(tmpDelimiter))){
                yyless(nchars-tmpDelimiter.length());
                yylvalstr = yylvalstr.substring(0,nchars-tmpDelimiter.length());
            }

            // keep only the prefix that is a legal identifier when dashes are present
            int k = checkIdentifier(yylvalstr);
            if (k > 0){
                yyless(k);
                yylvalstr = yylvalstr.substring(0,k);
            }

            if ( (rw = iskeyword(yylvalstr)) != -1) {
                afterFromKeyword = (rw==TBaseType.rrw_from||rw==TBaseType.rrw_table);
                returni(rw);
            }
            else returni(ident);
            break;
        }

        case 47:

        {
            nchars = yytextlen;
            // give back the trailing characters that do not belong to the identifier
            int k = checkIdentifierIncludeMinus(yylvalstr);
            if (k > 0){
                yyless(nchars-k);
                yylvalstr = yylvalstr.substring(0,nchars - k);
            }

            returni(ident);

            break;
        }



        case 48:

        {
            returni(variable);

            break;
        }




        case 49:

        {
            returni( error);
            break;
        }

        default:{
            System.out.println("fatal error in yyaction");
        }
        }//switch
    }/*yyaction*/



}