// Lexical analyzer for GSQLParser component, Java version.

/*
 * Lexical analyzer for GSQLParser component, Java version.
 * Copyright (c) 2004-2016 by Gudu software
 */

package gudusoft.gsqlparser;

import java.util.HashMap;
import java.io.InputStream;
import java.io.InputStreamReader;

import java.util.Locale;
import java.io.BufferedReader;
import java.io.IOException;

/**
 * Table-driven lexer for the Impala SQL dialect.
 *
 * <p>The scanner tables and the keyword list are read from the text resource
 * named by {@link #table_file} into static arrays shared by all instances.
 * Helper routines used below but not defined in this class ({@code yynew},
 * {@code yyscan}, {@code returni}, {@code addlit}, ...) are presumably
 * inherited from {@code TCustomLexer} -- confirm against that class.
 */
public class TLexerImpala extends TCustomLexer {

    static int yynmarks = 0;
    static int yynmatches;
    static int yyntrans;
    static int yynstates;
    static int[] yyk, yym; // 1 based
    static int[] yytint; // 1 based
    static TYytRec[] yyt; // 1 based
    static int[] yykl, yykh, yyml, yymh, yytl, yyth; // 0 based
    private static String[] keywordlist;
    static String table_file;
    static HashMap<String, Integer> keywordValueList;
    static HashMap<Integer, Integer> keywordTypeList;
    /** Transition index by [character][state]; -1 where no transition exists. */
    static int[][] yystateTable;

    static {
        keywordValueList = new HashMap<String, Integer>();
        keywordTypeList = new HashMap<Integer, Integer>();
        table_file = "/gudusoft/gsqlparser/parser/impala/impala_lex_table.txt";
        // The tables are only loaded when one of these edition flags is set.
        if (TBaseType.enterprise_edition || TBaseType.impala_edition) {
            inittable();
        }
    }

    /** Creates a lexer bound to the Impala vendor with an empty token list. */
    public TLexerImpala() {
        super();
        dbvendor = EDbVendor.dbvimpala;
        sourcetokens = new TSourceTokenList();
    }

    public TSourceTokenList sourcetokens;

    /**
     * Tells whether the keyword with the given token code may be used as a column name.
     *
     * @param tokencode token code to look up in the keyword type table
     * @return {@code true} when the code is registered and all of its type bits are
     *         contained in {@code keyword_type_identifier | keyword_type_column};
     *         {@code false} when the code is unknown or has other bits set
     */
    public boolean canBeColumnName(int tokencode) {
        Integer type = keywordTypeList.get(tokencode);
        if (type == null) {
            return false;
        }
        int allowed = keyword_type_identifier | keyword_type_column;
        int modifier = type;
        return (allowed & modifier) == modifier;
    }

    /**
     * Looks up the token code of a keyword, ignoring case.
     *
     * @param str candidate keyword
     * @return the keyword's token code, or -1 when {@code str} is not a keyword
     */
    public int iskeyword(String str) {
        Integer code = keywordValueList.get(str.toUpperCase(Locale.ENGLISH));
        return (code != null) ? code : -1; // -1 means not a keyword
    }

    /**
     * Looks up the token code of a keyword, ignoring case.
     *
     * @param keyword candidate keyword
     * @return the keyword's token code, or 0 when {@code keyword} is not a keyword
     */
    public int getkeywordvalue(String keyword) {
        Integer code = keywordValueList.get(keyword.toUpperCase(Locale.ENGLISH));
        return (code != null) ? code : 0; // 0 means not a keyword
    }

    /**
     * Resolves the keyword type of {@code keyword} by delegating to
     * {@code TCustomLexer.getKeywordType} with this lexer's keyword tables.
     */
    public static EKeywordType getKeywordType(String keyword) {
        return TCustomLexer.getKeywordType(keyword, keywordValueList, keywordTypeList);
    }

    /**
     * Builds {@link #yystateTable} from the loaded transition tables.
     *
     * <p>Every cell starts at -1. Then, for each state {@code i} and each transition
     * index {@code j} in {@code yytl[i]..yyth[i]}, every character listed in
     * {@code yyt[j].cc} gets {@code yystateTable[c][i] = j}. If a character occurs
     * in several transitions of one state, the highest index is the one kept.
     */
    static void yystateLookupConfigure() {
        int stateCount = yytl.length;
        yystateTable = new int[257][stateCount];

        // initialize to empty
        for (int state = 0; state < stateCount; state++) {
            for (int ch = 0; ch < 257; ch++) {
                yystateTable[ch][state] = -1;
            }
        }

        for (int state = 0; state < stateCount; state++) {
            int low = yytl[state];
            int high = yyth[state];
            for (int trans = low; trans <= high; trans++) {
                for (char c : yyt[trans].cc) {
                    yystateTable[c][state] = trans;
                }
            }
        }
    }

    /**
     * Scans the next token.
     *
     * <p>Each pass of the outer loop starts a new token ({@code yynew}), walks the
     * automaton until the current state has no transitions or none for the current
     * table character, then runs the matching rule action. The outer loop repeats
     * until {@code yydone} is set.
     *
     * @return {@code yyretval} as left by the last executed action
     */
    int yylex() {
        int yyn;
        while (true) { // top level while
            yynew();
            while (true) { // scan
                for (yyn = yykl[yystate]; yyn <= yykh[yystate]; yyn++) {
                    yymark(yyk[yyn]);
                }

                for (yyn = yymh[yystate]; yyn >= yyml[yystate]; yyn--) {
                    yymatch(yym[yyn]);
                }

                // The state has no outgoing transitions at all.
                if (yytl[yystate] > yyth[yystate]) {
                    break;
                }

                yyscan();
                totablechar();

                // Direct table lookup instead of a linear search over
                // yyt[yytl[yystate]..yyth[yystate]] for the current character.
                yyn = yystateTable[yytablechar][yystate];
                if (yyn == -1) {
                    break;
                }

                yystate = yyt[yyn].s;
            } // scan

            while (true) { // action
                int yyrule;
                if ((yyrule = yyfind()) != -1) {
                    yyaction(yyrule);
                    if (yyreject) {
                        continue; // the action rejected the match; look for another rule
                    }
                } else if ((!yydefault()) && (yywrap())) {
                    yyclear();
                    returni(0);
                }
                break;
            }

            if (!yydone) {
                continue;
            }
            break;
        } // top level while

        return yyretval;
    }

    /**
     * Loads the scanner tables and the keyword list from {@link #table_file},
     * then rebuilds the state lookup table.
     *
     * <p>Format, as consumed by this parser: header lines {@code yynmarks=N;},
     * {@code yynmatches=N;}, {@code yyntrans=N;} and {@code yynstates=N;} size the
     * arrays; a line starting with {@code "yyk ="}, {@code "yym ="}, ...,
     * {@code "yyt ="} or {@code "keywordsvalue ="} opens a section that is closed
     * by a line starting with {@code <end>}. Integer sections hold comma-separated
     * numbers, {@code yyt} lines hold {@code ,,}-separated characters, and keyword
     * lines have the form {@code NAME=value=type}.
     *
     * <p>Element-count mismatches and I/O errors are reported on standard output
     * and loading continues.
     *
     * @throws IllegalStateException when the table resource cannot be found
     */
    static void inittable() {
        boolean inyyk = false, inyym = false, inyykl = false, inyykh = false, inyyml = false,
                inyymh = false, inyytl = false, inyyth = false, inyytint = false, inyyt = false,
                inkeyword = false;
        int yykCount = 0, yymCount = 0, yyklCount = 0, yykhCount = 0, yymlCount = 0,
                yymhCount = 0, yytlCount = 0, yythCount = 0, yytintCount = 0, yytCount = 0;
        keywordValueList.clear();
        keywordTypeList.clear();

        InputStream tableStream = TLexerImpala.class.getResourceAsStream(table_file);
        if (tableStream == null) {
            // getResourceAsStream returns null for a missing resource; fail with a
            // message naming it rather than an anonymous NullPointerException.
            throw new IllegalStateException("lexer table resource not found: " + table_file);
        }
        // NOTE(review): the reader uses the platform default charset; the table file
        // is presumably plain ASCII -- confirm before pinning a charset here.
        BufferedReader br = new BufferedReader(new InputStreamReader(tableStream));

        try {
            String line;
            while ((line = br.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.startsWith("yynmarks=")) {
                    yynmarks = parseHeaderValue(line);
                    yyk = new int[yynmarks + 1];
                } else if (trimmed.startsWith("yynmatches=")) {
                    yynmatches = parseHeaderValue(line);
                    yym = new int[yynmatches + 1];
                } else if (trimmed.startsWith("yyntrans=")) {
                    yyntrans = parseHeaderValue(line);
                    yytint = new int[yyntrans + 1];
                    yyt = new TYytRec[yyntrans + 1];
                } else if (trimmed.startsWith("yynstates=")) {
                    yynstates = parseHeaderValue(line);
                    yykl = new int[yynstates];
                    yykh = new int[yynstates];
                    yyml = new int[yynstates];
                    yymh = new int[yynstates];
                    yytl = new int[yynstates];
                    yyth = new int[yynstates];
                } else if (trimmed.startsWith("<end>")) {
                    // Close the first open section and verify its element count.
                    // 1-based arrays carry one extra leading element, hence "count - 1".
                    if (inyyk) {
                        inyyk = false;
                        reportCountMismatch("required1:", yynmarks, yykCount - 1);
                    } else if (inyym) {
                        inyym = false;
                        reportCountMismatch("required2:", yynmatches, yymCount - 1);
                    } else if (inyykl) {
                        inyykl = false;
                        reportCountMismatch("required3:", yynstates, yyklCount);
                    } else if (inyykh) {
                        inyykh = false;
                        reportCountMismatch("required4:", yynstates, yykhCount);
                    } else if (inyyml) {
                        inyyml = false;
                        reportCountMismatch("required5:", yynstates, yymlCount);
                    } else if (inyymh) {
                        inyymh = false;
                        reportCountMismatch("required:", yynstates, yymhCount);
                    } else if (inyytl) {
                        inyytl = false;
                        reportCountMismatch("required6:", yynstates, yytlCount);
                    } else if (inyyth) {
                        inyyth = false;
                        reportCountMismatch("required7:", yynstates, yythCount);
                    } else if (inyytint) {
                        inyytint = false;
                        reportCountMismatch("required8:", yyntrans, yytintCount - 1);
                    } else if (inyyt) {
                        inyyt = false;
                        reportCountMismatch("required9:", yyntrans, yytCount - 1);
                    } else if (inkeyword) {
                        inkeyword = false;
                    }
                } else if (trimmed.startsWith("yyk =")) {
                    inyyk = true;
                } else if (trimmed.startsWith("yym =")) {
                    inyym = true;
                } else if (trimmed.startsWith("yykl =")) {
                    inyykl = true;
                } else if (trimmed.startsWith("yykh =")) {
                    inyykh = true;
                } else if (trimmed.startsWith("yyml =")) {
                    inyyml = true;
                } else if (trimmed.startsWith("yymh =")) {
                    inyymh = true;
                } else if (trimmed.startsWith("yytl =")) {
                    inyytl = true;
                } else if (trimmed.startsWith("yyth =")) {
                    inyyth = true;
                } else if (trimmed.startsWith("yytint =")) {
                    inyytint = true;
                } else if (trimmed.startsWith("yyt =")) {
                    inyyt = true;
                } else if (trimmed.startsWith("keywordsvalue =")) {
                    inkeyword = true;
                } else if (inyyk) {
                    yykCount = appendInts(line, yyk, yykCount);
                } else if (inyym) {
                    yymCount = appendInts(line, yym, yymCount);
                } else if (inyykl) {
                    yyklCount = appendInts(line, yykl, yyklCount);
                } else if (inyykh) {
                    yykhCount = appendInts(line, yykh, yykhCount);
                } else if (inyyml) {
                    yymlCount = appendInts(line, yyml, yymlCount);
                } else if (inyymh) {
                    yymhCount = appendInts(line, yymh, yymhCount);
                } else if (inyytl) {
                    yytlCount = appendInts(line, yytl, yytlCount);
                } else if (inyyth) {
                    yythCount = appendInts(line, yyth, yythCount);
                } else if (inyytint) {
                    yytintCount = appendInts(line, yytint, yytintCount);
                } else if (inyyt) {
                    // Pairs the characters with yytint[yytCount], so the yytint
                    // section must already have been read at this point.
                    char[] chars = parseTransitionChars(line, yytCount);
                    yyt[yytCount] = new TYytRec(chars, yytint[yytCount]);
                    yytCount++;
                } else if (inkeyword) {
                    String[] ss = line.split("[=]");

                    int val1 = -1;
                    int val2 = -1;
                    try {
                        val1 = Integer.parseInt(ss[1]);
                        val2 = Integer.parseInt(ss[2]);
                    } catch (NumberFormatException nfe) {
                        System.out.println("NumberFormatException: " + nfe.getMessage());
                    }
                    // Same locale as the lookups in iskeyword()/getkeywordvalue(), so
                    // the stored key matches whatever the JVM default locale is.
                    keywordValueList.put(ss[0].toUpperCase(Locale.ENGLISH), val1);
                    keywordTypeList.put(val1, val2);
                }
            }
        } catch (IOException e) {
            System.out.println(e.toString());
        } finally {
            // Always release the resource stream, including when a parse error propagates.
            try {
                br.close();
            } catch (IOException e) {
                System.out.println(e.toString());
            }
        }

        yystateLookupConfigure();
    }

    /** Returns the integer between '=' and ';' of a header line such as {@code yynmarks=12;}. */
    private static int parseHeaderValue(String line) {
        String[] ss = line.split("[=;]");
        return Integer.parseInt(ss[1].trim());
    }

    /**
     * Parses the comma-separated integers of {@code line} into {@code target}
     * starting at index {@code count}, and returns the next free index.
     */
    private static int appendInts(String line, int[] target, int count) {
        String[] ss = line.split("[,]");
        for (int j = 0; j < ss.length; j++) {
            target[count++] = Integer.parseInt(ss[j].trim());
        }
        return count;
    }

    /** Prints {@code label + required + " actually:" + actual} when the two counts differ. */
    private static void reportCountMismatch(String label, int required, int actual) {
        if (required != actual) {
            System.out.println(label + required + " actually:" + actual);
        }
    }

    /**
     * Decodes one {@code yyt} line into its character set.
     *
     * <p>Items are separated by {@code ,,}. An item is either a quoted character
     * ({@code 'a'}, or a 4-character escaped form whose third character is taken)
     * or a decimal character code. The result always has one slot per item;
     * malformed items are reported on standard output and leave a slot at its
     * default value.
     *
     * @param line        raw table line
     * @param recordIndex index of the record being read, used in the error message only
     */
    private static char[] parseTransitionChars(String line, int recordIndex) {
        String[] st = line.trim().split(",,");
        char[] chars = new char[st.length];
        int c = 0;
        for (int i = 0; i < st.length; i++) {
            if (st[i].startsWith("\'")) {
                if (st[i].length() == 3) { // 'a'
                    chars[c++] = st[i].charAt(1);
                } else if (st[i].length() == 4) { // '\\'
                    chars[c++] = st[i].charAt(2);
                } else {
                    System.out.println(" read yytstr error, error string is "+st[i]+ "line: "+ recordIndex);
                }
            } else {
                try {
                    // char given as a number, e.g. 32 for a space
                    chars[c++] = (char) Integer.parseInt(st[i]);
                } catch (NumberFormatException nfe) {
                    System.out.println("NumberFormatException: " + nfe.getMessage());
                }
            }
        }
        return chars;
    }

    /**
     * Executes the action attached to lexer rule {@code yyruleno}.
     *
     * <p>Sets {@code yylvalstr} to the current token text first. Rule numbers
     * outside 1..61 print "fatal error in yyaction".
     *
     * @param yyruleno number of the matched rule
     */
    void yyaction(int yyruleno) {

        yylvalstr = getyytext();
        /* actions: */
        switch (yyruleno) {
            case 1: {
                addlit(yylvalstr, yytextlen);
                if (xcdepth <= 0) {
                    start(init);
                    yylvalstr = litbufdup();
                    returni(cmtslashstar);
                } else {
                    xcdepth--;
                }
                break;
            }

            case 2: {
                xcdepth++;
                yyless(2);
                addlit(yylvalstr, yytextlen);
                break;
            }

            case 3: {
                if (getyysstate() == xq) {
                    nchars = yytextlen;
                    addlit(yylvalstr, yytextlen - 1);
                    yyless(nchars - 1);
                    return; // exit
                }

                xcdepth = 0;
                start(xc);
                startlit();
                yyless(2);
                addlit(yylvalstr, yytextlen);
                break;
            }

            case 5: {
                addlitchar(yylvalstr.charAt(0));
                break;
            }

            case 6: {
                start(init);
                addlit(yylvalstr, yytextlen);
                yylvalstr = litbufdup();
                returni(TBaseType.hive_StringLiteral);
                break;
            }

            case 8: {
                if (yysstate == xd) {
                    addlit(yylvalstr, yytextlen);
                } else {
                    start(xq);
                    startlit();
                    addlit(yylvalstr, yytextlen);
                }
                break;
            }

            // Rules that only append the matched text to the literal buffer.
            case 4:
            case 7:
            case 9:
            case 10:
            case 14: {
                addlit(yylvalstr, yytextlen);
                break;
            }

            // Rules that append the matched text plus the next input character.
            case 11:
            case 12: {
                dummych1 = get_char();
                addlit(yylvalstr + dummych1, yytextlen + 1);
                break;
            }

            case 13: {
                start(init);
                addlit(yylvalstr, yytextlen);
                // Execution continues after reporting the error; there is no early exit.
                if (literallen == 0) returni(error);
                if (literallen >= namedatalen) {
                    setlengthofliteralbuf(namedatalen);
                    literallen = namedatalen;
                }
                yylvalstr = litbufdup();
                returni(TBaseType.hive_StringLiteral);
                break;
            }

            case 15: {
                start(xd);
                startlit();
                addlit(yylvalstr, yytextlen);
                break;
            }

            case 16: {
                // Peek at the next character; consume it only when it is a line feed (10).
                dummych1 = get_char();
                unget_char(dummych1);
                if (dummych1 == (char) 10) {
                    dummych1 = get_char();
                    addlit(yylvalstr + dummych1, yytextlen + 1);
                } else {
                    addlit(yylvalstr, yytextlen);
                }
                break;
            }

            case 17: returni(lexnewline); break;
            case 18: returni(lexspace); break;

            case 19: {
                if ((getyysstate() == xq)
                        || (getyysstate() == xd)
                        || (getyysstate() == xc)) {
                    addlit(yylvalstr, 1);
                    yyless(1);
                    return; // exit
                }

                returni(cmtdoublehyphen);
                break;
            }

            case 20: returni('.'); break;
            case 21: returni(':'); break;
            case 22: returni(','); break;
            case 23: returni(';'); break;
            case 24: returni('('); break;
            case 25: returni(')'); break;
            case 26: returni('['); break;
            case 27: returni(']'); break;
            case 28: returni('{'); break;
            case 29: returni('}'); break;
            case 30: returni('<'); break;
            case 31: returni('>'); break;
            case 32: returni('/'); break;
            case 33: returni('+'); break;
            case 34: returni('-'); break;
            case 35: returni('*'); break;
            case 36: returni('%'); break;
            case 37: returni('&'); break;
            case 38: returni('~'); break;
            case 39: returni('|'); break;
            case 40: returni('^'); break;
            case 41: returni('?'); break;
            case 42: returni('$'); break;

            case 43: returni(TBaseType.hive_equal); break;
            case 44: returni(TBaseType.safe_equal); break;
            case 45: returni(TBaseType.not_equal); break;
            case 46: returni(TBaseType.less_equal); break;
            case 47: returni('!'); break;
            case 48: returni(TBaseType.boolean_and); break;
            case 49: returni(TBaseType.boolean_or); break;
            case 50: returni(TBaseType.great_equal); break;

            case 51: returni(TBaseType.hive_ByteLengthLiteral); break;
            case 52: returni(TBaseType.hive_BigintLiteral); break;
            case 53: returni(TBaseType.hive_SmallintLiteral); break;
            case 54: returni(TBaseType.hive_TinyintLiteral); break;
            case 55: returni(TBaseType.hive_DecimalLiteral); break;
            case 56: returni(TBaseType.hive_number); break;
            case 57: returni(TBaseType.hive_CharSetLiteral); break;

            case 58: {
                // Keyword token code when the text is a keyword, plain identifier otherwise.
                int rw;
                if ((rw = iskeyword(yylvalstr)) != -1) {
                    returni(rw);
                } else {
                    returni(ident);
                }
                break;
            }

            case 59: returni(TBaseType.hive_CharSetName); break;
            case 60: returni(TBaseType.hive_div); break;
            case 61: returni(error); break;

            default: {
                System.out.println("fatal error in yyaction");
            }
        } // switch
    } /* yyaction */

}