001// lexical analyzer for GSQLParser component java version 002 003/****************************************************} 004{ Lexical analizer for GSQLParser component java version } 005{ Copyright (c) 2004-2023 by Gudu software } 006{****************************************************/ 007 008package gudusoft.gsqlparser; 009 010import java.util.HashMap; 011import java.io.InputStreamReader; 012 013import java.util.Locale; 014import java.io.BufferedReader; 015import java.io.IOException; 016 017 018 019public class TLexerHive extends TCustomLexer{ 020 021 static int yynmarks = 0 ; 022 static int yynmatches ; 023 static int yyntrans ; 024 static int yynstates ; 025 static int[] yyk,yym ; // 1 based 026 static int[] yytint; // 1 based 027 static TYytRec[] yyt ; // 1 based 028 static int[] yykl,yykh,yyml,yymh,yytl,yyth ; // 0 based 029 private static String[] keywordlist; 030 static String table_file; 031 static HashMap<String, Integer> keywordValueList; 032 static HashMap<Integer, Integer> keywordTypeList; 033 static int[][] yystateTable; 034 035 static { 036 keywordValueList = new HashMap<String, Integer>(); 037 keywordTypeList = new HashMap<Integer, Integer>(); 038 table_file = "/gudusoft/gsqlparser/parser/hive/hive_lex_table.txt"; 039 if (TBaseType.enterprise_edition||TBaseType.hive_edition){ 040 inittable(); 041 } 042 } 043 044 public TLexerHive(){ 045 super(); 046 dbvendor = EDbVendor.dbvhive; 047 sourcetokens = new TSourceTokenList(); 048 } 049 050 051 public TSourceTokenList sourcetokens; 052 053 054public boolean canBeColumnName(int tokencode){ 055 //http://blog.csdn.net/superbeck/article/details/5387476 056 boolean ret = false; 057 int modifiers = keyword_type_identifier | keyword_type_column ; 058 Integer s = keywordTypeList.get(tokencode); 059 if (s != null){ 060 int modifier = s; 061 ret = (modifiers & modifier) == modifier; 062 } 063 064 return ret; 065} 066 067 public int iskeyword(String str){ 068 int ret = -1; 069 Integer s = keywordValueList.get(str.toUpperCase(Locale.ENGLISH)); 070 if( s != null){ 071 ret = s; 072 } 073 return ret;// -1 means not a keyword 074 } 075 076 public int getkeywordvalue(String keyword){ 077 int ret = 0; 078 Integer s = keywordValueList.get(keyword.toUpperCase(Locale.ENGLISH)); 079 if( s != null){ 080 ret = s; 081 } 082 return ret;// 0 means not a keyword 083 } 084 085 public static EKeywordType getKeywordType(String keyword){ 086 return TCustomLexer.getKeywordType(keyword,keywordValueList,keywordTypeList); 087 } 088 089 static void yystateLookupConfigure() { 090 int yystates = yytl.length; 091 yystateTable = new int[257][yystates]; 092 093 // initialize to empty 094 for(int i = 0; i < yystates; i++) { 095 for (int j = 0; j < 257; j++) 096 yystateTable[j][i] = -1; 097 } 098 099 for(int i = 0; i < yystates; i++) { 100 int low = yytl[i]; 101 int high = yyth[i]; 102 for (int j = low; j <= high; j++) { 103 for (char c: yyt[j].cc) { 104 yystateTable[c][i] = j; 105 } 106 } 107 } 108 } 109 110 int yylex(){ 111 int yyn; 112 while (true) { // top level while 113 yynew(); 114 while (true){ //scan 115 for(yyn = yykl[yystate]; yyn <= yykh[yystate]; yyn++){ 116 yymark(yyk[yyn]); 117 } 118 119 for(yyn=yymh[yystate]; yyn>= yyml[yystate]; yyn--){ 120 yymatch(yym[yyn]); 121 } 122 123 if(yytl[yystate] > yyth[yystate]){ 124 break; 125 } 126 127 yyscan(); 128// yyn = yytl[yystate]; 129 totablechar(); 130// while( (yyn <= yyth[yystate]) && (!(charinarray(yytablechar,yyt[yyn].cc))) ){ 131// yyn++; 132// } 133// if (yyn > yyth[yystate]){ 134// break; 135// } 136 137 yyn = yystateTable[yytablechar][yystate]; 138 if (yyn == -1) 139 break; 140 141 yystate = yyt[yyn].s; 142 } //scan 143 144 while (true){ //action 145 int yyrule; 146 if ( (yyrule = yyfind()) != -1 ){ 147 yyaction(yyrule); 148 if (yyreject){ 149 continue; 150 } 151 }else if( (!yydefault() ) && (yywrap()) ){ 152 yyclear(); 153 returni(0); 154 } 155 break; 156 } 157 158 if (!yydone) { 159 continue; 160 } 161 break; 162 } // top level while 163 164 return yyretval; 165 } 166 167 static void inittable(){ 168 169 //if (yynmarks > 0) return; //init table already 170 171 String line; 172 boolean inyyk=false,inyym=false,inyykl=false,inyykh=false,inyyml=false,inyymh=false,inyytl=false,inyyth=false,inyytint=false,inyyt=false,inkeyword=false; 173 int yyk_count=0,yym_count=0,yykl_count=0,yykh_count=0,yyml_count=0,yymh_count=0,yytl_count=0,yyth_count=0,yytint_count=0,yyt_count=0; 174 int c=0; 175 keywordValueList.clear(); 176 keywordTypeList.clear(); 177 178 BufferedReader br = new BufferedReader(new InputStreamReader(TLexerHive.class.getResourceAsStream(table_file))); 179 180 try{ 181 while( (line = br.readLine()) != null){ 182 if (line.trim().startsWith("yynmarks=")){ 183 String[] ss = line.split("[=;]"); 184 yynmarks=Integer.parseInt(ss[1].trim()); 185 yyk = new int[yynmarks+1]; 186 }else if (line.trim().startsWith("yynmatches=")){ 187 String[] ss = line.split("[=;]"); 188 yynmatches=Integer.parseInt(ss[1].trim()); 189 yym = new int[yynmatches+1]; 190 }else if (line.trim().startsWith("yyntrans=")){ 191 String[] ss = line.split("[=;]"); 192 yyntrans=Integer.parseInt(ss[1].trim()); 193 yytint = new int[yyntrans+1]; 194 yyt = new TYytRec[yyntrans+1]; 195 }else if (line.trim().startsWith("yynstates=")){ 196 String[] ss = line.split("[=;]"); 197 yynstates=Integer.parseInt(ss[1].trim()); 198 yykl = new int[yynstates]; 199 yykh = new int[yynstates]; 200 yyml = new int[yynstates]; 201 yymh = new int[yynstates]; 202 yytl = new int[yynstates]; 203 yyth = new int[yynstates]; 204 }else if (line.trim().startsWith("<end>")){ 205 if (inyyk){ 206 inyyk = false; 207 if (yynmarks+1 != yyk_count ){ 208 System.out.println("required1:"+(yynmarks)+" actually:"+(yyk_count-1)); 209 } 210 } 211 else if(inyym){ 212 inyym = false; 213 if (yynmatches+1 != yym_count ){ 214 System.out.println("required2:"+(yynmatches)+" actually:"+(yym_count-1)); 215 } 216 } 217 else if(inyykl){ 218 inyykl = false; 219 if (yynstates != yykl_count ){ 220 System.out.println("required3:"+(yynstates)+" actually:"+(yykl_count)); 221 } 222 } 223 else if(inyykh){ 224 inyykh = false; 225 if (yynstates != yykh_count ){ 226 System.out.println("required4:"+(yynstates)+" actually:"+(yykh_count)); 227 } 228 } 229 else if(inyyml){ 230 inyyml = false; 231 if (yynstates != yyml_count ){ 232 System.out.println("required5:"+(yynstates)+" actually:"+(yyml_count)); 233 } 234 } 235 else if(inyymh){ 236 inyymh = false; 237 if (yynstates != yymh_count ){ 238 System.out.println("required:"+(yynstates)+" actually:"+(yymh_count)); 239 } 240 } 241 else if(inyytl){ 242 inyytl = false; 243 if (yynstates != yytl_count ){ 244 System.out.println("required6:"+(yynstates)+" actually:"+(yytl_count)); 245 } 246 } 247 else if(inyyth){ 248 inyyth = false; 249 if (yynstates != yyth_count ){ 250 System.out.println("required7:"+(yynstates)+" actually:"+(yyth_count)); 251 } 252 } 253 else if(inyytint){ 254 inyytint = false; 255 if (yyntrans + 1 != yytint_count ){ 256 System.out.println("required8:"+(yyntrans)+" actually:"+(yytint_count-1)); 257 } 258 } 259 else if(inyyt){ 260 inyyt = false; 261 if (yyntrans+1 != yyt_count ){ 262 System.out.println("required9:"+(yyntrans)+" actually:"+(yyt_count-1)); 263 } 264 } 265 else if(inkeyword){ 266 inkeyword = false; 267 } 268 }else if(line.trim().startsWith("yyk =")){ 269 inyyk = true; 270 }else if(line.trim().startsWith("yym =")){ 271 inyym = true; 272 }else if(line.trim().startsWith("yykl =")){ 273 inyykl = true; 274 }else if(line.trim().startsWith("yykh =")){ 275 inyykh = true; 276 }else if(line.trim().startsWith("yyml =")){ 277 inyyml = true; 278 }else if(line.trim().startsWith("yymh =")){ 279 inyymh = true; 280 }else if(line.trim().startsWith("yytl =")){ 281 inyytl = true; 282 }else if(line.trim().startsWith("yyth =")){ 283 inyyth = true; 284 }else if(line.trim().startsWith("yytint =")){ 285 inyytint = true; 286 }else if(line.trim().startsWith("yyt =")){ 287 inyyt = true; 288 }else if(line.trim().startsWith("keywordsvalue =")){ 289 inkeyword = true; 290 }else if(inyyk){ 291 String[] ss = line.split("[,]"); 292 for(int j=0;j<ss.length;j++){ 293 // System.out.println(ss[j].trim()); 294 yyk[yyk_count++] = Integer.parseInt(ss[j].trim()); 295 } 296 }else if(inyym){ 297 String[] ss = line.split("[,]"); 298 for(int j=0;j<ss.length;j++){ 299 // System.out.println(ss[j].trim()); 300 yym[yym_count++] = Integer.parseInt(ss[j].trim()); 301 } 302 }else if(inyykl){ 303 String[] ss = line.split("[,]"); 304 for(int j=0;j<ss.length;j++){ 305 // System.out.println(ss[j].trim()); 306 yykl[yykl_count++] = Integer.parseInt(ss[j].trim()); 307 } 308 }else if(inyykh){ 309 String[] ss = line.split("[,]"); 310 for(int j=0;j<ss.length;j++){ 311 // System.out.println(ss[j].trim()); 312 yykh[yykh_count++] = Integer.parseInt(ss[j].trim()); 313 } 314 }else if(inyyml){ 315 String[] ss = line.split("[,]"); 316 for(int j=0;j<ss.length;j++){ 317 // System.out.println(ss[j].trim()); 318 yyml[yyml_count++] = Integer.parseInt(ss[j].trim()); 319 } 320 }else if(inyymh){ 321 String[] ss = line.split("[,]"); 322 for(int j=0;j<ss.length;j++){ 323 // System.out.println(ss[j].trim()); 324 yymh[yymh_count++] = Integer.parseInt(ss[j].trim()); 325 } 326 }else if(inyytl){ 327 String[] ss = line.split("[,]"); 328 for(int j=0;j<ss.length;j++){ 329 // System.out.println(ss[j].trim()); 330 yytl[yytl_count++] = Integer.parseInt(ss[j].trim()); 331 } 332 }else if(inyyth){ 333 String[] ss = line.split("[,]"); 334 for(int j=0;j<ss.length;j++){ 335 // System.out.println(ss[j].trim()); 336 yyth[yyth_count++] = Integer.parseInt(ss[j].trim()); 337 } 338 }else if(inyytint){ 339 String[] ss = line.split("[,]"); 340 for(int j=0;j<ss.length;j++){ 341 // System.out.println(ss[j].trim()); 342 yytint[yytint_count++] = Integer.parseInt(ss[j].trim()); 343 } 344 }else if(inyyt){ 345 //System.out.println(line.trim()); 346 347 c = 0; 348 String[] st = line.trim().split(",,"); 349 char[] tmp = new char[st.length]; 350 for(int i=0;i<st.length;i++){ 351 352 if(st[i].startsWith("\'")) { 353 if(st[i].length() == 3){ // 'a' 354 tmp[c++] = st[i].charAt(1); 355 }else if(st[i].length() == 4) { // '\\' 356 tmp[c++] = st[i].charAt(2); 357 }else{ 358 System.out.println(" read yytstr error, error string is "+st[i]+ "line: "+ yyt_count); 359 } 360 }else{ 361 try{ 362 tmp[c++] = (char)Integer.parseInt(st[i]); // char in number like 32 that represent space 363 } catch (NumberFormatException nfe) { 364 System.out.println("NumberFormatException: " + nfe.getMessage()); 365 } 366 } 367 } //while hasmoreTokens 368 369 //yyt[lineno] = new YYTrec(tmp,yytint[lineno]); 370 yyt[yyt_count] = new TYytRec(tmp,yytint[yyt_count]); 371 yyt_count++; 372 373 }else if(inkeyword){ 374 String[] ss =line.split("[=]"); 375 376 int val1 = -1; 377 int val2 = -1; 378 try { 379 val1 = Integer.parseInt(ss[1]); 380 val2 = Integer.parseInt(ss[2]); 381 } 382 catch (NumberFormatException nfe) { 383 System.out.println("NumberFormatException: " + nfe.getMessage()); 384 } 385 keywordValueList.put(ss[0].toUpperCase(),val1); 386 keywordTypeList.put(val1,val2); 387 } 388 } 389 }catch(IOException e){ 390 System.out.println(e.toString()); 391 } 392 393 yystateLookupConfigure(); 394 395 } 396 397 398 399 void yyaction(int yyruleno){ 400 401 402 int ic; 403 char[] tmparray = {'=','+','-','*','/','>','<'}; 404 405 yylvalstr = getyytext(); 406 /* actions: */ 407 switch(yyruleno){ 408 case 1: 409 410 { 411 addlit(yylvalstr,yytextlen); 412 if (xcdepth <= 0) 413 { 414 start(init); 415 yylvalstr = litbufdup(); 416 returni(cmtslashstar); 417 } 418 else 419 xcdepth--; 420 421 break; 422 } 423 424 425 case 2: 426 427 { 428 xcdepth++; 429 yyless(2); 430 addlit(yylvalstr,yytextlen); 431 break; 432 } 433 434 case 3: 435 436 { 437 438 if (yylvalstr.equalsIgnoreCase("/*+")) 439 { 440 // hive hint 441 yyless(1); 442 returnc(yylvalstr.charAt(0)); 443 }else { 444 445 if (getyysstate() == xq) 446 { 447 nchars = yytextlen; 448 addlit(yylvalstr, yytextlen-1); 449 yyless(nchars-1); 450 return;//exit; 451 } 452 453 xcdepth = 0; 454 start(xc); 455 startlit(); 456 yyless(2); 457 addlit(yylvalstr,yytextlen); 458 } 459 460 break; 461 } 462 463 case 4: 464 465 { 466 addlit(yylvalstr,yytextlen); 467 break; 468 } 469 470 case 5: 471 472 { 473 addlitchar(yylvalstr.charAt(0)); 474 break; 475 } 476 477 478 case 6: 479 480 { 481 addlit(yylvalstr, yytextlen); 482 break; 483 } 484 485 case 7: 486 487 { 488 addlit(yylvalstr, yytextlen); 489 break; 490 } 491 492 case 8: 493 494 { 495 start(init); 496 addlit(yylvalstr, yytextlen); 497 yylvalstr = litbufdup(); 498 returni(ident); 499 break; 500 } 501 502 case 9: 503 504 { 505 if ((getyysstate() == xd)||(getyysstate() == xq)) 506 { 507 addlit(yylvalstr, 1); 508 yyless(1); 509 return;//exit; 510 } 511 512 513 start(xbacktick); 514 startlit(); 515 addlit(yylvalstr, yytextlen); 516 517 break; 518 } 519 520 521 522 523 case 10: 524 525 { 526 start(init); 527 addlit(yylvalstr, yytextlen); 528 yylvalstr = litbufdup(); 529 returni(TBaseType.hive_StringLiteral); 530 break; 531 } 532 533 case 11: 534 535 { 536 addlit(yylvalstr, yytextlen); 537 break; 538 } 539 540 541 542 case 12: 543 544 { 545 if (yysstate == xd) 546 { 547 addlit(yylvalstr, yytextlen); 548 } 549 else 550 { 551 start(xq); 552 startlit(); 553 addlit(yylvalstr, yytextlen); 554 } 555 556 break; 557 } 558 559 case 13: 560 561 { 562 addlit(yylvalstr, yytextlen); 563 break; 564 } 565 566 case 14: 567 568 { 569 addlit(yylvalstr, yytextlen); 570 571 break; 572 } 573 574 575 case 15: 576 577 { 578 dummych1 = get_char(); 579 addlit(yylvalstr+dummych1, yytextlen+1); 580 break; 581 } 582 583 case 16: 584 585 { 586 dummych1 = get_char(); 587 addlit(yylvalstr+dummych1, yytextlen+1); 588 break; 589 } 590 591 case 17: 592 593 { 594 start(init); 595 addlit(yylvalstr, yytextlen); 596 if (literallen == 0) returni (error); 597 if (literallen >= namedatalen) 598 { 599 setlengthofliteralbuf(namedatalen); 600 literallen = namedatalen; 601 } 602 yylvalstr = litbufdup(); 603 returni (TBaseType.hive_StringLiteral); 604 605 break; 606 } 607 608 case 18: 609 610 { 611 addlit(yylvalstr, yytextlen); 612 break; 613 } 614 615 case 19: 616 617 { 618 start(xd); 619 startlit(); 620 addlit(yylvalstr, yytextlen); 621 break; 622 } 623 624 case 20: 625 626 { 627 dummych1 = get_char(); 628 unget_char(dummych1); 629 if (dummych1 == (char)10) 630 { 631 dummych1 = get_char(); 632 addlit(yylvalstr+dummych1, yytextlen+1); 633 } else 634 addlit(yylvalstr, yytextlen); 635 636 break; 637 } 638 639 640 641 642 643 case 21: 644 645 { 646 returni(lexnewline); 647 break; 648 } 649 650 case 22: 651 652 { 653 returni(lexspace); 654 break; 655 } 656 657 case 23: 658 659 { 660 if ((getyysstate() == xq) 661 || (getyysstate() == xd) 662 ) 663 { 664 addlit(yylvalstr, 1); 665 yyless(1); 666 return;//exit; 667 } 668 669 returni(cmtdoublehyphen); 670 break; 671 } 672 673 case 24: 674 675 { 676 returni('.'); 677 break; 678 } 679 680 case 25: 681 682 { 683 returni(':'); 684 break; 685 } 686 687 case 26: 688 689 { 690 returni(','); 691 break; 692 } 693 694 case 27: 695 696 { 697 returni(';'); 698 break; 699 } 700 701 702 case 28: 703 704 { 705 returni('('); 706 break; 707 } 708 709 case 29: 710 711 { 712 returni(')'); 713 break; 714 } 715 716 case 30: 717 718 { 719 returni('['); 720 break; 721 } 722 723 case 31: 724 725 { 726 returni(']'); 727 break; 728 } 729 730 case 32: 731 732 { 733 returni('{'); 734 break; 735 } 736 737 case 33: 738 739 { 740 returni('}'); 741 break; 742 } 743 744 745 case 34: 746 747 { 748 returni('<'); 749 break; 750 } 751 752 case 35: 753 754 { 755 returni('>'); 756 break; 757 } 758 759 760 case 36: 761 762 { 763 returni('/'); 764 break; 765 } 766 767 case 37: 768 769 { 770 returni('+'); 771 break; 772 } 773 774 case 38: 775 776 { 777 returni('-'); 778 break; 779 } 780 781 case 39: 782 783 { 784 returni('*'); 785 break; 786 } 787 788 case 40: 789 790 { 791 returni('%'); 792 break; 793 } 794 795 796 case 41: 797 798 { 799 returni('&'); 800 break; 801 } 802 803 case 42: 804 805 { 806 returni('~'); 807 break; 808 } 809 810 case 43: 811 812 { 813 returni('|'); 814 break; 815 } 816 817 case 44: 818 819 { 820 returni('^'); 821 break; 822 } 823 824 case 45: 825 826 { 827 returni('?'); 828 break; 829 } 830 831 case 46: 832 833 { 834 returni('$'); 835 break; 836 } 837 838 case 47: 839 840 { 841 returni(concatenationop); 842 break; 843 } 844 845 846 case 48: 847 848 { 849 returni(TBaseType.hive_equal); 850 break; 851 } 852 853 case 49: 854 855 { 856 returni(TBaseType.safe_equal); 857 break; 858 } 859 860 861 case 50: 862 863 { 864 returni(TBaseType.not_equal); 865 break; 866 } 867 868 case 51: 869 870 { 871 returni(TBaseType.less_equal); 872 break; 873 } 874 875 case 52: 876 877 { 878 returni('!'); 879 break; 880 } 881 882 case 53: 883 884 { 885 returni(TBaseType.boolean_and); 886 break; 887 } 888 889 case 54: 890 891 { 892 returni(TBaseType.boolean_or); 893 break; 894 } 895 896 case 55: 897 898 { 899 returni(TBaseType.great_equal); 900 break; 901 } 902 903 904 case 56: 905 906 { 907 returni(TBaseType.hive_ByteLengthLiteral); 908 break; 909 } 910 911 case 57: 912 913 { 914 returni (TBaseType.hive_BigintLiteral); 915 break; 916 } 917 918 case 58: 919 920 { 921 returni (TBaseType.hive_SmallintLiteral); 922 break; 923 } 924 925 case 59: 926 927 { 928 returni (TBaseType.hive_TinyintLiteral); 929 break; 930 } 931 932 933 case 60: 934 935 { 936 returni (TBaseType.hive_DecimalLiteral); 937 break; 938 } 939 940 case 61: 941 942 { 943 returni (TBaseType.hive_number); 944 break; 945 } 946 947 case 62: 948 949 { 950 returni (TBaseType.hive_CharSetLiteral); 951 break; 952 } 953 954 955 case 63: 956 957 { 958 returni(TBaseType.hive_div); 959 break; 960 } 961 962 case 64: 963 964 { 965 int rw; 966 if ( (rw = iskeyword(yylvalstr)) != -1) { returni(rw);} 967 else 968 returni(ident); 969 break; 970 } 971 972 case 65: 973 974 { 975 if (yylvalstr.endsWith(":")){ 976 yylvalstr = yylvalstr.substring(0,yytextlen-1); 977 yyless(yytextlen-1); 978 } 979 returni(TBaseType.hive_CharSetName); 980 break; 981 } 982 983 case 66: 984 985 { 986 returni( error); 987 break; 988 } 989 default:{ 990 System.out.println("fatal error in yyaction"); 991 } 992 }//switch 993}/*yyaction*/; 994 995 996 997 }