001// lexical analyzer  for GSQLParser component java version
002
/****************************************************}
{   Lexical analyzer for GSQLParser component java version    }
{   Copyright (c) 2004-2016 by Gudu software              }
{****************************************************/
007
008package gudusoft.gsqlparser;
009
010import java.util.HashMap;
011import java.io.InputStreamReader;
012
013import java.util.Locale;
014import java.io.BufferedReader;
015import java.io.IOException;
016
017
018
019public class TLexerImpala extends TCustomLexer{
020
021    static int yynmarks = 0  ;
022    static int yynmatches ;
023    static int yyntrans   ;
024    static int yynstates  ;
025    static int[]  yyk,yym ; // 1 based
026    static int[]  yytint;  // 1 based
027    static TYytRec[] yyt ;  // 1 based
028    static int[]  yykl,yykh,yyml,yymh,yytl,yyth ; // 0 based
029    private  static  String[] keywordlist;
030    static String   table_file;
031    static HashMap<String, Integer> keywordValueList;
032    static HashMap<Integer, Integer> keywordTypeList;
033        static int[][] yystateTable;
034
035    static {
036              keywordValueList = new HashMap<String, Integer>();
037              keywordTypeList = new HashMap<Integer, Integer>();
038        table_file = "/gudusoft/gsqlparser/parser/impala/impala_lex_table.txt";
039                if (TBaseType.enterprise_edition||TBaseType.impala_edition){
040                inittable();
041            }
042    }
043
044    public TLexerImpala(){
045          super();
046          dbvendor = EDbVendor.dbvimpala;
047          sourcetokens = new TSourceTokenList();
048    }
049
050
051    public TSourceTokenList sourcetokens;
052
053
054public boolean canBeColumnName(int tokencode){
055    
056    boolean ret = false;
057    int modifiers = keyword_type_identifier | keyword_type_column ;
058    Integer s = keywordTypeList.get(tokencode);
059     if (s != null){
060        int modifier = s;
061        ret = (modifiers & modifier) == modifier;
062    }
063
064    return ret;
065}
066
067 public  int iskeyword(String str){
068        int ret = -1;
069       Integer s = keywordValueList.get(str.toUpperCase(Locale.ENGLISH));
070        if( s != null){
071            ret = s;
072        }
073        return ret;// -1 means not a keyword
074     }
075
076     public int getkeywordvalue(String keyword){
077        int ret = 0;
078        Integer s = keywordValueList.get(keyword.toUpperCase(Locale.ENGLISH));
079        if( s != null){
080            ret = s;
081         }
082        return ret;// 0 means not a keyword
083     }
084         
085    public static EKeywordType getKeywordType(String keyword){
086        return TCustomLexer.getKeywordType(keyword,keywordValueList,keywordTypeList);
087    }            
088
089    static void yystateLookupConfigure() {
090        int yystates = yytl.length;
091        yystateTable = new int[257][yystates];
092
093        // initialize to empty
094        for(int i = 0; i < yystates; i++) {
095            for (int j = 0; j < 257; j++)
096                yystateTable[j][i] = -1;
097        }
098
099        for(int i = 0; i < yystates; i++) {
100            int low = yytl[i];
101            int high = yyth[i];
102            for (int j = low; j <= high; j++) {
103                for (char c: yyt[j].cc) {
104                    yystateTable[c][i] = j;
105                }
106            }
107        }
108    }    
109
110    int yylex(){
111          int yyn;
112           while (true) { // top level while
113              yynew();
114              while (true){  //scan
115                 for(yyn = yykl[yystate]; yyn <= yykh[yystate]; yyn++){
116                     yymark(yyk[yyn]);
117                 }
118
119                 for(yyn=yymh[yystate]; yyn>= yyml[yystate]; yyn--){
120                    yymatch(yym[yyn]);
121                 }
122
123                 if(yytl[yystate] > yyth[yystate]){
124                     break;
125                 }
126
127                 yyscan();
128//                 yyn = yytl[yystate];
129                 totablechar();
130//                 while( (yyn <= yyth[yystate]) && (!(charinarray(yytablechar,yyt[yyn].cc))) ){
131//                   yyn++;
132//                 }
133//                 if (yyn > yyth[yystate]){
134//                     break;
135//                 }
136
137                 yyn = yystateTable[yytablechar][yystate];
138                 if (yyn == -1)
139                     break;
140                                         
141                 yystate = yyt[yyn].s;
142              } //scan
143
144              while (true){ //action
145                int yyrule;
146                if ( (yyrule = yyfind()) != -1 ){
147                   yyaction(yyrule);
148                   if (yyreject){
149                       continue;
150                   }
151                }else if( (!yydefault() ) && (yywrap()) ){
152                   yyclear();
153                   returni(0);
154                }
155                break;
156              }
157
158              if (!yydone) {
159                  continue;
160              }
161              break;
162            } // top level while
163
164           return yyretval;
165        }
166
167    static void inittable(){
168                
169                //if (yynmarks > 0) return; //init table already
170
171        String line;
172        boolean inyyk=false,inyym=false,inyykl=false,inyykh=false,inyyml=false,inyymh=false,inyytl=false,inyyth=false,inyytint=false,inyyt=false,inkeyword=false;
173        int yyk_count=0,yym_count=0,yykl_count=0,yykh_count=0,yyml_count=0,yymh_count=0,yytl_count=0,yyth_count=0,yytint_count=0,yyt_count=0;
174        int c=0;
175        keywordValueList.clear();
176        keywordTypeList.clear();
177        
178        BufferedReader br = new BufferedReader(new InputStreamReader(TLexerImpala.class.getResourceAsStream(table_file)));
179
180            try{
181                while( (line = br.readLine()) != null){
182                          if (line.trim().startsWith("yynmarks=")){
183                             String[] ss = line.split("[=;]");
184                              yynmarks=Integer.parseInt(ss[1].trim());
185                              yyk = new int[yynmarks+1];
186                          }else if (line.trim().startsWith("yynmatches=")){
187                              String[] ss = line.split("[=;]");
188                               yynmatches=Integer.parseInt(ss[1].trim());
189                               yym = new int[yynmatches+1];
190                          }else if (line.trim().startsWith("yyntrans=")){
191                              String[] ss = line.split("[=;]");
192                               yyntrans=Integer.parseInt(ss[1].trim());
193                               yytint = new int[yyntrans+1];
194                               yyt = new TYytRec[yyntrans+1];
195                          }else if (line.trim().startsWith("yynstates=")){
196                              String[] ss = line.split("[=;]");
197                               yynstates=Integer.parseInt(ss[1].trim());
198                               yykl = new int[yynstates];
199                               yykh = new int[yynstates];
200                              yyml = new int[yynstates];
201                              yymh = new int[yynstates];
202                              yytl = new int[yynstates];
203                              yyth = new int[yynstates];
204                          }else if (line.trim().startsWith("<end>")){
205                              if (inyyk){
206                                  inyyk = false;
207                                 if (yynmarks+1 != yyk_count ){
208                                    System.out.println("required1:"+(yynmarks)+" actually:"+(yyk_count-1));
209                                 }
210                              }
211                              else if(inyym){
212                                     inyym = false;
213                                    if (yynmatches+1 != yym_count ){
214                                       System.out.println("required2:"+(yynmatches)+" actually:"+(yym_count-1));
215                                    }
216                              }
217                              else if(inyykl){
218                                     inyykl = false;
219                                    if (yynstates != yykl_count ){
220                                       System.out.println("required3:"+(yynstates)+" actually:"+(yykl_count));
221                                    }
222                              }
223                              else if(inyykh){
224                                     inyykh = false;
225                                    if (yynstates != yykh_count ){
226                                       System.out.println("required4:"+(yynstates)+" actually:"+(yykh_count));
227                                    }
228                              }
229                              else if(inyyml){
230                                     inyyml = false;
231                                    if (yynstates != yyml_count ){
232                                       System.out.println("required5:"+(yynstates)+" actually:"+(yyml_count));
233                                    }
234                              }
235                              else if(inyymh){
236                                     inyymh = false;
237                                    if (yynstates != yymh_count ){
238                                       System.out.println("required:"+(yynstates)+" actually:"+(yymh_count));
239                                    }
240                              }
241                              else if(inyytl){
242                                     inyytl = false;
243                                    if (yynstates != yytl_count ){
244                                       System.out.println("required6:"+(yynstates)+" actually:"+(yytl_count));
245                                    }
246                              }
247                              else if(inyyth){
248                                     inyyth = false;
249                                    if (yynstates != yyth_count ){
250                                       System.out.println("required7:"+(yynstates)+" actually:"+(yyth_count));
251                                    }
252                              }
253                              else if(inyytint){
254                                     inyytint = false;
255                                    if (yyntrans + 1 != yytint_count ){
256                                       System.out.println("required8:"+(yyntrans)+" actually:"+(yytint_count-1));
257                                    }
258                              }
259                              else if(inyyt){
260                                     inyyt = false;
261                                    if (yyntrans+1 != yyt_count ){
262                                       System.out.println("required9:"+(yyntrans)+" actually:"+(yyt_count-1));
263                                    }
264                              }
265                              else if(inkeyword){
266                                     inkeyword = false;
267                              }
268                          }else if(line.trim().startsWith("yyk =")){
269                             inyyk = true; 
270                          }else if(line.trim().startsWith("yym =")){
271                             inyym = true;
272                          }else if(line.trim().startsWith("yykl =")){
273                             inyykl = true;
274                          }else if(line.trim().startsWith("yykh =")){
275                             inyykh = true;
276                          }else if(line.trim().startsWith("yyml =")){
277                             inyyml = true;
278                          }else if(line.trim().startsWith("yymh =")){
279                             inyymh = true;
280                          }else if(line.trim().startsWith("yytl =")){
281                             inyytl = true;
282                          }else if(line.trim().startsWith("yyth =")){
283                             inyyth = true;
284                          }else if(line.trim().startsWith("yytint =")){
285                             inyytint = true;
286                          }else if(line.trim().startsWith("yyt =")){
287                             inyyt = true;
288                        }else if(line.trim().startsWith("keywordsvalue =")){
289                           inkeyword = true;
290                        }else if(inyyk){
291                             String[] ss = line.split("[,]");
292                               for(int j=0;j<ss.length;j++){
293                                   // System.out.println(ss[j].trim());
294                                 yyk[yyk_count++] = Integer.parseInt(ss[j].trim());
295                               }
296                          }else if(inyym){
297                               String[] ss = line.split("[,]");
298                                 for(int j=0;j<ss.length;j++){
299                                     // System.out.println(ss[j].trim());
300                                   yym[yym_count++] = Integer.parseInt(ss[j].trim());
301                                 }
302                          }else if(inyykl){
303                               String[] ss = line.split("[,]");
304                                 for(int j=0;j<ss.length;j++){
305                                    //  System.out.println(ss[j].trim());
306                                   yykl[yykl_count++] = Integer.parseInt(ss[j].trim());
307                                 }
308                          }else if(inyykh){
309                               String[] ss = line.split("[,]");
310                                 for(int j=0;j<ss.length;j++){
311                                     // System.out.println(ss[j].trim());
312                                   yykh[yykh_count++] = Integer.parseInt(ss[j].trim());
313                                 }
314                          }else if(inyyml){
315                               String[] ss = line.split("[,]");
316                                 for(int j=0;j<ss.length;j++){
317                                     // System.out.println(ss[j].trim());
318                                   yyml[yyml_count++] = Integer.parseInt(ss[j].trim());
319                                 }
320                          }else if(inyymh){
321                               String[] ss = line.split("[,]");
322                                 for(int j=0;j<ss.length;j++){
323                                     // System.out.println(ss[j].trim());
324                                   yymh[yymh_count++] = Integer.parseInt(ss[j].trim());
325                                 }
326                          }else if(inyytl){
327                               String[] ss = line.split("[,]");
328                                 for(int j=0;j<ss.length;j++){
329                                     // System.out.println(ss[j].trim());
330                                   yytl[yytl_count++] = Integer.parseInt(ss[j].trim());
331                                 }
332                          }else if(inyyth){
333                               String[] ss = line.split("[,]");
334                                 for(int j=0;j<ss.length;j++){
335                                     // System.out.println(ss[j].trim());
336                                   yyth[yyth_count++] = Integer.parseInt(ss[j].trim());
337                                 }
338                          }else if(inyytint){
339                               String[] ss = line.split("[,]");
340                                 for(int j=0;j<ss.length;j++){
341                                     // System.out.println(ss[j].trim());
342                                   yytint[yytint_count++] = Integer.parseInt(ss[j].trim());
343                                 }
344                          }else if(inyyt){
345                                //System.out.println(line.trim());
346
347                              c = 0;
348                              String[] st = line.trim().split(",,");
349                              char[] tmp = new char[st.length];
350                              for(int i=0;i<st.length;i++){
351
352                                  if(st[i].startsWith("\'")) {
353                                      if(st[i].length() == 3){  // 'a'
354                                          tmp[c++] = st[i].charAt(1);
355                                      }else if(st[i].length() == 4) { // '\\'
356                                          tmp[c++] = st[i].charAt(2);
357                                      }else{
358                                         System.out.println(" read yytstr error, error string is "+st[i]+ "line: "+ yyt_count);
359                                      }
360                                  }else{
361                                      try{
362                                           tmp[c++] = (char)Integer.parseInt(st[i]);   // char in number like 32 that represent space
363                                          } catch (NumberFormatException nfe) {
364                                             System.out.println("NumberFormatException: " + nfe.getMessage());
365                                          }
366                                  }
367                              } //while hasmoreTokens
368
369                          //yyt[lineno] = new YYTrec(tmp,yytint[lineno]);
370                              yyt[yyt_count] = new TYytRec(tmp,yytint[yyt_count]);
371                              yyt_count++;
372
373                          }else if(inkeyword){
374                              String[] ss =line.split("[=]");
375
376                              int val1 = -1;
377                              int val2 = -1;
378                              try {
379                                  val1 = Integer.parseInt(ss[1]);
380                                  val2 = Integer.parseInt(ss[2]);
381                              }
382                              catch (NumberFormatException nfe) {
383                                  System.out.println("NumberFormatException: " + nfe.getMessage());
384                              }
385                              keywordValueList.put(ss[0].toUpperCase(),val1);
386                              keywordTypeList.put(val1,val2);
387                             }
388                }
389                }catch(IOException e){
390                  System.out.println(e.toString());
391                }
392                                
393                                yystateLookupConfigure();
394
395    }
396
397
398
399    void yyaction(int yyruleno){
400
401      int ic;
402      char[] tmparray = {'=','+','-','*','/','>','<'};
403
404      yylvalstr = getyytext();
405  /* actions: */
406  switch(yyruleno){
407  case 1:
408                
409        {
410                    addlit(yylvalstr,yytextlen);
411                    if (xcdepth <= 0)
412                       {
413                        start(init);
414                        yylvalstr = litbufdup();
415                        returni(cmtslashstar);
416                       }
417                    else
418                       xcdepth--;
419
420                  break;
421        }
422        
423  case 2:
424                
425                  {
426                         xcdepth++;
427                          yyless(2);
428                          addlit(yylvalstr,yytextlen);
429                      break;
430                   }
431
432  case 3:
433                        
434                  {
435
436                      if (getyysstate() == xq)
437                      {
438                          nchars = yytextlen;
439                          addlit(yylvalstr, yytextlen-1);
440                          yyless(nchars-1);
441                          return;//exit;
442                      }
443
444                      xcdepth = 0;
445                      start(xc);
446                      startlit();
447                      yyless(2);
448                      addlit(yylvalstr,yytextlen);
449
450                  break;
451                  }
452
453  case 4:
454                
455                  {
456                    addlit(yylvalstr,yytextlen);
457                    break;
458                  }
459
460  case 5:
461                
462                  {
463                      addlitchar(yylvalstr.charAt(0));
464        
465                      break;
466                  }
467
468
469  case 6:
470                
471          {
472              start(init);
473              addlit(yylvalstr, yytextlen);
474              yylvalstr = litbufdup();
475              returni(TBaseType.hive_StringLiteral);
476              break;
477          }
478
479  case 7:
480                
481          {
482            addlit(yylvalstr, yytextlen);
483            break;
484          }
485
486
487
488  case 8:
489                        
490          {
491              if (yysstate == xd)
492                {
493                  addlit(yylvalstr, yytextlen);
494                }
495              else
496                {
497                  start(xq);
498                  startlit();
499                  addlit(yylvalstr, yytextlen);
500                }
501
502              break;
503          }
504
505  case 9:
506                
507          {
508              addlit(yylvalstr, yytextlen);
509              break;
510          }
511
512  case 10:
513                
514          {
515            addlit(yylvalstr, yytextlen);
516
517          break;
518         }
519        
520
521  case 11:
522                 
523          {
524            dummych1 = get_char();
525            addlit(yylvalstr+dummych1, yytextlen+1);
526            break;
527         }
528            
529  case 12:
530                 
531          {
532            dummych1 = get_char();
533            addlit(yylvalstr+dummych1, yytextlen+1);
534            break;
535         }
536                           
537  case 13:
538                
539          {
540              start(init);
541              addlit(yylvalstr, yytextlen);
542              if (literallen == 0)   returni (error);
543              if (literallen >= namedatalen)
544              {
545                 setlengthofliteralbuf(namedatalen);
546                 literallen = namedatalen;
547              }
548              yylvalstr = litbufdup();
549              returni (TBaseType.hive_StringLiteral);
550
551              break;
552          }
553
554  case 14:
555               
556          {
557          addlit(yylvalstr, yytextlen);
558          break;
559          }
560
561  case 15:
562                        
563          {
564              start(xd);
565              startlit();
566              addlit(yylvalstr, yytextlen);
567              break;
568          }
569        
570  case 16:
571                
572          {
573            dummych1 = get_char();
574            unget_char(dummych1);
575            if (dummych1 == (char)10)
576              {
577                    dummych1 = get_char();
578                    addlit(yylvalstr+dummych1, yytextlen+1);
579              } else
580                addlit(yylvalstr, yytextlen);
581
582          break;
583          }
584
585
586
587  case 17:
588                
589          {
590            returni(lexnewline);
591            break;
592          }
593
594  case 18:
595        
596          {
597              returni(lexspace);
598              break;
599          }
600
601  case 19:
602                
603          {
604          if ((getyysstate() == xq)
605              || (getyysstate() == xd)
606                          || (getyysstate() == xc)
607              )
608          {
609              addlit(yylvalstr, 1);
610              yyless(1);
611              return;//exit;
612          }
613
614          returni(cmtdoublehyphen);
615          break;
616          }
617
618  case 20:
619      
620          {
621            returni('.');
622            break;
623          }
624          
625  case 21:
626        
627          {
628            returni(':');
629            break;
630          }
631          
632  case 22:
633        
634          {
635            returni(',');
636            break;
637          }
638          
639  case 23:
640            
641          {
642            returni(';');
643            break;
644          }
645          
646
647  case 24:
648         
649          {
650            returni('(');
651            break;
652          }
653          
654  case 25:
655         
656          {
657            returni(')');
658            break;
659          }
660          
661  case 26:
662          
663          {
664            returni('[');
665            break;
666          }
667          
668  case 27:
669          
670          {
671            returni(']');
672            break;
673          }
674          
675  case 28:
676         
677          {
678            returni('{');
679            break;
680          }
681          
682  case 29:
683         
684          {
685            returni('}');
686            break;
687          }
688          
689
690  case 30:
691            
692          {
693            returni('<');
694            break;
695          }
696          
697  case 31:
698              
699          {
700            returni('>');
701            break;
702          }
703          
704
705  case 32:
706         
707          {
708            returni('/');
709            break;
710          }
711          
712  case 33:
713       
714          {
715            returni('+');
716            break;
717          }
718          
719  case 34:
720         
721          {
722            returni('-');
723            break;
724          }
725          
726  case 35:
727       
728          {
729            returni('*');
730            break;
731          }
732          
733  case 36:
734      
735          {
736            returni('%');
737            break;
738          }
739          
740
741  case 37:
742            
743          {
744            returni('&');
745            break;
746          }
747          
748  case 38:
749        
750          {
751            returni('~');
752            break;
753          }
754          
755  case 39:
756            
757          {
758            returni('|');
759            break;
760          }
761          
762  case 40:
763             
764          {
765            returni('^');
766            break;
767          }
768          
769  case 41:
770           
771          {
772            returni('?');
773            break;
774          }
775          
776  case 42:
777         
778          {
779            returni('$');
780            break;
781          }
782
783
784          
785  case 43:
786           
787        {
788                returni(TBaseType.hive_equal);
789                break;
790        }    
791        
792  case 44:
793                
794        {
795                returni(TBaseType.safe_equal);
796                break;
797        }
798        
799                
800  case 45:
801            
802        {
803                returni(TBaseType.not_equal);
804                break;
805        }
806        
807  case 46:
808                        
809        {
810                returni(TBaseType.less_equal);
811                break;
812        }
813
814  case 47:
815              
816        {
817                returni('!');
818                break;
819        }
820        
821  case 48:
822              
823        {
824                returni(TBaseType.boolean_and);
825                break;
826        }
827
828  case 49:
829             
830        {
831                returni(TBaseType.boolean_or);
832                break;
833        }
834
835  case 50:
836                         
837        {
838                returni(TBaseType.great_equal);
839                break;
840        }     
841
842
843  case 51:
844                    
845          {
846               returni(TBaseType.hive_ByteLengthLiteral);
847               break;
848          }
849        
850  case 52:
851                 
852          {
853           returni (TBaseType.hive_BigintLiteral);
854           break;
855          }
856
857  case 53:
858                  
859          {
860           returni (TBaseType.hive_SmallintLiteral);
861           break;
862          }
863
864  case 54:
865                 
866          {
867           returni (TBaseType.hive_TinyintLiteral);
868           break;
869          }
870
871
872  case 55:
873                           
874          {
875           returni (TBaseType.hive_DecimalLiteral);
876           break;
877          }
878          
879  case 56:
880                          
881          {
882           returni (TBaseType.hive_number);
883           break;
884          }
885          
886  case 57:
887             
888          {
889           returni (TBaseType.hive_CharSetLiteral);
890           break;
891          }
892          
893                    
894  case 58:
895              
896          {
897            int rw;
898            if ( (rw = iskeyword(yylvalstr)) != -1)   { returni(rw);}
899            else
900               returni(ident);
901            break;
902          }
903
904  case 59:
905              
906          {
907            returni(TBaseType.hive_CharSetName);
908            break;
909          }
910
911  case 60:
912      
913          {
914            returni(TBaseType.hive_div);
915            break;
916          }
917          
918          
919
920  case 61:
921                        
922          {
923            returni( error);
924            break;
925          }
926    default:{
927     System.out.println("fatal error in yyaction");
928    }
929   }//switch
930}/*yyaction*/;
931
932
933
934        }