// Lexical analyzer for GSQLParser component java version

/****************************************************}
{   Lexical analyzer for GSQLParser component java version    }
{   Copyright (c) 2004-2023 by Gudu software              }
{****************************************************/
007
008package gudusoft.gsqlparser;
009
010import java.util.HashMap;
011import java.io.InputStreamReader;
012
013import java.util.Locale;
014import java.io.BufferedReader;
015import java.io.IOException;
016
017
018
019public class TLexerHive extends TCustomLexer{
020
021    static int yynmarks = 0  ;
022    static int yynmatches ;
023    static int yyntrans   ;
024    static int yynstates  ;
025    static int[]  yyk,yym ; // 1 based
026    static int[]  yytint;  // 1 based
027    static TYytRec[] yyt ;  // 1 based
028    static int[]  yykl,yykh,yyml,yymh,yytl,yyth ; // 0 based
029    private  static  String[] keywordlist;
030    static String   table_file;
031    static HashMap<String, Integer> keywordValueList;
032    static HashMap<Integer, Integer> keywordTypeList;
033        static int[][] yystateTable;
034
035    static {
036                keywordValueList = new HashMap<String, Integer>();
037                keywordTypeList = new HashMap<Integer, Integer>();
038        table_file = "/gudusoft/gsqlparser/parser/hive/hive_lex_table.txt";
039                if (TBaseType.enterprise_edition||TBaseType.hive_edition){
040                inittable();
041            }
042    }
043
044    public TLexerHive(){
045          super();
046          dbvendor = EDbVendor.dbvhive;
047          sourcetokens = new TSourceTokenList();
048    }
049
050
051    public TSourceTokenList sourcetokens;
052
053
054public boolean canBeColumnName(int tokencode){
055    //http://blog.csdn.net/superbeck/article/details/5387476
056    boolean ret = false;
057    int modifiers = keyword_type_identifier | keyword_type_column ;
058    Integer s = keywordTypeList.get(tokencode);
059     if (s != null){
060        int modifier = s;
061        ret = (modifiers & modifier) == modifier;
062    }
063
064    return ret;
065}
066
067 public  int iskeyword(String str){
068        int ret = -1;
069        Integer s = keywordValueList.get(str.toUpperCase(Locale.ENGLISH));
070        if( s != null){
071            ret = s;
072        }
073        return ret;// -1 means not a keyword
074     }
075
076     public int getkeywordvalue(String keyword){
077        int ret = 0;
078        Integer s = keywordValueList.get(keyword.toUpperCase(Locale.ENGLISH));
079        if( s != null){
080            ret = s;
081         }
082        return ret;// 0 means not a keyword
083     }
084         
085    public static EKeywordType getKeywordType(String keyword){
086        return TCustomLexer.getKeywordType(keyword,keywordValueList,keywordTypeList);
087    }            
088
089    static void yystateLookupConfigure() {
090        int yystates = yytl.length;
091        yystateTable = new int[257][yystates];
092
093        // initialize to empty
094        for(int i = 0; i < yystates; i++) {
095            for (int j = 0; j < 257; j++)
096                yystateTable[j][i] = -1;
097        }
098
099        for(int i = 0; i < yystates; i++) {
100            int low = yytl[i];
101            int high = yyth[i];
102            for (int j = low; j <= high; j++) {
103                for (char c: yyt[j].cc) {
104                    yystateTable[c][i] = j;
105                }
106            }
107        }
108    }
109        
110    int yylex(){
111          int yyn;
112           while (true) { // top level while
113              yynew();
114              while (true){  //scan
115                 for(yyn = yykl[yystate]; yyn <= yykh[yystate]; yyn++){
116                     yymark(yyk[yyn]);
117                 }
118
119                 for(yyn=yymh[yystate]; yyn>= yyml[yystate]; yyn--){
120                    yymatch(yym[yyn]);
121                 }
122
123                 if(yytl[yystate] > yyth[yystate]){
124                     break;
125                 }
126
127                 yyscan();
128//                 yyn = yytl[yystate];
129                 totablechar();
130//                 while( (yyn <= yyth[yystate]) && (!(charinarray(yytablechar,yyt[yyn].cc))) ){
131//                   yyn++;
132//                 }
133//                 if (yyn > yyth[yystate]){
134//                     break;
135//                 }
136
137                 yyn = yystateTable[yytablechar][yystate];
138                 if (yyn == -1)
139                     break;
140                                         
141                 yystate = yyt[yyn].s;
142              } //scan
143
144              while (true){ //action
145                int yyrule;
146                if ( (yyrule = yyfind()) != -1 ){
147                   yyaction(yyrule);
148                   if (yyreject){
149                       continue;
150                   }
151                }else if( (!yydefault() ) && (yywrap()) ){
152                   yyclear();
153                   returni(0);
154                }
155                break;
156              }
157
158              if (!yydone) {
159                  continue;
160              }
161              break;
162            } // top level while
163
164           return yyretval;
165        }
166
167    static void inittable(){
168                
169                //if (yynmarks > 0) return; //init table already
170
171        String line;
172        boolean inyyk=false,inyym=false,inyykl=false,inyykh=false,inyyml=false,inyymh=false,inyytl=false,inyyth=false,inyytint=false,inyyt=false,inkeyword=false;
173        int yyk_count=0,yym_count=0,yykl_count=0,yykh_count=0,yyml_count=0,yymh_count=0,yytl_count=0,yyth_count=0,yytint_count=0,yyt_count=0;
174        int c=0;
175        keywordValueList.clear();
176        keywordTypeList.clear();
177        
178        BufferedReader br = new BufferedReader(new InputStreamReader(TLexerHive.class.getResourceAsStream(table_file)));
179
180            try{
181                while( (line = br.readLine()) != null){
182                          if (line.trim().startsWith("yynmarks=")){
183                             String[] ss = line.split("[=;]");
184                              yynmarks=Integer.parseInt(ss[1].trim());
185                              yyk = new int[yynmarks+1];
186                          }else if (line.trim().startsWith("yynmatches=")){
187                              String[] ss = line.split("[=;]");
188                               yynmatches=Integer.parseInt(ss[1].trim());
189                               yym = new int[yynmatches+1];
190                          }else if (line.trim().startsWith("yyntrans=")){
191                              String[] ss = line.split("[=;]");
192                               yyntrans=Integer.parseInt(ss[1].trim());
193                               yytint = new int[yyntrans+1];
194                               yyt = new TYytRec[yyntrans+1];
195                          }else if (line.trim().startsWith("yynstates=")){
196                              String[] ss = line.split("[=;]");
197                               yynstates=Integer.parseInt(ss[1].trim());
198                               yykl = new int[yynstates];
199                               yykh = new int[yynstates];
200                              yyml = new int[yynstates];
201                              yymh = new int[yynstates];
202                              yytl = new int[yynstates];
203                              yyth = new int[yynstates];
204                          }else if (line.trim().startsWith("<end>")){
205                              if (inyyk){
206                                  inyyk = false;
207                                 if (yynmarks+1 != yyk_count ){
208                                    System.out.println("required1:"+(yynmarks)+" actually:"+(yyk_count-1));
209                                 }
210                              }
211                              else if(inyym){
212                                     inyym = false;
213                                    if (yynmatches+1 != yym_count ){
214                                       System.out.println("required2:"+(yynmatches)+" actually:"+(yym_count-1));
215                                    }
216                              }
217                              else if(inyykl){
218                                     inyykl = false;
219                                    if (yynstates != yykl_count ){
220                                       System.out.println("required3:"+(yynstates)+" actually:"+(yykl_count));
221                                    }
222                              }
223                              else if(inyykh){
224                                     inyykh = false;
225                                    if (yynstates != yykh_count ){
226                                       System.out.println("required4:"+(yynstates)+" actually:"+(yykh_count));
227                                    }
228                              }
229                              else if(inyyml){
230                                     inyyml = false;
231                                    if (yynstates != yyml_count ){
232                                       System.out.println("required5:"+(yynstates)+" actually:"+(yyml_count));
233                                    }
234                              }
235                              else if(inyymh){
236                                     inyymh = false;
237                                    if (yynstates != yymh_count ){
238                                       System.out.println("required:"+(yynstates)+" actually:"+(yymh_count));
239                                    }
240                              }
241                              else if(inyytl){
242                                     inyytl = false;
243                                    if (yynstates != yytl_count ){
244                                       System.out.println("required6:"+(yynstates)+" actually:"+(yytl_count));
245                                    }
246                              }
247                              else if(inyyth){
248                                     inyyth = false;
249                                    if (yynstates != yyth_count ){
250                                       System.out.println("required7:"+(yynstates)+" actually:"+(yyth_count));
251                                    }
252                              }
253                              else if(inyytint){
254                                     inyytint = false;
255                                    if (yyntrans + 1 != yytint_count ){
256                                       System.out.println("required8:"+(yyntrans)+" actually:"+(yytint_count-1));
257                                    }
258                              }
259                              else if(inyyt){
260                                     inyyt = false;
261                                    if (yyntrans+1 != yyt_count ){
262                                       System.out.println("required9:"+(yyntrans)+" actually:"+(yyt_count-1));
263                                    }
264                              }
265                              else if(inkeyword){
266                                     inkeyword = false;
267                              }
268                          }else if(line.trim().startsWith("yyk =")){
269                             inyyk = true; 
270                          }else if(line.trim().startsWith("yym =")){
271                             inyym = true;
272                          }else if(line.trim().startsWith("yykl =")){
273                             inyykl = true;
274                          }else if(line.trim().startsWith("yykh =")){
275                             inyykh = true;
276                          }else if(line.trim().startsWith("yyml =")){
277                             inyyml = true;
278                          }else if(line.trim().startsWith("yymh =")){
279                             inyymh = true;
280                          }else if(line.trim().startsWith("yytl =")){
281                             inyytl = true;
282                          }else if(line.trim().startsWith("yyth =")){
283                             inyyth = true;
284                          }else if(line.trim().startsWith("yytint =")){
285                             inyytint = true;
286                          }else if(line.trim().startsWith("yyt =")){
287                             inyyt = true;
288                        }else if(line.trim().startsWith("keywordsvalue =")){
289                           inkeyword = true;
290                        }else if(inyyk){
291                             String[] ss = line.split("[,]");
292                               for(int j=0;j<ss.length;j++){
293                                   // System.out.println(ss[j].trim());
294                                 yyk[yyk_count++] = Integer.parseInt(ss[j].trim());
295                               }
296                          }else if(inyym){
297                               String[] ss = line.split("[,]");
298                                 for(int j=0;j<ss.length;j++){
299                                     // System.out.println(ss[j].trim());
300                                   yym[yym_count++] = Integer.parseInt(ss[j].trim());
301                                 }
302                          }else if(inyykl){
303                               String[] ss = line.split("[,]");
304                                 for(int j=0;j<ss.length;j++){
305                                    //  System.out.println(ss[j].trim());
306                                   yykl[yykl_count++] = Integer.parseInt(ss[j].trim());
307                                 }
308                          }else if(inyykh){
309                               String[] ss = line.split("[,]");
310                                 for(int j=0;j<ss.length;j++){
311                                     // System.out.println(ss[j].trim());
312                                   yykh[yykh_count++] = Integer.parseInt(ss[j].trim());
313                                 }
314                          }else if(inyyml){
315                               String[] ss = line.split("[,]");
316                                 for(int j=0;j<ss.length;j++){
317                                     // System.out.println(ss[j].trim());
318                                   yyml[yyml_count++] = Integer.parseInt(ss[j].trim());
319                                 }
320                          }else if(inyymh){
321                               String[] ss = line.split("[,]");
322                                 for(int j=0;j<ss.length;j++){
323                                     // System.out.println(ss[j].trim());
324                                   yymh[yymh_count++] = Integer.parseInt(ss[j].trim());
325                                 }
326                          }else if(inyytl){
327                               String[] ss = line.split("[,]");
328                                 for(int j=0;j<ss.length;j++){
329                                     // System.out.println(ss[j].trim());
330                                   yytl[yytl_count++] = Integer.parseInt(ss[j].trim());
331                                 }
332                          }else if(inyyth){
333                               String[] ss = line.split("[,]");
334                                 for(int j=0;j<ss.length;j++){
335                                     // System.out.println(ss[j].trim());
336                                   yyth[yyth_count++] = Integer.parseInt(ss[j].trim());
337                                 }
338                          }else if(inyytint){
339                               String[] ss = line.split("[,]");
340                                 for(int j=0;j<ss.length;j++){
341                                     // System.out.println(ss[j].trim());
342                                   yytint[yytint_count++] = Integer.parseInt(ss[j].trim());
343                                 }
344                          }else if(inyyt){
345                                //System.out.println(line.trim());
346
347                              c = 0;
348                              String[] st = line.trim().split(",,");
349                              char[] tmp = new char[st.length];
350                              for(int i=0;i<st.length;i++){
351
352                                  if(st[i].startsWith("\'")) {
353                                      if(st[i].length() == 3){  // 'a'
354                                          tmp[c++] = st[i].charAt(1);
355                                      }else if(st[i].length() == 4) { // '\\'
356                                          tmp[c++] = st[i].charAt(2);
357                                      }else{
358                                         System.out.println(" read yytstr error, error string is "+st[i]+ "line: "+ yyt_count);
359                                      }
360                                  }else{
361                                      try{
362                                           tmp[c++] = (char)Integer.parseInt(st[i]);   // char in number like 32 that represent space
363                                          } catch (NumberFormatException nfe) {
364                                             System.out.println("NumberFormatException: " + nfe.getMessage());
365                                          }
366                                  }
367                              } //while hasmoreTokens
368
369                          //yyt[lineno] = new YYTrec(tmp,yytint[lineno]);
370                              yyt[yyt_count] = new TYytRec(tmp,yytint[yyt_count]);
371                              yyt_count++;
372
373                          }else if(inkeyword){
374                              String[] ss =line.split("[=]");
375
376                              int val1 = -1;
377                              int val2 = -1;
378                              try {
379                                  val1 = Integer.parseInt(ss[1]);
380                                  val2 = Integer.parseInt(ss[2]);
381                              }
382                              catch (NumberFormatException nfe) {
383                                  System.out.println("NumberFormatException: " + nfe.getMessage());
384                              }
385                              keywordValueList.put(ss[0].toUpperCase(),val1);
386                              keywordTypeList.put(val1,val2);
387                             }
388                }
389                }catch(IOException e){
390                  System.out.println(e.toString());
391                }
392                                
393                                yystateLookupConfigure();
394
395    }
396
397
398
399    void yyaction(int yyruleno){
400
401
402      int ic;
403      char[] tmparray = {'=','+','-','*','/','>','<'};
404
405      yylvalstr = getyytext();
406  /* actions: */
407  switch(yyruleno){
408  case 1:
409                
410        {
411           addlit(yylvalstr,yytextlen);
412           if (xcdepth <= 0)
413              {
414               start(init);
415               yylvalstr = litbufdup();
416               returni(cmtslashstar);
417              }
418           else
419              xcdepth--;
420
421         break;
422        }
423
424
425  case 2:
426                
427          {
428              xcdepth++;
429              yyless(2);
430              addlit(yylvalstr,yytextlen);
431              break;
432           }
433
434  case 3:
435                        
436          {
437
438              if (yylvalstr.equalsIgnoreCase("/*+"))
439              {
440                  // hive hint
441                  yyless(1);
442                  returnc(yylvalstr.charAt(0));
443              }else {
444                          
445                                  if (getyysstate() == xq)
446                                  {
447                                          nchars = yytextlen;
448                                          addlit(yylvalstr, yytextlen-1);
449                                          yyless(nchars-1);
450                                          return;//exit;
451                                  }
452
453                                  xcdepth = 0;
454                                  start(xc);
455                                  startlit();
456                                  yyless(2);
457                                  addlit(yylvalstr,yytextlen);
458                         }
459
460                        break;
461          }
462
463  case 4:
464                
465          {
466            addlit(yylvalstr,yytextlen);
467            break;
468          }
469
470  case 5:
471                
472          {
473              addlitchar(yylvalstr.charAt(0));
474              break;
475          }
476                  
477                  
478  case 6:
479                       
480        {
481                addlit(yylvalstr, yytextlen);
482                break;
483  }
484
485  case 7:
486                       
487        {
488                addlit(yylvalstr, yytextlen);
489                break;
490   }
491   
492  case 8:
493                       
494        {
495                start(init);
496                addlit(yylvalstr, yytextlen);
497                yylvalstr = litbufdup();
498                returni(ident);         
499                break;
500        }
501
502  case 9:
503                        
504        {
505          if ((getyysstate() == xd)||(getyysstate() == xq))
506          {
507              addlit(yylvalstr, 1);
508              yyless(1);
509              return;//exit;
510          }     
511                  
512
513                start(xbacktick);
514                startlit();
515                addlit(yylvalstr, yytextlen);
516                  
517                break;
518        }
519
520
521                  
522
523  case 10:
524                
525          {
526              start(init);
527              addlit(yylvalstr, yytextlen);
528              yylvalstr = litbufdup();
529              returni(TBaseType.hive_StringLiteral);
530              break;
531          }
532
533  case 11:
534                
535          {
536            addlit(yylvalstr, yytextlen);
537            break;
538          }
539
540
541
542  case 12:
543                        
544          {
545              if (yysstate == xd)
546                {
547                  addlit(yylvalstr, yytextlen);
548                }
549              else
550                {
551                  start(xq);
552                  startlit();
553                  addlit(yylvalstr, yytextlen);
554                }
555
556              break;
557          }
558
559  case 13:
560                
561          {
562              addlit(yylvalstr, yytextlen);
563              break;
564          }
565
566  case 14:
567                
568          {
569            addlit(yylvalstr, yytextlen);
570
571          break;
572         }
573        
574
575  case 15:
576                 
577          {
578            dummych1 = get_char();
579            addlit(yylvalstr+dummych1, yytextlen+1);
580            break;
581         }
582            
583  case 16:
584                 
585          {
586            dummych1 = get_char();
587            addlit(yylvalstr+dummych1, yytextlen+1);
588            break;
589         }
590                           
591  case 17:
592                
593          {
594              start(init);
595              addlit(yylvalstr, yytextlen);
596              if (literallen == 0)   returni (error);
597              if (literallen >= namedatalen)
598              {
599                 setlengthofliteralbuf(namedatalen);
600                 literallen = namedatalen;
601              }
602              yylvalstr = litbufdup();
603              returni (TBaseType.hive_StringLiteral);
604
605              break;
606          }
607
608  case 18:
609               
610          {
611          addlit(yylvalstr, yytextlen);
612          break;
613          }
614
615  case 19:
616                        
617          {
618              start(xd);
619              startlit();
620              addlit(yylvalstr, yytextlen);
621              break;
622          }
623        
624  case 20:
625                
626          {
627            dummych1 = get_char();
628            unget_char(dummych1);
629            if (dummych1 == (char)10)
630              {
631                    dummych1 = get_char();
632                    addlit(yylvalstr+dummych1, yytextlen+1);
633              } else
634                addlit(yylvalstr, yytextlen);
635
636          break;
637          }
638
639
640        
641
642
643  case 21:
644                
645          {
646            returni(lexnewline);
647            break;
648          }
649
650  case 22:
651        
652          {
653              returni(lexspace);
654              break;
655          }
656
657  case 23:
658                
659          {
660          if ((getyysstate() == xq)
661              || (getyysstate() == xd)
662              )
663          {
664              addlit(yylvalstr, 1);
665              yyless(1);
666              return;//exit;
667          }
668
669          returni(cmtdoublehyphen);
670          break;
671          }
672
673  case 24:
674      
675          {
676            returni('.');
677            break;
678          }
679          
680  case 25:
681        
682          {
683            returni(':');
684            break;
685          }
686          
687  case 26:
688        
689          {
690            returni(',');
691            break;
692          }
693          
694  case 27:
695            
696          {
697            returni(';');
698            break;
699          }
700          
701
702  case 28:
703         
704          {
705            returni('(');
706            break;
707          }
708          
709  case 29:
710         
711          {
712            returni(')');
713            break;
714          }
715          
716  case 30:
717          
718          {
719            returni('[');
720            break;
721          }
722          
723  case 31:
724          
725          {
726            returni(']');
727            break;
728          }
729          
730  case 32:
731         
732          {
733            returni('{');
734            break;
735          }
736          
737  case 33:
738         
739          {
740            returni('}');
741            break;
742          }
743          
744
745  case 34:
746            
747          {
748            returni('<');
749            break;
750          }
751          
752  case 35:
753              
754          {
755            returni('>');
756            break;
757          }
758          
759
760  case 36:
761         
762          {
763            returni('/');
764            break;
765          }
766          
767  case 37:
768       
769          {
770            returni('+');
771            break;
772          }
773          
774  case 38:
775         
776          {
777            returni('-');
778            break;
779          }
780          
781  case 39:
782       
783          {
784            returni('*');
785            break;
786          }
787          
788  case 40:
789      
790          {
791            returni('%');
792            break;
793          }
794          
795
796  case 41:
797            
798          {
799            returni('&');
800            break;
801          }
802          
803  case 42:
804        
805          {
806            returni('~');
807            break;
808          }
809          
810  case 43:
811            
812          {
813            returni('|');
814            break;
815          }
816          
817  case 44:
818             
819          {
820            returni('^');
821            break;
822          }
823          
824  case 45:
825           
826          {
827            returni('?');
828            break;
829          }
830          
831  case 46:
832         
833          {
834            returni('$');
835            break;
836          }
837
838  case 47:
839                          
840          {
841            returni(concatenationop);
842            break;
843          }
844
845          
846  case 48:
847           
848        {
849                returni(TBaseType.hive_equal);
850                break;
851        }    
852        
853  case 49:
854                
855        {
856                returni(TBaseType.safe_equal);
857                break;
858        }
859        
860                
861  case 50:
862            
863        {
864                returni(TBaseType.not_equal);
865                break;
866        }
867        
868  case 51:
869                        
870        {
871                returni(TBaseType.less_equal);
872                break;
873        }
874
875  case 52:
876              
877        {
878                returni('!');
879                break;
880        }
881        
882  case 53:
883              
884        {
885                returni(TBaseType.boolean_and);
886                break;
887        }
888
889  case 54:
890             
891        {
892                returni(TBaseType.boolean_or);
893                break;
894        }
895
896  case 55:
897                         
898        {
899                returni(TBaseType.great_equal);
900                break;
901        }     
902
903
904  case 56:
905                    
906          {
907               returni(TBaseType.hive_ByteLengthLiteral);
908               break;
909          }
910        
911  case 57:
912                 
913          {
914           returni (TBaseType.hive_BigintLiteral);
915           break;
916          }
917
918  case 58:
919                  
920          {
921           returni (TBaseType.hive_SmallintLiteral);
922           break;
923          }
924
925  case 59:
926                 
927          {
928           returni (TBaseType.hive_TinyintLiteral);
929           break;
930          }
931
932
933  case 60:
934                           
935          {
936           returni (TBaseType.hive_DecimalLiteral);
937           break;
938          }
939          
940  case 61:
941                          
942          {
943           returni (TBaseType.hive_number);
944           break;
945          }
946          
947  case 62:
948             
949          {
950           returni (TBaseType.hive_CharSetLiteral);
951           break;
952          }
953          
954              
955  case 63:
956      
957          {
958            returni(TBaseType.hive_div);
959            break;
960          }
961                  
962  case 64:
963              
964          {
965            int rw;
966            if ( (rw = iskeyword(yylvalstr)) != -1)   { returni(rw);}
967            else
968               returni(ident);
969            break;
970          }
971
972  case 65:
973              
974          {
975            if (yylvalstr.endsWith(":")){
976                yylvalstr =  yylvalstr.substring(0,yytextlen-1);
977                yyless(yytextlen-1);
978            }             
979            returni(TBaseType.hive_CharSetName);
980            break;
981          }
982
983  case 66:
984                        
985          {
986            returni( error);
987            break;
988          }
989    default:{
990     System.out.println("fatal error in yyaction");
991    }
992   }//switch
993}/*yyaction*/;
994
995
996
997        }