001package gudusoft.gsqlparser;
002
003
004import java.io.BufferedReader;
005import java.io.IOException;
006import java.util.ArrayList;
007import java.util.Arrays;
008import java.util.HashMap;
009import java.util.Locale;
010
011/**
012 * Base lexer of all databases - Core tokenization engine for SQL parsing.
013 * 
014 * The lexer reads SQL text character by character and produces tokens that represent
015 * the syntactic units of SQL. This process involves several key components and stages:
016 * 
017 * <h3>1. Input Management and Buffering</h3>
018 * <ul>
019 *   <li><b>yyinput (BufferedReader)</b>: Primary input source for SQL text</li>
020 *   <li><b>yyline (char[])</b>: Current line buffer read from input via readln()</li>
021 *   <li><b>buf (char[])</b>: Reversed line buffer for character-by-character processing</li>
022 *   <li><b>bufptr</b>: Current position in buf, decrements as characters are consumed</li>
023 * </ul>
024 * 
025 * <h3>2. Token Text Formation Process</h3>
026 * <pre>
027 * SQL Input → readln() → yyline[] → reversed into buf[] → get_char() → yytextbuf[]
028 *                                                                        ↓
029 *                                                                yylex() processing
030 *                                                                        ↓
031 *                                                                 yylvalstr (String)
032 *                                                                        ↓
033 *                                                            TSourceToken.astext
034 * </pre>
035 * 
036 * <h4>Key Variables in Token Text Storage:</h4>
037 * <ul>
038 *   <li><b>yytextbuf (char[])</b>: Accumulator buffer for current token being formed</li>
039 *   <li><b>yytextlen</b>: Current length of text in yytextbuf</li>
040 *   <li><b>yytextbufsize</b>: Allocated size of yytextbuf (dynamically grows)</li>
041 *   <li><b>yylvalstr (String)</b>: Final token text string created from yytextbuf</li>
042 *   <li><b>literalbuf (StringBuilder)</b>: Special buffer for string literals and complex tokens</li>
043 * </ul>
044 * 
045 * <h3>3. Position Tracking System</h3>
046 * 
047 * The lexer maintains precise position information for every token:
048 * <ul>
049 *   <li><b>yylineno</b>: Current line number (1-based)</li>
050 *   <li><b>yycolno</b>: Current column number (0-based)</li>
051 *   <li><b>offset</b>: Absolute character offset from start of input</li>
052 *   <li><b>yylineno_p, yycolno_p, offset_p</b>: Previous position values for token start</li>
053 * </ul>
054 * 
055 * <h3>4. Token Creation Workflow</h3>
056 * <ol>
057 *   <li>Characters are read via get_char() from buf[] into yytextbuf[]</li>
058 *   <li>yylex() identifies token boundaries and type</li>
059 *   <li>Token text is extracted: yylvalstr = new String(yytextbuf, 0, yytextlen)</li>
060 *   <li>yylexwrap() creates TSourceToken with:
061 *       <ul>
062 *         <li>astext = yylvalstr (full token text copy)</li>
063 *         <li>lineNo = yylineno_p (start line)</li>
064 *         <li>columnNo = yycolno_p (start column)</li>
065 *         <li>offset = offset_p (absolute position)</li>
066 *       </ul>
067 *   </li>
068 * </ol>
069 * 
070 * <h3>5. Memory Management and Text Copying</h3>
071 * 
072 * <b>Current Implementation (Eager Loading):</b>
073 * <ul>
074 *   <li>Every token immediately copies its text from yytextbuf to TSourceToken.astext</li>
075 *   <li>Original SQL text in yyline is discarded after processing each line</li>
076 *   <li>No direct link maintained between token and original input position</li>
077 * </ul>
078 * 
079 * <h3>6. Tracing Back to Original Position</h3>
080 * 
081 * <b>Currently Possible:</b>
082 * <ul>
083 *   <li>Token stores lineNo, columnNo, and offset</li>
084 *   <li>These can theoretically locate position in original input</li>
085 * </ul>
086 * 
087 * <b>Current Limitations:</b>
088 * <ul>
089 *   <li>Original input text is not retained after line processing</li>
090 *   <li>yyline buffer is overwritten for each new line</li>
091 *   <li>No mechanism to retrieve original text from position alone</li>
092 * </ul>
093 * 
094 * @author Gudu Software
095 */
096public class TCustomLexer {
097
    // A token table is kept at the lexer level, indexed by token code, storing key facts about every token.
    // It is mainly used to handle the case where a keyword token is used as a column or table name.
    public static int MAX_TOKEN_SIZE = 2048; // number of all possible tokens (rows of the table)
    public static int MAX_TOKEN_COLUMN_SIZE = 10;

    // The table has MAX_TOKEN_SIZE rows, each with MAX_TOKEN_COLUMN_SIZE integer columns:
    // column 0: number of times the token occurred
    // column 1: x position of the token's first occurrence (setTokenTableValue stores token.lineNo here)
    // column 2: y position of the token's first occurrence (setTokenTableValue stores token.columnNo here)
    // column 3: x position of the token's last occurrence
    // column 4: y position of the token's last occurrence
    // column 5: position in the token list of the token's first occurrence
    // column 6: position in the token list of the token's last occurrence

    public static int COLUMN0_COUNT = 0;
    public static int COLUMN1_FIRST_X = 1;
    public static int COLUMN2_FIRST_Y = 2;
    public static int COLUMN3_LAST_X = 3;
    public static int COLUMN4_LAST_Y = 4;
    public static int COLUMN5_FIRST_POS = 5;
    public static int COLUMN6_LAST_POS = 6;
118
    /**
     * Pre-allocated strings for single ASCII characters (0-127).
     * Element {@code i} is the one-character string for code {@code i}, built once in the
     * static initializer below.
     * Intended to avoid creating new String objects for common single-char tokens
     * like '(', ')', ',', ';', '+', '-', '*', '/', etc., reducing GC pressure in the lexer hot path.
     * NOTE(review): the lookup site is not in this part of the file - confirm where it is used.
     */
    private static final String[] SINGLE_CHAR_STRINGS = new String[128];
    static {
        // Fill every slot so a lookup by character code never has to allocate.
        for (int i = 0; i < 128; i++) {
            SINGLE_CHAR_STRINGS[i] = String.valueOf((char) i);
        }
    }
131
    /**
     * Per-token-code statistics for the current parse. The row index is the token code and the
     * columns are addressed with the COLUMN* constants. The array is sized from MAX_TOKEN_SIZE and
     * MAX_TOKEN_COLUMN_SIZE at the moment this lexer instance is created.
     */
    public long[][] TOKEN_TABLE = new long[MAX_TOKEN_SIZE][MAX_TOKEN_COLUMN_SIZE];

    /**
     * Tracks which tokenIds have been written to TOKEN_TABLE during current parse.
     * Used for incremental reset - only clear entries that were actually used.
     */
    private int[] usedTokenIds = new int[512];  // Typical SQL uses <200 distinct token types
    // Number of valid entries at the front of usedTokenIds.
    private int usedTokenCount = 0;
140
141    /**
142     * Reset TOKEN_TABLE by only clearing entries that were used (incremental clear).
143     * This is O(usedTokenCount) instead of O(MAX_TOKEN_SIZE * MAX_TOKEN_COLUMN_SIZE).
144     * For typical SQL with ~100 distinct token types, this saves clearing ~20,000 entries.
145     */
146    public void resetTokenTable() {
147        for (int i = 0; i < usedTokenCount; i++) {
148            int tokenId = usedTokenIds[i];
149            for (int j = 0; j < MAX_TOKEN_COLUMN_SIZE; j++) {
150                TOKEN_TABLE[tokenId][j] = 0L;
151            }
152        }
153        usedTokenCount = 0;
154    }
155
156    // define a function to set value when token is found, input is token id, a token with TSourceToken type
157    public void setTokenTableValue( TSourceToken token) {
158        if (token == null) return;
159        int tokenId = token.tokencode;
160
161        if (tokenId < 0 || tokenId >= MAX_TOKEN_SIZE) {
162            return;
163        }
164        if (TOKEN_TABLE[tokenId][COLUMN0_COUNT] == 0) {
165            // Track this tokenId for incremental reset
166            if (usedTokenCount < usedTokenIds.length) {
167                usedTokenIds[usedTokenCount++] = tokenId;
168            }
169            TOKEN_TABLE[tokenId][COLUMN0_COUNT] = 1;
170            TOKEN_TABLE[tokenId][COLUMN1_FIRST_X] = token.lineNo;
171            TOKEN_TABLE[tokenId][COLUMN2_FIRST_Y] = token.columnNo;
172            TOKEN_TABLE[tokenId][COLUMN3_LAST_X] = token.lineNo;
173            TOKEN_TABLE[tokenId][COLUMN4_LAST_Y] = token.columnNo;
174            TOKEN_TABLE[tokenId][COLUMN5_FIRST_POS] = token.posinlist;
175            TOKEN_TABLE[tokenId][COLUMN6_LAST_POS] = token.posinlist;
176        } else {
177            TOKEN_TABLE[tokenId][COLUMN0_COUNT] += 1;
178            TOKEN_TABLE[tokenId][COLUMN3_LAST_X] = token.lineNo;
179            TOKEN_TABLE[tokenId][COLUMN4_LAST_Y] = token.columnNo;
180            TOKEN_TABLE[tokenId][COLUMN6_LAST_POS] = token.posinlist;
181        }
182    }
183
    // Character source of the SQL text; read one line at a time by readln().
    public BufferedReader yyinput;
    // Current position (line, column, absolute offset) and the corresponding "_p" values.
    // get_char() increments yycolno/offset, unget_char() decrements them, and yylineno advances
    // each time a new line is loaded.
    long yylineno,yycolno,offset,yylineno_p,yycolno_p,offset_p;
    // bufptr: index in buf of the next character to deliver (0 means buf is exhausted).
    // yystate is computed in yynew() as yysstate + yylstate; yytextlen is the number of valid
    // chars in yytextbuf; yyretval is set by returni()/returnc(); yymatches is the top index of
    // yystack; yysleng is the text length saved by yyfind() before it trims the match.
    int bufptr,yystate,yysstate,yylstate,yytextlen,yyretval, yytextbufsize,
            yymatches,yysleng;
    // Line most recently returned by readln(); only the first yylineLen chars are valid.
    char[] yyline;
    /**
     * Reusable buffer for readln() to reduce per-line allocations.
     * Expands as needed for long lines and stays expanded for reuse.
     */
    private char[] lineReadBuffer = new char[4096];
    /**
     * Actual content length in lineReadBuffer/yyline.
     * Used instead of yyline.length since lineReadBuffer is reused without copying.
     */
    private int yylineLen;
    // Text of the current token as a String (start() inspects its first character).
    String yylvalstr;
    public String  dolqstart = "";//postgresql, start part of Dollar-quoted String Constants
    // yyactchar: character most recently read; yytablechar: its mapped form computed by
    // totablechar(); yylastchar: last character of the previous match (0 if none).
    char yylastchar,yyactchar,yytablechar;
    boolean yydone,yyreject;
    // Accumulates the characters of the token currently being scanned (see yyscan()).
    char[] yytextbuf;
    // Current line stored in reverse, 1-based: buf[bufptr] is the next character to read.
    char[] buf;
    int bufsize;
    // Set by get_char() when no further character can be delivered.
    boolean endOfInput;

    //StringBuffer literalbuf;
    // Buffer used by startlit()/addlit() to assemble literal text.
    StringBuilder literalbuf;
    int literallen,literalalloc,xcdepth,nchars,slashstar,dashdash;
    boolean isqmarktoident;
    public boolean insqlpluscmd;
    char dummych1,dummych2,dummych3;
    // Set by totablechar() when char code 914 was just seen; cleared by most other characters.
    boolean utf8NoBreakSpaceReady = false;

    int nestedLessThan = 0;

    boolean isReadyForFunctionBody = false, isInFunctionBody = false;
    int   functionBodyDelimiterIndex = -1;
    ArrayList<String> functionBodyDelimiter = new ArrayList<>();
221
    // Bit flags describing how a keyword may be used.
    public static int keyword_type_reserved = 0x0001;
    public static int keyword_type_keyword = 0x0002;
    public static int keyword_type_identifier = 0x0004;
    public static int keyword_type_column = 0x0008;

    // Statement delimiter settings; the constructor initialises delimiterchar to ';' and
    // tmpDelimiter to the empty string.
    public char delimiterchar;
    public String defaultDelimiterStr;
    public String tmpDelimiter;
    
    // Initial capacity used for both yytextbuf and buf.
    final static int intial_bufsize = 16384;
    final static char lf = (char)10;
    final static int max_chars = 65536*10*2;
    // Size of yypos (rule positions); yymark() warns when a rule number exceeds it.
    final static int max_rules = 256*2*10;
    // Capacity of yystack; replaced in the constructor and doubled on demand by yymatch().
    int  max_matches = 1024*20*10*2;


    // Start states. These constants must increase in steps of 2, in the order in which the states
    // appear in the .l file. The original author noted that the reason for the +2 step had been
    // forgotten and was not yet understood.
    // NOTE(review): yynew() computes yystate = yysstate + yylstate with yylstate being 0 or 1,
    // which would explain why each start state needs two consecutive slots - confirm.
    // Several names share a value; presumably each vendor-specific lexer uses only its own subset.
    final static int init = 2;
    final static int xc = 4;
    final static int xd = 6;
    final static int xq = 8;
    final static int xqq = 10;  //oracle
    final static int xdolq = 10;//postgresql
    final static int xdbracket = 10;
    final static int xdbrace = 12;
    final static int xbacktick = 12;

    final static int xbracketrs = 12; //redshift
    final static int xqtriple = 14;//bigquery
    final static int xdtriple = 16;//bigquery
252
253
254
    //https://docs.microsoft.com/en-us/sql/sql-server/maximum-capacity-specifications-for-sql-server
    final static int namedatalen = 8060;//255;
    
    // Token codes returned by the lexer. Values start above 256, so single characters can be
    // returned as their own code (see returnc()). Some names share a numeric value; presumably
    // those belong to different vendor lexers and never occur together - confirm before reusing.
    final static int cmtslashstar = 257;
    final static int cmtdoublehyphen = 258;
    final static int lexspace = 259;
    final static int lexnewline = 260;
    final static int fconst  = 261;
    final static int sconst = 262;
    final static int iconst = 263;
    final static int ident = 264;
    final static int op = 265;
    final static int cmpop = 266;
    final static int bind_v = 267;
    final static int assign_sign = 268;
    final static int double_dot = 269;
    final static int label_begin = 270;
    final static int label_end = 271;
    final static int substitution_v  = 272;
    final static int filepath_sign = TBaseType.filepath_sign;
    final static int sqlpluscmd = 273;
    final static int atversion = TBaseType.atversion; //databricks
    final static int error = 274;
    final static int variable = 275;
    final static int mslabel = 276;
    public final static int bconst = TBaseType.bconst; //postgresql
    final static int leftjoin_op = 277;
    final static int odbc_esc_prefix = 277;
    final static int rightjoin_op = 278;
    final static int odbc_esc_terminator = 278;
    final static int db2label = 279;
    public final static int xconst = TBaseType.xconst; //postgresql
    final static int ref_arrow = 280;
    final static int rw_scriptoptions = 281;
    public final static int UNICODE_ENCODE_ID = 281;
    final static int mysqllabel = 282;
    final static int NAMED_PARAMETER_SIGN = 282; //oracle,db2,snowflake CALL update_order (5000, NEW_STATUS => 'Shipped')
    final static int QUOTED_IDENT = 282;//used in mdx
    final static int BTEQCMD = 282;
    final static int concatenationop = 283;
    final static int rw_not_deferrable = 284;
    final static int rw_for1 = 285;
    final static int stmt_delimiter = 286;
    final static int AMP_QUOTED_ID = 285; //used in mdx
    final static int AMP_UNQUOTED_ID = 286; //used in mdx
    final static int m_clause = 287;
    final static int MySQL_CHARSET_NAME = 287;
    final static int typecast = TBaseType.typecast;//postgresql
    final static int k_clause = 288;
    final static int slash_dot = 288;
    final static int outer_join = 289;

    final static int not_equal = 290;

    // Codes below are aliases of values defined in TBaseType.
    final static int param = TBaseType.param;
    final static int mysql_null = TBaseType.rrw_mysql_null;

    final static int rw_locktable = 296;
    final static int rw_foreign2 = 297;
    final static int rw_constraint2 = 298;
    final static int rw_primary2 = 299;
    final static int rw_unique2 = 300;
    final static int     NEXT_PARAM = TBaseType.NEXT_PARAM;
    final static int     POSITIONAL_PARAM = TBaseType.POSITIONAL_PARAM;
    final static int     NAMED_PARAM = TBaseType.NAMED_PARAM;

    final static int castoperator = TBaseType.castoperator;
    final static int twocolons = TBaseType.twocolons;
    final static int compoundAssignmentOperator = TBaseType.compoundAssignmentOperator;
    final static int postgresql_function_delimiter = TBaseType.rrw_postgresql_function_delimiter;
    final static int clickhouse_function_delimiter = TBaseType.rrw_clickhouse_function_delimiter;
    final static int greenplum_function_delimiter = TBaseType.rrw_greenplum_function_delimiter;

    final static int redshift_function_delimiter = TBaseType.rrw_redshift_function_delimiter;
    final static int snowflake_function_delimiter = TBaseType.rrw_snowflake_function_delimiter;
331
332
    // yypos[rule]: text length recorded by yymark() for that rule; 0 means "no mark" (see yyfind()).
    int[] yypos;// = new int[max_rules + 1];      // 1 based in delphi, Position 0 was not used here
    // yystack[1..yymatches]: rule numbers pushed by yymatch(), most recent last.
    int[] yystack;// = new int[max_matches + 1];  // 1 based in delphi, Position 0 was not used here
  //  ArrayList yystack;

    //String keywordvaluefile,keywordfile,yyk_file,yym_file,yykl_file;
    //String yykh_file,yyml_file,yymh_file,yytl_file,yyth_file,yytint_file,yyt_file;

    EDbVendor dbvendor;
    // Initialised to null here and again in the constructor.
    TSourceToken prevToken = null;
342
    /**
     * Stores the charset name associated with the SQL input.
     *
     * @param sqlCharset charset name; stored as given, without validation
     */
    public void setSqlCharset(String sqlCharset) {
        this.sqlCharset = sqlCharset;
    }

    /**
     * Returns the charset name previously set with {@link #setSqlCharset(String)}.
     *
     * @return the stored charset name, or {@code null} if none was set
     */
    public String getSqlCharset() {
        return sqlCharset;
    }

    // Charset name of the SQL input; null until setSqlCharset() is called.
    private String sqlCharset = null;
352    
353    /**
354     * Check if token code represents a single character operator
355     */
356    protected boolean isSingleCharOperator(int tokenCode) {
357        return tokenCode == '(' || tokenCode == ')' || 
358               tokenCode == '[' || tokenCode == ']' ||
359               tokenCode == '{' || tokenCode == '}' ||
360               tokenCode == ',' || tokenCode == ';' ||
361               tokenCode == '.' || tokenCode == ':' ||
362               tokenCode == '+' || tokenCode == '-' ||
363               tokenCode == '*' || tokenCode == '/' ||
364               tokenCode == '%' || tokenCode == '=' ||
365               tokenCode == '<' || tokenCode == '>' ||
366               tokenCode == '!' || tokenCode == '&' ||
367               tokenCode == '|' || tokenCode == '^' ||
368               tokenCode == '~' || tokenCode == '?';
369    }
370    
371    /**
372     * Check if token code represents a keyword
373     */
374    protected boolean isKeyword(int tokenCode) {
375        // Check if it's in the reserved word range
376        return tokenCode >= TBaseType.rrw_select && tokenCode < TBaseType.rrw_abort;
377    }
378
379    public TCustomLexer(){
380       //this.yyinput = pbuf;
381       yytextbufsize = intial_bufsize - 1;
382       yytextbuf = new char[intial_bufsize];
383       checkyytextbuf(yytextbufsize);
384
385       bufsize = intial_bufsize - 1;
386       buf = new char[intial_bufsize];
387       checkbuf(bufsize);
388
389       //literalbuf = new StringBuffer();
390        literalbuf = new StringBuilder();
391        //keywordList = new TreeMap();
392        delimiterchar = ';';
393        tmpDelimiter = "";
394
395        xcdepth = 0;
396        nchars = 0;
397        isqmarktoident = true;
398
399       yylvalstr = "";
400        yysstate = 0;
401        yylstate = 0;
402        yymatches = 0;
403        yysleng = 0;
404       bufptr = 0;
405       yylineno = 0;
406       yycolno = 0;
407       offset = -1;
408       yylineno_p = 1;
409       yycolno_p = 1;
410       offset_p = 0;
411
412       yypos = new int[max_rules + 1];
413       max_matches = TBaseType.LEXER_INIT_MAX_MATCHES;
414       yystack = new int[max_matches + 1];
415
416        prevToken = null;
417    }
418
419    /*
420     * this function is not used. 
421    private void getkeywordvaluefromfile(){
422        int i;
423        keywordValueList.clear();
424        for(i=0; i<keywordlist.length; i++){
425           // System.out.println(keywordlist[i]);
426            String[] ss = keywordlist[i].split("[=]");
427            keywordValueList.put(ss[0].toUpperCase(),ss[1]);
428        }
429    }
430     */
431
/**
 * Keyword lookup hook. This base implementation ignores its argument and always returns -1.
 * NOTE(review): presumably overridden by vendor-specific lexers - confirm.
 *
 * @param str candidate keyword text (unused here)
 * @return always -1 in this class
 */
public  int iskeyword(String str){
    return -1;
}
435
436public boolean isAtBeginOfLine(){
437    return (yyretval == lexnewline || yyretval == 0);
438}
439
440//public boolean canBeColumnName(int tokencode){
441//    return false;
442//}
443
444
/**
 * Token-code-to-text hook. This base implementation always returns {@code null}.
 * NOTE(review): presumably overridden by vendor-specific lexers - confirm.
 *
 * @param tokenCode token code to look up (unused here)
 * @return always {@code null} in this class
 */
public String getStringByCode(int tokenCode){
    return null;
}
448
    /**
     * Keyword-to-token-code hook. This base implementation ignores its argument and always
     * returns 0.
     * NOTE(review): presumably overridden by vendor-specific lexers - confirm.
     *
     * @param keyword keyword text (unused here)
     * @return always 0 in this class
     */
    public  int getkeywordvalue(String keyword){
        return 0;
    }
452
453
454    /**
455     * @deprecated , please use keywordChecker.isKeyword() instead.
456     *
457     * because there are so many non-reserved keywords in some databases, it's not suitable to put those
458     * non-reserved keywords in lexer and parser.
459     *
460     * @param keyword
461     * @param keywordValueList
462     * @param keywordTypeList
463     * @return
464     */
465    public static EKeywordType getKeywordType(String keyword, HashMap<String, Integer> keywordValueList,HashMap<Integer, Integer> keywordTypeList){
466        EKeywordType ret = EKeywordType.NOT_A_KEYWORD;
467        Integer s = keywordValueList.get(keyword.toUpperCase(Locale.ENGLISH));
468        if( s == null) return ret;
469
470        Integer i = keywordTypeList.get(s);
471        if (i == 1) return EKeywordType.RESERVED_WORD;
472        else if (i == 2) return EKeywordType.NON_RESERVED_KEYWORD;
473        else return  ret;
474    }
475
476    /**
477     * 如果是ascii 字符,直接返回,如果是unicode 字符,需要进行转换。否则 String.charAt() 返回的unicode字符不是我们想要的字符,
478     * 例如中文的括号,我们实际需要的ascii的括号
479     *
480     * @param pYylvalstr
481     * @param index
482     * @return
483     */
484   char lexer_charAt(String pYylvalstr,int index){
485        char ret = pYylvalstr.charAt(index);
486        if (ret > 255){
487            // this is a unicode code
488            if ((ret == 0xFF08)){
489                // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=65280&number=128
490                // Unicode code point for FULLWIDTH LEFT PARENTHESIS (, 0xFF08
491                //System.out.println(c);
492                ret = '(';
493            }
494            if ( (ret == 0xFF09)){
495                // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=65280&number=128
496                // Unicode code point for FULLWIDTH RIGHT PARENTHESIS ), 0xFF09
497                // System.out.println(c);
498                ret = ')';
499            }
500        }
501        return ret;
502   }
   /**
    * Derives yytablechar from yyactchar, i.e. the character class the scanner should use for
    * the character just read, without changing yyactchar itself.
    *
    * Summary of the mapping implemented below:
    *   - NUL while input is not finished maps to 255;
    *   - codes below 228 map to themselves, except 160 (no-break space) which maps to a space;
    *   - codes from 228 up default to 'a', with special cases for 914, the Unicode single
    *     quotes, zero-width/ideographic spaces and full-width parentheses/comma.
    * NOTE(review): yytablechar is presumably the index into the generated transition tables -
    * confirm against the generated lexer.
    */
   void totablechar(){
       //System.out.println("char:"+yyactchar+" ,hex:"+String.format("%04x", (int) yyactchar));
       //System.out.println(String.format("0x%08X", (int)yyactchar)+", "+(char)yyactchar);

    // A NUL that is not the end-of-input marker is mapped to 255 so it is not mistaken for EOF.
    if (((int) yyactchar == 0) && !endOfInput) {
        yytablechar = (char)255;
        return;
    }

     if ((int)(yyactchar) < 228){ // 228 is ä in unicode
       yytablechar = yyactchar;
         // 160 and 0xA0 are the same value, so a no-break space always becomes a plain space here.
         if ((((int)(yyactchar) == 160)&&(utf8NoBreakSpaceReady))||(yyactchar == 0xA0)){
             yytablechar = (char)32;
         }
       utf8NoBreakSpaceReady = false;
//         if (yyactchar == 0x27){
//             insideSingleQuoteStr = !insideSingleQuoteStr;
//          }
     }else{
         // Default for every other character: classify it like the letter 'a'.
         yytablechar = (char)'a';//(char)255;

         if ((int)(yyactchar) == 914) { // c2 a0, utf-8 NO-BREAK SPACE
             utf8NoBreakSpaceReady = true;
             yytablechar = (char) 32;
         }else if ((yyactchar == 0x2018)||(yyactchar == 0x2019)){
             if (stringLiteralStartWithUnicodeSingleQuote){
                 // WHERE Name LIKE ‘Acme%’
                 // As in the example above: if the string literal started with a Unicode quote, then any
                 // Unicode quote met afterwards is treated as the terminator of the string literal,
                 // regardless of whether we are currently inside a string literal.
                 yytablechar = 0x27; // treat  Unicode Character 'LEFT SINGLE QUOTATION MARK' as the ascii char ', but don't change it
             }else {
                 if (insideSingleQuoteStr){
                     // don't change the unicode quote char
                 }else {
                     yytablechar = 0x27; // treat  Unicode Character 'LEFT SINGLE QUOTATION MARK' as the ascii char ', but don't change it
                 }
             }

         }else if ((yyactchar == 0x200B)||(yyactchar == 0x3000)){
             // Unicode code point 0x200B: treat  Unicode Character ZERO WIDTH SPACE as the ascii char space, but don't change it
             // Unicode code point 0x3000: treat  Unicode Character IDEOGRAPHIC SPACE (UTF-8: e3 80 80) as the ascii char space, but don't change it
             yytablechar = 0x20;
         }else if (yyactchar == 0xFF08){
             yytablechar = '('; // treat  Unicode code point for FULLWIDTH LEFT PARENTHESIS  as the ascii char (, but don't change it
         }else if (yyactchar == 0xFF09){
             yytablechar = ')'; // treat  Unicode code point for FULLWIDTH RIGHT PARENTHESIS  as the ascii char ), but don't change it
         }else if (yyactchar == 0xFF0C){
             yytablechar = ','; // treat  Unicode code point for FULLWIDTH COMMA  as the ascii char comma, but don't change it
         }else {
             // Only this fall-through branch clears the flag; the special cases above leave it as is.
             utf8NoBreakSpaceReady = false;
         }
     }
   }
555
556    String getyytext(){
557      return new String(yytextbuf,0,yytextlen);
558    }
559
560
561    void checkyytextbuf(int size){
562       while ( size >= yytextbufsize){
563          yytextbufsize = yytextbufsize * 2 > intial_bufsize ? yytextbufsize * 2: intial_bufsize;
564          char[] tmp = new char[yytextbufsize];
565           System.arraycopy(yytextbuf,0,tmp,0, yytextbuf.length);
566           yytextbuf = tmp;
567       }
568    }
569    
570    void checkbuf(int size){
571       // System.out.println("while begin2"+" size:"+size+" bufsize:"+bufsize);
572       while ( size >= bufsize){
573          bufsize = bufsize * 2 > intial_bufsize ? bufsize * 2: intial_bufsize;
574          char[] tmp = new char[bufsize];
575           System.arraycopy(buf,0,tmp,0, buf.length);
576           buf = tmp;
577       }
578       // System.out.println("while end2");
579    }
580
581    boolean eof(BufferedReader pbuf){
582        try{
583        return !pbuf.ready();
584        }catch(IOException e){
585          return true;
586        }
587    }
588
589    void yynew(){
590        if (yylastchar != (char)0){
591          if(yylastchar == lf){
592            yylstate = 1;
593          }else{
594              yylstate = 0;
595          }
596        }
597
598        yystate = yysstate + yylstate;
599        checkyytextbuf(0);
600        yytextlen = 0;
601        yymatches = 0;
602        yydone = false;
603    }
604
605    void yyscan(){
606        yyactchar = get_char();
607        checkyytextbuf(yytextlen + 1);
608        yytextlen++;
609        yytextbuf[yytextlen - 1] = yyactchar;
610    }
611
612    void yymark(int n){
613        if (n > max_rules ){
614           System.out.println("n > max_rules ");
615        }
616        yypos[n] = yytextlen;
617    }
618
619    void yymatch(int n){
620        yymatches++;
621        if(yymatches > max_matches){
622
623            int new_yystack[] = new int[max_matches*2+1];
624            System.arraycopy(yystack, 0, new_yystack, 0, max_matches);
625            yystack = new_yystack;
626            max_matches = max_matches * 2;
627
628           // this is valid in JDK 1.6, proguard will report warning and stop
629           // yystack = Arrays.copyOf(yystack,max_matches+1);
630
631        }
632        yystack [yymatches] = n;
633    }
634
635    int yyfind(){
636        //return -1 mean not found
637        int ret = -1;
638
639        yyreject = false;
640        
641        while (( yymatches > 0 ) && ( yypos[yystack[yymatches]] == 0 )) {
642           yymatches-- ;
643        }
644        
645
646        if (yymatches > 0){
647          yysleng = yytextlen;
648          ret = yystack[yymatches];
649          yyless( yypos[ret] );
650          yypos[ret] = 0;
651          if (yytextlen >0){
652            yylastchar = yytextbuf [yytextlen-1];
653          }else{
654            yylastchar = (char)0;
655          }
656        }else{
657          yyless( 0 );
658          yylastchar = (char)0;
659        }
660
661       return ret;
662    }
663
664    boolean yydefault(){
665        boolean ret;
666
667        yyreject = false;
668        yyactchar = get_char();
669        if (yyactchar != (char)0){
670          //put_char( yyactchar );
671          ret = true;
672        }else{
673          yylstate = 1;
674          ret = false;
675        }
676        yylastchar = yyactchar;
677        return ret;
678    }
679    void yyless(int n){
680        for(int i= yytextlen; i> n; i--){
681            unget_char(yytextbuf[i - 1]);
682        }
683        checkyytextbuf(n);
684        yytextlen = n;
685    }
686    void returni(int n){
687        yyretval = n;
688        yydone = true;
689    }
690    void returnc(char c){
691        yyretval = (int)c;
692        yydone = true;
693    }
694    void yyclear(){
695        bufptr = 0;
696        yysstate = 0;
697        yylstate = 1;
698        yylastchar = (char)0;
699        yytextlen = 0;
700        yylineno = 0;
701        yycolno = 0;
702        offset = -1;
703       // yystext := '';
704
705        yylineno_p = 1;
706        yycolno_p = 1;
707        offset_p = 0;
708
709    }
710
711    
    /**
     * End-of-input hook. This implementation always returns {@code true}.
     * NOTE(review): by lex convention that would mean "no further input" - confirm with the caller.
     *
     * @return always {@code true}
     */
    boolean yywrap(){
        return true;
    }
    /**
     * Returns the current start state, as last set by start() or cleared by yyclear().
     *
     * @return the value of yysstate
     */
    int getyysstate(){
        return yysstate;
    }
718    void start(int pstate){
719        yysstate = pstate;
720        if (pstate == xq){
721            insideSingleQuoteStr = true;
722            if ((yylvalstr.charAt(0) == 0x2018)||(yylvalstr.charAt(0) == 0x2019)){
723                stringLiteralStartWithUnicodeSingleQuote = true;
724            }else{
725                stringLiteralStartWithUnicodeSingleQuote = false;
726            }
727        }else{
728            insideSingleQuoteStr = false;
729        }
730    }
731
732
733    void unget_char(char pchar){
734        if(bufptr == max_chars)
735        {
736            System.out.println("input buffer overflow");
737        }
738      //  if (bufptr > 0) {
739        bufptr++;
740        yycolno--;
741        offset--;
742        checkbuf(bufptr+1);
743        buf[bufptr] = pchar;
744      //  }
745
746    }
747
748    public void reset(){
749        insideSingleQuoteStr = false;
750        nestedLessThan = 0;
751    }
752
    // True while the scanner is in the single-quoted-string start state (set by start(), cleared by reset()).
    public boolean insideSingleQuoteStr = false;
    // True when the current string literal was opened with U+2018 or U+2019 (set by start()).
    public boolean stringLiteralStartWithUnicodeSingleQuote = false;
755
756
757    // Previous implementation of readln, 2025-05-04
758    // char[] readln()  throws IOException {
759    //     int c;
760    //     char[] buffer = new char[80];
761    //     int bufferSize = 0;
762
763    //     while ((c = yyinput.read()) != -1) {
764    //         if (bufferSize >= buffer.length) {
765    //             char[] newBuffer = new char[buffer.length * 2];
766    //             System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
767    //             buffer = newBuffer;
768    //         }
769
770    //         buffer[bufferSize++] = (char)c;
771
772    //         if (c == '\n' || c == '\r') {
773    //             break;
774    //         }
775    //     }
776
777    //     if (bufferSize > 0 && buffer[bufferSize - 1] == '\r') {
778    //         yyinput.mark(1);
779    //         c = yyinput.read();
780    //         if (c == '\n') {
781    //             if (bufferSize >= buffer.length) {
782    //                 char[] newBuffer = new char[buffer.length + 1];
783    //                 System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
784    //                 buffer = newBuffer;
785    //             }
786    //             buffer[bufferSize++] = '\n';
787    //         } else {
788    //             yyinput.reset();
789    //         }
790    //     }
791
792    //     if (bufferSize == 0) {
793    //         return null;
794    //     }else{
795    //         char[] result = new char[bufferSize];
796    //         System.arraycopy(buffer, 0, result, 0, bufferSize);
797    //         return result;
798    //     }
799    // }
800
801/**
802 * High-performance line reader with optimal buffer management.
803 * Reuses lineReadBuffer across calls to reduce per-line allocations.
804 * @return char array containing the line including line ending, or null if end of stream
805 */
806char[] readln() throws IOException {
807    if (yyinput == null) return null;
808
809    int position = 0;
810    int c;
811
812    // Read characters until line ending or EOF
813    while ((c = yyinput.read()) != -1) {
814        // Expand buffer if needed (expanded buffer stays for reuse)
815        if (position >= lineReadBuffer.length) {
816            char[] newBuffer = new char[lineReadBuffer.length * 2];
817            System.arraycopy(lineReadBuffer, 0, newBuffer, 0, lineReadBuffer.length);
818            lineReadBuffer = newBuffer;
819        }
820
821        // Store character
822        lineReadBuffer[position++] = (char)c;
823
824        // Check for line endings
825        if (c == '\n') {
826            break; // LF - end of line
827        } else if (c == '\r') {
828            // Need to check for CR+LF sequence
829            yyinput.mark(1);
830            c = yyinput.read();
831
832            if (c == '\n') {
833                // CR+LF sequence - include LF in result
834                if (position >= lineReadBuffer.length) {
835                    char[] newBuffer = new char[lineReadBuffer.length + 1];
836                    System.arraycopy(lineReadBuffer, 0, newBuffer, 0, lineReadBuffer.length);
837                    lineReadBuffer = newBuffer;
838                }
839                lineReadBuffer[position++] = '\n';
840            } else {
841                // CR only - reset stream to keep the character after CR
842                yyinput.reset();
843            }
844            break;
845        }
846    }
847
848    // Return null if no characters were read (end of stream)
849    if (position == 0) {
850        yylineLen = 0;
851        return null;
852    }
853
854    // Return lineReadBuffer directly, avoiding per-line array allocation.
855    // yylineLen holds the actual content length (replaces yyline.length semantic).
856    yylineLen = position;
857    return lineReadBuffer;
858}
859
860    char get_char(){
861
862        char ret ;
863         boolean readlineok = true;
864
865        if ((bufptr == 0) && !eof(yyinput) )
866        {
867            try{
868               endOfInput = false;
869               yyline = readln();//yyinput.readLine();
870              // System.out.println("readln: "+yyline);
871                if (yyline == null){
872                  readlineok = false;
873                }  else{
874                    yylineno++;
875                    yycolno = 0;
876                    // Use yylineLen instead of yyline.length since lineReadBuffer is reused
877                    bufptr = yylineLen;
878                    checkbuf(bufptr+1);
879                    for(int k=1;k<=bufptr;k++){
880                        buf[k] = yyline[bufptr - k];
881                    }
882                }
883            }catch(IOException e){
884              readlineok = false;
885            }
886        }
887
888        if (! readlineok){
889          endOfInput = true;
890          return (char)0;
891        }
892
893       if (bufptr > 0){
894         bufptr--;
895         yycolno++;
896         offset++;
897
898         return buf[bufptr+1];
899         //return yyline.charAt(yyline.length()  - (bufptr + 1));
900       }else{
901       // bufptr--;
902           endOfInput = true;
903        return  (char)0;
904       }
905
906    }
907
908    void startlit(){
909        literalbuf.setLength(0);
910        literallen = 0;
911        literalalloc = 0;
912    }
913
914    void addlit(String ytext, int yleng){
915        literallen = literallen + yleng;
916        literalbuf.append(ytext,0,yleng);
917    }
918
919    void addlitchar(char ychar){
920        literallen++;
921        literalbuf.append(ychar);
922    }
923
924    String litbufdup(){
925        return literalbuf.toString();//.intern();
926    }
927
928    boolean isopchar(char ch){
929        switch (ch) {
930            case '~':
931            case '!':
932            case '@':
933            case '#':
934            case '^':
935            case '&':
936            case '|':
937            case '`':
938            case '?':
939            case '$':
940            case '%':
941                return true;
942            default:
943                return false;
944        }
945    }
946
947    boolean isselfchar(char ch){
948        switch (ch) {
949            case ',':
950            case '(':
951            case ')':
952            case '[':
953            case ']':
954            case '.':
955            case ';':
956            case '$':
957            case ':':
958            case '+':
959            case '-':
960            case '*':
961            case '/':
962            case '%':
963            case '^':
964            case '<':
965            case '>':
966            case '=':
967            case '!':
968                return true;
969            default:
970                return false;
971        }
972    }
973
974    boolean charinarray(char c, char[] a){
975        int len = a.length;
976        for (int i = 0; i < len; i++) {
977            if (a[i] == c)
978                return true;
979        }
980        return false;
981    }
982
983    void setlengthofliteralbuf(int plen){
984      literalbuf.setLength(plen);
985    }
986
987    void yyaction(int yyruleno){
988    }
989
990    int yylex(){
991        return 0;
992    }
993
994
995    public int yylexwrap(TSourceToken psourcetoken) {
996        // Get token code and handle EOF
997        if ((psourcetoken.tokencode = yylex()) == 0) return 0;
998
999        // Store token text - use shared strings for single ASCII chars to reduce allocations
1000        if (yylvalstr == null) {
1001            if (yytextlen == 1 && yytextbuf[0] < 128) {
1002                yylvalstr = SINGLE_CHAR_STRINGS[yytextbuf[0]];
1003            } else {
1004                yylvalstr = new String(yytextbuf, 0, yytextlen);
1005            }
1006        }
1007        psourcetoken.setAstext(yylvalstr);
1008    
1009        // Record token position information
1010        psourcetoken.lineNo = yylineno_p;
1011        psourcetoken.columnNo = yycolno_p;
1012        psourcetoken.offset = offset_p;
1013        yylineno_p = yylineno;
1014        yycolno_p = yycolno + 1;
1015        offset_p = offset + 1;
1016        
1017        // Track token in token table for analysis
1018        setTokenTableValue(psourcetoken);
1019    
1020        // Handle token types based on token code
1021        switch (psourcetoken.tokencode) {
1022            case cmtdoublehyphen:
1023                psourcetoken.tokentype = ETokenType.ttsimplecomment;
1024                if (dbvendor == EDbVendor.dbvmdx && psourcetoken.toString().startsWith("/")) {
1025                    psourcetoken.tokentype = ETokenType.ttCPPComment;
1026                }
1027                break;
1028                
1029            case cmtslashstar:
1030                psourcetoken.tokentype = ETokenType.ttbracketedcomment;
1031                break;
1032                
1033            case lexspace:
1034                psourcetoken.tokentype = ETokenType.ttwhitespace;
1035                break;
1036                
1037            case lexnewline:
1038                psourcetoken.tokentype = ETokenType.ttreturn;
1039                break;
1040                
1041            case bind_v:
1042                psourcetoken.tokentype = ETokenType.ttbindvar;
1043                if (dbvendor == EDbVendor.dbvoracle) {
1044                    psourcetoken.setAstext(psourcetoken.getAstext().replace(TBaseType.newline, ""));
1045                }
1046                break;
1047                
1048            case stmt_delimiter:
1049                psourcetoken.tokentype = ETokenType.ttstmt_delimiter;
1050                psourcetoken.tokencode = cmtslashstar;
1051                break;
1052                
1053            case concatenationop:
1054                psourcetoken.tokentype = ETokenType.ttconcatenationop;
1055                break;
1056                
1057            case variable:
1058                psourcetoken.tokentype = ETokenType.ttsqlvar;
1059                break;
1060                
1061            case fconst:
1062            case iconst:
1063                psourcetoken.tokentype = ETokenType.ttnumber;
1064                break;
1065                
1066            case sconst:
1067                psourcetoken.tokentype = ETokenType.ttsqstring;
1068                psourcetoken.dolqstart = dolqstart;
1069                dolqstart = "";
1070                break;
1071                
1072            case ident:
1073            case QUOTED_IDENT:
1074                handleIdentifierToken(psourcetoken);
1075                break;
1076                
1077            case cmpop:
1078                handleComparisonOperator(psourcetoken);
1079                break;
1080                
1081            case op:
1082                handleOperatorToken(psourcetoken);
1083                break;
1084                
1085            default:
1086                handleDefaultToken(psourcetoken);
1087                break;
1088        }
1089    
1090        prevToken = psourcetoken;
1091        return psourcetoken.tokencode;
1092    }
1093    
1094    // Helper methods to better organize the complex token handling logic
1095    private void handleIdentifierToken(TSourceToken psourcetoken) {
1096        psourcetoken.tokentype = ETokenType.ttidentifier;
1097        String tokenText = psourcetoken.toString().trim();
1098        
1099        if (tokenText.startsWith("\"")) {
1100            psourcetoken.tokentype = ETokenType.ttdqstring;
1101        } else if (tokenText.startsWith("[")) {
1102            if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
1103                psourcetoken.tokentype = ETokenType.ttdbstring;
1104            }
1105        } else if (tokenText.startsWith("{")) {
1106            if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
1107                psourcetoken.tokentype = ETokenType.ttbrstring;
1108                if (tokenText.toLowerCase().startsWith("{escape")) {
1109                    psourcetoken.tokencode = TBaseType.rrw_sqlserver_odbc_escape;
1110                }
1111            }
1112        } else if (tokenText.startsWith("&")) {
1113            if (dbvendor == EDbVendor.dbvmdx) {
1114                if (psourcetoken.tokencode == QUOTED_IDENT) {
1115                    psourcetoken.tokencode = AMP_QUOTED_ID;
1116                } else if (psourcetoken.tokencode == ident) {
1117                    psourcetoken.tokencode = AMP_UNQUOTED_ID;
1118                }
1119            }
1120        } else if (tokenText.startsWith(".")) {
1121            if (dbvendor == EDbVendor.dbvteradata) {
1122                psourcetoken.tokentype = ETokenType.ttBTEQCmd;
1123            }
1124        }
1125    }
1126    
1127    private void handleComparisonOperator(TSourceToken psourcetoken) {
1128        psourcetoken.tokentype = ETokenType.ttmulticharoperator;
1129        String token = yylvalstr;
1130        
1131        if ((token.startsWith("!") && token.endsWith("=")) ||
1132            (token.startsWith("^") && token.endsWith("=")) ||
1133            (token.startsWith("<") && token.endsWith(">"))) {
1134            
1135            psourcetoken.tokencode = TBaseType.not_equal;
1136            
1137            // Handle MySQL NULL-safe equal
1138            if (token.indexOf("=", 1) > 0 && 
1139                token.startsWith("<") && token.endsWith(">")) {
1140                psourcetoken.tokencode = (int)'=';
1141            }
1142        } else if (token.startsWith(">") && token.endsWith("=")) {
1143            psourcetoken.tokencode = TBaseType.great_equal;
1144        } else if (token.startsWith("<") && token.endsWith("=")) {
1145            psourcetoken.tokencode = TBaseType.less_equal;
1146        } else if ((token.startsWith("!") && token.endsWith("<")) ||
1147                   (token.startsWith("^") && token.endsWith("<"))) {
1148            psourcetoken.tokencode = TBaseType.not_less;
1149        } else if ((token.startsWith("!") && token.endsWith(">")) ||
1150                   (token.startsWith("^") && token.endsWith(">"))) {
1151            psourcetoken.tokencode = TBaseType.not_great;
1152        } else if (token.length() == 2 && token.charAt(0) == ':' && token.charAt(1) == '=') {
1153            psourcetoken.tokencode = assign_sign;
1154        }
1155    }
1156    
1157    private void handleOperatorToken(TSourceToken psourcetoken) {
1158        psourcetoken.tokentype = ETokenType.ttmulticharoperator;
1159        String token = yylvalstr;
1160        int tokenLength = token.length();
1161        char firstChar = tokenLength > 0 ? token.charAt(0) : '\0';
1162        char secondChar = tokenLength > 1 ? token.charAt(1) : '\0';
1163        
1164        // Handle question mark specially
1165        if (token.equals("?") && isqmarktoident) {
1166            handleQuestionMark(psourcetoken);
1167            return;
1168        }
1169        
1170        // Handle special two-character operators
1171        if (tokenLength == 2) {
1172            if (handleTwoCharOperator(psourcetoken, firstChar, secondChar)) {
1173                return;
1174            }
1175        }
1176        
1177        // Handle special three-character operators
1178        if (tokenLength == 3) {
1179            if (handleThreeCharOperator(psourcetoken, firstChar, secondChar, token.charAt(2))) {
1180                return;
1181            }
1182        }
1183        
1184        // Handle comparison operators
1185        if (handleComparisonOp(psourcetoken, token)) {
1186            return;
1187        }
1188        
1189        // Handle single character operators
1190        if (tokenLength == 1) {
1191            handleSingleCharOperator(psourcetoken, firstChar);
1192        }
1193    }
1194    
1195    private boolean handleTwoCharOperator(TSourceToken psourcetoken, char firstChar, char secondChar) {
1196        switch (firstChar) {
1197            case '<':
1198                if (secondChar == '<') {
1199                    return handleLeftShiftOperator(psourcetoken);
1200                } else if (secondChar == '@') {
1201                    psourcetoken.tokencode = TBaseType.JSON_RIGHT_CONTAIN;
1202                    return true;
1203                }
1204                break;
1205                
1206            case '>':
1207                if (secondChar == '>') {
1208                    return handleRightShiftOperator(psourcetoken);
1209                }
1210                break;
1211                
1212            case '=':
1213                if (secondChar == '>') {
1214                    if (dbvendor == EDbVendor.dbvodbc) {
1215                        psourcetoken.tokencode = TBaseType.great_equal;
1216                    } else if (dbvendor == EDbVendor.dbvpostgresql || dbvendor == EDbVendor.dbvgaussdb) {
1217                        psourcetoken.tokencode = TBaseType.assign_sign;
1218                    } else {
1219                        psourcetoken.tokencode = NAMED_PARAMETER_SIGN;
1220                    }
1221                    return true;
1222                } else if (secondChar == '*') {
1223                    if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
1224                        psourcetoken.tokencode = rightjoin_op;
1225                    }
1226                    return true;
1227                } else if (secondChar == '<') {
1228                    if (dbvendor == EDbVendor.dbvodbc) {
1229                        psourcetoken.tokencode = TBaseType.less_equal;
1230                    }
1231                    return true;
1232                } else if (secondChar == '=') {
1233                    if (dbvendor == EDbVendor.dbvsparksql) {
1234                        psourcetoken.tokencode = '=';
1235                    }
1236                    return true;
1237                }
1238                break;
1239                
1240            case '-':
1241                if (secondChar == '>') {
1242                    if (dbvendor == EDbVendor.dbvpostgresql || dbvendor == EDbVendor.dbvgaussdb 
1243                        || dbvendor == EDbVendor.dbvgreenplum || dbvendor == EDbVendor.dbvmysql) {
1244                        psourcetoken.tokencode = TBaseType.JSON_GET_OBJECT;
1245                    } else {
1246                        psourcetoken.tokencode = ref_arrow;
1247                    }
1248                    return true;
1249                } else if (secondChar == '=') {
1250                    if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
1251                        psourcetoken.tokencode = compoundAssignmentOperator;
1252                    }
1253                    return true;
1254                }
1255                break;
1256                
1257            case '.':
1258                if (secondChar == '.') {
1259                    if (dbvendor == EDbVendor.dbvdb2 || dbvendor == EDbVendor.dbvoracle 
1260                        || dbvendor == EDbVendor.dbvmysql || dbvendor == EDbVendor.dbvhana) {
1261                        psourcetoken.tokencode = double_dot;
1262                    }
1263                    return true;
1264                }
1265                break;
1266                
1267            case '*':
1268                if (secondChar == '=') {
1269                    if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
1270                        psourcetoken.tokencode = leftjoin_op;
1271                    }
1272                    return true;
1273                } else if (secondChar == '*') {
1274                    if (dbvendor == EDbVendor.dbvteradata || dbvendor == EDbVendor.dbvnetezza) {
1275                        psourcetoken.tokencode = TBaseType.exponentiate;
1276                    }
1277                    return true;
1278                }
1279                break;
1280                
1281            case '|':
1282                if (secondChar == '|') {
1283                    if (dbvendor == EDbVendor.dbvmysql) {
1284                        psourcetoken.tokencode = TBaseType.logical_or;
1285                    } else if (isStringConcatVendor(dbvendor)) {
1286                        psourcetoken.tokencode = TBaseType.concatenationop;
1287                    }
1288                    return true;
1289                } else if (secondChar == '/') {
1290                    if (dbvendor == EDbVendor.dbvredshift) {
1291                        psourcetoken.tokencode = TBaseType.square_root;
1292                    }
1293                    return true;
1294                }
1295                break;
1296                
1297            case '&':
1298                if (secondChar == '&') {
1299                    if (dbvendor == EDbVendor.dbvmysql) {
1300                        psourcetoken.tokencode = TBaseType.logical_and;
1301                    }
1302                    return true;
1303                }
1304                break;
1305                
1306            case '?':
1307                if (secondChar == '|') {
1308                    psourcetoken.tokencode = TBaseType.JSON_ANY_EXIST;
1309                    return true;
1310                } else if (secondChar == '&') {
1311                    psourcetoken.tokencode = TBaseType.JSON_ALL_EXIST;
1312                    return true;
1313                }
1314                break;
1315                
1316            case '@':
1317                if (secondChar == '>') {
1318                    psourcetoken.tokencode = TBaseType.JSON_LEFT_CONTAIN;
1319                    return true;
1320                }
1321                break;
1322                
1323            case '#':
1324                if (secondChar == '>') {
1325                    psourcetoken.tokencode = TBaseType.JSON_GET_OBJECT_AT_PATH;
1326                    return true;
1327                }
1328                break;
1329                
1330            case ':':
1331                if (secondChar == '=') {
1332                    psourcetoken.tokencode = assign_sign;
1333                    return true;
1334                }
1335                break;
1336        }
1337        
1338        // Handle compound assignment operators
1339        if ((firstChar == '+' || firstChar == '-' || firstChar == '*' || 
1340             firstChar == '/' || firstChar == '%' || firstChar == '&' || 
1341             firstChar == '^' || firstChar == '|') && secondChar == '=') {
1342            if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
1343                psourcetoken.tokencode = compoundAssignmentOperator;
1344                return true;
1345            } else if (dbvendor == EDbVendor.dbvmysql && firstChar == '^' && secondChar == '=') {
1346                psourcetoken.tokencode = not_equal;
1347                return true;
1348            }
1349        }
1350        
1351        return false;
1352    }
1353    
1354    private boolean handleThreeCharOperator(TSourceToken psourcetoken, char firstChar, char secondChar, char thirdChar) {
1355        if (firstChar == '-' && secondChar == '>' && thirdChar == '>') {
1356            psourcetoken.tokencode = TBaseType.JSON_GET_TEXT;
1357            return true;
1358        } else if (firstChar == '#' && secondChar == '>' && thirdChar == '>') {
1359            psourcetoken.tokencode = TBaseType.JSON_GET_TEXT_AT_PATH;
1360            return true;
1361        } else if (firstChar == '|' && secondChar == '|' && thirdChar == '/') {
1362            if (dbvendor == EDbVendor.dbvredshift) {
1363                psourcetoken.tokencode = TBaseType.cube_root;
1364                return true;
1365            }
1366        }
1367        return false;
1368    }
1369    
1370    private boolean handleComparisonOp(TSourceToken psourcetoken, String token) {
1371        if ((token.startsWith("!") && token.endsWith("=")) ||
1372            (token.startsWith("^") && token.endsWith("=")) ||
1373            (token.startsWith("<") && token.endsWith(">"))) {
1374            psourcetoken.tokencode = TBaseType.not_equal;
1375            return true;
1376        } else if (token.startsWith(">") && token.endsWith("=")) {
1377            psourcetoken.tokencode = TBaseType.great_equal;
1378            return true;
1379        } else if (token.startsWith("<") && token.endsWith("=")) {
1380            psourcetoken.tokencode = TBaseType.less_equal;
1381            return true;
1382        } else if ((token.startsWith("!") && token.endsWith("<")) ||
1383                   (token.startsWith("^") && token.endsWith("<"))) {
1384            psourcetoken.tokencode = TBaseType.not_less;
1385            return true;
1386        } else if ((token.startsWith("!") && token.endsWith(">")) ||
1387                   (token.startsWith("^") && token.endsWith(">"))) {
1388            psourcetoken.tokencode = TBaseType.not_great;
1389            return true;
1390        }
1391        return false;
1392    }
1393    
1394    private void handleSingleCharOperator(TSourceToken psourcetoken, char ch) {
1395        switch (ch) {
1396            case '~':
1397                if (dbvendor == EDbVendor.dbvmysql || dbvendor == EDbVendor.dbvredshift || 
1398                    dbvendor == EDbVendor.dbvsnowflake) {
1399                    psourcetoken.tokencode = (int)'~';
1400                }
1401                break;
1402                
1403            case '#':
1404                if (dbvendor == EDbVendor.dbvmssql) {
1405                    psourcetoken.tokencode = (int)'#';
1406                }
1407                break;
1408                
1409            case '&':
1410                if (dbvendor == EDbVendor.dbvmysql || dbvendor == EDbVendor.dbvvertica || 
1411                    dbvendor == EDbVendor.dbvsparksql) {
1412                    psourcetoken.tokencode = (int)'&';
1413                }
1414                break;
1415                
1416            case '|':
1417                if (dbvendor == EDbVendor.dbvmysql || dbvendor == EDbVendor.dbvvertica) {
1418                    psourcetoken.tokencode = (int)'|';
1419                }
1420                break;
1421        }
1422    }
1423    
1424    private void handleQuestionMark(TSourceToken psourcetoken) {
1425        if (dbvendor == EDbVendor.dbvpostgresql || dbvendor == EDbVendor.dbvgaussdb || 
1426            dbvendor == EDbVendor.dbvgreenplum) {
1427            psourcetoken.tokencode = TBaseType.JSON_EXIST;
1428        } else if (dbvendor == EDbVendor.dbvodbc) {
1429            psourcetoken.tokencode = '?';
1430        } else if (dbvendor == EDbVendor.dbvsnowflake) {
1431            psourcetoken.tokencode = bind_v;
1432            psourcetoken.tokentype = ETokenType.ttquestionmark;
1433        } else {
1434            psourcetoken.tokencode = ident;
1435        }
1436    }
1437    
1438    private boolean handleLeftShiftOperator(TSourceToken psourcetoken) {
1439        if (dbvendor == EDbVendor.dbvoracle || dbvendor == EDbVendor.dbvmssql || 
1440            dbvendor == EDbVendor.dbvsybase || dbvendor == EDbVendor.dbvpostgresql || 
1441            dbvendor == EDbVendor.dbvgaussdb || dbvendor == EDbVendor.dbvaccess || 
1442            dbvendor == EDbVendor.dbvgreenplum) {
1443            psourcetoken.tokencode = label_begin;
1444        } else if (dbvendor == EDbVendor.dbvmysql) {
1445            psourcetoken.tokencode = TBaseType.rrw_left_shift;
1446        } else if (dbvendor == EDbVendor.dbvredshift) {
1447            psourcetoken.tokencode = TBaseType.bitwise_shift_left;
1448        } else if (dbvendor == EDbVendor.dbvnetezza) {
1449            psourcetoken.tokencode = TBaseType.rrw_netezza_op_less_less;
1450        }
1451        return true;
1452    }
1453    
1454    private boolean handleRightShiftOperator(TSourceToken psourcetoken) {
1455        if (dbvendor == EDbVendor.dbvoracle || dbvendor == EDbVendor.dbvmssql || 
1456            dbvendor == EDbVendor.dbvsybase || dbvendor == EDbVendor.dbvpostgresql || 
1457            dbvendor == EDbVendor.dbvgaussdb || dbvendor == EDbVendor.dbvgreenplum || 
1458            dbvendor == EDbVendor.dbvaccess) {
1459            psourcetoken.tokencode = label_end;
1460        } else if (dbvendor == EDbVendor.dbvmysql) {
1461            psourcetoken.tokencode = TBaseType.rrw_right_shift;
1462        } else if (dbvendor == EDbVendor.dbvredshift) {
1463            psourcetoken.tokencode = TBaseType.bitwise_shift_right;
1464        } else if (dbvendor == EDbVendor.dbvnetezza) {
1465            psourcetoken.tokencode = TBaseType.rrw_netezza_op_great_great;
1466        }
1467        return true;
1468    }
1469    
1470    private boolean isStringConcatVendor(EDbVendor vendor) {
1471        return vendor == EDbVendor.dbvdb2 || vendor == EDbVendor.dbvnetezza || 
1472               vendor == EDbVendor.dbvpostgresql || vendor == EDbVendor.dbvgaussdb || 
1473               vendor == EDbVendor.dbvredshift || vendor == EDbVendor.dbvgreenplum || 
1474               vendor == EDbVendor.dbvbigquery || vendor == EDbVendor.dbvsnowflake || 
1475               vendor == EDbVendor.dbvsparksql || vendor == EDbVendor.dbvvertica;
1476    }
1477    
1478    private void handleDefaultToken(TSourceToken psourcetoken) {
1479        psourcetoken.tokentype = ETokenType.ttkeyword;
1480        
1481        if (psourcetoken.tokencode < 255) {
1482            // Single character operators (ASCII characters)
1483            psourcetoken.setAstext(Character.toString(yylvalstr.charAt(0)));
1484            psourcetoken.tokentype = ETokenType.ttsinglecharoperator;
1485            
1486            switch (psourcetoken.tokencode) {
1487                case ',':
1488                    psourcetoken.tokentype = ETokenType.ttcomma;
1489                    break;
1490                case '(':
1491                    psourcetoken.tokentype = ETokenType.ttleftparenthesis;
1492                    break;
1493                case ')':
1494                    psourcetoken.tokentype = ETokenType.ttrightparenthesis;
1495                    break;
1496                case '[':
1497                    psourcetoken.tokentype = ETokenType.ttleftbracket;
1498                    break;
1499                case ']':
1500                    psourcetoken.tokentype = ETokenType.ttrightbracket;
1501                    break;
1502                case '.':
1503                    psourcetoken.tokentype = ETokenType.ttperiod;
1504                    break;
1505                case ';':
1506                    psourcetoken.tokentype = ETokenType.ttsemicolon;
1507                    break;
1508                case '$':
1509                    psourcetoken.tokentype = ETokenType.ttdolorsign;
1510                    break;
1511                case ':':
1512                    psourcetoken.tokentype = ETokenType.ttcolon;
1513                    break;
1514                case '+':
1515                    psourcetoken.tokentype = ETokenType.ttplussign;
1516                    break;
1517                case '-':
1518                    psourcetoken.tokentype = ETokenType.ttminussign;
1519                    break;
1520                case '*':
1521                    psourcetoken.tokentype = ETokenType.ttasterisk;
1522                    break;
1523                case '/':
1524                    psourcetoken.tokentype = ETokenType.ttslash;
1525                    break;
1526                case '^':
1527                    psourcetoken.tokentype = ETokenType.ttcaret;
1528                    break;
1529                case '<':
1530                    psourcetoken.tokentype = ETokenType.ttlessthan;
1531                    break;
1532                case '>':
1533                    psourcetoken.tokentype = ETokenType.ttgreaterthan;
1534                    break;
1535                case '=':
1536                    psourcetoken.tokentype = ETokenType.ttequals;
1537                    break;
1538                case '@':
1539                    if (delimiterchar == '@') {
1540                        psourcetoken.tokencode = (int)';';
1541                        psourcetoken.tokentype = ETokenType.ttsemicolon;
1542                    } else {
1543                        psourcetoken.tokentype = ETokenType.ttatsign;
1544                    }
1545                    break;
1546                case '~':
1547                    psourcetoken.tokentype = ETokenType.tttilde;
1548                    break;
1549                case '&':
1550                    psourcetoken.tokentype = ETokenType.ttampersand;
1551                    break;
1552                case '|':
1553                    psourcetoken.tokentype = ETokenType.ttverticalbar;
1554                    break;
1555                case '?':
1556                    if (isqmarktoident && dbvendor != EDbVendor.dbvodbc && 
1557                        dbvendor != EDbVendor.dbvpostgresql && dbvendor != EDbVendor.dbvgaussdb) {
1558                        psourcetoken.tokencode = ident;
1559                    }
1560                    break;
1561            }
1562        } else if (dbvendor == EDbVendor.dbvhive && psourcetoken.tokencode == TBaseType.hive_equal) {
1563            psourcetoken.tokentype = ETokenType.ttequals;
1564        }
1565    }
1566
1567}
1568