001package gudusoft.gsqlparser;
002
003
004import java.io.BufferedReader;
005import java.io.IOException;
006import java.util.ArrayList;
007import java.util.Arrays;
008import java.util.HashMap;
009import java.util.Locale;
010
011/**
012 * Base lexer of all databases - Core tokenization engine for SQL parsing.
013 * 
014 * The lexer reads SQL text character by character and produces tokens that represent
015 * the syntactic units of SQL. This process involves several key components and stages:
016 * 
017 * <h3>1. Input Management and Buffering</h3>
018 * <ul>
019 *   <li><b>yyinput (BufferedReader)</b>: Primary input source for SQL text</li>
020 *   <li><b>yyline (char[])</b>: Current line buffer read from input via readln()</li>
021 *   <li><b>buf (char[])</b>: Reversed line buffer for character-by-character processing</li>
022 *   <li><b>bufptr</b>: Current position in buf, decrements as characters are consumed</li>
023 * </ul>
024 * 
025 * <h3>2. Token Text Formation Process</h3>
026 * <pre>
027 * SQL Input → readln() → yyline[] → reversed into buf[] → get_char() → yytextbuf[]
028 *                                                                        ↓
029 *                                                                yylex() processing
030 *                                                                        ↓
031 *                                                                 yylvalstr (String)
032 *                                                                        ↓
033 *                                                            TSourceToken.astext
034 * </pre>
035 * 
036 * <h4>Key Variables in Token Text Storage:</h4>
037 * <ul>
038 *   <li><b>yytextbuf (char[])</b>: Accumulator buffer for current token being formed</li>
039 *   <li><b>yytextlen</b>: Current length of text in yytextbuf</li>
040 *   <li><b>yytextbufsize</b>: Allocated size of yytextbuf (dynamically grows)</li>
041 *   <li><b>yylvalstr (String)</b>: Final token text string created from yytextbuf</li>
042 *   <li><b>literalbuf (StringBuilder)</b>: Special buffer for string literals and complex tokens</li>
043 * </ul>
044 * 
045 * <h3>3. Position Tracking System</h3>
046 * 
047 * The lexer maintains precise position information for every token:
048 * <ul>
049 *   <li><b>yylineno</b>: Current line number (1-based)</li>
050 *   <li><b>yycolno</b>: Current column number (0-based)</li>
051 *   <li><b>offset</b>: Absolute character offset from start of input</li>
052 *   <li><b>yylineno_p, yycolno_p, offset_p</b>: Previous position values for token start</li>
053 * </ul>
054 * 
055 * <h3>4. Token Creation Workflow</h3>
056 * <ol>
057 *   <li>Characters are read via get_char() from buf[] into yytextbuf[]</li>
058 *   <li>yylex() identifies token boundaries and type</li>
059 *   <li>Token text is extracted: yylvalstr = new String(yytextbuf, 0, yytextlen)</li>
060 *   <li>yylexwrap() creates TSourceToken with:
061 *       <ul>
062 *         <li>astext = yylvalstr (full token text copy)</li>
063 *         <li>lineNo = yylineno_p (start line)</li>
064 *         <li>columnNo = yycolno_p (start column)</li>
065 *         <li>offset = offset_p (absolute position)</li>
066 *       </ul>
067 *   </li>
068 * </ol>
069 * 
070 * <h3>5. Memory Management and Text Copying</h3>
071 * 
072 * <b>Current Implementation (Eager Loading):</b>
073 * <ul>
074 *   <li>Every token immediately copies its text from yytextbuf to TSourceToken.astext</li>
075 *   <li>Original SQL text in yyline is discarded after processing each line</li>
076 *   <li>No direct link maintained between token and original input position</li>
077 * </ul>
078 * 
079 * <h3>6. Tracing Back to Original Position</h3>
080 * 
081 * <b>Currently Possible:</b>
082 * <ul>
083 *   <li>Token stores lineNo, columnNo, and offset</li>
084 *   <li>These can theoretically locate position in original input</li>
085 * </ul>
086 * 
087 * <b>Current Limitations:</b>
088 * <ul>
089 *   <li>Original input text is not retained after line processing</li>
090 *   <li>yyline buffer is overwritten for each new line</li>
091 *   <li>No mechanism to retrieve original text from position alone</li>
092 * </ul>
093 * 
094 * @author Gudu Software
095 */
096public class TCustomLexer {
097
    // Token table kept at the lexer level: indexed by token code, it stores key facts about every token.
    // It is mainly used to handle a keyword token being used as a column or table name.
099    public static int MAX_TOKEN_SIZE = 2048; // 所有可能的token的数量
100    public static int MAX_TOKEN_COLUMN_SIZE = 10;
101
    // A table with MAX_TOKEN_SIZE rows, each row having MAX_TOKEN_COLUMN_SIZE integer columns:
    // column 0: number of times the token occurred
    // column 1: x position of the token's first occurrence
    // column 2: y position of the token's first occurrence
    // column 3: x position of the token's last occurrence
    // column 4: y position of the token's last occurrence
    // column 5: position in the token list of the token's first occurrence
    // column 6: position in the token list of the token's last occurrence
110
111    public static int COLUMN0_COUNT = 0;
112    public static int COLUMN1_FIRST_X = 1;
113    public static int COLUMN2_FIRST_Y = 2;
114    public static int COLUMN3_LAST_X = 3;
115    public static int COLUMN4_LAST_Y = 4;
116    public static int COLUMN5_FIRST_POS = 5;
117    public static int COLUMN6_LAST_POS = 6;
118
119    /**
120     * Pre-allocated strings for single ASCII characters (0-127).
121     * Used to avoid creating new String objects for common single-char tokens
122     * like '(', ')', ',', ';', '+', '-', '*', '/', etc.
123     * This significantly reduces GC pressure in the lexer hot path.
124     */
125    private static final String[] SINGLE_CHAR_STRINGS = new String[128];
126    static {
127        for (int i = 0; i < 128; i++) {
128            SINGLE_CHAR_STRINGS[i] = String.valueOf((char) i);
129        }
130    }
131
132    public long[][] TOKEN_TABLE = new long[MAX_TOKEN_SIZE][MAX_TOKEN_COLUMN_SIZE];
133
134    /**
135     * Tracks which tokenIds have been written to TOKEN_TABLE during current parse.
136     * Used for incremental reset - only clear entries that were actually used.
137     */
138    private int[] usedTokenIds = new int[512];  // Typical SQL uses <200 distinct token types
139    private int usedTokenCount = 0;
140
141    /**
142     * Reset TOKEN_TABLE by only clearing entries that were used (incremental clear).
143     * This is O(usedTokenCount) instead of O(MAX_TOKEN_SIZE * MAX_TOKEN_COLUMN_SIZE).
144     * For typical SQL with ~100 distinct token types, this saves clearing ~20,000 entries.
145     */
146    public void resetTokenTable() {
147        for (int i = 0; i < usedTokenCount; i++) {
148            int tokenId = usedTokenIds[i];
149            for (int j = 0; j < MAX_TOKEN_COLUMN_SIZE; j++) {
150                TOKEN_TABLE[tokenId][j] = 0L;
151            }
152        }
153        usedTokenCount = 0;
154    }
155
156    // define a function to set value when token is found, input is token id, a token with TSourceToken type
157    public void setTokenTableValue( TSourceToken token) {
158        if (token == null) return;
159        int tokenId = token.tokencode;
160
161        if (tokenId < 0 || tokenId >= MAX_TOKEN_SIZE) {
162            return;
163        }
164        if (TOKEN_TABLE[tokenId][COLUMN0_COUNT] == 0) {
165            // Track this tokenId for incremental reset
166            if (usedTokenCount < usedTokenIds.length) {
167                usedTokenIds[usedTokenCount++] = tokenId;
168            }
169            TOKEN_TABLE[tokenId][COLUMN0_COUNT] = 1;
170            TOKEN_TABLE[tokenId][COLUMN1_FIRST_X] = token.lineNo;
171            TOKEN_TABLE[tokenId][COLUMN2_FIRST_Y] = token.columnNo;
172            TOKEN_TABLE[tokenId][COLUMN3_LAST_X] = token.lineNo;
173            TOKEN_TABLE[tokenId][COLUMN4_LAST_Y] = token.columnNo;
174            TOKEN_TABLE[tokenId][COLUMN5_FIRST_POS] = token.posinlist;
175            TOKEN_TABLE[tokenId][COLUMN6_LAST_POS] = token.posinlist;
176        } else {
177            TOKEN_TABLE[tokenId][COLUMN0_COUNT] += 1;
178            TOKEN_TABLE[tokenId][COLUMN3_LAST_X] = token.lineNo;
179            TOKEN_TABLE[tokenId][COLUMN4_LAST_Y] = token.columnNo;
180            TOKEN_TABLE[tokenId][COLUMN6_LAST_POS] = token.posinlist;
181        }
182    }
183
184    public BufferedReader yyinput;
185    long yylineno,yycolno,offset,yylineno_p,yycolno_p,offset_p;
186    int bufptr,yystate,yysstate,yylstate,yytextlen,yyretval, yytextbufsize,
187            yymatches,yysleng;
188    char[] yyline;
189    /**
190     * Reusable buffer for readln() to reduce per-line allocations.
191     * Expands as needed for long lines and stays expanded for reuse.
192     */
193    private char[] lineReadBuffer = new char[4096];
194    /**
195     * Actual content length in lineReadBuffer/yyline.
196     * Used instead of yyline.length since lineReadBuffer is reused without copying.
197     */
198    private int yylineLen;
199    String yylvalstr;
200    public String  dolqstart = "";//postgresql, start part of Dollar-quoted String Constants
201    char yylastchar,yyactchar,yytablechar;
202    boolean yydone,yyreject;
203    char[] yytextbuf;
204    char[] buf;
205    int bufsize;
206    boolean endOfInput;
207
208    //StringBuffer literalbuf;
209    StringBuilder literalbuf;
210    int literallen,literalalloc,xcdepth,nchars,slashstar,dashdash;
211    boolean isqmarktoident;
212    public boolean insqlpluscmd;
213    char dummych1,dummych2,dummych3;
214    boolean utf8NoBreakSpaceReady = false;
215
216    int nestedLessThan = 0;
217
218    boolean isReadyForFunctionBody = false, isInFunctionBody = false;
219    int   functionBodyDelimiterIndex = -1;
220    ArrayList<String> functionBodyDelimiter = new ArrayList<>();
221
222    public static int keyword_type_reserved = 0x0001;
223    public static int keyword_type_keyword = 0x0002;
224    public static int keyword_type_identifier = 0x0004;
225    public static int keyword_type_column = 0x0008;
226
227    public char delimiterchar;
228    public String defaultDelimiterStr;
229    public String tmpDelimiter;
230    
231    final static int intial_bufsize = 16384;
232    final static char lf = (char)10;
233    final static int max_chars = 65536*10*2;
234    final static int max_rules = 256*2*10;
235    int  max_matches = 1024*20*10*2;
236
237
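    // The constants below must follow the order in which they appear in the .l file and must increase in steps of 2.
    // The reason for the +2 step was forgotten and has not been re-established.
    // NOTE(review): presumably it is because yynew() adds yylstate (0 or 1) to the start state - confirm against the generated tables.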
239    final static int init = 2;
240    final static int xc = 4;
241    final static int xd = 6;
242    final static int xq = 8;
243    final static int xqq = 10;  //oracle
244    final static int xdolq = 10;//postgresql
245    final static int xdbracket = 10;
246    final static int xdbrace = 12;
247    final static int xbacktick = 12;
248
249    final static int xbracketrs = 12; //redshift
250    final static int xqtriple = 14;//bigquery
251    final static int xdtriple = 16;//bigquery
252
253
254
255    //https://docs.microsoft.com/en-us/sql/sql-server/maximum-capacity-specifications-for-sql-server
256    final static int namedatalen = 8060;//255;
257    
258    final static int cmtslashstar = 257;
259    final static int cmtdoublehyphen = 258;
260    final static int lexspace = 259;
261    final static int lexnewline = 260;
262    final static int fconst  = 261;
263    final static int sconst = 262;
264    final static int iconst = 263;
265    final static int ident = 264;
266    final static int op = 265;
267    final static int cmpop = 266;
268    final static int bind_v = 267;
269    final static int assign_sign = 268;
270    final static int double_dot = 269;
271    final static int label_begin = 270;
272    final static int label_end = 271;
273    final static int substitution_v  = 272;
274    final static int filepath_sign = TBaseType.filepath_sign;
275    final static int sqlpluscmd = 273;
276    final static int atversion = TBaseType.atversion; //databricks
277    final static int error = 274;
278    final static int variable = 275;
279    final static int mslabel = 276;
280    public final static int bconst = TBaseType.bconst; //postgresql
281    final static int leftjoin_op = 277;
282    final static int odbc_esc_prefix = 277;
283    final static int rightjoin_op = 278;
284    final static int odbc_esc_terminator = 278;
285    final static int db2label = 279;
286    public final static int xconst = TBaseType.xconst; //postgresql
287    final static int ref_arrow = 280;
288    final static int rw_scriptoptions = 281;
289    public final static int UNICODE_ENCODE_ID = 281;
290    final static int mysqllabel = 282;
291    final static int NAMED_PARAMETER_SIGN = 282; //oracle,db2,snowflake CALL update_order (5000, NEW_STATUS => 'Shipped')
292    final static int QUOTED_IDENT = 282;//used in mdx
293    final static int BTEQCMD = 282;
294    final static int concatenationop = 283;
295    final static int pipe_greater = TBaseType.pipe_greater; // StarRocks pipe operator |>
296    final static int rw_not_deferrable = 284;
297    final static int rw_for1 = 285;
298    final static int stmt_delimiter = 286;
299    final static int AMP_QUOTED_ID = 285; //used in mdx
300    final static int AMP_UNQUOTED_ID = 286; //used in mdx
301    final static int m_clause = 287;
302    final static int MySQL_CHARSET_NAME = 287;
303    final static int typecast = TBaseType.typecast;//postgresql
304    final static int k_clause = 288;
305    final static int slash_dot = 288;
306    final static int outer_join = 289;
307
308    final static int not_equal = 290;
309
310    final static int param = TBaseType.param;
311    final static int mysql_null = TBaseType.rrw_mysql_null;
312
313    final static int rw_locktable = 296;
314    final static int rw_foreign2 = 297;
315    final static int rw_constraint2 = 298;
316    final static int rw_primary2 = 299;
317    final static int rw_unique2 = 300;
318    final static int     NEXT_PARAM = TBaseType.NEXT_PARAM;
319    final static int     POSITIONAL_PARAM = TBaseType.POSITIONAL_PARAM;
320    final static int     NAMED_PARAM = TBaseType.NAMED_PARAM;
321
322    final static int castoperator = TBaseType.castoperator;
323    final static int twocolons = TBaseType.twocolons;
324    final static int compoundAssignmentOperator = TBaseType.compoundAssignmentOperator;
325    final static int postgresql_function_delimiter = TBaseType.rrw_postgresql_function_delimiter;
326    final static int greenplum_function_delimiter = TBaseType.rrw_greenplum_function_delimiter;
327
328    final static int redshift_function_delimiter = TBaseType.rrw_redshift_function_delimiter;
329    final static int snowflake_function_delimiter = TBaseType.rrw_snowflake_function_delimiter;
330
331
332
333    int[] yypos;// = new int[max_rules + 1];      // 1 based in delphi, Position 0 was not used here
334    int[] yystack;// = new int[max_matches + 1];  // 1 based in delphi, Position 0 was not used here
335  //  ArrayList yystack;
336
337    //String keywordvaluefile,keywordfile,yyk_file,yym_file,yykl_file;
338    //String yykh_file,yyml_file,yymh_file,yytl_file,yyth_file,yytint_file,yyt_file;
339
340    EDbVendor dbvendor;
341    TSourceToken prevToken = null;
342
343    public void setSqlCharset(String sqlCharset) {
344        this.sqlCharset = sqlCharset;
345    }
346
347    public String getSqlCharset() {
348        return sqlCharset;
349    }
350
351    private String sqlCharset = null;
352    
353    /**
354     * Check if token code represents a single character operator
355     */
356    protected boolean isSingleCharOperator(int tokenCode) {
357        return tokenCode == '(' || tokenCode == ')' || 
358               tokenCode == '[' || tokenCode == ']' ||
359               tokenCode == '{' || tokenCode == '}' ||
360               tokenCode == ',' || tokenCode == ';' ||
361               tokenCode == '.' || tokenCode == ':' ||
362               tokenCode == '+' || tokenCode == '-' ||
363               tokenCode == '*' || tokenCode == '/' ||
364               tokenCode == '%' || tokenCode == '=' ||
365               tokenCode == '<' || tokenCode == '>' ||
366               tokenCode == '!' || tokenCode == '&' ||
367               tokenCode == '|' || tokenCode == '^' ||
368               tokenCode == '~' || tokenCode == '?';
369    }
370    
371    /**
372     * Check if token code represents a keyword
373     */
374    protected boolean isKeyword(int tokenCode) {
375        // Check if it's in the reserved word range
376        return tokenCode >= TBaseType.rrw_select && tokenCode < TBaseType.rrw_abort;
377    }
378
379    public TCustomLexer(){
380       //this.yyinput = pbuf;
381       yytextbufsize = intial_bufsize - 1;
382       yytextbuf = new char[intial_bufsize];
383       checkyytextbuf(yytextbufsize);
384
385       bufsize = intial_bufsize - 1;
386       buf = new char[intial_bufsize];
387       checkbuf(bufsize);
388
389       //literalbuf = new StringBuffer();
390        literalbuf = new StringBuilder();
391        //keywordList = new TreeMap();
392        delimiterchar = ';';
393        tmpDelimiter = "";
394
395        xcdepth = 0;
396        nchars = 0;
397        isqmarktoident = true;
398
399       yylvalstr = "";
400        yysstate = 0;
401        yylstate = 0;
402        yymatches = 0;
403        yysleng = 0;
404       bufptr = 0;
405       yylineno = 0;
406       yycolno = 0;
407       offset = -1;
408       yylineno_p = 1;
409       yycolno_p = 1;
410       offset_p = 0;
411
412       yypos = new int[max_rules + 1];
413       max_matches = TBaseType.LEXER_INIT_MAX_MATCHES;
414       yystack = new int[max_matches + 1];
415
416        prevToken = null;
417    }
418
419    /*
420     * this function is not used. 
421    private void getkeywordvaluefromfile(){
422        int i;
423        keywordValueList.clear();
424        for(i=0; i<keywordlist.length; i++){
425           // System.out.println(keywordlist[i]);
426            String[] ss = keywordlist[i].split("[=]");
427            keywordValueList.put(ss[0].toUpperCase(),ss[1]);
428        }
429    }
430     */
431
432public  int iskeyword(String str){
433    return -1;
434}
435
436public boolean isAtBeginOfLine(){
437    return (yyretval == lexnewline || yyretval == 0);
438}
439
440//public boolean canBeColumnName(int tokencode){
441//    return false;
442//}
443
444
445public String getStringByCode(int tokenCode){
446    return null;
447}
448
449    public  int getkeywordvalue(String keyword){
450        return 0;
451    }
452
453
454    /**
455     * @deprecated , please use keywordChecker.isKeyword() instead.
456     *
457     * because there are so many non-reserved keywords in some databases, it's not suitable to put those
458     * non-reserved keywords in lexer and parser.
459     *
460     * @param keyword
461     * @param keywordValueList
462     * @param keywordTypeList
463     * @return
464     */
465    public static EKeywordType getKeywordType(String keyword, HashMap<String, Integer> keywordValueList,HashMap<Integer, Integer> keywordTypeList){
466        EKeywordType ret = EKeywordType.NOT_A_KEYWORD;
467        Integer s = keywordValueList.get(keyword.toUpperCase(Locale.ENGLISH));
468        if( s == null) return ret;
469
470        Integer i = keywordTypeList.get(s);
471        if (i == 1) return EKeywordType.RESERVED_WORD;
472        else if (i == 2) return EKeywordType.NON_RESERVED_KEYWORD;
473        else return  ret;
474    }
475
476    /**
477     * 如果是ascii 字符,直接返回,如果是unicode 字符,需要进行转换。否则 String.charAt() 返回的unicode字符不是我们想要的字符,
478     * 例如中文的括号,我们实际需要的ascii的括号
479     *
480     * @param pYylvalstr
481     * @param index
482     * @return
483     */
484   char lexer_charAt(String pYylvalstr,int index){
485        char ret = pYylvalstr.charAt(index);
486        if (ret > 255){
487            // this is a unicode code
488            if ((ret == 0xFF08)){
489                // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=65280&number=128
490                // Unicode code point for FULLWIDTH LEFT PARENTHESIS (, 0xFF08
491                //System.out.println(c);
492                ret = '(';
493            }
494            if ( (ret == 0xFF09)){
495                // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=65280&number=128
496                // Unicode code point for FULLWIDTH RIGHT PARENTHESIS ), 0xFF09
497                // System.out.println(c);
498                ret = ')';
499            }
500        }
501        return ret;
502   }
503   void totablechar(){
504       //System.out.println("char:"+yyactchar+" ,hex:"+String.format("%04x", (int) yyactchar));
505       //System.out.println(String.format("0x%08X", (int)yyactchar)+", "+(char)yyactchar);
506
507    if (((int) yyactchar == 0) && !endOfInput) {
508        yytablechar = (char)255;
509        return;
510    }
511
512     if ((int)(yyactchar) < 228){ // 228 is ä in unicode
513       yytablechar = yyactchar;
514         if ((((int)(yyactchar) == 160)&&(utf8NoBreakSpaceReady))||(yyactchar == 0xA0)){
515             yytablechar = (char)32;
516         }
517       utf8NoBreakSpaceReady = false;
518//         if (yyactchar == 0x27){
519//             insideSingleQuoteStr = !insideSingleQuoteStr;
520//          }
521     }else{
522         yytablechar = (char)'a';//(char)255;
523
524         if ((int)(yyactchar) == 914) { // c2 a0, utf-8 NO-BREAK SPACE
525             utf8NoBreakSpaceReady = true;
526             yytablechar = (char) 32;
527         }else if ((yyactchar == 0x2018)||(yyactchar == 0x2019)){
528             if (stringLiteralStartWithUnicodeSingleQuote){
529                 // WHERE Name LIKE ‘Acme%’
530                 // 如上,如果string literal 以unicode quote 开始,则不管当前是否在string literal中,新碰到的unicode quote都看成是string literal的结尾符,
531                 yytablechar = 0x27; // treat  Unicode Character 'LEFT SINGLE QUOTATION MARK' as the ascii char ', but don't change it
532             }else {
533                 if (insideSingleQuoteStr){
534                     // don't change the unicode quote char
535                 }else {
536                     yytablechar = 0x27; // treat  Unicode Character 'LEFT SINGLE QUOTATION MARK' as the ascii char ', but don't change it
537                 }
538             }
539
540         }else if ((yyactchar == 0x200B)||(yyactchar == 0x3000)||(yyactchar >= 0x2000 && yyactchar <= 0x200A)){
541             // Unicode code point 0x200B: treat  Unicode Character ZERO WIDTH SPACE as the ascii char space, but don't change it
542             // Unicode code point 0x3000: treat  Unicode Character IDEOGRAPHIC SPACE (UTF-8: e3 80 80) as the ascii char space, but don't change it
543             // Unicode code points 0x2000-0x200A: General Punctuation space characters (EN QUAD, EM QUAD, EN SPACE, EM SPACE, THREE-PER-EM SPACE, etc.)
544             yytablechar = 0x20;
545         }else if (yyactchar == 0xFF08){
546             yytablechar = '('; // treat  Unicode code point for FULLWIDTH LEFT PARENTHESIS  as the ascii char (, but don't change it
547         }else if (yyactchar == 0xFF09){
548             yytablechar = ')'; // treat  Unicode code point for FULLWIDTH RIGHT PARENTHESIS  as the ascii char ), but don't change it
549         }else if (yyactchar == 0xFF0C){
550             yytablechar = ','; // treat  Unicode code point for FULLWIDTH COMMA  as the ascii char comma, but don't change it
551         }else {
552             utf8NoBreakSpaceReady = false;
553         }
554     }
555   }
556
557    String getyytext(){
558      return new String(yytextbuf,0,yytextlen);
559    }
560
561
562    void checkyytextbuf(int size){
563       while ( size >= yytextbufsize){
564          yytextbufsize = yytextbufsize * 2 > intial_bufsize ? yytextbufsize * 2: intial_bufsize;
565          char[] tmp = new char[yytextbufsize];
566           System.arraycopy(yytextbuf,0,tmp,0, yytextbuf.length);
567           yytextbuf = tmp;
568       }
569    }
570    
571    void checkbuf(int size){
572       // System.out.println("while begin2"+" size:"+size+" bufsize:"+bufsize);
573       while ( size >= bufsize){
574          bufsize = bufsize * 2 > intial_bufsize ? bufsize * 2: intial_bufsize;
575          char[] tmp = new char[bufsize];
576           System.arraycopy(buf,0,tmp,0, buf.length);
577           buf = tmp;
578       }
579       // System.out.println("while end2");
580    }
581
582    boolean eof(BufferedReader pbuf){
583        try{
584        return !pbuf.ready();
585        }catch(IOException e){
586          return true;
587        }
588    }
589
590    void yynew(){
591        if (yylastchar != (char)0){
592          if(yylastchar == lf){
593            yylstate = 1;
594          }else{
595              yylstate = 0;
596          }
597        }
598
599        yystate = yysstate + yylstate;
600        checkyytextbuf(0);
601        yytextlen = 0;
602        yymatches = 0;
603        yydone = false;
604    }
605
606    void yyscan(){
607        yyactchar = get_char();
608        checkyytextbuf(yytextlen + 1);
609        yytextlen++;
610        yytextbuf[yytextlen - 1] = yyactchar;
611    }
612
613    void yymark(int n){
614        if (n > max_rules ){
615           System.out.println("n > max_rules ");
616        }
617        yypos[n] = yytextlen;
618    }
619
620    void yymatch(int n){
621        yymatches++;
622        if(yymatches > max_matches){
623
624            int new_yystack[] = new int[max_matches*2+1];
625            System.arraycopy(yystack, 0, new_yystack, 0, max_matches);
626            yystack = new_yystack;
627            max_matches = max_matches * 2;
628
629           // this is valid in JDK 1.6, proguard will report warning and stop
630           // yystack = Arrays.copyOf(yystack,max_matches+1);
631
632        }
633        yystack [yymatches] = n;
634    }
635
636    int yyfind(){
637        //return -1 mean not found
638        int ret = -1;
639
640        yyreject = false;
641        
642        while (( yymatches > 0 ) && ( yypos[yystack[yymatches]] == 0 )) {
643           yymatches-- ;
644        }
645        
646
647        if (yymatches > 0){
648          yysleng = yytextlen;
649          ret = yystack[yymatches];
650          yyless( yypos[ret] );
651          yypos[ret] = 0;
652          if (yytextlen >0){
653            yylastchar = yytextbuf [yytextlen-1];
654          }else{
655            yylastchar = (char)0;
656          }
657        }else{
658          yyless( 0 );
659          yylastchar = (char)0;
660        }
661
662       return ret;
663    }
664
665    boolean yydefault(){
666        boolean ret;
667
668        yyreject = false;
669        yyactchar = get_char();
670        if (yyactchar != (char)0){
671          //put_char( yyactchar );
672          ret = true;
673        }else{
674          yylstate = 1;
675          ret = false;
676        }
677        yylastchar = yyactchar;
678        return ret;
679    }
680    void yyless(int n){
681        for(int i= yytextlen; i> n; i--){
682            unget_char(yytextbuf[i - 1]);
683        }
684        checkyytextbuf(n);
685        yytextlen = n;
686    }
687    void returni(int n){
688        yyretval = n;
689        yydone = true;
690    }
691    void returnc(char c){
692        yyretval = (int)c;
693        yydone = true;
694    }
695    void yyclear(){
696        bufptr = 0;
697        yysstate = 0;
698        yylstate = 1;
699        yylastchar = (char)0;
700        yytextlen = 0;
701        yylineno = 0;
702        yycolno = 0;
703        offset = -1;
704       // yystext := '';
705
706        yylineno_p = 1;
707        yycolno_p = 1;
708        offset_p = 0;
709
710    }
711
712    
713    boolean yywrap(){
714        return true;
715    }
716    int getyysstate(){
717        return yysstate;
718    }
719    void start(int pstate){
720        yysstate = pstate;
721        if (pstate == xq){
722            insideSingleQuoteStr = true;
723            if ((yylvalstr.charAt(0) == 0x2018)||(yylvalstr.charAt(0) == 0x2019)){
724                stringLiteralStartWithUnicodeSingleQuote = true;
725            }else{
726                stringLiteralStartWithUnicodeSingleQuote = false;
727            }
728        }else{
729            insideSingleQuoteStr = false;
730        }
731    }
732
733
734    void unget_char(char pchar){
735        if(bufptr == max_chars)
736        {
737            System.out.println("input buffer overflow");
738        }
739      //  if (bufptr > 0) {
740        bufptr++;
741        yycolno--;
742        offset--;
743        checkbuf(bufptr+1);
744        buf[bufptr] = pchar;
745      //  }
746
747    }
748
749    public void reset(){
750        insideSingleQuoteStr = false;
751        nestedLessThan = 0;
752    }
753
754    public boolean insideSingleQuoteStr = false;
755    public boolean stringLiteralStartWithUnicodeSingleQuote = false;
756
757
758    // Previous implementation of readln, 2025-05-04
759    // char[] readln()  throws IOException {
760    //     int c;
761    //     char[] buffer = new char[80];
762    //     int bufferSize = 0;
763
764    //     while ((c = yyinput.read()) != -1) {
765    //         if (bufferSize >= buffer.length) {
766    //             char[] newBuffer = new char[buffer.length * 2];
767    //             System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
768    //             buffer = newBuffer;
769    //         }
770
771    //         buffer[bufferSize++] = (char)c;
772
773    //         if (c == '\n' || c == '\r') {
774    //             break;
775    //         }
776    //     }
777
778    //     if (bufferSize > 0 && buffer[bufferSize - 1] == '\r') {
779    //         yyinput.mark(1);
780    //         c = yyinput.read();
781    //         if (c == '\n') {
782    //             if (bufferSize >= buffer.length) {
783    //                 char[] newBuffer = new char[buffer.length + 1];
784    //                 System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
785    //                 buffer = newBuffer;
786    //             }
787    //             buffer[bufferSize++] = '\n';
788    //         } else {
789    //             yyinput.reset();
790    //         }
791    //     }
792
793    //     if (bufferSize == 0) {
794    //         return null;
795    //     }else{
796    //         char[] result = new char[bufferSize];
797    //         System.arraycopy(buffer, 0, result, 0, bufferSize);
798    //         return result;
799    //     }
800    // }
801
802/**
803 * High-performance line reader with optimal buffer management.
804 * Reuses lineReadBuffer across calls to reduce per-line allocations.
805 * @return char array containing the line including line ending, or null if end of stream
806 */
807char[] readln() throws IOException {
808    if (yyinput == null) return null;
809
810    int position = 0;
811    int c;
812
813    // Read characters until line ending or EOF
814    while ((c = yyinput.read()) != -1) {
815        // Expand buffer if needed (expanded buffer stays for reuse)
816        if (position >= lineReadBuffer.length) {
817            char[] newBuffer = new char[lineReadBuffer.length * 2];
818            System.arraycopy(lineReadBuffer, 0, newBuffer, 0, lineReadBuffer.length);
819            lineReadBuffer = newBuffer;
820        }
821
822        // Store character
823        lineReadBuffer[position++] = (char)c;
824
825        // Check for line endings
826        if (c == '\n') {
827            break; // LF - end of line
828        } else if (c == '\r') {
829            // Need to check for CR+LF sequence
830            yyinput.mark(1);
831            c = yyinput.read();
832
833            if (c == '\n') {
834                // CR+LF sequence - include LF in result
835                if (position >= lineReadBuffer.length) {
836                    char[] newBuffer = new char[lineReadBuffer.length + 1];
837                    System.arraycopy(lineReadBuffer, 0, newBuffer, 0, lineReadBuffer.length);
838                    lineReadBuffer = newBuffer;
839                }
840                lineReadBuffer[position++] = '\n';
841            } else {
842                // CR only - reset stream to keep the character after CR
843                yyinput.reset();
844            }
845            break;
846        }
847    }
848
849    // Return null if no characters were read (end of stream)
850    if (position == 0) {
851        yylineLen = 0;
852        return null;
853    }
854
855    // Return lineReadBuffer directly, avoiding per-line array allocation.
856    // yylineLen holds the actual content length (replaces yyline.length semantic).
857    yylineLen = position;
858    return lineReadBuffer;
859}
860
861    char get_char(){
862
863        char ret ;
864         boolean readlineok = true;
865
866        if ((bufptr == 0) && !eof(yyinput) )
867        {
868            try{
869               endOfInput = false;
870               yyline = readln();//yyinput.readLine();
871              // System.out.println("readln: "+yyline);
872                if (yyline == null){
873                  readlineok = false;
874                }  else{
875                    yylineno++;
876                    yycolno = 0;
877                    // Use yylineLen instead of yyline.length since lineReadBuffer is reused
878                    bufptr = yylineLen;
879                    checkbuf(bufptr+1);
880                    for(int k=1;k<=bufptr;k++){
881                        buf[k] = yyline[bufptr - k];
882                    }
883                }
884            }catch(IOException e){
885              readlineok = false;
886            }
887        }
888
889        if (! readlineok){
890          endOfInput = true;
891          return (char)0;
892        }
893
894       if (bufptr > 0){
895         bufptr--;
896         yycolno++;
897         offset++;
898
899         return buf[bufptr+1];
900         //return yyline.charAt(yyline.length()  - (bufptr + 1));
901       }else{
902       // bufptr--;
903           endOfInput = true;
904        return  (char)0;
905       }
906
907    }
908
909    void startlit(){
910        literalbuf.setLength(0);
911        literallen = 0;
912        literalalloc = 0;
913    }
914
915    void addlit(String ytext, int yleng){
916        literallen = literallen + yleng;
917        literalbuf.append(ytext,0,yleng);
918    }
919
920    void addlitchar(char ychar){
921        literallen++;
922        literalbuf.append(ychar);
923    }
924
925    String litbufdup(){
926        return literalbuf.toString();//.intern();
927    }
928
929    boolean isopchar(char ch){
930        switch (ch) {
931            case '~':
932            case '!':
933            case '@':
934            case '#':
935            case '^':
936            case '&':
937            case '|':
938            case '`':
939            case '?':
940            case '$':
941            case '%':
942                return true;
943            default:
944                return false;
945        }
946    }
947
948    boolean isselfchar(char ch){
949        switch (ch) {
950            case ',':
951            case '(':
952            case ')':
953            case '[':
954            case ']':
955            case '.':
956            case ';':
957            case '$':
958            case ':':
959            case '+':
960            case '-':
961            case '*':
962            case '/':
963            case '%':
964            case '^':
965            case '<':
966            case '>':
967            case '=':
968            case '!':
969            case '{':
970            case '}':
971                return true;
972            default:
973                return false;
974        }
975    }
976
977    boolean charinarray(char c, char[] a){
978        int len = a.length;
979        for (int i = 0; i < len; i++) {
980            if (a[i] == c)
981                return true;
982        }
983        return false;
984    }
985
986    void setlengthofliteralbuf(int plen){
987      literalbuf.setLength(plen);
988    }
989
990    void yyaction(int yyruleno){
991    }
992
993    int yylex(){
994        return 0;
995    }
996
997
998    public int yylexwrap(TSourceToken psourcetoken) {
999        // Get token code and handle EOF
1000        if ((psourcetoken.tokencode = yylex()) == 0) return 0;
1001
1002        // Store token text - use shared strings for single ASCII chars to reduce allocations
1003        if (yylvalstr == null) {
1004            if (yytextlen == 1 && yytextbuf[0] < 128) {
1005                yylvalstr = SINGLE_CHAR_STRINGS[yytextbuf[0]];
1006            } else {
1007                yylvalstr = new String(yytextbuf, 0, yytextlen);
1008            }
1009        }
1010        psourcetoken.setAstext(yylvalstr);
1011    
1012        // Record token position information
1013        psourcetoken.lineNo = yylineno_p;
1014        psourcetoken.columnNo = yycolno_p;
1015        psourcetoken.offset = offset_p;
1016        yylineno_p = yylineno;
1017        yycolno_p = yycolno + 1;
1018        offset_p = offset + 1;
1019        
1020        // Track token in token table for analysis
1021        setTokenTableValue(psourcetoken);
1022    
1023        // Handle token types based on token code
1024        switch (psourcetoken.tokencode) {
1025            case cmtdoublehyphen:
1026                psourcetoken.tokentype = ETokenType.ttsimplecomment;
1027                if (dbvendor == EDbVendor.dbvmdx && psourcetoken.toString().startsWith("/")) {
1028                    psourcetoken.tokentype = ETokenType.ttCPPComment;
1029                }
1030                break;
1031                
1032            case cmtslashstar:
1033                psourcetoken.tokentype = ETokenType.ttbracketedcomment;
1034                break;
1035                
1036            case lexspace:
1037                psourcetoken.tokentype = ETokenType.ttwhitespace;
1038                break;
1039                
1040            case lexnewline:
1041                psourcetoken.tokentype = ETokenType.ttreturn;
1042                break;
1043                
1044            case bind_v:
1045                psourcetoken.tokentype = ETokenType.ttbindvar;
1046                if (dbvendor == EDbVendor.dbvoracle) {
1047                    psourcetoken.setAstext(psourcetoken.getAstext().replace(TBaseType.newline, ""));
1048                }
1049                break;
1050                
1051            case stmt_delimiter:
1052                psourcetoken.tokentype = ETokenType.ttstmt_delimiter;
1053                psourcetoken.tokencode = cmtslashstar;
1054                break;
1055                
1056            case concatenationop:
1057                psourcetoken.tokentype = ETokenType.ttconcatenationop;
1058                break;
1059                
1060            case variable:
1061                psourcetoken.tokentype = ETokenType.ttsqlvar;
1062                break;
1063                
1064            case fconst:
1065            case iconst:
1066                psourcetoken.tokentype = ETokenType.ttnumber;
1067                break;
1068                
1069            case sconst:
1070                psourcetoken.tokentype = ETokenType.ttsqstring;
1071                psourcetoken.dolqstart = dolqstart;
1072                dolqstart = "";
1073                break;
1074                
1075            case ident:
1076            case QUOTED_IDENT:
1077                handleIdentifierToken(psourcetoken);
1078                break;
1079                
1080            case cmpop:
1081                handleComparisonOperator(psourcetoken);
1082                break;
1083                
1084            case op:
1085                handleOperatorToken(psourcetoken);
1086                break;
1087                
1088            default:
1089                handleDefaultToken(psourcetoken);
1090                break;
1091        }
1092    
1093        prevToken = psourcetoken;
1094        return psourcetoken.tokencode;
1095    }
1096    
1097    // Helper methods to better organize the complex token handling logic
1098    private void handleIdentifierToken(TSourceToken psourcetoken) {
1099        psourcetoken.tokentype = ETokenType.ttidentifier;
1100        String tokenText = psourcetoken.toString().trim();
1101        
1102        if (tokenText.startsWith("\"")) {
1103            psourcetoken.tokentype = ETokenType.ttdqstring;
1104        } else if (tokenText.startsWith("[")) {
1105            if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
1106                psourcetoken.tokentype = ETokenType.ttdbstring;
1107            }
1108        } else if (tokenText.startsWith("{")) {
1109            if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
1110                psourcetoken.tokentype = ETokenType.ttbrstring;
1111                if (tokenText.toLowerCase().startsWith("{escape")) {
1112                    psourcetoken.tokencode = TBaseType.rrw_sqlserver_odbc_escape;
1113                }
1114            }
1115        } else if (tokenText.startsWith("&")) {
1116            if (dbvendor == EDbVendor.dbvmdx) {
1117                if (psourcetoken.tokencode == QUOTED_IDENT) {
1118                    psourcetoken.tokencode = AMP_QUOTED_ID;
1119                } else if (psourcetoken.tokencode == ident) {
1120                    psourcetoken.tokencode = AMP_UNQUOTED_ID;
1121                }
1122            }
1123        } else if (tokenText.startsWith(".")) {
1124            if (dbvendor == EDbVendor.dbvteradata) {
1125                psourcetoken.tokentype = ETokenType.ttBTEQCmd;
1126            }
1127        }
1128    }
1129    
1130    private void handleComparisonOperator(TSourceToken psourcetoken) {
1131        psourcetoken.tokentype = ETokenType.ttmulticharoperator;
1132        String token = yylvalstr;
1133
1134        // Oracle 26c vector distance operators (3-char, Oracle only)
1135        if (dbvendor == EDbVendor.dbvoracle) {
1136            if (token.equals("<=>")) {
1137                psourcetoken.tokencode = TBaseType.vector_cosine_distance;
1138                return;
1139            } else if (token.equals("<->")) {
1140                psourcetoken.tokencode = TBaseType.vector_euclidean_distance;
1141                return;
1142            } else if (token.equals("<#>")) {
1143                psourcetoken.tokencode = TBaseType.vector_dot_product;
1144                return;
1145            }
1146        }
1147
1148        if ((token.startsWith("!") && token.endsWith("=")) ||
1149            (token.startsWith("^") && token.endsWith("=")) ||
1150            (token.startsWith("~") && token.endsWith("=")) ||
1151            (token.startsWith("<") && token.endsWith(">"))) {
1152
1153            psourcetoken.tokencode = TBaseType.not_equal;
1154
1155            // Handle MySQL NULL-safe equal
1156            if (token.indexOf("=", 1) > 0 &&
1157                token.startsWith("<") && token.endsWith(">")) {
1158                psourcetoken.tokencode = (int)'=';
1159            }
1160        } else if (token.startsWith(">") && token.endsWith("=")) {
1161            psourcetoken.tokencode = TBaseType.great_equal;
1162        } else if (token.startsWith("<") && token.endsWith("=")) {
1163            psourcetoken.tokencode = TBaseType.less_equal;
1164        } else if ((token.startsWith("!") && token.endsWith("<")) ||
1165                   (token.startsWith("^") && token.endsWith("<"))) {
1166            psourcetoken.tokencode = TBaseType.not_less;
1167        } else if ((token.startsWith("!") && token.endsWith(">")) ||
1168                   (token.startsWith("^") && token.endsWith(">"))) {
1169            psourcetoken.tokencode = TBaseType.not_great;
1170        } else if (token.length() == 2 && token.charAt(0) == ':' && token.charAt(1) == '=') {
1171            psourcetoken.tokencode = assign_sign;
1172        }
1173    }
1174    
1175    private void handleOperatorToken(TSourceToken psourcetoken) {
1176        psourcetoken.tokentype = ETokenType.ttmulticharoperator;
1177        String token = yylvalstr;
1178        int tokenLength = token.length();
1179        char firstChar = tokenLength > 0 ? token.charAt(0) : '\0';
1180        char secondChar = tokenLength > 1 ? token.charAt(1) : '\0';
1181        
1182        // Handle question mark specially
1183        if (token.equals("?") && isqmarktoident) {
1184            handleQuestionMark(psourcetoken);
1185            return;
1186        }
1187        
1188        // Handle special two-character operators
1189        if (tokenLength == 2) {
1190            if (handleTwoCharOperator(psourcetoken, firstChar, secondChar)) {
1191                return;
1192            }
1193        }
1194        
1195        // Handle special three-character operators
1196        if (tokenLength == 3) {
1197            if (handleThreeCharOperator(psourcetoken, firstChar, secondChar, token.charAt(2))) {
1198                return;
1199            }
1200        }
1201        
1202        // Handle comparison operators
1203        if (handleComparisonOp(psourcetoken, token)) {
1204            return;
1205        }
1206        
1207        // Handle single character operators
1208        if (tokenLength == 1) {
1209            handleSingleCharOperator(psourcetoken, firstChar);
1210        }
1211    }
1212    
1213    private boolean handleTwoCharOperator(TSourceToken psourcetoken, char firstChar, char secondChar) {
1214        switch (firstChar) {
1215            case '<':
1216                if (secondChar == '<') {
1217                    return handleLeftShiftOperator(psourcetoken);
1218                } else if (secondChar == '@') {
1219                    psourcetoken.tokencode = TBaseType.JSON_RIGHT_CONTAIN;
1220                    return true;
1221                }
1222                break;
1223                
1224            case '>':
1225                if (secondChar == '>') {
1226                    return handleRightShiftOperator(psourcetoken);
1227                }
1228                break;
1229                
1230            case '=':
1231                if (secondChar == '>') {
1232                    if (dbvendor == EDbVendor.dbvodbc) {
1233                        psourcetoken.tokencode = TBaseType.great_equal;
1234                    } else if (dbvendor == EDbVendor.dbvpostgresql || dbvendor == EDbVendor.dbvgaussdb) {
1235                        psourcetoken.tokencode = TBaseType.assign_sign;
1236                    } else {
1237                        psourcetoken.tokencode = NAMED_PARAMETER_SIGN;
1238                    }
1239                    return true;
1240                } else if (secondChar == '*') {
1241                    if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
1242                        psourcetoken.tokencode = rightjoin_op;
1243                    }
1244                    return true;
1245                } else if (secondChar == '<') {
1246                    if (dbvendor == EDbVendor.dbvodbc) {
1247                        psourcetoken.tokencode = TBaseType.less_equal;
1248                    }
1249                    return true;
1250                } else if (secondChar == '=') {
1251                    if (dbvendor == EDbVendor.dbvsparksql) {
1252                        psourcetoken.tokencode = '=';
1253                    }
1254                    return true;
1255                }
1256                break;
1257                
1258            case '-':
1259                if (secondChar == '>') {
1260                    if (dbvendor == EDbVendor.dbvpostgresql || dbvendor == EDbVendor.dbvgaussdb 
1261                        || dbvendor == EDbVendor.dbvgreenplum || dbvendor == EDbVendor.dbvmysql) {
1262                        psourcetoken.tokencode = TBaseType.JSON_GET_OBJECT;
1263                    } else {
1264                        psourcetoken.tokencode = ref_arrow;
1265                    }
1266                    return true;
1267                } else if (secondChar == '=') {
1268                    if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
1269                        psourcetoken.tokencode = compoundAssignmentOperator;
1270                    }
1271                    return true;
1272                }
1273                break;
1274                
1275            case '.':
1276                if (secondChar == '.') {
1277                    if (dbvendor == EDbVendor.dbvdb2 || dbvendor == EDbVendor.dbvoracle 
1278                        || dbvendor == EDbVendor.dbvmysql || dbvendor == EDbVendor.dbvhana) {
1279                        psourcetoken.tokencode = double_dot;
1280                    }
1281                    return true;
1282                }
1283                break;
1284                
1285            case '*':
1286                if (secondChar == '=') {
1287                    if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
1288                        psourcetoken.tokencode = leftjoin_op;
1289                    }
1290                    return true;
1291                } else if (secondChar == '*') {
1292                    if (dbvendor == EDbVendor.dbvteradata || dbvendor == EDbVendor.dbvnetezza) {
1293                        psourcetoken.tokencode = TBaseType.exponentiate;
1294                    }
1295                    return true;
1296                }
1297                break;
1298                
1299            case '|':
1300                if (secondChar == '|') {
1301                    if (dbvendor == EDbVendor.dbvmysql) {
1302                        psourcetoken.tokencode = TBaseType.logical_or;
1303                    } else if (isStringConcatVendor(dbvendor)) {
1304                        psourcetoken.tokencode = TBaseType.concatenationop;
1305                    }
1306                    return true;
1307                } else if (secondChar == '>') {
1308                    if (dbvendor == EDbVendor.dbvsparksql) {
1309                        psourcetoken.tokencode = TBaseType.sparksql_pipe_arrow;
1310                    } else {
1311                        psourcetoken.tokencode = TBaseType.pipe_greater;
1312                    }
1313                    return true;
1314                } else if (secondChar == '/') {
1315                    if (dbvendor == EDbVendor.dbvredshift) {
1316                        psourcetoken.tokencode = TBaseType.square_root;
1317                    }
1318                    return true;
1319                }
1320                break;
1321                
1322            case '&':
1323                if (secondChar == '&') {
1324                    if (dbvendor == EDbVendor.dbvmysql) {
1325                        psourcetoken.tokencode = TBaseType.logical_and;
1326                    }
1327                    return true;
1328                }
1329                break;
1330                
1331            case '?':
1332                if (secondChar == '|') {
1333                    psourcetoken.tokencode = TBaseType.JSON_ANY_EXIST;
1334                    return true;
1335                } else if (secondChar == '&') {
1336                    psourcetoken.tokencode = TBaseType.JSON_ALL_EXIST;
1337                    return true;
1338                }
1339                break;
1340                
1341            case '@':
1342                if (secondChar == '>') {
1343                    psourcetoken.tokencode = TBaseType.JSON_LEFT_CONTAIN;
1344                    return true;
1345                }
1346                break;
1347                
1348            case '#':
1349                if (secondChar == '>') {
1350                    psourcetoken.tokencode = TBaseType.JSON_GET_OBJECT_AT_PATH;
1351                    return true;
1352                }
1353                break;
1354                
1355            case ':':
1356                if (secondChar == '=') {
1357                    psourcetoken.tokencode = assign_sign;
1358                    return true;
1359                }
1360                break;
1361        }
1362        
1363        // Handle compound assignment operators
1364        if ((firstChar == '+' || firstChar == '-' || firstChar == '*' || 
1365             firstChar == '/' || firstChar == '%' || firstChar == '&' || 
1366             firstChar == '^' || firstChar == '|') && secondChar == '=') {
1367            if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
1368                psourcetoken.tokencode = compoundAssignmentOperator;
1369                return true;
1370            } else if (dbvendor == EDbVendor.dbvmysql && firstChar == '^' && secondChar == '=') {
1371                psourcetoken.tokencode = not_equal;
1372                return true;
1373            }
1374        }
1375        
1376        return false;
1377    }
1378    
1379    private boolean handleThreeCharOperator(TSourceToken psourcetoken, char firstChar, char secondChar, char thirdChar) {
1380        if (firstChar == '-' && secondChar == '>' && thirdChar == '>') {
1381            psourcetoken.tokencode = TBaseType.JSON_GET_TEXT;
1382            return true;
1383        } else if (firstChar == '#' && secondChar == '>' && thirdChar == '>') {
1384            psourcetoken.tokencode = TBaseType.JSON_GET_TEXT_AT_PATH;
1385            return true;
1386        } else if (firstChar == '|' && secondChar == '|' && thirdChar == '/') {
1387            if (dbvendor == EDbVendor.dbvredshift) {
1388                psourcetoken.tokencode = TBaseType.cube_root;
1389                return true;
1390            }
1391        }
1392        return false;
1393    }
1394    
1395    private boolean handleComparisonOp(TSourceToken psourcetoken, String token) {
1396        if ((token.startsWith("!") && token.endsWith("=")) ||
1397            (token.startsWith("^") && token.endsWith("=")) ||
1398            (token.startsWith("<") && token.endsWith(">"))) {
1399            psourcetoken.tokencode = TBaseType.not_equal;
1400            return true;
1401        } else if (token.startsWith(">") && token.endsWith("=")) {
1402            psourcetoken.tokencode = TBaseType.great_equal;
1403            return true;
1404        } else if (token.startsWith("<") && token.endsWith("=")) {
1405            psourcetoken.tokencode = TBaseType.less_equal;
1406            return true;
1407        } else if ((token.startsWith("!") && token.endsWith("<")) ||
1408                   (token.startsWith("^") && token.endsWith("<"))) {
1409            psourcetoken.tokencode = TBaseType.not_less;
1410            return true;
1411        } else if ((token.startsWith("!") && token.endsWith(">")) ||
1412                   (token.startsWith("^") && token.endsWith(">"))) {
1413            psourcetoken.tokencode = TBaseType.not_great;
1414            return true;
1415        }
1416        return false;
1417    }
1418    
1419    private void handleSingleCharOperator(TSourceToken psourcetoken, char ch) {
1420        switch (ch) {
1421            case '~':
1422                if (dbvendor == EDbVendor.dbvmysql || dbvendor == EDbVendor.dbvredshift || 
1423                    dbvendor == EDbVendor.dbvsnowflake) {
1424                    psourcetoken.tokencode = (int)'~';
1425                }
1426                break;
1427                
1428            case '#':
1429                if (dbvendor == EDbVendor.dbvmssql) {
1430                    psourcetoken.tokencode = (int)'#';
1431                }
1432                break;
1433                
1434            case '&':
1435                if (dbvendor == EDbVendor.dbvmysql || dbvendor == EDbVendor.dbvvertica || 
1436                    dbvendor == EDbVendor.dbvsparksql) {
1437                    psourcetoken.tokencode = (int)'&';
1438                }
1439                break;
1440                
1441            case '|':
1442                if (dbvendor == EDbVendor.dbvmysql || dbvendor == EDbVendor.dbvvertica) {
1443                    psourcetoken.tokencode = (int)'|';
1444                }
1445                break;
1446        }
1447    }
1448    
1449    private void handleQuestionMark(TSourceToken psourcetoken) {
1450        if (dbvendor == EDbVendor.dbvpostgresql || dbvendor == EDbVendor.dbvgaussdb || 
1451            dbvendor == EDbVendor.dbvgreenplum) {
1452            psourcetoken.tokencode = TBaseType.JSON_EXIST;
1453        } else if (dbvendor == EDbVendor.dbvodbc) {
1454            psourcetoken.tokencode = '?';
1455        } else if (dbvendor == EDbVendor.dbvsnowflake) {
1456            psourcetoken.tokencode = bind_v;
1457            psourcetoken.tokentype = ETokenType.ttquestionmark;
1458        } else {
1459            psourcetoken.tokencode = ident;
1460        }
1461    }
1462    
1463    private boolean handleLeftShiftOperator(TSourceToken psourcetoken) {
1464        if (dbvendor == EDbVendor.dbvoracle || dbvendor == EDbVendor.dbvmssql ||
1465            dbvendor == EDbVendor.dbvsybase || dbvendor == EDbVendor.dbvpostgresql ||
1466            dbvendor == EDbVendor.dbvgaussdb || dbvendor == EDbVendor.dbvaccess ||
1467            dbvendor == EDbVendor.dbvgreenplum || dbvendor == EDbVendor.dbvsnowflake) {
1468            psourcetoken.tokencode = label_begin;
1469        } else if (dbvendor == EDbVendor.dbvmysql) {
1470            psourcetoken.tokencode = TBaseType.rrw_left_shift;
1471        } else if (dbvendor == EDbVendor.dbvredshift) {
1472            psourcetoken.tokencode = TBaseType.bitwise_shift_left;
1473        } else if (dbvendor == EDbVendor.dbvnetezza) {
1474            psourcetoken.tokencode = TBaseType.rrw_netezza_op_less_less;
1475        }
1476        return true;
1477    }
1478    
1479    private boolean handleRightShiftOperator(TSourceToken psourcetoken) {
1480        if (dbvendor == EDbVendor.dbvoracle || dbvendor == EDbVendor.dbvmssql ||
1481            dbvendor == EDbVendor.dbvsybase || dbvendor == EDbVendor.dbvpostgresql ||
1482            dbvendor == EDbVendor.dbvgaussdb || dbvendor == EDbVendor.dbvgreenplum ||
1483            dbvendor == EDbVendor.dbvaccess || dbvendor == EDbVendor.dbvsnowflake) {
1484            psourcetoken.tokencode = label_end;
1485        } else if (dbvendor == EDbVendor.dbvmysql) {
1486            psourcetoken.tokencode = TBaseType.rrw_right_shift;
1487        } else if (dbvendor == EDbVendor.dbvredshift) {
1488            psourcetoken.tokencode = TBaseType.bitwise_shift_right;
1489        } else if (dbvendor == EDbVendor.dbvnetezza) {
1490            psourcetoken.tokencode = TBaseType.rrw_netezza_op_great_great;
1491        }
1492        return true;
1493    }
1494    
1495    private boolean isStringConcatVendor(EDbVendor vendor) {
1496        return vendor == EDbVendor.dbvdb2 || vendor == EDbVendor.dbvnetezza || 
1497               vendor == EDbVendor.dbvpostgresql || vendor == EDbVendor.dbvgaussdb || 
1498               vendor == EDbVendor.dbvredshift || vendor == EDbVendor.dbvgreenplum || 
1499               vendor == EDbVendor.dbvbigquery || vendor == EDbVendor.dbvsnowflake || 
1500               vendor == EDbVendor.dbvsparksql || vendor == EDbVendor.dbvvertica;
1501    }
1502    
1503    private void handleDefaultToken(TSourceToken psourcetoken) {
1504        psourcetoken.tokentype = ETokenType.ttkeyword;
1505        
1506        if (psourcetoken.tokencode < 255) {
1507            // Single character operators (ASCII characters)
1508            psourcetoken.setAstext(Character.toString(yylvalstr.charAt(0)));
1509            psourcetoken.tokentype = ETokenType.ttsinglecharoperator;
1510            
1511            switch (psourcetoken.tokencode) {
1512                case ',':
1513                    psourcetoken.tokentype = ETokenType.ttcomma;
1514                    break;
1515                case '(':
1516                    psourcetoken.tokentype = ETokenType.ttleftparenthesis;
1517                    break;
1518                case ')':
1519                    psourcetoken.tokentype = ETokenType.ttrightparenthesis;
1520                    break;
1521                case '[':
1522                    psourcetoken.tokentype = ETokenType.ttleftbracket;
1523                    break;
1524                case ']':
1525                    psourcetoken.tokentype = ETokenType.ttrightbracket;
1526                    break;
1527                case '.':
1528                    psourcetoken.tokentype = ETokenType.ttperiod;
1529                    break;
1530                case ';':
1531                    psourcetoken.tokentype = ETokenType.ttsemicolon;
1532                    break;
1533                case '$':
1534                    psourcetoken.tokentype = ETokenType.ttdolorsign;
1535                    break;
1536                case ':':
1537                    psourcetoken.tokentype = ETokenType.ttcolon;
1538                    break;
1539                case '+':
1540                    psourcetoken.tokentype = ETokenType.ttplussign;
1541                    break;
1542                case '-':
1543                    psourcetoken.tokentype = ETokenType.ttminussign;
1544                    break;
1545                case '*':
1546                    psourcetoken.tokentype = ETokenType.ttasterisk;
1547                    break;
1548                case '/':
1549                    psourcetoken.tokentype = ETokenType.ttslash;
1550                    break;
1551                case '^':
1552                    psourcetoken.tokentype = ETokenType.ttcaret;
1553                    break;
1554                case '<':
1555                    psourcetoken.tokentype = ETokenType.ttlessthan;
1556                    break;
1557                case '>':
1558                    psourcetoken.tokentype = ETokenType.ttgreaterthan;
1559                    break;
1560                case '=':
1561                    psourcetoken.tokentype = ETokenType.ttequals;
1562                    break;
1563                case '@':
1564                    if (delimiterchar == '@') {
1565                        psourcetoken.tokencode = (int)';';
1566                        psourcetoken.tokentype = ETokenType.ttsemicolon;
1567                    } else {
1568                        psourcetoken.tokentype = ETokenType.ttatsign;
1569                    }
1570                    break;
1571                case '~':
1572                    psourcetoken.tokentype = ETokenType.tttilde;
1573                    break;
1574                case '&':
1575                    psourcetoken.tokentype = ETokenType.ttampersand;
1576                    break;
1577                case '|':
1578                    psourcetoken.tokentype = ETokenType.ttverticalbar;
1579                    break;
1580                case '?':
1581                    if (isqmarktoident && dbvendor != EDbVendor.dbvodbc && 
1582                        dbvendor != EDbVendor.dbvpostgresql && dbvendor != EDbVendor.dbvgaussdb) {
1583                        psourcetoken.tokencode = ident;
1584                    }
1585                    break;
1586            }
1587        } else if (dbvendor == EDbVendor.dbvhive && psourcetoken.tokencode == TBaseType.hive_equal) {
1588            psourcetoken.tokentype = ETokenType.ttequals;
1589        }
1590    }
1591
1592}
1593