001package gudusoft.gsqlparser; 002 003 004import java.io.BufferedReader; 005import java.io.IOException; 006import java.util.ArrayList; 007import java.util.Arrays; 008import java.util.HashMap; 009import java.util.Locale; 010 011/** 012 * Base lexer of all databases - Core tokenization engine for SQL parsing. 013 * 014 * The lexer reads SQL text character by character and produces tokens that represent 015 * the syntactic units of SQL. This process involves several key components and stages: 016 * 017 * <h3>1. Input Management and Buffering</h3> 018 * <ul> 019 * <li><b>yyinput (BufferedReader)</b>: Primary input source for SQL text</li> 020 * <li><b>yyline (char[])</b>: Current line buffer read from input via readln()</li> 021 * <li><b>buf (char[])</b>: Reversed line buffer for character-by-character processing</li> 022 * <li><b>bufptr</b>: Current position in buf, decrements as characters are consumed</li> 023 * </ul> 024 * 025 * <h3>2. Token Text Formation Process</h3> 026 * <pre> 027 * SQL Input → readln() → yyline[] → reversed into buf[] → get_char() → yytextbuf[] 028 * ↓ 029 * yylex() processing 030 * ↓ 031 * yylvalstr (String) 032 * ↓ 033 * TSourceToken.astext 034 * </pre> 035 * 036 * <h4>Key Variables in Token Text Storage:</h4> 037 * <ul> 038 * <li><b>yytextbuf (char[])</b>: Accumulator buffer for current token being formed</li> 039 * <li><b>yytextlen</b>: Current length of text in yytextbuf</li> 040 * <li><b>yytextbufsize</b>: Allocated size of yytextbuf (dynamically grows)</li> 041 * <li><b>yylvalstr (String)</b>: Final token text string created from yytextbuf</li> 042 * <li><b>literalbuf (StringBuilder)</b>: Special buffer for string literals and complex tokens</li> 043 * </ul> 044 * 045 * <h3>3. 
Position Tracking System</h3> 046 * 047 * The lexer maintains precise position information for every token: 048 * <ul> 049 * <li><b>yylineno</b>: Current line number (1-based)</li> 050 * <li><b>yycolno</b>: Current column number (0-based)</li> 051 * <li><b>offset</b>: Absolute character offset from start of input</li> 052 * <li><b>yylineno_p, yycolno_p, offset_p</b>: Previous position values for token start</li> 053 * </ul> 054 * 055 * <h3>4. Token Creation Workflow</h3> 056 * <ol> 057 * <li>Characters are read via get_char() from buf[] into yytextbuf[]</li> 058 * <li>yylex() identifies token boundaries and type</li> 059 * <li>Token text is extracted: yylvalstr = new String(yytextbuf, 0, yytextlen)</li> 060 * <li>yylexwrap() creates TSourceToken with: 061 * <ul> 062 * <li>astext = yylvalstr (full token text copy)</li> 063 * <li>lineNo = yylineno_p (start line)</li> 064 * <li>columnNo = yycolno_p (start column)</li> 065 * <li>offset = offset_p (absolute position)</li> 066 * </ul> 067 * </li> 068 * </ol> 069 * 070 * <h3>5. Memory Management and Text Copying</h3> 071 * 072 * <b>Current Implementation (Eager Loading):</b> 073 * <ul> 074 * <li>Every token immediately copies its text from yytextbuf to TSourceToken.astext</li> 075 * <li>Original SQL text in yyline is discarded after processing each line</li> 076 * <li>No direct link maintained between token and original input position</li> 077 * </ul> 078 * 079 * <h3>6. 
Tracing Back to Original Position</h3>
 *
 * <b>Currently Possible:</b>
 * <ul>
 * <li>Token stores lineNo, columnNo, and offset</li>
 * <li>These can theoretically locate position in original input</li>
 * </ul>
 *
 * <b>Current Limitations:</b>
 * <ul>
 * <li>Original input text is not retained after line processing</li>
 * <li>yyline buffer is overwritten for each new line</li>
 * <li>No mechanism to retrieve original text from position alone</li>
 * </ul>
 *
 * @author Gudu Software
 */
public class TCustomLexer {

    // Lexer-level token table: per token code, stores key statistics about the
    // occurrences of that token. Mainly used to handle the case where a keyword
    // token is used as a column or table name.
    public static int MAX_TOKEN_SIZE = 2048; // number of all possible tokens
    public static int MAX_TOKEN_COLUMN_SIZE = 10;

    // TOKEN_TABLE layout: MAX_TOKEN_SIZE rows, each with MAX_TOKEN_COLUMN_SIZE
    // integer columns:
    // column 0: occurrence count of the token
    // column 1: x position of the token's first occurrence
    // column 2: y position of the token's first occurrence
    // column 3: x position of the token's last occurrence
    // column 4: y position of the token's last occurrence
    // column 5: position in the token list of the first occurrence
    // column 6: position in the token list of the last occurrence

    public static int COLUMN0_COUNT = 0;
    public static int COLUMN1_FIRST_X = 1;
    public static int COLUMN2_FIRST_Y = 2;
    public static int COLUMN3_LAST_X = 3;
    public static int COLUMN4_LAST_Y = 4;
    public static int COLUMN5_FIRST_POS = 5;
    public static int COLUMN6_LAST_POS = 6;

    /**
     * Pre-allocated strings for single ASCII characters (0-127).
     * Used to avoid creating new String objects for common single-char tokens
     * like '(', ')', ',', ';', '+', '-', '*', '/', etc.
     * This significantly reduces GC pressure in the lexer hot path.
     */
    private static final String[] SINGLE_CHAR_STRINGS = new String[128];
    static {
        for (int i = 0; i < 128; i++) {
            SINGLE_CHAR_STRINGS[i] = String.valueOf((char) i);
        }
    }

    public long[][] TOKEN_TABLE = new long[MAX_TOKEN_SIZE][MAX_TOKEN_COLUMN_SIZE];

    /**
     * Tracks which tokenIds have been written to TOKEN_TABLE during current parse.
     * Used for incremental reset - only clear entries that were actually used.
     */
    private int[] usedTokenIds = new int[512]; // Typical SQL uses <200 distinct token types
    private int usedTokenCount = 0;

    /**
     * Reset TOKEN_TABLE by only clearing entries that were used (incremental clear).
     * This is O(usedTokenCount) instead of O(MAX_TOKEN_SIZE * MAX_TOKEN_COLUMN_SIZE).
     * For typical SQL with ~100 distinct token types, this saves clearing ~20,000 entries.
     */
    public void resetTokenTable() {
        for (int i = 0; i < usedTokenCount; i++) {
            int tokenId = usedTokenIds[i];
            for (int j = 0; j < MAX_TOKEN_COLUMN_SIZE; j++) {
                TOKEN_TABLE[tokenId][j] = 0L;
            }
        }
        usedTokenCount = 0;
    }

    // Record a sighting of a token in TOKEN_TABLE: first occurrence fills all
    // columns; subsequent occurrences bump the count and refresh the
    // "last occurrence" columns. Token codes outside the table range are ignored.
    public void setTokenTableValue( TSourceToken token) {
        if (token == null) return;
        int tokenId = token.tokencode;

        if (tokenId < 0 || tokenId >= MAX_TOKEN_SIZE) {
            return;
        }
        if (TOKEN_TABLE[tokenId][COLUMN0_COUNT] == 0) {
            // First sighting of this token code in the current parse.
            // Track this tokenId for incremental reset
            if (usedTokenCount < usedTokenIds.length) {
                usedTokenIds[usedTokenCount++] = tokenId;
            }
            TOKEN_TABLE[tokenId][COLUMN0_COUNT] = 1;
            TOKEN_TABLE[tokenId][COLUMN1_FIRST_X] = token.lineNo;
            TOKEN_TABLE[tokenId][COLUMN2_FIRST_Y] = token.columnNo;
            TOKEN_TABLE[tokenId][COLUMN3_LAST_X] = token.lineNo;
            TOKEN_TABLE[tokenId][COLUMN4_LAST_Y] = token.columnNo;
            TOKEN_TABLE[tokenId][COLUMN5_FIRST_POS] = token.posinlist;
            TOKEN_TABLE[tokenId][COLUMN6_LAST_POS] = token.posinlist;
        } else {
            // Seen before: bump count, refresh last-occurrence columns only.
            TOKEN_TABLE[tokenId][COLUMN0_COUNT] += 1;
            TOKEN_TABLE[tokenId][COLUMN3_LAST_X] = token.lineNo;
            TOKEN_TABLE[tokenId][COLUMN4_LAST_Y] = token.columnNo;
            TOKEN_TABLE[tokenId][COLUMN6_LAST_POS] = token.posinlist;
        }
    }

    public BufferedReader yyinput;
    // Position tracking: current line/column/absolute offset, plus the "_p"
    // (previous) values which mark where the token being scanned started.
    long yylineno,yycolno,offset,yylineno_p,yycolno_p,offset_p;
    int bufptr,yystate,yysstate,yylstate,yytextlen,yyretval, yytextbufsize,
            yymatches,yysleng;
    char[] yyline;
    /**
     * Reusable buffer for readln() to reduce per-line allocations.
     * Expands as needed for long lines and stays expanded for reuse.
     */
    private char[] lineReadBuffer = new char[4096];
    /**
     * Actual content length in lineReadBuffer/yyline.
     * Used instead of yyline.length since lineReadBuffer is reused without copying.
     */
    private int yylineLen;
    String yylvalstr;
    public String dolqstart = "";//postgresql, start part of Dollar-quoted String Constants
    char yylastchar,yyactchar,yytablechar;
    boolean yydone,yyreject;
    char[] yytextbuf;
    char[] buf;
    int bufsize;
    boolean endOfInput;

    //StringBuffer literalbuf;
    StringBuilder literalbuf;
    int literallen,literalalloc,xcdepth,nchars,slashstar,dashdash;
    boolean isqmarktoident;
    public boolean insqlpluscmd;
    char dummych1,dummych2,dummych3;
    boolean utf8NoBreakSpaceReady = false;

    int nestedLessThan = 0;

    boolean isReadyForFunctionBody = false, isInFunctionBody = false;
    int functionBodyDelimiterIndex = -1;
    ArrayList<String> functionBodyDelimiter = new ArrayList<>();

    // Bit flags describing the ways a keyword may be used.
    public static int keyword_type_reserved = 0x0001;
    public static int keyword_type_keyword = 0x0002;
    public static int keyword_type_identifier = 0x0004;
    public static int keyword_type_column = 0x0008;

    public char delimiterchar;
    public String defaultDelimiterStr;
    public String tmpDelimiter;

    final
static int intial_bufsize = 16384;
    final static char lf = (char)10;
    final static int max_chars = 65536*10*2;
    final static int max_rules = 256*2*10;
    int max_matches = 1024*20*10*2;


    // The constants below follow the order in which they appear in the .l file
    // and must increase in steps of +2. Why the step is +2 has been forgotten
    // and is not yet clarified.
    final static int init = 2;
    final static int xc = 4;
    final static int xd = 6;
    final static int xq = 8;
    final static int xqq = 10; //oracle
    final static int xdolq = 10;//postgresql
    final static int xdbracket = 10;
    final static int xdbrace = 12;
    final static int xbacktick = 12;

    final static int xbracketrs = 12; //redshift
    final static int xqtriple = 14;//bigquery
    final static int xdtriple = 16;//bigquery



    //https://docs.microsoft.com/en-us/sql/sql-server/maximum-capacity-specifications-for-sql-server
    final static int namedatalen = 8060;//255;

    // Token codes. NOTE(review): several distinct names deliberately share the
    // same numeric code (e.g. leftjoin_op/odbc_esc_prefix = 277) — they are
    // vendor-specific aliases for the same lexer token.
    final static int cmtslashstar = 257;
    final static int cmtdoublehyphen = 258;
    final static int lexspace = 259;
    final static int lexnewline = 260;
    final static int fconst = 261;
    final static int sconst = 262;
    final static int iconst = 263;
    final static int ident = 264;
    final static int op = 265;
    final static int cmpop = 266;
    final static int bind_v = 267;
    final static int assign_sign = 268;
    final static int double_dot = 269;
    final static int label_begin = 270;
    final static int label_end = 271;
    final static int substitution_v = 272;
    final static int filepath_sign = TBaseType.filepath_sign;
    final static int sqlpluscmd = 273;
    final static int atversion = TBaseType.atversion; //databricks
    final static int error = 274;
    final static int variable = 275;
    final static int mslabel = 276;
    public final static int bconst = TBaseType.bconst; //postgresql
    final static int leftjoin_op = 277;
    final static int odbc_esc_prefix = 277;
    final static int rightjoin_op = 278;
    final static int odbc_esc_terminator = 278;
    final static int db2label = 279;
    public final static int xconst = TBaseType.xconst; //postgresql
    final static int ref_arrow = 280;
    final static int rw_scriptoptions = 281;
    public final static int UNICODE_ENCODE_ID = 281;
    final static int mysqllabel = 282;
    final static int NAMED_PARAMETER_SIGN = 282; //oracle,db2,snowflake CALL update_order (5000, NEW_STATUS => 'Shipped')
    final static int QUOTED_IDENT = 282;//used in mdx
    final static int BTEQCMD = 282;
    final static int concatenationop = 283;
    final static int pipe_greater = TBaseType.pipe_greater; // StarRocks pipe operator |>
    final static int rw_not_deferrable = 284;
    final static int rw_for1 = 285;
    final static int stmt_delimiter = 286;
    final static int AMP_QUOTED_ID = 285; //used in mdx
    final static int AMP_UNQUOTED_ID = 286; //used in mdx
    final static int m_clause = 287;
    final static int MySQL_CHARSET_NAME = 287;
    final static int typecast = TBaseType.typecast;//postgresql
    final static int k_clause = 288;
    final static int slash_dot = 288;
    final static int outer_join = 289;

    final static int not_equal = 290;

    final static int param = TBaseType.param;
    final static int mysql_null = TBaseType.rrw_mysql_null;

    final static int rw_locktable = 296;
    final static int rw_foreign2 = 297;
    final static int rw_constraint2 = 298;
    final static int rw_primary2 = 299;
    final static int rw_unique2 = 300;
    final static int NEXT_PARAM = TBaseType.NEXT_PARAM;
    final static int POSITIONAL_PARAM = TBaseType.POSITIONAL_PARAM;
    final static int NAMED_PARAM = TBaseType.NAMED_PARAM;

    final static int castoperator = TBaseType.castoperator;
    final static int twocolons = TBaseType.twocolons;
    final static int compoundAssignmentOperator = TBaseType.compoundAssignmentOperator;
    final static int postgresql_function_delimiter = TBaseType.rrw_postgresql_function_delimiter;
    final static int greenplum_function_delimiter = TBaseType.rrw_greenplum_function_delimiter;

    final static int redshift_function_delimiter = TBaseType.rrw_redshift_function_delimiter;
    final static int snowflake_function_delimiter = TBaseType.rrw_snowflake_function_delimiter;



    int[] yypos;// = new int[max_rules + 1]; // 1 based in delphi, Position 0 was not used here
    int[] yystack;// = new int[max_matches + 1]; // 1 based in delphi, Position 0 was not used here
    // ArrayList yystack;

    //String keywordvaluefile,keywordfile,yyk_file,yym_file,yykl_file;
    //String yykh_file,yyml_file,yymh_file,yytl_file,yyth_file,yytint_file,yyt_file;

    EDbVendor dbvendor;
    TSourceToken prevToken = null;

    public void setSqlCharset(String sqlCharset) {
        this.sqlCharset = sqlCharset;
    }

    public String getSqlCharset() {
        return sqlCharset;
    }

    private String sqlCharset = null;

    /**
     * Check if token code represents a single character operator
     */
    protected boolean isSingleCharOperator(int tokenCode) {
        return tokenCode == '(' || tokenCode == ')' ||
                tokenCode == '[' || tokenCode == ']' ||
                tokenCode == '{' || tokenCode == '}' ||
                tokenCode == ',' || tokenCode == ';' ||
                tokenCode == '.' || tokenCode == ':' ||
                tokenCode == '+' || tokenCode == '-' ||
                tokenCode == '*' || tokenCode == '/' ||
                tokenCode == '%' || tokenCode == '=' ||
                tokenCode == '<' || tokenCode == '>' ||
                tokenCode == '!' || tokenCode == '&' ||
                tokenCode == '|' || tokenCode == '^' ||
                tokenCode == '~' || tokenCode == '?';
    }

    /**
     * Check if token code represents a keyword
     */
    protected boolean isKeyword(int tokenCode) {
        // Check if it's in the reserved word range
        // NOTE(review): assumes rrw_select..rrw_abort bound the reserved-word
        // code range in TBaseType — confirm against TBaseType's declarations.
        return tokenCode >= TBaseType.rrw_select && tokenCode < TBaseType.rrw_abort;
    }

    // Initializes all scanner state: text/line buffers at their initial sizes,
    // the literal buffer, the statement delimiter (';'), and the rule-position
    // and match stacks sized from max_rules / LEXER_INIT_MAX_MATCHES.
    public TCustomLexer(){
        //this.yyinput = pbuf;
        yytextbufsize = intial_bufsize - 1;
        yytextbuf = new char[intial_bufsize];
        checkyytextbuf(yytextbufsize);

        bufsize = intial_bufsize - 1;
        buf = new char[intial_bufsize];
        checkbuf(bufsize);

        //literalbuf = new StringBuffer();
        literalbuf = new StringBuilder();
        //keywordList = new TreeMap();
        delimiterchar = ';';
        tmpDelimiter = "";

        xcdepth = 0;
        nchars = 0;
        isqmarktoident = true;

        yylvalstr = "";
        yysstate = 0;
        yylstate = 0;
        yymatches = 0;
        yysleng = 0;
        bufptr = 0;
        yylineno = 0;
        yycolno = 0;
        offset = -1;
        yylineno_p = 1;
        yycolno_p = 1;
        offset_p = 0;

        yypos = new int[max_rules + 1];
        max_matches = TBaseType.LEXER_INIT_MAX_MATCHES;
        yystack = new int[max_matches + 1];

        prevToken = null;
    }

    /*
     * this function is not used.
    private void getkeywordvaluefromfile(){
        int i;
        keywordValueList.clear();
        for(i=0; i<keywordlist.length; i++){
            // System.out.println(keywordlist[i]);
            String[] ss = keywordlist[i].split("[=]");
            keywordValueList.put(ss[0].toUpperCase(),ss[1]);
        }
    }
    */

    // Base implementation: not a keyword. Overridden by database-specific lexers
    // (presumably — base always returns -1).
    public int iskeyword(String str){
        return -1;
    }

    // True when the previously returned token was a newline, or nothing has
    // been returned yet (yyretval == 0).
    public boolean isAtBeginOfLine(){
        return (yyretval == lexnewline || yyretval == 0);
    }

    //public boolean canBeColumnName(int tokencode){
    //    return false;
    //}

    // Base implementation returns null; subclasses map a token code back to its text.
    public String getStringByCode(int tokenCode){
        return null;
    }

    // Base implementation returns 0; subclasses look up the code for a keyword.
    public int getkeywordvalue(String keyword){
        return 0;
    }


    /**
     * @deprecated , please use keywordChecker.isKeyword() instead.
     *
     * because there are so many non-reserved keywords in some databases, it's not suitable to put those
     * non-reserved keywords in lexer and parser.
     *
     * @param keyword keyword text to classify (looked up case-insensitively)
     * @param keywordValueList map from upper-cased keyword text to its token code
     * @param keywordTypeList map from token code to keyword category (1=reserved, 2=non-reserved)
     * @return the keyword type, or NOT_A_KEYWORD when unknown
     */
    public static EKeywordType getKeywordType(String keyword, HashMap<String, Integer> keywordValueList,HashMap<Integer, Integer> keywordTypeList){
        EKeywordType ret = EKeywordType.NOT_A_KEYWORD;
        Integer s = keywordValueList.get(keyword.toUpperCase(Locale.ENGLISH));
        if( s == null) return ret;

        Integer i = keywordTypeList.get(s);
        // NOTE(review): if keywordTypeList has no entry for s, i is null and
        // "i == 1" unboxes it -> NullPointerException; callers presumably
        // guarantee the entry exists — confirm.
        if (i == 1) return EKeywordType.RESERVED_WORD;
        else if (i == 2) return EKeywordType.NON_RESERVED_KEYWORD;
        else return ret;
    }

    /**
     * If the character is ascii, return it directly; if it is a unicode char,
     * convert it. Otherwise the unicode char returned by String.charAt() is not
     * the char we want — e.g. for fullwidth (Chinese) parentheses we actually
     * need the ascii parentheses.
     *
     * @param pYylvalstr string to read from
     * @param index index of the character to fetch
     * @return the character, with fullwidth parentheses mapped to ascii ( and )
     */
    char lexer_charAt(String pYylvalstr,int index){
        char ret = pYylvalstr.charAt(index);
        if (ret > 255){
            // this is a unicode code
            if ((ret == 0xFF08)){
                // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=65280&number=128
                // Unicode code point for FULLWIDTH LEFT PARENTHESIS (, 0xFF08
                //System.out.println(c);
                ret = '(';
            }
            if ( (ret == 0xFF09)){
                // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=65280&number=128
                // Unicode code point for FULLWIDTH RIGHT PARENTHESIS ), 0xFF09
                // System.out.println(c);
                ret = ')';
            }
        }
        return ret;
    }

    // Map the current input char (yyactchar) to the char used for the DFA
    // transition tables (yytablechar). Non-ascii chars collapse to 'a' unless
    // they have a special mapping (fullwidth punctuation, unicode spaces,
    // unicode single quotes).
    void totablechar(){
        //System.out.println("char:"+yyactchar+" ,hex:"+String.format("%04x", (int) yyactchar));
        //System.out.println(String.format("0x%08X", (int)yyactchar)+", "+(char)yyactchar);

        // A NUL before real end of input maps to 255 so it is not mistaken for EOF.
        if (((int) yyactchar == 0) && !endOfInput) {
            yytablechar = (char)255;
            return;
        }

        if ((int)(yyactchar) < 228){ // 228 is ä in unicode
            yytablechar = yyactchar;
            if ((((int)(yyactchar) == 160)&&(utf8NoBreakSpaceReady))||(yyactchar == 0xA0)){
                yytablechar = (char)32;
            }
            utf8NoBreakSpaceReady = false;
//            if (yyactchar == 0x27){
//                insideSingleQuoteStr = !insideSingleQuoteStr;
//            }
        }else{
            yytablechar = (char)'a';//(char)255;

            if ((int)(yyactchar) == 914) { // c2 a0, utf-8 NO-BREAK SPACE
                utf8NoBreakSpaceReady = true;
                yytablechar = (char) 32;
            }else if ((yyactchar == 0x2018)||(yyactchar == 0x2019)){
                if (stringLiteralStartWithUnicodeSingleQuote){
                    // WHERE Name LIKE ‘Acme%’
                    // As above: when the string literal started with a unicode quote,
                    // any newly encountered unicode quote is treated as the string
                    // literal terminator, regardless of whether we are currently
                    // inside a string literal.
                    yytablechar = 0x27; // treat Unicode Character 'LEFT SINGLE QUOTATION MARK' as the ascii char ', but don't change it
                }else {
                    if (insideSingleQuoteStr){
                        // don't change the unicode quote char
                    }else {
                        yytablechar = 0x27; // treat Unicode Character 'LEFT SINGLE QUOTATION MARK' as the ascii char ', but don't change it
                    }
                }

            }else if ((yyactchar == 0x200B)||(yyactchar == 0x3000)||(yyactchar >= 0x2000 && yyactchar <= 0x200A)){
                // Unicode code point 0x200B: treat ZERO WIDTH SPACE as the ascii space char, but don't change it
                // Unicode code point 0x3000: treat IDEOGRAPHIC SPACE (UTF-8: e3 80 80) as the ascii space char, but don't change it
                // Unicode code points 0x2000-0x200A: General Punctuation space characters (EN QUAD, EM QUAD, EN SPACE, EM SPACE, THREE-PER-EM SPACE, etc.)
                yytablechar = 0x20;
            }else if (yyactchar == 0xFF08){
                yytablechar = '('; // treat Unicode code point for FULLWIDTH LEFT PARENTHESIS as the ascii char (, but don't change it
            }else if (yyactchar == 0xFF09){
                yytablechar = ')'; // treat Unicode code point for FULLWIDTH RIGHT PARENTHESIS as the ascii char ), but don't change it
            }else if (yyactchar == 0xFF0C){
                yytablechar = ','; // treat Unicode code point for FULLWIDTH COMMA as the ascii char comma, but don't change it
            }else {
                utf8NoBreakSpaceReady = false;
            }
        }
    }

    // Text of the token accumulated so far in yytextbuf.
    String getyytext(){
        return new String(yytextbuf,0,yytextlen);
    }

    // Grow yytextbuf (doubling) until it can hold more than 'size' chars.
    void checkyytextbuf(int size){
        while ( size >= yytextbufsize){
            yytextbufsize = yytextbufsize * 2 > intial_bufsize ? yytextbufsize * 2: intial_bufsize;
            char[] tmp = new char[yytextbufsize];
            System.arraycopy(yytextbuf,0,tmp,0, yytextbuf.length);
            yytextbuf = tmp;
        }
    }

    // Grow buf (the reversed line buffer, doubling) until it can hold more than 'size' chars.
    void checkbuf(int size){
        // System.out.println("while begin2"+" size:"+size+" bufsize:"+bufsize);
        while ( size >= bufsize){
            bufsize = bufsize * 2 > intial_bufsize ?
bufsize * 2: intial_bufsize;
            char[] tmp = new char[bufsize];
            System.arraycopy(buf,0,tmp,0, buf.length);
            buf = tmp;
        }
        // System.out.println("while end2");
    }

    // Best-effort end-of-input check; an IOException from ready() is treated
    // as end of input. NOTE(review): ready()==false does not strictly imply
    // EOF for all readers — behavior relied on here, confirm for new inputs.
    boolean eof(BufferedReader pbuf){
        try{
            return !pbuf.ready();
        }catch(IOException e){
            return true;
        }
    }

    // Begin scanning a new token: choose the start state (line-start variant
    // when the last char ended a line) and clear the token text and match state.
    void yynew(){
        if (yylastchar != (char)0){
            if(yylastchar == lf){
                yylstate = 1;
            }else{
                yylstate = 0;
            }
        }

        yystate = yysstate + yylstate;
        checkyytextbuf(0);
        yytextlen = 0;
        yymatches = 0;
        yydone = false;
    }

    // Consume one input char and append it to yytextbuf.
    void yyscan(){
        yyactchar = get_char();
        checkyytextbuf(yytextlen + 1);
        yytextlen++;
        yytextbuf[yytextlen - 1] = yyactchar;
    }

    // Record the current token length as the match position for rule n.
    void yymark(int n){
        if (n > max_rules ){
            System.out.println("n > max_rules ");
        }
        yypos[n] = yytextlen;
    }

    // Push matched rule n onto yystack, doubling the stack when it is full.
    void yymatch(int n){
        yymatches++;
        if(yymatches > max_matches){

            int new_yystack[] = new int[max_matches*2+1];
            System.arraycopy(yystack, 0, new_yystack, 0, max_matches);
            yystack = new_yystack;
            max_matches = max_matches * 2;

            // this is valid in JDK 1.6, proguard will report warning and stop
            // yystack = Arrays.copyOf(yystack,max_matches+1);

        }
        yystack [yymatches] = n;
    }

    // Select the winning rule from the match stack: discard rules whose match
    // position was cleared, keep the topmost viable one, and push back any
    // chars scanned beyond its match position via yyless().
    int yyfind(){
        //return -1 mean not found
        int ret = -1;

        yyreject = false;

        while (( yymatches > 0 ) && ( yypos[yystack[yymatches]] == 0 )) {
            yymatches-- ;
        }


        if (yymatches > 0){
            yysleng = yytextlen;
            ret = yystack[yymatches];
            yyless( yypos[ret] );
            yypos[ret] = 0;
            if (yytextlen >0){
                yylastchar = yytextbuf [yytextlen-1];
            }else{
                yylastchar = (char)0;
            }
        }else{
            yyless( 0 );
            yylastchar = (char)0;
        }

        return ret;
    }

    // Default action when no rule matched: consume one char; returns false at
    // end of input (and arms the line-start state for the next token).
    boolean yydefault(){
        boolean ret;

        yyreject = false;
        yyactchar = get_char();
        if (yyactchar != (char)0){
            //put_char( yyactchar );
            ret = true;
        }else{
            yylstate = 1;
            ret = false;
        }
        yylastchar = yyactchar;
        return ret;
    }

    // Push back all scanned chars beyond the first n and truncate yytextbuf to n.
    void yyless(int n){
        for(int i= yytextlen; i> n; i--){
            unget_char(yytextbuf[i - 1]);
        }
        checkyytextbuf(n);
        yytextlen = n;
    }

    // Finish the current token with token code n.
    void returni(int n){
        yyretval = n;
        yydone = true;
    }

    // Finish the current token with the char itself as the token code.
    void returnc(char c){
        yyretval = (int)c;
        yydone = true;
    }

    // Reset all per-input scanning state back to the start-of-input values.
    void yyclear(){
        bufptr = 0;
        yysstate = 0;
        yylstate = 1;
        yylastchar = (char)0;
        yytextlen = 0;
        yylineno = 0;
        yycolno = 0;
        offset = -1;
        // yystext := '';

        yylineno_p = 1;
        yycolno_p = 1;
        offset_p = 0;

    }


    boolean yywrap(){
        return true;
    }
    int getyysstate(){
        return yysstate;
    }

    // Switch the scanner into exclusive start state pstate; entering the xq
    // (single-quoted string) state also records whether the literal started
    // with a unicode single quote (see totablechar()).
    void start(int pstate){
        yysstate = pstate;
        if (pstate == xq){
            insideSingleQuoteStr = true;
            if ((yylvalstr.charAt(0) == 0x2018)||(yylvalstr.charAt(0) == 0x2019)){
                stringLiteralStartWithUnicodeSingleQuote = true;
            }else{
                stringLiteralStartWithUnicodeSingleQuote = false;
            }
        }else{
            insideSingleQuoteStr = false;
        }
    }

    // Push one char back onto buf (the reversed line buffer) and rewind the
    // column/offset counters accordingly.
    void unget_char(char pchar){
        if(bufptr == max_chars)
        {
            System.out.println("input buffer overflow");
        }
        // if (bufptr > 0) {
        bufptr++;
        yycolno--;
        offset--;
        checkbuf(bufptr+1);
        buf[bufptr] = pchar;
        // }

    }

    public void reset(){
        insideSingleQuoteStr = false;
        nestedLessThan = 0;
    }

    public boolean insideSingleQuoteStr = false;
    public boolean stringLiteralStartWithUnicodeSingleQuote = false;


    // Previous (pre-2025-05-04) implementation of readln() removed; it
    // allocated a fresh buffer per line. See version control history.

/**
 * High-performance line reader with optimal buffer management.
 * Reuses lineReadBuffer across calls to reduce per-line allocations.
 * @return char array containing the line including line ending, or null if end of stream
 */
char[] readln() throws IOException {
    if (yyinput == null) return null;

    int position = 0;
    int c;

    // Read characters until line ending or EOF
    while ((c = yyinput.read()) != -1) {
        // Expand buffer if needed (expanded buffer stays for reuse)
        if (position >= lineReadBuffer.length) {
            char[] newBuffer = new char[lineReadBuffer.length * 2];
            System.arraycopy(lineReadBuffer, 0, newBuffer, 0, lineReadBuffer.length);
            lineReadBuffer = newBuffer;
        }

        // Store character
        lineReadBuffer[position++] = (char)c;

        // Check for line endings
        if (c == '\n') {
            break; // LF - end of line
        } else if (c == '\r') {
            // Need to check for CR+LF sequence
            yyinput.mark(1);
            c = yyinput.read();

            if (c == '\n') {
                // CR+LF sequence - include LF in result
                if (position >= lineReadBuffer.length) {
                    char[] newBuffer = new char[lineReadBuffer.length + 1];
System.arraycopy(lineReadBuffer, 0, newBuffer, 0, lineReadBuffer.length);
                    lineReadBuffer = newBuffer;
                }
                lineReadBuffer[position++] = '\n';
            } else {
                // CR only - reset stream to keep the character after CR
                yyinput.reset();
            }
            break;
        }
    }

    // Return null if no characters were read (end of stream)
    if (position == 0) {
        yylineLen = 0;
        return null;
    }

    // Return lineReadBuffer directly, avoiding per-line array allocation.
    // yylineLen holds the actual content length (replaces yyline.length semantic).
    yylineLen = position;
    return lineReadBuffer;
}

    // Fetch the next input char. When buf is exhausted, read the next line and
    // store it REVERSED into buf (buf[1..n] = line chars back to front), so
    // chars are consumed by decrementing bufptr. Returns (char)0 and sets
    // endOfInput at end of input.
    char get_char(){

        char ret ;
        boolean readlineok = true;

        if ((bufptr == 0) && !eof(yyinput) )
        {
            try{
                endOfInput = false;
                yyline = readln();//yyinput.readLine();
                // System.out.println("readln: "+yyline);
                if (yyline == null){
                    readlineok = false;
                } else{
                    yylineno++;
                    yycolno = 0;
                    // Use yylineLen instead of yyline.length since lineReadBuffer is reused
                    bufptr = yylineLen;
                    checkbuf(bufptr+1);
                    // Reverse the line into buf so that consuming chars is a
                    // simple bufptr decrement.
                    for(int k=1;k<=bufptr;k++){
                        buf[k] = yyline[bufptr - k];
                    }
                }
            }catch(IOException e){
                readlineok = false;
            }
        }

        if (! readlineok){
            endOfInput = true;
            return (char)0;
        }

        if (bufptr > 0){
            bufptr--;
            yycolno++;
            offset++;

            return buf[bufptr+1];
            //return yyline.charAt(yyline.length() - (bufptr + 1));
        }else{
            // bufptr--;
            endOfInput = true;
            return (char)0;
        }

    }

    // Reset the literal accumulation buffer for a new literal.
    void startlit(){
        literalbuf.setLength(0);
        literallen = 0;
        literalalloc = 0;
    }

    // Append the first yleng chars of ytext to the literal buffer.
    void addlit(String ytext, int yleng){
        literallen = literallen + yleng;
        literalbuf.append(ytext,0,yleng);
    }

    // Append a single char to the literal buffer.
    void addlitchar(char ychar){
        literallen++;
        literalbuf.append(ychar);
    }

    // Snapshot of the accumulated literal text.
    String litbufdup(){
        return literalbuf.toString();//.intern();
    }

    // True for chars that may appear inside a multi-char operator.
    boolean isopchar(char ch){
        switch (ch) {
            case '~':
            case '!':
            case '@':
            case '#':
            case '^':
            case '&':
            case '|':
            case '`':
            case '?':
            case '$':
            case '%':
                return true;
            default:
                return false;
        }
    }

    // True for chars that form a token by themselves.
    boolean isselfchar(char ch){
        switch (ch) {
            case ',':
            case '(':
            case ')':
            case '[':
            case ']':
            case '.':
            case ';':
            case '$':
            case ':':
            case '+':
            case '-':
            case '*':
            case '/':
            case '%':
            case '^':
            case '<':
            case '>':
            case '=':
            case '!':
            case '{':
            case '}':
                return true;
            default:
                return false;
        }
    }

    // Linear membership test of c in array a.
    boolean charinarray(char c, char[] a){
        int len = a.length;
        for (int i = 0; i < len; i++) {
            if (a[i] == c)
                return true;
        }
        return false;
    }

    void setlengthofliteralbuf(int plen){
        literalbuf.setLength(plen);
    }

    // No-op in the base lexer; presumably overridden by generated lexers — confirm.
    void yyaction(int yyruleno){
    }

    // Base implementation returns 0 (EOF); presumably overridden by generated
    // database-specific lexers — confirm.
    int yylex(){
        return 0;
    }

    /**
     * Fetch the next token via yylex() and populate psourcetoken with its text,
     * position, type and (possibly adjusted) token code.
     *
     * @param psourcetoken token object to fill in
     * @return the token code, or 0 at end of input
     */
    public int yylexwrap(TSourceToken psourcetoken) {
        // Get token code and handle EOF
        if ((psourcetoken.tokencode = yylex()) == 0) return 0;

        // Store token text - use shared strings for single ASCII chars to reduce allocations
        if (yylvalstr == null) {
            if (yytextlen == 1 && yytextbuf[0] < 128) {
                yylvalstr = SINGLE_CHAR_STRINGS[yytextbuf[0]];
            } else {
                yylvalstr = new String(yytextbuf, 0, yytextlen);
            }
        }
        psourcetoken.setAstext(yylvalstr);

        // Record token position information
        psourcetoken.lineNo = yylineno_p;
        psourcetoken.columnNo = yycolno_p;
        psourcetoken.offset = offset_p;
        yylineno_p = yylineno;
        yycolno_p = yycolno + 1;
        offset_p = offset + 1;

        // Track token in token table for analysis
        setTokenTableValue(psourcetoken);

        // Handle token types based on token code
        switch (psourcetoken.tokencode) {
            case cmtdoublehyphen:
                psourcetoken.tokentype = ETokenType.ttsimplecomment;
                if (dbvendor == EDbVendor.dbvmdx && psourcetoken.toString().startsWith("/")) {
                    psourcetoken.tokentype = ETokenType.ttCPPComment;
                }
                break;

            case cmtslashstar:
                psourcetoken.tokentype = ETokenType.ttbracketedcomment;
                break;

            case lexspace:
                psourcetoken.tokentype = ETokenType.ttwhitespace;
                break;

            case lexnewline:
                psourcetoken.tokentype = ETokenType.ttreturn;
                break;

            case bind_v:
                psourcetoken.tokentype = ETokenType.ttbindvar;
                if (dbvendor == EDbVendor.dbvoracle) {
                    psourcetoken.setAstext(psourcetoken.getAstext().replace(TBaseType.newline, ""));
                }
                break;

            case stmt_delimiter:
                psourcetoken.tokentype = ETokenType.ttstmt_delimiter;
                // NOTE(review): recoding a statement delimiter to cmtslashstar
                // looks surprising — confirm this is intentional downstream.
                psourcetoken.tokencode = cmtslashstar;
                break;

            case concatenationop:
                psourcetoken.tokentype = ETokenType.ttconcatenationop;
                break;

            case variable:
                psourcetoken.tokentype = ETokenType.ttsqlvar;
                break;

            case fconst:
            case iconst:
                psourcetoken.tokentype = ETokenType.ttnumber;
                break;

            case sconst:
                psourcetoken.tokentype = ETokenType.ttsqstring;
                psourcetoken.dolqstart = dolqstart;
                dolqstart = "";
1073 break; 1074 1075 case ident: 1076 case QUOTED_IDENT: 1077 handleIdentifierToken(psourcetoken); 1078 break; 1079 1080 case cmpop: 1081 handleComparisonOperator(psourcetoken); 1082 break; 1083 1084 case op: 1085 handleOperatorToken(psourcetoken); 1086 break; 1087 1088 default: 1089 handleDefaultToken(psourcetoken); 1090 break; 1091 } 1092 1093 prevToken = psourcetoken; 1094 return psourcetoken.tokencode; 1095 } 1096 1097 // Helper methods to better organize the complex token handling logic 1098 private void handleIdentifierToken(TSourceToken psourcetoken) { 1099 psourcetoken.tokentype = ETokenType.ttidentifier; 1100 String tokenText = psourcetoken.toString().trim(); 1101 1102 if (tokenText.startsWith("\"")) { 1103 psourcetoken.tokentype = ETokenType.ttdqstring; 1104 } else if (tokenText.startsWith("[")) { 1105 if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) { 1106 psourcetoken.tokentype = ETokenType.ttdbstring; 1107 } 1108 } else if (tokenText.startsWith("{")) { 1109 if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) { 1110 psourcetoken.tokentype = ETokenType.ttbrstring; 1111 if (tokenText.toLowerCase().startsWith("{escape")) { 1112 psourcetoken.tokencode = TBaseType.rrw_sqlserver_odbc_escape; 1113 } 1114 } 1115 } else if (tokenText.startsWith("&")) { 1116 if (dbvendor == EDbVendor.dbvmdx) { 1117 if (psourcetoken.tokencode == QUOTED_IDENT) { 1118 psourcetoken.tokencode = AMP_QUOTED_ID; 1119 } else if (psourcetoken.tokencode == ident) { 1120 psourcetoken.tokencode = AMP_UNQUOTED_ID; 1121 } 1122 } 1123 } else if (tokenText.startsWith(".")) { 1124 if (dbvendor == EDbVendor.dbvteradata) { 1125 psourcetoken.tokentype = ETokenType.ttBTEQCmd; 1126 } 1127 } 1128 } 1129 1130 private void handleComparisonOperator(TSourceToken psourcetoken) { 1131 psourcetoken.tokentype = ETokenType.ttmulticharoperator; 1132 String token = yylvalstr; 1133 1134 // Oracle 26c vector distance operators (3-char, Oracle only) 1135 if (dbvendor == 
EDbVendor.dbvoracle) {
        if (token.equals("<=>")) {
            psourcetoken.tokencode = TBaseType.vector_cosine_distance;
            return;
        } else if (token.equals("<->")) {
            psourcetoken.tokencode = TBaseType.vector_euclidean_distance;
            return;
        } else if (token.equals("<#>")) {
            psourcetoken.tokencode = TBaseType.vector_dot_product;
            return;
        }
    }

    // Inequality spellings: !=, ^=, ~=, <...> all map to not_equal.
    if ((token.startsWith("!") && token.endsWith("=")) ||
        (token.startsWith("^") && token.endsWith("=")) ||
        (token.startsWith("~") && token.endsWith("=")) ||
        (token.startsWith("<") && token.endsWith(">"))) {

        psourcetoken.tokencode = TBaseType.not_equal;

        // Handle MySQL NULL-safe equal: '<=>' (contains '=' between the
        // angle brackets) is reported as plain '='.
        if (token.indexOf("=", 1) > 0 &&
            token.startsWith("<") && token.endsWith(">")) {
            psourcetoken.tokencode = (int)'=';
        }
    } else if (token.startsWith(">") && token.endsWith("=")) {
        psourcetoken.tokencode = TBaseType.great_equal;
    } else if (token.startsWith("<") && token.endsWith("=")) {
        psourcetoken.tokencode = TBaseType.less_equal;
    } else if ((token.startsWith("!") && token.endsWith("<")) ||
               (token.startsWith("^") && token.endsWith("<"))) {
        psourcetoken.tokencode = TBaseType.not_less;
    } else if ((token.startsWith("!") && token.endsWith(">")) ||
               (token.startsWith("^") && token.endsWith(">"))) {
        psourcetoken.tokencode = TBaseType.not_great;
    } else if (token.length() == 2 && token.charAt(0) == ':' && token.charAt(1) == '=') {
        psourcetoken.tokencode = assign_sign;
    }
}

/**
 * Dispatches a generic operator token (code {@code op}) by length and content.
 * Checked in order: the '?' special case, two-char, three-char,
 * comparison-shaped, then single-char operators.
 */
private void handleOperatorToken(TSourceToken psourcetoken) {
    psourcetoken.tokentype = ETokenType.ttmulticharoperator;
    String token = yylvalstr;
    int tokenLength = token.length();
    char firstChar = tokenLength > 0 ? token.charAt(0) : '\0';
    char secondChar = tokenLength > 1 ?
token.charAt(1) : '\0';

    // Handle question mark specially (only when '?'-to-identifier mode is on)
    if (token.equals("?") && isqmarktoident) {
        handleQuestionMark(psourcetoken);
        return;
    }

    // Handle special two-character operators
    if (tokenLength == 2) {
        if (handleTwoCharOperator(psourcetoken, firstChar, secondChar)) {
            return;
        }
    }

    // Handle special three-character operators
    if (tokenLength == 3) {
        if (handleThreeCharOperator(psourcetoken, firstChar, secondChar, token.charAt(2))) {
            return;
        }
    }

    // Handle comparison operators spelled with operator characters
    if (handleComparisonOp(psourcetoken, token)) {
        return;
    }

    // Handle single character operators
    if (tokenLength == 1) {
        handleSingleCharOperator(psourcetoken, firstChar);
    }
}

/**
 * Two-character operators, keyed on the first character. Returns true when
 * the pair was recognized; the token code may stay unchanged for vendors
 * that do not support the operator.
 */
private boolean handleTwoCharOperator(TSourceToken psourcetoken, char firstChar, char secondChar) {
    switch (firstChar) {
        case '<':
            if (secondChar == '<') {
                return handleLeftShiftOperator(psourcetoken);
            } else if (secondChar == '@') {
                // '<@' - "is contained by" (JSON/array style)
                psourcetoken.tokencode = TBaseType.JSON_RIGHT_CONTAIN;
                return true;
            }
            break;

        case '>':
            if (secondChar == '>') {
                return handleRightShiftOperator(psourcetoken);
            }
            break;

        case '=':
            if (secondChar == '>') {
                // '=>': ODBC >=, PostgreSQL/GaussDB assignment, else named parameter
                if (dbvendor == EDbVendor.dbvodbc) {
                    psourcetoken.tokencode = TBaseType.great_equal;
                } else if (dbvendor == EDbVendor.dbvpostgresql || dbvendor == EDbVendor.dbvgaussdb) {
                    psourcetoken.tokencode = TBaseType.assign_sign;
                } else {
                    psourcetoken.tokencode = NAMED_PARAMETER_SIGN;
                }
                return true;
            } else if (secondChar == '*') {
                // '=*': old-style right outer join (T-SQL / Sybase)
                if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
                    psourcetoken.tokencode = rightjoin_op;
                }
                return true;
            } else if (secondChar == '<') {
                // '=<': ODBC spelling of <=
                if (dbvendor == EDbVendor.dbvodbc) {
                    psourcetoken.tokencode = TBaseType.less_equal;
}
                return true;
            } else if (secondChar == '=') {
                // '==': Spark SQL equality
                if (dbvendor == EDbVendor.dbvsparksql) {
                    psourcetoken.tokencode = '=';
                }
                return true;
            }
            break;

        case '-':
            if (secondChar == '>') {
                // '->': JSON field access for pg-family/MySQL, else a reference arrow
                if (dbvendor == EDbVendor.dbvpostgresql || dbvendor == EDbVendor.dbvgaussdb
                    || dbvendor == EDbVendor.dbvgreenplum || dbvendor == EDbVendor.dbvmysql) {
                    psourcetoken.tokencode = TBaseType.JSON_GET_OBJECT;
                } else {
                    psourcetoken.tokencode = ref_arrow;
                }
                return true;
            } else if (secondChar == '=') {
                // '-=': T-SQL/Sybase compound assignment
                if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
                    psourcetoken.tokencode = compoundAssignmentOperator;
                }
                return true;
            }
            break;

        case '.':
            if (secondChar == '.') {
                // '..': range operator for the vendors that support it
                if (dbvendor == EDbVendor.dbvdb2 || dbvendor == EDbVendor.dbvoracle
                    || dbvendor == EDbVendor.dbvmysql || dbvendor == EDbVendor.dbvhana) {
                    psourcetoken.tokencode = double_dot;
                }
                return true;
            }
            break;

        case '*':
            if (secondChar == '=') {
                // '*=': old-style left outer join (T-SQL / Sybase)
                if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
                    psourcetoken.tokencode = leftjoin_op;
                }
                return true;
            } else if (secondChar == '*') {
                // '**': exponentiation (Teradata / Netezza)
                if (dbvendor == EDbVendor.dbvteradata || dbvendor == EDbVendor.dbvnetezza) {
                    psourcetoken.tokencode = TBaseType.exponentiate;
                }
                return true;
            }
            break;

        case '|':
            if (secondChar == '|') {
                // '||': MySQL logical OR; string concatenation elsewhere
                if (dbvendor == EDbVendor.dbvmysql) {
                    psourcetoken.tokencode = TBaseType.logical_or;
                } else if (isStringConcatVendor(dbvendor)) {
                    psourcetoken.tokencode = TBaseType.concatenationop;
                }
                return true;
            } else if (secondChar == '>') {
                // '|>': Spark SQL pipe-arrow, generic pipe-greater elsewhere
                if (dbvendor == EDbVendor.dbvsparksql) {
                    psourcetoken.tokencode = TBaseType.sparksql_pipe_arrow;
                } else {
                    psourcetoken.tokencode = TBaseType.pipe_greater;
                }
                return true;
            } else if (secondChar == '/') {
if (dbvendor == EDbVendor.dbvredshift) {
                    // '|/': Redshift square-root operator
                    psourcetoken.tokencode = TBaseType.square_root;
                }
                return true;
            }
            break;

        case '&':
            if (secondChar == '&') {
                // '&&': MySQL logical AND
                if (dbvendor == EDbVendor.dbvmysql) {
                    psourcetoken.tokencode = TBaseType.logical_and;
                }
                return true;
            }
            break;

        case '?':
            if (secondChar == '|') {
                // '?|': any key exists (JSON)
                psourcetoken.tokencode = TBaseType.JSON_ANY_EXIST;
                return true;
            } else if (secondChar == '&') {
                // '?&': all keys exist (JSON)
                psourcetoken.tokencode = TBaseType.JSON_ALL_EXIST;
                return true;
            }
            break;

        case '@':
            if (secondChar == '>') {
                // '@>': contains (JSON/array)
                psourcetoken.tokencode = TBaseType.JSON_LEFT_CONTAIN;
                return true;
            }
            break;

        case '#':
            if (secondChar == '>') {
                // '#>': JSON object at path
                psourcetoken.tokencode = TBaseType.JSON_GET_OBJECT_AT_PATH;
                return true;
            }
            break;

        case ':':
            if (secondChar == '=') {
                psourcetoken.tokencode = assign_sign;
                return true;
            }
            break;
    }

    // Handle compound assignment operators (op + '=')
    if ((firstChar == '+' || firstChar == '-' || firstChar == '*' ||
         firstChar == '/' || firstChar == '%' || firstChar == '&' ||
         firstChar == '^' || firstChar == '|') && secondChar == '=') {
        if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
            psourcetoken.tokencode = compoundAssignmentOperator;
            return true;
        } else if (dbvendor == EDbVendor.dbvmysql && firstChar == '^' && secondChar == '=') {
            // MySQL '^=' is an inequality spelling
            psourcetoken.tokencode = not_equal;
            return true;
        }
    }

    return false;
}

/**
 * Three-character operators; returns true when the triple was recognized.
 */
private boolean handleThreeCharOperator(TSourceToken psourcetoken, char firstChar, char secondChar, char thirdChar) {
    if (firstChar == '-' && secondChar == '>' && thirdChar == '>') {
        // '->>': JSON field as text
        psourcetoken.tokencode = TBaseType.JSON_GET_TEXT;
        return true;
    } else if (firstChar == '#' && secondChar == '>' && thirdChar == '>') {
        psourcetoken.tokencode =
TBaseType.JSON_GET_TEXT_AT_PATH;
        return true;
    } else if (firstChar == '|' && secondChar == '|' && thirdChar == '/') {
        // '||/': Redshift cube-root operator
        if (dbvendor == EDbVendor.dbvredshift) {
            psourcetoken.tokencode = TBaseType.cube_root;
            return true;
        }
    }
    return false;
}

/**
 * Maps comparison-shaped operator text reaching the generic {@code op} path
 * to a comparison token code. Chain order matters: inequality spellings are
 * matched before the >=/<= forms. Returns true when a mapping was applied.
 */
private boolean handleComparisonOp(TSourceToken psourcetoken, String token) {
    if ((token.startsWith("!") && token.endsWith("=")) ||
        (token.startsWith("^") && token.endsWith("=")) ||
        (token.startsWith("<") && token.endsWith(">"))) {
        psourcetoken.tokencode = TBaseType.not_equal;
        return true;
    } else if (token.startsWith(">") && token.endsWith("=")) {
        psourcetoken.tokencode = TBaseType.great_equal;
        return true;
    } else if (token.startsWith("<") && token.endsWith("=")) {
        psourcetoken.tokencode = TBaseType.less_equal;
        return true;
    } else if ((token.startsWith("!") && token.endsWith("<")) ||
               (token.startsWith("^") && token.endsWith("<"))) {
        psourcetoken.tokencode = TBaseType.not_less;
        return true;
    } else if ((token.startsWith("!") && token.endsWith(">")) ||
               (token.startsWith("^") && token.endsWith(">"))) {
        psourcetoken.tokencode = TBaseType.not_great;
        return true;
    }
    return false;
}

/**
 * Vendor-specific handling of single-character operators reported as
 * {@code op}; only the listed vendors remap the code to the ASCII char.
 */
private void handleSingleCharOperator(TSourceToken psourcetoken, char ch) {
    switch (ch) {
        case '~':
            if (dbvendor == EDbVendor.dbvmysql || dbvendor == EDbVendor.dbvredshift ||
                dbvendor == EDbVendor.dbvsnowflake) {
                psourcetoken.tokencode = (int)'~';
            }
            break;

        case '#':
            if (dbvendor == EDbVendor.dbvmssql) {
                psourcetoken.tokencode = (int)'#';
            }
            break;

        case '&':
            if (dbvendor == EDbVendor.dbvmysql || dbvendor == EDbVendor.dbvvertica ||
                dbvendor == EDbVendor.dbvsparksql) {
                psourcetoken.tokencode = (int)'&';
            }
            break;

        case '|':
            if (dbvendor == EDbVendor.dbvmysql || dbvendor ==
EDbVendor.dbvvertica) {
                psourcetoken.tokencode = (int)'|';
            }
            break;
    }
}

/**
 * Resolves a lone '?' (when qmark-to-identifier mode is active) to the
 * vendor-appropriate token: JSON-exists for the pg family, a plain '?'
 * placeholder for ODBC, a bind variable for Snowflake, otherwise an identifier.
 */
private void handleQuestionMark(TSourceToken psourcetoken) {
    switch (dbvendor) {
        case dbvpostgresql:
        case dbvgaussdb:
        case dbvgreenplum:
            psourcetoken.tokencode = TBaseType.JSON_EXIST;
            break;
        case dbvodbc:
            psourcetoken.tokencode = '?';
            break;
        case dbvsnowflake:
            psourcetoken.tokencode = bind_v;
            psourcetoken.tokentype = ETokenType.ttquestionmark;
            break;
        default:
            psourcetoken.tokencode = ident;
            break;
    }
}

/**
 * '<<' disambiguation: label opener for most vendors, left shift for
 * MySQL/Redshift, a dedicated operator for Netezza. Always consumes the pair.
 */
private boolean handleLeftShiftOperator(TSourceToken psourcetoken) {
    switch (dbvendor) {
        case dbvoracle:
        case dbvmssql:
        case dbvsybase:
        case dbvpostgresql:
        case dbvgaussdb:
        case dbvaccess:
        case dbvgreenplum:
        case dbvsnowflake:
            psourcetoken.tokencode = label_begin;
            break;
        case dbvmysql:
            psourcetoken.tokencode = TBaseType.rrw_left_shift;
            break;
        case dbvredshift:
            psourcetoken.tokencode = TBaseType.bitwise_shift_left;
            break;
        case dbvnetezza:
            psourcetoken.tokencode = TBaseType.rrw_netezza_op_less_less;
            break;
        default:
            // other vendors: leave the token code untouched
            break;
    }
    return true;
}

/** '>>' disambiguation, mirroring handleLeftShiftOperator. */
private boolean handleRightShiftOperator(TSourceToken psourcetoken) {
    if (dbvendor == EDbVendor.dbvoracle || dbvendor == EDbVendor.dbvmssql ||
        dbvendor == EDbVendor.dbvsybase || dbvendor == EDbVendor.dbvpostgresql ||
        dbvendor == EDbVendor.dbvgaussdb || dbvendor == EDbVendor.dbvgreenplum ||
        dbvendor == EDbVendor.dbvaccess || dbvendor == EDbVendor.dbvsnowflake) {
        psourcetoken.tokencode = label_end;
    } else if (dbvendor == EDbVendor.dbvmysql) {
        psourcetoken.tokencode = TBaseType.rrw_right_shift;
    }
else if (dbvendor == EDbVendor.dbvredshift) {
        psourcetoken.tokencode = TBaseType.bitwise_shift_right;
    } else if (dbvendor == EDbVendor.dbvnetezza) {
        psourcetoken.tokencode = TBaseType.rrw_netezza_op_great_great;
    }
    return true;
}

/** Vendors for which '||' is the SQL string concatenation operator. */
private boolean isStringConcatVendor(EDbVendor vendor) {
    switch (vendor) {
        case dbvdb2:
        case dbvnetezza:
        case dbvpostgresql:
        case dbvgaussdb:
        case dbvredshift:
        case dbvgreenplum:
        case dbvbigquery:
        case dbvsnowflake:
        case dbvsparksql:
        case dbvvertica:
            return true;
        default:
            return false;
    }
}

/**
 * Fallback classification: keywords by default; token codes below 255 are
 * single ASCII characters and get a specific punctuation/operator type.
 */
private void handleDefaultToken(TSourceToken psourcetoken) {
    psourcetoken.tokentype = ETokenType.ttkeyword;

    if (psourcetoken.tokencode < 255) {
        // Single character operators (ASCII characters).
        // NOTE(review): assumes yylvalstr is non-empty here - confirm upstream.
        psourcetoken.setAstext(Character.toString(yylvalstr.charAt(0)));
        psourcetoken.tokentype = ETokenType.ttsinglecharoperator;

        switch (psourcetoken.tokencode) {
            case ',':
                psourcetoken.tokentype = ETokenType.ttcomma;
                break;
            case '(':
                psourcetoken.tokentype = ETokenType.ttleftparenthesis;
                break;
            case ')':
                psourcetoken.tokentype = ETokenType.ttrightparenthesis;
                break;
            case '[':
                psourcetoken.tokentype = ETokenType.ttleftbracket;
                break;
            case ']':
                psourcetoken.tokentype = ETokenType.ttrightbracket;
                break;
            case '.':
                psourcetoken.tokentype = ETokenType.ttperiod;
                break;
            case ';':
                psourcetoken.tokentype = ETokenType.ttsemicolon;
                break;
            case '$':
                psourcetoken.tokentype = ETokenType.ttdolorsign;
                break;
            case ':':
                psourcetoken.tokentype = ETokenType.ttcolon;
                break;
            case '+':
                psourcetoken.tokentype = ETokenType.ttplussign;
                break;
            case '-':
                psourcetoken.tokentype = ETokenType.ttminussign;
break;
            case '*':
                psourcetoken.tokentype = ETokenType.ttasterisk;
                break;
            case '/':
                psourcetoken.tokentype = ETokenType.ttslash;
                break;
            case '^':
                psourcetoken.tokentype = ETokenType.ttcaret;
                break;
            case '<':
                psourcetoken.tokentype = ETokenType.ttlessthan;
                break;
            case '>':
                psourcetoken.tokentype = ETokenType.ttgreaterthan;
                break;
            case '=':
                psourcetoken.tokentype = ETokenType.ttequals;
                break;
            case '@':
                // When '@' is configured as the statement delimiter, present it
                // to the parser as a semicolon (normal statement terminator).
                if (delimiterchar == '@') {
                    psourcetoken.tokencode = (int)';';
                    psourcetoken.tokentype = ETokenType.ttsemicolon;
                } else {
                    psourcetoken.tokentype = ETokenType.ttatsign;
                }
                break;
            case '~':
                psourcetoken.tokentype = ETokenType.tttilde;
                break;
            case '&':
                psourcetoken.tokentype = ETokenType.ttampersand;
                break;
            case '|':
                psourcetoken.tokentype = ETokenType.ttverticalbar;
                break;
            case '?':
                // '?' becomes an identifier for vendors that do not use it as a
                // parameter marker (ODBC/PostgreSQL/GaussDB keep the raw '?').
                if (isqmarktoident && dbvendor != EDbVendor.dbvodbc &&
                    dbvendor != EDbVendor.dbvpostgresql && dbvendor != EDbVendor.dbvgaussdb) {
                    psourcetoken.tokencode = ident;
                }
                break;
        }
    } else if (dbvendor == EDbVendor.dbvhive && psourcetoken.tokencode == TBaseType.hive_equal) {
        // Hive's '==' equality is reported with the equals token type.
        psourcetoken.tokentype = ETokenType.ttequals;
    }
}

}