001package gudusoft.gsqlparser; 002 003 004import java.io.BufferedReader; 005import java.io.IOException; 006import java.util.ArrayList; 007import java.util.Arrays; 008import java.util.HashMap; 009import java.util.Locale; 010 011/** 012 * Base lexer of all databases - Core tokenization engine for SQL parsing. 013 * 014 * The lexer reads SQL text character by character and produces tokens that represent 015 * the syntactic units of SQL. This process involves several key components and stages: 016 * 017 * <h3>1. Input Management and Buffering</h3> 018 * <ul> 019 * <li><b>yyinput (BufferedReader)</b>: Primary input source for SQL text</li> 020 * <li><b>yyline (char[])</b>: Current line buffer read from input via readln()</li> 021 * <li><b>buf (char[])</b>: Reversed line buffer for character-by-character processing</li> 022 * <li><b>bufptr</b>: Current position in buf, decrements as characters are consumed</li> 023 * </ul> 024 * 025 * <h3>2. Token Text Formation Process</h3> 026 * <pre> 027 * SQL Input → readln() → yyline[] → reversed into buf[] → get_char() → yytextbuf[] 028 * ↓ 029 * yylex() processing 030 * ↓ 031 * yylvalstr (String) 032 * ↓ 033 * TSourceToken.astext 034 * </pre> 035 * 036 * <h4>Key Variables in Token Text Storage:</h4> 037 * <ul> 038 * <li><b>yytextbuf (char[])</b>: Accumulator buffer for current token being formed</li> 039 * <li><b>yytextlen</b>: Current length of text in yytextbuf</li> 040 * <li><b>yytextbufsize</b>: Allocated size of yytextbuf (dynamically grows)</li> 041 * <li><b>yylvalstr (String)</b>: Final token text string created from yytextbuf</li> 042 * <li><b>literalbuf (StringBuilder)</b>: Special buffer for string literals and complex tokens</li> 043 * </ul> 044 * 045 * <h3>3. 
Position Tracking System</h3> 046 * 047 * The lexer maintains precise position information for every token: 048 * <ul> 049 * <li><b>yylineno</b>: Current line number (1-based)</li> 050 * <li><b>yycolno</b>: Current column number (0-based)</li> 051 * <li><b>offset</b>: Absolute character offset from start of input</li> 052 * <li><b>yylineno_p, yycolno_p, offset_p</b>: Previous position values for token start</li> 053 * </ul> 054 * 055 * <h3>4. Token Creation Workflow</h3> 056 * <ol> 057 * <li>Characters are read via get_char() from buf[] into yytextbuf[]</li> 058 * <li>yylex() identifies token boundaries and type</li> 059 * <li>Token text is extracted: yylvalstr = new String(yytextbuf, 0, yytextlen)</li> 060 * <li>yylexwrap() creates TSourceToken with: 061 * <ul> 062 * <li>astext = yylvalstr (full token text copy)</li> 063 * <li>lineNo = yylineno_p (start line)</li> 064 * <li>columnNo = yycolno_p (start column)</li> 065 * <li>offset = offset_p (absolute position)</li> 066 * </ul> 067 * </li> 068 * </ol> 069 * 070 * <h3>5. Memory Management and Text Copying</h3> 071 * 072 * <b>Current Implementation (Eager Loading):</b> 073 * <ul> 074 * <li>Every token immediately copies its text from yytextbuf to TSourceToken.astext</li> 075 * <li>Original SQL text in yyline is discarded after processing each line</li> 076 * <li>No direct link maintained between token and original input position</li> 077 * </ul> 078 * 079 * <h3>6. 
Tracing Back to Original Position</h3> 080 * 081 * <b>Currently Possible:</b> 082 * <ul> 083 * <li>Token stores lineNo, columnNo, and offset</li> 084 * <li>These can theoretically locate position in original input</li> 085 * </ul> 086 * 087 * <b>Current Limitations:</b> 088 * <ul> 089 * <li>Original input text is not retained after line processing</li> 090 * <li>yyline buffer is overwritten for each new line</li> 091 * <li>No mechanism to retrieve original text from position alone</li> 092 * </ul> 093 * 094 * @author Gudu Software 095 */ 096public class TCustomLexer { 097 098 // 在 lexer level 创建 token table, 按照 token code存储所有 token 的一些关键信息,主要用于处理一个关键字token被用作column,table name的情况 099 public static int MAX_TOKEN_SIZE = 2048; // 所有可能的token的数量 100 public static int MAX_TOKEN_COLUMN_SIZE = 10; 101 102 // 定义一个具有 MAX_TOKEN_SIZE 个元素的常量数组,每个元素有 MAX_TOKEN_COLUMN_SIZE 列, 列的类型为整数 103 // column 0: 代表该token出现的次数 104 // column 1: 代表该token第一次出现的 x position 105 // column 2: 代表该token第一次出现的 y position 106 // column 3: 代表该token最后一次出现的 x position 107 // column 4: 代表该token最后一次出现的 y position 108 // column 5: 代表该token第一次出现的 position in the token list 109 // column 6: 代表该token最后一次出现的 position in the token list 110 111 public static int COLUMN0_COUNT = 0; 112 public static int COLUMN1_FIRST_X = 1; 113 public static int COLUMN2_FIRST_Y = 2; 114 public static int COLUMN3_LAST_X = 3; 115 public static int COLUMN4_LAST_Y = 4; 116 public static int COLUMN5_FIRST_POS = 5; 117 public static int COLUMN6_LAST_POS = 6; 118 119 /** 120 * Pre-allocated strings for single ASCII characters (0-127). 121 * Used to avoid creating new String objects for common single-char tokens 122 * like '(', ')', ',', ';', '+', '-', '*', '/', etc. 123 * This significantly reduces GC pressure in the lexer hot path. 
124 */ 125 private static final String[] SINGLE_CHAR_STRINGS = new String[128]; 126 static { 127 for (int i = 0; i < 128; i++) { 128 SINGLE_CHAR_STRINGS[i] = String.valueOf((char) i); 129 } 130 } 131 132 public long[][] TOKEN_TABLE = new long[MAX_TOKEN_SIZE][MAX_TOKEN_COLUMN_SIZE]; 133 134 /** 135 * Tracks which tokenIds have been written to TOKEN_TABLE during current parse. 136 * Used for incremental reset - only clear entries that were actually used. 137 */ 138 private int[] usedTokenIds = new int[512]; // Typical SQL uses <200 distinct token types 139 private int usedTokenCount = 0; 140 141 /** 142 * Reset TOKEN_TABLE by only clearing entries that were used (incremental clear). 143 * This is O(usedTokenCount) instead of O(MAX_TOKEN_SIZE * MAX_TOKEN_COLUMN_SIZE). 144 * For typical SQL with ~100 distinct token types, this saves clearing ~20,000 entries. 145 */ 146 public void resetTokenTable() { 147 for (int i = 0; i < usedTokenCount; i++) { 148 int tokenId = usedTokenIds[i]; 149 for (int j = 0; j < MAX_TOKEN_COLUMN_SIZE; j++) { 150 TOKEN_TABLE[tokenId][j] = 0L; 151 } 152 } 153 usedTokenCount = 0; 154 } 155 156 // define a function to set value when token is found, input is token id, a token with TSourceToken type 157 public void setTokenTableValue( TSourceToken token) { 158 if (token == null) return; 159 int tokenId = token.tokencode; 160 161 if (tokenId < 0 || tokenId >= MAX_TOKEN_SIZE) { 162 return; 163 } 164 if (TOKEN_TABLE[tokenId][COLUMN0_COUNT] == 0) { 165 // Track this tokenId for incremental reset 166 if (usedTokenCount < usedTokenIds.length) { 167 usedTokenIds[usedTokenCount++] = tokenId; 168 } 169 TOKEN_TABLE[tokenId][COLUMN0_COUNT] = 1; 170 TOKEN_TABLE[tokenId][COLUMN1_FIRST_X] = token.lineNo; 171 TOKEN_TABLE[tokenId][COLUMN2_FIRST_Y] = token.columnNo; 172 TOKEN_TABLE[tokenId][COLUMN3_LAST_X] = token.lineNo; 173 TOKEN_TABLE[tokenId][COLUMN4_LAST_Y] = token.columnNo; 174 TOKEN_TABLE[tokenId][COLUMN5_FIRST_POS] = token.posinlist; 175 
TOKEN_TABLE[tokenId][COLUMN6_LAST_POS] = token.posinlist; 176 } else { 177 TOKEN_TABLE[tokenId][COLUMN0_COUNT] += 1; 178 TOKEN_TABLE[tokenId][COLUMN3_LAST_X] = token.lineNo; 179 TOKEN_TABLE[tokenId][COLUMN4_LAST_Y] = token.columnNo; 180 TOKEN_TABLE[tokenId][COLUMN6_LAST_POS] = token.posinlist; 181 } 182 } 183 184 public BufferedReader yyinput; 185 long yylineno,yycolno,offset,yylineno_p,yycolno_p,offset_p; 186 int bufptr,yystate,yysstate,yylstate,yytextlen,yyretval, yytextbufsize, 187 yymatches,yysleng; 188 char[] yyline; 189 /** 190 * Reusable buffer for readln() to reduce per-line allocations. 191 * Expands as needed for long lines and stays expanded for reuse. 192 */ 193 private char[] lineReadBuffer = new char[4096]; 194 /** 195 * Actual content length in lineReadBuffer/yyline. 196 * Used instead of yyline.length since lineReadBuffer is reused without copying. 197 */ 198 private int yylineLen; 199 String yylvalstr; 200 public String dolqstart = "";//postgresql, start part of Dollar-quoted String Constants 201 char yylastchar,yyactchar,yytablechar; 202 boolean yydone,yyreject; 203 char[] yytextbuf; 204 char[] buf; 205 int bufsize; 206 boolean endOfInput; 207 208 //StringBuffer literalbuf; 209 StringBuilder literalbuf; 210 int literallen,literalalloc,xcdepth,nchars,slashstar,dashdash; 211 boolean isqmarktoident; 212 public boolean insqlpluscmd; 213 char dummych1,dummych2,dummych3; 214 boolean utf8NoBreakSpaceReady = false; 215 216 int nestedLessThan = 0; 217 218 boolean isReadyForFunctionBody = false, isInFunctionBody = false; 219 int functionBodyDelimiterIndex = -1; 220 ArrayList<String> functionBodyDelimiter = new ArrayList<>(); 221 222 public static int keyword_type_reserved = 0x0001; 223 public static int keyword_type_keyword = 0x0002; 224 public static int keyword_type_identifier = 0x0004; 225 public static int keyword_type_column = 0x0008; 226 227 public char delimiterchar; 228 public String defaultDelimiterStr; 229 public String tmpDelimiter; 230 231 final 
static int intial_bufsize = 16384; 232 final static char lf = (char)10; 233 final static int max_chars = 65536*10*2; 234 final static int max_rules = 256*2*10; 235 int max_matches = 1024*20*10*2; 236 237 238 // 下面这些常量按照在 l 文件中出现的次序,必须以 +2 的方式递加. 为什么以 +2 的方式递加 原因忘了,尚未搞清楚。 239 final static int init = 2; 240 final static int xc = 4; 241 final static int xd = 6; 242 final static int xq = 8; 243 final static int xqq = 10; //oracle 244 final static int xdolq = 10;//postgresql 245 final static int xdbracket = 10; 246 final static int xdbrace = 12; 247 final static int xbacktick = 12; 248 249 final static int xbracketrs = 12; //redshift 250 final static int xqtriple = 14;//bigquery 251 final static int xdtriple = 16;//bigquery 252 253 254 255 //https://docs.microsoft.com/en-us/sql/sql-server/maximum-capacity-specifications-for-sql-server 256 final static int namedatalen = 8060;//255; 257 258 final static int cmtslashstar = 257; 259 final static int cmtdoublehyphen = 258; 260 final static int lexspace = 259; 261 final static int lexnewline = 260; 262 final static int fconst = 261; 263 final static int sconst = 262; 264 final static int iconst = 263; 265 final static int ident = 264; 266 final static int op = 265; 267 final static int cmpop = 266; 268 final static int bind_v = 267; 269 final static int assign_sign = 268; 270 final static int double_dot = 269; 271 final static int label_begin = 270; 272 final static int label_end = 271; 273 final static int substitution_v = 272; 274 final static int filepath_sign = TBaseType.filepath_sign; 275 final static int sqlpluscmd = 273; 276 final static int atversion = TBaseType.atversion; //databricks 277 final static int error = 274; 278 final static int variable = 275; 279 final static int mslabel = 276; 280 public final static int bconst = TBaseType.bconst; //postgresql 281 final static int leftjoin_op = 277; 282 final static int odbc_esc_prefix = 277; 283 final static int rightjoin_op = 278; 284 final static int 
odbc_esc_terminator = 278; 285 final static int db2label = 279; 286 public final static int xconst = TBaseType.xconst; //postgresql 287 final static int ref_arrow = 280; 288 final static int rw_scriptoptions = 281; 289 public final static int UNICODE_ENCODE_ID = 281; 290 final static int mysqllabel = 282; 291 final static int NAMED_PARAMETER_SIGN = 282; //oracle,db2,snowflake CALL update_order (5000, NEW_STATUS => 'Shipped') 292 final static int QUOTED_IDENT = 282;//used in mdx 293 final static int BTEQCMD = 282; 294 final static int concatenationop = 283; 295 final static int rw_not_deferrable = 284; 296 final static int rw_for1 = 285; 297 final static int stmt_delimiter = 286; 298 final static int AMP_QUOTED_ID = 285; //used in mdx 299 final static int AMP_UNQUOTED_ID = 286; //used in mdx 300 final static int m_clause = 287; 301 final static int MySQL_CHARSET_NAME = 287; 302 final static int typecast = TBaseType.typecast;//postgresql 303 final static int k_clause = 288; 304 final static int slash_dot = 288; 305 final static int outer_join = 289; 306 307 final static int not_equal = 290; 308 309 final static int param = TBaseType.param; 310 final static int mysql_null = TBaseType.rrw_mysql_null; 311 312 final static int rw_locktable = 296; 313 final static int rw_foreign2 = 297; 314 final static int rw_constraint2 = 298; 315 final static int rw_primary2 = 299; 316 final static int rw_unique2 = 300; 317 final static int NEXT_PARAM = TBaseType.NEXT_PARAM; 318 final static int POSITIONAL_PARAM = TBaseType.POSITIONAL_PARAM; 319 final static int NAMED_PARAM = TBaseType.NAMED_PARAM; 320 321 final static int castoperator = TBaseType.castoperator; 322 final static int twocolons = TBaseType.twocolons; 323 final static int compoundAssignmentOperator = TBaseType.compoundAssignmentOperator; 324 final static int postgresql_function_delimiter = TBaseType.rrw_postgresql_function_delimiter; 325 final static int clickhouse_function_delimiter = 
TBaseType.rrw_clickhouse_function_delimiter; 326 final static int greenplum_function_delimiter = TBaseType.rrw_greenplum_function_delimiter; 327 328 final static int redshift_function_delimiter = TBaseType.rrw_redshift_function_delimiter; 329 final static int snowflake_function_delimiter = TBaseType.rrw_snowflake_function_delimiter; 330 331 332 333 int[] yypos;// = new int[max_rules + 1]; // 1 based in delphi, Position 0 was not used here 334 int[] yystack;// = new int[max_matches + 1]; // 1 based in delphi, Position 0 was not used here 335 // ArrayList yystack; 336 337 //String keywordvaluefile,keywordfile,yyk_file,yym_file,yykl_file; 338 //String yykh_file,yyml_file,yymh_file,yytl_file,yyth_file,yytint_file,yyt_file; 339 340 EDbVendor dbvendor; 341 TSourceToken prevToken = null; 342 343 public void setSqlCharset(String sqlCharset) { 344 this.sqlCharset = sqlCharset; 345 } 346 347 public String getSqlCharset() { 348 return sqlCharset; 349 } 350 351 private String sqlCharset = null; 352 353 /** 354 * Check if token code represents a single character operator 355 */ 356 protected boolean isSingleCharOperator(int tokenCode) { 357 return tokenCode == '(' || tokenCode == ')' || 358 tokenCode == '[' || tokenCode == ']' || 359 tokenCode == '{' || tokenCode == '}' || 360 tokenCode == ',' || tokenCode == ';' || 361 tokenCode == '.' || tokenCode == ':' || 362 tokenCode == '+' || tokenCode == '-' || 363 tokenCode == '*' || tokenCode == '/' || 364 tokenCode == '%' || tokenCode == '=' || 365 tokenCode == '<' || tokenCode == '>' || 366 tokenCode == '!' 
|| tokenCode == '&' || 367 tokenCode == '|' || tokenCode == '^' || 368 tokenCode == '~' || tokenCode == '?'; 369 } 370 371 /** 372 * Check if token code represents a keyword 373 */ 374 protected boolean isKeyword(int tokenCode) { 375 // Check if it's in the reserved word range 376 return tokenCode >= TBaseType.rrw_select && tokenCode < TBaseType.rrw_abort; 377 } 378 379 public TCustomLexer(){ 380 //this.yyinput = pbuf; 381 yytextbufsize = intial_bufsize - 1; 382 yytextbuf = new char[intial_bufsize]; 383 checkyytextbuf(yytextbufsize); 384 385 bufsize = intial_bufsize - 1; 386 buf = new char[intial_bufsize]; 387 checkbuf(bufsize); 388 389 //literalbuf = new StringBuffer(); 390 literalbuf = new StringBuilder(); 391 //keywordList = new TreeMap(); 392 delimiterchar = ';'; 393 tmpDelimiter = ""; 394 395 xcdepth = 0; 396 nchars = 0; 397 isqmarktoident = true; 398 399 yylvalstr = ""; 400 yysstate = 0; 401 yylstate = 0; 402 yymatches = 0; 403 yysleng = 0; 404 bufptr = 0; 405 yylineno = 0; 406 yycolno = 0; 407 offset = -1; 408 yylineno_p = 1; 409 yycolno_p = 1; 410 offset_p = 0; 411 412 yypos = new int[max_rules + 1]; 413 max_matches = TBaseType.LEXER_INIT_MAX_MATCHES; 414 yystack = new int[max_matches + 1]; 415 416 prevToken = null; 417 } 418 419 /* 420 * this function is not used. 
421 private void getkeywordvaluefromfile(){ 422 int i; 423 keywordValueList.clear(); 424 for(i=0; i<keywordlist.length; i++){ 425 // System.out.println(keywordlist[i]); 426 String[] ss = keywordlist[i].split("[=]"); 427 keywordValueList.put(ss[0].toUpperCase(),ss[1]); 428 } 429 } 430 */ 431 432public int iskeyword(String str){ 433 return -1; 434} 435 436public boolean isAtBeginOfLine(){ 437 return (yyretval == lexnewline || yyretval == 0); 438} 439 440//public boolean canBeColumnName(int tokencode){ 441// return false; 442//} 443 444 445public String getStringByCode(int tokenCode){ 446 return null; 447} 448 449 public int getkeywordvalue(String keyword){ 450 return 0; 451 } 452 453 454 /** 455 * @deprecated , please use keywordChecker.isKeyword() instead. 456 * 457 * because there are so many non-reserved keywords in some databases, it's not suitable to put those 458 * non-reserved keywords in lexer and parser. 459 * 460 * @param keyword 461 * @param keywordValueList 462 * @param keywordTypeList 463 * @return 464 */ 465 public static EKeywordType getKeywordType(String keyword, HashMap<String, Integer> keywordValueList,HashMap<Integer, Integer> keywordTypeList){ 466 EKeywordType ret = EKeywordType.NOT_A_KEYWORD; 467 Integer s = keywordValueList.get(keyword.toUpperCase(Locale.ENGLISH)); 468 if( s == null) return ret; 469 470 Integer i = keywordTypeList.get(s); 471 if (i == 1) return EKeywordType.RESERVED_WORD; 472 else if (i == 2) return EKeywordType.NON_RESERVED_KEYWORD; 473 else return ret; 474 } 475 476 /** 477 * 如果是ascii 字符,直接返回,如果是unicode 字符,需要进行转换。否则 String.charAt() 返回的unicode字符不是我们想要的字符, 478 * 例如中文的括号,我们实际需要的ascii的括号 479 * 480 * @param pYylvalstr 481 * @param index 482 * @return 483 */ 484 char lexer_charAt(String pYylvalstr,int index){ 485 char ret = pYylvalstr.charAt(index); 486 if (ret > 255){ 487 // this is a unicode code 488 if ((ret == 0xFF08)){ 489 // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=65280&number=128 490 // Unicode code point 
for FULLWIDTH LEFT PARENTHESIS (, 0xFF08 491 //System.out.println(c); 492 ret = '('; 493 } 494 if ( (ret == 0xFF09)){ 495 // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=65280&number=128 496 // Unicode code point for FULLWIDTH RIGHT PARENTHESIS ), 0xFF09 497 // System.out.println(c); 498 ret = ')'; 499 } 500 } 501 return ret; 502 } 503 void totablechar(){ 504 //System.out.println("char:"+yyactchar+" ,hex:"+String.format("%04x", (int) yyactchar)); 505 //System.out.println(String.format("0x%08X", (int)yyactchar)+", "+(char)yyactchar); 506 507 if (((int) yyactchar == 0) && !endOfInput) { 508 yytablechar = (char)255; 509 return; 510 } 511 512 if ((int)(yyactchar) < 228){ // 228 is ä in unicode 513 yytablechar = yyactchar; 514 if ((((int)(yyactchar) == 160)&&(utf8NoBreakSpaceReady))||(yyactchar == 0xA0)){ 515 yytablechar = (char)32; 516 } 517 utf8NoBreakSpaceReady = false; 518// if (yyactchar == 0x27){ 519// insideSingleQuoteStr = !insideSingleQuoteStr; 520// } 521 }else{ 522 yytablechar = (char)'a';//(char)255; 523 524 if ((int)(yyactchar) == 914) { // c2 a0, utf-8 NO-BREAK SPACE 525 utf8NoBreakSpaceReady = true; 526 yytablechar = (char) 32; 527 }else if ((yyactchar == 0x2018)||(yyactchar == 0x2019)){ 528 if (stringLiteralStartWithUnicodeSingleQuote){ 529 // WHERE Name LIKE ‘Acme%’ 530 // 如上,如果string literal 以unicode quote 开始,则不管当前是否在string literal中,新碰到的unicode quote都看成是string literal的结尾符, 531 yytablechar = 0x27; // treat Unicode Character 'LEFT SINGLE QUOTATION MARK' as the ascii char ', but don't change it 532 }else { 533 if (insideSingleQuoteStr){ 534 // don't change the unicode quote char 535 }else { 536 yytablechar = 0x27; // treat Unicode Character 'LEFT SINGLE QUOTATION MARK' as the ascii char ', but don't change it 537 } 538 } 539 540 }else if ((yyactchar == 0x200B)||(yyactchar == 0x3000)){ 541 // Unicode code point 0x200B: treat Unicode Character ZERO WIDTH SPACE as the ascii char space, but don't change it 542 // Unicode code point 0x3000: treat 
Unicode Character IDEOGRAPHIC SPACE (UTF-8: e3 80 80) as the ascii char space, but don't change it 543 yytablechar = 0x20; 544 }else if (yyactchar == 0xFF08){ 545 yytablechar = '('; // treat Unicode code point for FULLWIDTH LEFT PARENTHESIS as the ascii char (, but don't change it 546 }else if (yyactchar == 0xFF09){ 547 yytablechar = ')'; // treat Unicode code point for FULLWIDTH RIGHT PARENTHESIS as the ascii char ), but don't change it 548 }else if (yyactchar == 0xFF0C){ 549 yytablechar = ','; // treat Unicode code point for FULLWIDTH COMMA as the ascii char comma, but don't change it 550 }else { 551 utf8NoBreakSpaceReady = false; 552 } 553 } 554 } 555 556 String getyytext(){ 557 return new String(yytextbuf,0,yytextlen); 558 } 559 560 561 void checkyytextbuf(int size){ 562 while ( size >= yytextbufsize){ 563 yytextbufsize = yytextbufsize * 2 > intial_bufsize ? yytextbufsize * 2: intial_bufsize; 564 char[] tmp = new char[yytextbufsize]; 565 System.arraycopy(yytextbuf,0,tmp,0, yytextbuf.length); 566 yytextbuf = tmp; 567 } 568 } 569 570 void checkbuf(int size){ 571 // System.out.println("while begin2"+" size:"+size+" bufsize:"+bufsize); 572 while ( size >= bufsize){ 573 bufsize = bufsize * 2 > intial_bufsize ? 
bufsize * 2: intial_bufsize; 574 char[] tmp = new char[bufsize]; 575 System.arraycopy(buf,0,tmp,0, buf.length); 576 buf = tmp; 577 } 578 // System.out.println("while end2"); 579 } 580 581 boolean eof(BufferedReader pbuf){ 582 try{ 583 return !pbuf.ready(); 584 }catch(IOException e){ 585 return true; 586 } 587 } 588 589 void yynew(){ 590 if (yylastchar != (char)0){ 591 if(yylastchar == lf){ 592 yylstate = 1; 593 }else{ 594 yylstate = 0; 595 } 596 } 597 598 yystate = yysstate + yylstate; 599 checkyytextbuf(0); 600 yytextlen = 0; 601 yymatches = 0; 602 yydone = false; 603 } 604 605 void yyscan(){ 606 yyactchar = get_char(); 607 checkyytextbuf(yytextlen + 1); 608 yytextlen++; 609 yytextbuf[yytextlen - 1] = yyactchar; 610 } 611 612 void yymark(int n){ 613 if (n > max_rules ){ 614 System.out.println("n > max_rules "); 615 } 616 yypos[n] = yytextlen; 617 } 618 619 void yymatch(int n){ 620 yymatches++; 621 if(yymatches > max_matches){ 622 623 int new_yystack[] = new int[max_matches*2+1]; 624 System.arraycopy(yystack, 0, new_yystack, 0, max_matches); 625 yystack = new_yystack; 626 max_matches = max_matches * 2; 627 628 // this is valid in JDK 1.6, proguard will report warning and stop 629 // yystack = Arrays.copyOf(yystack,max_matches+1); 630 631 } 632 yystack [yymatches] = n; 633 } 634 635 int yyfind(){ 636 //return -1 mean not found 637 int ret = -1; 638 639 yyreject = false; 640 641 while (( yymatches > 0 ) && ( yypos[yystack[yymatches]] == 0 )) { 642 yymatches-- ; 643 } 644 645 646 if (yymatches > 0){ 647 yysleng = yytextlen; 648 ret = yystack[yymatches]; 649 yyless( yypos[ret] ); 650 yypos[ret] = 0; 651 if (yytextlen >0){ 652 yylastchar = yytextbuf [yytextlen-1]; 653 }else{ 654 yylastchar = (char)0; 655 } 656 }else{ 657 yyless( 0 ); 658 yylastchar = (char)0; 659 } 660 661 return ret; 662 } 663 664 boolean yydefault(){ 665 boolean ret; 666 667 yyreject = false; 668 yyactchar = get_char(); 669 if (yyactchar != (char)0){ 670 //put_char( yyactchar ); 671 ret = true; 672 
}else{ 673 yylstate = 1; 674 ret = false; 675 } 676 yylastchar = yyactchar; 677 return ret; 678 } 679 void yyless(int n){ 680 for(int i= yytextlen; i> n; i--){ 681 unget_char(yytextbuf[i - 1]); 682 } 683 checkyytextbuf(n); 684 yytextlen = n; 685 } 686 void returni(int n){ 687 yyretval = n; 688 yydone = true; 689 } 690 void returnc(char c){ 691 yyretval = (int)c; 692 yydone = true; 693 } 694 void yyclear(){ 695 bufptr = 0; 696 yysstate = 0; 697 yylstate = 1; 698 yylastchar = (char)0; 699 yytextlen = 0; 700 yylineno = 0; 701 yycolno = 0; 702 offset = -1; 703 // yystext := ''; 704 705 yylineno_p = 1; 706 yycolno_p = 1; 707 offset_p = 0; 708 709 } 710 711 712 boolean yywrap(){ 713 return true; 714 } 715 int getyysstate(){ 716 return yysstate; 717 } 718 void start(int pstate){ 719 yysstate = pstate; 720 if (pstate == xq){ 721 insideSingleQuoteStr = true; 722 if ((yylvalstr.charAt(0) == 0x2018)||(yylvalstr.charAt(0) == 0x2019)){ 723 stringLiteralStartWithUnicodeSingleQuote = true; 724 }else{ 725 stringLiteralStartWithUnicodeSingleQuote = false; 726 } 727 }else{ 728 insideSingleQuoteStr = false; 729 } 730 } 731 732 733 void unget_char(char pchar){ 734 if(bufptr == max_chars) 735 { 736 System.out.println("input buffer overflow"); 737 } 738 // if (bufptr > 0) { 739 bufptr++; 740 yycolno--; 741 offset--; 742 checkbuf(bufptr+1); 743 buf[bufptr] = pchar; 744 // } 745 746 } 747 748 public void reset(){ 749 insideSingleQuoteStr = false; 750 nestedLessThan = 0; 751 } 752 753 public boolean insideSingleQuoteStr = false; 754 public boolean stringLiteralStartWithUnicodeSingleQuote = false; 755 756 757 // Previous implementation of readln, 2025-05-04 758 // char[] readln() throws IOException { 759 // int c; 760 // char[] buffer = new char[80]; 761 // int bufferSize = 0; 762 763 // while ((c = yyinput.read()) != -1) { 764 // if (bufferSize >= buffer.length) { 765 // char[] newBuffer = new char[buffer.length * 2]; 766 // System.arraycopy(buffer, 0, newBuffer, 0, buffer.length); 767 // 
buffer = newBuffer; 768 // } 769 770 // buffer[bufferSize++] = (char)c; 771 772 // if (c == '\n' || c == '\r') { 773 // break; 774 // } 775 // } 776 777 // if (bufferSize > 0 && buffer[bufferSize - 1] == '\r') { 778 // yyinput.mark(1); 779 // c = yyinput.read(); 780 // if (c == '\n') { 781 // if (bufferSize >= buffer.length) { 782 // char[] newBuffer = new char[buffer.length + 1]; 783 // System.arraycopy(buffer, 0, newBuffer, 0, buffer.length); 784 // buffer = newBuffer; 785 // } 786 // buffer[bufferSize++] = '\n'; 787 // } else { 788 // yyinput.reset(); 789 // } 790 // } 791 792 // if (bufferSize == 0) { 793 // return null; 794 // }else{ 795 // char[] result = new char[bufferSize]; 796 // System.arraycopy(buffer, 0, result, 0, bufferSize); 797 // return result; 798 // } 799 // } 800 801/** 802 * High-performance line reader with optimal buffer management. 803 * Reuses lineReadBuffer across calls to reduce per-line allocations. 804 * @return char array containing the line including line ending, or null if end of stream 805 */ 806char[] readln() throws IOException { 807 if (yyinput == null) return null; 808 809 int position = 0; 810 int c; 811 812 // Read characters until line ending or EOF 813 while ((c = yyinput.read()) != -1) { 814 // Expand buffer if needed (expanded buffer stays for reuse) 815 if (position >= lineReadBuffer.length) { 816 char[] newBuffer = new char[lineReadBuffer.length * 2]; 817 System.arraycopy(lineReadBuffer, 0, newBuffer, 0, lineReadBuffer.length); 818 lineReadBuffer = newBuffer; 819 } 820 821 // Store character 822 lineReadBuffer[position++] = (char)c; 823 824 // Check for line endings 825 if (c == '\n') { 826 break; // LF - end of line 827 } else if (c == '\r') { 828 // Need to check for CR+LF sequence 829 yyinput.mark(1); 830 c = yyinput.read(); 831 832 if (c == '\n') { 833 // CR+LF sequence - include LF in result 834 if (position >= lineReadBuffer.length) { 835 char[] newBuffer = new char[lineReadBuffer.length + 1]; 836 
System.arraycopy(lineReadBuffer, 0, newBuffer, 0, lineReadBuffer.length); 837 lineReadBuffer = newBuffer; 838 } 839 lineReadBuffer[position++] = '\n'; 840 } else { 841 // CR only - reset stream to keep the character after CR 842 yyinput.reset(); 843 } 844 break; 845 } 846 } 847 848 // Return null if no characters were read (end of stream) 849 if (position == 0) { 850 yylineLen = 0; 851 return null; 852 } 853 854 // Return lineReadBuffer directly, avoiding per-line array allocation. 855 // yylineLen holds the actual content length (replaces yyline.length semantic). 856 yylineLen = position; 857 return lineReadBuffer; 858} 859 860 char get_char(){ 861 862 char ret ; 863 boolean readlineok = true; 864 865 if ((bufptr == 0) && !eof(yyinput) ) 866 { 867 try{ 868 endOfInput = false; 869 yyline = readln();//yyinput.readLine(); 870 // System.out.println("readln: "+yyline); 871 if (yyline == null){ 872 readlineok = false; 873 } else{ 874 yylineno++; 875 yycolno = 0; 876 // Use yylineLen instead of yyline.length since lineReadBuffer is reused 877 bufptr = yylineLen; 878 checkbuf(bufptr+1); 879 for(int k=1;k<=bufptr;k++){ 880 buf[k] = yyline[bufptr - k]; 881 } 882 } 883 }catch(IOException e){ 884 readlineok = false; 885 } 886 } 887 888 if (! 
readlineok){
            // Line read failed: signal EOF to the caller with the NUL sentinel.
            endOfInput = true;
            return (char)0;
        }

        if (bufptr > 0){
            // buf holds the current line reversed; consuming a char means
            // decrementing bufptr and advancing the column/offset trackers.
            bufptr--;
            yycolno++;
            offset++;

            return buf[bufptr+1];
            //return yyline.charAt(yyline.length() - (bufptr + 1));
        }else{
            // bufptr--;
            // Line buffer exhausted and no more input: report EOF.
            endOfInput = true;
            return (char)0;
        }

    }

    /** Resets the literal accumulation buffer before scanning a new literal. */
    void startlit(){
        literalbuf.setLength(0);
        literallen = 0;
        literalalloc = 0;
    }

    /**
     * Appends the first {@code yleng} chars of {@code ytext} to the literal buffer.
     *
     * @param ytext source text to copy from
     * @param yleng number of leading characters to append
     */
    void addlit(String ytext, int yleng){
        literallen = literallen + yleng;
        literalbuf.append(ytext,0,yleng);
    }

    /** Appends a single character to the literal buffer. */
    void addlitchar(char ychar){
        literallen++;
        literalbuf.append(ychar);
    }

    /** Returns the accumulated literal text as a String. */
    String litbufdup(){
        return literalbuf.toString();//.intern();
    }

    /** Returns true if {@code ch} is one of the lexer's operator characters. */
    boolean isopchar(char ch){
        switch (ch) {
            case '~':
            case '!':
            case '@':
            case '#':
            case '^':
            case '&':
            case '|':
            case '`':
            case '?':
            case '$':
            case '%':
                return true;
            default:
                return false;
        }
    }

    /** Returns true if {@code ch} is a character that forms a token by itself. */
    boolean isselfchar(char ch){
        switch (ch) {
            case ',':
            case '(':
            case ')':
            case '[':
            case ']':
            case '.':
            case ';':
            case '$':
            case ':':
            case '+':
            case '-':
            case '*':
            case '/':
            case '%':
            case '^':
            case '<':
            case '>':
            case '=':
            case '!':
                return true;
            default:
                return false;
        }
    }

    /** Linear membership test: true if {@code c} occurs in array {@code a}. */
    boolean charinarray(char c, char[] a){
        int len = a.length;
        for (int i = 0; i < len; i++) {
            if (a[i] == c)
                return true;
        }
        return false;
    }

    /** Truncates (or pads) the literal buffer to exactly {@code plen} chars. */
    void setlengthofliteralbuf(int plen){
        literalbuf.setLength(plen);
    }

    /** Hook invoked per grammar rule; no-op here, overridden by generated lexers. */
    void yyaction(int yyruleno){
    }

    /** Base implementation returns 0 (EOF); real scanning is in vendor subclasses. */
    int yylex(){
        return 0;
    }


    /**
     * Fetches the next token from {@link #yylex()} and fills in {@code psourcetoken}
     * with its text, position, and classified {@code ETokenType}/token code.
     *
     * @param psourcetoken token object to populate (text, position, type, code)
     * @return the token code, or 0 at end of input
     */
    public int yylexwrap(TSourceToken psourcetoken) {
        // Get token code and handle EOF
        if ((psourcetoken.tokencode = yylex()) == 0) return 0;

        // Store token text - use shared strings for single ASCII chars to reduce allocations
        if (yylvalstr == null) {
            if (yytextlen == 1 && yytextbuf[0] < 128) {
                // Single ASCII char: reuse the pre-built shared String instance.
                yylvalstr = SINGLE_CHAR_STRINGS[yytextbuf[0]];
            } else {
                yylvalstr = new String(yytextbuf, 0, yytextlen);
            }
        }
        psourcetoken.setAstext(yylvalstr);

        // Record token position information: the *_p fields hold the position
        // where this token started; they are then advanced past the token so
        // the next call records the next token's start.
        psourcetoken.lineNo = yylineno_p;
        psourcetoken.columnNo = yycolno_p;
        psourcetoken.offset = offset_p;
        yylineno_p = yylineno;
        yycolno_p = yycolno + 1;
        offset_p = offset + 1;

        // Track token in token table for analysis
        setTokenTableValue(psourcetoken);

        // Handle token types based on token code
        switch (psourcetoken.tokencode) {
            case cmtdoublehyphen:
                psourcetoken.tokentype = ETokenType.ttsimplecomment;
                // MDX uses // for line comments; re-tag when the text starts with '/'.
                if (dbvendor == EDbVendor.dbvmdx && psourcetoken.toString().startsWith("/")) {
                    psourcetoken.tokentype = ETokenType.ttCPPComment;
                }
                break;

            case cmtslashstar:
                psourcetoken.tokentype = ETokenType.ttbracketedcomment;
                break;

            case lexspace:
                psourcetoken.tokentype = ETokenType.ttwhitespace;
                break;

            case lexnewline:
                psourcetoken.tokentype = ETokenType.ttreturn;
                break;

            case bind_v:
                psourcetoken.tokentype = ETokenType.ttbindvar;
                // Oracle bind variables may span lines; strip embedded newlines.
                if (dbvendor == EDbVendor.dbvoracle) {
                    psourcetoken.setAstext(psourcetoken.getAstext().replace(TBaseType.newline, ""));
                }
                break;

            case stmt_delimiter:
                psourcetoken.tokentype = ETokenType.ttstmt_delimiter;
                // NOTE(review): the code is rewritten to cmtslashstar here --
                // presumably so the parser skips the delimiter like a comment;
                // confirm this is intentional and not a copy/paste slip.
                psourcetoken.tokencode = cmtslashstar;
                break;

            case concatenationop:
                psourcetoken.tokentype = ETokenType.ttconcatenationop;
                break;

            case variable:
                psourcetoken.tokentype = ETokenType.ttsqlvar;
                break;

            case fconst:
            case iconst:
                psourcetoken.tokentype = ETokenType.ttnumber;
                break;

            case sconst:
                psourcetoken.tokentype = ETokenType.ttsqstring;
                // Carry over any PostgreSQL dollar-quote tag, then reset it.
                psourcetoken.dolqstart = dolqstart;
                dolqstart = "";
                break;

            case ident:
            case QUOTED_IDENT:
                handleIdentifierToken(psourcetoken);
                break;

            case cmpop:
                handleComparisonOperator(psourcetoken);
                break;

            case op:
                handleOperatorToken(psourcetoken);
                break;

            default:
                handleDefaultToken(psourcetoken);
                break;
        }

        prevToken = psourcetoken;
        return psourcetoken.tokencode;
    }

    // Helper methods to better organize the complex token handling logic

    /**
     * Classifies identifier-like tokens by their quoting/prefix style:
     * "..." double-quoted, [...] bracketed (SQL Server/Sybase), {...} braced,
     * &amp;-prefixed (MDX), or .-prefixed (Teradata BTEQ commands).
     */
    private void handleIdentifierToken(TSourceToken psourcetoken) {
        psourcetoken.tokentype = ETokenType.ttidentifier;
        String tokenText = psourcetoken.toString().trim();

        if (tokenText.startsWith("\"")) {
            psourcetoken.tokentype = ETokenType.ttdqstring;
        } else if (tokenText.startsWith("[")) {
            if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
                psourcetoken.tokentype = ETokenType.ttdbstring;
            }
        } else if (tokenText.startsWith("{")) {
            if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
                psourcetoken.tokentype = ETokenType.ttbrstring;
                // ODBC escape sequence {escape ...} gets its own reserved code.
                if (tokenText.toLowerCase().startsWith("{escape")) {
                    psourcetoken.tokencode = TBaseType.rrw_sqlserver_odbc_escape;
                }
            }
        } else if (tokenText.startsWith("&")) {
            if (dbvendor == EDbVendor.dbvmdx) {
                // MDX ampersand-prefixed member keys: map to the AMP_* codes.
                if (psourcetoken.tokencode == QUOTED_IDENT) {
                    psourcetoken.tokencode = AMP_QUOTED_ID;
                } else if (psourcetoken.tokencode == ident) {
                    psourcetoken.tokencode = AMP_UNQUOTED_ID;
                }
            }
        } else if (tokenText.startsWith(".")) {
            if (dbvendor == EDbVendor.dbvteradata) {
                psourcetoken.tokentype = ETokenType.ttBTEQCmd;
            }
        }
    }

    /**
     * Maps multi-character comparison operators (!=, ^=, &lt;&gt;, &gt;=, &lt;=,
     * !&lt;, ^&lt;, !&gt;, ^&gt;, :=) onto their parser token codes.
     */
    private void handleComparisonOperator(TSourceToken psourcetoken) {
        psourcetoken.tokentype = ETokenType.ttmulticharoperator;
        String token = yylvalstr;

        if ((token.startsWith("!") && token.endsWith("=")) ||
                (token.startsWith("^") && token.endsWith("=")) ||
                (token.startsWith("<") && token.endsWith(">"))) {

            psourcetoken.tokencode = TBaseType.not_equal;

            // Handle MySQL NULL-safe equal: <=> contains '=' between < and >.
            if (token.indexOf("=", 1) > 0 &&
                    token.startsWith("<") && token.endsWith(">")) {
                psourcetoken.tokencode = (int)'=';
            }
        } else if (token.startsWith(">") && token.endsWith("=")) {
            psourcetoken.tokencode = TBaseType.great_equal;
        } else if (token.startsWith("<") && token.endsWith("=")) {
            psourcetoken.tokencode = TBaseType.less_equal;
        } else if ((token.startsWith("!") && token.endsWith("<")) ||
                (token.startsWith("^") && token.endsWith("<"))) {
            psourcetoken.tokencode = TBaseType.not_less;
        } else if ((token.startsWith("!") && token.endsWith(">")) ||
                (token.startsWith("^") && token.endsWith(">"))) {
            psourcetoken.tokencode = TBaseType.not_great;
        } else if (token.length() == 2 && token.charAt(0) == ':' && token.charAt(1) == '=') {
            psourcetoken.tokencode = assign_sign;
        }
    }

    /**
     * Dispatches a generic operator token ({@code op}) through the special-case
     * handlers: '?' placeholder, two-char, three-char, comparison-style, then
     * single-char operators. First matching handler wins.
     */
    private void handleOperatorToken(TSourceToken psourcetoken) {
        psourcetoken.tokentype = ETokenType.ttmulticharoperator;
        String token = yylvalstr;
        int tokenLength = token.length();
        char firstChar = tokenLength > 0 ? token.charAt(0) : '\0';
        char secondChar = tokenLength > 1 ? token.charAt(1) : '\0';

        // Handle question mark specially
        if (token.equals("?") && isqmarktoident) {
            handleQuestionMark(psourcetoken);
            return;
        }

        // Handle special two-character operators
        if (tokenLength == 2) {
            if (handleTwoCharOperator(psourcetoken, firstChar, secondChar)) {
                return;
            }
        }

        // Handle special three-character operators
        if (tokenLength == 3) {
            if (handleThreeCharOperator(psourcetoken, firstChar, secondChar, token.charAt(2))) {
                return;
            }
        }

        // Handle comparison operators
        if (handleComparisonOp(psourcetoken, token)) {
            return;
        }

        // Handle single character operators
        if (tokenLength == 1) {
            handleSingleCharOperator(psourcetoken, firstChar);
        }
    }

    /**
     * Maps two-character operators to vendor-specific token codes
     * (shifts, JSON operators, joins, assignment, concatenation, ranges...).
     *
     * @return true if the pair was recognized and the token code was handled
     *         (possibly left unchanged for vendors without a special meaning)
     */
    private boolean handleTwoCharOperator(TSourceToken psourcetoken, char firstChar, char secondChar) {
        switch (firstChar) {
            case '<':
                if (secondChar == '<') {
                    return handleLeftShiftOperator(psourcetoken);
                } else if (secondChar == '@') {
                    // PostgreSQL-style "contained by" JSON/array operator.
                    psourcetoken.tokencode = TBaseType.JSON_RIGHT_CONTAIN;
                    return true;
                }
                break;

            case '>':
                if (secondChar == '>') {
                    return handleRightShiftOperator(psourcetoken);
                }
                break;

            case '=':
                if (secondChar == '>') {
                    // '=>' is >= in ODBC, := in PostgreSQL/GaussDB, else a named parameter.
                    if (dbvendor == EDbVendor.dbvodbc) {
                        psourcetoken.tokencode = TBaseType.great_equal;
                    } else if (dbvendor == EDbVendor.dbvpostgresql || dbvendor == EDbVendor.dbvgaussdb) {
                        psourcetoken.tokencode = TBaseType.assign_sign;
                    } else {
                        psourcetoken.tokencode = NAMED_PARAMETER_SIGN;
                    }
                    return true;
                } else if (secondChar == '*') {
                    // Legacy T-SQL/Sybase right outer join syntax '=*'.
                    if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
                        psourcetoken.tokencode = rightjoin_op;
                    }
                    return true;
                } else if (secondChar == '<') {
                    if (dbvendor == EDbVendor.dbvodbc) {
                        psourcetoken.tokencode = TBaseType.less_equal;
                    }
                    return true;
                } else if (secondChar == '=') {
                    // Spark SQL '==' equality.
                    if (dbvendor == EDbVendor.dbvsparksql) {
                        psourcetoken.tokencode = '=';
                    }
                    return true;
                }
                break;

            case '-':
                if (secondChar == '>') {
                    // '->' is JSON field access in pg-family/MySQL, else a reference arrow.
                    if (dbvendor == EDbVendor.dbvpostgresql || dbvendor == EDbVendor.dbvgaussdb
                            || dbvendor == EDbVendor.dbvgreenplum || dbvendor == EDbVendor.dbvmysql) {
                        psourcetoken.tokencode = TBaseType.JSON_GET_OBJECT;
                    } else {
                        psourcetoken.tokencode = ref_arrow;
                    }
                    return true;
                } else if (secondChar == '=') {
                    if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
                        psourcetoken.tokencode = compoundAssignmentOperator;
                    }
                    return true;
                }
                break;

            case '.':
                if (secondChar == '.') {
                    // '..' range operator for vendors that support it (e.g. Oracle FOR loops).
                    if (dbvendor == EDbVendor.dbvdb2 || dbvendor == EDbVendor.dbvoracle
                            || dbvendor == EDbVendor.dbvmysql || dbvendor == EDbVendor.dbvhana) {
                        psourcetoken.tokencode = double_dot;
                    }
                    return true;
                }
                break;

            case '*':
                if (secondChar == '=') {
                    // Legacy T-SQL/Sybase left outer join syntax '*='.
                    if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
                        psourcetoken.tokencode = leftjoin_op;
                    }
                    return true;
                } else if (secondChar == '*') {
                    // '**' exponentiation in Teradata/Netezza.
                    if (dbvendor == EDbVendor.dbvteradata || dbvendor == EDbVendor.dbvnetezza) {
                        psourcetoken.tokencode = TBaseType.exponentiate;
                    }
                    return true;
                }
                break;

            case '|':
                if (secondChar == '|') {
                    // '||' is logical OR in MySQL, string concat for most others.
                    if (dbvendor == EDbVendor.dbvmysql) {
                        psourcetoken.tokencode = TBaseType.logical_or;
                    } else if (isStringConcatVendor(dbvendor)) {
                        psourcetoken.tokencode = TBaseType.concatenationop;
                    }
                    return true;
                } else if (secondChar == '/') {
                    // Redshift square root operator '|/'.
                    if (dbvendor == EDbVendor.dbvredshift) {
                        psourcetoken.tokencode = TBaseType.square_root;
                    }
                    return true;
                }
                break;

            case '&':
                if (secondChar == '&') {
                    if (dbvendor == EDbVendor.dbvmysql) {
                        psourcetoken.tokencode = TBaseType.logical_and;
                    }
                    return true;
                }
                break;

            case '?':
                // PostgreSQL JSON existence operators ?| and ?&.
                if (secondChar == '|') {
                    psourcetoken.tokencode = TBaseType.JSON_ANY_EXIST;
                    return true;
                } else if (secondChar == '&') {
                    psourcetoken.tokencode = TBaseType.JSON_ALL_EXIST;
                    return true;
                }
                break;

            case '@':
                if (secondChar == '>') {
                    // PostgreSQL-style "contains" JSON/array operator.
                    psourcetoken.tokencode = TBaseType.JSON_LEFT_CONTAIN;
                    return true;
                }
                break;

            case '#':
                if (secondChar == '>') {
                    // PostgreSQL JSON path access '#>'.
                    psourcetoken.tokencode = TBaseType.JSON_GET_OBJECT_AT_PATH;
                    return true;
                }
                break;

            case ':':
                if (secondChar == '=') {
                    psourcetoken.tokencode = assign_sign;
                    return true;
                }
                break;
        }

        // Handle compound assignment operators (+=, -=, *=, /=, %=, &=, ^=, |=)
        if ((firstChar == '+' || firstChar == '-' || firstChar == '*' ||
                firstChar == '/' || firstChar == '%' || firstChar == '&' ||
                firstChar == '^' || firstChar == '|') && secondChar == '=') {
            if (dbvendor == EDbVendor.dbvmssql || dbvendor == EDbVendor.dbvsybase) {
                psourcetoken.tokencode = compoundAssignmentOperator;
                return true;
            } else if (dbvendor == EDbVendor.dbvmysql && firstChar == '^' && secondChar == '=') {
                // MySQL treats ^= as not-equal.
                psourcetoken.tokencode = not_equal;
                return true;
            }
        }

        return false;
    }

    /**
     * Maps three-character operators: '->>' and '#>>' (JSON text extraction)
     * and Redshift's '||/' cube root.
     *
     * @return true if the triple was recognized and a token code assigned
     */
    private boolean handleThreeCharOperator(TSourceToken psourcetoken, char firstChar, char secondChar, char thirdChar) {
        if (firstChar == '-' && secondChar == '>' && thirdChar == '>') {
            psourcetoken.tokencode = TBaseType.JSON_GET_TEXT;
            return true;
        } else if (firstChar == '#' && secondChar == '>' && thirdChar == '>') {
            psourcetoken.tokencode = TBaseType.JSON_GET_TEXT_AT_PATH;
            return true;
        } else if (firstChar == '|' && secondChar == '|' && thirdChar == '/') {
            if (dbvendor == EDbVendor.dbvredshift) {
                psourcetoken.tokencode = TBaseType.cube_root;
                return true;
            }
        }
        return false;
    }

    /**
     * Fallback comparison mapping for operator tokens that look like
     * comparisons (same patterns as {@link #handleComparisonOperator} minus ':=').
     *
     * @return true if a comparison token code was assigned
     */
    private boolean handleComparisonOp(TSourceToken psourcetoken, String token) {
        if ((token.startsWith("!") && token.endsWith("=")) ||
                (token.startsWith("^") && token.endsWith("=")) ||
                (token.startsWith("<") && token.endsWith(">"))) {
            psourcetoken.tokencode = TBaseType.not_equal;
            return true;
        } else if (token.startsWith(">") && token.endsWith("=")) {
            psourcetoken.tokencode = TBaseType.great_equal;
            return true;
        } else if (token.startsWith("<") && token.endsWith("=")) {
            psourcetoken.tokencode = TBaseType.less_equal;
            return true;
        } else if ((token.startsWith("!") && token.endsWith("<")) ||
                (token.startsWith("^") && token.endsWith("<"))) {
            psourcetoken.tokencode = TBaseType.not_less;
            return true;
        } else if ((token.startsWith("!") && token.endsWith(">")) ||
                (token.startsWith("^") && token.endsWith(">"))) {
            psourcetoken.tokencode = TBaseType.not_great;
            return true;
        }
        return false;
    }

    /**
     * For single-character operator tokens, downgrades the code to the raw
     * ASCII char for vendors where the character is a plain operator.
     */
    private void handleSingleCharOperator(TSourceToken psourcetoken, char ch) {
        switch (ch) {
            case '~':
                if (dbvendor == EDbVendor.dbvmysql || dbvendor == EDbVendor.dbvredshift ||
                        dbvendor == EDbVendor.dbvsnowflake) {
                    psourcetoken.tokencode = (int)'~';
                }
                break;

            case '#':
                if (dbvendor == EDbVendor.dbvmssql) {
                    psourcetoken.tokencode = (int)'#';
                }
                break;

            case '&':
                if (dbvendor == EDbVendor.dbvmysql || dbvendor == EDbVendor.dbvvertica ||
                        dbvendor == EDbVendor.dbvsparksql) {
                    psourcetoken.tokencode = (int)'&';
                }
                break;

            case '|':
                if (dbvendor == EDbVendor.dbvmysql || dbvendor == EDbVendor.dbvvertica) {
                    psourcetoken.tokencode = (int)'|';
                }
                break;
        }
    }

    /**
     * Resolves a lone '?' (when isqmarktoident is set): JSON existence test in
     * pg-family, parameter marker in ODBC, bind variable in Snowflake,
     * otherwise a plain identifier.
     */
    private void handleQuestionMark(TSourceToken psourcetoken) {
        if (dbvendor == EDbVendor.dbvpostgresql || dbvendor == EDbVendor.dbvgaussdb ||
                dbvendor == EDbVendor.dbvgreenplum) {
            psourcetoken.tokencode = TBaseType.JSON_EXIST;
        } else if (dbvendor == EDbVendor.dbvodbc) {
            psourcetoken.tokencode = '?';
        } else if (dbvendor == EDbVendor.dbvsnowflake) {
            psourcetoken.tokencode = bind_v;
            psourcetoken.tokentype = ETokenType.ttquestionmark;
        } else {
            psourcetoken.tokencode = ident;
        }
    }

    /**
     * Resolves '&lt;&lt;': label start for PL/SQL-style vendors, left shift in
     * MySQL/Redshift, Netezza-specific op otherwise. Always returns true --
     * unmatched vendors keep the incoming token code.
     */
    private boolean handleLeftShiftOperator(TSourceToken psourcetoken) {
        if (dbvendor == EDbVendor.dbvoracle || dbvendor == EDbVendor.dbvmssql ||
                dbvendor == EDbVendor.dbvsybase || dbvendor == EDbVendor.dbvpostgresql ||
                dbvendor == EDbVendor.dbvgaussdb || dbvendor == EDbVendor.dbvaccess ||
                dbvendor == EDbVendor.dbvgreenplum) {
            psourcetoken.tokencode = label_begin;
        } else if (dbvendor == EDbVendor.dbvmysql) {
            psourcetoken.tokencode = TBaseType.rrw_left_shift;
        } else if (dbvendor == EDbVendor.dbvredshift) {
            psourcetoken.tokencode = TBaseType.bitwise_shift_left;
        } else if (dbvendor == EDbVendor.dbvnetezza) {
            psourcetoken.tokencode = TBaseType.rrw_netezza_op_less_less;
        }
        return true;
    }

    /**
     * Resolves '&gt;&gt;': mirror of {@link #handleLeftShiftOperator} for the
     * label-end / right-shift cases. Always returns true.
     */
    private boolean handleRightShiftOperator(TSourceToken psourcetoken) {
        if (dbvendor == EDbVendor.dbvoracle || dbvendor == EDbVendor.dbvmssql ||
                dbvendor == EDbVendor.dbvsybase || dbvendor == EDbVendor.dbvpostgresql ||
                dbvendor == EDbVendor.dbvgaussdb || dbvendor == EDbVendor.dbvgreenplum ||
                dbvendor == EDbVendor.dbvaccess) {
            psourcetoken.tokencode = label_end;
        } else if (dbvendor == EDbVendor.dbvmysql) {
            psourcetoken.tokencode = TBaseType.rrw_right_shift;
        } else if (dbvendor == EDbVendor.dbvredshift) {
            psourcetoken.tokencode = TBaseType.bitwise_shift_right;
        } else if (dbvendor == EDbVendor.dbvnetezza) {
            psourcetoken.tokencode = TBaseType.rrw_netezza_op_great_great;
        }
        return true;
    }

    /** Vendors where '||' means string concatenation rather than logical OR. */
    private boolean isStringConcatVendor(EDbVendor vendor) {
        return vendor == EDbVendor.dbvdb2 || vendor == EDbVendor.dbvnetezza ||
                vendor == EDbVendor.dbvpostgresql || vendor == EDbVendor.dbvgaussdb ||
                vendor == EDbVendor.dbvredshift || vendor == EDbVendor.dbvgreenplum ||
                vendor == EDbVendor.dbvbigquery || vendor == EDbVendor.dbvsnowflake ||
                vendor == EDbVendor.dbvsparksql || vendor == EDbVendor.dbvvertica;
    }

    /**
     * Fallback classification: keywords by default, plus the full map of
     * single ASCII characters (codes &lt; 255) to their punctuation token types.
     */
    private void handleDefaultToken(TSourceToken psourcetoken) {
        psourcetoken.tokentype = ETokenType.ttkeyword;

        if (psourcetoken.tokencode < 255) {
            // Single character operators (ASCII characters)
            psourcetoken.setAstext(Character.toString(yylvalstr.charAt(0)));
            psourcetoken.tokentype = ETokenType.ttsinglecharoperator;

            switch (psourcetoken.tokencode) {
                case ',':
                    psourcetoken.tokentype = ETokenType.ttcomma;
                    break;
                case '(':
                    psourcetoken.tokentype = ETokenType.ttleftparenthesis;
                    break;
                case ')':
                    psourcetoken.tokentype = ETokenType.ttrightparenthesis;
                    break;
                case '[':
                    psourcetoken.tokentype = ETokenType.ttleftbracket;
                    break;
                case ']':
                    psourcetoken.tokentype = ETokenType.ttrightbracket;
                    break;
                case '.':
                    psourcetoken.tokentype = ETokenType.ttperiod;
                    break;
                case ';':
                    psourcetoken.tokentype = ETokenType.ttsemicolon;
                    break;
                case '$':
                    psourcetoken.tokentype = ETokenType.ttdolorsign;
                    break;
                case ':':
                    psourcetoken.tokentype = ETokenType.ttcolon;
                    break;
                case '+':
                    psourcetoken.tokentype = ETokenType.ttplussign;
                    break;
                case '-':
                    psourcetoken.tokentype = ETokenType.ttminussign;
                    break;
                case '*':
                    psourcetoken.tokentype = ETokenType.ttasterisk;
                    break;
                case '/':
                    psourcetoken.tokentype = ETokenType.ttslash;
                    break;
                case '^':
                    psourcetoken.tokentype = ETokenType.ttcaret;
                    break;
                case '<':
                    psourcetoken.tokentype = ETokenType.ttlessthan;
                    break;
                case '>':
                    psourcetoken.tokentype = ETokenType.ttgreaterthan;
                    break;
                case '=':
                    psourcetoken.tokentype = ETokenType.ttequals;
                    break;
                case '@':
                    // When '@' is configured as the statement delimiter, treat it as ';'.
                    if (delimiterchar == '@') {
                        psourcetoken.tokencode = (int)';';
                        psourcetoken.tokentype = ETokenType.ttsemicolon;
                    } else {
                        psourcetoken.tokentype = ETokenType.ttatsign;
                    }
                    break;
                case '~':
                    psourcetoken.tokentype = ETokenType.tttilde;
                    break;
                case '&':
                    psourcetoken.tokentype = ETokenType.ttampersand;
                    break;
                case '|':
                    psourcetoken.tokentype = ETokenType.ttverticalbar;
                    break;
                case '?':
                    // '?' may be promoted to an identifier unless the vendor uses it
                    // as a parameter marker (ODBC) or JSON operator (pg-family).
                    if (isqmarktoident && dbvendor != EDbVendor.dbvodbc &&
                            dbvendor != EDbVendor.dbvpostgresql && dbvendor != EDbVendor.dbvgaussdb) {
                        psourcetoken.tokencode = ident;
                    }
                    break;
            }
        } else if (dbvendor == EDbVendor.dbvhive && psourcetoken.tokencode == TBaseType.hive_equal) {
            psourcetoken.tokentype = ETokenType.ttequals;
        }
    }

}