001package gudusoft.gsqlparser.parser; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.TBaseType; 005import gudusoft.gsqlparser.TCustomLexer; 006import gudusoft.gsqlparser.TCustomParser; 007import gudusoft.gsqlparser.TCustomSqlStatement; 008import gudusoft.gsqlparser.TLexerClickhouse; 009import gudusoft.gsqlparser.TParserClickhouse; 010import gudusoft.gsqlparser.TSourceToken; 011import gudusoft.gsqlparser.TSourceTokenList; 012import gudusoft.gsqlparser.TStatementList; 013import gudusoft.gsqlparser.TSyntaxError; 014import gudusoft.gsqlparser.EFindSqlStateType; 015import gudusoft.gsqlparser.ETokenType; 016import gudusoft.gsqlparser.ETokenStatus; 017import gudusoft.gsqlparser.ESqlStatementType; 018import gudusoft.gsqlparser.EErrorType; 019import gudusoft.gsqlparser.stmt.TUnknownSqlStatement; 020import gudusoft.gsqlparser.sqlcmds.ISqlCmds; 021import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory; 022import gudusoft.gsqlparser.compiler.TContext; 023import gudusoft.gsqlparser.sqlenv.TSQLEnv; 024import gudusoft.gsqlparser.compiler.TGlobalScope; 025import gudusoft.gsqlparser.compiler.TFrame; 026 027import java.util.ArrayList; 028import java.util.List; 029import java.util.Stack; 030 031/** 032 * ClickHouse database SQL parser implementation. 
033 * 034 * <p>This parser handles ClickHouse-specific SQL syntax including: 035 * <ul> 036 * <li>Standard SQL DML/DDL (SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, ALTER)</li> 037 * <li>ClickHouse-specific ENGINE clause in CREATE TABLE</li> 038 * <li>PREWHERE, FINAL, SAMPLE, ARRAY JOIN clauses</li> 039 * <li>FORMAT and SETTINGS clauses</li> 040 * <li>ClickHouse data types (Array, Tuple, Map, Nullable, LowCardinality, etc.)</li> 041 * </ul> 042 * 043 * <p><b>Design Notes:</b> 044 * <ul> 045 * <li>Extends {@link AbstractSqlParser}</li> 046 * <li>Based on MySQL grammar (ClickHouse shares backtick quoting, # comments, ENGINE clause)</li> 047 * <li>No stored procedure support (ClickHouse has no stored procedures)</li> 048 * <li>Delimiter character: ';' for SQL statements</li> 049 * </ul> 050 * 051 * @see AbstractSqlParser 052 * @see TLexerClickhouse 053 * @see TParserClickhouse 054 * @since 3.2.0.0 055 */ 056public class ClickhouseSqlParser extends AbstractSqlParser { 057 058 // ========== Lexer and Parser Instances ========== 059 060 /** The ClickHouse lexer used for tokenization (public for TGSqlParser.getFlexer()) */ 061 public TLexerClickhouse flexer; 062 private TParserClickhouse fparser; 063 064 // ========== Constructor ========== 065 066 /** 067 * Construct ClickHouse SQL parser. 
068 * <p> 069 * Configures the parser for ClickHouse database with default delimiter: semicolon (;) 070 */ 071 public ClickhouseSqlParser() { 072 super(EDbVendor.dbvclickhouse); 073 074 // Set delimiter character - ClickHouse uses semicolon 075 this.delimiterChar = ';'; 076 this.defaultDelimiterStr = ";"; 077 078 // Create lexer once - will be reused for all parsing operations 079 this.flexer = new TLexerClickhouse(); 080 this.flexer.delimiterchar = this.delimiterChar; 081 this.flexer.defaultDelimiterStr = this.defaultDelimiterStr; 082 083 // CRITICAL: Set lexer for inherited getanewsourcetoken() method 084 this.lexer = this.flexer; 085 086 // Create parser once - will be reused for all parsing operations 087 this.fparser = new TParserClickhouse(null); 088 this.fparser.lexer = this.flexer; 089 } 090 091 // ========== AbstractSqlParser Abstract Methods Implementation ========== 092 093 @Override 094 protected TCustomLexer getLexer(ParserContext context) { 095 return this.flexer; 096 } 097 098 @Override 099 protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) { 100 this.fparser.sourcetokenlist = tokens; 101 return this.fparser; 102 } 103 104 @Override 105 protected void tokenizeVendorSql() { 106 doclickhousetexttotokenlist(); 107 } 108 109 @Override 110 protected void setupVendorParsersForExtraction() { 111 this.fparser.sqlcmds = this.sqlcmds; 112 this.fparser.sourcetokenlist = this.sourcetokenlist; 113 } 114 115 @Override 116 protected void extractVendorRawStatements(SqlParseResult.Builder builder) { 117 doclickhousegetrawsqlstatements(builder); 118 } 119 120 @Override 121 protected TStatementList performParsing(ParserContext context, 122 TCustomParser parser, 123 TCustomParser secondaryParser, 124 TSourceTokenList tokens, 125 TStatementList rawStatements) { 126 this.sourcetokenlist = tokens; 127 this.parserContext = context; 128 this.sqlstatements = rawStatements; 129 130 // Initialize sqlcmds for the parser 131 this.sqlcmds = 
SqlCmdsFactory.get(vendor); 132 this.fparser.sqlcmds = this.sqlcmds; 133 134 // Initialize global context for statement parsing 135 initializeGlobalContext(); 136 137 // Parse each statement 138 for (int i = 0; i < sqlstatements.size(); i++) { 139 TCustomSqlStatement stmt = sqlstatements.getRawSql(i); 140 141 try { 142 stmt.setFrameStack(frameStack); 143 int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree()); 144 145 // Handle error recovery for CREATE TABLE statements if enabled 146 boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE; 147 if (doRecover && ((parseResult != 0) || (stmt.getErrorCount() > 0))) { 148 handleCreateTableErrorRecovery(stmt); 149 } 150 151 if ((parseResult != 0) || (stmt.getErrorCount() > 0)) { 152 copyErrorsFromStatement(stmt); 153 } 154 } catch (Exception ex) { 155 handleStatementParsingException(stmt, i, ex); 156 continue; 157 } 158 } 159 160 // Clean up frame stack 161 if (globalFrame != null) { 162 globalFrame.popMeFromStack(frameStack); 163 } 164 165 return this.sqlstatements; 166 } 167 168 /** 169 * Handle error recovery for CREATE TABLE statements. 
     * <p>
     * Applies only when the statement is a CREATE TABLE and strict CREATE TABLE
     * parsing is disabled. Tokens after the closing ')' of the outermost
     * parenthesized list (except ';') are masked as sqlpluscmd tokens so the
     * grammar skips them, then the statement is re-parsed once.
     * CREATE TABLE ... AS ( SELECT ... ) is detected and left untouched.
     *
     * @param stmt the CREATE TABLE statement that reported parse errors
     */
    private void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) {
        if ((stmt.sqlstatementtype != ESqlStatementType.sstcreatetable) || TBaseType.c_createTableStrictParsing) {
            return;
        }

        int nested = 0;
        boolean isIgnore = false, isFoundIgnoreToken = false;
        TSourceToken firstIgnoreToken = null;

        for (int k = 0; k < stmt.sourcetokenlist.size(); k++) {
            TSourceToken st = stmt.sourcetokenlist.get(k);
            if (isIgnore) {
                // Past the column-definition list: remember whether any solid
                // token exists, then mask everything except the terminator.
                if (st.issolidtoken() && (st.tokencode != ';')) {
                    isFoundIgnoreToken = true;
                    if (firstIgnoreToken == null) {
                        firstIgnoreToken = st;
                    }
                }
                if (st.tokencode != ';') {
                    st.tokencode = TBaseType.sqlpluscmd;
                }
                continue;
            }
            if (st.tokencode == (int) ')') {
                nested--;
                if (nested == 0) {
                    // Closing paren of the outermost list. If followed by
                    // AS ( SELECT ..., this is a CTAS and must not be masked.
                    boolean isSelect = false;
                    TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1);
                    if (st1 != null) {
                        TSourceToken st2 = st.searchToken((int) '(', 2);
                        if (st2 != null) {
                            TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3);
                            isSelect = (st3 != null);
                        }
                    }
                    if (!isSelect) isIgnore = true;
                }
            } else if (st.tokencode == (int) '(') {
                nested++;
            }
        }

        if (isFoundIgnoreToken) {
            // Something was masked: drop previous diagnostics and retry once.
            stmt.clearError();
            stmt.parsestatement(null, false, this.parserContext.isOnlyNeedRawParseTree());
        }
    }

    // ========== ClickHouse-Specific Tokenization ==========

    /**
     * Perform ClickHouse-specific tokenization.
     * <p>
     * Based on MySQL tokenization but simplified - no DELIMITER command,
     * no stored procedure handling. Handles ClickHouse-specific token
     * transformations including :: typecast operator splitting.
     * <p>
     * NOTE(review): synthetic token codes 750 (CAST) and 341 (AS) are assumed
     * to match the generated grammar tables — confirm against the parser;
     * 40/41 are ASCII '(' and ')'.
     */
    private void doclickhousetexttotokenlist() {
        TSourceToken asourcetoken;
        int yychar;

        asourcetoken = getanewsourcetoken();
        if (asourcetoken == null) return;
        yychar = asourcetoken.tokencode;

        while (yychar > 0) {
            // Handle :: typecast operator: the lexer produces mysqllabel (identifier:)
            // followed by bind_v (:name) for "identifier::TypeName". We need to
            // rewrite as CAST(identifier AS TypeName).
            if (yychar == TBaseType.mysqllabel) {
                TSourceToken nextToken = getanewsourcetoken();
                if (nextToken != null && nextToken.tokencode == TBaseType.bind_v) {
                    // We have mysqllabel + bind_v pattern = "ident:" + ":TypeName"
                    // Rewrite as: CAST ( identifier AS TypeName )
                    String labelText = asourcetoken.toString();
                    String identText = labelText.substring(0, labelText.length() - 1); // strip trailing ':'
                    String bindText = nextToken.toString();
                    String typeText = bindText.substring(1); // strip leading ':'
                    int line = (int) asourcetoken.lineNo;
                    int col = (int) asourcetoken.columnNo;

                    // Check if preceding token is '.' (qualified name like t1.col::Type)
                    // If so, pull qualified name prefix into the CAST expression
                    List<TSourceToken> qualifiedPrefix = new ArrayList<>();
                    int lastIdx = sourcetokenlist.size() - 1;
                    // Skip trailing whitespace
                    while (lastIdx >= 0 && (sourcetokenlist.get(lastIdx).tokentype == ETokenType.ttwhitespace
                            || sourcetokenlist.get(lastIdx).tokentype == ETokenType.ttreturn)) {
                        lastIdx--;
                    }
                    if (lastIdx >= 0 && sourcetokenlist.get(lastIdx).tokentype == ETokenType.ttperiod) {
                        // Walk back collecting name.name.name. pattern
                        int prefixEnd = lastIdx;
                        int idx = lastIdx;
                        while (idx >= 0) {
                            TSourceToken t = sourcetokenlist.get(idx);
                            if (t.tokentype == ETokenType.ttperiod) {
                                idx--;
                            } else if (t.tokentype == ETokenType.ttidentifier || t.tokentype == ETokenType.ttkeyword) {
                                idx--;
                                // Skip whitespace between identifier and dot
                                while (idx >= 0 && (sourcetokenlist.get(idx).tokentype == ETokenType.ttwhitespace
                                        || sourcetokenlist.get(idx).tokentype == ETokenType.ttreturn)) {
                                    idx--;
                                }
                                // Check if next non-ws token is another dot (continue) or stop
                                if (idx >= 0 && sourcetokenlist.get(idx).tokentype == ETokenType.ttperiod) {
                                    continue;
                                } else {
                                    break;
                                }
                            } else {
                                break;
                            }
                        }
                        int prefixStart = idx + 1;
                        // Collect prefix tokens and remove from sourcetokenlist
                        for (int i = prefixStart; i <= prefixEnd; i++) {
                            qualifiedPrefix.add(sourcetokenlist.get(i));
                        }
                        // Remove prefix tokens from end of sourcetokenlist
                        while (sourcetokenlist.size() > prefixStart) {
                            sourcetokenlist.remove(sourcetokenlist.size() - 1);
                        }
                    }

                    // Emit: CAST (
                    sourcetokenlist.add(createSyntheticToken("CAST", 750, ETokenType.ttkeyword, line, col));
                    sourcetokenlist.add(createSyntheticToken("(", 40, ETokenType.ttleftparenthesis, line, col));

                    // Re-emit qualified prefix if any (e.g., "t1.")
                    for (TSourceToken prefixToken : qualifiedPrefix) {
                        prefixToken.posinlist = sourcetokenlist.size();
                        sourcetokenlist.add(prefixToken);
                    }

                    // Emit: identifier or integer (e.g. 123::String is a valid cast source)
                    asourcetoken.setAstext(identText);
                    if (identText.matches("\\d+")) {
                        asourcetoken.tokencode = TBaseType.iconst;
                        asourcetoken.tokentype = ETokenType.ttnumber;
                    } else {
                        asourcetoken.tokencode = TBaseType.ident;
                        asourcetoken.tokentype = ETokenType.ttidentifier;
                    }
                    sourcetokenlist.add(asourcetoken);

                    // Emit: AS
                    sourcetokenlist.add(createSyntheticToken("AS", 341, ETokenType.ttkeyword, line, col));

                    // Emit: TypeName
                    nextToken.setAstext(typeText);
                    nextToken.tokencode = TBaseType.ident;
                    nextToken.tokentype = ETokenType.ttidentifier;
                    sourcetokenlist.add(nextToken);

                    // Check for parameterized types: TypeName(params)
                    emitTypeParamsAndClose(line, col);

                    asourcetoken = getanewsourcetoken();
                    if (asourcetoken == null) break;
                    yychar = asourcetoken.tokencode;
                    continue;
                } else {
                    // Not followed by bind_v, add mysqllabel normally
                    sourcetokenlist.add(asourcetoken);
                    if (nextToken == null) break;
                    // The already-fetched lookahead becomes the current token.
                    asourcetoken = nextToken;
                    yychar = asourcetoken.tokencode;
                    continue;
                }
            }

            // Handle standalone :: typecast token (e.g., after string literals: '2024-01-15'::Date)
            if (yychar == TBaseType.typecast) {
                rewriteStandaloneTypecast(asourcetoken);
                asourcetoken = getanewsourcetoken();
                if (asourcetoken == null) break;
                yychar = asourcetoken.tokencode;
                continue;
            }

            // Ordinary token: pass through unchanged.
            sourcetokenlist.add(asourcetoken);

            asourcetoken = getanewsourcetoken();
            if (asourcetoken == null) break;

            yychar = asourcetoken.tokencode;
        }
    }

    /**
     * Check if next token is '(' for parameterized types and emit matching tokens,
     * then close with ')' for CAST.
365 */ 366 private void emitTypeParamsAndClose(int line, int col) { 367 TSourceToken peekToken = getanewsourcetoken(); 368 if (peekToken != null && peekToken.tokencode == 40) { // '(' 369 sourcetokenlist.add(peekToken); 370 int depth = 1; 371 while (depth > 0) { 372 TSourceToken innerToken = getanewsourcetoken(); 373 if (innerToken == null) break; 374 sourcetokenlist.add(innerToken); 375 if (innerToken.tokencode == 40) depth++; 376 else if (innerToken.tokencode == 41) depth--; 377 } 378 sourcetokenlist.add(createSyntheticToken(")", 41, ETokenType.ttrightparenthesis, line, col)); 379 } else { 380 sourcetokenlist.add(createSyntheticToken(")", 41, ETokenType.ttrightparenthesis, line, col)); 381 // The peeked token must be put back into the stream; 382 // since we can't push back, check if it needs further processing 383 if (peekToken != null) { 384 // Check if the peeked token is also a typecast (chained casts like x::A::B) 385 if (peekToken.tokencode == TBaseType.typecast) { 386 rewriteStandaloneTypecast(peekToken); 387 } else { 388 sourcetokenlist.add(peekToken); 389 } 390 } 391 } 392 } 393 394 /** 395 * Create a synthetic token for CAST() rewriting. 396 */ 397 private TSourceToken createSyntheticToken(String text, int tokencode, ETokenType tokentype, int lineNo, int columnNo) { 398 TSourceToken token = new TSourceToken(text); 399 token.tokencode = tokencode; 400 token.tokentype = tokentype; 401 token.tokenstatus = ETokenStatus.tsoriginal; 402 token.lineNo = lineNo; 403 token.columnNo = columnNo; 404 token.container = sourcetokenlist; 405 token.posinlist = sourcetokenlist.size(); 406 return token; 407 } 408 409 /** 410 * Handle standalone :: typecast (lexer correctly tokenized :: as typecast, 411 * e.g., after string literals or closing parentheses). 412 * The expression before :: is already in sourcetokenlist. 413 * Rewrites: ... expr :: TypeName ... → ... CAST( expr AS TypeName ) ... 
     * <p>
     * NOTE(review): trailing whitespace/comment tokens between the expression
     * and '::' (collected into {@code afterExpr}) are never re-emitted, and
     * whitespace between '::' and the type name is consumed and dropped —
     * confirm this loss is acceptable for text reconstruction.
     *
     * @param typecastToken the '::' token just returned by the lexer
     *                      (it is discarded, not added to the list)
     */
    private void rewriteStandaloneTypecast(TSourceToken typecastToken) {
        int line = (int) typecastToken.lineNo;
        int col = (int) typecastToken.columnNo;

        // Find the preceding expression's last solid token
        int exprEndIdx = sourcetokenlist.size() - 1;
        while (exprEndIdx >= 0) {
            TSourceToken t = sourcetokenlist.get(exprEndIdx);
            if (t.tokentype != ETokenType.ttwhitespace && t.tokentype != ETokenType.ttreturn
                    && t.tokentype != ETokenType.ttsimplecomment && t.tokentype != ETokenType.ttbracketedcomment) {
                break;
            }
            exprEndIdx--;
        }
        if (exprEndIdx < 0) return; // nothing precedes '::' — leave the list untouched

        // Find expression start
        int exprStartIdx = exprEndIdx;
        TSourceToken exprEndToken = sourcetokenlist.get(exprEndIdx);

        if (exprEndToken.tokentype == ETokenType.ttrightparenthesis) {
            // Walk back to find matching '('
            int depth = 1;
            int idx = exprEndIdx - 1;
            while (idx >= 0 && depth > 0) {
                TSourceToken t = sourcetokenlist.get(idx);
                if (t.tokentype == ETokenType.ttrightparenthesis) depth++;
                else if (t.tokentype == ETokenType.ttleftparenthesis) depth--;
                idx--;
            }
            exprStartIdx = idx + 1; // idx+1 because we decremented one extra
            // Check for function name before '(' so f(x)::T casts the whole call
            if (exprStartIdx > 0) {
                int fnIdx = exprStartIdx - 1;
                while (fnIdx >= 0) {
                    TSourceToken t = sourcetokenlist.get(fnIdx);
                    if (t.tokentype != ETokenType.ttwhitespace && t.tokentype != ETokenType.ttreturn) {
                        break;
                    }
                    fnIdx--;
                }
                if (fnIdx >= 0) {
                    TSourceToken fnToken = sourcetokenlist.get(fnIdx);
                    if (fnToken.tokentype == ETokenType.ttidentifier || fnToken.tokentype == ETokenType.ttkeyword) {
                        exprStartIdx = fnIdx;
                    }
                }
            }
        }

        // Collect expression tokens and whitespace
        List<TSourceToken> beforeExpr = new ArrayList<>();
        for (int i = 0; i < exprStartIdx; i++) {
            beforeExpr.add(sourcetokenlist.get(i));
        }
        // Collect whitespace just before expression that should go before CAST
        List<TSourceToken> wsBeforeExpr = new ArrayList<>();
        while (!beforeExpr.isEmpty()) {
            TSourceToken last = beforeExpr.get(beforeExpr.size() - 1);
            if (last.tokentype == ETokenType.ttwhitespace || last.tokentype == ETokenType.ttreturn) {
                wsBeforeExpr.add(0, beforeExpr.remove(beforeExpr.size() - 1));
            } else {
                break;
            }
        }

        List<TSourceToken> exprTokens = new ArrayList<>();
        for (int i = exprStartIdx; i <= exprEndIdx; i++) {
            exprTokens.add(sourcetokenlist.get(i));
        }
        // NOTE(review): afterExpr is collected here but never re-added below.
        List<TSourceToken> afterExpr = new ArrayList<>();
        for (int i = exprEndIdx + 1; i < sourcetokenlist.size(); i++) {
            afterExpr.add(sourcetokenlist.get(i));
        }

        // Clear and rebuild sourcetokenlist
        sourcetokenlist.clear();
        sourcetokenlist.curpos = -1;

        // Re-add tokens before expression (without trailing whitespace)
        for (TSourceToken t : beforeExpr) {
            t.posinlist = sourcetokenlist.size();
            sourcetokenlist.add(t);
        }

        // Add whitespace before CAST
        for (TSourceToken t : wsBeforeExpr) {
            t.posinlist = sourcetokenlist.size();
            sourcetokenlist.add(t);
        }

        // Emit CAST(  (750 = CAST keyword code, 40 = '(' — see class tokenizer notes)
        sourcetokenlist.add(createSyntheticToken("CAST", 750, ETokenType.ttkeyword, line, col));
        sourcetokenlist.add(createSyntheticToken("(", 40, ETokenType.ttleftparenthesis, line, col));

        // Re-add expression tokens
        for (TSourceToken t : exprTokens) {
            t.posinlist = sourcetokenlist.size();
            sourcetokenlist.add(t);
        }

        // Emit AS
        sourcetokenlist.add(createSyntheticToken("AS", 341, ETokenType.ttkeyword, line, col));

        // Read type name and emit closing paren
        TSourceToken typeToken = getanewsourcetoken();
        if (typeToken == null) {
            sourcetokenlist.add(createSyntheticToken(")", 41, ETokenType.ttrightparenthesis, line, col));
            return;
        }

        // Skip whitespace (these tokens are dropped, not re-emitted)
        while (typeToken != null && (typeToken.tokentype == ETokenType.ttwhitespace || typeToken.tokentype == ETokenType.ttreturn)) {
            typeToken = getanewsourcetoken();
        }
        if (typeToken == null) {
            sourcetokenlist.add(createSyntheticToken(")", 41, ETokenType.ttrightparenthesis, line, col));
            return;
        }

        sourcetokenlist.add(typeToken);

        // Check for parameterized types and close
        emitTypeParamsAndClose(line, col);
    }

    /**
     * Get previous non-whitespace token.
     * <p>
     * NOTE(review): not referenced anywhere in this file — possibly retained
     * for API symmetry with sibling parsers; verify before removing.
     *
     * @param ptoken the token whose predecessor is wanted
     * @return the previous solid token, or null if none exists
     */
    private TSourceToken getprevsolidtoken(TSourceToken ptoken) {
        TSourceToken ret = null;
        TSourceTokenList lctokenlist = ptoken.container;

        if (lctokenlist != null) {
            if ((ptoken.posinlist > 0) && (lctokenlist.size() > ptoken.posinlist - 1)) {
                if (!(
                        (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttwhitespace)
                                || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttreturn)
                                || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttsimplecomment)
                                || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttbracketedcomment)
                )) {
                    ret = lctokenlist.get(ptoken.posinlist - 1);
                } else {
                    // Immediate predecessor is not solid: scan backwards.
                    ret = lctokenlist.nextsolidtoken(ptoken.posinlist - 1, -1, false);
                }
            }
        }
        return ret;
    }

    // ========== ClickHouse-Specific Raw Statement Extraction ==========

    /**
     * Extract raw ClickHouse SQL statements from tokenized source.
     * <p>
     * Simplified from MySQL - no stored procedure states,
     * no DELIMITER command, no custom delimiters.
     * ClickHouse uses semicolon as the only delimiter.
573 */ 574 private void doclickhousegetrawsqlstatements(SqlParseResult.Builder builder) { 575 if (TBaseType.assigned(sqlstatements)) sqlstatements.clear(); 576 if (!TBaseType.assigned(sourcetokenlist)) { 577 builder.sqlStatements(this.sqlstatements); 578 builder.errorCode(1); 579 builder.errorMessage("No source token list available"); 580 return; 581 } 582 583 TCustomSqlStatement gcurrentsqlstatement = null; 584 EFindSqlStateType gst = EFindSqlStateType.stnormal; 585 586 for (int i = 0; i < sourcetokenlist.size(); i++) { 587 TSourceToken ast = sourcetokenlist.get(i); 588 sourcetokenlist.curpos = i; 589 590 // Token transformations during raw statement extraction 591 performRawStatementTokenTransformations(ast); 592 593 switch (gst) { 594 case sterror: { 595 if (ast.tokentype == ETokenType.ttsemicolon) { 596 appendToken(gcurrentsqlstatement, ast); 597 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 598 gst = EFindSqlStateType.stnormal; 599 } else { 600 appendToken(gcurrentsqlstatement, ast); 601 } 602 break; 603 } 604 605 case stnormal: { 606 if ((ast.tokencode == TBaseType.cmtdoublehyphen) 607 || (ast.tokencode == TBaseType.cmtslashstar) 608 || (ast.tokencode == TBaseType.lexspace) 609 || (ast.tokencode == TBaseType.lexnewline) 610 || (ast.tokentype == ETokenType.ttsemicolon)) { 611 if (gcurrentsqlstatement != null) { 612 appendToken(gcurrentsqlstatement, ast); 613 } 614 continue; 615 } 616 617 // Find a token to start sql mode 618 gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement); 619 620 if (gcurrentsqlstatement != null) { 621 gst = EFindSqlStateType.stsql; 622 appendToken(gcurrentsqlstatement, ast); 623 } else { 624 // Error token found 625 this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, 626 (ast.columnNo < 0 ? 
0 : ast.columnNo), 627 "Error when tokenize", EErrorType.spwarning, 628 TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist)); 629 630 ast.tokentype = ETokenType.tttokenlizererrortoken; 631 gst = EFindSqlStateType.sterror; 632 633 gcurrentsqlstatement = new TUnknownSqlStatement(vendor); 634 gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid; 635 appendToken(gcurrentsqlstatement, ast); 636 } 637 638 break; 639 } 640 641 case stsql: { 642 if (ast.tokentype == ETokenType.ttsemicolon) { 643 gst = EFindSqlStateType.stnormal; 644 appendToken(gcurrentsqlstatement, ast); 645 gcurrentsqlstatement.semicolonended = ast; 646 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 647 continue; 648 } 649 650 if (ast.tokencode == TBaseType.cmtdoublehyphen) { 651 if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { 652 gst = EFindSqlStateType.stnormal; 653 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 654 continue; 655 } 656 } 657 658 appendToken(gcurrentsqlstatement, ast); 659 break; 660 } 661 662 default: 663 break; 664 } 665 } 666 667 // Last statement 668 if ((gcurrentsqlstatement != null) && 669 ((gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.sterror))) { 670 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, true, builder); 671 } 672 673 // Populate builder with results 674 builder.sqlStatements(this.sqlstatements); 675 builder.syntaxErrors(syntaxErrors instanceof ArrayList ? 676 (ArrayList<TSyntaxError>) syntaxErrors : new ArrayList<>(syntaxErrors)); 677 builder.errorCode(syntaxErrors.isEmpty() ? 0 : syntaxErrors.size()); 678 } 679 680 /** 681 * Handle token transformations during raw statement extraction. 
682 * <p> 683 * Handles MySQL-inherited token transformations for DATE, TIME, TIMESTAMP keywords 684 * and other context-dependent token adjustments. 685 */ 686 private void performRawStatementTokenTransformations(TSourceToken ast) { 687 if (ast.tokencode == TBaseType.rrw_date) { 688 TSourceToken st1 = ast.nextSolidToken(); 689 if (st1 != null) { 690 if (st1.tokencode == '(') { 691 ast.tokencode = TBaseType.rrw_mysql_date_function; 692 } else if (st1.tokencode == TBaseType.sconst) { 693 ast.tokencode = TBaseType.rrw_mysql_date_const; 694 } 695 } 696 } else if (ast.tokencode == TBaseType.rrw_time) { 697 TSourceToken st1 = ast.nextSolidToken(); 698 if (st1 != null) { 699 if (st1.tokencode == TBaseType.sconst) { 700 ast.tokencode = TBaseType.rrw_mysql_time_const; 701 } 702 } 703 } else if (ast.tokencode == TBaseType.rrw_timestamp) { 704 TSourceToken st1 = ast.nextSolidToken(); 705 if (st1 != null) { 706 if (st1.tokencode == TBaseType.sconst) { 707 ast.tokencode = TBaseType.rrw_mysql_timestamp_constant; 708 } else if (st1.tokencode == TBaseType.ident) { 709 if (st1.toString().startsWith("\"")) { 710 ast.tokencode = TBaseType.rrw_mysql_timestamp_constant; 711 st1.tokencode = TBaseType.sconst; 712 } 713 } 714 } 715 } else if (ast.tokencode == TBaseType.rrw_mysql_position) { 716 TSourceToken st1 = ast.nextSolidToken(); 717 if (st1 != null) { 718 if (st1.tokencode != '(') { 719 ast.tokencode = TBaseType.ident; 720 } 721 } 722 } else if (ast.tokencode == TBaseType.rrw_interval) { 723 TSourceToken leftParen = ast.searchToken('(', 1); 724 if (leftParen != null) { 725 int k = leftParen.posinlist + 1; 726 int nested = 1; 727 boolean commaToken = false; 728 while (k < ast.container.size()) { 729 if (ast.container.get(k).tokencode == '(') { 730 nested++; 731 } 732 if (ast.container.get(k).tokencode == ')') { 733 nested--; 734 if (nested == 0) break; 735 } 736 if ((ast.container.get(k).tokencode == ',') && (nested == 1)) { 737 commaToken = true; 738 break; 739 } 740 k++; 741 } 742 
if (commaToken) { 743 ast.tokencode = TBaseType.rrw_mysql_interval_func; 744 } 745 } 746 } 747 } 748 749 private void appendToken(TCustomSqlStatement statement, TSourceToken token) { 750 if (statement == null || token == null) { 751 return; 752 } 753 token.stmt = statement; 754 statement.sourcetokenlist.add(token); 755 } 756 757 @Override 758 public String toString() { 759 return "ClickhouseSqlParser{vendor=" + vendor + "}"; 760 } 761}