001package gudusoft.gsqlparser.parser; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.TBaseType; 005import gudusoft.gsqlparser.TCustomLexer; 006import gudusoft.gsqlparser.TCustomParser; 007import gudusoft.gsqlparser.TCustomSqlStatement; 008import gudusoft.gsqlparser.TLexerClickhouse; 009import gudusoft.gsqlparser.TParserClickhouse; 010import gudusoft.gsqlparser.TSourceToken; 011import gudusoft.gsqlparser.TSourceTokenList; 012import gudusoft.gsqlparser.TStatementList; 013import gudusoft.gsqlparser.TSyntaxError; 014import gudusoft.gsqlparser.EFindSqlStateType; 015import gudusoft.gsqlparser.ETokenType; 016import gudusoft.gsqlparser.ETokenStatus; 017import gudusoft.gsqlparser.ESqlStatementType; 018import gudusoft.gsqlparser.EErrorType; 019import gudusoft.gsqlparser.stmt.TUnknownSqlStatement; 020import gudusoft.gsqlparser.sqlcmds.ISqlCmds; 021import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory; 022import gudusoft.gsqlparser.compiler.TContext; 023import gudusoft.gsqlparser.sqlenv.TSQLEnv; 024import gudusoft.gsqlparser.compiler.TGlobalScope; 025import gudusoft.gsqlparser.compiler.TFrame; 026 027import java.util.ArrayList; 028import java.util.List; 029import java.util.Stack; 030 031/** 032 * ClickHouse database SQL parser implementation. 
033 * 034 * <p>This parser handles ClickHouse-specific SQL syntax including: 035 * <ul> 036 * <li>Standard SQL DML/DDL (SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, ALTER)</li> 037 * <li>ClickHouse-specific ENGINE clause in CREATE TABLE</li> 038 * <li>PREWHERE, FINAL, SAMPLE, ARRAY JOIN clauses</li> 039 * <li>FORMAT and SETTINGS clauses</li> 040 * <li>ClickHouse data types (Array, Tuple, Map, Nullable, LowCardinality, etc.)</li> 041 * </ul> 042 * 043 * <p><b>Design Notes:</b> 044 * <ul> 045 * <li>Extends {@link AbstractSqlParser}</li> 046 * <li>Based on MySQL grammar (ClickHouse shares backtick quoting, # comments, ENGINE clause)</li> 047 * <li>No stored procedure support (ClickHouse has no stored procedures)</li> 048 * <li>Delimiter character: ';' for SQL statements</li> 049 * </ul> 050 * 051 * @see AbstractSqlParser 052 * @see TLexerClickhouse 053 * @see TParserClickhouse 054 * @since 3.2.0.0 055 */ 056public class ClickhouseSqlParser extends AbstractSqlParser { 057 058 // ========== Lexer and Parser Instances ========== 059 060 /** The ClickHouse lexer used for tokenization (public for TGSqlParser.getFlexer()) */ 061 public TLexerClickhouse flexer; 062 private TParserClickhouse fparser; 063 064 // ========== Constructor ========== 065 066 /** 067 * Construct ClickHouse SQL parser. 
068 * <p> 069 * Configures the parser for ClickHouse database with default delimiter: semicolon (;) 070 */ 071 public ClickhouseSqlParser() { 072 super(EDbVendor.dbvclickhouse); 073 074 // Set delimiter character - ClickHouse uses semicolon 075 this.delimiterChar = ';'; 076 this.defaultDelimiterStr = ";"; 077 078 // Create lexer once - will be reused for all parsing operations 079 this.flexer = new TLexerClickhouse(); 080 this.flexer.delimiterchar = this.delimiterChar; 081 this.flexer.defaultDelimiterStr = this.defaultDelimiterStr; 082 083 // CRITICAL: Set lexer for inherited getanewsourcetoken() method 084 this.lexer = this.flexer; 085 086 // Create parser once - will be reused for all parsing operations 087 this.fparser = new TParserClickhouse(null); 088 this.fparser.lexer = this.flexer; 089 } 090 091 // ========== AbstractSqlParser Abstract Methods Implementation ========== 092 093 @Override 094 protected TCustomLexer getLexer(ParserContext context) { 095 return this.flexer; 096 } 097 098 @Override 099 protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) { 100 this.fparser.sourcetokenlist = tokens; 101 return this.fparser; 102 } 103 104 @Override 105 protected void tokenizeVendorSql() { 106 doclickhousetexttotokenlist(); 107 } 108 109 @Override 110 protected void setupVendorParsersForExtraction() { 111 this.fparser.sqlcmds = this.sqlcmds; 112 this.fparser.sourcetokenlist = this.sourcetokenlist; 113 } 114 115 @Override 116 protected void extractVendorRawStatements(SqlParseResult.Builder builder) { 117 doclickhousegetrawsqlstatements(builder); 118 } 119 120 @Override 121 protected TStatementList performParsing(ParserContext context, 122 TCustomParser parser, 123 TCustomParser secondaryParser, 124 TSourceTokenList tokens, 125 TStatementList rawStatements) { 126 this.sourcetokenlist = tokens; 127 this.parserContext = context; 128 this.sqlstatements = rawStatements; 129 130 // Initialize sqlcmds for the parser 131 this.sqlcmds = 
SqlCmdsFactory.get(vendor); 132 this.fparser.sqlcmds = this.sqlcmds; 133 134 // Initialize global context for statement parsing 135 initializeGlobalContext(); 136 137 // Parse each statement 138 for (int i = 0; i < sqlstatements.size(); i++) { 139 TCustomSqlStatement stmt = sqlstatements.getRawSql(i); 140 141 try { 142 stmt.setFrameStack(frameStack); 143 int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree()); 144 145 // Handle error recovery for CREATE TABLE statements if enabled 146 boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE; 147 if (doRecover && ((parseResult != 0) || (stmt.getErrorCount() > 0))) { 148 handleCreateTableErrorRecovery(stmt); 149 } 150 151 if ((parseResult != 0) || (stmt.getErrorCount() > 0)) { 152 copyErrorsFromStatement(stmt); 153 } 154 } catch (Exception ex) { 155 handleStatementParsingException(stmt, i, ex); 156 continue; 157 } 158 } 159 160 // Clean up frame stack 161 if (globalFrame != null) { 162 globalFrame.popMeFromStack(frameStack); 163 } 164 165 return this.sqlstatements; 166 } 167 168 /** 169 * Handle error recovery for CREATE TABLE statements. 
     * <p>
     * Applies only when the statement is a CREATE TABLE and strict CREATE TABLE
     * parsing is disabled. Tokens after the closing ')' of the outermost
     * parenthesized list (except ';') are masked as sqlpluscmd tokens so the
     * grammar skips them, then the statement is re-parsed once.
     * CREATE TABLE ... AS ( SELECT ... ) is detected and left untouched.
     *
     * @param stmt the CREATE TABLE statement that reported parse errors
     */
    private void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) {
        if ((stmt.sqlstatementtype != ESqlStatementType.sstcreatetable) || TBaseType.c_createTableStrictParsing) {
            return;
        }

        int nested = 0;
        boolean isIgnore = false, isFoundIgnoreToken = false;
        TSourceToken firstIgnoreToken = null;

        for (int k = 0; k < stmt.sourcetokenlist.size(); k++) {
            TSourceToken st = stmt.sourcetokenlist.get(k);
            if (isIgnore) {
                // Past the column-definition list: remember whether any solid
                // token exists, then mask everything except the terminator.
                if (st.issolidtoken() && (st.tokencode != ';')) {
                    isFoundIgnoreToken = true;
                    if (firstIgnoreToken == null) {
                        firstIgnoreToken = st;
                    }
                }
                if (st.tokencode != ';') {
                    st.tokencode = TBaseType.sqlpluscmd;
                }
                continue;
            }
            if (st.tokencode == (int) ')') {
                nested--;
                if (nested == 0) {
                    // Closing paren of the outermost list. If followed by
                    // AS ( SELECT ..., this is a CTAS and must not be masked.
                    boolean isSelect = false;
                    TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1);
                    if (st1 != null) {
                        TSourceToken st2 = st.searchToken((int) '(', 2);
                        if (st2 != null) {
                            TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3);
                            isSelect = (st3 != null);
                        }
                    }
                    if (!isSelect) isIgnore = true;
                }
            } else if (st.tokencode == (int) '(') {
                nested++;
            }
        }

        if (isFoundIgnoreToken) {
            // Something was masked: drop previous diagnostics and retry once.
            stmt.clearError();
            stmt.parsestatement(null, false, this.parserContext.isOnlyNeedRawParseTree());
        }
    }

    // ========== ClickHouse-Specific Tokenization ==========

    /**
     * Perform ClickHouse-specific tokenization.
     * <p>
     * Based on MySQL tokenization but simplified - no DELIMITER command,
     * no stored procedure handling. Handles ClickHouse-specific token
     * transformations including :: typecast operator splitting.
     * <p>
     * NOTE(review): synthetic token codes 750 (CAST) and 341 (AS) are assumed
     * to match the generated grammar tables — confirm against the parser;
     * 40/41 are ASCII '(' and ')'.
     */
    private void doclickhousetexttotokenlist() {
        TSourceToken asourcetoken;
        int yychar;

        asourcetoken = getanewsourcetoken();
        if (asourcetoken == null) return;
        yychar = asourcetoken.tokencode;

        while (yychar > 0) {
            // Handle :: typecast operator: the lexer produces mysqllabel (identifier:)
            // followed by bind_v (:name) for "identifier::TypeName". We need to
            // rewrite as CAST(identifier AS TypeName).
            if (yychar == TBaseType.mysqllabel) {
                TSourceToken nextToken = getanewsourcetoken();
                if (nextToken != null && nextToken.tokencode == TBaseType.bind_v) {
                    // We have mysqllabel + bind_v pattern = "ident:" + ":TypeName"
                    // Rewrite as: CAST ( identifier AS TypeName )
                    String labelText = asourcetoken.toString();
                    String identText = labelText.substring(0, labelText.length() - 1); // strip trailing ':'
                    String bindText = nextToken.toString();
                    String typeText = bindText.substring(1); // strip leading ':'
                    int line = (int) asourcetoken.lineNo;
                    int col = (int) asourcetoken.columnNo;

                    // Check if preceding token is '.' (qualified name like t1.col::Type)
                    // If so, pull qualified name prefix into the CAST expression
                    List<TSourceToken> qualifiedPrefix = new ArrayList<>();
                    int lastIdx = sourcetokenlist.size() - 1;
                    // Skip trailing whitespace
                    while (lastIdx >= 0 && (sourcetokenlist.get(lastIdx).tokentype == ETokenType.ttwhitespace
                            || sourcetokenlist.get(lastIdx).tokentype == ETokenType.ttreturn)) {
                        lastIdx--;
                    }
                    if (lastIdx >= 0 && sourcetokenlist.get(lastIdx).tokentype == ETokenType.ttperiod) {
                        // Walk back collecting name.name.name. pattern
                        int prefixEnd = lastIdx;
                        int idx = lastIdx;
                        while (idx >= 0) {
                            TSourceToken t = sourcetokenlist.get(idx);
                            if (t.tokentype == ETokenType.ttperiod) {
                                idx--;
                            } else if (t.tokentype == ETokenType.ttidentifier || t.tokentype == ETokenType.ttkeyword) {
                                idx--;
                                // Skip whitespace between identifier and dot
                                while (idx >= 0 && (sourcetokenlist.get(idx).tokentype == ETokenType.ttwhitespace
                                        || sourcetokenlist.get(idx).tokentype == ETokenType.ttreturn)) {
                                    idx--;
                                }
                                // Check if next non-ws token is another dot (continue) or stop
                                if (idx >= 0 && sourcetokenlist.get(idx).tokentype == ETokenType.ttperiod) {
                                    continue;
                                } else {
                                    break;
                                }
                            } else {
                                break;
                            }
                        }
                        int prefixStart = idx + 1;
                        // Collect prefix tokens and remove from sourcetokenlist
                        for (int i = prefixStart; i <= prefixEnd; i++) {
                            qualifiedPrefix.add(sourcetokenlist.get(i));
                        }
                        // Remove prefix tokens from end of sourcetokenlist
                        while (sourcetokenlist.size() > prefixStart) {
                            sourcetokenlist.remove(sourcetokenlist.size() - 1);
                        }
                    }

                    // Emit: CAST (
                    sourcetokenlist.add(createSyntheticToken("CAST", 750, ETokenType.ttkeyword, line, col));
                    sourcetokenlist.add(createSyntheticToken("(", 40, ETokenType.ttleftparenthesis, line, col));

                    // Re-emit qualified prefix if any (e.g., "t1.")
                    for (TSourceToken prefixToken : qualifiedPrefix) {
                        prefixToken.posinlist = sourcetokenlist.size();
                        sourcetokenlist.add(prefixToken);
                    }

                    // Emit: identifier or integer (e.g. 123::String is a valid cast source)
                    asourcetoken.setAstext(identText);
                    if (identText.matches("\\d+")) {
                        asourcetoken.tokencode = TBaseType.iconst;
                        asourcetoken.tokentype = ETokenType.ttnumber;
                    } else {
                        asourcetoken.tokencode = TBaseType.ident;
                        asourcetoken.tokentype = ETokenType.ttidentifier;
                    }
                    sourcetokenlist.add(asourcetoken);

                    // Emit: AS
                    sourcetokenlist.add(createSyntheticToken("AS", 341, ETokenType.ttkeyword, line, col));

                    // Emit: TypeName
                    nextToken.setAstext(typeText);
                    nextToken.tokencode = TBaseType.ident;
                    nextToken.tokentype = ETokenType.ttidentifier;
                    sourcetokenlist.add(nextToken);

                    // Check for parameterized types: TypeName(params)
                    emitTypeParamsAndClose(line, col);

                    asourcetoken = getanewsourcetoken();
                    if (asourcetoken == null) break;
                    yychar = asourcetoken.tokencode;
                    continue;
                } else {
                    // Not followed by bind_v, add mysqllabel normally
                    sourcetokenlist.add(asourcetoken);
                    if (nextToken == null) break;
                    // The already-fetched lookahead becomes the current token.
                    asourcetoken = nextToken;
                    yychar = asourcetoken.tokencode;
                    continue;
                }
            }

            // Handle standalone :: typecast token (e.g., after string literals: '2024-01-15'::Date)
            if (yychar == TBaseType.typecast) {
                rewriteStandaloneTypecast(asourcetoken);
                asourcetoken = getanewsourcetoken();
                if (asourcetoken == null) break;
                yychar = asourcetoken.tokencode;
                continue;
            }

            // Ordinary token: pass through unchanged.
            sourcetokenlist.add(asourcetoken);

            asourcetoken = getanewsourcetoken();
            if (asourcetoken == null) break;

            yychar = asourcetoken.tokencode;
        }
    }

    /**
     * Check if next token is '(' for parameterized types and emit matching tokens,
     * then close with ')' for CAST.
365 */ 366 private void emitTypeParamsAndClose(int line, int col) { 367 TSourceToken peekToken = getanewsourcetoken(); 368 if (peekToken != null && peekToken.tokencode == 40) { // '(' 369 sourcetokenlist.add(peekToken); 370 int depth = 1; 371 while (depth > 0) { 372 TSourceToken innerToken = getanewsourcetoken(); 373 if (innerToken == null) break; 374 sourcetokenlist.add(innerToken); 375 if (innerToken.tokencode == 40) depth++; 376 else if (innerToken.tokencode == 41) depth--; 377 } 378 sourcetokenlist.add(createSyntheticToken(")", 41, ETokenType.ttrightparenthesis, line, col)); 379 } else { 380 sourcetokenlist.add(createSyntheticToken(")", 41, ETokenType.ttrightparenthesis, line, col)); 381 // The peeked token must be put back into the stream; 382 // since we can't push back, check if it needs further processing 383 if (peekToken != null) { 384 // Check if the peeked token is also a typecast (chained casts like x::A::B) 385 if (peekToken.tokencode == TBaseType.typecast) { 386 rewriteStandaloneTypecast(peekToken); 387 } else { 388 sourcetokenlist.add(peekToken); 389 } 390 } 391 } 392 } 393 394 /** 395 * Create a synthetic token for CAST() rewriting. 396 */ 397 private TSourceToken createSyntheticToken(String text, int tokencode, ETokenType tokentype, int lineNo, int columnNo) { 398 TSourceToken token = new TSourceToken(text); 399 token.tokencode = tokencode; 400 token.tokentype = tokentype; 401 token.tokenstatus = ETokenStatus.tsoriginal; 402 token.lineNo = lineNo; 403 token.columnNo = columnNo; 404 token.container = sourcetokenlist; 405 token.posinlist = sourcetokenlist.size(); 406 return token; 407 } 408 409 /** 410 * Handle standalone :: typecast (lexer correctly tokenized :: as typecast, 411 * e.g., after string literals or closing parentheses). 412 * The expression before :: is already in sourcetokenlist. 413 * Rewrites: ... expr :: TypeName ... → ... CAST( expr AS TypeName ) ... 
     * <p>
     * NOTE(review): trailing whitespace/comment tokens between the expression
     * and '::' (collected into {@code afterExpr}) are never re-emitted, and
     * whitespace between '::' and the type name is consumed and dropped —
     * confirm this loss is acceptable for text reconstruction.
     *
     * @param typecastToken the '::' token just returned by the lexer
     *                      (it is discarded, not added to the list)
     */
    private void rewriteStandaloneTypecast(TSourceToken typecastToken) {
        int line = (int) typecastToken.lineNo;
        int col = (int) typecastToken.columnNo;

        // Find the preceding expression's last solid token
        int exprEndIdx = sourcetokenlist.size() - 1;
        while (exprEndIdx >= 0) {
            TSourceToken t = sourcetokenlist.get(exprEndIdx);
            if (t.tokentype != ETokenType.ttwhitespace && t.tokentype != ETokenType.ttreturn
                    && t.tokentype != ETokenType.ttsimplecomment && t.tokentype != ETokenType.ttbracketedcomment) {
                break;
            }
            exprEndIdx--;
        }
        if (exprEndIdx < 0) return; // nothing precedes '::' — leave the list untouched

        // Find expression start
        int exprStartIdx = exprEndIdx;
        TSourceToken exprEndToken = sourcetokenlist.get(exprEndIdx);

        if (exprEndToken.tokentype == ETokenType.ttrightparenthesis) {
            // Walk back to find matching '('
            int depth = 1;
            int idx = exprEndIdx - 1;
            while (idx >= 0 && depth > 0) {
                TSourceToken t = sourcetokenlist.get(idx);
                if (t.tokentype == ETokenType.ttrightparenthesis) depth++;
                else if (t.tokentype == ETokenType.ttleftparenthesis) depth--;
                idx--;
            }
            exprStartIdx = idx + 1; // idx+1 because we decremented one extra
            // Check for function name before '(' so f(x)::T casts the whole call
            if (exprStartIdx > 0) {
                int fnIdx = exprStartIdx - 1;
                while (fnIdx >= 0) {
                    TSourceToken t = sourcetokenlist.get(fnIdx);
                    if (t.tokentype != ETokenType.ttwhitespace && t.tokentype != ETokenType.ttreturn) {
                        break;
                    }
                    fnIdx--;
                }
                if (fnIdx >= 0) {
                    TSourceToken fnToken = sourcetokenlist.get(fnIdx);
                    if (fnToken.tokentype == ETokenType.ttidentifier || fnToken.tokentype == ETokenType.ttkeyword) {
                        exprStartIdx = fnIdx;
                    }
                }
            }
        }

        // Collect expression tokens and whitespace
        List<TSourceToken> beforeExpr = new ArrayList<>();
        for (int i = 0; i < exprStartIdx; i++) {
            beforeExpr.add(sourcetokenlist.get(i));
        }
        // Collect whitespace just before expression that should go before CAST
        List<TSourceToken> wsBeforeExpr = new ArrayList<>();
        while (!beforeExpr.isEmpty()) {
            TSourceToken last = beforeExpr.get(beforeExpr.size() - 1);
            if (last.tokentype == ETokenType.ttwhitespace || last.tokentype == ETokenType.ttreturn) {
                wsBeforeExpr.add(0, beforeExpr.remove(beforeExpr.size() - 1));
            } else {
                break;
            }
        }

        List<TSourceToken> exprTokens = new ArrayList<>();
        for (int i = exprStartIdx; i <= exprEndIdx; i++) {
            exprTokens.add(sourcetokenlist.get(i));
        }
        // NOTE(review): afterExpr is collected here but never re-added below.
        List<TSourceToken> afterExpr = new ArrayList<>();
        for (int i = exprEndIdx + 1; i < sourcetokenlist.size(); i++) {
            afterExpr.add(sourcetokenlist.get(i));
        }

        // Clear and rebuild sourcetokenlist
        sourcetokenlist.clear();
        sourcetokenlist.curpos = -1;

        // Re-add tokens before expression (without trailing whitespace)
        for (TSourceToken t : beforeExpr) {
            t.posinlist = sourcetokenlist.size();
            sourcetokenlist.add(t);
        }

        // Add whitespace before CAST
        for (TSourceToken t : wsBeforeExpr) {
            t.posinlist = sourcetokenlist.size();
            sourcetokenlist.add(t);
        }

        // Emit CAST(  (750 = CAST keyword code, 40 = '(' — see class tokenizer notes)
        sourcetokenlist.add(createSyntheticToken("CAST", 750, ETokenType.ttkeyword, line, col));
        sourcetokenlist.add(createSyntheticToken("(", 40, ETokenType.ttleftparenthesis, line, col));

        // Re-add expression tokens
        for (TSourceToken t : exprTokens) {
            t.posinlist = sourcetokenlist.size();
            sourcetokenlist.add(t);
        }

        // Emit AS
        sourcetokenlist.add(createSyntheticToken("AS", 341, ETokenType.ttkeyword, line, col));

        // Read type name and emit closing paren
        TSourceToken typeToken = getanewsourcetoken();
        if (typeToken == null) {
            sourcetokenlist.add(createSyntheticToken(")", 41, ETokenType.ttrightparenthesis, line, col));
            return;
        }

        // Skip whitespace (these tokens are dropped, not re-emitted)
        while (typeToken != null && (typeToken.tokentype == ETokenType.ttwhitespace || typeToken.tokentype == ETokenType.ttreturn)) {
            typeToken = getanewsourcetoken();
        }
        if (typeToken == null) {
            sourcetokenlist.add(createSyntheticToken(")", 41, ETokenType.ttrightparenthesis, line, col));
            return;
        }

        sourcetokenlist.add(typeToken);

        // Check for parameterized types and close
        emitTypeParamsAndClose(line, col);
    }

    /**
     * Get previous non-whitespace token.
     * <p>
     * NOTE(review): not referenced anywhere in this file — possibly retained
     * for API symmetry with sibling parsers; verify before removing.
     *
     * @param ptoken the token whose predecessor is wanted
     * @return the previous solid token, or null if none exists
     */
    private TSourceToken getprevsolidtoken(TSourceToken ptoken) {
        TSourceToken ret = null;
        TSourceTokenList lctokenlist = ptoken.container;

        if (lctokenlist != null) {
            if ((ptoken.posinlist > 0) && (lctokenlist.size() > ptoken.posinlist - 1)) {
                if (!(
                        (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttwhitespace)
                                || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttreturn)
                                || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttsimplecomment)
                                || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttbracketedcomment)
                )) {
                    ret = lctokenlist.get(ptoken.posinlist - 1);
                } else {
                    // Immediate predecessor is not solid: scan backwards.
                    ret = lctokenlist.nextsolidtoken(ptoken.posinlist - 1, -1, false);
                }
            }
        }
        return ret;
    }

    // ========== ClickHouse-Specific Raw Statement Extraction ==========

    /**
     * Extract raw ClickHouse SQL statements from tokenized source.
     * <p>
     * Simplified from MySQL - no stored procedure states,
     * no DELIMITER command, no custom delimiters.
     * ClickHouse uses semicolon as the only delimiter.
573 */ 574 private void doclickhousegetrawsqlstatements(SqlParseResult.Builder builder) { 575 if (TBaseType.assigned(sqlstatements)) sqlstatements.clear(); 576 if (!TBaseType.assigned(sourcetokenlist)) { 577 builder.sqlStatements(this.sqlstatements); 578 builder.errorCode(1); 579 builder.errorMessage("No source token list available"); 580 return; 581 } 582 583 TCustomSqlStatement gcurrentsqlstatement = null; 584 EFindSqlStateType gst = EFindSqlStateType.stnormal; 585 586 for (int i = 0; i < sourcetokenlist.size(); i++) { 587 TSourceToken ast = sourcetokenlist.get(i); 588 sourcetokenlist.curpos = i; 589 590 // Token transformations during raw statement extraction 591 performRawStatementTokenTransformations(ast); 592 593 switch (gst) { 594 case sterror: { 595 if (ast.tokentype == ETokenType.ttsemicolon) { 596 appendToken(gcurrentsqlstatement, ast); 597 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 598 gst = EFindSqlStateType.stnormal; 599 } else { 600 appendToken(gcurrentsqlstatement, ast); 601 } 602 break; 603 } 604 605 case stnormal: { 606 if ((ast.tokencode == TBaseType.cmtdoublehyphen) 607 || (ast.tokencode == TBaseType.cmtslashstar) 608 || (ast.tokencode == TBaseType.lexspace) 609 || (ast.tokencode == TBaseType.lexnewline) 610 || (ast.tokentype == ETokenType.ttsemicolon)) { 611 if (gcurrentsqlstatement != null) { 612 appendToken(gcurrentsqlstatement, ast); 613 } 614 continue; 615 } 616 617 // Find a token to start sql mode 618 gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement); 619 620 if (gcurrentsqlstatement != null) { 621 gst = EFindSqlStateType.stsql; 622 appendToken(gcurrentsqlstatement, ast); 623 } else { 624 // Error token found 625 this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, 626 (ast.columnNo < 0 ? 
0 : ast.columnNo), 627 "Error when tokenize", EErrorType.spwarning, 628 TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist)); 629 630 ast.tokentype = ETokenType.tttokenlizererrortoken; 631 gst = EFindSqlStateType.sterror; 632 633 gcurrentsqlstatement = new TUnknownSqlStatement(vendor); 634 gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid; 635 appendToken(gcurrentsqlstatement, ast); 636 } 637 638 break; 639 } 640 641 case stsql: { 642 if (ast.tokentype == ETokenType.ttsemicolon) { 643 gst = EFindSqlStateType.stnormal; 644 appendToken(gcurrentsqlstatement, ast); 645 gcurrentsqlstatement.semicolonended = ast; 646 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 647 continue; 648 } 649 650 if (ast.tokencode == TBaseType.cmtdoublehyphen) { 651 if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { 652 gst = EFindSqlStateType.stnormal; 653 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 654 continue; 655 } 656 } 657 658 appendToken(gcurrentsqlstatement, ast); 659 break; 660 } 661 662 default: 663 break; 664 } 665 } 666 667 // Last statement 668 if ((gcurrentsqlstatement != null) && 669 ((gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.sterror))) { 670 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, true, builder); 671 } 672 673 // Populate builder with results 674 builder.sqlStatements(this.sqlstatements); 675 builder.syntaxErrors(syntaxErrors instanceof ArrayList ? 676 (ArrayList<TSyntaxError>) syntaxErrors : new ArrayList<>(syntaxErrors)); 677 builder.errorCode(syntaxErrors.isEmpty() ? 0 : syntaxErrors.size()); 678 } 679 680 /** 681 * Handle token transformations during raw statement extraction. 
682 * <p> 683 * Handles MySQL-inherited token transformations for DATE, TIME, TIMESTAMP keywords 684 * and other context-dependent token adjustments. 685 */ 686 private void performRawStatementTokenTransformations(TSourceToken ast) { 687 if (ast.tokencode == TBaseType.rrw_date) { 688 TSourceToken st1 = ast.nextSolidToken(); 689 if (st1 != null) { 690 if (st1.tokencode == '(') { 691 ast.tokencode = TBaseType.rrw_mysql_date_function; 692 } else if (st1.tokencode == TBaseType.sconst) { 693 ast.tokencode = TBaseType.rrw_mysql_date_const; 694 } 695 } 696 } else if (ast.tokencode == TBaseType.rrw_time) { 697 TSourceToken st1 = ast.nextSolidToken(); 698 if (st1 != null) { 699 if (st1.tokencode == TBaseType.sconst) { 700 ast.tokencode = TBaseType.rrw_mysql_time_const; 701 } 702 } 703 } else if (ast.tokencode == TBaseType.rrw_timestamp) { 704 TSourceToken st1 = ast.nextSolidToken(); 705 if (st1 != null) { 706 if (st1.tokencode == TBaseType.sconst) { 707 ast.tokencode = TBaseType.rrw_mysql_timestamp_constant; 708 } else if (st1.tokencode == TBaseType.ident) { 709 if (st1.toString().startsWith("\"")) { 710 ast.tokencode = TBaseType.rrw_mysql_timestamp_constant; 711 st1.tokencode = TBaseType.sconst; 712 } 713 } 714 } 715 } else if (ast.tokencode == TBaseType.rrw_mysql_position) { 716 TSourceToken st1 = ast.nextSolidToken(); 717 if (st1 != null) { 718 if (st1.tokencode != '(') { 719 ast.tokencode = TBaseType.ident; 720 } 721 } 722 } else if (ast.tokencode == TBaseType.rrw_interval) { 723 TSourceToken leftParen = ast.searchToken('(', 1); 724 if (leftParen != null) { 725 int k = leftParen.posinlist + 1; 726 int nested = 1; 727 boolean commaToken = false; 728 while (k < ast.container.size()) { 729 if (ast.container.get(k).tokencode == '(') { 730 nested++; 731 } 732 if (ast.container.get(k).tokencode == ')') { 733 nested--; 734 if (nested == 0) break; 735 } 736 if ((ast.container.get(k).tokencode == ',') && (nested == 1)) { 737 commaToken = true; 738 break; 739 } 740 k++; 741 } 742 
if (commaToken) { 743 ast.tokencode = TBaseType.rrw_mysql_interval_func; 744 } 745 } 746 } 747 } 748 749 private void appendToken(TCustomSqlStatement statement, TSourceToken token) { 750 if (statement == null || token == null) { 751 return; 752 } 753 token.stmt = statement; 754 statement.sourcetokenlist.add(token); 755 } 756 757 @Override 758 public String toString() { 759 return "ClickhouseSqlParser{vendor=" + vendor + "}"; 760 } 761}