001package gudusoft.gsqlparser.parser;
002
003import gudusoft.gsqlparser.EDbVendor;
004import gudusoft.gsqlparser.TBaseType;
005import gudusoft.gsqlparser.TCustomLexer;
006import gudusoft.gsqlparser.TCustomParser;
007import gudusoft.gsqlparser.TCustomSqlStatement;
008import gudusoft.gsqlparser.TLexerStarrocks;
009import gudusoft.gsqlparser.TParserStarrocksSql;
010import gudusoft.gsqlparser.TSourceToken;
011import gudusoft.gsqlparser.TSourceTokenList;
012import gudusoft.gsqlparser.TStatementList;
013import gudusoft.gsqlparser.TSyntaxError;
014import gudusoft.gsqlparser.EFindSqlStateType;
015import gudusoft.gsqlparser.ETokenType;
016import gudusoft.gsqlparser.ETokenStatus;
017import gudusoft.gsqlparser.ESqlStatementType;
018import gudusoft.gsqlparser.EErrorType;
019import gudusoft.gsqlparser.stmt.TUnknownSqlStatement;
020import gudusoft.gsqlparser.sqlcmds.ISqlCmds;
021import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory;
022import gudusoft.gsqlparser.compiler.TContext;
023import gudusoft.gsqlparser.sqlenv.TSQLEnv;
024import gudusoft.gsqlparser.compiler.TGlobalScope;
025import gudusoft.gsqlparser.compiler.TFrame;
026
027import java.io.BufferedReader;
028import java.util.ArrayList;
029import java.util.List;
030import java.util.Stack;
031
032/**
033 * StarRocks SQL parser implementation.
034 *
035 * <p>StarRocks is a fork of Apache Doris (from 2020) and maintains MySQL protocol
036 * compatibility. This parser is based on the Doris parser with extensions for
037 * StarRocks-specific features including:
038 * <ul>
039 *   <li>OLAP data models (DUPLICATE KEY, AGGREGATE KEY, UNIQUE KEY, PRIMARY KEY)</li>
040 *   <li>DISTRIBUTED BY HASH/RANDOM clause</li>
041 *   <li>PROPERTIES clause</li>
042 *   <li>StarRocks-specific data types (LARGEINT, HLL, BITMAP, JSON, ARRAY, MAP, STRUCT)</li>
043 *   <li>External catalogs (CREATE EXTERNAL CATALOG)</li>
044 *   <li>Asynchronous materialized views (REFRESH ASYNC/MANUAL)</li>
045 *   <li>SUBMIT TASK for async ETL</li>
046 * </ul>
047 *
048 * <p><b>Design Notes:</b>
049 * <ul>
050 *   <li>Extends {@link AbstractSqlParser}</li>
051 *   <li>Uses {@link TLexerStarrocks} and {@link TParserStarrocksSql}</li>
052 *   <li>Delimiter character: ';'</li>
053 * </ul>
054 *
055 * @see SqlParser
056 * @see AbstractSqlParser
057 * @see TLexerStarrocks
058 * @see TParserStarrocksSql
059 * @since 4.0.2.0
060 */
061public class StarrocksSqlParser extends AbstractSqlParser {
062
063    // ========== Lexer and Parser Instances ==========
064
    /** The StarRocks lexer used for tokenization (public for TGSqlParser.getFlexer()) */
    public TLexerStarrocks flexer;
    /** The StarRocks grammar parser; created once in the constructor and wired to {@link #flexer}. */
    private TParserStarrocksSql fparser;

    // ========== State Variables for Raw Statement Extraction ==========
    /**
     * Assigned from {@code defaultDelimiterStr} at the start of raw statement extraction.
     * It is only written, never read, inside this class.
     */
    private String userDelimiterStr;
    /** Not referenced by any method of this class. NOTE(review): candidate for removal - confirm. */
    private char curdelimiterchar;
    /** Not referenced by any method of this class. NOTE(review): candidate for removal - confirm. */
    private boolean waitingDelimiter;
073
074    // ========== Constructor ==========
075
076    /**
077     * Construct StarRocks SQL parser.
078     * <p>
079     * Configures the parser for StarRocks database with default delimiter: semicolon (;)
080     */
081    public StarrocksSqlParser() {
082        super(EDbVendor.dbvstarrocks);
083
084        // Set delimiter character - StarRocks uses semicolon like standard MySQL
085        this.delimiterChar = ';';
086        this.defaultDelimiterStr = ";";
087
088        // Create lexer once - will be reused for all parsing operations
089        this.flexer = new TLexerStarrocks();
090        this.flexer.delimiterchar = this.delimiterChar;
091        this.flexer.defaultDelimiterStr = this.defaultDelimiterStr;
092
093        // CRITICAL: Set lexer for inherited getanewsourcetoken() method
094        this.lexer = this.flexer;
095
096        // Create parser once - will be reused for all parsing operations
097        this.fparser = new TParserStarrocksSql(null);
098        this.fparser.lexer = this.flexer;
099    }
100
101    // ========== AbstractSqlParser Abstract Methods Implementation ==========
102
103    @Override
104    protected TCustomLexer getLexer(ParserContext context) {
105        return this.flexer;
106    }
107
108    @Override
109    protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) {
110        this.fparser.sourcetokenlist = tokens;
111        return this.fparser;
112    }
113
114    @Override
115    protected void tokenizeVendorSql() {
116        dostarrockstexttotokenlist();
117    }
118
119    @Override
120    protected void setupVendorParsersForExtraction() {
121        this.fparser.sqlcmds = this.sqlcmds;
122        this.fparser.sourcetokenlist = this.sourcetokenlist;
123    }
124
125    @Override
126    protected void extractVendorRawStatements(SqlParseResult.Builder builder) {
127        dostarrocksgetrawsqlstatements(builder);
128    }
129
130    @Override
131    protected TStatementList performParsing(ParserContext context,
132                                           TCustomParser parser,
133                                           TCustomParser secondaryParser,
134                                           TSourceTokenList tokens,
135                                           TStatementList rawStatements) {
136        this.sourcetokenlist = tokens;
137        this.parserContext = context;
138        this.sqlstatements = rawStatements;
139
140        this.sqlcmds = SqlCmdsFactory.get(vendor);
141        this.fparser.sqlcmds = this.sqlcmds;
142
143        initializeGlobalContext();
144
145        for (int i = 0; i < sqlstatements.size(); i++) {
146            TCustomSqlStatement stmt = sqlstatements.getRawSql(i);
147
148            try {
149                stmt.setFrameStack(frameStack);
150                int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree());
151
152                boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE;
153                if (doRecover && ((parseResult != 0) || (stmt.getErrorCount() > 0))) {
154                    handleCreateTableErrorRecovery(stmt);
155                }
156
157                if ((parseResult != 0) || (stmt.getErrorCount() > 0)) {
158                    copyErrorsFromStatement(stmt);
159                }
160            } catch (Exception ex) {
161                handleStatementParsingException(stmt, i, ex);
162                continue;
163            }
164        }
165
166        if (globalFrame != null) {
167            globalFrame.popMeFromStack(frameStack);
168        }
169
170        return this.sqlstatements;
171    }
172
173    private void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) {
174        if ((stmt.sqlstatementtype != ESqlStatementType.sstcreatetable) || TBaseType.c_createTableStrictParsing) {
175            return;
176        }
177
178        int nested = 0;
179        boolean isIgnore = false, isFoundIgnoreToken = false;
180        TSourceToken firstIgnoreToken = null;
181
182        for (int k = 0; k < stmt.sourcetokenlist.size(); k++) {
183            TSourceToken st = stmt.sourcetokenlist.get(k);
184            if (isIgnore) {
185                if (st.issolidtoken() && (st.tokencode != ';')) {
186                    isFoundIgnoreToken = true;
187                    if (firstIgnoreToken == null) {
188                        firstIgnoreToken = st;
189                    }
190                }
191                if (st.tokencode != ';') {
192                    st.tokencode = TBaseType.sqlpluscmd;
193                }
194                continue;
195            }
196            if (st.tokencode == (int) ')') {
197                nested--;
198                if (nested == 0) {
199                    boolean isSelect = false;
200                    TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1);
201                    if (st1 != null) {
202                        TSourceToken st2 = st.searchToken((int) '(', 2);
203                        if (st2 != null) {
204                            TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3);
205                            isSelect = (st3 != null);
206                        }
207                    }
208                    if (!isSelect) isIgnore = true;
209                }
210            } else if (st.tokencode == (int) '(') {
211                nested++;
212            }
213        }
214
215        if (isFoundIgnoreToken) {
216            stmt.clearError();
217            stmt.parsestatement(null, false, this.parserContext.isOnlyNeedRawParseTree());
218        }
219    }
220
221    // ========== StarRocks-Specific Tokenization ==========
222
223    private void dostarrockstexttotokenlist() {
224        TSourceToken asourcetoken, lcprevst;
225        int yychar;
226
227        asourcetoken = getanewsourcetoken();
228        if (asourcetoken == null) return;
229        yychar = asourcetoken.tokencode;
230
231        while (yychar > 0) {
232            sourcetokenlist.add(asourcetoken);
233            asourcetoken = getanewsourcetoken();
234            if (asourcetoken == null) break;
235
236            if (asourcetoken.tokencode == TBaseType.rrw_rollup) {
237                lcprevst = getprevsolidtoken(asourcetoken);
238                if (lcprevst != null) {
239                    if (lcprevst.tokencode == TBaseType.rrw_with)
240                        lcprevst.tokencode = TBaseType.with_rollup;
241                }
242            }
243
244            yychar = asourcetoken.tokencode;
245        }
246    }
247
248    private TSourceToken getprevsolidtoken(TSourceToken ptoken) {
249        TSourceToken ret = null;
250        TSourceTokenList lctokenlist = ptoken.container;
251
252        if (lctokenlist != null) {
253            if ((ptoken.posinlist > 0) && (lctokenlist.size() > ptoken.posinlist - 1)) {
254                if (!(
255                        (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttwhitespace)
256                        || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttreturn)
257                        || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttsimplecomment)
258                        || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttbracketedcomment)
259                )) {
260                    ret = lctokenlist.get(ptoken.posinlist - 1);
261                } else {
262                    ret = lctokenlist.nextsolidtoken(ptoken.posinlist - 1, -1, false);
263                }
264            }
265        }
266        return ret;
267    }
268
269    // ========== StarRocks-Specific Raw Statement Extraction ==========
270
271    private void dostarrocksgetrawsqlstatements(SqlParseResult.Builder builder) {
272        TCustomSqlStatement gcurrentsqlstatement = null;
273        EFindSqlStateType gst = EFindSqlStateType.stnormal;
274
275        userDelimiterStr = defaultDelimiterStr;
276
277        if (TBaseType.assigned(sqlstatements)) sqlstatements.clear();
278        if (!TBaseType.assigned(sourcetokenlist)) {
279            builder.sqlStatements(this.sqlstatements);
280            builder.errorCode(1);
281            builder.errorMessage("No source token list available");
282            return;
283        }
284
285        for (int i = 0; i < sourcetokenlist.size(); i++) {
286            TSourceToken ast = sourcetokenlist.get(i);
287            sourcetokenlist.curpos = i;
288
289            performRawStatementTokenTransformations(ast);
290
291            switch (gst) {
292                case sterror: {
293                    if (ast.tokentype == ETokenType.ttsemicolon) {
294                        appendToken(gcurrentsqlstatement, ast);
295                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
296                        gst = EFindSqlStateType.stnormal;
297                    } else {
298                        appendToken(gcurrentsqlstatement, ast);
299                    }
300                    break;
301                }
302
303                case stnormal: {
304                    if ((ast.tokencode == TBaseType.cmtdoublehyphen)
305                            || (ast.tokencode == TBaseType.cmtslashstar)
306                            || (ast.tokencode == TBaseType.lexspace)
307                            || (ast.tokencode == TBaseType.lexnewline)
308                            || (ast.tokentype == ETokenType.ttsemicolon)) {
309                        if (TBaseType.assigned(gcurrentsqlstatement)) {
310                            appendToken(gcurrentsqlstatement, ast);
311                        }
312                        continue;
313                    }
314
315                    gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement);
316
317                    if (TBaseType.assigned(gcurrentsqlstatement)) {
318                        gst = EFindSqlStateType.stsql;
319                        appendToken(gcurrentsqlstatement, ast);
320                    }
321
322                    if (!TBaseType.assigned(gcurrentsqlstatement)) {
323                        this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo),
324                                "Error when tokenize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist));
325
326                        ast.tokentype = ETokenType.tttokenlizererrortoken;
327                        gst = EFindSqlStateType.sterror;
328
329                        gcurrentsqlstatement = new TUnknownSqlStatement(vendor);
330                        gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid;
331                        appendToken(gcurrentsqlstatement, ast);
332                    }
333                    break;
334                }
335
336                case stsql: {
337                    if (ast.tokentype == ETokenType.ttsemicolon) {
338                        gst = EFindSqlStateType.stnormal;
339                        appendToken(gcurrentsqlstatement, ast);
340                        gcurrentsqlstatement.semicolonended = ast;
341                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
342                        continue;
343                    }
344
345                    if (ast.tokencode == TBaseType.cmtdoublehyphen) {
346                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) {
347                            gst = EFindSqlStateType.stnormal;
348                            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
349                            continue;
350                        }
351                    }
352
353                    appendToken(gcurrentsqlstatement, ast);
354                    break;
355                }
356
357                default:
358                    break;
359            }
360        }
361
362        // Last statement
363        if (TBaseType.assigned(gcurrentsqlstatement) && ((gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.sterror))) {
364            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, true, builder);
365        }
366
367        builder.sqlStatements(this.sqlstatements);
368        builder.syntaxErrors(syntaxErrors instanceof ArrayList ?
369                (ArrayList<TSyntaxError>) syntaxErrors : new ArrayList<>(syntaxErrors));
370        builder.errorCode(syntaxErrors.isEmpty() ? 0 : syntaxErrors.size());
371    }
372
373    private void performRawStatementTokenTransformations(TSourceToken ast) {
374        // StarRocks-specific token transformations (MySQL-compatible)
375        if (ast.tokencode == TBaseType.rrw_date) {
376            TSourceToken st1 = ast.nextSolidToken();
377            if (st1 != null) {
378                if (st1.tokencode == '(') {
379                    ast.tokencode = TBaseType.rrw_mysql_date_function;
380                } else if (st1.tokencode == TBaseType.sconst) {
381                    ast.tokencode = TBaseType.rrw_mysql_date_const;
382                }
383            }
384        } else if (ast.tokencode == TBaseType.rrw_time) {
385            TSourceToken st1 = ast.nextSolidToken();
386            if (st1 != null) {
387                if (st1.tokencode == TBaseType.sconst) {
388                    ast.tokencode = TBaseType.rrw_mysql_time_const;
389                }
390            }
391        } else if (ast.tokencode == TBaseType.rrw_timestamp) {
392            TSourceToken st1 = ast.nextSolidToken();
393            if (st1 != null) {
394                if (st1.tokencode == TBaseType.sconst) {
395                    ast.tokencode = TBaseType.rrw_mysql_timestamp_constant;
396                }
397            }
398        }
399    }
400
401    private void appendToken(TCustomSqlStatement statement, TSourceToken token) {
402        if (statement == null || token == null) {
403            return;
404        }
405        token.stmt = statement;
406        statement.sourcetokenlist.add(token);
407    }
408
409    @Override
410    public String toString() {
411        return "StarrocksSqlParser{vendor=" + vendor + "}";
412    }
413}