001package gudusoft.gsqlparser.parser;
002
003import gudusoft.gsqlparser.EDbVendor;
004import gudusoft.gsqlparser.TBaseType;
005import gudusoft.gsqlparser.TCustomLexer;
006import gudusoft.gsqlparser.TCustomParser;
007import gudusoft.gsqlparser.TCustomSqlStatement;
008import gudusoft.gsqlparser.TLexerDoris;
009import gudusoft.gsqlparser.TParserDoris;
010import gudusoft.gsqlparser.TSourceToken;
011import gudusoft.gsqlparser.TSourceTokenList;
012import gudusoft.gsqlparser.TStatementList;
013import gudusoft.gsqlparser.TSyntaxError;
014import gudusoft.gsqlparser.EFindSqlStateType;
015import gudusoft.gsqlparser.ETokenType;
016import gudusoft.gsqlparser.ETokenStatus;
017import gudusoft.gsqlparser.ESqlStatementType;
018import gudusoft.gsqlparser.EErrorType;
019import gudusoft.gsqlparser.stmt.TUnknownSqlStatement;
020import gudusoft.gsqlparser.sqlcmds.ISqlCmds;
021import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory;
022import gudusoft.gsqlparser.compiler.TContext;
023import gudusoft.gsqlparser.sqlenv.TSQLEnv;
024import gudusoft.gsqlparser.compiler.TGlobalScope;
025import gudusoft.gsqlparser.compiler.TFrame;
026
027import java.io.BufferedReader;
028import java.util.ArrayList;
029import java.util.List;
030import java.util.Stack;
031
032/**
033 * Apache Doris SQL parser implementation.
034 *
035 * <p>Doris is MySQL-compatible, so this parser is based on the MySQL parser
036 * with extensions for Doris-specific features including:
037 * <ul>
038 *   <li>OLAP data models (DUPLICATE KEY, AGGREGATE KEY, UNIQUE KEY)</li>
039 *   <li>DISTRIBUTED BY clause</li>
040 *   <li>PROPERTIES clause</li>
041 *   <li>Doris-specific data types (LARGEINT, HLL, BITMAP, etc.)</li>
042 *   <li>Load and export statements (BROKER LOAD, EXPORT, ROUTINE LOAD)</li>
043 * </ul>
044 *
045 * <p><b>Design Notes:</b>
046 * <ul>
047 *   <li>Extends {@link AbstractSqlParser}</li>
048 *   <li>Uses {@link TLexerDoris} and {@link TParserDoris}</li>
049 *   <li>Delimiter character: ';'</li>
050 * </ul>
051 *
052 * @see SqlParser
053 * @see AbstractSqlParser
054 * @see TLexerDoris
055 * @see TParserDoris
056 * @since 3.2.0.0
057 */
058public class DorisSqlParser extends AbstractSqlParser {
059
060    // ========== Lexer and Parser Instances ==========
061
062    /** The Doris lexer used for tokenization (public for TGSqlParser.getFlexer()) */
063    public TLexerDoris flexer;
064    private TParserDoris fparser;
065
066    // ========== State Variables for Raw Statement Extraction ==========
067    private String userDelimiterStr;
068    private char curdelimiterchar;
069    private boolean waitingDelimiter;
070
071    // ========== Constructor ==========
072
073    /**
074     * Construct Doris SQL parser.
075     * <p>
076     * Configures the parser for Apache Doris database with default delimiter: semicolon (;)
077     */
078    public DorisSqlParser() {
079        super(EDbVendor.dbvdoris);
080
081        // Set delimiter character - Doris uses semicolon like standard MySQL
082        this.delimiterChar = ';';
083        this.defaultDelimiterStr = ";";
084
085        // Create lexer once - will be reused for all parsing operations
086        this.flexer = new TLexerDoris();
087        this.flexer.delimiterchar = this.delimiterChar;
088        this.flexer.defaultDelimiterStr = this.defaultDelimiterStr;
089
090        // CRITICAL: Set lexer for inherited getanewsourcetoken() method
091        this.lexer = this.flexer;
092
093        // Create parser once - will be reused for all parsing operations
094        this.fparser = new TParserDoris(null);
095        this.fparser.lexer = this.flexer;
096    }
097
098    // ========== AbstractSqlParser Abstract Methods Implementation ==========
099
100    @Override
101    protected TCustomLexer getLexer(ParserContext context) {
102        return this.flexer;
103    }
104
105    @Override
106    protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) {
107        this.fparser.sourcetokenlist = tokens;
108        return this.fparser;
109    }
110
111    @Override
112    protected void tokenizeVendorSql() {
113        dodoristexttotokenlist();
114    }
115
116    @Override
117    protected void setupVendorParsersForExtraction() {
118        this.fparser.sqlcmds = this.sqlcmds;
119        this.fparser.sourcetokenlist = this.sourcetokenlist;
120    }
121
122    @Override
123    protected void extractVendorRawStatements(SqlParseResult.Builder builder) {
124        dodorisgetrawsqlstatements(builder);
125    }
126
127    @Override
128    protected TStatementList performParsing(ParserContext context,
129                                           TCustomParser parser,
130                                           TCustomParser secondaryParser,
131                                           TSourceTokenList tokens,
132                                           TStatementList rawStatements) {
133        this.sourcetokenlist = tokens;
134        this.parserContext = context;
135        this.sqlstatements = rawStatements;
136
137        this.sqlcmds = SqlCmdsFactory.get(vendor);
138        this.fparser.sqlcmds = this.sqlcmds;
139
140        initializeGlobalContext();
141
142        for (int i = 0; i < sqlstatements.size(); i++) {
143            TCustomSqlStatement stmt = sqlstatements.getRawSql(i);
144
145            try {
146                stmt.setFrameStack(frameStack);
147                int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree());
148
149                boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE;
150                if (doRecover && ((parseResult != 0) || (stmt.getErrorCount() > 0))) {
151                    handleCreateTableErrorRecovery(stmt);
152                }
153
154                if ((parseResult != 0) || (stmt.getErrorCount() > 0)) {
155                    copyErrorsFromStatement(stmt);
156                }
157            } catch (Exception ex) {
158                handleStatementParsingException(stmt, i, ex);
159                continue;
160            }
161        }
162
163        if (globalFrame != null) {
164            globalFrame.popMeFromStack(frameStack);
165        }
166
167        return this.sqlstatements;
168    }
169
170    private void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) {
171        if ((stmt.sqlstatementtype != ESqlStatementType.sstcreatetable) || TBaseType.c_createTableStrictParsing) {
172            return;
173        }
174
175        int nested = 0;
176        boolean isIgnore = false, isFoundIgnoreToken = false;
177        TSourceToken firstIgnoreToken = null;
178
179        for (int k = 0; k < stmt.sourcetokenlist.size(); k++) {
180            TSourceToken st = stmt.sourcetokenlist.get(k);
181            if (isIgnore) {
182                if (st.issolidtoken() && (st.tokencode != ';')) {
183                    isFoundIgnoreToken = true;
184                    if (firstIgnoreToken == null) {
185                        firstIgnoreToken = st;
186                    }
187                }
188                if (st.tokencode != ';') {
189                    st.tokencode = TBaseType.sqlpluscmd;
190                }
191                continue;
192            }
193            if (st.tokencode == (int) ')') {
194                nested--;
195                if (nested == 0) {
196                    boolean isSelect = false;
197                    TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1);
198                    if (st1 != null) {
199                        TSourceToken st2 = st.searchToken((int) '(', 2);
200                        if (st2 != null) {
201                            TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3);
202                            isSelect = (st3 != null);
203                        }
204                    }
205                    if (!isSelect) isIgnore = true;
206                }
207            } else if (st.tokencode == (int) '(') {
208                nested++;
209            }
210        }
211
212        if (isFoundIgnoreToken) {
213            stmt.clearError();
214            stmt.parsestatement(null, false, this.parserContext.isOnlyNeedRawParseTree());
215        }
216    }
217
218    // ========== Doris-Specific Tokenization ==========
219
220    private void dodoristexttotokenlist() {
221        TSourceToken asourcetoken, lcprevst;
222        int yychar;
223
224        asourcetoken = getanewsourcetoken();
225        if (asourcetoken == null) return;
226        yychar = asourcetoken.tokencode;
227
228        while (yychar > 0) {
229            sourcetokenlist.add(asourcetoken);
230            asourcetoken = getanewsourcetoken();
231            if (asourcetoken == null) break;
232
233            if (asourcetoken.tokencode == TBaseType.rrw_rollup) {
234                lcprevst = getprevsolidtoken(asourcetoken);
235                if (lcprevst != null) {
236                    if (lcprevst.tokencode == TBaseType.rrw_with)
237                        lcprevst.tokencode = TBaseType.with_rollup;
238                }
239            }
240
241            yychar = asourcetoken.tokencode;
242        }
243    }
244
245    private TSourceToken getprevsolidtoken(TSourceToken ptoken) {
246        TSourceToken ret = null;
247        TSourceTokenList lctokenlist = ptoken.container;
248
249        if (lctokenlist != null) {
250            if ((ptoken.posinlist > 0) && (lctokenlist.size() > ptoken.posinlist - 1)) {
251                if (!(
252                        (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttwhitespace)
253                        || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttreturn)
254                        || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttsimplecomment)
255                        || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttbracketedcomment)
256                )) {
257                    ret = lctokenlist.get(ptoken.posinlist - 1);
258                } else {
259                    ret = lctokenlist.nextsolidtoken(ptoken.posinlist - 1, -1, false);
260                }
261            }
262        }
263        return ret;
264    }
265
266    // ========== Doris-Specific Raw Statement Extraction ==========
267
268    private void dodorisgetrawsqlstatements(SqlParseResult.Builder builder) {
269        TCustomSqlStatement gcurrentsqlstatement = null;
270        EFindSqlStateType gst = EFindSqlStateType.stnormal;
271
272        userDelimiterStr = defaultDelimiterStr;
273
274        if (TBaseType.assigned(sqlstatements)) sqlstatements.clear();
275        if (!TBaseType.assigned(sourcetokenlist)) {
276            builder.sqlStatements(this.sqlstatements);
277            builder.errorCode(1);
278            builder.errorMessage("No source token list available");
279            return;
280        }
281
282        for (int i = 0; i < sourcetokenlist.size(); i++) {
283            TSourceToken ast = sourcetokenlist.get(i);
284            sourcetokenlist.curpos = i;
285
286            performRawStatementTokenTransformations(ast);
287
288            switch (gst) {
289                case sterror: {
290                    if (ast.tokentype == ETokenType.ttsemicolon) {
291                        appendToken(gcurrentsqlstatement, ast);
292                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
293                        gst = EFindSqlStateType.stnormal;
294                    } else {
295                        appendToken(gcurrentsqlstatement, ast);
296                    }
297                    break;
298                }
299
300                case stnormal: {
301                    if ((ast.tokencode == TBaseType.cmtdoublehyphen)
302                            || (ast.tokencode == TBaseType.cmtslashstar)
303                            || (ast.tokencode == TBaseType.lexspace)
304                            || (ast.tokencode == TBaseType.lexnewline)
305                            || (ast.tokentype == ETokenType.ttsemicolon)) {
306                        if (TBaseType.assigned(gcurrentsqlstatement)) {
307                            appendToken(gcurrentsqlstatement, ast);
308                        }
309                        continue;
310                    }
311
312                    gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement);
313
314                    if (TBaseType.assigned(gcurrentsqlstatement)) {
315                        gst = EFindSqlStateType.stsql;
316                        appendToken(gcurrentsqlstatement, ast);
317                    }
318
319                    if (!TBaseType.assigned(gcurrentsqlstatement)) {
320                        this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo),
321                                "Error when tokenize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist));
322
323                        ast.tokentype = ETokenType.tttokenlizererrortoken;
324                        gst = EFindSqlStateType.sterror;
325
326                        gcurrentsqlstatement = new TUnknownSqlStatement(vendor);
327                        gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid;
328                        appendToken(gcurrentsqlstatement, ast);
329                    }
330                    break;
331                }
332
333                case stsql: {
334                    if (ast.tokentype == ETokenType.ttsemicolon) {
335                        gst = EFindSqlStateType.stnormal;
336                        appendToken(gcurrentsqlstatement, ast);
337                        gcurrentsqlstatement.semicolonended = ast;
338                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
339                        continue;
340                    }
341
342                    if (ast.tokencode == TBaseType.cmtdoublehyphen) {
343                        if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) {
344                            gst = EFindSqlStateType.stnormal;
345                            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder);
346                            continue;
347                        }
348                    }
349
350                    appendToken(gcurrentsqlstatement, ast);
351                    break;
352                }
353
354                default:
355                    break;
356            }
357        }
358
359        // Last statement
360        if (TBaseType.assigned(gcurrentsqlstatement) && ((gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.sterror))) {
361            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, true, builder);
362        }
363
364        builder.sqlStatements(this.sqlstatements);
365        builder.syntaxErrors(syntaxErrors instanceof ArrayList ?
366                (ArrayList<TSyntaxError>) syntaxErrors : new ArrayList<>(syntaxErrors));
367        builder.errorCode(syntaxErrors.isEmpty() ? 0 : syntaxErrors.size());
368    }
369
370    private void performRawStatementTokenTransformations(TSourceToken ast) {
371        // Doris-specific token transformations can be added here
372        // For now, handle common MySQL-compatible transformations
373        if (ast.tokencode == TBaseType.rrw_date) {
374            TSourceToken st1 = ast.nextSolidToken();
375            if (st1 != null) {
376                if (st1.tokencode == '(') {
377                    ast.tokencode = TBaseType.rrw_mysql_date_function;
378                } else if (st1.tokencode == TBaseType.sconst) {
379                    ast.tokencode = TBaseType.rrw_mysql_date_const;
380                }
381            }
382        } else if (ast.tokencode == TBaseType.rrw_time) {
383            TSourceToken st1 = ast.nextSolidToken();
384            if (st1 != null) {
385                if (st1.tokencode == TBaseType.sconst) {
386                    ast.tokencode = TBaseType.rrw_mysql_time_const;
387                }
388            }
389        } else if (ast.tokencode == TBaseType.rrw_timestamp) {
390            TSourceToken st1 = ast.nextSolidToken();
391            if (st1 != null) {
392                if (st1.tokencode == TBaseType.sconst) {
393                    ast.tokencode = TBaseType.rrw_mysql_timestamp_constant;
394                }
395            }
396        }
397    }
398
399    private void appendToken(TCustomSqlStatement statement, TSourceToken token) {
400        if (statement == null || token == null) {
401            return;
402        }
403        token.stmt = statement;
404        statement.sourcetokenlist.add(token);
405    }
406
407    @Override
408    public String toString() {
409        return "DorisSqlParser{vendor=" + vendor + "}";
410    }
411}