001package gudusoft.gsqlparser.parser; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.TBaseType; 005import gudusoft.gsqlparser.TCustomLexer; 006import gudusoft.gsqlparser.TCustomParser; 007import gudusoft.gsqlparser.TCustomSqlStatement; 008import gudusoft.gsqlparser.TLexerStarrocks; 009import gudusoft.gsqlparser.TParserStarrocksSql; 010import gudusoft.gsqlparser.TSourceToken; 011import gudusoft.gsqlparser.TSourceTokenList; 012import gudusoft.gsqlparser.TStatementList; 013import gudusoft.gsqlparser.TSyntaxError; 014import gudusoft.gsqlparser.EFindSqlStateType; 015import gudusoft.gsqlparser.ETokenType; 016import gudusoft.gsqlparser.ETokenStatus; 017import gudusoft.gsqlparser.ESqlStatementType; 018import gudusoft.gsqlparser.EErrorType; 019import gudusoft.gsqlparser.stmt.TUnknownSqlStatement; 020import gudusoft.gsqlparser.sqlcmds.ISqlCmds; 021import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory; 022import gudusoft.gsqlparser.compiler.TContext; 023import gudusoft.gsqlparser.sqlenv.TSQLEnv; 024import gudusoft.gsqlparser.compiler.TGlobalScope; 025import gudusoft.gsqlparser.compiler.TFrame; 026 027import java.io.BufferedReader; 028import java.util.ArrayList; 029import java.util.List; 030import java.util.Stack; 031 032/** 033 * StarRocks SQL parser implementation. 034 * 035 * <p>StarRocks is a fork of Apache Doris (from 2020) and maintains MySQL protocol 036 * compatibility. This parser is based on the Doris parser with extensions for 037 * StarRocks-specific features including: 038 * <ul> 039 * <li>OLAP data models (DUPLICATE KEY, AGGREGATE KEY, UNIQUE KEY, PRIMARY KEY)</li> 040 * <li>DISTRIBUTED BY HASH/RANDOM clause</li> 041 * <li>PROPERTIES clause</li> 042 * <li>StarRocks-specific data types (LARGEINT, HLL, BITMAP, JSON, ARRAY, MAP, STRUCT)</li> 043 * <li>External catalogs (CREATE EXTERNAL CATALOG)</li> 044 * <li>Asynchronous materialized views (REFRESH ASYNC/MANUAL)</li> 045 * <li>SUBMIT TASK for async ETL</li> 046 * </ul> 047 * 048 * <p><b>Design Notes:</b> 049 * <ul> 050 * <li>Extends {@link AbstractSqlParser}</li> 051 * <li>Uses {@link TLexerStarrocks} and {@link TParserStarrocksSql}</li> 052 * <li>Delimiter character: ';'</li> 053 * </ul> 054 * 055 * @see SqlParser 056 * @see AbstractSqlParser 057 * @see TLexerStarrocks 058 * @see TParserStarrocksSql 059 * @since 4.0.2.0 060 */ 061public class StarrocksSqlParser extends AbstractSqlParser { 062 063 // ========== Lexer and Parser Instances ========== 064 065 /** The StarRocks lexer used for tokenization (public for TGSqlParser.getFlexer()) */ 066 public TLexerStarrocks flexer; 067 private TParserStarrocksSql fparser; 068 069 // ========== State Variables for Raw Statement Extraction ========== 070 private String userDelimiterStr; 071 private char curdelimiterchar; 072 private boolean waitingDelimiter; 073 074 // ========== Constructor ========== 075 076 /** 077 * Construct StarRocks SQL parser. 078 * <p> 079 * Configures the parser for StarRocks database with default delimiter: semicolon (;) 080 */ 081 public StarrocksSqlParser() { 082 super(EDbVendor.dbvstarrocks); 083 084 // Set delimiter character - StarRocks uses semicolon like standard MySQL 085 this.delimiterChar = ';'; 086 this.defaultDelimiterStr = ";"; 087 088 // Create lexer once - will be reused for all parsing operations 089 this.flexer = new TLexerStarrocks(); 090 this.flexer.delimiterchar = this.delimiterChar; 091 this.flexer.defaultDelimiterStr = this.defaultDelimiterStr; 092 093 // CRITICAL: Set lexer for inherited getanewsourcetoken() method 094 this.lexer = this.flexer; 095 096 // Create parser once - will be reused for all parsing operations 097 this.fparser = new TParserStarrocksSql(null); 098 this.fparser.lexer = this.flexer; 099 } 100 101 // ========== AbstractSqlParser Abstract Methods Implementation ========== 102 103 @Override 104 protected TCustomLexer getLexer(ParserContext context) { 105 return this.flexer; 106 } 107 108 @Override 109 protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) { 110 this.fparser.sourcetokenlist = tokens; 111 return this.fparser; 112 } 113 114 @Override 115 protected void tokenizeVendorSql() { 116 dostarrockstexttotokenlist(); 117 } 118 119 @Override 120 protected void setupVendorParsersForExtraction() { 121 this.fparser.sqlcmds = this.sqlcmds; 122 this.fparser.sourcetokenlist = this.sourcetokenlist; 123 } 124 125 @Override 126 protected void extractVendorRawStatements(SqlParseResult.Builder builder) { 127 dostarrocksgetrawsqlstatements(builder); 128 } 129 130 @Override 131 protected TStatementList performParsing(ParserContext context, 132 TCustomParser parser, 133 TCustomParser secondaryParser, 134 TSourceTokenList tokens, 135 TStatementList rawStatements) { 136 this.sourcetokenlist = tokens; 137 this.parserContext = context; 138 this.sqlstatements = rawStatements; 139 140 this.sqlcmds = SqlCmdsFactory.get(vendor); 141 this.fparser.sqlcmds = this.sqlcmds; 142 143 initializeGlobalContext(); 144 145 for (int i = 0; i < sqlstatements.size(); i++) { 146 TCustomSqlStatement stmt = sqlstatements.getRawSql(i); 147 148 try { 149 stmt.setFrameStack(frameStack); 150 int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree()); 151 152 boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE; 153 if (doRecover && ((parseResult != 0) || (stmt.getErrorCount() > 0))) { 154 handleCreateTableErrorRecovery(stmt); 155 } 156 157 if ((parseResult != 0) || (stmt.getErrorCount() > 0)) { 158 copyErrorsFromStatement(stmt); 159 } 160 } catch (Exception ex) { 161 handleStatementParsingException(stmt, i, ex); 162 continue; 163 } 164 } 165 166 if (globalFrame != null) { 167 globalFrame.popMeFromStack(frameStack); 168 } 169 170 return this.sqlstatements; 171 } 172 173 private void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) { 174 if ((stmt.sqlstatementtype != ESqlStatementType.sstcreatetable) || TBaseType.c_createTableStrictParsing) { 175 return; 176 } 177 178 int nested = 0; 179 boolean isIgnore = false, isFoundIgnoreToken = false; 180 TSourceToken firstIgnoreToken = null; 181 182 for (int k = 0; k < stmt.sourcetokenlist.size(); k++) { 183 TSourceToken st = stmt.sourcetokenlist.get(k); 184 if (isIgnore) { 185 if (st.issolidtoken() && (st.tokencode != ';')) { 186 isFoundIgnoreToken = true; 187 if (firstIgnoreToken == null) { 188 firstIgnoreToken = st; 189 } 190 } 191 if (st.tokencode != ';') { 192 st.tokencode = TBaseType.sqlpluscmd; 193 } 194 continue; 195 } 196 if (st.tokencode == (int) ')') { 197 nested--; 198 if (nested == 0) { 199 boolean isSelect = false; 200 TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1); 201 if (st1 != null) { 202 TSourceToken st2 = st.searchToken((int) '(', 2); 203 if (st2 != null) { 204 TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3); 205 isSelect = (st3 != null); 206 } 207 } 208 if (!isSelect) isIgnore = true; 209 } 210 } else if (st.tokencode == (int) '(') { 211 nested++; 212 } 213 } 214 215 if (isFoundIgnoreToken) { 216 stmt.clearError(); 217 stmt.parsestatement(null, false, this.parserContext.isOnlyNeedRawParseTree()); 218 } 219 } 220 221 // ========== StarRocks-Specific Tokenization ========== 222 223 private void dostarrockstexttotokenlist() { 224 TSourceToken asourcetoken, lcprevst; 225 int yychar; 226 227 asourcetoken = getanewsourcetoken(); 228 if (asourcetoken == null) return; 229 yychar = asourcetoken.tokencode; 230 231 while (yychar > 0) { 232 sourcetokenlist.add(asourcetoken); 233 asourcetoken = getanewsourcetoken(); 234 if (asourcetoken == null) break; 235 236 if (asourcetoken.tokencode == TBaseType.rrw_rollup) { 237 lcprevst = getprevsolidtoken(asourcetoken); 238 if (lcprevst != null) { 239 if (lcprevst.tokencode == TBaseType.rrw_with) 240 lcprevst.tokencode = TBaseType.with_rollup; 241 } 242 } 243 244 yychar = asourcetoken.tokencode; 245 } 246 } 247 248 private TSourceToken getprevsolidtoken(TSourceToken ptoken) { 249 TSourceToken ret = null; 250 TSourceTokenList lctokenlist = ptoken.container; 251 252 if (lctokenlist != null) { 253 if ((ptoken.posinlist > 0) && (lctokenlist.size() > ptoken.posinlist - 1)) { 254 if (!( 255 (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttwhitespace) 256 || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttreturn) 257 || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttsimplecomment) 258 || (lctokenlist.get(ptoken.posinlist - 1).tokentype == ETokenType.ttbracketedcomment) 259 )) { 260 ret = lctokenlist.get(ptoken.posinlist - 1); 261 } else { 262 ret = lctokenlist.nextsolidtoken(ptoken.posinlist - 1, -1, false); 263 } 264 } 265 } 266 return ret; 267 } 268 269 // ========== StarRocks-Specific Raw Statement Extraction ========== 270 271 private void dostarrocksgetrawsqlstatements(SqlParseResult.Builder builder) { 272 TCustomSqlStatement gcurrentsqlstatement = null; 273 EFindSqlStateType gst = EFindSqlStateType.stnormal; 274 275 userDelimiterStr = defaultDelimiterStr; 276 277 if (TBaseType.assigned(sqlstatements)) sqlstatements.clear(); 278 if (!TBaseType.assigned(sourcetokenlist)) { 279 builder.sqlStatements(this.sqlstatements); 280 builder.errorCode(1); 281 builder.errorMessage("No source token list available"); 282 return; 283 } 284 285 for (int i = 0; i < sourcetokenlist.size(); i++) { 286 TSourceToken ast = sourcetokenlist.get(i); 287 sourcetokenlist.curpos = i; 288 289 performRawStatementTokenTransformations(ast); 290 291 switch (gst) { 292 case sterror: { 293 if (ast.tokentype == ETokenType.ttsemicolon) { 294 appendToken(gcurrentsqlstatement, ast); 295 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 296 gst = EFindSqlStateType.stnormal; 297 } else { 298 appendToken(gcurrentsqlstatement, ast); 299 } 300 break; 301 } 302 303 case stnormal: { 304 if ((ast.tokencode == TBaseType.cmtdoublehyphen) 305 || (ast.tokencode == TBaseType.cmtslashstar) 306 || (ast.tokencode == TBaseType.lexspace) 307 || (ast.tokencode == TBaseType.lexnewline) 308 || (ast.tokentype == ETokenType.ttsemicolon)) { 309 if (TBaseType.assigned(gcurrentsqlstatement)) { 310 appendToken(gcurrentsqlstatement, ast); 311 } 312 continue; 313 } 314 315 gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement); 316 317 if (TBaseType.assigned(gcurrentsqlstatement)) { 318 gst = EFindSqlStateType.stsql; 319 appendToken(gcurrentsqlstatement, ast); 320 } 321 322 if (!TBaseType.assigned(gcurrentsqlstatement)) { 323 this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo), 324 "Error when tokenize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist)); 325 326 ast.tokentype = ETokenType.tttokenlizererrortoken; 327 gst = EFindSqlStateType.sterror; 328 329 gcurrentsqlstatement = new TUnknownSqlStatement(vendor); 330 gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid; 331 appendToken(gcurrentsqlstatement, ast); 332 } 333 break; 334 } 335 336 case stsql: { 337 if (ast.tokentype == ETokenType.ttsemicolon) { 338 gst = EFindSqlStateType.stnormal; 339 appendToken(gcurrentsqlstatement, ast); 340 gcurrentsqlstatement.semicolonended = ast; 341 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 342 continue; 343 } 344 345 if (ast.tokencode == TBaseType.cmtdoublehyphen) { 346 if (ast.toString().trim().endsWith(TBaseType.sqlflow_stmt_delimiter_str)) { 347 gst = EFindSqlStateType.stnormal; 348 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, false, builder); 349 continue; 350 } 351 } 352 353 appendToken(gcurrentsqlstatement, ast); 354 break; 355 } 356 357 default: 358 break; 359 } 360 } 361 362 // Last statement 363 if (TBaseType.assigned(gcurrentsqlstatement) && ((gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.sterror))) { 364 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, null, this.sqlstatements, true, builder); 365 } 366 367 builder.sqlStatements(this.sqlstatements); 368 builder.syntaxErrors(syntaxErrors instanceof ArrayList ? 369 (ArrayList<TSyntaxError>) syntaxErrors : new ArrayList<>(syntaxErrors)); 370 builder.errorCode(syntaxErrors.isEmpty() ? 0 : syntaxErrors.size()); 371 } 372 373 private void performRawStatementTokenTransformations(TSourceToken ast) { 374 // StarRocks-specific token transformations (MySQL-compatible) 375 if (ast.tokencode == TBaseType.rrw_date) { 376 TSourceToken st1 = ast.nextSolidToken(); 377 if (st1 != null) { 378 if (st1.tokencode == '(') { 379 ast.tokencode = TBaseType.rrw_mysql_date_function; 380 } else if (st1.tokencode == TBaseType.sconst) { 381 ast.tokencode = TBaseType.rrw_mysql_date_const; 382 } 383 } 384 } else if (ast.tokencode == TBaseType.rrw_time) { 385 TSourceToken st1 = ast.nextSolidToken(); 386 if (st1 != null) { 387 if (st1.tokencode == TBaseType.sconst) { 388 ast.tokencode = TBaseType.rrw_mysql_time_const; 389 } 390 } 391 } else if (ast.tokencode == TBaseType.rrw_timestamp) { 392 TSourceToken st1 = ast.nextSolidToken(); 393 if (st1 != null) { 394 if (st1.tokencode == TBaseType.sconst) { 395 ast.tokencode = TBaseType.rrw_mysql_timestamp_constant; 396 } 397 } 398 } 399 } 400 401 private void appendToken(TCustomSqlStatement statement, TSourceToken token) { 402 if (statement == null || token == null) { 403 return; 404 } 405 token.stmt = statement; 406 statement.sourcetokenlist.add(token); 407 } 408 409 @Override 410 public String toString() { 411 return "StarrocksSqlParser{vendor=" + vendor + "}"; 412 } 413}