package gudusoft.gsqlparser.parser;

import gudusoft.gsqlparser.EDbVendor;
import gudusoft.gsqlparser.EFindSqlStateType;
import gudusoft.gsqlparser.EErrorType;
import gudusoft.gsqlparser.ESqlStatementType;
import gudusoft.gsqlparser.ETokenStatus;
import gudusoft.gsqlparser.ETokenType;
import gudusoft.gsqlparser.TBaseType;
import gudusoft.gsqlparser.TCustomLexer;
import gudusoft.gsqlparser.TCustomParser;
import gudusoft.gsqlparser.TCustomSqlStatement;
import gudusoft.gsqlparser.TLexerDatabricks;
import gudusoft.gsqlparser.TParserDatabricks;
import gudusoft.gsqlparser.TSourceToken;
import gudusoft.gsqlparser.TSourceTokenList;
import gudusoft.gsqlparser.TStatementList;
import gudusoft.gsqlparser.TSyntaxError;
import gudusoft.gsqlparser.TLog;
import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement;
import gudusoft.gsqlparser.stmt.TUnknownSqlStatement;
import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory;
import gudusoft.gsqlparser.compiler.TGlobalScope;
import gudusoft.gsqlparser.resolver.TSQLResolver;
import gudusoft.gsqlparser.compiler.TASTEvaluator;
import gudusoft.gsqlparser.nodes.TTypeName;

import java.util.ArrayList;
/**
 * Databricks SQL parser implementation.
 *
 * <p>This parser handles Databricks-specific SQL syntax, including:
 * <ul>
 *   <li>The Databricks SQL dialect and its extensions</li>
 *   <li>Databricks procedural (PL/SQL-style) blocks</li>
 *   <li>Special handling for the VALUES keyword in INSERT statements</li>
 *   <li>Datatype casts written with literals (e.g., DATE '2021-2-1')</li>
 * </ul>
 *
 * <p><b>Implementation Status:</b> MIGRATED
 * <ul>
 *   <li><b>Phase:</b> Migration from delegation to a full AbstractSqlParser implementation is complete</li>
 *   <li><b>Current:</b> Self-contained Databricks parser built on the AbstractSqlParser template</li>
 *   <li><b>Goal:</b> No delegation to the legacy TGSqlParser</li>
 * </ul>
 *
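 * <p>A minimal usage sketch. The public entry point is inherited from
 * {@link AbstractSqlParser}; the {@code parse} call below is illustrative
 * only, not a confirmed signature:
 * <pre>{@code
 * DatabricksSqlParser parser = new DatabricksSqlParser();
 * // hypothetical entry point; see AbstractSqlParser for the actual API
 * SqlParseResult result = parser.parse("INSERT INTO t VALUES (1), (2);");
 * }</pre>
 *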
 * @see SqlParser
 * @see AbstractSqlParser
 * @see TLexerDatabricks
 * @see TParserDatabricks
 * @since 3.2.0.0
 */
public class DatabricksSqlParser extends AbstractSqlParser {

    /**
     * Constructs a Databricks SQL parser.
     * <p>
     * Configures the parser for the Databricks database with the default
     * statement delimiter, a semicolon (;).
     * <p>
     * Following the original TGSqlParser pattern, the lexer and parser are
     * created once in the constructor and reused for all parsing operations.
     */
    public DatabricksSqlParser() {
        super(EDbVendor.dbvdatabricks);
        this.delimiterChar = ';';
        this.defaultDelimiterStr = ";";

        // Create the lexer once; it is reused for all parsing operations
        this.flexer = new TLexerDatabricks();
        this.flexer.delimiterchar = this.delimiterChar;
        this.flexer.defaultDelimiterStr = this.defaultDelimiterStr;

        // Set the parent's lexer reference for shared tokenization logic
        this.lexer = this.flexer;

        // Create the parser once; it is reused for all parsing operations
        this.fparser = new TParserDatabricks(null);
        this.fparser.lexer = this.flexer;
    }

    // ========== Tokenization State (used during tokenization) ==========

    /** The Databricks lexer used for tokenization */
    public TLexerDatabricks flexer;

    // ========== Statement Parsing State (used during statement parsing) ==========

    /** Current statement being built */
    private TCustomSqlStatement gcurrentsqlstatement;

    /** SQL parser (for Databricks SQL statements) */
    private TParserDatabricks fparser;

    // ========== AbstractSqlParser Abstract Methods Implementation ==========

    /**
     * Returns the Databricks lexer instance.
     * <p>
     * The lexer is created once in the constructor and reused for all
     * parsing operations.
     *
     * @param context parser context (not used; the lexer is already created)
     * @return the Databricks lexer instance created in the constructor
     */
    @Override
    protected TCustomLexer getLexer(ParserContext context) {
        return this.flexer;
    }

    /**
     * Returns the Databricks SQL parser instance with an updated token list.
     * <p>
     * The parser is created once in the constructor and reused for all
     * parsing operations.
     *
     * @param context parser context (not used; the parser is already created)
     * @param tokens source token list to parse
     * @return the Databricks SQL parser instance created in the constructor
     */
    @Override
    protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) {
        this.fparser.sourcetokenlist = tokens;
        return this.fparser;
    }

    /**
     * Databricks uses a single parser; no secondary parser is needed.
     *
     * @param context parser context
     * @param tokens source token list
     * @return null (no secondary parser)
     */
    @Override
    protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) {
        return null;
    }

    /**
     * Hook method: tokenizes Databricks SQL by calling vendor-specific tokenization.
     */
    @Override
    protected void tokenizeVendorSql() {
        dodatabrickstexttotokenlist();
    }

    /**
     * Hook method: sets up parsers for raw statement extraction by injecting
     * sqlcmds and the source token list into the parser.
     */
    @Override
    protected void setupVendorParsersForExtraction() {
        this.fparser.sqlcmds = this.sqlcmds;
        this.fparser.sourcetokenlist = this.sourcetokenlist;
    }

    /**
     * Hook method: extracts raw Databricks SQL statements.
     *
     * @param builder the result builder to populate
     */
    @Override
    protected void extractVendorRawStatements(SqlParseResult.Builder builder) {
        dodatabricksgetrawsqlstatements(builder);
    }

    // ========== Databricks-Specific Tokenization Logic ==========

    /**
     * Tokenizes Databricks SQL text into a token list.
     * <p>
     * This method processes the input SQL text and converts it into a sequence
     * of tokens. It handles Databricks-specific token processing, including
     * MySQL-style comments and delimiter handling.
     * <p>
     * Migrated from TGSqlParser.dodatabrickstexttotokenlist() at line 4696.
     */
    private void dodatabrickstexttotokenlist() {
        TSourceToken asourcetoken;
        int yychar;
        boolean startDelimiter = false;

        flexer.tmpDelimiter = "";

        asourcetoken = getanewsourcetoken();
        if (asourcetoken == null) return;
        yychar = asourcetoken.tokencode;

        while (yychar > 0) {
            sourcetokenlist.add(asourcetoken);
            asourcetoken = getanewsourcetoken();
            if (asourcetoken == null) break;
            checkMySQLCommentToken(asourcetoken);

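            // NOTE: startDelimiter is never set to true in this method, so the
            // branch below is unreachable as written; it appears to be carried
            // over from the TGSqlParser pattern, where a DELIMITER-style
            // directive toggles the flag (assumption, based on the migration
            // note in the Javadoc above).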
            if ((asourcetoken.tokencode == TBaseType.lexnewline) && (startDelimiter)) {
                startDelimiter = false;
                flexer.tmpDelimiter = sourcetokenlist.get(sourcetokenlist.size() - 1).getAstext();
            }

            yychar = asourcetoken.tokencode;
        }
    }

    /**
     * Checks for MySQL-style comments in tokens.
     * <p>
     * This method handles MySQL comment syntax, which is also supported by
     * Databricks.
     *
     * @param asourcetoken the token to check
     */
    private void checkMySQLCommentToken(TSourceToken asourcetoken) {
        // MySQL comment handling - placeholder for now.
        // The actual implementation would check for MySQL-style comments;
        // this matches the pattern from TGSqlParser.
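        //
        // A hedged sketch of what an implementation might do (assumption,
        // mirroring the MySQL path in TGSqlParser): detect MySQL "executable"
        // comments such as /*!40101 ... */ and retag them so the body is
        // parsed as SQL rather than skipped as a comment, e.g.:
        //
        //   if (asourcetoken.tokencode == TBaseType.cmtslashstar
        //           && asourcetoken.getAstext().startsWith("/*!")) {
        //       // retag / strip the comment markers here
        //   }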
    }

    // ========== Databricks-Specific Raw Statement Extraction Logic ==========

    /**
     * Extracts raw SQL statements from the token list for Databricks.
     * <p>
     * This method separates the token list into individual SQL statements
     * without performing a full parse. It handles Databricks-specific syntax:
     * <ul>
     *   <li>VALUES keyword disambiguation for INSERT statements</li>
     *   <li>Datatype casts written with literals (DATE '2021-2-1')</li>
     *   <li>PL/SQL block detection with BEGIN/END</li>
     *   <li>Statement terminators (semicolon, slash, period)</li>
     * </ul>
     * <p>
     * Migrated from TGSqlParser.dodatabricksgetrawsqlstatements() at line 6944.
     *
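     * <p>For example, the input {@code SELECT 1; SELECT 2;} yields two raw
     * statements, each terminated at its semicolon, while runs of extra
     * semicolons ({@code ;;;;}) are downgraded to comments in the
     * {@code stnormal} state below.
     *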
     * @param builder the result builder to populate with raw statements
     */
    private void dodatabricksgetrawsqlstatements(SqlParseResult.Builder builder) {
        int waitingEnd = 0;
        boolean foundEnd = false;

        if (TBaseType.assigned(sqlstatements)) sqlstatements.clear();
        if (!TBaseType.assigned(sourcetokenlist)) {
            builder.errorCode(-1);
            builder.errorMessage("Source token list not assigned");
            return;
        }

        gcurrentsqlstatement = null;
        EFindSqlStateType gst = EFindSqlStateType.stnormal;
        TSourceToken lcprevsolidtoken = null, ast = null;

        for (int i = 0; i < sourcetokenlist.size(); i++) {

            if ((ast != null) && (ast.issolidtoken()))
                lcprevsolidtoken = ast;

            ast = sourcetokenlist.get(i);
            sourcetokenlist.curpos = i;

            // Databricks-specific token adjustments
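            //
            // VALUES disambiguation, summarizing the branch below:
            //   INSERT INTO t VALUES (1), (2);      -> retagged as rrw_databricks_values_insert
            //   VALUES (1);   (standalone; a ';'
            //     separates it from any INSERT)     -> left as rrw_values
            //   ... FROM (VALUES (3)) AS foo (i)    -> left as rrw_values, unless the
            //     nearest INSERT sits closer to VALUES than the nearest FROM does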
            if (ast.tokencode == TBaseType.rrw_values) {
                TSourceToken stParen = ast.searchToken('(', 1);
                if (stParen != null) {
                    TSourceToken stInsert = ast.searchToken(TBaseType.rrw_insert, -ast.posinlist, ';', true);
                    if (stInsert != null) {
                        TSourceToken stSemiColon = ast.searchToken(';', -ast.posinlist);
                        if ((stSemiColon != null) && (stSemiColon.posinlist > stInsert.posinlist)) {
                            // INSERT INTO test values (16,1), (8,2), (4,4), (2,0), (97, 16);
                            // VALUES (1);
                            // don't treat VALUES (1) as INSERT ... VALUES
                        } else {
                            TSourceToken stFrom = ast.searchToken(TBaseType.rrw_from, -ast.posinlist, ';', true);
                            if (stFrom != null) {
                                // don't treat VALUES after a FROM keyword as INSERT ... VALUES:
                                // insert into inserttest values(10, 20, '40'), (-1, 2, DEFAULT),
                                // ((select 2), (select i from (values(3) ) as foo (i)), 'values are fun!');

                                // check whether the INSERT keyword is closer to VALUES than the
                                // FROM keyword is; if so, treat it as INSERT ... VALUES
                                if (stInsert.posinlist > stFrom.posinlist) {
                                    // https://www.sqlparser.com/bugs/mantisbt/view.php?id=3354
                                    ast.tokencode = TBaseType.rrw_databricks_values_insert;
                                }
                            } else {
                                ast.tokencode = TBaseType.rrw_databricks_values_insert;
                            }
                        }
                    }
                }
            } else if ((ast.tokencode == TBaseType.sconst) || (ast.tokencode == '+') || (ast.tokencode == '-')) {
                if ((lcprevsolidtoken != null) && (TTypeName.searchTypeByName(lcprevsolidtoken.toString()) != null)) {
                    // date '2021-2-1': turn DATE into TBaseType.rrw_databricks_datatype_used_to_cast
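                    // e.g. DATE '2021-2-1' becomes a datatype cast, while
                    // INTERVAL '7' DAY is deliberately excluded by the check
                    // below, since an INTERVAL literal is a constructor
                    // rather than a cast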
                    if (lcprevsolidtoken.tokencode != TBaseType.rrw_interval) {
                        lcprevsolidtoken.tokencode = TBaseType.rrw_databricks_datatype_used_to_cast;
                    }
                }
            }

            switch (gst) {
                case sterror: {
                    if (ast.tokentype == ETokenType.ttsemicolon) {
                        appendToken(gcurrentsqlstatement, ast);
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                        gst = EFindSqlStateType.stnormal;
                    } else {
                        appendToken(gcurrentsqlstatement, ast);
                    }
                    break;
                } //sterror

                case stnormal: {
                    if ((ast.tokencode == TBaseType.cmtdoublehyphen)
                            || (ast.tokencode == TBaseType.cmtslashstar)
                            || (ast.tokencode == TBaseType.lexspace)
                            || (ast.tokencode == TBaseType.lexnewline)
                            || (ast.tokentype == ETokenType.ttsemicolon)) {
                        if (gcurrentsqlstatement != null) {
                            appendToken(gcurrentsqlstatement, ast);
                        }

                        if ((lcprevsolidtoken != null) && (ast.tokentype == ETokenType.ttsemicolon)) {
                            if (lcprevsolidtoken.tokentype == ETokenType.ttsemicolon) {
                                // ;;;; consecutive semicolons: treat the extras as comments
                                ast.tokentype = ETokenType.ttsimplecomment;
                                ast.tokencode = TBaseType.cmtdoublehyphen;
                            }
                        }

                        continue;
                    }

                    // look for a token that starts SQL or PL/SQL mode
                    gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement);

                    if (gcurrentsqlstatement != null) {
                        if (gcurrentsqlstatement.isdatabricksplsql()) {
                            gst = EFindSqlStateType.ststoredprocedure;
                            appendToken(gcurrentsqlstatement, ast);
                            foundEnd = false;
                            if ((ast.tokencode == TBaseType.rrw_begin)
                                    || (ast.tokencode == TBaseType.rrw_package)
                                    || (ast.searchToken(TBaseType.rrw_package, 4) != null)) {
                                waitingEnd = 1;
                            }
                        } else {
                            gst = EFindSqlStateType.stsql;
                            appendToken(gcurrentsqlstatement, ast);
                        }
                    } else {
                        // error token found
                        this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo)
                                , "Error when tokenizing", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist));

                        ast.tokentype = ETokenType.tttokenlizererrortoken;
                        gst = EFindSqlStateType.sterror;

                        gcurrentsqlstatement = new TUnknownSqlStatement(vendor);
                        gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid;
                        appendToken(gcurrentsqlstatement, ast);
                    }

                    break;
                } // stnormal

                case stsql: {
                    if (ast.tokentype == ETokenType.ttsemicolon) {
                        gst = EFindSqlStateType.stnormal;
                        appendToken(gcurrentsqlstatement, ast);
                        gcurrentsqlstatement.semicolonended = ast;
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                        continue;
                    }

                    if (sourcetokenlist.sqlplusaftercurtoken()) { // most probably the '/' command
                        gst = EFindSqlStateType.stnormal;
                        appendToken(gcurrentsqlstatement, ast);
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                        continue;
                    }
                    appendToken(gcurrentsqlstatement, ast);
                    break;
                } //case stsql

                case ststoredprocedure: {
                    if (ast.tokencode == TBaseType.rrw_begin) {
                        waitingEnd++;
                    } else if (ast.tokencode == TBaseType.rrw_if) {
                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
                            // not the IF in "END IF"
                            waitingEnd++;
                        }
                    } else if (ast.tokencode == TBaseType.rrw_case) {
                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
                            // not the CASE in "END CASE"
                            waitingEnd++;
                        }
                    } else if (ast.tokencode == TBaseType.rrw_loop) {
                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
                            // not the LOOP in "END LOOP"
                            waitingEnd++;
                        }
                    } else if (ast.tokencode == TBaseType.rrw_end) {
                        foundEnd = true;
                        waitingEnd--;
                        if (waitingEnd < 0) {
                            waitingEnd = 0;
                        }
                    }
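
                    // Illustrative trace of waitingEnd for a small block
                    // (waitingEnd is first set to 1 in stnormal when the
                    // statement opens with BEGIN):
                    //   BEGIN              waitingEnd = 1
                    //     IF ... THEN      waitingEnd = 2
                    //     END IF;          waitingEnd = 1, foundEnd = true
                    //                      (the IF after END is skipped by the
                    //                       searchToken check above)
                    //   END;               waitingEnd = 0
                    // the trailing semicolon then completes the statement in
                    // the else branch below (waitingEnd == 0 && foundEnd)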

                    if ((ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) {
                        // original Delphi: TPlsqlStatementParse(asqlstatement).TerminatorToken := ast;
                        ast.tokenstatus = ETokenStatus.tsignorebyyacc;
                        gst = EFindSqlStateType.stnormal;
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);

                        // make '/' a sqlplus cmd statement
                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
                        appendToken(gcurrentsqlstatement, ast);
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                    } else if ((ast.tokentype == ETokenType.ttperiod) && (sourcetokenlist.returnaftercurtoken(false)) && (sourcetokenlist.returnbeforecurtoken(false))) {
                        // a single dot on its own line
                        ast.tokenstatus = ETokenStatus.tsignorebyyacc;
                        gst = EFindSqlStateType.stnormal;
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);

                        // make the period a sqlplus cmd statement
                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
                        appendToken(gcurrentsqlstatement, ast);
                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                    } else {
                        appendToken(gcurrentsqlstatement, ast);
                        if ((ast.tokentype == ETokenType.ttsemicolon) && (waitingEnd == 0)
                                && (foundEnd)) {
                            gst = EFindSqlStateType.stnormal;
                            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
                        }
                    }

                    if (ast.tokencode == TBaseType.sqlpluscmd) {
                        // change tokencode back to a keyword or TBaseType.ident: a sqlplus cmd
                        // inside a SQL statement (almost always a PL/SQL block) is not
                        // really a sqlplus cmd
                        int m = flexer.getkeywordvalue(ast.getAstext());
                        if (m != 0) {
                            ast.tokencode = m;
                        } else {
                            ast.tokencode = TBaseType.ident;
                        }
                    }

                    break;
                } //ststoredprocedure
            } //switch
        }//for

        // last statement
        if ((gcurrentsqlstatement != null) &&
                ((gst == EFindSqlStateType.stsqlplus) || (gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.ststoredprocedure) ||
                        (gst == EFindSqlStateType.sterror))) {
            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, true, builder);
        }

        // Populate the builder with results
        builder.sqlStatements(this.sqlstatements);
        builder.syntaxErrors(syntaxErrors instanceof ArrayList ?
                (ArrayList<TSyntaxError>) syntaxErrors : new ArrayList<>(syntaxErrors));
        builder.errorCode(syntaxErrors.isEmpty() ? 0 : syntaxErrors.size());
        if (!syntaxErrors.isEmpty()) {
            builder.errorMessage(String.format("Extraction completed with %d error(s)", syntaxErrors.size()));
        }
    }

    /**
     * Helper method to append a token to a statement.
     * Sets the token's stmt reference and adds it to the statement's token list.
     *
     * @param statement the statement to append to
     * @param token the token to append
     */
    private void appendToken(TCustomSqlStatement statement, TSourceToken token) {
        if (statement == null || token == null) {
            return;
        }
        token.stmt = statement;
        statement.sourcetokenlist.add(token);
    }

    // ========== Statement Parsing Logic ==========

    /**
     * Parses all raw statements to build the AST.
     * <p>
     * This method iterates through all raw statements and calls parsestatement()
     * on each one to build the abstract syntax tree. It handles error recovery
     * for CREATE TABLE statements and collects syntax errors.
     *
     * @param context parser context with configuration
     * @param parser primary parser instance
     * @param secondaryParser secondary parser (null for Databricks)
     * @param tokens source token list
     * @param rawStatements raw statements from the extraction phase
     * @return statement list with the parsed AST
     */
    @Override
    protected TStatementList performParsing(ParserContext context, TCustomParser parser,
                                           TCustomParser secondaryParser, TSourceTokenList tokens,
                                           TStatementList rawStatements) {
        // Store references
        this.fparser = (TParserDatabricks) parser;
        this.sourcetokenlist = tokens;
        this.parserContext = context;
        this.sqlstatements = rawStatements;

        // Initialize sqlcmds
        this.sqlcmds = SqlCmdsFactory.get(vendor);
        this.fparser.sqlcmds = this.sqlcmds;

        // Initialize the global context (inherited from AbstractSqlParser)
        initializeGlobalContext();

        // Parse each statement
        for (int i = 0; i < sqlstatements.size(); i++) {
            TCustomSqlStatement stmt = sqlstatements.getRawSql(i);
            try {
                stmt.setFrameStack(frameStack);
                int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree());

                // Vendor-specific post-processing (override the hook if needed)
                afterStatementParsed(stmt);

                // Error recovery
                boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE;
                if (doRecover && ((parseResult != 0) || (stmt.getErrorCount() > 0))) {
                    handleCreateTableErrorRecovery(stmt);
                }

                // Collect errors
                if ((parseResult != 0) || (stmt.getErrorCount() > 0)) {
                    copyErrorsFromStatement(stmt);
                }
            } catch (Exception ex) {
                // Use the inherited exception handler
                handleStatementParsingException(stmt, i, ex);
            }
        }

        // Clean up the frame stack
        if (globalFrame != null) globalFrame.popMeFromStack(frameStack);

        return sqlstatements;
    }

    /**
     * Post-processing hook invoked after each statement is parsed.
     * <p>
     * The default implementation does nothing. Override if vendor-specific
     * post-processing is needed.
     *
     * @param stmt the statement that was just parsed
     */
    protected void afterStatementParsed(TCustomSqlStatement stmt) {
        // Default: no post-processing needed for Databricks
    }

    /**
     * Handles error recovery for CREATE TABLE statements.
     * <p>
     * This method attempts to recover from parse errors in CREATE TABLE statements
     * by marking unparseable table properties (like ROW FORMAT, STORED AS, etc.)
     * as sqlpluscmd and retrying.
     * <p>
     * Databricks/Hive DDL allows complex table properties after the column definition
     * that may not be fully supported by the grammar. This error recovery allows
     * partial parsing of the main table structure.
     * <p>
     * Extracted from TGSqlParser.doparse() lines 16916-16971.
     *
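     * <p>Example (Hive-style DDL): everything after the closing parenthesis of
     * the column list is retagged as {@code sqlpluscmd} and ignored on the
     * retry, so the column definitions still parse:
     * <pre>{@code
     * CREATE TABLE t (a INT, b STRING)
     * ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
     * STORED AS TEXTFILE;
     * }</pre>
     *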
     * @param stmt the statement with errors
     */
    protected void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) {
        if (((stmt.sqlstatementtype == ESqlStatementType.sstcreatetable) ||
             (stmt.sqlstatementtype == ESqlStatementType.sstcreateindex)) &&
            (!TBaseType.c_createTableStrictParsing)) {

            // Find the closing parenthesis of the table/column definition and
            // mark everything after it as sqlpluscmd (ignored table properties)
            int nested = 0;
            boolean isIgnore = false, isFoundIgnoreToken = false;
            TSourceToken firstIgnoreToken = null;

            for (int k = 0; k < stmt.sourcetokenlist.size(); k++) {
                TSourceToken st = stmt.sourcetokenlist.get(k);

                if (isIgnore) {
                    // Mark tokens after the closing paren as sqlpluscmd (to be ignored)
                    if (st.issolidtoken() && (st.tokencode != ';')) {
                        isFoundIgnoreToken = true;
                        if (firstIgnoreToken == null) {
                            firstIgnoreToken = st;
                        }
                    }
                    if (st.tokencode != ';') {
                        st.tokencode = TBaseType.sqlpluscmd;
                    }
                    continue;
                }

                // Track nested parentheses to find the matching closing paren
                if (st.tokencode == (int) ')') {
                    nested--;
                    if (nested == 0) {
                        // If the next tokens are "AS ( SELECT", this is a CTAS
                        // subquery; don't ignore it
                        boolean isSelect = false;
                        TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1);
                        if (st1 != null) {
                            TSourceToken st2 = st.searchToken((int) '(', 2);
                            if (st2 != null) {
                                TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3);
                                isSelect = (st3 != null);
                            }
                        }
                        if (!isSelect) {
                            // Found the closing paren; start ignoring subsequent tokens
                            isIgnore = true;
                        }
                    }
                }

                if ((st.tokencode == (int) '(') || (st.tokencode == TBaseType.left_parenthesis_2)) {
                    nested++;
                }
            }

            // For Databricks, we don't validate specific table properties
            // (unlike Oracle, which checks TBaseType.searchOracleTablePros).
            // This allows any Hive/Databricks DDL syntax such as
            // ROW FORMAT, STORED AS, TBLPROPERTIES, LOCATION, etc.

            // Retry parsing if we found ignorable properties
            if (isFoundIgnoreToken) {
                stmt.clearError();
                stmt.parsestatement(null, false);
            }
        }
    }

    // ========== Semantic Analysis and Interpretation ==========

    /**
     * Performs semantic analysis (resolving column-table relationships, etc.).
     * <p>
     * This method runs the TSQLResolver to build semantic relationships
     * between columns and tables, among other analyses.
     *
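     * <p>For example, given {@code SELECT a FROM t}, the resolver links
     * column {@code a} to its source table {@code t}.
     *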
     * @param context parser context
     * @param statements statement list to analyze
     */
    @Override
    protected void performSemanticAnalysis(ParserContext context, TStatementList statements) {
        if (!TBaseType.isEnableResolver()) {
            return;
        }

        if (!getSyntaxErrors().isEmpty()) {
            return;
        }

        try {
            TSQLResolver resolver = new TSQLResolver(globalContext, statements);
            resolver.resolve();
        } catch (Exception e) {
            // Log but don't fail; semantic analysis is optional
            System.err.println("Semantic analysis failed: " + e.getMessage());
        }
    }

    /**
     * Performs interpretation (executes SQL in interpreter mode).
     * <p>
     * This method runs the TASTEvaluator to interpret/execute the SQL.
     *
     * @param context parser context
     * @param statements statement list to interpret
     */
    @Override
    protected void performInterpreter(ParserContext context, TStatementList statements) {
        if (!TBaseType.ENABLE_INTERPRETER) {
            return;
        }

        try {
            TGlobalScope interpreterScope = new TGlobalScope(sqlEnv);
            TLog.enableInterpreterLogOnly();
            TASTEvaluator astEvaluator = new TASTEvaluator(statements, interpreterScope);
            astEvaluator.eval();
        } catch (Exception e) {
            // Log but don't fail; interpretation is optional
            System.err.println("Interpretation failed: " + e.getMessage());
        }
    }

    @Override
    public String toString() {
        return "DatabricksSqlParser{vendor=" + vendor + "}";
    }
}