001package gudusoft.gsqlparser.parser;
002
003import gudusoft.gsqlparser.EDbVendor;
004import gudusoft.gsqlparser.TBaseType;
005import gudusoft.gsqlparser.TCustomLexer;
006import gudusoft.gsqlparser.TCustomParser;
007import gudusoft.gsqlparser.TCustomSqlStatement;
008import gudusoft.gsqlparser.TLexerHive;
009import gudusoft.gsqlparser.TParserHive;
010import gudusoft.gsqlparser.TSourceToken;
011import gudusoft.gsqlparser.TSourceTokenList;
012import gudusoft.gsqlparser.TStatementList;
013import gudusoft.gsqlparser.TSyntaxError;
014import gudusoft.gsqlparser.EFindSqlStateType;
015import gudusoft.gsqlparser.ETokenType;
016import gudusoft.gsqlparser.ETokenStatus;
017import gudusoft.gsqlparser.ESqlStatementType;
018import gudusoft.gsqlparser.EErrorType;
019import gudusoft.gsqlparser.stmt.TUnknownSqlStatement;
020import gudusoft.gsqlparser.sqlcmds.ISqlCmds;
021import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory;
022import gudusoft.gsqlparser.compiler.TContext;
023import gudusoft.gsqlparser.sqlenv.TSQLEnv;
024import gudusoft.gsqlparser.compiler.TGlobalScope;
025import gudusoft.gsqlparser.compiler.TFrame;
026import gudusoft.gsqlparser.resolver.TSQLResolver;
027import gudusoft.gsqlparser.TLog;
028import gudusoft.gsqlparser.compiler.TASTEvaluator;
029
030import java.io.BufferedReader;
031import java.util.ArrayList;
032import java.util.Arrays;
033import java.util.List;
034import java.util.Stack;
035
036/**
037 * Apache Hive SQL parser implementation.
038 *
039 * <p>This parser handles Hive-specific SQL syntax including:
040 * <ul>
041 *   <li>Hive DDL statements (CREATE TABLE/DATABASE with Hive-specific options)</li>
042 *   <li>Hive DML statements (INSERT OVERWRITE, LOAD DATA, etc.)</li>
043 *   <li>HiveQL functions and extensions</li>
044 *   <li>Backtick-quoted identifiers including qualified names (`schema.table`)</li>
045 *   <li>Hive-specific keywords and data types</li>
046 * </ul>
047 *
048 * <p><b>Design Notes:</b>
049 * <ul>
050 *   <li>Extends {@link AbstractSqlParser} using the template method pattern</li>
051 *   <li>Uses {@link TLexerHive} for tokenization</li>
052 *   <li>Uses {@link TParserHive} for parsing</li>
053 *   <li>Delimiter character: ';' for SQL statements</li>
054 *   <li>Splits backtick-quoted qualified names (`schema.table`) into individual tokens</li>
055 * </ul>
056 *
057 * <p><b>Usage Example:</b>
058 * <pre>
059 * // Get Hive parser from factory
060 * SqlParser parser = SqlParserFactory.get(EDbVendor.dbvhive);
061 *
062 * // Build context
063 * ParserContext context = new ParserContext.Builder(EDbVendor.dbvhive)
064 *     .sqlText("SELECT * FROM `default.employee` WHERE dept = 'IT'")
065 *     .build();
066 *
067 * // Parse
068 * SqlParseResult result = parser.parse(context);
069 *
070 * // Access statements
071 * TStatementList statements = result.getSqlStatements();
072 * </pre>
073 *
074 * @see SqlParser
075 * @see AbstractSqlParser
076 * @see TLexerHive
077 * @see TParserHive
078 * @since 3.2.0.0
079 */
080public class HiveSqlParser extends AbstractSqlParser {
081
082    /**
083     * Construct Hive SQL parser.
084     * <p>
085     * Configures the parser for Hive database with default delimiter (;).
086     * <p>
087     * Following the original TGSqlParser pattern, the lexer and parser are
088     * created once in the constructor and reused for all parsing operations.
089     */
090    public HiveSqlParser() {
091        super(EDbVendor.dbvhive);
092        this.delimiterChar = ';';
093        this.defaultDelimiterStr = ";";
094
095        // Create lexer once - will be reused for all parsing operations
096        this.flexer = new TLexerHive();
097        this.flexer.delimiterchar = this.delimiterChar;
098        this.flexer.defaultDelimiterStr = this.defaultDelimiterStr;
099
100        // Set parent's lexer reference for shared tokenization logic
101        this.lexer = this.flexer;
102
103        // Create parser once - will be reused for all parsing operations
104        this.fparser = new TParserHive(null);
105        this.fparser.lexer = this.flexer;
106    }
107
    // ========== Parser Components ==========

    /**
     * The Hive lexer used for tokenization; created once in the constructor
     * and reused for all parsing operations.
     * NOTE(review): public visibility looks wider than necessary — consider
     * narrowing if no external callers depend on it.
     */
    public TLexerHive flexer;

    /** SQL parser (for Hive statements); created once and reused. */
    private TParserHive fparser;

    /** Current statement being accumulated during raw statement extraction. */
    private TCustomSqlStatement gcurrentsqlstatement;

    /** Parser context for the current operation; set in performParsing(). */
    private ParserContext parserContext;

    // Note: Global context and frame stack fields inherited from AbstractSqlParser:
    // - protected TContext globalContext
    // - protected TSQLEnv sqlEnv
    // - protected Stack<TFrame> frameStack
    // - protected TFrame globalFrame
    // - protected TSourceTokenList sourcetokenlist
    // - protected TStatementList sqlstatements
    // - protected ISqlCmds sqlcmds
    // - protected TCustomLexer lexer
132    // ========== AbstractSqlParser Abstract Methods Implementation ==========
133
134    /**
135     * Return the Hive lexer instance.
136     */
137    @Override
138    protected TCustomLexer getLexer(ParserContext context) {
139        return this.flexer;
140    }
141
142    /**
143     * Return the Hive SQL parser instance with updated token list.
144     */
145    @Override
146    protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) {
147        this.fparser.sourcetokenlist = tokens;
148        return this.fparser;
149    }
150
151    /**
152     * Hive does not use a secondary parser (unlike Oracle with PL/SQL).
153     */
154    @Override
155    protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) {
156        return null;
157    }
158
159    /**
160     * Call Hive-specific tokenization logic.
161     * <p>
162     * Delegates to dohivetexttotokenlist which handles Hive's
163     * specific keyword recognition, backtick-quoted identifiers, and
164     * qualified name splitting.
165     */
166    @Override
167    protected void tokenizeVendorSql() {
168        dohivetexttotokenlist();
169    }
170
171    /**
172     * Setup Hive parser for raw statement extraction.
173     * <p>
174     * Hive uses a single parser, so we inject sqlcmds and update
175     * the token list for the main parser only.
176     */
177    @Override
178    protected void setupVendorParsersForExtraction() {
179        // Inject sqlcmds into parser (required for make_stmt)
180        this.fparser.sqlcmds = this.sqlcmds;
181
182        // Update token list for parser
183        this.fparser.sourcetokenlist = this.sourcetokenlist;
184    }
185
186    /**
187     * Call Hive-specific raw statement extraction logic.
188     * <p>
189     * Delegates to dohivegetrawsqlstatements which handles Hive's
190     * statement delimiters (semicolons).
191     * <p>
192     * Note: parserContext is already set by AbstractSqlParser before this is called
193     */
194    @Override
195    protected void extractVendorRawStatements(SqlParseResult.Builder builder) {
196        int errorCount = dohivegetrawsqlstatements(builder);
197        // Error count is tracked internally; errors are already added to syntaxErrors list
198
199        // Set the extracted statements in the builder
200        builder.sqlStatements(this.sqlstatements);
201    }
202
203    // ========== Tokenization Methods ==========
204
205    /**
206     * Tokenize Hive SQL text into a list of tokens.
207     * <p>
208     * This method handles Hive-specific token processing:
209     * <ul>
210     *   <li>Splits backtick-quoted qualified names (`schema.table`) into separate tokens</li>
211     *   <li>Handles MAP keyword disambiguation</li>
212     *   <li>Handles all standard SQL tokens (keywords, identifiers, operators, etc.)</li>
213     * </ul>
214     * <p>
215     * Migrated from TGSqlParser.dohivetexttotokenlist()
216     */
217    private void dohivetexttotokenlist() {
218
219        TSourceToken asourcetoken, lcprevst;
220        int yychar;
221
222        asourcetoken = getanewsourcetoken();
223        if (asourcetoken == null) return;
224        yychar = asourcetoken.tokencode;
225
226        while (yychar > 0) {
227            if (asourcetoken != null) {
228                sourcetokenlist.add(asourcetoken);
229            }
230            asourcetoken = getanewsourcetoken();
231            if (asourcetoken == null) break;
232            if (asourcetoken.tokencode == TBaseType.rrw_map) {
233                TSourceToken token = asourcetoken.searchToken(')', -1);
234                if (token != null) {
235                    asourcetoken.tokencode = TBaseType.ident;
236                }
237            } else if (asourcetoken.tokencode == '(') {
238//            TSourceToken token = asourcetoken.searchToken(TBaseType.ident,-1);
239//            if (token != null){
240//                token.tokencode = TBaseType.HIVE_FUNC_IDENT;
241//            }
242            }
243            yychar = asourcetoken.tokencode;
244
245            // `schema.table_name`
246            if ((asourcetoken.tokencode == TBaseType.ident)
247                    && (asourcetoken.toString().startsWith("`")) && (asourcetoken.toString().endsWith("`"))
248                    && (asourcetoken.toString().indexOf(".") > 0)
249            ) {
250                // Do not split when the backtick-quoted identifier is used as
251                // an alias (follows `)` or `AS`). See MantisBT #3441.
252                TSourceToken prevSolid = null;
253                for (int pi = sourcetokenlist.size() - 1; pi >= 0; pi--) {
254                    TSourceToken t = sourcetokenlist.get(pi);
255                    if (t.issolidtoken()) { prevSolid = t; break; }
256                }
257                boolean isAlias = (prevSolid != null)
258                        && (prevSolid.tokencode == ')');
259                if (!isAlias) {
260                    yychar = splitQualifiedNameInBacktick(asourcetoken);
261                    asourcetoken = null;
262                }
263            }
264
265        }
266
267    }
268
269    /**
270     * Turn one token: `schema.table_name` into 3 tokens: `schema` . `table_name`
271     * <p>
272     * This helper method splits backtick-quoted qualified names into individual
273     * identifier and period tokens, preserving line/column information for each part.
274     * <p>
275     * Migrated from TGSqlParser.splitQualifiedNameInBacktick()
276     *
277     * @param asourcetoken the token to split
278     * @return the token code of the last token created
279     */
280    private int splitQualifiedNameInBacktick(TSourceToken asourcetoken) {
281        int yychar = 0;
282
283        List<String> elephantList = Arrays.asList(TBaseType.getTextWithoutQuoted(asourcetoken.toString()).split("\\."));
284        int p = 0, offset = 0;
285        for (String s : elephantList) {
286            TSourceToken pst = new TSourceToken("`" + s + "`");
287            pst.tokencode = asourcetoken.tokencode;
288            pst.tokentype = asourcetoken.tokentype;
289            pst.tokenstatus = asourcetoken.tokenstatus;
290            pst.lineNo = asourcetoken.lineNo;
291            pst.columnNo = asourcetoken.columnNo + offset;
292            if (p == 0) offset++; // this count the first ` token
293            offset = offset + s.length();
294            pst.container = sourcetokenlist;
295            if (p > 0) { // 第一个token使用被拆分前那个token的位置,从第二个开始的token,需要先把列表的位置指针加 1
296                sourcetokenlist.curpos = sourcetokenlist.curpos + 1;
297            }
298            pst.posinlist = sourcetokenlist.curpos;
299
300            sourcetokenlist.add(pst);
301            yychar = pst.tokencode;
302
303            if (p != elephantList.size() - 1) {
304                //`schema.table_name`, add period token in the middle of the backtick included identifier.
305                TSourceToken periodst = new TSourceToken(".");
306                periodst.tokencode = '.';
307                periodst.tokentype = ETokenType.ttperiod;
308                periodst.tokenstatus = asourcetoken.tokenstatus;
309                periodst.lineNo = asourcetoken.lineNo;
310                periodst.columnNo = asourcetoken.columnNo + offset;
311                offset++;
312                periodst.container = sourcetokenlist;
313                sourcetokenlist.curpos = sourcetokenlist.curpos + 1;
314                periodst.posinlist = sourcetokenlist.curpos;
315                sourcetokenlist.add(periodst);
316                yychar = periodst.tokencode;
317            }
318
319            p++;
320        }
321
322        return yychar;
323
324    }
325
326    // ========== Raw Statement Extraction ==========
327
328    /**
329     * Extract raw SQL statements from the token list.
330     * <p>
331     * This method separates individual SQL statements without full syntax checking.
332     * It handles Hive-specific syntax including:
333     * <ul>
334     *   <li>Token code adjustments (CharSetName, DATE function, SORT keyword)</li>
335     *   <li>Semicolon-terminated statements</li>
336     *   <li>Continuous semicolon handling (treated as comments)</li>
337     * </ul>
338     * <p>
339     * Migrated from TGSqlParser.dohivegetrawsqlstatements()
340     *
341     * @param builder the result builder to populate
342     * @return number of errors encountered
343     */
344    private int dohivegetrawsqlstatements(SqlParseResult.Builder builder) {
345
346        if (TBaseType.assigned(sqlstatements)) sqlstatements.clear();
347        if (!TBaseType.assigned(sourcetokenlist)) return -1;
348
349        gcurrentsqlstatement = null;
350        EFindSqlStateType gst = EFindSqlStateType.stnormal;
351        TSourceToken lcprevsolidtoken = null, ast = null;
352        int parenDepth = 0;
353        int compoundBlockNesting = 0;
354        int caseDepth = 0;
355
356        for (int i = 0; i < sourcetokenlist.size(); i++) {
357
358            if ((ast != null) && (ast.issolidtoken()))
359                lcprevsolidtoken = ast;
360
361            ast = sourcetokenlist.get(i);
362            sourcetokenlist.curpos = i;
363
364            if (ast.tokencode == TBaseType.hive_CharSetName) {
365                TSourceToken st1 = ast.searchToken(TBaseType.hive_CharSetLiteral, 1);
366                if (st1 == null) {
367                    ast.tokencode = TBaseType.ident;
368                }
369            } else if (ast.tokencode == TBaseType.rrw_date) {
370                TSourceToken st1 = ast.nextSolidToken(); //ast.searchToken('(',1);
371                if (st1 != null) {
372                    if (st1.tokencode == '(') {
373                        ast.tokencode = TBaseType.rrw_hive_DATE_FUNCTION;
374                    }
375                }
376            } else if (ast.tokencode == TBaseType.rrw_sort) {
377                TSourceToken st1 = ast.searchToken(TBaseType.rrw_by, 1);
378                if (st1 == null) {
379                    ast.tokencode = TBaseType.ident;
380                }
381            }
382
383            switch (gst) {
384                case sterror: {
385                    if (ast.tokentype == ETokenType.ttsemicolon) {
386                        gcurrentsqlstatement.sourcetokenlist.add(ast);
387                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
388                        gst = EFindSqlStateType.stnormal;
389                    } else {
390                        gcurrentsqlstatement.sourcetokenlist.add(ast);
391                    }
392                    break;
393                } //sterror
394
395                case stnormal: {
396                    if ((ast.tokencode == TBaseType.cmtdoublehyphen)
397                            || (ast.tokencode == TBaseType.cmtslashstar)
398                            || (ast.tokencode == TBaseType.lexspace)
399                            || (ast.tokencode == TBaseType.lexnewline)
400                            || (ast.tokentype == ETokenType.ttsemicolon)) {
401                        if (gcurrentsqlstatement != null) {
402                            gcurrentsqlstatement.sourcetokenlist.add(ast);
403                        }
404
405                        if ((lcprevsolidtoken != null) && (ast.tokentype == ETokenType.ttsemicolon)) {
406                            if (lcprevsolidtoken.tokentype == ETokenType.ttsemicolon) {
407                                // ;;;; continuous semicolon,treat it as comment
408                                ast.tokentype = ETokenType.ttsimplecomment;
409                                ast.tokencode = TBaseType.cmtdoublehyphen;
410                            }
411                        }
412
413                        continue;
414                    }
415
416
417                    gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement);
418
419                    if (gcurrentsqlstatement != null) {
420                        gst = EFindSqlStateType.stsql;
421                        gcurrentsqlstatement.sourcetokenlist.add(ast);
422                    } else {
423                        //error tokentext found
424
425                        this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo)
426                                , "Error when tokenlize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist));
427
428                        ast.tokentype = ETokenType.tttokenlizererrortoken;
429                        gst = EFindSqlStateType.sterror;
430
431                        gcurrentsqlstatement = new TUnknownSqlStatement(vendor);
432                        gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid;
433                        gcurrentsqlstatement.sourcetokenlist.add(ast);
434
435                    }
436
437                    break;
438                } // stnormal
439
440                case stsql: {
441                    // Track BEGIN/END and CASE/END nesting for compound blocks
442                    if (ast.tokencode == TBaseType.rrw_begin && ast.issolidtoken()) {
443                        compoundBlockNesting++;
444                    } else if (ast.tokencode == TBaseType.rrw_case && ast.issolidtoken() && compoundBlockNesting > 0) {
445                        caseDepth++;
446                    } else if (ast.tokencode == TBaseType.rrw_end && ast.issolidtoken() && compoundBlockNesting > 0) {
447                        if (caseDepth > 0) {
448                            // This END closes a CASE expression, not a BEGIN block
449                            TSourceToken nextSolid = sourcetokenlist.nextsolidtoken(i, 1, false);
450                            boolean isEndCase = nextSolid == null || nextSolid.tokencode != TBaseType.rrw_if
451                                && nextSolid.tokencode != TBaseType.rrw_while
452                                && nextSolid.tokencode != TBaseType.rrw_loop
453                                && nextSolid.tokencode != TBaseType.rrw_repeat
454                                && nextSolid.tokencode != TBaseType.rrw_for;
455                            if (isEndCase) {
456                                caseDepth--;
457                            }
458                        } else {
459                            // Only decrement for END that closes a BEGIN block.
460                            // END IF, END WHILE, END LOOP, END FOR, END CASE don't close BEGIN.
461                            TSourceToken nextSolid = sourcetokenlist.nextsolidtoken(i, 1, false);
462                            boolean isStructEnd = nextSolid != null && (
463                                nextSolid.tokencode == TBaseType.rrw_if ||
464                                nextSolid.tokencode == TBaseType.rrw_while ||
465                                nextSolid.tokencode == TBaseType.rrw_loop ||
466                                nextSolid.tokencode == TBaseType.rrw_repeat ||
467                                nextSolid.tokencode == TBaseType.rrw_case ||
468                                nextSolid.tokencode == TBaseType.rrw_for);
469                            if (!isStructEnd) {
470                                compoundBlockNesting--;
471                            }
472                        }
473                    }
474
475                    if (ast.tokentype == ETokenType.ttsemicolon) {
476                        if (compoundBlockNesting > 0) {
477                            // Inside compound block - don't complete on semicolon
478                            gcurrentsqlstatement.sourcetokenlist.add(ast);
479                            continue;
480                        }
481                        gst = EFindSqlStateType.stnormal;
482                        parenDepth = 0;
483                        compoundBlockNesting = 0;
484                        caseDepth = 0;
485                        gcurrentsqlstatement.sourcetokenlist.add(ast);
486                        gcurrentsqlstatement.semicolonended = ast;
487                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
488                        continue;
489                    }
490
491                    // Inside compound block - don't split on DML keywords
492                    if (compoundBlockNesting > 0) {
493                        gcurrentsqlstatement.sourcetokenlist.add(ast);
494                        break;
495                    }
496
497                    // Track parenthesis depth for subquery detection
498                    if (ast.tokencode == '(') parenDepth++;
499                    if (ast.tokencode == ')') parenDepth--;
500
501                    // SET as new statement: if we encounter SET and the current statement
502                    // is not one where SET is a valid clause keyword (UPDATE, MERGE, ALTER),
503                    // complete the current statement and start a new one
504                    if (ast.tokencode == TBaseType.rrw_set
505                            && gcurrentsqlstatement != null) {
506                        ESqlStatementType curType = gcurrentsqlstatement.sqlstatementtype;
507                        boolean setIsClause = (curType == ESqlStatementType.sstupdate)
508                                || (curType == ESqlStatementType.sstmerge)
509                                || (curType == ESqlStatementType.sstaltertable)
510                                || (curType == ESqlStatementType.sstalterview)
511                                || (curType == ESqlStatementType.sstalterindex)
512                                || (curType == ESqlStatementType.sstalterdatabase)
513                                || (curType == ESqlStatementType.sstAlterMaterializedView)
514                                || (curType == ESqlStatementType.ssthiveExplain)
515                                || (curType == ESqlStatementType.ssthiveShow);
516                        if (!setIsClause) {
517                            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
518                            parenDepth = 0;
519                            gcurrentsqlstatement = sqlcmds.issql(ast, EFindSqlStateType.stnormal, null);
520                            if (gcurrentsqlstatement != null) {
521                                gcurrentsqlstatement.sourcetokenlist.add(ast);
522                            }
523                            break;
524                        }
525                    }
526
527                    // SELECT as new statement: when at parenthesis depth 0, SELECT
528                    // starts a new statement only when the current statement is SELECT
529                    // and the previous token is not a set operator (UNION/INTERSECT/EXCEPT/ALL)
530                    // and the token is not marked as part of a CTE
531                    if (ast.tokencode == TBaseType.rrw_select
532                            && parenDepth <= 0
533                            && gcurrentsqlstatement != null
534                            && gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstselect
535                            && ast.tokenstatus != ETokenStatus.tsignoredbygetrawstatement) {
536                        boolean isSetOp = false;
537                        if (lcprevsolidtoken != null) {
538                            int prevCode = lcprevsolidtoken.tokencode;
539                            isSetOp = (prevCode == TBaseType.rrw_union)
540                                    || (prevCode == TBaseType.rrw_intersect)
541                                    || (prevCode == TBaseType.rrw_except)
542                                    || (prevCode == TBaseType.rrw_all)
543                                    || (prevCode == TBaseType.rrw_minus);
544                        }
545                        if (!isSetOp) {
546                            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
547                            parenDepth = 0;
548                            gcurrentsqlstatement = sqlcmds.issql(ast, EFindSqlStateType.stnormal, null);
549                            if (gcurrentsqlstatement != null) {
550                                gcurrentsqlstatement.sourcetokenlist.add(ast);
551                            }
552                            break;
553                        }
554                    }
555
556                    gcurrentsqlstatement.sourcetokenlist.add(ast);
557                    break;
558                }//case stsql
559
560            } //switch
561        }//for
562
563        //last statement
564        if ((gcurrentsqlstatement != null) &&
565                ((gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.sterror))) {
566            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, true, builder);
567        }
568
569        return syntaxErrors.size();
570    }
571
572    // ========== Statement Parsing ==========
573
574    /**
575     * Parse all raw SQL statements.
576     * <p>
577     * This method performs full syntax analysis of each statement:
578     * <ul>
579     *   <li>Initializes global context and SQL environment</li>
580     *   <li>Parses each statement using TParserHive</li>
581     *   <li>Handles errors with optional error recovery</li>
582     *   <li>Collects syntax errors for reporting</li>
583     * </ul>
584     * <p>
585     * Migrated from TGSqlParser.performParsing()
586     *
587     * @param context the parser context
588     * @param parser the main parser (TParserHive)
589     * @param secondaryParser the secondary parser (null for Hive)
590     * @param tokens the source token list
591     * @param rawStatements raw statements already extracted (never null)
592     * @return the parsed statement list
593     */
594    @Override
595    protected TStatementList performParsing(ParserContext context, TCustomParser parser, TCustomParser secondaryParser, TSourceTokenList tokens, TStatementList rawStatements) {
596        this.parserContext = context;
597        this.fparser = (TParserHive) parser;
598        this.sourcetokenlist = tokens;
599        this.sqlstatements = rawStatements;
600
601        // Initialize sqlcmds for this parsing operation
602        if (this.sqlcmds == null) {
603            this.sqlcmds = SqlCmdsFactory.get(vendor);
604        }
605
606        // CRITICAL: Inject sqlcmds into parser (required for make_stmt to work)
607        this.fparser.sqlcmds = this.sqlcmds;
608
609        // Initialize global context (inherited method from AbstractSqlParser)
610        initializeGlobalContext();
611
612        // Parse each statement
613        for (int i = 0; i < sqlstatements.size(); i++) {
614            TCustomSqlStatement stmt = sqlstatements.getRawSql(i);
615
616            try {
617                // Set frame stack for nested scope resolution
618                stmt.setFrameStack(frameStack);
619
620                // Parse the statement
621                int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree());
622
623                // Attempt error recovery using inherited method
624                parseResult = attemptErrorRecovery(stmt, parseResult, context.isOnlyNeedRawParseTree());
625
626                // Collect errors from statement
627                if ((parseResult != 0) || (stmt.getErrorCount() > 0)) {
628                    copyErrorsFromStatement(stmt);
629                }
630
631            } catch (Exception ex) {
632                // Use inherited exception handler from AbstractSqlParser
633                handleStatementParsingException(stmt, i, ex);
634                continue;
635            }
636        }
637
638        // Clean up frame stack
639        if (globalFrame != null) globalFrame.popMeFromStack(frameStack);
640
641        return sqlstatements;
642    }
643
644    // ========== Semantic Analysis ==========
645
646    /**
647     * Perform semantic analysis on parsed statements.
648     * <p>
649     * Runs TSQLResolver to build relationships between tables and columns,
650     * resolve references, and perform type checking.
651     */
652    @Override
653    protected void performSemanticAnalysis(ParserContext context, TStatementList statements) {
654        if (TBaseType.isEnableResolver() && getSyntaxErrors().isEmpty()) {
655            TSQLResolver resolver = new TSQLResolver(globalContext, statements);
656            resolver.resolve();
657        }
658    }
659
660    // ========== Interpretation ==========
661
662    /**
663     * Perform interpretation/evaluation on statements.
664     * <p>
665     * Runs TASTEvaluator for compile-time constant expression evaluation.
666     * Hive does not require interpretation currently.
667     */
668    @Override
669    protected void performInterpreter(ParserContext context, TStatementList statements) {
670        // Hive does not require interpretation currently
671    }
672
673    @Override
674    public String toString() {
675        return "HiveSqlParser{vendor=" + vendor + "}";
676    }
677}