001package gudusoft.gsqlparser.parser;
002
003import gudusoft.gsqlparser.EDbVendor;
004import gudusoft.gsqlparser.TBaseType;
005import gudusoft.gsqlparser.TCustomLexer;
006import gudusoft.gsqlparser.TCustomParser;
007import gudusoft.gsqlparser.TCustomSqlStatement;
008import gudusoft.gsqlparser.TLexerBigquery;
009import gudusoft.gsqlparser.TParserBigquery;
010import gudusoft.gsqlparser.TSourceToken;
011import gudusoft.gsqlparser.TSourceTokenList;
012import gudusoft.gsqlparser.TStatementList;
013import gudusoft.gsqlparser.TSyntaxError;
014import gudusoft.gsqlparser.EFindSqlStateType;
015import gudusoft.gsqlparser.ETokenType;
016import gudusoft.gsqlparser.ETokenStatus;
017import gudusoft.gsqlparser.ESqlStatementType;
018import gudusoft.gsqlparser.EErrorType;
019import gudusoft.gsqlparser.stmt.TUnknownSqlStatement;
020import gudusoft.gsqlparser.stmt.TCommonStoredProcedureSqlStatement;
021import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement;
022import gudusoft.gsqlparser.stmt.oracle.TPlsqlCreatePackage;
023import gudusoft.gsqlparser.nodes.TTypeName;
024import gudusoft.gsqlparser.sqlcmds.ISqlCmds;
025import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory;
026import gudusoft.gsqlparser.compiler.TContext;
027import gudusoft.gsqlparser.sqlenv.TSQLEnv;
028import gudusoft.gsqlparser.compiler.TGlobalScope;
029import gudusoft.gsqlparser.compiler.TFrame;
030import gudusoft.gsqlparser.resolver.TSQLResolver;
031import gudusoft.gsqlparser.TLog;
032import gudusoft.gsqlparser.compiler.TASTEvaluator;
033
034import java.io.BufferedReader;
035import java.util.ArrayList;
036import java.util.Arrays;
037import java.util.List;
038import java.util.Stack;
039
040import static gudusoft.gsqlparser.ESqlStatementType.*;
041
042/**
043 * Google BigQuery SQL parser implementation.
044 *
045 * <p>This parser handles BigQuery-specific SQL syntax including:
046 * <ul>
047 *   <li>BigQuery scripting language (BEGIN...END blocks, IF/WHILE/FOR/LOOP/REPEAT statements)</li>
048 *   <li>BigQuery UDFs (CREATE FUNCTION with SQL or JavaScript)</li>
049 *   <li>BigQuery procedures (CREATE PROCEDURE with BEGIN...END blocks)</li>
050 *   <li>Special type handling (STRUCT, ARRAY, DATE/TIME/TIMESTAMP literals)</li>
051 *   <li>Backtick-quoted identifiers including qualified names (`schema.table`)</li>
052 *   <li>BigQuery-specific keywords and functions</li>
053 * </ul>
054 *
055 * <p><b>Design Notes:</b>
056 * <ul>
057 *   <li>Extends {@link AbstractSqlParser} using the template method pattern</li>
058 *   <li>Uses {@link TLexerBigquery} for tokenization</li>
059 *   <li>Uses {@link TParserBigquery} for parsing</li>
060 *   <li>Delimiter character: ';' for SQL statements</li>
061 *   <li>Splits backtick-quoted qualified names (`schema.table`) into individual tokens</li>
062 * </ul>
063 *
064 * <p><b>Usage Example:</b>
065 * <pre>
066 * // Get BigQuery parser from factory
067 * SqlParser parser = SqlParserFactory.get(EDbVendor.dbvbigquery);
068 *
069 * // Build context
070 * ParserContext context = new ParserContext.Builder(EDbVendor.dbvbigquery)
071 *     .sqlText("SELECT * FROM `project.dataset.table` WHERE region = 'US'")
072 *     .build();
073 *
074 * // Parse
075 * SqlParseResult result = parser.parse(context);
076 *
077 * // Access statements
078 * TStatementList statements = result.getSqlStatements();
079 * </pre>
080 *
081 * @see SqlParser
082 * @see AbstractSqlParser
083 * @see TLexerBigquery
084 * @see TParserBigquery
085 * @since 3.2.0.0
086 */
087public class BigQuerySqlParser extends AbstractSqlParser {
088
089    /**
090     * Construct BigQuery SQL parser.
091     * <p>
092     * Configures the parser for BigQuery database with default delimiter (;).
093     * <p>
094     * Following the original TGSqlParser pattern, the lexer and parser are
095     * created once in the constructor and reused for all parsing operations.
096     */
097    public BigQuerySqlParser() {
098        super(EDbVendor.dbvbigquery);
099        this.delimiterChar = ';';
100        this.defaultDelimiterStr = ";";
101
102        // Create lexer once - will be reused for all parsing operations
103        this.flexer = new TLexerBigquery();
104        this.flexer.delimiterchar = this.delimiterChar;
105        this.flexer.defaultDelimiterStr = this.defaultDelimiterStr;
106
107        // Set parent's lexer reference for shared tokenization logic
108        this.lexer = this.flexer;
109
110        // Create parser once - will be reused for all parsing operations
111        this.fparser = new TParserBigquery(null);
112        this.fparser.lexer = this.flexer;
113    }
114
115    // ========== Parser Components ==========
116
    /**
     * The BigQuery lexer used for tokenization. It is created in the constructor
     * and is the instance returned by {@code getLexer}.
     */
    public TLexerBigquery flexer;

    /**
     * SQL parser (for BigQuery statements). It is created in the constructor and
     * is the instance returned by {@code getParser}.
     */
    private TParserBigquery fparser;

    /**
     * Current statement being built during raw statement extraction. It is reset
     * to {@code null} when extraction starts.
     */
    private TCustomSqlStatement gcurrentsqlstatement;

    /**
     * Parser context for current operation; it is passed to
     * {@code onRawStatementComplete} during raw statement extraction.
     * NOTE(review): no assignment to this field is visible in this part of the
     * file - confirm where it is set before relying on it being non-null.
     */
    private ParserContext parserContext;

    /**
     * User-defined delimiter string. Raw statement extraction resets it to
     * {@code defaultDelimiterStr} and rebuilds it from the tokens of a
     * {@code sstmysqldelimiter} statement; the mechanism mirrors the MySQL
     * DELIMITER command.
     */
    private String userDelimiterStr;
131
    // Stored procedure parsing state tracking

    /**
     * Kind of stored-program unit recorded for each nesting level while raw
     * statements are extracted. The value for the outermost level is chosen from
     * the statement type of the statement being started; {@code others} is the
     * fallback for statement types without a dedicated constant.
     */
    private enum stored_procedure_type {
        procedure, function, package_spec, package_body, block_with_declare,
        block_with_begin, create_trigger, create_library, others
    }
137
    /**
     * Progress marker kept for each nesting level while a stored-program unit is
     * scanned during raw statement extraction:
     * {@code start} - unit recognized, AS/IS or BEGIN not seen yet;
     * {@code is_as} - AS/IS seen (also the initial state of a DECLARE block);
     * {@code body} - BEGIN seen (also the initial state of a BEGIN block);
     * {@code bodyend} - the body of the unit is considered finished.
     * NOTE(review): no use of {@code end} is visible in this part of the file -
     * confirm its meaning against the rest of the extraction code.
     */
    private enum stored_procedure_status {
        start, is_as, body, bodyend, end
    }
141
    /**
     * Length of the per-nesting-level tracking arrays ({@code waitingEnds},
     * {@code sptype}, {@code procedure_status}) allocated at the start of raw
     * statement extraction.
     * NOTE(review): confirm that nesting deeper than this value is guarded
     * against where the nesting counter is incremented.
     */
    private static final int stored_procedure_nested_level = 50;
143
144    // Note: Global context and frame stack fields inherited from AbstractSqlParser:
145    // - protected TContext globalContext
146    // - protected TSQLEnv sqlEnv
147    // - protected Stack<TFrame> frameStack
148    // - protected TFrame globalFrame
149
150    // ========== AbstractSqlParser Abstract Methods Implementation ==========
151
152    /**
153     * Return the BigQuery lexer instance.
154     */
155    @Override
156    protected TCustomLexer getLexer(ParserContext context) {
157        return this.flexer;
158    }
159
160    /**
161     * Return the BigQuery SQL parser instance with updated token list.
162     */
163    @Override
164    protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) {
165        this.fparser.sourcetokenlist = tokens;
166        return this.fparser;
167    }
168
169    /**
170     * BigQuery does not use a secondary parser (unlike Oracle with PL/SQL).
171     */
172    @Override
173    protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) {
174        return null;
175    }
176
177    /**
178     * Call BigQuery-specific tokenization logic.
179     * <p>
180     * Delegates to dobigquerysqltexttotokenlist which handles BigQuery's
181     * specific keyword recognition, backtick-quoted identifiers, and
182     * qualified name splitting.
183     */
184    @Override
185    protected void tokenizeVendorSql() {
186        dobigquerysqltexttotokenlist();
187    }
188
189    /**
190     * Setup BigQuery parser for raw statement extraction.
191     * <p>
192     * BigQuery uses a single parser, so we inject sqlcmds and update
193     * the token list for the main parser only.
194     */
195    @Override
196    protected void setupVendorParsersForExtraction() {
197        // Inject sqlcmds into parser (required for make_stmt)
198        this.fparser.sqlcmds = this.sqlcmds;
199
200        // Update token list for parser
201        this.fparser.sourcetokenlist = this.sourcetokenlist;
202    }
203
204    /**
205     * Call BigQuery-specific raw statement extraction logic.
206     * <p>
207     * Delegates to dobigquerygetrawsqlstatements which handles BigQuery's
208     * statement delimiters and scripting language boundaries (BEGIN...END blocks,
209     * IF/WHILE/FOR/LOOP statements).
210     */
211    @Override
212    protected void extractVendorRawStatements(SqlParseResult.Builder builder) {
213        int errorCount = dobigquerygetrawsqlstatements(builder);
214        // Error count is tracked internally; errors are already added to syntaxErrors list
215
216        // Set the extracted statements in the builder
217        builder.sqlStatements(this.sqlstatements);
218    }
219
220    // ========== Tokenization Methods ==========
221
222    /**
223     * Tokenize BigQuery SQL text into a list of tokens.
224     * <p>
225     * This method handles BigQuery-specific token processing:
226     * <ul>
227     *   <li>Splits backtick-quoted qualified names (`schema.table`) into separate tokens</li>
228     *   <li>Handles all standard SQL tokens (keywords, identifiers, operators, etc.)</li>
229     * </ul>
230     * <p>
231     * Migrated from TGSqlParser.dobigquerysqltexttotokenlist()
232     */
233    private void dobigquerysqltexttotokenlist() {
234        TSourceToken asourcetoken, lcprevst;
235        int yychar;
236
237        flexer.tmpDelimiter = "";
238
239        asourcetoken = getanewsourcetoken();
240        if (asourcetoken == null) return;
241        yychar = asourcetoken.tokencode;
242
243        while (yychar > 0) {
244            if (asourcetoken != null) {
245                sourcetokenlist.add(asourcetoken);
246            }
247
248            asourcetoken = getanewsourcetoken();
249            if (asourcetoken == null) break;
250            yychar = asourcetoken.tokencode;
251
252            // `schema.table_name` - split into separate tokens
253            if ((asourcetoken.tokencode == TBaseType.ident)
254                    && (asourcetoken.toString().startsWith("`")) && (asourcetoken.toString().endsWith("`"))
255                    && (asourcetoken.toString().indexOf(".") > 0)) {
256                yychar = splitQualifiedNameInBacktick(asourcetoken);
257                asourcetoken = null;
258            }
259        }
260    }
261
262    /**
263     * Turn one token: `schema.table_name` into 3 tokens: `schema` . `table_name`
264     * <p>
265     * This helper method splits backtick-quoted qualified names into individual
266     * identifier and period tokens, preserving line/column information for each part.
267     * <p>
268     * Migrated from TGSqlParser.splitQualifiedNameInBacktick()
269     *
270     * @param asourcetoken the token to split
271     * @return the token code of the last token created
272     */
273    private int splitQualifiedNameInBacktick(TSourceToken asourcetoken) {
274        int yychar = 0;
275
276        List<String> elephantList = Arrays.asList(TBaseType.getTextWithoutQuoted(asourcetoken.toString()).split("\\."));
277        int p = 0, offset = 0;
278        for (String s : elephantList) {
279            TSourceToken pst = new TSourceToken("`" + s + "`");
280            pst.tokencode = asourcetoken.tokencode;
281            pst.tokentype = asourcetoken.tokentype;
282            pst.tokenstatus = asourcetoken.tokenstatus;
283            pst.lineNo = asourcetoken.lineNo;
284            pst.columnNo = asourcetoken.columnNo + offset;
285            if (p == 0) offset++; // this count the first ` token
286            offset = offset + s.length();
287            pst.container = sourcetokenlist;
288            if (p > 0) { // 第一个token使用被拆分前那个token的位置,从第二个开始的token,需要先把列表的位置指针加 1
289                sourcetokenlist.curpos = sourcetokenlist.curpos + 1;
290            }
291            pst.posinlist = sourcetokenlist.curpos;
292
293            sourcetokenlist.add(pst);
294            yychar = pst.tokencode;
295
296            if (p != elephantList.size() - 1) {
297                //`schema.table_name`, add period token in the middle of the backtick included identifier.
298                TSourceToken periodst = new TSourceToken(".");
299                periodst.tokencode = '.';
300                periodst.tokentype = ETokenType.ttperiod;
301                periodst.tokenstatus = asourcetoken.tokenstatus;
302                periodst.lineNo = asourcetoken.lineNo;
303                periodst.columnNo = asourcetoken.columnNo + offset;
304                offset++;
305                periodst.container = sourcetokenlist;
306                sourcetokenlist.curpos = sourcetokenlist.curpos + 1;
307                periodst.posinlist = sourcetokenlist.curpos;
308                sourcetokenlist.add(periodst);
309                yychar = periodst.tokencode;
310            }
311
312            p++;
313        }
314
315        return yychar;
316    }
317
318    // ========== Raw Statement Extraction ==========
319
320    /**
321     * Check if token code is one that pairs with END keyword.
322     * <p>
323     * Used for BigQuery scripting language to track nested BEGIN/IF/CASE/LOOP/WHILE/FOR/REPEAT blocks.
324     * <p>
325     * Migrated from TGSqlParser.checkTokenPairWithEnd()
326     */
327    private boolean checkTokenPairWithEnd(int tokencode) {
328        return ((tokencode == TBaseType.rrw_if) || (tokencode == TBaseType.rrw_case)
329                || (tokencode == TBaseType.rrw_loop) || (tokencode == TBaseType.rrw_repeat)
330                || (tokencode == TBaseType.rrw_while) || (tokencode == TBaseType.rrw_for)
331                || (tokencode == TBaseType.rrw_case));
332    }
333
334    /**
335     * Extract raw SQL statements from the token list.
336     * <p>
337     * This method separates individual SQL statements without full syntax checking.
338     * It handles BigQuery-specific syntax including:
339     * <ul>
340     *   <li>Stored procedures and functions with BEGIN...END blocks</li>
341     *   <li>Scripting language statements (IF, WHILE, FOR, LOOP, REPEAT, CASE)</li>
342     *   <li>STRUCT constructor special handling</li>
343     *   <li>Type casting with literals (DATE '2021-01-01')</li>
344     *   <li>Nested procedure tracking</li>
345     * </ul>
346     * <p>
347     * Migrated from TGSqlParser.dobigquerygetrawsqlstatements()
348     *
349     * @param builder the result builder to populate
350     * @return error count
351     */
352    private int dobigquerygetrawsqlstatements(SqlParseResult.Builder builder) {
353        int errorcount = 0;
354        gcurrentsqlstatement = null;
355        EFindSqlStateType gst = EFindSqlStateType.stnormal;
356        int i, c, beginNested = 0, waitingEnd = 0;
357        TSourceToken ast = null, lcprevsolidtoken = null;
358        boolean waitingDelimiter = false;
359
360        int waitingEnds[] = new int[stored_procedure_nested_level];
361        stored_procedure_type sptype[] = new stored_procedure_type[stored_procedure_nested_level];
362        stored_procedure_status procedure_status[] = new stored_procedure_status[stored_procedure_nested_level];
363        boolean endBySlashOnly = true;
364        int nestedProcedures = 0, nestedParenthesis = 0;
365
366        //reset delimiter
367        userDelimiterStr = defaultDelimiterStr;
368
369        for (i = 0; i < sourcetokenlist.size(); i++) {
370            if ((ast != null) && (ast.issolidtoken()))
371                lcprevsolidtoken = ast;
372
373            ast = sourcetokenlist.get(i);
374            sourcetokenlist.curpos = i;
375
376            // Handle STRUCT constructor: STRUCT(...) -> mark as struct constructor
377            if (ast.tokencode == TBaseType.rrw_bigquery_struct) {
378                TSourceToken st1 = ast.nextSolidToken();
379                if (st1 != null) {
380                    if (st1.tokencode == '(') {
381                        ast.tokencode = TBaseType.rrw_bigquery_struct_constructor;
382                    }
383                }
384            }
385            // Handle type casting: DATE '2021-01-01' -> mark DATE as datatype used to cast
386            else if ((ast.tokencode == TBaseType.sconst)) {
387                if (TTypeName.searchTypeByName(lcprevsolidtoken.toString()) != null) {
388                    if (lcprevsolidtoken.tokencode != TBaseType.rrw_interval) {
389                        lcprevsolidtoken.tokencode = TBaseType.rrw_bigquery_datatype_used_to_cast;
390                    }
391                }
392            }
393            // Handle TIME/DATE: could be function or type literal
394            else if ((ast.tokencode == TBaseType.rrw_time) || (ast.tokencode == TBaseType.rrw_date)) {
395                TSourceToken st1 = ast.nextSolidToken();
396                if (st1 != null) {
397                    if (st1.tokencode == TBaseType.sconst) {
398                        // ast.tokencode = TBaseType.rrw_bigquery_time_before_const;
399                    } else if (st1.tokencode == '(') {
400                        ast.tokencode = TBaseType.ident;
401                    }
402                }
403            }
404            // Handle FROM after period: i.from -> treat FROM as identifier
405            else if (ast.tokencode == TBaseType.rrw_from) {
406                TSourceToken st1 = ast.prevSolidToken();
407                if (st1 != null) { // select i.from as `from` from t, treats from in i.from as identifier
408                    if (st1.tokencode == '.') {
409                        ast.tokencode = TBaseType.ident;
410                    }
411                }
412            }
413
414            switch (gst) {
415                case sterror: {
416                    if (ast.tokentype == ETokenType.ttsemicolon) {
417                        gcurrentsqlstatement.sourcetokenlist.add(ast);
418                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
419                        gst = EFindSqlStateType.stnormal;
420                    } else {
421                        gcurrentsqlstatement.sourcetokenlist.add(ast);
422                    }
423                    break;
424                }
425                case stnormal: {
426                    if ((ast.tokencode == TBaseType.cmtdoublehyphen)
427                            || (ast.tokencode == TBaseType.cmtslashstar)
428                            || (ast.tokencode == TBaseType.lexspace)
429                            || (ast.tokencode == TBaseType.lexnewline)
430                            || (ast.tokentype == ETokenType.ttsemicolon)) {
431                        if (TBaseType.assigned(gcurrentsqlstatement)) {
432                            gcurrentsqlstatement.sourcetokenlist.add(ast);
433                        }
434
435                        continue;
436                    }
437
438                    // find a token to start sql or plsql mode
439                    gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement);
440
441                    if (TBaseType.assigned(gcurrentsqlstatement)) {
442                        if (gcurrentsqlstatement.isBigQueryplsql()) {
443                            nestedProcedures = 0;
444                            gst = EFindSqlStateType.ststoredprocedure;
445                            gcurrentsqlstatement.sourcetokenlist.add(ast);
446
447                            switch (gcurrentsqlstatement.sqlstatementtype) {
448                                case sstplsql_createprocedure:
449                                case sstcreateprocedure:
450                                    sptype[nestedProcedures] = stored_procedure_type.procedure;
451                                    break;
452                                case sstplsql_createfunction:
453                                    sptype[nestedProcedures] = stored_procedure_type.function;
454                                    break;
455                                case sstplsql_createpackage:
456                                    sptype[nestedProcedures] = stored_procedure_type.package_spec;
457                                    if (ast.searchToken(TBaseType.rrw_body, 5) != null) {
458                                        sptype[nestedProcedures] = stored_procedure_type.package_body;
459                                    }
460                                    break;
461                                case sst_plsql_block:
462                                    sptype[nestedProcedures] = stored_procedure_type.block_with_declare;
463                                    if (ast.tokencode == TBaseType.rrw_begin) {
464                                        sptype[nestedProcedures] = stored_procedure_type.block_with_begin;
465                                    }
466                                    break;
467                                case sstplsql_createtrigger:
468                                    sptype[nestedProcedures] = stored_procedure_type.create_trigger;
469                                    break;
470                                case sstoraclecreatelibrary:
471                                    sptype[nestedProcedures] = stored_procedure_type.create_library;
472                                    break;
473                                case sstplsql_createtype_placeholder:
474                                    gst = EFindSqlStateType.stsql;
475                                    break;
476                                default:
477                                    sptype[nestedProcedures] = stored_procedure_type.others;
478                                    break;
479                            }
480
481                            if (sptype[0] == stored_procedure_type.block_with_declare) {
482                                // sd
483                                endBySlashOnly = false;
484                                procedure_status[0] = stored_procedure_status.is_as;
485                            } else if (sptype[0] == stored_procedure_type.block_with_begin) {
486                                // sb
487                                endBySlashOnly = false;
488                                procedure_status[0] = stored_procedure_status.body;
489                            } else if (sptype[0] == stored_procedure_type.procedure) {
490                                // ss
491                                endBySlashOnly = false;
492                                procedure_status[0] = stored_procedure_status.start;
493                            } else if (sptype[0] == stored_procedure_type.function) {
494                                // ss
495                                endBySlashOnly = false;
496                                procedure_status[0] = stored_procedure_status.start;
497                            } else if (sptype[0] == stored_procedure_type.package_spec) {
498                                // ss
499                                endBySlashOnly = false;
500                                procedure_status[0] = stored_procedure_status.start;
501                            } else if (sptype[0] == stored_procedure_type.package_body) {
502                                // ss
503                                endBySlashOnly = false;
504                                procedure_status[0] = stored_procedure_status.start;
505                            } else if (sptype[0] == stored_procedure_type.create_trigger) {
506                                // ss
507                                endBySlashOnly = false;
508                                procedure_status[0] = stored_procedure_status.start;
509                            } else if (sptype[0] == stored_procedure_type.create_library) {
510                                // ss
511                                endBySlashOnly = false;
512                                procedure_status[0] = stored_procedure_status.bodyend;
513                            } else {
514                                // so
515                                endBySlashOnly = true;
516                                procedure_status[0] = stored_procedure_status.bodyend;
517                            }
518                            if ((ast.tokencode == TBaseType.rrw_begin)
519                                    || (ast.tokencode == TBaseType.rrw_package)
520                                    || (ast.searchToken(TBaseType.rrw_package, 4) != null)) {
521                                waitingEnds[nestedProcedures] = 1;
522                            }
523
524                        } else if ((gcurrentsqlstatement.sqlstatementtype == sst_ifstmt)
525                                || (gcurrentsqlstatement.sqlstatementtype == sst_loopstmt)
526                                || (gcurrentsqlstatement.sqlstatementtype == sstRepeat)
527                                || (gcurrentsqlstatement.sqlstatementtype == sstWhilestmt)
528                                || (gcurrentsqlstatement.sqlstatementtype == sstForStmt)
529                                || (gcurrentsqlstatement.sqlstatementtype == sst_plsql_block)
530                                || (gcurrentsqlstatement.sqlstatementtype == sst_casestmt)) {
531                            gst = EFindSqlStateType.stBigQueryIf;
532                            waitingEnd = 1;
533                            gcurrentsqlstatement.sourcetokenlist.add(ast);
534                        } else {
535                            gst = EFindSqlStateType.stsql;
536                            gcurrentsqlstatement.sourcetokenlist.add(ast);
537                        }
538
539                    }
540
541                    if (!TBaseType.assigned(gcurrentsqlstatement)) //error token found
542                    {
543                        this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo)
544                                , "Error when tokenlize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist));
545
546                        ast.tokentype = ETokenType.tttokenlizererrortoken;
547                        gst = EFindSqlStateType.sterror;
548
549                        gcurrentsqlstatement = new TUnknownSqlStatement(vendor);
550                        gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid;
551                        gcurrentsqlstatement.sourcetokenlist.add(ast);
552
553                    }
554                    break;
555                }
556                case stBigQueryIf: {
557                    gcurrentsqlstatement.sourcetokenlist.add(ast);
558
559                    if (checkTokenPairWithEnd(ast.tokencode)) { // if... end if
560                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
561                            //this is not if after END
562                            waitingEnd++;
563                        }
564                    } else if (ast.tokencode == TBaseType.rrw_end) { // if ... end if
565                        TSourceToken next = ast.nextSolidToken();
566                        if (next != null) {
567                            if (checkTokenPairWithEnd(next.tokencode)) { // if ... end if;
568                                waitingEnd--;
569                            } else if (next.tokencode == ';') { // begin ... end ;
570                                waitingEnd--;
571                            }
572
573                        }
574                    } else if ((ast.tokencode == ';') && (waitingEnd == 0)) {
575                        gst = EFindSqlStateType.stnormal;
576
577                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
578                    }
579                    break;
580                }
581                case stsqlplus: {
582                    if (ast.tokencode == TBaseType.lexnewline) {
583                        gst = EFindSqlStateType.stnormal;
584                        gcurrentsqlstatement.sourcetokenlist.add(ast); // so add it here
585                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
586                    } else {
587                        {
588                            gcurrentsqlstatement.sourcetokenlist.add(ast);
589                        }
590                    }
591
592                    break;
593                }//case source command or \. command
594                case stsql: {
595                    if ((ast.tokentype == ETokenType.ttsemicolon) && (gcurrentsqlstatement.sqlstatementtype != ESqlStatementType.sstmysqldelimiter)) {
596                        gst = EFindSqlStateType.stnormal;
597                        gcurrentsqlstatement.sourcetokenlist.add(ast);
598                        gcurrentsqlstatement.semicolonended = ast;
599                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
600                        continue;
601                    }
602                    if (ast.toString().equalsIgnoreCase(userDelimiterStr)) {
603                        gst = EFindSqlStateType.stnormal;
604                        ast.tokencode = ';';// treat it as semicolon
605                        gcurrentsqlstatement.sourcetokenlist.add(ast);
606                        gcurrentsqlstatement.semicolonended = ast;
607                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
608                        continue;
609                    }
610                    gcurrentsqlstatement.sourcetokenlist.add(ast);
611
612                    if ((ast.tokencode == TBaseType.lexnewline)
613                            && (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstmysqldelimiter)) {
614                        gst = EFindSqlStateType.stnormal;
615                        userDelimiterStr = "";
616                        for (int k = 0; k < gcurrentsqlstatement.sourcetokenlist.size(); k++) {
617                            TSourceToken st = gcurrentsqlstatement.sourcetokenlist.get(k);
618                            if ((st.tokencode == TBaseType.rrw_mysql_delimiter)
619                                    || (st.tokencode == TBaseType.lexnewline)
620                                    || (st.tokencode == TBaseType.lexspace)) {
621                                continue;
622                            }
623
624                            userDelimiterStr += st.toString();
625                        }
626                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
627
628                        continue;
629                    }
630
631                    break;
632                }
633                case ststoredprocedure: {
634                    if (procedure_status[nestedProcedures] != stored_procedure_status.bodyend) {
635                        gcurrentsqlstatement.sourcetokenlist.add(ast);
636                    }
637
638                    switch (procedure_status[nestedProcedures]) {
639                        case start:
640                            if ((ast.tokencode == TBaseType.rrw_as) || (ast.tokencode == TBaseType.rrw_is)) {
641                                // s1
642                                if (sptype[nestedProcedures] != stored_procedure_type.create_trigger) {
643                                    if ((sptype[0] == stored_procedure_type.package_spec) && (nestedProcedures > 0)) {
644                                        //when it's a package specification, only top level accept as/is
645                                    } else {
646                                        procedure_status[nestedProcedures] = stored_procedure_status.is_as;
647                                        if (ast.searchToken("language", 1) != null) {
648                                            if (nestedProcedures == 0) {
649                                                gst = EFindSqlStateType.stsql;
650                                            } else {
651                                                procedure_status[nestedProcedures] = stored_procedure_status.body;
652                                                nestedProcedures--;
653                                            }
654
655                                        }
656                                    }
657                                }
658                            } else if (ast.tokencode == TBaseType.rrw_begin) {
659                                // s4
660                                if (sptype[nestedProcedures] == stored_procedure_type.create_trigger)
661                                    waitingEnds[nestedProcedures]++;
662
663                                if (nestedProcedures > 0) {
664                                    nestedProcedures--;
665                                }
666                                procedure_status[nestedProcedures] = stored_procedure_status.body;
667                                waitingEnds[nestedProcedures] = 1;
668                            } else if (ast.tokencode == TBaseType.rrw_end) {
669                                //s10
670                                if ((nestedProcedures > 0) && (waitingEnds[nestedProcedures - 1] == 1)
671                                        && ((sptype[nestedProcedures - 1] == stored_procedure_type.package_body)
672                                        || (sptype[nestedProcedures - 1] == stored_procedure_type.package_spec))) {
673                                    nestedProcedures--;
674                                    procedure_status[nestedProcedures] = stored_procedure_status.bodyend;
675                                }
676                            } else if ((ast.tokencode == TBaseType.rrw_procedure) || (ast.tokencode == TBaseType.rrw_function)) {
677                                //s3
678                                if ((nestedProcedures > 0) && (waitingEnds[nestedProcedures] == 0)
679                                        && (procedure_status[nestedProcedures - 1] == stored_procedure_status.is_as)) {
680                                    nestedProcedures--;
681                                    nestedProcedures++;
682                                    waitingEnds[nestedProcedures] = 0;
683                                    procedure_status[nestedProcedures] = stored_procedure_status.start;
684                                }
685                            } else if ((sptype[nestedProcedures] == stored_procedure_type.create_trigger) && (ast.tokencode == TBaseType.rrw_declare)) {
686                                procedure_status[nestedProcedures] = stored_procedure_status.is_as;
687                            } else if ((sptype[nestedProcedures] == stored_procedure_type.create_trigger) && (ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) {
688                                ast.tokenstatus = ETokenStatus.tsignorebyyacc;
689                                gst = EFindSqlStateType.stnormal;
690                                onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
691
692                                //make / a sqlplus cmd
693                                gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
694                                gcurrentsqlstatement.sourcetokenlist.add(ast);
695                                onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
696                            } else if ((sptype[nestedProcedures] == stored_procedure_type.create_trigger)) {
697                                if (ast.tokencode == TBaseType.rrw_trigger) {
698                                    TSourceToken compoundSt = ast.searchToken(TBaseType.rrw_oracle_compound, -1);
699                                    if (compoundSt != null) {
700                                        //it's trigger with compound trigger block
701                                        procedure_status[nestedProcedures] = stored_procedure_status.body;
702                                        waitingEnds[nestedProcedures]++;
703                                    }
704                                }
705                            } else if ((sptype[nestedProcedures] == stored_procedure_type.function) && (ast.tokencode == TBaseType.rrw_teradata_using)) {
706                                if ((ast.searchToken("aggregate", -1) != null) || (ast.searchToken("pipelined", -1) != null)) {
707                                    if (nestedProcedures == 0) {
708                                        gst = EFindSqlStateType.stsql;
709                                    } else {
710                                        procedure_status[nestedProcedures] = stored_procedure_status.body;
711                                        nestedProcedures--;
712                                    }
713                                }
714
715                            } else {
716                                //other tokens, do nothing
717                            }
718                            break;
719                        case is_as:
720                            if ((ast.tokencode == TBaseType.rrw_procedure) || (ast.tokencode == TBaseType.rrw_function)) {
721                                // s2
722                                nestedProcedures++;
723                                waitingEnds[nestedProcedures] = 0;
724                                procedure_status[nestedProcedures] = stored_procedure_status.start;
725
726                                if (nestedProcedures > stored_procedure_nested_level - 1) {
727                                    gst = EFindSqlStateType.sterror;
728                                    nestedProcedures--;
729                                }
730
731                            } else if (ast.tokencode == TBaseType.rrw_begin) {
732                                // s5
733                                if ((nestedProcedures == 0) &&
734                                        ((sptype[nestedProcedures] == stored_procedure_type.package_body)
735                                                || (sptype[nestedProcedures] == stored_procedure_type.package_spec))) {
736                                    //top level package or package body's BEGIN keyword already count,
737                                    // so don't increase waitingEnds[nestedProcedures] here
738
739                                } else {
740                                    waitingEnds[nestedProcedures]++;
741                                }
742                                procedure_status[nestedProcedures] = stored_procedure_status.body;
743                            } else if (ast.tokencode == TBaseType.rrw_end) {
744                                // s6
745                                if ((nestedProcedures == 0) && (waitingEnds[nestedProcedures] == 1) &&
746                                        ((sptype[nestedProcedures] == stored_procedure_type.package_body) || (sptype[nestedProcedures] == stored_procedure_type.package_spec))) {
747                                    procedure_status[nestedProcedures] = stored_procedure_status.bodyend;
748                                    waitingEnds[nestedProcedures]--;
749                                } else {
750                                    waitingEnds[nestedProcedures]--;
751                                }
752                            } else if (ast.tokencode == TBaseType.rrw_case) {
753                                if (ast.searchToken(';', 1) == null) {
754                                    //this is not case before ;
755                                    waitingEnds[nestedProcedures]++;
756                                }
757                            } else {
758                                //other tokens, do nothing
759                            }
760                            break;
761                        case body:
762                            if ((ast.tokencode == TBaseType.rrw_begin)) {
763                                waitingEnds[nestedProcedures]++;
764                            } else if (ast.tokencode == TBaseType.rrw_if) {
765
766                                if (ast.searchToken(';', 2) == null) {
767                                    //this is not if before ;
768
769                                    // 2015-02-27, change 1 to 2 make it able to detect label name after case
770                                    // like this: END CASE l1;
771                                    waitingEnds[nestedProcedures]++;
772                                }
773                            } else if (ast.tokencode == TBaseType.rrw_case) {
774                                if (ast.searchToken(';', 2) == null) {
775                                    //this is not case before ;
776                                    if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
777                                        waitingEnds[nestedProcedures]++;
778                                    }
779                                }
780                            } else if ((ast.tokencode == TBaseType.rrw_loop)
781                                    || (ast.tokencode == TBaseType.rrw_while) || (ast.tokencode == TBaseType.rrw_repeat)
782                                    || (ast.tokencode == TBaseType.rrw_for)) {
783                                if (!((ast.searchToken(TBaseType.rrw_end, -1) != null)
784                                        && (ast.searchToken(';', 2) != null))) {
785                                    // exclude loop like this:
786                                    // end loop [labelname];
787                                    waitingEnds[nestedProcedures]++;
788                                }
789
790                            } else if (ast.tokencode == TBaseType.rrw_end) {
791                                //foundEnd = true;
792                                waitingEnds[nestedProcedures]--;
793                                //if (waitingEnd < 0) { waitingEnd = 0;}
794                                if (waitingEnds[nestedProcedures] == 0) {
795                                    if (nestedProcedures == 0) {
796                                        // s7
797                                        procedure_status[nestedProcedures] = stored_procedure_status.bodyend;
798                                    } else {
799                                        // s71
800                                        nestedProcedures--;
801                                        procedure_status[nestedProcedures] = stored_procedure_status.is_as;
802                                    }
803                                }
804                            } else if ((waitingEnds[nestedProcedures] == 0) && (ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) {
805                                ast.tokenstatus = ETokenStatus.tsignorebyyacc;
806                                gst = EFindSqlStateType.stnormal;
807                                onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
808
809                                //make / a sqlplus cmd
810                                gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
811                                gcurrentsqlstatement.sourcetokenlist.add(ast);
812                                onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
813                            }
814                            break;
815                        case bodyend:
816                            if ((ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) {
817                                ast.tokenstatus = ETokenStatus.tsignorebyyacc;
818                                gst = EFindSqlStateType.stnormal;
819                                onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
820
821                                //make / a sqlplus cmd
822                                gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
823                                gcurrentsqlstatement.sourcetokenlist.add(ast);
824                                onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
825                            } else if ((ast.tokencode == ';')) {
826                                gst = EFindSqlStateType.stnormal;
827                                onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
828
829                            } else if ((ast.searchToken(TBaseType.rrw_package, 1) != null) && (!endBySlashOnly)) {
830                                gcurrentsqlstatement.sourcetokenlist.add(ast);
831                                gst = EFindSqlStateType.stnormal;
832                                onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
833                            } else if ((ast.searchToken(TBaseType.rrw_procedure, 1) != null) && (!endBySlashOnly)) {
834                                gcurrentsqlstatement.sourcetokenlist.add(ast);
835                                gst = EFindSqlStateType.stnormal;
836                                onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
837                            } else if ((ast.searchToken(TBaseType.rrw_function, 1) != null) && (!endBySlashOnly)) {
838                                gcurrentsqlstatement.sourcetokenlist.add(ast);
839                                gst = EFindSqlStateType.stnormal;
840                                onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
841                            } else if ((ast.searchToken(TBaseType.rrw_create, 1) != null) && (ast.searchToken(TBaseType.rrw_package, 4) != null) && (!endBySlashOnly)) {
842                                gcurrentsqlstatement.sourcetokenlist.add(ast);
843                                gst = EFindSqlStateType.stnormal;
844                                onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
845                            } else if ((ast.searchToken(TBaseType.rrw_create, 1) != null) && (ast.searchToken(TBaseType.rrw_library, 4) != null) && (!endBySlashOnly)) {
846                                gcurrentsqlstatement.sourcetokenlist.add(ast);
847                                gst = EFindSqlStateType.stnormal;
848                                onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
849                            } else if ((ast.searchToken(TBaseType.rrw_alter, 1) != null) && (ast.searchToken(TBaseType.rrw_trigger, 2) != null) && (!endBySlashOnly)) {
850                                gcurrentsqlstatement.sourcetokenlist.add(ast);
851                                gst = EFindSqlStateType.stnormal;
852                                onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
853                            } else if ((ast.searchToken(TBaseType.rrw_select, 1) != null) && (!endBySlashOnly)) {
854                                gcurrentsqlstatement.sourcetokenlist.add(ast);
855                                gst = EFindSqlStateType.stnormal;
856                                onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
857                            } else if ((ast.searchToken(TBaseType.rrw_commit, 1) != null) && (!endBySlashOnly)) {
858                                gcurrentsqlstatement.sourcetokenlist.add(ast);
859                                gst = EFindSqlStateType.stnormal;
860                                onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
861                            } else if ((ast.searchToken(TBaseType.rrw_grant, 1) != null) &&
862                                    (ast.searchToken(TBaseType.rrw_execute, 2) != null) && (!endBySlashOnly)) {
863                                gcurrentsqlstatement.sourcetokenlist.add(ast);
864                                gst = EFindSqlStateType.stnormal;
865                                onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
866                            } else {
867                                gcurrentsqlstatement.sourcetokenlist.add(ast);
868                            }
869                            break;
870                        case end:
871                            break;
872                        default:
873                            break;
874                    }
875
876                    if (ast.tokencode == TBaseType.sqlpluscmd) {
877                        //change tokencode back to keyword or TBaseType.ident, because sqlplus cmd
878                        //in a sql statement(almost is plsql block) is not really a sqlplus cmd
879                        int m = flexer.getkeywordvalue(ast.getAstext());
880                        if (m != 0) {
881                            ast.tokencode = m;
882                        } else if (ast.tokentype == ETokenType.ttslash) {
883                            ast.tokencode = '/';
884                        } else {
885                            ast.tokencode = TBaseType.ident;
886                        }
887                    }
888
889                    final int wrapped_keyword_max_pos = 20;
890                    if ((ast.tokencode == TBaseType.rrw_wrapped) && (ast.posinlist - gcurrentsqlstatement.sourcetokenlist.get(0).posinlist < wrapped_keyword_max_pos)) {
891                        if (gcurrentsqlstatement instanceof TCommonStoredProcedureSqlStatement) {
892                            ((TCommonStoredProcedureSqlStatement) gcurrentsqlstatement).setWrapped(true);
893                        }
894
895                        if (gcurrentsqlstatement instanceof TPlsqlCreatePackage) {
896                            if (ast.prevSolidToken() != null) {
897                                ((TPlsqlCreatePackage) gcurrentsqlstatement).setPackageName(fparser.getNf().createObjectNameWithPart(ast.prevSolidToken()));
898                            }
899                        }
900                    }
901
902                    break;
903                } //ststoredprocedure
904
905            } //case
906        } //for
907
908        //last statement
909        if (TBaseType.assigned(gcurrentsqlstatement) &&
910                ((gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.ststoredprocedure)
911                        || (gst == EFindSqlStateType.stBigQueryIf) || (gst == EFindSqlStateType.sterror))) {
912            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, true, builder);
913        }
914
915        return errorcount;
916    }
917
918    // ========== Statement Parsing ==========
919
920    /**
921     * Parse all SQL statements after tokenization and raw extraction.
922     * <p>
923     * This method iterates through all statements, calls parsestatement on each,
924     * and handles error recovery for CREATE TABLE statements if enabled.
925     * <p>
926     * Inherited performParsing pattern from AbstractSqlParser, specialized for BigQuery.
927     */
928    @Override
929    protected TStatementList performParsing(ParserContext context, TCustomParser parser,
930                                            TCustomParser secondaryParser, TSourceTokenList tokens,
931                                            TStatementList rawStatements) {
932        // Store references for error handling
933        this.fparser = (TParserBigquery) parser;
934        this.sourcetokenlist = tokens;
935        this.parserContext = context;
936        this.sqlstatements = rawStatements;
937
938        // Initialize sqlcmds for BigQuery
939        if (this.sqlcmds == null) {
940            this.sqlcmds = SqlCmdsFactory.get(vendor);
941        }
942        this.fparser.sqlcmds = this.sqlcmds;
943
944        // Initialize global context using inherited method
945        initializeGlobalContext();
946
947        // Parse each statement
948        for (int i = 0; i < sqlstatements.size(); i++) {
949            TCustomSqlStatement stmt = sqlstatements.getRawSql(i);
950
951            try {
952                // Set frame stack for variable scope tracking
953                stmt.setFrameStack(frameStack);
954
955                // Parse the statement
956                int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree());
957
958                // Vendor-specific post-processing (none needed for BigQuery currently)
959                afterStatementParsed(stmt);
960
961                // Collect errors from the statement
962                if ((parseResult != 0) || (stmt.getErrorCount() > 0)) {
963                    copyErrorsFromStatement(stmt);
964                }
965
966            } catch (Exception ex) {
967                // Use inherited exception handler
968                handleStatementParsingException(stmt, i, ex);
969                continue;
970            }
971        }
972
973        // Clean up frame stack
974        if (globalFrame != null) {
975            globalFrame.popMeFromStack(frameStack);
976        }
977
978        return sqlstatements;
979    }
980
981    /**
982     * Post-process statement after parsing (hook method).
983     * <p>
984     * BigQuery does not require special post-processing, so this is a no-op.
985     * Override if BigQuery-specific validation is needed in the future.
986     */
987    protected void afterStatementParsed(TCustomSqlStatement stmt) {
988        // No special post-processing needed for BigQuery
989    }
990
991    // ========== Semantic Analysis ==========
992
993    /**
994     * Perform semantic analysis on parsed statements.
995     * <p>
996     * Runs TSQLResolver to build relationships between tables and columns,
997     * resolve references, and perform type checking.
998     */
999    @Override
1000    protected void performSemanticAnalysis(ParserContext context, TStatementList statements) {
1001        if (TBaseType.isEnableResolver() && getSyntaxErrors().isEmpty()) {
1002            TSQLResolver resolver = new TSQLResolver(globalContext, statements);
1003            resolver.resolve();
1004        }
1005    }
1006
1007    /**
1008     * Perform interpretation/evaluation on statements.
1009     * <p>
1010     * Runs TASTEvaluator to execute constant expressions and compile-time
1011     * evaluations.
1012     */
1013    @Override
1014    protected void performInterpreter(ParserContext context, TStatementList statements) {
1015        // BigQuery does not require interpretation currently
1016    }
1017
1018    @Override
1019    public String toString() {
1020        return "BigQuerySqlParser{vendor=" + vendor + "}";
1021    }
1022}