001package gudusoft.gsqlparser.parser;
002
003import gudusoft.gsqlparser.EDbVendor;
004import gudusoft.gsqlparser.TBaseType;
005import gudusoft.gsqlparser.TCustomLexer;
006import gudusoft.gsqlparser.TCustomParser;
007import gudusoft.gsqlparser.TCustomSqlStatement;
008import gudusoft.gsqlparser.TLexerPresto;
009import gudusoft.gsqlparser.TParserPresto;
010import gudusoft.gsqlparser.TSourceToken;
011import gudusoft.gsqlparser.TSourceTokenList;
012import gudusoft.gsqlparser.TStatementList;
013import gudusoft.gsqlparser.TSyntaxError;
014import gudusoft.gsqlparser.EFindSqlStateType;
015import gudusoft.gsqlparser.ETokenType;
016import gudusoft.gsqlparser.ETokenStatus;
017import gudusoft.gsqlparser.ESqlStatementType;
018import gudusoft.gsqlparser.EErrorType;
019import gudusoft.gsqlparser.stmt.TUnknownSqlStatement;
020import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement;
021import gudusoft.gsqlparser.sqlcmds.ISqlCmds;
022import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory;
023import gudusoft.gsqlparser.compiler.TContext;
024import gudusoft.gsqlparser.sqlenv.TSQLEnv;
025import gudusoft.gsqlparser.compiler.TGlobalScope;
026import gudusoft.gsqlparser.compiler.TFrame;
027import gudusoft.gsqlparser.resolver.TSQLResolver;
028import gudusoft.gsqlparser.TLog;
029import gudusoft.gsqlparser.compiler.TASTEvaluator;
030
031import java.io.BufferedReader;
032import java.util.ArrayList;
033import java.util.List;
034import java.util.Stack;
035
036/**
037 * Presto SQL parser implementation.
038 *
039 * <p>This parser handles Presto-specific SQL syntax including:
040 * <ul>
041 *   <li>Presto SQL statements</li>
042 *   <li>Presto PL/SQL blocks (stored procedures, functions)</li>
043 *   <li>Semicolon statement separators</li>
044 * </ul>
045 *
046 * <p><b>Design Notes:</b>
047 * <ul>
048 *   <li>Extends {@link AbstractSqlParser} using the template method pattern</li>
049 *   <li>Uses {@link TLexerPresto} for tokenization</li>
050 *   <li>Uses {@link TParserPresto} for parsing</li>
051 *   <li>Delimiter character: ';' for SQL statements</li>
052 * </ul>
053 *
054 * @see SqlParser
055 * @see AbstractSqlParser
056 * @see TLexerPresto
057 * @see TParserPresto
058 * @since 3.2.0.0
059 */
060public class PrestoSqlParser extends AbstractSqlParser {
061
062    /**
063     * Construct Presto SQL parser.
064     * <p>
065     * Configures the parser for Presto database with default delimiter (;).
066     * <p>
067     * Following the original TGSqlParser pattern, the lexer and parser are
068     * created once in the constructor and reused for all parsing operations.
069     */
070    public PrestoSqlParser() {
071        super(EDbVendor.dbvpresto);
072        this.delimiterChar = ';';
073        this.defaultDelimiterStr = ";";
074
075        // Create lexer once - will be reused for all parsing operations
076        this.flexer = new TLexerPresto();
077        this.flexer.delimiterchar = this.delimiterChar;
078        this.flexer.defaultDelimiterStr = this.defaultDelimiterStr;
079
080        // Set parent's lexer reference for shared tokenization logic
081        this.lexer = this.flexer;
082
083        // Create parser once - will be reused for all parsing operations
084        this.fparser = new TParserPresto(null);
085        this.fparser.lexer = this.flexer;
086    }
087
088    // ========== Parser Components ==========
089
090    /** The Presto lexer used for tokenization */
091    public TLexerPresto flexer;
092
093    /** SQL parser (for Presto statements) */
094    private TParserPresto fparser;
095
096    /** Current statement being built during extraction */
097    private TCustomSqlStatement gcurrentsqlstatement;
098
099    // Note: Global context and frame stack fields inherited from AbstractSqlParser:
100    // - protected TContext globalContext
101    // - protected TSQLEnv sqlEnv
102    // - protected Stack<TFrame> frameStack
103    // - protected TFrame globalFrame
104
105    // ========== AbstractSqlParser Abstract Methods Implementation ==========
106
107    /**
108     * Return the Presto lexer instance.
109     */
110    @Override
111    protected TCustomLexer getLexer(ParserContext context) {
112        return this.flexer;
113    }
114
115    /**
116     * Return the Presto SQL parser instance with updated token list.
117     */
118    @Override
119    protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) {
120        this.fparser.sourcetokenlist = tokens;
121        return this.fparser;
122    }
123
124    /**
125     * Presto does not have a secondary parser (only Oracle has PL/SQL parser).
126     *
127     * @return null (no secondary parser)
128     */
129    @Override
130    protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) {
131        return null;
132    }
133
134    /**
135     * Call Presto-specific tokenization logic.
136     * <p>
137     * Delegates to doprestotexttotokenlist which handles Presto's
138     * specific keyword recognition and token generation.
139     */
140    @Override
141    protected void tokenizeVendorSql() {
142        doprestotexttotokenlist();
143    }
144
145    /**
146     * Setup Presto parser for raw statement extraction.
147     * <p>
148     * Presto uses a single parser, so we inject sqlcmds and update
149     * the token list for the main parser only.
150     */
151    @Override
152    protected void setupVendorParsersForExtraction() {
153        // Inject sqlcmds into parser (required for make_stmt)
154        this.fparser.sqlcmds = this.sqlcmds;
155
156        // Update token list for parser
157        this.fparser.sourcetokenlist = this.sourcetokenlist;
158    }
159
160    /**
161     * Call Presto-specific raw statement extraction logic.
162     * <p>
163     * Delegates to doprestogetrawsqlstatements which handles Presto's
164     * statement delimiters (semicolon and slash for PL/SQL).
165     */
166    @Override
167    protected void extractVendorRawStatements(SqlParseResult.Builder builder) {
168        doprestogetrawsqlstatements(builder);
169    }
170
171    /**
172     * Perform full parsing of statements with syntax checking.
173     * <p>
174     * This method orchestrates the parsing of all statements.
175     */
176    @Override
177    protected TStatementList performParsing(ParserContext context,
178                                           TCustomParser parser,
179                                           TCustomParser secondaryParser,
180                                           TSourceTokenList tokens,
181                                           TStatementList rawStatements) {
182        // Store references
183        this.fparser = (TParserPresto) parser;
184        this.sourcetokenlist = tokens;
185        this.parserContext = context;
186
187        // Use the raw statements passed from AbstractSqlParser.parse()
188        this.sqlstatements = rawStatements;
189
190        // Initialize statement parsing infrastructure
191        this.sqlcmds = SqlCmdsFactory.get(vendor);
192
193        // Inject sqlcmds into parser (required for make_stmt and other methods)
194        this.fparser.sqlcmds = this.sqlcmds;
195
196        // Initialize global context for semantic analysis
197        initializeGlobalContext();
198
199        // Parse each statement with exception handling for robustness
200        for (int i = 0; i < sqlstatements.size(); i++) {
201            TCustomSqlStatement stmt = sqlstatements.getRawSql(i);
202
203            try {
204                stmt.setFrameStack(frameStack);
205
206                // Parse the statement
207                int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree());
208
209                // Handle error recovery for CREATE TABLE/INDEX
210                boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE;
211                if (doRecover && ((parseResult != 0) || (stmt.getErrorCount() > 0))) {
212                    handleCreateTableErrorRecovery(stmt);
213                }
214
215                // Collect syntax errors
216                if ((parseResult != 0) || (stmt.getErrorCount() > 0)) {
217                    copyErrorsFromStatement(stmt);
218                }
219
220            } catch (Exception ex) {
221                // Use inherited exception handler from AbstractSqlParser
222                // This provides consistent error handling across all database parsers
223                handleStatementParsingException(stmt, i, ex);
224                continue;
225            }
226        }
227
228        // Clean up frame stack
229        if (globalFrame != null) {
230            globalFrame.popMeFromStack(frameStack);
231        }
232
233        return this.sqlstatements;
234    }
235
236    // Note: initializeGlobalContext() inherited from AbstractSqlParser
237    // Note: No override of afterStatementParsed() needed - default (no-op) is appropriate for Presto
238
239    /**
240     * Handle error recovery for CREATE TABLE/INDEX statements.
241     */
242    private void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) {
243        if (((stmt.sqlstatementtype == ESqlStatementType.sstcreatetable)
244                || (stmt.sqlstatementtype == ESqlStatementType.sstcreateindex))
245                && (!TBaseType.c_createTableStrictParsing)) {
246
247            int nested = 0;
248            boolean isIgnore = false, isFoundIgnoreToken = false;
249            TSourceToken firstIgnoreToken = null;
250
251            for (int k = 0; k < stmt.sourcetokenlist.size(); k++) {
252                TSourceToken st = stmt.sourcetokenlist.get(k);
253                if (isIgnore) {
254                    if (st.issolidtoken() && (st.tokencode != ';')) {
255                        isFoundIgnoreToken = true;
256                        if (firstIgnoreToken == null) {
257                            firstIgnoreToken = st;
258                        }
259                    }
260                    if (st.tokencode != ';') {
261                        st.tokencode = TBaseType.sqlpluscmd;
262                    }
263                    continue;
264                }
265                if (st.tokencode == (int) ')') {
266                    nested--;
267                    if (nested == 0) {
268                        boolean isSelect = false;
269                        TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1);
270                        if (st1 != null) {
271                            TSourceToken st2 = st.searchToken((int) '(', 2);
272                            if (st2 != null) {
273                                TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3);
274                                isSelect = (st3 != null);
275                            }
276                        }
277                        if (!isSelect) isIgnore = true;
278                    }
279                } else if (st.tokencode == (int) '(') {
280                    nested++;
281                }
282            }
283
284            if (isFoundIgnoreToken) {
285                stmt.clearError();
286                stmt.parsestatement(null, false);
287            }
288        }
289    }
290
291    /**
292     * Perform Presto-specific semantic analysis using TSQLResolver.
293     */
294    @Override
295    protected void performSemanticAnalysis(ParserContext context, TStatementList statements) {
296        if (TBaseType.isEnableResolver() && getSyntaxErrors().isEmpty()) {
297            TSQLResolver resolver = new TSQLResolver(globalContext, statements);
298            resolver.resolve();
299        }
300    }
301
302    /**
303     * Perform interpretation/evaluation on parsed statements.
304     */
305    @Override
306    protected void performInterpreter(ParserContext context, TStatementList statements) {
307        if (TBaseType.ENABLE_INTERPRETER && getSyntaxErrors().isEmpty()) {
308            TLog.clearLogs();
309            TGlobalScope interpreterScope = new TGlobalScope(sqlEnv);
310            TLog.enableInterpreterLogOnly();
311            TASTEvaluator astEvaluator = new TASTEvaluator(statements, interpreterScope);
312            astEvaluator.eval();
313        }
314    }
315
316    // ========== Presto-Specific Tokenization ==========
317
318    /**
319     * Presto-specific tokenization logic.
320     * <p>
321     * Extracted from: TGSqlParser.doprestotexttotokenlist() (lines 4727-4757)
322     */
323    private void doprestotexttotokenlist() {
324        TSourceToken asourcetoken;
325        int yychar;
326        boolean startDelimiter = false;
327
328        flexer.tmpDelimiter = "";
329
330        asourcetoken = getanewsourcetoken();
331        if (asourcetoken == null) return;
332        yychar = asourcetoken.tokencode;
333
334        while (yychar > 0) {
335            sourcetokenlist.add(asourcetoken);
336            asourcetoken = getanewsourcetoken();
337            if (asourcetoken == null) break;
338            checkMySQLCommentToken(asourcetoken);
339
340            if ((asourcetoken.tokencode == TBaseType.lexnewline) && (startDelimiter)) {
341                startDelimiter = false;
342                flexer.tmpDelimiter = sourcetokenlist.get(sourcetokenlist.size() - 1).getAstext();
343            }
344
345            yychar = asourcetoken.tokencode;
346        }
347    }
348
349    /**
350     * Check MySQL comment token format.
351     * <p>
352     * This method is currently a no-op (commented out in original TGSqlParser).
353     */
354    private void checkMySQLCommentToken(TSourceToken cmtToken) {
355        // Currently a no-op - left for future enhancement
356    }
357
358    // ========== Presto-Specific Raw Statement Extraction ==========
359
360    /**
361     * Extract raw SQL statements from token list.
362     * <p>
363     * Extracted from: TGSqlParser.doprestogetrawsqlstatements() (lines 7205-7424)
364     * <p>
365     * Handles Presto-specific statement boundaries:
366     * - Semicolon (;) for SQL statements
367     * - Forward slash (/) for PL/SQL blocks
368     * - BEGIN/END blocks for stored procedures
369     */
370    private void doprestogetrawsqlstatements(SqlParseResult.Builder builder) {
371        int waitingEnd = 0;
372        boolean foundEnd = false;
373
374        if (TBaseType.assigned(sqlstatements)) sqlstatements.clear();
375        if (!TBaseType.assigned(sourcetokenlist)) {
376            builder.errorCode(-1);
377            return;
378        }
379
380        gcurrentsqlstatement = null;
381        EFindSqlStateType gst = EFindSqlStateType.stnormal;
382        TSourceToken lcprevsolidtoken = null, ast = null;
383
384        for (int i = 0; i < sourcetokenlist.size(); i++) {
385
386            if ((ast != null) && (ast.issolidtoken()))
387                lcprevsolidtoken = ast;
388
389            ast = sourcetokenlist.get(i);
390            sourcetokenlist.curpos = i;
391
392            switch (gst) {
393                case sterror: {
394                    if (ast.tokentype == ETokenType.ttsemicolon) {
395                        gcurrentsqlstatement.sourcetokenlist.add(ast);
396                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
397                        gst = EFindSqlStateType.stnormal;
398                    } else {
399                        gcurrentsqlstatement.sourcetokenlist.add(ast);
400                    }
401                    break;
402                } //sterror
403
404                case stnormal: {
405                    if ((ast.tokencode == TBaseType.cmtdoublehyphen)
406                            || (ast.tokencode == TBaseType.cmtslashstar)
407                            || (ast.tokencode == TBaseType.lexspace)
408                            || (ast.tokencode == TBaseType.lexnewline)
409                            || (ast.tokentype == ETokenType.ttsemicolon)) {
410                        if (gcurrentsqlstatement != null) {
411                            gcurrentsqlstatement.sourcetokenlist.add(ast);
412                        }
413
414                        if ((lcprevsolidtoken != null) && (ast.tokentype == ETokenType.ttsemicolon)) {
415                            if (lcprevsolidtoken.tokentype == ETokenType.ttsemicolon) {
416                                // ;;;; continuous semicolon,treat it as comment
417                                ast.tokentype = ETokenType.ttsimplecomment;
418                                ast.tokencode = TBaseType.cmtdoublehyphen;
419                            }
420                        }
421
422                        continue;
423                    }
424
425                    // find a tokentext to start sql or plsql mode
426                    gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement);
427
428                    if (gcurrentsqlstatement != null) {
429                        if (gcurrentsqlstatement.isprestoplsql()) {
430                            gst = EFindSqlStateType.ststoredprocedure;
431                            gcurrentsqlstatement.sourcetokenlist.add(ast);
432                            foundEnd = false;
433                            if ((ast.tokencode == TBaseType.rrw_begin)
434                                    || (ast.tokencode == TBaseType.rrw_package)
435                                    || (ast.searchToken(TBaseType.rrw_package, 4) != null)) {
436                                waitingEnd = 1;
437                            }
438                        } else {
439                            gst = EFindSqlStateType.stsql;
440                            gcurrentsqlstatement.sourcetokenlist.add(ast);
441                        }
442                    } else {
443                        //error tokentext found
444
445                        this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo)
446                                , "Error when tokenlize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist));
447
448                        ast.tokentype = ETokenType.tttokenlizererrortoken;
449                        gst = EFindSqlStateType.sterror;
450
451                        gcurrentsqlstatement = new TUnknownSqlStatement(vendor);
452                        gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid;
453                        gcurrentsqlstatement.sourcetokenlist.add(ast);
454
455                    }
456
457                    break;
458                } // stnormal
459
460                case stsql: {
461                    if (ast.tokentype == ETokenType.ttsemicolon) {
462                        gst = EFindSqlStateType.stnormal;
463                        gcurrentsqlstatement.sourcetokenlist.add(ast);
464                        gcurrentsqlstatement.semicolonended = ast;
465                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
466                        continue;
467                    }
468
469                    if (sourcetokenlist.sqlplusaftercurtoken()) //most probaly is / cmd
470                    {
471                        gst = EFindSqlStateType.stnormal;
472                        gcurrentsqlstatement.sourcetokenlist.add(ast);
473                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
474                        continue;
475                    }
476                    gcurrentsqlstatement.sourcetokenlist.add(ast);
477                    break;
478                }//case stsql
479
480                case ststoredprocedure: {
481                    if ((ast.tokencode == TBaseType.rrw_begin)) {
482                        waitingEnd++;
483                    } else if ((ast.tokencode == TBaseType.rrw_if)) {
484                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
485                            //this is not if after END
486                            waitingEnd++;
487                        }
488                    } else if ((ast.tokencode == TBaseType.rrw_case)) {
489                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
490                            //this is not case after END
491                            waitingEnd++;
492                        }
493                    } else if ((ast.tokencode == TBaseType.rrw_loop)) {
494                        if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
495                            //this is not loop after END
496                            waitingEnd++;
497                        }
498                    } else if (ast.tokencode == TBaseType.rrw_end) {
499                        foundEnd = true;
500                        waitingEnd--;
501                        if (waitingEnd < 0) {
502                            waitingEnd = 0;
503                        }
504                    }
505
506                    if ((ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) //and (prevst.NewlineIsLastTokenInTailerToken)) then
507                    {
508                        // TPlsqlStatementParse(asqlstatement).TerminatorToken := ast;
509                        ast.tokenstatus = ETokenStatus.tsignorebyyacc;
510                        gst = EFindSqlStateType.stnormal;
511                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
512
513                        //make / a sqlplus cmd
514                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
515                        gcurrentsqlstatement.sourcetokenlist.add(ast);
516                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
517                    } else if ((ast.tokentype == ETokenType.ttperiod) && (sourcetokenlist.returnaftercurtoken(false)) && (sourcetokenlist.returnbeforecurtoken(false))) {    // single dot at a seperate line
518                        ast.tokenstatus = ETokenStatus.tsignorebyyacc;
519                        gst = EFindSqlStateType.stnormal;
520                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
521
522                        //make ttperiod a sqlplus cmd
523                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
524                        gcurrentsqlstatement.sourcetokenlist.add(ast);
525                        onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
526                    } else {
527                        gcurrentsqlstatement.sourcetokenlist.add(ast);
528                        if ((ast.tokentype == ETokenType.ttsemicolon) && (waitingEnd == 0)
529                                && (foundEnd)) {
530                            gst = EFindSqlStateType.stnormal;
531                            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, false, builder);
532                        }
533                    }
534
535                    if (ast.tokencode == TBaseType.sqlpluscmd) {
536                        //change tokencode back to keyword or TBaseType.ident, because sqlplus cmd
537                        //in a sql statement(almost is plsql block) is not really a sqlplus cmd
538                        int m = flexer.getkeywordvalue(ast.getAstext());
539                        if (m != 0) {
540                            ast.tokencode = m;
541                        } else {
542                            ast.tokencode = TBaseType.ident;
543                        }
544                    }
545
546                    break;
547                } //ststoredprocedure
548            } //switch
549        }//for
550
551        //last statement
552        if ((gcurrentsqlstatement != null) &&
553                ((gst == EFindSqlStateType.stsqlplus) || (gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.ststoredprocedure) ||
554                        (gst == EFindSqlStateType.sterror))) {
555            onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, null, sqlstatements, true, builder);
556        }
557
558        // Set the statement list in the builder
559        builder.sqlStatements(this.sqlstatements);
560        builder.errorCode(syntaxErrors.size());
561    }
562
563    @Override
564    public String toString() {
565        return "PrestoSqlParser{vendor=" + vendor + "}";
566    }
567}