001package gudusoft.gsqlparser.parser;
002
003import gudusoft.gsqlparser.EDbVendor;
004import gudusoft.gsqlparser.TBaseType;
005import gudusoft.gsqlparser.TCustomLexer;
006import gudusoft.gsqlparser.TCustomParser;
007import gudusoft.gsqlparser.TCustomSqlStatement;
008import gudusoft.gsqlparser.TLexerDameng;
009import gudusoft.gsqlparser.TParserDameng;
010import gudusoft.gsqlparser.TSourceToken;
011import gudusoft.gsqlparser.TSourceTokenList;
012import gudusoft.gsqlparser.TStatementList;
013import gudusoft.gsqlparser.TSyntaxError;
014import gudusoft.gsqlparser.EFindSqlStateType;
015import gudusoft.gsqlparser.ESqlPlusCmd;
016import gudusoft.gsqlparser.ETokenType;
017import gudusoft.gsqlparser.ETokenStatus;
018import gudusoft.gsqlparser.ESqlStatementType;
019import gudusoft.gsqlparser.EErrorType;
020import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement;
021import gudusoft.gsqlparser.stmt.TUnknownSqlStatement;
022import gudusoft.gsqlparser.sqlcmds.ISqlCmds;
023import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory;
024import gudusoft.gsqlparser.compiler.TContext;
025import gudusoft.gsqlparser.sqlenv.TSQLEnv;
026import gudusoft.gsqlparser.compiler.TGlobalScope;
027import gudusoft.gsqlparser.compiler.TFrame;
028import gudusoft.gsqlparser.resolver.TSQLResolver;
029import gudusoft.gsqlparser.TLog;
030import gudusoft.gsqlparser.TGSqlParser;
031import gudusoft.gsqlparser.compiler.TASTEvaluator;
032import java.util.Stack;
033
034import java.io.BufferedReader;
035import java.util.ArrayList;
036import java.util.List;
037
038/**
039 * Dameng database SQL parser implementation.
040 *
041 * <p>This parser handles Dameng-specific SQL syntax including:
042 * <ul>
043 *   <li>PL/SQL blocks (procedures, functions, packages, triggers)</li>
044 *   <li>SQL*Plus commands (spool, set, show, etc.)</li>
045 *   <li>Dameng-specific DML/DDL (MERGE, flashback, etc.)</li>
046 *   <li>Dameng analytical functions and extensions</li>
047 *   <li>Special token handling (INNER, NOT DEFERRABLE, etc.)</li>
048 * </ul>
049 *
050 * <p><b>Implementation Status:</b> PHASE 3 - IN PROGRESS
051 * <ul>
 *   <li><b>Completed:</b> Dameng classes (TLexerDameng, TParserDameng) are now PUBLIC</li>
053 *   <li><b>Current:</b> Skeleton implementation delegates to legacy TGSqlParser</li>
054 *   <li><b>Next:</b> Extract vendor-specific logic from TGSqlParser into this class</li>
055 *   <li><b>Goal:</b> Fully self-contained Dameng parser using AbstractSqlParser template</li>
056 * </ul>
057 *
058 * <p><b>Design Notes:</b>
059 * <ul>
 *   <li>Extends {@link AbstractSqlParser} and implements its vendor-specific hook methods</li>
 *   <li>Directly instantiates {@link TLexerDameng} and {@link TParserDameng}</li>
 *   <li>Uses two {@link TParserDameng} instances: one for SQL statements, one for PL/SQL blocks</li>
063 *   <li>Handles SQL*Plus commands via special tokenization logic</li>
064 *   <li>Delimiter character: '/' for PL/SQL blocks, ';' for SQL statements</li>
065 * </ul>
066 *
067 * <p><b>Usage Example:</b>
068 * <pre>
069 * // Get Dameng parser from factory
070 * SqlParser parser = SqlParserFactory.get(EDbVendor.dbvdameng);
071 *
072 * // Build context
073 * ParserContext context = new ParserContext.Builder(EDbVendor.dbvdameng)
074 *     .sqlText("SELECT * FROM emp WHERE deptno = 10")
075 *     .build();
076 *
077 * // Parse
078 * SqlParseResult result = parser.parse(context);
079 *
080 * // Access statements
081 * TStatementList statements = result.getSqlStatements();
082 * </pre>
083 *
084 * <p><b>Phase 3 Extraction Roadmap:</b>
085 * <ol>
 *   <li>✅ DONE: Make TLexerDameng and TParserDameng public</li>
087 *   <li>⏳ TODO: Extract tokenization logic (~367 lines from TGSqlParser.dodamengsqltexttotokenlist())</li>
088 *   <li>⏳ TODO: Extract raw statement logic (~200 lines from TGSqlParser.dodamenggetrawsqlstatements())</li>
089 *   <li>⏳ TODO: Extract parsing orchestration (SQL vs PL/SQL parser selection)</li>
090 *   <li>⏳ TODO: Extract helper methods (getanewsourcetoken, getprevsolidtoken, etc.)</li>
091 *   <li>⏳ TODO: Extend AbstractSqlParser and use template method pattern fully</li>
092 *   <li>⏳ TODO: Remove all delegation to TGSqlParser</li>
093 * </ol>
094 *
095 * <p><b>Key Methods to Extract from TGSqlParser:</b>
096 * <ul>
097 *   <li>{@code dodamengsqltexttotokenlist()} - Dameng tokenization with SQL*Plus command detection</li>
098 *   <li>{@code dodamenggetrawsqlstatements()} - Dameng raw statement boundaries (handles PL/SQL blocks)</li>
099 *   <li>{@code getanewsourcetoken()} - Token iterator from lexer</li>
100 *   <li>{@code getprevsolidtoken()} - Navigate token list backwards</li>
101 *   <li>{@code IsValidPlaceForDivToSqlplusCmd()} - Slash vs divide operator disambiguation</li>
102 *   <li>{@code countLines()} - Multi-line token handling</li>
103 *   <li>{@code spaceAtTheEndOfReturnToken()} - SQL*Plus command validation</li>
104 * </ul>
105 *
106 * @see SqlParser
107 * @see AbstractSqlParser
108 * @see TLexerDameng
 * @see TParserDameng
111 * @since 3.2.0.0
112 */
113public class DamengSqlParser extends AbstractSqlParser {
114
115    /**
116     * Construct Dameng SQL parser.
117     * <p>
118     * Configures the parser for Dameng database with default delimiters:
119     * <ul>
120     *   <li>SQL statements: semicolon (;)</li>
121     *   <li>PL/SQL blocks: forward slash (/)</li>
122     * </ul>
123     * <p>
124     * Following the original TGSqlParser pattern, the lexer and parsers are
125     * created once in the constructor and reused for all parsing operations.
126     * This avoids unnecessary object allocation overhead since the parser
127     * is not thread-safe and designed for single-use per instance.
128     */
129    public DamengSqlParser() {
130        super(EDbVendor.dbvdameng);
131        this.delimiterChar = '/';  // PL/SQL delimiter
132        this.defaultDelimiterStr = ";";  // SQL delimiter
133
134        // Create lexer once - will be reused for all parsing operations
135        // (matches original TGSqlParser constructor pattern at line 1033)
136        this.flexer = new TLexerDameng();
137        this.flexer.delimiterchar = this.delimiterChar;
138        this.flexer.defaultDelimiterStr = this.defaultDelimiterStr;
139
140        // Set parent's lexer reference for shared tokenization logic
141        this.lexer = this.flexer;
142
143        // Create parsers once - will be reused for all parsing operations
144        // Token list will be set/updated when parsing begins
145        // (matches original TGSqlParser constructor pattern at lines 1036-1040)
146        this.fparser = new TParserDameng(null);
147        this.fplsqlparser = new TParserDameng(null);
148        this.fparser.lexer = this.flexer;
149        this.fplsqlparser.lexer = this.flexer;
150
151        // NOTE: sourcetokenlist and sqlstatements are initialized in AbstractSqlParser constructor
152    }
153
154    // ========== Tokenization State (used during tokenization) ==========
155    // These instance variables are used during the tokenization process
156    // and are set up at the beginning of tokenization
157
158    /** The Dameng lexer used for tokenization */
159    public TLexerDameng flexer;  // Package-accessible for TGSqlParser integration
160
161    // NOTE: sourcetokenlist moved to AbstractSqlParser (inherited)
162
163    /** Optional callback for token processing (can be null) */
164    private Object tokenHandle;  // TTokenCallback interface - keeping as Object for now
165
166    // State variables for tokenization (set during dodamengsqltexttotokenlist())
167    private boolean continuesqlplusatnewline;
168    private boolean waitingreturnforsemicolon;
169    private boolean waitingreturnforfloatdiv;
170    private boolean isvalidplace;
171    private boolean insqlpluscmd;
172
173    // ========== Statement Parsing State (used during statement parsing) ==========
174    // These instance variables are used during the statement parsing process
175
176    // NOTE: The following fields moved to AbstractSqlParser (inherited):
177    //   - sqlcmds (ISqlCmds)
178    //   - sqlstatements (TStatementList)
179    //   - parserContext (ParserContext)
180
181    /** Current statement being built */
182    private TCustomSqlStatement gcurrentsqlstatement;
183
184    /** SQL parser (for regular SQL statements) */
185    private TParserDameng fparser;
186
187    /** PL/SQL parser (for PL/SQL blocks) */
188    private TParserDameng fplsqlparser;
189
190    // Note: Global context and frame stack fields inherited from AbstractSqlParser:
191    // - protected TContext globalContext
192    // - protected TSQLEnv sqlEnv
193    // - protected Stack<TFrame> frameStack
194    // - protected TFrame globalFrame
195
196    // ========== Enums for State Machine ==========
197    // These enums are used by the dodamenggetrawsqlstatements state machine
198
199    enum stored_procedure_status {start,is_as,body,bodyend,end, cursor_declare};
200    enum stored_procedure_type {function,procedure,package_spec,package_body, block_with_begin,block_with_declare,
201        create_trigger,create_library,cursor_in_package_spec,others};
202
203    static final int stored_procedure_nested_level = 1024;
204
205    // ========== AbstractSqlParser Abstract Methods Implementation ==========
206
207    /**
208     * Return the Dameng lexer instance.
209     * <p>
210     * The lexer is created once in the constructor and reused for all
211     * parsing operations. This method simply returns the existing instance,
212     * matching the original TGSqlParser pattern where the lexer is created
213     * once and reset before each use.
214     *
215     * @param context parser context (not used, lexer already created)
216     * @return the Dameng lexer instance created in constructor
217     */
218    @Override
219    protected TCustomLexer getLexer(ParserContext context) {
220        // Return existing lexer instance (created in constructor)
221        // No need to create new instance - matches original TGSqlParser pattern
222        return this.flexer;
223    }
224
225    /**
226     * Return the Dameng SQL parser instance with updated token list.
227     * <p>
228     * The parser is created once in the constructor and reused for all
229     * parsing operations. This method updates the token list and returns
230     * the existing instance, matching the original TGSqlParser pattern.
231     *
232     * @param context parser context (not used, parser already created)
233     * @param tokens source token list to parse
234     * @return the Dameng SQL parser instance created in constructor
235     */
236    @Override
237    protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) {
238        // Update token list for reused parser instance
239        this.fparser.sourcetokenlist = tokens;
240        return this.fparser;
241    }
242
243    /**
244     * Return the Dameng PL/SQL parser instance with updated token list.
245     * <p>
246     * Dameng needs a secondary parser (TParserDameng) for PL/SQL blocks
247     * (procedures, functions, packages, triggers, anonymous blocks).
248     * <p>
249     * The parser is created once in the constructor and reused for all
250     * parsing operations. This method updates the token list and returns
251     * the existing instance, matching the original TGSqlParser pattern.
252     *
253     * @param context parser context (not used, parser already created)
254     * @param tokens source token list to parse
255     * @return the Dameng PL/SQL parser instance created in constructor
256     */
257    @Override
258    protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) {
259        // Update token list for reused parser instance
260        this.fplsqlparser.sourcetokenlist = tokens;
261        return this.fplsqlparser;
262    }
263
264    /**
265     * Call Dameng-specific tokenization logic.
266     * <p>
267     * Delegates to dodamengsqltexttotokenlist which handles Dameng's
268     * specific keyword recognition, SQL*Plus commands, forward slash
269     * disambiguation, and token generation.
270     */
271    @Override
272    protected void tokenizeVendorSql() {
273        dodamengsqltexttotokenlist();
274    }
275
276    /**
277     * Post-tokenization: merge ${...} template variable tokens into single IDENT tokens.
278     * Template syntax like ${if(len(X) == 0, "", "...")} is used by BI tools.
279     */
280    @Override
281    protected void doAfterTokenize(TSourceTokenList tokens) {
282        super.doAfterTokenize(tokens);
283        mergeTemplateVariableTokens(tokens);
284    }
285
286    private void mergeTemplateVariableTokens(TSourceTokenList tokens) {
287        for (int i = 0; i < tokens.size() - 1; i++) {
288            TSourceToken dollar = tokens.get(i);
289
290            // Match either bare '$' (self-char) or '$IDENT' like $P, $X (identifier starting with $)
291            boolean isDollarChar = (dollar.tokencode == '$');
292            boolean isDollarIdent = (dollar.tokencode == TBaseType.ident
293                    && dollar.astext != null && dollar.astext.startsWith("$"));
294            if (!isDollarChar && !isDollarIdent) continue;
295
296            // Find next non-whitespace token — for $IDENT pattern, require immediate '{' (no whitespace)
297            int braceIdx = i + 1;
298            if (isDollarChar) {
299                while (braceIdx < tokens.size() && tokens.get(braceIdx).tokentype == ETokenType.ttwhitespace) {
300                    braceIdx++;
301                }
302            }
303            if (braceIdx >= tokens.size() || tokens.get(braceIdx).tokencode != '{') continue;
304
305            // Found ${ pattern — find matching } with depth tracking
306            int depth = 1;
307            int endIdx = braceIdx + 1;
308            boolean isComplex = false;
309            while (endIdx < tokens.size() && depth > 0) {
310                int code = tokens.get(endIdx).tokencode;
311                if (code == '{') depth++;
312                else if (code == '}') depth--;
313                else if (code == '(' || code == ',' || code == '\'' || code == '"') isComplex = true;
314                if (depth > 0) endIdx++;
315            }
316            if (depth != 0) continue; // unclosed
317
318            // Build merged token text
319            StringBuilder sb = new StringBuilder();
320            for (int j = i; j <= endIdx; j++) {
321                sb.append(tokens.get(j).astext);
322            }
323
324            if (isComplex && isDollarChar) {
325                // Complex template starting with bare $ like ${if(len(X)==0,...)}
326                // These expand to SQL fragments (e.g., AND clauses) at runtime,
327                // so convert to whitespace to let parser skip them entirely.
328                for (int j = i; j <= endIdx; j++) {
329                    tokens.get(j).tokentype = ETokenType.ttwhitespace;
330                    tokens.get(j).tokencode = TBaseType.lexspace;
331                }
332            } else {
333                // Simple template like ${NAME}, or JasperReports $P{VAR}/$X{IN,COL,PARAM}
334                // These expand to single values/expressions, so merge into IDENT placeholder.
335                dollar.astext = sb.toString();
336                dollar.tokencode = TBaseType.ident;
337                dollar.tokentype = ETokenType.ttidentifier;
338                // Convert remaining tokens to whitespace so parser skips them even if
339                // tokenstatus is overwritten by statement splitter (tsignoredbygetrawstatement)
340                for (int j = i + 1; j <= endIdx; j++) {
341                    tokens.get(j).tokentype = ETokenType.ttwhitespace;
342                    tokens.get(j).tokencode = TBaseType.lexspace;
343                }
344            }
345
346            i = endIdx; // skip past merged tokens
347        }
348    }
349
350    /**
351     * Setup Dameng parsers for raw statement extraction.
352     * <p>
353     * Dameng uses dual parsers (SQL + PL/SQL), so we inject sqlcmds and
354     * update token lists for both parsers.
355     */
356    @Override
357    protected void setupVendorParsersForExtraction() {
358        // Inject sqlcmds into BOTH parsers (SQL + PL/SQL)
359        this.fparser.sqlcmds = this.sqlcmds;
360        this.fplsqlparser.sqlcmds = this.sqlcmds;
361
362        // Update token list for BOTH parsers
363        this.fparser.sourcetokenlist = this.sourcetokenlist;
364        this.fplsqlparser.sourcetokenlist = this.sourcetokenlist;
365    }
366
367    /**
368     * Call Dameng-specific raw statement extraction logic.
369     * <p>
370     * Delegates to dodamenggetrawsqlstatements which handles Dameng's
371     * statement delimiters (semicolon and forward slash).
372     */
373    @Override
374    protected void extractVendorRawStatements(SqlParseResult.Builder builder) {
375        dodamenggetrawsqlstatements(builder);
376    }
377
378    /**
379     * Perform full parsing of statements with syntax checking.
380     * <p>
381     * This method orchestrates the parsing of all statements by:
382     * <ul>
383     *   <li>Using the raw statements passed from AbstractSqlParser.parse()</li>
384     *   <li>Initializing SQL and PL/SQL parsers</li>
385     *   <li>Creating global context and frame stack</li>
386     *   <li>Looping through each raw statement</li>
387     *   <li>Calling parsestatement() on each to build AST</li>
388     *   <li>Handling error recovery for CREATE TABLE/INDEX</li>
389     *   <li>Collecting syntax errors</li>
390     * </ul>
391     *
392     * <p><b>Important:</b> This method does NOT extract raw statements - they are
393     * passed in as a parameter already extracted by {@link #extractRawStatements}.
394     * This eliminates duplicate extraction that was occurring in the old design.
395     *
396     * <p>Extracted from: TGSqlParser.doparse() lines 16903-17026
397     *
398     * @param context parser context
399     * @param parser main SQL parser (TParserDameng)
400     * @param secondaryParser PL/SQL parser (TParserDameng)
401     * @param tokens source token list
402     * @param rawStatements raw statements already extracted (never null)
403     * @return list of fully parsed statements with AST built
404     */
405    @Override
406    protected TStatementList performParsing(ParserContext context,
407                                           TCustomParser parser,
408                                           TCustomParser secondaryParser,
409                                           TSourceTokenList tokens,
410                                           TStatementList rawStatements) {
411        // Store references
412        this.fparser = (TParserDameng) parser;
413        this.fplsqlparser = (TParserDameng) secondaryParser;
414        this.sourcetokenlist = tokens;
415        this.parserContext = context;
416
417        // Use the raw statements passed from AbstractSqlParser.parse()
418        // (already extracted - DO NOT re-extract to avoid duplication)
419        this.sqlstatements = rawStatements;
420
421        // Initialize statement parsing infrastructure
422        this.sqlcmds = SqlCmdsFactory.get(vendor);
423
424        // Inject sqlcmds into parsers (required for make_stmt and other methods)
425        this.fparser.sqlcmds = this.sqlcmds;
426        this.fplsqlparser.sqlcmds = this.sqlcmds;
427
428        // Initialize global context for semantic analysis
429        // CRITICAL: When delegated from TGSqlParser, use TGSqlParser's frameStack
430        // so that variables set in statements can be found by other statements
431        if (context != null && context.getGsqlparser() != null) {
432            TGSqlParser gsqlparser = (TGSqlParser) context.getGsqlparser();
433            this.frameStack = gsqlparser.getFrameStack();
434
435            // CRITICAL: Set gsqlparser on the NodeFactory - matches TGSqlParser behavior
436            // This is needed for proper AST node creation during parsing
437            // Without this, expression traversal order may differ, causing
438            // dataflow constant ordering issues
439            this.fparser.getNf().setGsqlParser(gsqlparser);
440            this.fplsqlparser.getNf().setGsqlParser(gsqlparser);
441
442            // Create global context if needed
443            this.globalContext = new TContext();
444            this.sqlEnv = new TSQLEnv(this.vendor) {
445                @Override
446                public void initSQLEnv() {
447                }
448            };
449            this.globalContext.setSqlEnv(this.sqlEnv, this.sqlstatements);
450        } else {
451            initializeGlobalContext();
452        }
453
454        // Parse each statement with exception handling for robustness
455        for (int i = 0; i < sqlstatements.size(); i++) {
456            TCustomSqlStatement stmt = sqlstatements.getRawSql(i);
457
458            try {
459                stmt.setFrameStack(frameStack);
460
461                // Parse the statement
462                int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree());
463
464                // Dameng-specific post-processing (overridden hook method)
465                afterStatementParsed(stmt);
466
467                // Handle error recovery for CREATE TABLE/INDEX
468                boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE;
469                if (doRecover && ((parseResult != 0) || (stmt.getErrorCount() > 0))) {
470                    handleCreateTableErrorRecovery(stmt);
471                }
472
473                // Collect syntax errors
474                if ((parseResult != 0) || (stmt.getErrorCount() > 0)) {
475                    copyErrorsFromStatement(stmt);
476                }
477
478            } catch (Exception ex) {
479                // Use inherited exception handler from AbstractSqlParser
480                // This provides consistent error handling across all database parsers
481                handleStatementParsingException(stmt, i, ex);
482                continue;
483            }
484        }
485
486        // Clean up frame stack
487        if (globalFrame != null) {
488            globalFrame.popMeFromStack(frameStack);
489        }
490
491        return this.sqlstatements;
492    }
493
494    // Note: initializeGlobalContext() inherited from AbstractSqlParser
495
496    /**
497     * Override to provide Dameng-specific post-processing after statement parsing.
498     * <p>
499     * For Dameng, we check if the statement is PL/SQL and recursively find syntax
500     * errors in nested PL/SQL statements.
501     */
502    @Override
503    protected void afterStatementParsed(TCustomSqlStatement stmt) {
504        if (stmt.isoracleplsql()) {
505            findAllSyntaxErrorsInPlsql(stmt);
506        }
507    }
508
509    /**
510     * Perform Dameng-specific semantic analysis using TSQLResolver.
511     *
512     * <p>This includes:
513     * <ul>
514     *   <li>Column-to-table resolution</li>
515     *   <li>Dataflow analysis</li>
516     *   <li>Reference resolution</li>
517     *   <li>Scope resolution</li>
518     * </ul>
519     *
520     * @param context the parser context
521     * @param statements the parsed statements
522     */
523    @Override
524    protected void performSemanticAnalysis(ParserContext context, TStatementList statements) {
525        if (TBaseType.isEnableResolver() && getSyntaxErrors().isEmpty()) {
526            TSQLResolver resolver = new TSQLResolver(globalContext, statements);
527            resolver.resolve();
528        }
529    }
530
531    /**
532     * Perform Dameng-specific AST interpretation/evaluation using TASTEvaluator.
533     *
534     * <p>This executes simple SQL statements and evaluates expressions
535     * for static analysis and constant folding.
536     *
537     * @param context the parser context
538     * @param statements the parsed statements
539     */
540    @Override
541    protected void performInterpreter(ParserContext context, TStatementList statements) {
542        if (TBaseType.ENABLE_INTERPRETER && getSyntaxErrors().isEmpty()) {
543            TLog.clearLogs();
544            TGlobalScope interpreterScope = new TGlobalScope(sqlEnv);
545            TLog.enableInterpreterLogOnly();
546            TASTEvaluator astEvaluator = new TASTEvaluator(statements, interpreterScope);
547            astEvaluator.eval();
548        }
549    }
550
551    // ========== Raw Statement Extraction ==========
552    // These methods extract raw SQL statements from tokens without full parsing
553    // Extracted from TGSqlParser.dodamenggetrawsqlstatements() and related methods
554
555    /**
556     * Extract raw Dameng SQL statements from tokenized source.
557     * <p>
558     * This is the main Dameng statement extraction state machine that:
559     * <ul>
560     *   <li>Groups tokens into statement boundaries</li>
561     *   <li>Identifies statement types (SQL vs PL/SQL, SQL*Plus commands)</li>
562     *   <li>Handles nested PL/SQL blocks (procedures, functions, packages, triggers)</li>
563     *   <li>Tracks BEGIN/END pairs and other block delimiters</li>
564     *   <li>Detects statement terminators (semicolon, forward slash, period)</li>
565     * </ul>
566     *
     * <p><b>State Machine:</b> Uses five states (four main states plus an error-recovery state):
568     * <ul>
569     *   <li>{@code stnormal} - Between statements, looking for start of next statement</li>
570     *   <li>{@code stsql} - Inside a SQL statement</li>
571     *   <li>{@code stsqlplus} - Inside a SQL*Plus command</li>
572     *   <li>{@code ststoredprocedure} - Inside a PL/SQL block (procedure/function/package/trigger)</li>
573     *   <li>{@code sterror} - Error recovery mode</li>
574     * </ul>
575     *
576     * <p><b>Extracted from:</b> TGSqlParser.dodamenggetrawsqlstatements() (lines 10071-10859)
577     *
578     * <p><b>Design Note:</b> This method now receives a builder to populate with results,
579     * following Option A design where the vendor-specific method focuses on parsing logic
580     * while extractRawStatements() handles result construction.
581     *
582     * @param builder the result builder to populate with statements and error information
583     */
584    private void dodamenggetrawsqlstatements(SqlParseResult.Builder builder) {
585        int waitingEnds[] = new int[stored_procedure_nested_level];
586        stored_procedure_type sptype[] = new stored_procedure_type[stored_procedure_nested_level];
587        stored_procedure_status procedure_status[] = new stored_procedure_status[stored_procedure_nested_level];
588        boolean endBySlashOnly = true;
589        int nestedProcedures = 0, nestedParenthesis = 0;
590        // Flag for CREATE MLE MODULE with AS clause - terminates with / not ;
591        boolean mleModuleWithAs = false;
592        // Flag for WITH FUNCTION/PROCEDURE - track BEGIN/END nesting to handle embedded semicolons
593        boolean withPlsqlDefinition = false;
594        int withPlsqlBeginEndNesting = 0;
595        boolean withPlsqlFoundSelect = false;  // True when SELECT has been found after WITH FUNCTION
596        // Track whether the current CTE statement's main SELECT has been found
597        // (i.e., the SELECT after WITH name AS (...) at paren level 0)
598        boolean cteMainSelectFound = false;
599
600        if (TBaseType.assigned(sqlstatements)) sqlstatements.clear();
601        if (!TBaseType.assigned(sourcetokenlist)) {
602            // No tokens available - populate builder with error and return
603            builder.errorCode(1);
604            builder.errorMessage("No source token list available");
605            builder.sqlStatements(new TStatementList());
606            return;
607        }
608
609        gcurrentsqlstatement = null;
610        EFindSqlStateType gst = EFindSqlStateType.stnormal;
611        TSourceToken lcprevsolidtoken = null, ast = null;
612
613        // Main tokenization loop
614        for (int i = 0; i < sourcetokenlist.size(); i++) {
615
616            if ((ast != null) && (ast.issolidtoken()))
617                lcprevsolidtoken = ast;
618
619            ast = sourcetokenlist.get(i);
620            sourcetokenlist.curpos = i;
621
622            // Token-specific keyword transformations for Dameng
623            performRawStatementTokenTransformations(ast);
624
625            // State machine processing
626            switch (gst) {
627                case sterror: {
628                    if (ast.tokentype == ETokenType.ttsemicolon) {
629                        appendToken(gcurrentsqlstatement, ast);
630                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
631                        gst = EFindSqlStateType.stnormal;
632                    } else {
633                        appendToken(gcurrentsqlstatement, ast);
634                    }
635                    break;
636                } //sterror
637
638                case stnormal: {
639                    if ((ast.tokencode == TBaseType.cmtdoublehyphen)
640                            || (ast.tokencode == TBaseType.cmtslashstar)
641                            || (ast.tokencode == TBaseType.lexspace)
642                            || (ast.tokencode == TBaseType.lexnewline)
643                            || (ast.tokentype == ETokenType.ttsemicolon)) {
644                        if (gcurrentsqlstatement != null) {
645                            appendToken(gcurrentsqlstatement, ast);
646                        }
647
648                        if ((lcprevsolidtoken != null) && (ast.tokentype == ETokenType.ttsemicolon)) {
649                            if (lcprevsolidtoken.tokentype == ETokenType.ttsemicolon) {
650                                // ;;;; continuous semicolon, treat it as comment
651                                ast.tokentype = ETokenType.ttsimplecomment;
652                                ast.tokencode = TBaseType.cmtdoublehyphen;
653                            }
654                        }
655
656                        continue;
657                    }
658
659                    if (ast.tokencode == TBaseType.sqlpluscmd) {
660                        gst = EFindSqlStateType.stsqlplus;
661                        gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
662                        appendToken(gcurrentsqlstatement, ast);
663                        continue;
664                    }
665
666                    // find a token to start sql or plsql mode
667                    gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement);
668
669                    if (gcurrentsqlstatement != null) {
670                        if (gcurrentsqlstatement.isoracleplsql()) {
671                            nestedProcedures = 0;
672                            gst = EFindSqlStateType.ststoredprocedure;
673                            appendToken(gcurrentsqlstatement, ast);
674
675                            switch (gcurrentsqlstatement.sqlstatementtype) {
676                                case sstplsql_createprocedure:
677                                    sptype[nestedProcedures] = stored_procedure_type.procedure;
678                                    break;
679                                case sstplsql_createfunction:
680                                    sptype[nestedProcedures] = stored_procedure_type.function;
681                                    break;
682                                case sstplsql_createpackage:
683                                    sptype[nestedProcedures] = stored_procedure_type.package_spec;
684                                    if (ast.searchToken(TBaseType.rrw_body, 5) != null) {
685                                        sptype[nestedProcedures] = stored_procedure_type.package_body;
686                                    }
687                                    break;
688                                case sst_plsql_block:
689                                    sptype[nestedProcedures] = stored_procedure_type.block_with_declare;
690                                    if (ast.tokencode == TBaseType.rrw_begin) {
691                                        sptype[nestedProcedures] = stored_procedure_type.block_with_begin;
692                                    }
693                                    break;
694                                case sstplsql_createtrigger:
695                                    sptype[nestedProcedures] = stored_procedure_type.create_trigger;
696                                    break;
697                                case sstoraclecreatelibrary:
698                                    sptype[nestedProcedures] = stored_procedure_type.create_library;
699                                    break;
700                                case sstplsql_createtype_placeholder:
701                                    gst = EFindSqlStateType.stsql;
702                                    break;
703                                case sstplsql_createtypebody:
704                                    sptype[nestedProcedures] = stored_procedure_type.others;
705                                    break;
706                                default:
707                                    sptype[nestedProcedures] = stored_procedure_type.others;
708                                    break;
709                            }
710
711                            if (sptype[0] == stored_procedure_type.block_with_declare) {
712                                endBySlashOnly = false;
713                                procedure_status[0] = stored_procedure_status.is_as;
714                            } else if (sptype[0] == stored_procedure_type.block_with_begin) {
715                                endBySlashOnly = false;
716                                procedure_status[0] = stored_procedure_status.body;
717                            } else if (sptype[0] == stored_procedure_type.procedure) {
718                                endBySlashOnly = false;
719                                procedure_status[0] = stored_procedure_status.start;
720                            } else if (sptype[0] == stored_procedure_type.function) {
721                                endBySlashOnly = false;
722                                procedure_status[0] = stored_procedure_status.start;
723                            } else if (sptype[0] == stored_procedure_type.package_spec) {
724                                endBySlashOnly = false;
725                                procedure_status[0] = stored_procedure_status.start;
726                            } else if (sptype[0] == stored_procedure_type.package_body) {
727                                endBySlashOnly = false;
728                                procedure_status[0] = stored_procedure_status.start;
729                            } else if (sptype[0] == stored_procedure_type.create_trigger) {
730                                endBySlashOnly = false;
731                                procedure_status[0] = stored_procedure_status.start;
732                            } else if (sptype[0] == stored_procedure_type.create_library) {
733                                endBySlashOnly = false;
734                                procedure_status[0] = stored_procedure_status.bodyend;
735                            } else {
736                                endBySlashOnly = true;
737                                procedure_status[0] = stored_procedure_status.bodyend;
738                            }
739
740                            if ((ast.tokencode == TBaseType.rrw_begin)
741                                    || (ast.tokencode == TBaseType.rrw_package)
742                                    || (ast.searchToken(TBaseType.rrw_package, 4) != null)) {
743                                waitingEnds[nestedProcedures] = 1;
744                            }
745                        } else {
746                            gst = EFindSqlStateType.stsql;
747                            appendToken(gcurrentsqlstatement, ast);
748                            nestedParenthesis = 0;
749                            // Check if this is CREATE MLE MODULE with AS clause (JavaScript code)
750                            // If AS is found after LANGUAGE JAVASCRIPT, it terminates with / not ;
751                            if (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstoraclecreatemlemodule) {
752                                // Look ahead to see if there's an AS keyword
753                                TSourceToken asToken = ast.searchToken(TBaseType.rrw_as, 10);
754                                mleModuleWithAs = (asToken != null);
755                            } else {
756                                mleModuleWithAs = false;
757                            }
758
759                            // Check if this is WITH FUNCTION/PROCEDURE (Dameng inline PL/SQL)
760                            // Need to track BEGIN/END nesting to handle embedded semicolons
761                            if (ast.tokencode == TBaseType.rrw_with && gcurrentsqlstatement.isctequery) {
762                                // Look ahead for FUNCTION or PROCEDURE keyword
763                                TSourceToken nextSolid = ast.nextSolidToken();
764                                if (nextSolid != null && (nextSolid.tokencode == TBaseType.rrw_function
765                                        || nextSolid.tokencode == TBaseType.rrw_procedure)) {
766                                    withPlsqlDefinition = true;
767                                    withPlsqlBeginEndNesting = 0;
768                                }
769                            }
770                        }
771                    } else {
772                        //error token found
773                        this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo)
774                                , "Error when tokenize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist));
775
776                        ast.tokentype = ETokenType.tttokenlizererrortoken;
777                        gst = EFindSqlStateType.sterror;
778
779                        gcurrentsqlstatement = new TUnknownSqlStatement(vendor);
780                        gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid;
781                        appendToken(gcurrentsqlstatement, ast);
782                    }
783
784                    break;
785                } // stnormal
786
787                case stsqlplus: {
788                    if (ast.insqlpluscmd) {
789                        appendToken(gcurrentsqlstatement, ast);
790                    } else {
791                        gst = EFindSqlStateType.stnormal; //this token must be newline,
792                        appendToken(gcurrentsqlstatement, ast); // so add it here
793                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
794                    }
795
796                    break;
797                }//case stsqlplus
798
799                case stsql: {
800                    // For WITH FUNCTION/PROCEDURE, track BEGIN/END nesting and when SELECT is found
801                    if (withPlsqlDefinition) {
802                        if (ast.tokencode == TBaseType.rrw_begin) {
803                            withPlsqlBeginEndNesting++;
804                        } else if (ast.tokencode == TBaseType.rrw_end) {
805                            withPlsqlBeginEndNesting--;
806                            if (withPlsqlBeginEndNesting < 0) withPlsqlBeginEndNesting = 0;
807                        } else if (ast.tokencode == TBaseType.rrw_select && withPlsqlBeginEndNesting == 0) {
808                            // Found SELECT after all function definitions are done
809                            withPlsqlFoundSelect = true;
810                        }
811                    }
812
813                    // For CREATE MLE MODULE with AS clause, don't terminate on semicolon
814                    // The JavaScript code may contain semicolons; wait for / to terminate
815                    // For WITH FUNCTION/PROCEDURE, don't terminate on semicolon until SELECT is found
816                    // (the semicolons in function body and after END are part of the function definition)
817                    boolean skipSemicolonTermination = mleModuleWithAs || (withPlsqlDefinition && !withPlsqlFoundSelect);
818                    if (ast.tokentype == ETokenType.ttsemicolon && !skipSemicolonTermination) {
819                        gst = EFindSqlStateType.stnormal;
820                        appendToken(gcurrentsqlstatement, ast);
821                        gcurrentsqlstatement.semicolonended = ast;
822                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
823                        mleModuleWithAs = false; // Reset flag
824                        withPlsqlDefinition = false; // Reset WITH FUNCTION flag
825                        withPlsqlBeginEndNesting = 0;
826                        cteMainSelectFound = false;
827                        withPlsqlFoundSelect = false;
828                        continue;
829                    }
830
831                    if (sourcetokenlist.sqlplusaftercurtoken()) //most probably is / cmd
832                    {
833                        gst = EFindSqlStateType.stnormal;
834                        appendToken(gcurrentsqlstatement, ast);
835                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
836                        mleModuleWithAs = false; // Reset flag
837                        continue;
838                    }
839
840                    if (ast.tokencode == '(') nestedParenthesis++;
841                    if (ast.tokencode == ')') {
842                        nestedParenthesis--;
843                        if (nestedParenthesis < 0) nestedParenthesis = 0;
844                    }
845
846                    Boolean findNewStmt = false;
847                    TCustomSqlStatement lcStmt = null;
848                    // Check for new statement: CREATE TABLE (original), or SELECT inside a non-CTE SELECT
849                    boolean shouldCheckNewStmt = false;
850                    if ((nestedParenthesis == 0) && (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstcreatetable)) {
851                        shouldCheckNewStmt = true;
852                    } else if ((nestedParenthesis == 0) && (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstselect)
853                            && (ast.tokencode == TBaseType.rrw_select || ast.tokencode == TBaseType.rrw_with)) {
854                        // Check if current statement is a CTE (starts with WITH)
855                        boolean isCteContext = false;
856                        for (int si = 0; si < gcurrentsqlstatement.sourcetokenlist.size(); si++) {
857                            TSourceToken st = gcurrentsqlstatement.sourcetokenlist.get(si);
858                            if (st.tokentype == ETokenType.ttwhitespace || st.tokentype == ETokenType.ttreturn
859                                    || st.tokencode == TBaseType.cmtdoublehyphen || st.tokencode == TBaseType.cmtslashstar) {
860                                continue;
861                            }
862                            if (st.tokencode == TBaseType.rrw_with) {
863                                isCteContext = true;
864                            }
865                            break;
866                        }
867                        // Don't split if previous token makes this SELECT part of current statement:
868                        // - Set operators: UNION, INTERSECT, MINUS, EXCEPT, ALL
869                        // - Left paren: (SELECT ...) — SELECT is main query of parenthesized expr
870                        boolean suppressSplit = false;
871                        if (ast.tokencode == TBaseType.rrw_select && lcprevsolidtoken != null) {
872                            int prevCode = lcprevsolidtoken.tokencode;
873                            if (prevCode == TBaseType.rrw_union || prevCode == TBaseType.rrw_intersect
874                                    || prevCode == TBaseType.rrw_minus || prevCode == TBaseType.rrw_except
875                                    || prevCode == TBaseType.rrw_all
876                                    || prevCode == '(') {
877                                suppressSplit = true;
878                            }
879                        }
880                        if (suppressSplit) {
881                            // SELECT is part of current statement — don't split
882                        } else if (!isCteContext) {
883                            // Non-CTE SELECT: any SELECT/WITH at paren level 0 starts a new statement
884                            shouldCheckNewStmt = true;
885                        } else if (cteMainSelectFound) {
886                            // CTE context: main SELECT already consumed, so this SELECT/WITH
887                            // at paren level 0 is a new statement
888                            shouldCheckNewStmt = true;
889                        } else if (ast.tokencode == TBaseType.rrw_select) {
890                            // CTE context: this is the main SELECT after WITH name AS (...)
891                            cteMainSelectFound = true;
892                            // Don't split — this SELECT is part of the CTE statement
893                        }
894                        // If ast is WITH and main SELECT not yet found, it could be another
895                        // CTE definition (WITH a AS (...), b AS (...)) — don't split
896                    }
897                    if (shouldCheckNewStmt) {
898                        // For SELECT-after-SELECT/WITH splitting, use stnormal so issql can detect CTE starts.
899                        // For CREATE TABLE, preserve original stsql state to avoid false positives
900                        // (e.g., INSERT/DELETE keywords in blockchain table clauses).
901                        EFindSqlStateType issqlState = (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstselect)
902                                ? EFindSqlStateType.stnormal : gst;
903                        lcStmt = sqlcmds.issql(ast, issqlState, gcurrentsqlstatement);
904                        if (lcStmt != null) {
905                            findNewStmt = true;
906                            if (lcStmt.sqlstatementtype == ESqlStatementType.sstselect) {
907                                TSourceToken prevst = ast.prevSolidToken();
908                                if (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstcreatetable) {
909                                    // For CREATE TABLE, suppress split when SELECT follows AS/(/): AS (SELECT ...)
910                                    if ((prevst.tokencode == TBaseType.rrw_as) || (prevst.tokencode == '(') || (prevst.tokencode == ')')) {
911                                        findNewStmt = false;
912                                    }
913                                }
914                                // For SELECT-after-SELECT/WITH splitting at paren level 0,
915                                // no suppression needed — the new SELECT/WITH is a new statement
916                            }
917                        }
918                    }
919
920                    if (findNewStmt) {
921                        onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
922                        gcurrentsqlstatement = lcStmt;
923                        cteMainSelectFound = false; // Reset for new statement
924                        nestedParenthesis = 0; // Reset paren tracking for new statement
925                        appendToken(gcurrentsqlstatement, ast);
926                        continue;
927                    } else
928                        appendToken(gcurrentsqlstatement, ast);
929
930                    break;
931                }//case stsql
932
933                case ststoredprocedure: {
934
935                    if (procedure_status[nestedProcedures] != stored_procedure_status.bodyend) {
936                        appendToken(gcurrentsqlstatement, ast);
937                    }
938
939                    switch (procedure_status[nestedProcedures]) {
940                        case cursor_declare:
941                            if (ast.tokencode == ';') {
942                                nestedProcedures--;
943                                if (nestedProcedures < 0) {
944                                    nestedProcedures = 0;
945                                }
946                            }
947                            break;
948                        case start:
949                            if ((ast.tokencode == TBaseType.rrw_as) || (ast.tokencode == TBaseType.rrw_is)) {
950                                if (sptype[nestedProcedures] != stored_procedure_type.create_trigger) {
951                                    if ((sptype[0] == stored_procedure_type.package_spec) && (nestedProcedures > 0)) {
952                                        // when it's a package specification, only top level accept as/is
953                                    } else {
954                                        procedure_status[nestedProcedures] = stored_procedure_status.is_as;
955                                        if (ast.searchToken("language", 1) != null) {
956                                            if (nestedProcedures == 0) {
957                                                gst = EFindSqlStateType.stsql;
958                                            } else {
959                                                procedure_status[nestedProcedures] = stored_procedure_status.body;
960                                                nestedProcedures--;
961                                            }
962                                        }
963                                    }
964                                }
965                            } else if (ast.tokencode == TBaseType.rrw_begin) {
966                                if (sptype[nestedProcedures] == stored_procedure_type.create_trigger) {
967                                    waitingEnds[nestedProcedures]++;
968                                }
969                                if (nestedProcedures > 0) {
970                                    nestedProcedures--;
971                                }
972                                procedure_status[nestedProcedures] = stored_procedure_status.body;
973                            } else if (ast.tokencode == TBaseType.rrw_end) {
974                                if ((nestedProcedures > 0) && (waitingEnds[nestedProcedures - 1] == 1)
975                                        && ((sptype[nestedProcedures - 1] == stored_procedure_type.package_body)
976                                        || (sptype[nestedProcedures - 1] == stored_procedure_type.package_spec))) {
977                                    nestedProcedures--;
978                                    procedure_status[nestedProcedures] = stored_procedure_status.bodyend;
979                                }
980                            } else if ((ast.tokencode == TBaseType.rrw_procedure) || (ast.tokencode == TBaseType.rrw_function)) {
981                                if ((nestedProcedures > 0) && (waitingEnds[nestedProcedures] == 0)
982                                        && (procedure_status[nestedProcedures - 1] == stored_procedure_status.is_as)) {
983                                    nestedProcedures--;
984                                    nestedProcedures++;
985                                    waitingEnds[nestedProcedures] = 0;
986                                    procedure_status[nestedProcedures] = stored_procedure_status.start;
987                                }
988                            } else if (ast.tokencode == TBaseType.rrw_oracle_cursor) {
989                                if ((nestedProcedures > 0) && (waitingEnds[nestedProcedures] == 0)
990                                        && (procedure_status[nestedProcedures - 1] == stored_procedure_status.is_as)) {
991                                    nestedProcedures--;
992                                    nestedProcedures++;
993                                    waitingEnds[nestedProcedures] = 0;
994                                    procedure_status[nestedProcedures] = stored_procedure_status.cursor_declare;
995                                }
996                            } else if ((sptype[nestedProcedures] == stored_procedure_type.create_trigger) && (ast.tokencode == TBaseType.rrw_declare)) {
997                                procedure_status[nestedProcedures] = stored_procedure_status.is_as;
998                            } else if ((sptype[nestedProcedures] == stored_procedure_type.create_trigger)
999                                    && (ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) {
1000                                ast.tokenstatus = ETokenStatus.tsignorebyyacc;
1001                                gst = EFindSqlStateType.stnormal;
1002                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1003
1004                                gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
1005                                appendToken(gcurrentsqlstatement, ast);
1006                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1007                            } else if (sptype[nestedProcedures] == stored_procedure_type.create_trigger) {
1008                                if (ast.tokencode == TBaseType.rrw_trigger) {
1009                                    TSourceToken compoundSt = ast.searchToken(TBaseType.rrw_oracle_compound, -1);
1010                                    if (compoundSt != null) {
1011                                        procedure_status[nestedProcedures] = stored_procedure_status.body;
1012                                        waitingEnds[nestedProcedures]++;
1013                                    }
1014                                }
1015                            } else if ((sptype[nestedProcedures] == stored_procedure_type.function)
1016                                    && (ast.tokencode == TBaseType.rrw_teradata_using)) {
1017                                if ((ast.searchToken("aggregate", -1) != null) || (ast.searchToken("pipelined", -1) != null)) {
1018                                    if (nestedProcedures == 0) {
1019                                        gst = EFindSqlStateType.stsql;
1020                                    } else {
1021                                        procedure_status[nestedProcedures] = stored_procedure_status.body;
1022                                        nestedProcedures--;
1023                                    }
1024                                }
1025                            }
1026                            break;
1027                        case is_as:
1028                            if ((ast.tokencode == TBaseType.rrw_procedure) || (ast.tokencode == TBaseType.rrw_function)) {
1029                                nestedProcedures++;
1030                                if (nestedProcedures > stored_procedure_nested_level - 1) {
1031                                    gst = EFindSqlStateType.sterror;
1032                                    nestedProcedures--;
1033                                } else {
1034                                    waitingEnds[nestedProcedures] = 0;
1035                                    procedure_status[nestedProcedures] = stored_procedure_status.start;
1036                                }
1037                            } else if (ast.tokencode == TBaseType.rrw_begin) {
1038                                if ((nestedProcedures == 0) &&
1039                                        ((sptype[nestedProcedures] == stored_procedure_type.package_body)
1040                                                || (sptype[nestedProcedures] == stored_procedure_type.package_spec))) {
1041                                    // top level package begin already counted
1042                                } else {
1043                                    waitingEnds[nestedProcedures]++;
1044                                }
1045                                procedure_status[nestedProcedures] = stored_procedure_status.body;
1046                            } else if (ast.tokencode == TBaseType.rrw_end) {
1047                                if ((nestedProcedures == 0) && (waitingEnds[nestedProcedures] == 1)
1048                                        && ((sptype[nestedProcedures] == stored_procedure_type.package_body)
1049                                        || (sptype[nestedProcedures] == stored_procedure_type.package_spec))) {
1050                                    procedure_status[nestedProcedures] = stored_procedure_status.bodyend;
1051                                    waitingEnds[nestedProcedures]--;
1052                                } else {
1053                                    waitingEnds[nestedProcedures]--;
1054                                }
1055                            } else if (ast.tokencode == TBaseType.rrw_case) {
1056                                if (ast.searchToken(';', 1) == null) {
1057                                    waitingEnds[nestedProcedures]++;
1058                                }
1059                            }
1060                            break;
1061                        case body:
1062                            if (ast.tokencode == TBaseType.rrw_begin) {
1063                                waitingEnds[nestedProcedures]++;
1064                            } else if (ast.tokencode == TBaseType.rrw_if) {
1065                                if (ast.searchToken(';', 2) == null) {
1066                                    waitingEnds[nestedProcedures]++;
1067                                }
1068                            } else if (ast.tokencode == TBaseType.rrw_case) {
1069                                if (ast.searchToken(';', 2) == null) {
1070                                    if (ast.searchToken(TBaseType.rrw_end, -1) == null) {
1071                                        waitingEnds[nestedProcedures]++;
1072                                    }
1073                                }
1074                            } else if (ast.tokencode == TBaseType.rrw_loop) {
1075                                if (!((ast.searchToken(TBaseType.rrw_end, -1) != null)
1076                                        && (ast.searchToken(';', 2) != null))) {
1077                                    waitingEnds[nestedProcedures]++;
1078                                }
1079                            } else if (ast.tokencode == TBaseType.rrw_end) {
1080                                waitingEnds[nestedProcedures]--;
1081                                if (waitingEnds[nestedProcedures] == 0) {
1082                                    if (nestedProcedures == 0) {
1083                                        procedure_status[nestedProcedures] = stored_procedure_status.bodyend;
1084                                    } else {
1085                                        nestedProcedures--;
1086                                        procedure_status[nestedProcedures] = stored_procedure_status.is_as;
1087                                    }
1088                                }
1089                            } else if ((waitingEnds[nestedProcedures] == 0)
1090                                    && (ast.tokentype == ETokenType.ttslash)
1091                                    && (ast.tokencode == TBaseType.sqlpluscmd)) {
1092                                ast.tokenstatus = ETokenStatus.tsignorebyyacc;
1093                                gst = EFindSqlStateType.stnormal;
1094                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1095
1096                                gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
1097                                appendToken(gcurrentsqlstatement, ast);
1098                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1099                            }
1100                            break;
1101                        case bodyend:
1102                            if ((ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) {
1103                                // TPlsqlStatementParse(asqlstatement).TerminatorToken := ast;
1104                                ast.tokenstatus = ETokenStatus.tsignorebyyacc;
1105                                gst = EFindSqlStateType.stnormal;
1106                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1107
1108                                //make / a sqlplus cmd
1109                                gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
1110                                appendToken(gcurrentsqlstatement, ast);
1111                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1112                            } else if ((ast.tokentype == ETokenType.ttperiod) && (sourcetokenlist.returnaftercurtoken(false)) && (sourcetokenlist.returnbeforecurtoken(false))) {
                                // single dot on a separate line
1114                                ast.tokenstatus = ETokenStatus.tsignorebyyacc;
1115                                gst = EFindSqlStateType.stnormal;
1116                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1117
1118                                //make ttperiod a sqlplus cmd
1119                                gcurrentsqlstatement = new TSqlplusCmdStatement(vendor);
1120                                appendToken(gcurrentsqlstatement, ast);
1121                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1122                            } else if ((ast.searchToken(TBaseType.rrw_package, 1) != null) && (!endBySlashOnly)) {
1123                                appendToken(gcurrentsqlstatement, ast);
1124                                gst = EFindSqlStateType.stnormal;
1125                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1126                            } else if ((ast.searchToken(TBaseType.rrw_procedure, 1) != null) && (!endBySlashOnly)) {
1127                                appendToken(gcurrentsqlstatement, ast);
1128                                gst = EFindSqlStateType.stnormal;
1129                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1130                            } else if ((ast.searchToken(TBaseType.rrw_function, 1) != null) && (!endBySlashOnly)) {
1131                                appendToken(gcurrentsqlstatement, ast);
1132                                gst = EFindSqlStateType.stnormal;
1133                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1134                            } else if ((ast.searchToken(TBaseType.rrw_create, 1) != null)
1135                                    && ((ast.searchToken(TBaseType.rrw_package, 4) != null) || (ast.searchToken(TBaseType.rrw_package, 5) != null))
1136                                    && (!endBySlashOnly)) {
1137                                appendToken(gcurrentsqlstatement, ast);
1138                                gst = EFindSqlStateType.stnormal;
1139                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1140                            } else if ((ast.searchToken(TBaseType.rrw_create, 1) != null)
1141                                    && ((ast.searchToken(TBaseType.rrw_procedure, 4) != null)
1142                                            || (ast.searchToken(TBaseType.rrw_function, 4) != null)
1143                                            || (ast.searchToken(TBaseType.rrw_view, 4) != null)
1144                                            || (ast.searchToken(TBaseType.rrw_oracle_synonym, 4) != null)
1145                                            || (ast.searchToken(TBaseType.rrw_trigger, 4) != null))
1146                                    && (!endBySlashOnly)) {
1147                                appendToken(gcurrentsqlstatement, ast);
1148                                gst = EFindSqlStateType.stnormal;
1149                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1150                            } else if ((ast.searchToken(TBaseType.rrw_create, 1) != null) && (ast.searchToken(TBaseType.rrw_library, 4) != null) && (!endBySlashOnly)) {
1151                                appendToken(gcurrentsqlstatement, ast);
1152                                gst = EFindSqlStateType.stnormal;
1153                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1154                            } else if ((ast.searchToken(TBaseType.rrw_alter, 1) != null) && (ast.searchToken(TBaseType.rrw_trigger, 2) != null) && (!endBySlashOnly)) {
1155                                appendToken(gcurrentsqlstatement, ast);
1156                                gst = EFindSqlStateType.stnormal;
1157                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1158                            } else if ((ast.searchToken(TBaseType.rrw_select, 1) != null) && (!endBySlashOnly)) {
1159                                appendToken(gcurrentsqlstatement, ast);
1160                                gst = EFindSqlStateType.stnormal;
1161                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1162                            } else if ((ast.searchToken(TBaseType.rrw_call, 1) != null) && (!endBySlashOnly)) {
1163                                appendToken(gcurrentsqlstatement, ast);
1164                                gst = EFindSqlStateType.stnormal;
1165                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1166                            } else if ((ast.searchToken(TBaseType.rrw_commit, 1) != null) && (!endBySlashOnly)) {
1167                                appendToken(gcurrentsqlstatement, ast);
1168                                gst = EFindSqlStateType.stnormal;
1169                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1170                            } else if ((ast.searchToken(TBaseType.rrw_declare, 1) != null) && (!endBySlashOnly)) {
1171                                appendToken(gcurrentsqlstatement, ast);
1172                                gst = EFindSqlStateType.stnormal;
1173                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1174                            } else if ((ast.searchToken(TBaseType.rrw_grant, 1) != null)
1175                                    && (ast.searchToken(TBaseType.rrw_execute, 2) != null) && (!endBySlashOnly)) {
1176                                appendToken(gcurrentsqlstatement, ast);
1177                                gst = EFindSqlStateType.stnormal;
1178                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1179                            } else if ((ast.searchToken(TBaseType.rrw_alter, 1) != null)
1180                                    && (ast.searchToken(TBaseType.rrw_table, 2) != null) && (!endBySlashOnly)) {
1181                                appendToken(gcurrentsqlstatement, ast);
1182                                gst = EFindSqlStateType.stnormal;
1183                                onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder);
1184                            } else {
1185                                appendToken(gcurrentsqlstatement, ast);
1186                            }
1187                            break;
1188                        case end:
1189                            break;
1190                        default:
1191                            break;
1192                    }
1193
1194                    if (ast.tokencode == TBaseType.sqlpluscmd) {
1195                        int m = flexer.getkeywordvalue(ast.getAstext());
1196                        if (m != 0) {
1197                            ast.tokencode = m;
1198                        } else if (ast.tokentype == ETokenType.ttslash) {
1199                            ast.tokencode = '/';
1200                        } else {
1201                            ast.tokencode = TBaseType.ident;
1202                        }
1203                    }
1204
1205                    final int wrapped_keyword_max_pos = 20;
1206                    if ((ast.tokencode == TBaseType.rrw_wrapped)
1207                            && (ast.posinlist - gcurrentsqlstatement.sourcetokenlist.get(0).posinlist < wrapped_keyword_max_pos)) {
1208                        if (gcurrentsqlstatement instanceof gudusoft.gsqlparser.stmt.TCommonStoredProcedureSqlStatement) {
1209                            ((gudusoft.gsqlparser.stmt.TCommonStoredProcedureSqlStatement) gcurrentsqlstatement).setWrapped(true);
1210                        }
1211
1212                        if (gcurrentsqlstatement instanceof gudusoft.gsqlparser.stmt.oracle.TPlsqlCreatePackage) {
1213                            if (ast.prevSolidToken() != null) {
1214                                ((gudusoft.gsqlparser.stmt.oracle.TPlsqlCreatePackage) gcurrentsqlstatement)
1215                                        .setPackageName(fparser.getNf().createObjectNameWithPart(ast.prevSolidToken()));
1216                            }
1217                        }
1218                    }
1219
1220                    break;
1221                } //ststoredprocedure
1222
1223            } //switch
1224        }//for
1225
1226        //last statement
1227        if ((gcurrentsqlstatement != null) &&
1228                ((gst == EFindSqlStateType.stsqlplus) || (gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.ststoredprocedure) ||
1229                        (gst == EFindSqlStateType.sterror))) {
1230            onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, true, builder);
1231        }
1232
1233        // Populate builder with results
1234        builder.sqlStatements(this.sqlstatements);
1235        builder.syntaxErrors(syntaxErrors instanceof ArrayList ?
1236                (ArrayList<TSyntaxError>) syntaxErrors : new ArrayList<>(syntaxErrors));
1237        builder.errorCode(syntaxErrors.isEmpty() ? 0 : syntaxErrors.size());
1238        builder.errorMessage(syntaxErrors.isEmpty() ? "" :
1239                String.format("Raw extraction completed with %d error(s)", syntaxErrors.size()));
1240    }
1241
1242    /**
1243     * Handle token transformations during raw statement extraction.
1244     * <p>
1245     * This performs Dameng-specific keyword disambiguation that must happen
1246     * before statement boundary detection. Examples:
1247     * <ul>
1248     *   <li>RETURN after WHERE → treat as identifier</li>
1249     *   <li>VALUE after BY → mark as value_after_by</li>
1250     *   <li>NEW → treat as identifier or constructor based on context</li>
1251     *   <li>And many more Dameng-specific cases</li>
1252     * </ul>
1253     *
1254     * @param ast current token being processed
1255     */
1256    private void performRawStatementTokenTransformations(TSourceToken ast) {
1257        // This method contains the keyword transformation logic from dodamenggetrawsqlstatements
1258        // It's been extracted to keep the main method more readable
1259
1260        if (ast.tokencode == TBaseType.rrw_return) {
1261            TSourceToken stMatch = ast.searchToken(TBaseType.rrw_where, 1);
1262            if (stMatch != null) {
1263                ast.tokencode = TBaseType.ident;
1264            }
1265        } else if (ast.tokencode == TBaseType.rrw_value_oracle) {
1266            TSourceToken stBy = ast.searchToken(TBaseType.rrw_by, -1);
1267            if (stBy != null) {
1268                ast.tokencode = TBaseType.rrw_value_after_by;
1269            }
1270        } else if (ast.tokencode == TBaseType.rrw_new_oracle) {
1271            TSourceToken stRightParen = ast.searchToken(')', -1);
1272            if (stRightParen != null) {
1273                ast.tokencode = TBaseType.ident;
1274            }
1275            TSourceToken stDot = ast.searchToken('.', 1);
1276            if (stDot != null) {
1277                ast.tokencode = TBaseType.ident;
1278            }
1279
1280            TSourceToken stNext = ast.searchTokenAfterObjectName();
1281            stDot = ast.searchToken('.', 1);
1282            if ((stDot == null) && (stNext != null) && (stNext.tokencode == '(')) {
1283                ast.tokencode = TBaseType.rrw_oracle_new_constructor;
1284            }
1285        } else if (ast.tokencode == TBaseType.rrw_chr_oracle) {
1286            TSourceToken stLeftParen = ast.searchToken('(', 1);
1287            if (stLeftParen == null) {
1288                ast.tokencode = TBaseType.ident;
1289            }
1290        } else if (ast.tokencode == TBaseType.rrw_log_oracle) {
1291            TSourceToken stNext = ast.searchToken(TBaseType.rrw_errors_oracle, 1);
1292            TSourceToken stPrev = ast.searchToken(TBaseType.rrw_view, -1);
1293            if (stPrev == null) {
1294                stPrev = ast.searchToken(TBaseType.rrw_oracle_supplemental, -1);
1295            }
1296            if (stPrev == null) {
1297                stPrev = ast.searchToken(1223, -1); // RW_ADVANCED: keep LOG as keyword in ADVANCED LOG
1298            }
1299            if ((stNext == null) && (stPrev == null)) {
1300                ast.tokencode = TBaseType.ident;
1301            }
1302        } else if (ast.tokencode == TBaseType.rrw_delete) {
1303            TSourceToken stPrev = ast.searchToken('.', -1);
1304            if (stPrev != null) {
1305                ast.tokencode = TBaseType.ident;
1306            }
1307        } else if (ast.tokencode == TBaseType.rrw_partition) {
1308            TSourceToken stPrev = ast.searchToken(TBaseType.rrw_add, -1);
1309            if (stPrev != null) {
1310                stPrev.tokencode = TBaseType.rrw_add_p;
1311            }
1312        } else if (ast.tokencode == TBaseType.rrw_oracle_column) {
1313            TSourceToken stPrev = ast.searchToken(TBaseType.rrw_oracle_modify, -1);
1314            if (stPrev != null) {
1315                ast.tokencode = TBaseType.rrw_oracle_column_after_modify;
1316            }
1317        } else if (ast.tokencode == TBaseType.rrw_oracle_apply) {
1318            TSourceToken stPrev = ast.searchToken(TBaseType.rrw_outer, -1);
1319            if (stPrev != null) {
1320                stPrev.tokencode = TBaseType.ORACLE_OUTER2;
1321            }
1322        } else if (ast.tokencode == TBaseType.rrw_oracle_subpartition) {
1323            TSourceToken stNext = ast.searchToken("(", 2);
1324            if (stNext != null) {
1325                TSourceToken st1 = ast.nextSolidToken();
1326                if (st1.toString().equalsIgnoreCase("template")) {
1327                    // don't change, keep as RW_SUBPARTITION
1328                } else {
1329                    ast.tokencode = TBaseType.rrw_oracle_subpartition_tablesample;
1330                }
1331            }
1332        } else if (ast.tokencode == TBaseType.rrw_primary) {
1333            TSourceToken stNext = ast.searchToken("key", 1);
1334            if (stNext == null) {
1335                ast.tokencode = TBaseType.ident;
1336            }
1337        } else if (ast.tokencode == TBaseType.rrw_oracle_offset) {
1338            TSourceToken stNext = ast.searchToken(TBaseType.rrw_oracle_row, 2);
1339            if (stNext == null) {
1340                stNext = ast.searchToken(TBaseType.rrw_oracle_rows, 2);
1341            }
1342            if (stNext != null) {
1343                ast.tokencode = TBaseType.rrw_oracle_offset_row;
1344            }
1345        } else if (ast.tokencode == TBaseType.rrw_translate) {
1346            TSourceToken stNext = ast.searchToken("(", 2);
1347            if (stNext == null) {
1348                ast.tokencode = TBaseType.ident;
1349            }
1350        } else if (ast.tokencode == TBaseType.rrw_constraint) {
1351            TSourceToken stNext = ast.nextSolidToken();
1352            if (stNext == null) {
1353                ast.tokencode = TBaseType.ident;
1354            } else {
1355                if (stNext.tokencode != TBaseType.ident) {
1356                    ast.tokencode = TBaseType.ident;
1357                }
1358            }
1359        } else if (ast.tokencode == TBaseType.rrw_oracle_without) {
1360            TSourceToken stNext = ast.searchToken(TBaseType.rrw_oracle_count, 1);
1361            if (stNext != null) {
1362                ast.tokencode = TBaseType.rrw_oracle_without_before_count;
1363            }
1364        } else if (ast.tokencode == TBaseType.rrw_bulk) {
1365            TSourceToken stNext = ast.searchToken(TBaseType.rrw_oracle_collect, 1);
1366            if (stNext == null) {
1367                ast.tokencode = TBaseType.ident;
1368            }
1369        } else if (ast.tokencode == TBaseType.rrw_oracle_model) {
1370            TSourceToken stNext = ast.nextSolidToken();
1371            if (stNext != null) {
1372                switch (stNext.toString().toUpperCase()) {
1373                    case "RETURN":
1374                    case "REFERENCE":
1375                    case "IGNORE":
1376                    case "KEEP":
1377                    case "UNIQUE":
1378                    case "PARTITION":
1379                    case "DIMENSION":
1380                    case "MEASURES":
1381                    case "RULES":
1382                        ast.tokencode = TBaseType.rrw_oracle_model_in_model_clause;
1383                        break;
1384                    default:
1385                        ;
1386                }
1387            }
1388        }
1389    }
1390
1391    private void appendToken(TCustomSqlStatement statement, TSourceToken token) {
1392        if (statement == null || token == null) {
1393            return;
1394        }
1395        token.stmt = statement;
1396        statement.sourcetokenlist.add(token);
1397    }
1398
1399    // ========== Error Handling and Recovery ==========
1400
1401    /**
1402     * Find all syntax errors in PL/SQL statements recursively.
1403     * Extracted from TGSqlParser.findAllSyntaxErrorsInPlsql().
1404     */
1405    private void findAllSyntaxErrorsInPlsql(TCustomSqlStatement psql) {
1406        if (psql.getErrorCount() > 0) {
1407            copyErrorsFromStatement(psql);
1408        }
1409
1410        for (int k = 0; k < psql.getStatements().size(); k++) {
1411            findAllSyntaxErrorsInPlsql(psql.getStatements().get(k));
1412        }
1413    }
1414
1415    /**
1416     * Handle error recovery for CREATE TABLE/INDEX statements.
1417     * Dameng allows table properties that may not be fully parsed.
1418     * This method marks unparseable properties as SQL*Plus commands to skip them.
1419     *
1420     * <p>Extracted from TGSqlParser.doparse() lines 16916-16971
1421     */
1422    private void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) {
1423        if (((stmt.sqlstatementtype == ESqlStatementType.sstcreatetable) ||
1424             (stmt.sqlstatementtype == ESqlStatementType.sstcreateindex)) &&
1425            (!TBaseType.c_createTableStrictParsing)) {
1426
1427            // Find the closing parenthesis of table definition
1428            int nested = 0;
1429            boolean isIgnore = false, isFoundIgnoreToken = false;
1430            TSourceToken firstIgnoreToken = null;
1431
1432            for (int k = 0; k < stmt.sourcetokenlist.size(); k++) {
1433                TSourceToken st = stmt.sourcetokenlist.get(k);
1434
1435                if (isIgnore) {
1436                    if (st.issolidtoken() && (st.tokencode != ';')) {
1437                        isFoundIgnoreToken = true;
1438                        if (firstIgnoreToken == null) {
1439                            firstIgnoreToken = st;
1440                        }
1441                    }
1442                    if (st.tokencode != ';') {
1443                        st.tokencode = TBaseType.sqlpluscmd;
1444                    }
1445                    continue;
1446                }
1447
1448                if (st.tokencode == (int) ')') {
1449                    nested--;
1450                    if (nested == 0) {
1451                        // Check if next token is "AS ( SELECT"
1452                        boolean isSelect = false;
1453                        TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1);
1454                        if (st1 != null) {
1455                            TSourceToken st2 = st.searchToken((int) '(', 2);
1456                            if (st2 != null) {
1457                                TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3);
1458                                isSelect = (st3 != null);
1459                            }
1460                        }
1461                        if (!isSelect) isIgnore = true;
1462                    }
1463                }
1464
1465                if ((st.tokencode == (int) '(') || (st.tokencode == TBaseType.left_parenthesis_2)) {
1466                    nested++;
1467                }
1468            }
1469
1470            // Verify it's a valid Dameng table property
1471            if ((firstIgnoreToken != null) &&
1472                (!TBaseType.searchOracleTablePros(firstIgnoreToken.toString()))) {
1473                // Not a valid property, keep the error
1474                isFoundIgnoreToken = false;
1475            }
1476
1477            // Retry parsing if we found ignoreable properties
1478            if (isFoundIgnoreToken) {
1479                stmt.clearError();
1480                stmt.parsestatement(null, false);
1481            }
1482        }
1483    }
1484
1485    /**
1486     * Copy syntax errors from a statement to our error list.
1487     * Extracted from TGSqlParser.copyerrormsg().
1488     */
1489
1490    @Override
1491    public String toString() {
1492        return "DamengSqlParser{vendor=" + vendor + "}";
1493    }
1494
1495    // ========== Main Dameng Tokenization ==========
1496    // Core tokenization logic extracted from TGSqlParser.dodamengsqltexttotokenlist()
1497
1498    /**
1499     * Perform Dameng-specific tokenization with SQL*Plus command detection.
1500     * <p>
1501     * This method implements Dameng's complex tokenization rules including:
1502     * <ul>
1503     *   <li>SQL*Plus command detection (SPOOL, SET, START, etc.)</li>
1504     *   <li>Forward slash disambiguation (division vs PL/SQL delimiter)</li>
1505     *   <li>Dameng-specific keyword transformations (INNER, TYPE, FULL, etc.)</li>
1506     *   <li>Context-dependent token code modifications</li>
1507     * </ul>
1508     *
1509     * <p><b>State Machine:</b> Uses 5 boolean flags to track tokenization state:
1510     * <ul>
1511     *   <li>{@code insqlpluscmd} - Currently inside SQL*Plus command</li>
1512     *   <li>{@code isvalidplace} - Valid place to start SQL*Plus command</li>
1513     *   <li>{@code waitingreturnforfloatdiv} - Slash seen, waiting for newline</li>
1514     *   <li>{@code waitingreturnforsemicolon} - Semicolon seen, waiting for newline</li>
1515     *   <li>{@code continuesqlplusatnewline} - SQL*Plus command continues to next line</li>
1516     * </ul>
1517     *
1518     * <p><b>Extracted from:</b> TGSqlParser.dodamengsqltexttotokenlist() (lines 3931-4298)
1519     *
1520     * @throws RuntimeException if tokenization fails
1521     */
1522    private void dodamengsqltexttotokenlist() {
1523        // Initialize state machine for SQL*Plus command detection
1524        insqlpluscmd = false;
1525        isvalidplace = true;
1526        waitingreturnforfloatdiv = false;
1527        waitingreturnforsemicolon = false;
1528        continuesqlplusatnewline = false;
1529
1530        ESqlPlusCmd currentCmdType = ESqlPlusCmd.spcUnknown;
1531
1532        TSourceToken lct = null, prevst = null;
1533
1534        TSourceToken asourcetoken, lcprevst;
1535        int yychar;
1536
1537        asourcetoken = getanewsourcetoken();
1538        if (asourcetoken == null) return;
1539        yychar = asourcetoken.tokencode;
1540
1541        while (yychar > 0) {
1542            sourcetokenlist.add(asourcetoken);
1543
1544            switch (yychar) {
1545                case TBaseType.cmtdoublehyphen:
1546                case TBaseType.cmtslashstar:
1547                case TBaseType.lexspace: {
1548                    if (insqlpluscmd) {
1549                        asourcetoken.insqlpluscmd = true;
1550                    }
1551                    break;
1552                }
1553
1554                case TBaseType.lexnewline: {
1555                    if (insqlpluscmd) {
1556                        insqlpluscmd = false;
1557                        isvalidplace = true;
1558
1559                        if (continuesqlplusatnewline) {
1560                            insqlpluscmd = true;
1561                            isvalidplace = false;
1562                            asourcetoken.insqlpluscmd = true;
1563                        }
1564
1565                        if (!insqlpluscmd) {
1566                            currentCmdType = ESqlPlusCmd.spcUnknown;
1567                        }
1568                    }
1569
1570                    if (waitingreturnforsemicolon) {
1571                        isvalidplace = true;
1572                    }
1573
1574                    if (waitingreturnforfloatdiv) {
1575                        isvalidplace = true;
1576                        lct.tokencode = TBaseType.sqlpluscmd;
1577                        if (lct.tokentype != ETokenType.ttslash) {
1578                            lct.tokentype = ETokenType.ttsqlpluscmd;
1579                        }
1580                    }
1581
1582                    if (countLines(asourcetoken.toString()) > 1) {
1583                        // There is a line after select, so spool is the right place to start a sqlplus command
1584                        isvalidplace = true;
1585                    }
1586
1587                    flexer.insqlpluscmd = insqlpluscmd;
1588                    break;
1589                }
1590
1591                default: {
1592                    // Solid token
1593                    // Save semicolon flag before clearing: slash after semicolon on
1594                    // the same line (e.g. "END; /") should be a SQL*Plus delimiter,
1595                    // not division.
1596                    boolean prevWasSemicolon = waitingreturnforsemicolon;
1597                    continuesqlplusatnewline = false;
1598                    waitingreturnforsemicolon = false;
1599                    waitingreturnforfloatdiv = false;
1600
1601                    if (insqlpluscmd) {
1602                        asourcetoken.insqlpluscmd = true;
1603                        if (asourcetoken.toString().equalsIgnoreCase("-")) {
1604                            continuesqlplusatnewline = true;
1605                        }
1606                    } else {
1607                        if (asourcetoken.tokentype == ETokenType.ttsemicolon) {
1608                            waitingreturnforsemicolon = true;
1609                        }
1610
1611                        if ((asourcetoken.tokentype == ETokenType.ttslash)
1612                                && (isvalidplace || prevWasSemicolon || (isValidPlaceForDivToSqlplusCmd(sourcetokenlist, asourcetoken.posinlist)))) {
1613                            lct = asourcetoken;
1614                            waitingreturnforfloatdiv = true;
1615                        }
1616
1617                        currentCmdType = TSqlplusCmdStatement.searchCmd(asourcetoken.toString(), asourcetoken.nextToken());
1618                        if (currentCmdType != ESqlPlusCmd.spcUnknown) {
1619                            if (isvalidplace) {
1620                                TSourceToken lnbreak = null;
1621                                boolean aRealSqlplusCmd = true;
1622                                if (sourcetokenlist.curpos > 0) {
1623                                    lnbreak = sourcetokenlist.get(sourcetokenlist.curpos - 1);
1624                                    aRealSqlplusCmd = !spaceAtTheEndOfReturnToken(lnbreak.toString());
1625                                }
1626
1627                                if (aRealSqlplusCmd) {
1628                                    asourcetoken.prevTokenCode = asourcetoken.tokencode;
1629                                    asourcetoken.tokencode = TBaseType.sqlpluscmd;
1630                                    if (asourcetoken.tokentype != ETokenType.ttslash) {
1631                                        asourcetoken.tokentype = ETokenType.ttsqlpluscmd;
1632                                    }
1633                                    insqlpluscmd = true;
1634                                    flexer.insqlpluscmd = insqlpluscmd;
1635                                }
1636                            } else if ((asourcetoken.tokencode == TBaseType.rrw_connect) && (sourcetokenlist.returnbeforecurtoken(true))) {
1637                                asourcetoken.tokencode = TBaseType.sqlpluscmd;
1638                                if (asourcetoken.tokentype != ETokenType.ttslash) {
1639                                    asourcetoken.tokentype = ETokenType.ttsqlpluscmd;
1640                                }
1641                                insqlpluscmd = true;
1642                                flexer.insqlpluscmd = insqlpluscmd;
1643                            } else if (sourcetokenlist.returnbeforecurtoken(true)) {
1644                                TSourceToken lnbreak = sourcetokenlist.get(sourcetokenlist.curpos - 1);
1645
1646                                if ((countLines(lnbreak.toString()) > 1) && (!spaceAtTheEndOfReturnToken(lnbreak.toString()))) {
1647                                    asourcetoken.tokencode = TBaseType.sqlpluscmd;
1648                                    if (asourcetoken.tokentype != ETokenType.ttslash) {
1649                                        asourcetoken.tokentype = ETokenType.ttsqlpluscmd;
1650                                    }
1651                                    insqlpluscmd = true;
1652                                    flexer.insqlpluscmd = insqlpluscmd;
1653                                }
1654                            }
1655                        }
1656                    }
1657
1658                    isvalidplace = false;
1659
1660                    // Dameng-specific keyword handling (inline to match legacy behavior)
1661                    if (prevst != null) {
1662                        if (prevst.tokencode == TBaseType.rrw_inner) {
1663                            if (asourcetoken.tokencode != flexer.getkeywordvalue("JOIN")) {
1664                                prevst.tokencode = TBaseType.ident;
1665                            }
1666                        } else if ((prevst.tokencode == TBaseType.rrw_not)
1667                                && (asourcetoken.tokencode == flexer.getkeywordvalue("DEFERRABLE"))) {
1668                            prevst.tokencode = flexer.getkeywordvalue("NOT_DEFERRABLE");
1669                        }
1670                    }
1671
1672                    if (asourcetoken.tokencode == TBaseType.rrw_inner) {
1673                        prevst = asourcetoken;
1674                    } else if (asourcetoken.tokencode == TBaseType.rrw_not) {
1675                        prevst = asourcetoken;
1676                    } else {
1677                        prevst = null;
1678                    }
1679
1680                    // Dameng keyword transformations that rely on prev token state
1681                    if ((asourcetoken.tokencode == flexer.getkeywordvalue("DIRECT_LOAD"))
1682                            || (asourcetoken.tokencode == flexer.getkeywordvalue("ALL"))) {
1683                        lcprevst = getprevsolidtoken(asourcetoken);
1684                        if (lcprevst != null) {
1685                            if (lcprevst.tokencode == TBaseType.rrw_for)
1686                                lcprevst.tokencode = TBaseType.rw_for1;
1687                        }
1688                    } else if (asourcetoken.tokencode == TBaseType.rrw_dense_rank) {
1689                        TSourceToken stKeep = asourcetoken.searchToken(TBaseType.rrw_keep, -2);
1690                        if (stKeep != null) {
1691                            stKeep.tokencode = TBaseType.rrw_keep_before_dense_rank;
1692                        }
1693                    } else if (asourcetoken.tokencode == TBaseType.rrw_full) {
1694                        TSourceToken stMatch = asourcetoken.searchToken(TBaseType.rrw_match, -1);
1695                        if (stMatch != null) {
1696                            asourcetoken.tokencode = TBaseType.RW_FULL2;
1697                        }
1698                    } else if (asourcetoken.tokencode == TBaseType.rrw_join) {
1699                        TSourceToken stFull = asourcetoken.searchToken(TBaseType.rrw_full, -1);
1700                        if (stFull != null) {
1701                            stFull.tokencode = TBaseType.RW_FULL2;
1702                        } else {
1703                            TSourceToken stNatural = asourcetoken.searchToken(TBaseType.rrw_natural, -4);
1704                            if (stNatural != null) {
1705                                stNatural.tokencode = TBaseType.RW_NATURAL2;
1706                            }
1707                        }
1708                    } else if (asourcetoken.tokencode == TBaseType.rrw_outer) {
1709                        TSourceToken stFull = asourcetoken.searchToken(TBaseType.rrw_full, -1);
1710                        if (stFull != null) {
1711                            stFull.tokencode = TBaseType.RW_FULL2;
1712                        }
1713                    } else if (asourcetoken.tokencode == TBaseType.rrw_is) {
1714                        TSourceToken stType = asourcetoken.searchToken(TBaseType.rrw_type, -2);
1715                        if (stType != null) {
1716                            stType.tokencode = TBaseType.rrw_type2;
1717                        }
1718                    } else if (asourcetoken.tokencode == TBaseType.rrw_as) {
1719                        TSourceToken stType = asourcetoken.searchToken(TBaseType.rrw_type, -2);
1720                        if (stType != null) {
1721                            stType.tokencode = TBaseType.rrw_type2;
1722                        }
1723                    } else if (asourcetoken.tokencode == TBaseType.rrw_oid) {
1724                        TSourceToken stType = asourcetoken.searchToken(TBaseType.rrw_type, -2);
1725                        if (stType != null) {
1726                            stType.tokencode = TBaseType.rrw_type2;
1727                        }
1728                    } else if (asourcetoken.tokencode == TBaseType.rrw_type) {
1729                        TSourceToken stPrev;
1730                        stPrev = asourcetoken.searchToken(TBaseType.rrw_drop, -1);
1731                        if (stPrev != null) {
1732                            asourcetoken.tokencode = TBaseType.rrw_type2;
1733                        }
1734                        if (asourcetoken.tokencode == TBaseType.rrw_type) {
1735                            stPrev = asourcetoken.searchToken(TBaseType.rrw_of, -1);
1736                            if (stPrev != null) {
1737                                asourcetoken.tokencode = TBaseType.rrw_type2;
1738                            }
1739                        }
1740                        if (asourcetoken.tokencode == TBaseType.rrw_type) {
1741                            stPrev = asourcetoken.searchToken(TBaseType.rrw_create, -1);
1742                            if (stPrev != null) {
1743                                asourcetoken.tokencode = TBaseType.rrw_type2;
1744                            }
1745                        }
1746                        if (asourcetoken.tokencode == TBaseType.rrw_type) {
1747                            stPrev = asourcetoken.searchToken(TBaseType.rrw_replace, -1);
1748                            if (stPrev != null) {
1749                                asourcetoken.tokencode = TBaseType.rrw_type2;
1750                            }
1751                        }
1752                        if (asourcetoken.tokencode == TBaseType.rrw_type) {
1753                            stPrev = asourcetoken.searchToken('%', -1);
1754                            if (stPrev != null) {
1755                                asourcetoken.tokencode = TBaseType.rrw_type2;
1756                            }
1757                        }
1758                    } else if ((asourcetoken.tokencode == TBaseType.rrw_by) || (asourcetoken.tokencode == TBaseType.rrw_to)) {
1759                        lcprevst = getprevsolidtoken(asourcetoken);
1760                        if (lcprevst != null) {
1761                            if ((lcprevst.tokencode == TBaseType.sqlpluscmd) && (lcprevst.toString().equalsIgnoreCase("connect"))) {
1762                                lcprevst.tokencode = TBaseType.rrw_connect;
1763                                lcprevst.tokentype = ETokenType.ttkeyword;
1764                                flexer.insqlpluscmd = false;
1765
1766                                continuesqlplusatnewline = false;
1767                                waitingreturnforsemicolon = false;
1768                                waitingreturnforfloatdiv = false;
1769                                isvalidplace = false;
1770                                insqlpluscmd = false;
1771                            }
1772                        }
1773                    } else if (asourcetoken.tokencode == TBaseType.rrw_with) {
1774                        lcprevst = getprevsolidtoken(asourcetoken);
1775                        if (lcprevst != null) {
1776                            if ((lcprevst.tokencode == TBaseType.sqlpluscmd) && (lcprevst.toString().equalsIgnoreCase("start"))) {
1777                                lcprevst.tokencode = TBaseType.rrw_start;
1778                                lcprevst.tokentype = ETokenType.ttkeyword;
1779                                flexer.insqlpluscmd = false;
1780
1781                                continuesqlplusatnewline = false;
1782                                waitingreturnforsemicolon = false;
1783                                waitingreturnforfloatdiv = false;
1784                                isvalidplace = false;
1785                                insqlpluscmd = false;
1786                            }
1787                        }
1788                    } else if (asourcetoken.tokencode == TBaseType.rrw_set) {
1789                        lcprevst = getprevsolidtoken(asourcetoken);
1790                        if (lcprevst != null) {
1791                            if (lcprevst.getAstext().equalsIgnoreCase("a")) {
1792                                TSourceToken lcpp = getprevsolidtoken(lcprevst);
1793                                if (lcpp != null) {
1794                                    if ((lcpp.tokencode == TBaseType.rrw_not) || (lcpp.tokencode == TBaseType.rrw_is)) {
1795                                        lcprevst.tokencode = TBaseType.rrw_oracle_a_in_aset;
1796                                        asourcetoken.tokencode = TBaseType.rrw_oracle_set_in_aset;
1797                                    }
1798                                }
1799                            }
1800                        }
1801                    }
1802
1803                    break;
1804                }
1805            }
1806
1807            // Get next token
1808            asourcetoken = getanewsourcetoken();
1809            if (asourcetoken != null) {
1810                yychar = asourcetoken.tokencode;
1811
1812                // Handle special case: dot after SQL*Plus commands
1813                if ((asourcetoken.tokencode == '.') && (getprevsolidtoken(asourcetoken) != null)
1814                        && ((currentCmdType == ESqlPlusCmd.spcAppend)
1815                        || (currentCmdType == ESqlPlusCmd.spcChange) || (currentCmdType == ESqlPlusCmd.spcInput)
1816                        || (currentCmdType == ESqlPlusCmd.spcList) || (currentCmdType == ESqlPlusCmd.spcRun))) {
1817                    // a.ent_rp_usr_id is not a real sqlplus command
1818                    TSourceToken lcprevst2 = getprevsolidtoken(asourcetoken);
1819                    lcprevst2.insqlpluscmd = false;
1820                    if (lcprevst2.prevTokenCode != 0) {
1821                        lcprevst2.tokencode = lcprevst2.prevTokenCode;
1822                    } else {
1823                        lcprevst2.tokencode = TBaseType.ident;
1824                    }
1825
1826                    flexer.insqlpluscmd = false;
1827                    continuesqlplusatnewline = false;
1828                    waitingreturnforsemicolon = false;
1829                    waitingreturnforfloatdiv = false;
1830                    isvalidplace = false;
1831                    insqlpluscmd = false;
1832                }
1833            } else {
1834                yychar = 0;
1835
1836                if (waitingreturnforfloatdiv) {
1837                    // / at the end of line treat as sqlplus command
1838                    lct.tokencode = TBaseType.sqlpluscmd;
1839                    if (lct.tokentype != ETokenType.ttslash) {
1840                        lct.tokentype = ETokenType.ttsqlpluscmd;
1841                    }
1842                }
1843            }
1844
1845            if ((yychar == 0) && (prevst != null)) {
1846                if (prevst.tokencode == TBaseType.rrw_inner) {
1847                    prevst.tokencode = TBaseType.ident;
1848                }
1849            }
1850        }
1851    }
1852
1853    // ========== Helper Methods for Tokenization ==========
1854    // These methods support Dameng-specific tokenization logic
1855
1856    /**
1857     * Count number of newlines in a string.
1858     *
1859     * @param s string to analyze
1860     * @return number of line breaks (LF or CR)
1861     */
1862    private int countLines(String s) {
1863        int pos = 0, lf = 0, cr = 0;
1864
1865        while (pos < s.length()) {
1866            if (s.charAt(pos) == '\r') {
1867                cr++;
1868                pos++;
1869                continue;
1870            }
1871            if (s.charAt(pos) == '\n') {
1872                lf++;
1873                pos++;
1874                continue;
1875            }
1876
1877            if (s.charAt(pos) == ' ') {
1878                pos++;
1879                continue;
1880            }
1881            break;
1882        }
1883
1884        if (lf >= cr) return lf;
1885        else return cr;
1886    }
1887
1888    /**
1889     * Check if return token ends with space or tab.
1890     *
1891     * @param s token text
1892     * @return true if ends with space/tab
1893     */
1894    private boolean spaceAtTheEndOfReturnToken(String s) {
1895        if (s == null) return false;
1896        if (s.length() == 0) return false;
1897
1898        return ((s.charAt(s.length() - 1) == ' ') || (s.charAt(s.length() - 1) == '\t'));
1899    }
1900
1901    /**
1902     * Determine if forward slash should be treated as SQL*Plus command delimiter.
1903     * <p>
1904     * Dameng uses '/' as both division operator and SQL*Plus block delimiter.
1905     * This method disambiguates by checking if the '/' appears at the beginning
1906     * of a line (after a return token without trailing whitespace).
1907     *
1908     * @param pstlist token list
1909     * @param pPos position of '/' token
1910     * @return true if '/' should be SQL*Plus command
1911     */
1912    private boolean isValidPlaceForDivToSqlplusCmd(TSourceTokenList pstlist, int pPos) {
1913        boolean ret = false;
1914
1915        if ((pPos <= 0) || (pPos > pstlist.size() - 1)) return ret;
1916
1917        // Token directly before div must be ttreturn without space appending it
1918        gudusoft.gsqlparser.TSourceToken lcst = pstlist.get(pPos - 1);
1919        if (lcst.tokentype != gudusoft.gsqlparser.ETokenType.ttreturn) {
1920            return ret;
1921        }
1922
1923        if (!(lcst.getAstext().charAt(lcst.getAstext().length() - 1) == ' ')) {
1924            ret = true;
1925        }
1926
1927        return ret;
1928    }
1929
1930    /**
1931     * Get previous non-whitespace token.
1932     *
1933     * @param ptoken current token
1934     * @return previous solid token, or null
1935     */
1936    private gudusoft.gsqlparser.TSourceToken getprevsolidtoken(gudusoft.gsqlparser.TSourceToken ptoken) {
1937        gudusoft.gsqlparser.TSourceToken ret = null;
1938        TSourceTokenList lctokenlist = ptoken.container;
1939
1940        if (lctokenlist != null) {
1941            if ((ptoken.posinlist > 0) && (lctokenlist.size() > ptoken.posinlist - 1)) {
1942                if (!(
1943                        (lctokenlist.get(ptoken.posinlist - 1).tokentype == gudusoft.gsqlparser.ETokenType.ttwhitespace)
1944                        || (lctokenlist.get(ptoken.posinlist - 1).tokentype == gudusoft.gsqlparser.ETokenType.ttreturn)
1945                        || (lctokenlist.get(ptoken.posinlist - 1).tokentype == gudusoft.gsqlparser.ETokenType.ttsimplecomment)
1946                        || (lctokenlist.get(ptoken.posinlist - 1).tokentype == gudusoft.gsqlparser.ETokenType.ttbracketedcomment)
1947                )) {
1948                    ret = lctokenlist.get(ptoken.posinlist - 1);
1949                } else {
1950                    ret = lctokenlist.nextsolidtoken(ptoken.posinlist - 1, -1, false);
1951                }
1952            }
1953        }
1954        return ret;
1955    }
1956}